Annotation of XML/parser.c, revision 1.62
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.61 daniel 33: #include "valid.h"
1.1 veillard 34:
1.45 daniel 35: /************************************************************************
36: * *
37: * Parser stacks related functions and macros *
38: * *
39: ************************************************************************/
1.1 veillard 40: /*
1.40 daniel 41: * Generic function for accessing stacks in the Parser Context
1.1 veillard 42: */
43:
1.31 daniel 44: #define PUSH_AND_POP(type, name) \
1.40 daniel 45: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 46: if (ctxt->name##Nr >= ctxt->name##Max) { \
47: ctxt->name##Max *= 2; \
1.40 daniel 48: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
49: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
50: if (ctxt->name##Tab == NULL) { \
1.31 daniel 51: fprintf(stderr, "realloc failed !\n"); \
52: exit(1); \
53: } \
54: } \
1.40 daniel 55: ctxt->name##Tab[ctxt->name##Nr] = value; \
56: ctxt->name = value; \
57: return(ctxt->name##Nr++); \
1.31 daniel 58: } \
1.40 daniel 59: type name##Pop(xmlParserCtxtPtr ctxt) { \
60: if (ctxt->name##Nr <= 0) return(0); \
61: ctxt->name##Nr--; \
1.50 daniel 62: if (ctxt->name##Nr > 0) \
63: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
64: else \
65: ctxt->name = NULL; \
1.40 daniel 66: return(ctxt->name); \
1.31 daniel 67: } \
68:
1.40 daniel 69: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 70: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 71:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR  returns the current pointer to the CHAR to be parsed.
 *   CUR      returns the current CHAR value, i.e. an 8 bit value if compiled
 *            in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *            in UNICODE mode. This should be used internally by the parser
 *            only to compare to ASCII values, otherwise it would break when
 *            running with UTF-8 encoding.
 *   NXT(n)   returns the n'th next CHAR. Same as CUR, it should be used only
 *            to compare on ASCII based substrings.
 *   SKIP(n)  skips n CHARs, and must also be used only to skip ASCII defined
 *            strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context.
 *
 *   CURRENT  returns the current char value, with the full decoding of
 *            UTF-8 if we are using this mode. It returns an int.
 *   NEXT     skips to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pops finished inputs (entities) on the
 *            fly and keeps the line/col counters up to date.
 *            It returns the pointer to the current CHAR.
 */

#define CUR (*ctxt->input->cur)
/* parenthesized so that the assignment cannot bind to neighbouring operators */
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

#define SKIP_BLANKS 							\
    while (IS_BLANK(*(ctxt->input->cur))) NEXT

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
/*
 * On a 0 CHAR the current input is exhausted: pop it and continue with the
 * one below. Otherwise advance by one CHAR, counting lines on '\n'.
 */
#define NEXT ((*ctxt->input->cur) ?					\
                (((*(ctxt->input->cur) == '\n') ?			\
                    (ctxt->input->line++, ctxt->input->col = 1) :	\
                    (ctxt->input->col++)), ctxt->input->cur++) :	\
                (xmlPopInput(ctxt), ctxt->input->cur))
#else
/* TODO: UTF-8 decoding versions of CURRENT and NEXT are not written yet */
#endif
1.42 daniel 115:
1.40 daniel 116:
1.50 daniel 117: /**
118: * xmlPopInput:
119: * @ctxt: an XML parser context
120: *
1.40 daniel 121: * xmlPopInput: the current input pointed by ctxt->input came to an end
122: * pop it and return the next char.
1.45 daniel 123: *
124: * TODO A deallocation of the popped Input structure is needed
1.50 daniel 125: * return values: the current CHAR in the parser context
1.40 daniel 126: */
1.55 daniel 127: CHAR
128: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 129: if (ctxt->inputNr == 1) return(0); /* End of main Input */
130: inputPop(ctxt);
131: return(CUR);
132: }
133:
1.50 daniel 134: /**
135: * xmlPushInput:
136: * @ctxt: an XML parser context
137: * @input: an XML parser input fragment (entity, XML fragment ...).
138: *
1.40 daniel 139: * xmlPushInput: switch to a new input stream which is stacked on top
140: * of the previous one(s).
141: */
1.55 daniel 142: void
143: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 144: if (input == NULL) return;
145: inputPush(ctxt, input);
146: }
147:
1.50 daniel 148: /**
149: * xmlNewEntityInputStream:
150: * @ctxt: an XML parser context
151: * @entity: an Entity pointer
152: *
1.45 daniel 153: * Create a new input stream based on a memory buffer.
1.50 daniel 154: * return vakues: the new input stream
1.45 daniel 155: */
1.50 daniel 156: xmlParserInputPtr
157: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 158: xmlParserInputPtr input;
159:
160: if (entity == NULL) {
1.55 daniel 161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
162: ctxt->sax->error(ctxt,
1.45 daniel 163: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 164: return(NULL);
1.45 daniel 165: }
166: if (entity->content == NULL) {
1.55 daniel 167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
168: ctxt->sax->error(ctxt,
1.45 daniel 169: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 170: return(NULL);
1.45 daniel 171: }
172: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
173: if (input == NULL) {
1.55 daniel 174: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
175: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
1.50 daniel 176: return(NULL);
1.45 daniel 177: }
178: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
179: input->base = entity->content;
180: input->cur = entity->content;
181: input->line = 1;
182: input->col = 1;
1.50 daniel 183: return(input);
1.45 daniel 184: }
185:
1.59 daniel 186: /**
187: * xmlNewStringInputStream:
188: * @ctxt: an XML parser context
189: * @entity: an Entity pointer
190: *
191: * Create a new input stream based on a memory buffer.
192: * return vakues: the new input stream
193: */
194: xmlParserInputPtr
195: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *string) {
196: xmlParserInputPtr input;
197:
198: if (string == NULL) {
199: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
200: ctxt->sax->error(ctxt,
201: "internal: xmlNewStringInputStream string = NULL\n");
202: return(NULL);
203: }
204: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
205: if (input == NULL) {
206: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
207: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
208: return(NULL);
209: }
210: input->filename = NULL;
211: input->base = string;
212: input->cur = string;
213: input->line = 1;
214: input->col = 1;
215: return(input);
216: }
217:
1.45 daniel 218: /*
1.40 daniel 219: * A few macros needed to help building the parser.
220: */
221:
1.1 veillard 222: #ifdef UNICODE
1.30 daniel 223: /************************************************************************
224: * *
225: * UNICODE version of the macros. *
226: * *
227: ************************************************************************/
1.1 veillard 228: /*
1.22 daniel 229: * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
230: * | [#x10000-#x10FFFF]
231: * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
1.1 veillard 232: */
233: #define IS_CHAR(c) \
1.59 daniel 234: ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
235: (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \
236: (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) && \
237: ((c) <= 0x10FFFF))
1.1 veillard 238:
1.22 daniel 239: /*
240: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
241: */
1.42 daniel 242: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
243: ((c) == 0x0D))
1.1 veillard 244:
1.22 daniel 245: /*
1.30 daniel 246: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 247: *
1.30 daniel 248: * VI is your friend !
249: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
250: * and
251: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 252: */
1.1 veillard 253: #define IS_BASECHAR(c) \
1.30 daniel 254: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
255: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
256: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
257: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
258: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
259: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
260: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
261: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
262: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
263: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
264: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
265: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
266: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
267: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
268: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
269: ((c) == 0x0386) || \
270: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
271: ((c) == 0x038C) || \
272: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
273: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
274: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
275: ((c) == 0x03DA) || \
276: ((c) == 0x03DC) || \
277: ((c) == 0x03DE) || \
278: ((c) == 0x03E0) || \
279: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
280: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
281: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
282: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
283: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
284: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
285: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
286: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
287: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
288: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
289: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
290: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
291: ((c) == 0x0559) || \
292: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
293: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
294: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
295: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
296: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
297: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
298: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
299: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
300: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
301: ((c) == 0x06D5) || \
302: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
303: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
304: ((c) == 0x093D) || \
305: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
306: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
307: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
308: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
309: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
310: ((c) == 0x09B2) || \
311: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
312: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
313: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
314: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
315: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
316: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
317: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
318: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
319: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
320: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
321: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
322: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
323: ((c) == 0x0A5E) || \
324: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
325: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
326: ((c) == 0x0A8D) || \
327: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
328: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
329: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
330: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
331: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
332: ((c) == 0x0ABD) || \
333: ((c) == 0x0AE0) || \
334: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
335: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
336: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
337: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
338: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
339: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
340: ((c) == 0x0B3D) || \
341: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
342: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
343: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
344: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
345: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
346: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
347: ((c) == 0x0B9C) || \
348: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
349: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
350: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
351: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
352: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
353: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
354: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
355: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
356: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
357: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
358: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
359: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
360: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
361: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
362: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
363: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
364: ((c) == 0x0CDE) || \
365: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
366: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
367: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
368: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
369: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
370: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
371: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
372: ((c) == 0x0E30) || \
373: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
374: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
375: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
376: ((c) == 0x0E84) || \
377: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
378: ((c) == 0x0E8A) || \
379: ((c) == 0x0E8D) || \
380: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
381: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
382: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
383: ((c) == 0x0EA5) || \
384: ((c) == 0x0EA7) || \
385: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
386: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
387: ((c) == 0x0EB0) || \
388: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
389: ((c) == 0x0EBD) || \
390: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
391: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
392: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
393: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
394: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
395: ((c) == 0x1100) || \
396: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
397: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
398: ((c) == 0x1109) || \
399: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
400: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
401: ((c) == 0x113C) || \
402: ((c) == 0x113E) || \
403: ((c) == 0x1140) || \
404: ((c) == 0x114C) || \
405: ((c) == 0x114E) || \
406: ((c) == 0x1150) || \
407: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
408: ((c) == 0x1159) || \
409: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
410: ((c) == 0x1163) || \
411: ((c) == 0x1165) || \
412: ((c) == 0x1167) || \
413: ((c) == 0x1169) || \
414: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
415: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
416: ((c) == 0x1175) || \
417: ((c) == 0x119E) || \
418: ((c) == 0x11A8) || \
419: ((c) == 0x11AB) || \
420: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
421: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
422: ((c) == 0x11BA) || \
423: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
424: ((c) == 0x11EB) || \
425: ((c) == 0x11F0) || \
426: ((c) == 0x11F9) || \
427: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
428: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
429: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
430: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
431: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
432: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
433: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
434: ((c) == 0x1F59) || \
435: ((c) == 0x1F5B) || \
436: ((c) == 0x1F5D) || \
437: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
438: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
439: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
440: ((c) == 0x1FBE) || \
441: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
442: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
443: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
444: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
445: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
446: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
447: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
448: ((c) == 0x2126) || \
449: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
450: ((c) == 0x212E) || \
451: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
452: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
453: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
454: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
455: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 456:
1.22 daniel 457: /*
458: * [88] Digit ::= ... long list see REC ...
459: */
1.30 daniel 460: #define IS_DIGIT(c) \
461: ((((c) >= 0x0030) && ((c) <= 0x0039)) || \
462: (((c) >= 0x0660) && ((c) <= 0x0669)) || \
463: (((c) >= 0x06F0) && ((c) <= 0x06F9)) || \
464: (((c) >= 0x0966) && ((c) <= 0x096F)) || \
465: (((c) >= 0x09E6) && ((c) <= 0x09EF)) || \
466: (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || \
467: (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || \
468: (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || \
469: (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || \
470: (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || \
471: (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || \
472: (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || \
473: (((c) >= 0x0E50) && ((c) <= 0x0E59)) || \
474: (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || \
475: (((c) >= 0x0F20) && ((c) <= 0x0F29)))
1.1 veillard 476:
1.22 daniel 477: /*
478: * [87] CombiningChar ::= ... long list see REC ...
479: */
1.30 daniel 480: #define IS_COMBINING(c) \
481: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
482: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
483: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
484: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
485: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
486: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
487: ((c) == 0x05BF) || \
488: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
489: ((c) == 0x05C4) || \
490: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
491: ((c) == 0x0670) || \
492: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
493: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
494: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
495: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
496: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
497: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
498: ((c) == 0x093C) || \
499: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
500: ((c) == 0x094D) || \
501: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
502: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
503: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
504: ((c) == 0x09BC) || \
505: ((c) == 0x09BE) || \
506: ((c) == 0x09BF) || \
507: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
508: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
509: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
510: ((c) == 0x09D7) || \
511: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
512: ((c) == 0x0A02) || \
513: ((c) == 0x0A3C) || \
514: ((c) == 0x0A3E) || \
515: ((c) == 0x0A3F) || \
516: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
517: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
518: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
519: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
520: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
521: ((c) == 0x0ABC) || \
522: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
523: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
524: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
525: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
526: ((c) == 0x0B3C) || \
527: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
528: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
529: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
530: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
531: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
532: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
533: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
534: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
535: ((c) == 0x0BD7) || \
536: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
537: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
538: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
539: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
540: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
541: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
542: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
543: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
544: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
545: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
546: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
547: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
548: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
549: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
550: ((c) == 0x0D57) || \
551: ((c) == 0x0E31) || \
552: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
553: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
554: ((c) == 0x0EB1) || \
555: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
556: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
557: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
558: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
559: ((c) == 0x0F35) || \
560: ((c) == 0x0F37) || \
561: ((c) == 0x0F39) || \
562: ((c) == 0x0F3E) || \
563: ((c) == 0x0F3F) || \
564: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
565: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
566: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
567: ((c) == 0x0F97) || \
568: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
569: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
570: ((c) == 0x0FB9) || \
571: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
572: ((c) == 0x20E1) || \
573: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
574: ((c) == 0x3099) || \
575: ((c) == 0x309A))
1.3 veillard 576:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Evaluates its argument several times: do not pass an expression with
 * side effects.
 */
#define IS_EXTENDER(c)							\
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||		\
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||		\
     ((c) == 0xec6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||				\
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 589:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE(review): an extra range 0xF900-0xFA2D used to be accepted here; it is
 * not part of production [86] and was dropped to match the REC.
 */
#define IS_IDEOGRAPHIC(c)						\
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||				\
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||				\
      ((c) == 0x3007))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

603:
604: #else
1.55 daniel 605: #ifndef USE_UTF_8
1.30 daniel 606: /************************************************************************
607: * *
1.55 daniel 608: * 8bits / ISO-Latin version of the macros. *
1.30 daniel 609: * *
610: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * All the macros below evaluate their argument several times: do not pass
 * an expression with side effects.
 */
#define IS_CHAR(c)							\
    ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
      (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) &&		\
     (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) <= 0x10FFFF))

/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Only the part of the list below 0x100 is relevant here.
 * A former clause "(c) >= 0xaa && (c) <= 0x5b" could never be true and has
 * been removed; the accepted set is unchanged.
 * NOTE(review): 0xba is accepted although it does not appear in production
 * [85] - kept to preserve behaviour, to be confirmed.
 */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7a)) ||				\
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||				\
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||				\
     (((c) >= 0xf8) && ((c) <= 0xff)) ||				\
      ((c) == 0xba))

/*
 * [88] Digit ::= ... long list see REC ...
 */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) IS_BASECHAR(c)


/*
 * [87] CombiningChar ::= ... long list see REC ...
 */
#define IS_COMBINING(c) 0

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 */
#define IS_EXTENDER(c) ((c) == 0xb7)
655:
1.55 daniel 656: #else /* USE_UTF_8 */
657: /************************************************************************
658: * *
659: * 8bits / UTF-8 version of the macros. *
660: * *
661: ************************************************************************/
662:
663: TODO !!!
664: #endif /* USE_UTF_8 */
1.21 daniel 665: #endif /* !UNICODE */
1.1 veillard 666:
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 */
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) ||	\
                     ((c) == 0x0D))

/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 */
#define IS_PUBIDCHAR(c)							\
    (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||		\
     (((c) >= 'a') && ((c) <= 'z')) ||					\
     (((c) >= 'A') && ((c) <= 'Z')) ||					\
     (((c) >= '0') && ((c) <= '9')) ||					\
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||	\
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||	\
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||	\
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||	\
     ((c) == '$') || ((c) == '_') || ((c) == '%'))

/*
 * Move p past ONE end of line: CR, LF, CR LF or LF CR.
 * The tests used to compare against 0x13 / 0x10 (decimal 13 and 10 written
 * as hexadecimal) and therefore never matched a real line terminator.
 * p must be a modifiable pointer lvalue.
 */
#define SKIP_EOL(p) 							\
    if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; }		\
    else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; }

/* Advance p up to the next '>' or to the first non-Char value (e.g. 0). */
#define MOVETO_ENDTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++

/* Advance p up to the next '<' or to the first non-Char value (e.g. 0). */
#define MOVETO_STARTTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
1.1 veillard 698:
1.28 daniel 699: /************************************************************************
700: * *
701: * Commodity functions to handle CHARs *
702: * *
703: ************************************************************************/
704:
1.50 daniel 705: /**
706: * xmlStrndup:
707: * @cur: the input CHAR *
708: * @len: the len of @cur
709: *
710: * a strndup for array of CHAR's
711: * return values: a new CHAR * or NULL
1.1 veillard 712: */
713:
1.55 daniel 714: CHAR *
715: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 716: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
717:
718: if (ret == NULL) {
719: fprintf(stderr, "malloc of %d byte failed\n",
720: (len + 1) * sizeof(CHAR));
721: return(NULL);
722: }
723: memcpy(ret, cur, len * sizeof(CHAR));
724: ret[len] = 0;
725: return(ret);
726: }
727:
1.50 daniel 728: /**
729: * xmlStrdup:
730: * @cur: the input CHAR *
731: *
732: * a strdup for array of CHAR's
733: * return values: a new CHAR * or NULL
1.1 veillard 734: */
735:
1.55 daniel 736: CHAR *
737: xmlStrdup(const CHAR *cur) {
1.6 httpng 738: const CHAR *p = cur;
1.1 veillard 739:
740: while (IS_CHAR(*p)) p++;
741: return(xmlStrndup(cur, p - cur));
742: }
743:
1.50 daniel 744: /**
745: * xmlCharStrndup:
746: * @cur: the input char *
747: * @len: the len of @cur
748: *
749: * a strndup for char's to CHAR's
750: * return values: a new CHAR * or NULL
1.45 daniel 751: */
752:
1.55 daniel 753: CHAR *
754: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 755: int i;
756: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
757:
758: if (ret == NULL) {
759: fprintf(stderr, "malloc of %d byte failed\n",
760: (len + 1) * sizeof(CHAR));
761: return(NULL);
762: }
763: for (i = 0;i < len;i++)
764: ret[i] = (CHAR) cur[i];
765: ret[len] = 0;
766: return(ret);
767: }
768:
1.50 daniel 769: /**
770: * xmlCharStrdup:
771: * @cur: the input char *
772: * @len: the len of @cur
773: *
774: * a strdup for char's to CHAR's
775: * return values: a new CHAR * or NULL
1.45 daniel 776: */
777:
1.55 daniel 778: CHAR *
779: xmlCharStrdup(const char *cur) {
1.45 daniel 780: const char *p = cur;
781:
782: while (*p != '\0') p++;
783: return(xmlCharStrndup(cur, p - cur));
784: }
785:
1.50 daniel 786: /**
787: * xmlStrcmp:
788: * @str1: the first CHAR *
789: * @str2: the second CHAR *
790: *
791: * a strcmp for CHAR's
792: * return values: the integer result of the comparison
1.14 veillard 793: */
794:
1.55 daniel 795: int
796: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 797: register int tmp;
798:
799: do {
800: tmp = *str1++ - *str2++;
801: if (tmp != 0) return(tmp);
802: } while ((*str1 != 0) && (*str2 != 0));
803: return (*str1 - *str2);
804: }
805:
1.50 daniel 806: /**
807: * xmlStrncmp:
808: * @str1: the first CHAR *
809: * @str2: the second CHAR *
810: * @len: the max comparison length
811: *
812: * a strncmp for CHAR's
813: * return values: the integer result of the comparison
1.14 veillard 814: */
815:
1.55 daniel 816: int
817: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 818: register int tmp;
819:
820: if (len <= 0) return(0);
821: do {
822: tmp = *str1++ - *str2++;
823: if (tmp != 0) return(tmp);
824: len--;
825: if (len <= 0) return(0);
826: } while ((*str1 != 0) && (*str2 != 0));
827: return (*str1 - *str2);
828: }
829:
1.50 daniel 830: /**
831: * xmlStrchr:
832: * @str: the CHAR * array
833: * @val: the CHAR to search
834: *
835: * a strchr for CHAR's
836: * return values: the CHAR * for the first occurence or NULL.
1.14 veillard 837: */
838:
1.55 daniel 839: CHAR *
840: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 841: while (*str != 0) {
842: if (*str == val) return((CHAR *) str);
843: str++;
844: }
845: return(NULL);
846: }
1.28 daniel 847:
1.50 daniel 848: /**
849: * xmlStrlen:
850: * @str: the CHAR * array
851: *
852: * lenght of a CHAR's string
853: * return values: the number of CHAR contained in the ARRAY.
1.45 daniel 854: */
855:
1.55 daniel 856: int
857: xmlStrlen(const CHAR *str) {
1.45 daniel 858: int len = 0;
859:
860: if (str == NULL) return(0);
861: while (*str != 0) {
862: str++;
863: len++;
864: }
865: return(len);
866: }
867:
1.50 daniel 868: /**
869: * xmlStrncat:
870: * @first: the original CHAR * array
871: * @add: the CHAR * array added
872: * @len: the length of @add
873: *
874: * a strncat for array of CHAR's
875: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 876: */
877:
1.55 daniel 878: CHAR *
879: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 880: int size;
881: CHAR *ret;
882:
883: if ((add == NULL) || (len == 0))
884: return(cur);
885: if (cur == NULL)
886: return(xmlStrndup(add, len));
887:
888: size = xmlStrlen(cur);
889: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
890: if (ret == NULL) {
891: fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
892: (size + len + 1) * sizeof(CHAR));
893: return(cur);
894: }
895: memcpy(&ret[size], add, len * sizeof(CHAR));
896: ret[size + len] = 0;
897: return(ret);
898: }
899:
1.50 daniel 900: /**
901: * xmlStrcat:
902: * @first: the original CHAR * array
903: * @add: the CHAR * array added
904: *
905: * a strcat for array of CHAR's
906: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 907: */
908:
1.55 daniel 909: CHAR *
910: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 911: const CHAR *p = add;
912:
913: if (add == NULL) return(cur);
914: if (cur == NULL)
915: return(xmlStrdup(add));
916:
917: while (IS_CHAR(*p)) p++;
918: return(xmlStrncat(cur, add, p - add));
919: }
920:
921: /************************************************************************
922: * *
923: * Commodity functions, cleanup needed ? *
924: * *
925: ************************************************************************/
926:
1.50 daniel 927: /**
928: * areBlanks:
929: * @ctxt: an XML parser context
930: * @str: a CHAR *
931: * @len: the size of @str
932: *
1.45 daniel 933: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 934: *
935: * TODO: to be corrected accodingly to DTD information if available
936: * return values: 1 if ignorable 0 otherwise.
1.45 daniel 937: */
938:
939: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
940: int i;
941: xmlNodePtr lastChild;
942:
943: for (i = 0;i < len;i++)
944: if (!(IS_BLANK(str[i]))) return(0);
945:
946: if (CUR != '<') return(0);
947: lastChild = xmlGetLastChild(ctxt->node);
948: if (lastChild == NULL) {
949: if (ctxt->node->content != NULL) return(0);
950: } else if (xmlNodeIsText(lastChild))
951: return(0);
952: return(1);
953: }
954:
1.50 daniel 955: /**
956: * xmlHandleEntity:
957: * @ctxt: an XML parser context
958: * @entity: an XML entity pointer.
959: *
960: * Default handling of defined entities, when should we define a new input
1.45 daniel 961: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 962: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 963: */
964:
1.55 daniel 965: void
966: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 967: int len;
1.50 daniel 968: xmlParserInputPtr input;
1.45 daniel 969:
970: if (entity->content == NULL) {
1.55 daniel 971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
972: ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 973: entity->name);
1.59 daniel 974: ctxt->wellFormed = 0;
1.45 daniel 975: return;
976: }
977: len = xmlStrlen(entity->content);
978: if (len <= 2) goto handle_as_char;
979:
980: /*
981: * Redefine its content as an input stream.
982: */
1.50 daniel 983: input = xmlNewEntityInputStream(ctxt, entity);
984: xmlPushInput(ctxt, input);
1.45 daniel 985: return;
986:
987: handle_as_char:
988: /*
989: * Just handle the content as a set of chars.
990: */
991: if (ctxt->sax != NULL)
992: ctxt->sax->characters(ctxt, entity->content, 0, len);
993:
994: }
995:
996: /*
997: * Forward declarations needed for recursive behaviour.
998: */
999: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.50 daniel 1000: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
1001: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1002:
1.28 daniel 1003: /************************************************************************
1004: * *
1005: * Extra stuff for namespace support *
1006: * Relates to http://www.w3.org/TR/WD-xml-names *
1007: * *
1008: ************************************************************************/
1009:
1.50 daniel 1010: /**
1011: * xmlNamespaceParseNCName:
1012: * @ctxt: an XML parser context
1013: *
1014: * parse an XML namespace name.
1.28 daniel 1015: *
1016: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1017: *
1018: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1019: * CombiningChar | Extender
1.50 daniel 1020: * return values: the namespace name or NULL
1.28 daniel 1021: */
1022:
1.55 daniel 1023: CHAR *
1024: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.28 daniel 1025: const CHAR *q;
1026: CHAR *ret = NULL;
1027:
1.40 daniel 1028: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1029: q = NEXT;
1.28 daniel 1030:
1.40 daniel 1031: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1032: (CUR == '.') || (CUR == '-') ||
1033: (CUR == '_') ||
1034: (IS_COMBINING(CUR)) ||
1035: (IS_EXTENDER(CUR)))
1036: NEXT;
1.28 daniel 1037:
1.40 daniel 1038: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 1039:
1040: return(ret);
1041: }
1042:
1.50 daniel 1043: /**
1044: * xmlNamespaceParseQName:
1045: * @ctxt: an XML parser context
1046: * @prefix: a CHAR **
1047: *
1048: * parse an XML qualified name
1.28 daniel 1049: *
1050: * [NS 5] QName ::= (Prefix ':')? LocalPart
1051: *
1052: * [NS 6] Prefix ::= NCName
1053: *
1054: * [NS 7] LocalPart ::= NCName
1.50 daniel 1055: * return values: the function returns the local part, and prefix is updated
1056: * to get the Prefix if any.
1.28 daniel 1057: */
1058:
1.55 daniel 1059: CHAR *
1060: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1061: CHAR *ret = NULL;
1062:
1063: *prefix = NULL;
1064: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1065: if (CUR == ':') {
1.28 daniel 1066: *prefix = ret;
1.40 daniel 1067: NEXT;
1.28 daniel 1068: ret = xmlNamespaceParseNCName(ctxt);
1069: }
1070:
1071: return(ret);
1072: }
1073:
1.50 daniel 1074: /**
1075: * xmlNamespaceParseNSDef:
1076: * @ctxt: an XML parser context
1077: *
1078: * parse a namespace prefix declaration
1.28 daniel 1079: *
1080: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1081: *
1082: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.50 daniel 1083: * return values: the namespace name
1.28 daniel 1084: */
1085:
1.55 daniel 1086: CHAR *
1087: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1088: CHAR *name = NULL;
1089:
1.40 daniel 1090: if ((CUR == 'x') && (NXT(1) == 'm') &&
1091: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1092: (NXT(4) == 's')) {
1093: SKIP(5);
1094: if (CUR == ':') {
1095: NEXT;
1.28 daniel 1096: name = xmlNamespaceParseNCName(ctxt);
1097: }
1098: }
1.39 daniel 1099: return(name);
1.28 daniel 1100: }
1101:
1.50 daniel 1102: /**
1103: * xmlParseQuotedString:
1104: * @ctxt: an XML parser context
1105: *
1.45 daniel 1106: * [OLD] Parse and return a string between quotes or doublequotes
1.50 daniel 1107: * return values: the string parser or NULL.
1.45 daniel 1108: */
1.55 daniel 1109: CHAR *
1110: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1111: CHAR *ret = NULL;
1112: const CHAR *q;
1113:
1114: if (CUR == '"') {
1115: NEXT;
1116: q = CUR_PTR;
1117: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1118: if (CUR != '"') {
1119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 1120: ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
1.59 daniel 1121: ctxt->wellFormed = 0;
1.55 daniel 1122: } else {
1.45 daniel 1123: ret = xmlStrndup(q, CUR_PTR - q);
1124: NEXT;
1125: }
1126: } else if (CUR == '\''){
1127: NEXT;
1128: q = CUR_PTR;
1129: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1130: if (CUR != '\'') {
1131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 1132: ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
1.59 daniel 1133: ctxt->wellFormed = 0;
1.55 daniel 1134: } else {
1.45 daniel 1135: ret = xmlStrndup(q, CUR_PTR - q);
1136: NEXT;
1137: }
1138: }
1139: return(ret);
1140: }
1141:
1.50 daniel 1142: /**
1143: * xmlParseNamespace:
1144: * @ctxt: an XML parser context
1145: *
1.45 daniel 1146: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1147: *
1148: * This is what the older xml-name Working Draft specified, a bunch of
1149: * other stuff may still rely on it, so support is still here as
1150: * if ot was declared on the root of the Tree:-(
1151: */
1152:
1.55 daniel 1153: void
1154: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1155: CHAR *href = NULL;
1156: CHAR *prefix = NULL;
1157: int garbage = 0;
1158:
1159: /*
1160: * We just skipped "namespace" or "xml:namespace"
1161: */
1162: SKIP_BLANKS;
1163:
1164: while (IS_CHAR(CUR) && (CUR != '>')) {
1165: /*
1166: * We can have "ns" or "prefix" attributes
1167: * Old encoding as 'href' or 'AS' attributes is still supported
1168: */
1169: if ((CUR == 'n') && (NXT(1) == 's')) {
1170: garbage = 0;
1171: SKIP(2);
1172: SKIP_BLANKS;
1173:
1174: if (CUR != '=') continue;
1175: NEXT;
1176: SKIP_BLANKS;
1177:
1178: href = xmlParseQuotedString(ctxt);
1179: SKIP_BLANKS;
1180: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1181: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1182: garbage = 0;
1183: SKIP(4);
1184: SKIP_BLANKS;
1185:
1186: if (CUR != '=') continue;
1187: NEXT;
1188: SKIP_BLANKS;
1189:
1190: href = xmlParseQuotedString(ctxt);
1191: SKIP_BLANKS;
1192: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1193: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1194: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1195: garbage = 0;
1196: SKIP(6);
1197: SKIP_BLANKS;
1198:
1199: if (CUR != '=') continue;
1200: NEXT;
1201: SKIP_BLANKS;
1202:
1203: prefix = xmlParseQuotedString(ctxt);
1204: SKIP_BLANKS;
1205: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1206: garbage = 0;
1207: SKIP(2);
1208: SKIP_BLANKS;
1209:
1210: if (CUR != '=') continue;
1211: NEXT;
1212: SKIP_BLANKS;
1213:
1214: prefix = xmlParseQuotedString(ctxt);
1215: SKIP_BLANKS;
1216: } else if ((CUR == '?') && (NXT(1) == '>')) {
1217: garbage = 0;
1218: CUR_PTR ++;
1219: } else {
1220: /*
1221: * Found garbage when parsing the namespace
1222: */
1223: if (!garbage)
1.55 daniel 1224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1225: ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n");
1.59 daniel 1226: ctxt->wellFormed = 0;
1.45 daniel 1227: NEXT;
1228: }
1229: }
1230:
1231: MOVETO_ENDTAG(CUR_PTR);
1232: NEXT;
1233:
1234: /*
1235: * Register the DTD.
1236: */
1237: if (href != NULL)
1238: xmlNewGlobalNs(ctxt->doc, href, prefix);
1239:
1240: if (prefix != NULL) free(prefix);
1241: if (href != NULL) free(href);
1242: }
1243:
1.28 daniel 1244: /************************************************************************
1245: * *
1246: * The parser itself *
1247: * Relates to http://www.w3.org/TR/REC-xml *
1248: * *
1249: ************************************************************************/
1.14 veillard 1250:
1.50 daniel 1251: /**
1252: * xmlParseName:
1253: * @ctxt: an XML parser context
1254: *
1255: * parse an XML name.
1.22 daniel 1256: *
1257: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1258: * CombiningChar | Extender
1259: *
1260: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1261: *
1262: * [6] Names ::= Name (S Name)*
1.50 daniel 1263: * return values: the Name parsed or NULL
1.1 veillard 1264: */
1265:
1.55 daniel 1266: CHAR *
1267: xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1268: const CHAR *q;
1269: CHAR *ret = NULL;
1.1 veillard 1270:
1.40 daniel 1271: if (!IS_LETTER(CUR) && (CUR != '_') &&
1272: (CUR != ':')) return(NULL);
1273: q = NEXT;
1274:
1275: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1276: (CUR == '.') || (CUR == '-') ||
1277: (CUR == '_') || (CUR == ':') ||
1278: (IS_COMBINING(CUR)) ||
1279: (IS_EXTENDER(CUR)))
1280: NEXT;
1.22 daniel 1281:
1.40 daniel 1282: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1283:
1284: return(ret);
1285: }
1286:
1.50 daniel 1287: /**
1288: * xmlParseNmtoken:
1289: * @ctxt: an XML parser context
1290: *
1291: * parse an XML Nmtoken.
1.22 daniel 1292: *
1293: * [7] Nmtoken ::= (NameChar)+
1294: *
1295: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.50 daniel 1296: * return values: the Nmtoken parsed or NULL
1.22 daniel 1297: */
1298:
1.55 daniel 1299: CHAR *
1300: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.22 daniel 1301: const CHAR *q;
1302: CHAR *ret = NULL;
1303:
1.40 daniel 1304: q = NEXT;
1.22 daniel 1305:
1.40 daniel 1306: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1307: (CUR == '.') || (CUR == '-') ||
1308: (CUR == '_') || (CUR == ':') ||
1309: (IS_COMBINING(CUR)) ||
1310: (IS_EXTENDER(CUR)))
1311: NEXT;
1.3 veillard 1312:
1.40 daniel 1313: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1314:
1.3 veillard 1315: return(ret);
1.1 veillard 1316: }
1317:
1.50 daniel 1318: /**
1319: * xmlParseEntityValue:
1320: * @ctxt: an XML parser context
1321: *
1322: * parse a value for ENTITY decl.
1.24 daniel 1323: *
1324: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1325: * "'" ([^%&'] | PEReference | Reference)* "'"
1.50 daniel 1326: * return values: the EntityValue parsed or NULL
1.24 daniel 1327: */
1328:
1.55 daniel 1329: CHAR *
1330: xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1331: CHAR *ret = NULL, *cur;
1.24 daniel 1332: const CHAR *q;
1333:
1.40 daniel 1334: if (CUR == '"') {
1335: NEXT;
1.24 daniel 1336:
1.40 daniel 1337: q = CUR_PTR;
1338: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1339: if (CUR == '%') {
1.46 daniel 1340: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1341: cur = xmlParsePEReference(ctxt);
1.46 daniel 1342: ret = xmlStrcat(ret, cur);
1343: q = CUR_PTR;
1.40 daniel 1344: } else if (CUR == '&') {
1.46 daniel 1345: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1346: cur = xmlParseReference(ctxt);
1347: if (cur != NULL) {
1348: CHAR buf[2];
1349: buf[0] = '&';
1350: buf[1] = 0;
1351: ret = xmlStrncat(ret, buf, 1);
1352: ret = xmlStrcat(ret, cur);
1353: buf[0] = ';';
1354: buf[1] = 0;
1355: ret = xmlStrncat(ret, buf, 1);
1356: }
1.46 daniel 1357: q = CUR_PTR;
1.24 daniel 1358: } else
1.40 daniel 1359: NEXT;
1.24 daniel 1360: }
1.40 daniel 1361: if (!IS_CHAR(CUR)) {
1.55 daniel 1362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.59 daniel 1364: ctxt->wellFormed = 0;
1.24 daniel 1365: } else {
1.46 daniel 1366: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1367: NEXT;
1.24 daniel 1368: }
1.40 daniel 1369: } else if (CUR == '\'') {
1370: NEXT;
1371: q = CUR_PTR;
1372: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1373: if (CUR == '%') {
1.46 daniel 1374: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1375: cur = xmlParsePEReference(ctxt);
1.46 daniel 1376: ret = xmlStrcat(ret, cur);
1377: q = CUR_PTR;
1.40 daniel 1378: } else if (CUR == '&') {
1.46 daniel 1379: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1380: cur = xmlParseReference(ctxt);
1381: if (cur != NULL) {
1382: CHAR buf[2];
1383: buf[0] = '&';
1384: buf[1] = 0;
1385: ret = xmlStrncat(ret, buf, 1);
1386: ret = xmlStrcat(ret, cur);
1387: buf[0] = ';';
1388: buf[1] = 0;
1389: ret = xmlStrncat(ret, buf, 1);
1390: }
1.46 daniel 1391: q = CUR_PTR;
1.24 daniel 1392: } else
1.40 daniel 1393: NEXT;
1.24 daniel 1394: }
1.40 daniel 1395: if (!IS_CHAR(CUR)) {
1.55 daniel 1396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1397: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.59 daniel 1398: ctxt->wellFormed = 0;
1.24 daniel 1399: } else {
1.46 daniel 1400: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1401: NEXT;
1.24 daniel 1402: }
1403: } else {
1.55 daniel 1404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1405: ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.59 daniel 1406: ctxt->wellFormed = 0;
1.24 daniel 1407: }
1408:
1409: return(ret);
1410: }
1411:
1.50 daniel 1412: /**
1413: * xmlParseAttValue:
1414: * @ctxt: an XML parser context
1415: *
1416: * parse a value for an attribute
1.29 daniel 1417: *
1418: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1419: * "'" ([^<&'] | Reference)* "'"
1.50 daniel 1420: * return values: the AttValue parsed or NULL.
1.29 daniel 1421: */
1422:
1.55 daniel 1423: CHAR *
1424: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1425: CHAR *ret = NULL, *cur;
1.29 daniel 1426: const CHAR *q;
1427:
1.40 daniel 1428: if (CUR == '"') {
1429: NEXT;
1.29 daniel 1430:
1.40 daniel 1431: q = CUR_PTR;
1432: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1.59 daniel 1433: if (CUR == '<') {
1434: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1435: ctxt->sax->error(ctxt,
1436: "Unescaped '<' not allowed in attributes values\n");
1437: ctxt->wellFormed = 0;
1438: }
1.40 daniel 1439: if (CUR == '&') {
1.46 daniel 1440: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1441: cur = xmlParseReference(ctxt);
1442: if (cur != NULL) {
1443: /*
1444: * Special case for '&', we don't want to
1445: * resolve it here since it will break later
1446: * when searching entities in the string.
1447: */
1448: if ((cur[0] == '&') && (cur[1] == 0)) {
1449: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1450: ret = xmlStrncat(ret, buf, 5);
1451: } else
1452: ret = xmlStrcat(ret, cur);
1453: free(cur);
1454: }
1.46 daniel 1455: q = CUR_PTR;
1.29 daniel 1456: } else
1.40 daniel 1457: NEXT;
1.50 daniel 1458: /*
1459: * Pop out finished entity references.
1460: */
1461: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1462: if (CUR_PTR != q)
1463: ret = xmlStrncat(ret, q, CUR_PTR - q);
1464: xmlPopInput(ctxt);
1465: q = CUR_PTR;
1466: }
1.29 daniel 1467: }
1.40 daniel 1468: if (!IS_CHAR(CUR)) {
1.55 daniel 1469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1470: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.59 daniel 1471: ctxt->wellFormed = 0;
1.29 daniel 1472: } else {
1.46 daniel 1473: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1474: NEXT;
1.29 daniel 1475: }
1.40 daniel 1476: } else if (CUR == '\'') {
1477: NEXT;
1478: q = CUR_PTR;
1479: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1.59 daniel 1480: if (CUR == '<') {
1481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1482: ctxt->sax->error(ctxt,
1483: "Unescaped '<' not allowed in attributes values\n");
1484: ctxt->wellFormed = 0;
1485: }
1.40 daniel 1486: if (CUR == '&') {
1.46 daniel 1487: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1488: cur = xmlParseReference(ctxt);
1489: if (cur != NULL) {
1490: /*
1491: * Special case for '&', we don't want to
1492: * resolve it here since it will break later
1493: * when searching entities in the string.
1494: */
1495: if ((cur[0] == '&') && (cur[1] == 0)) {
1496: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1497: ret = xmlStrncat(ret, buf, 5);
1498: } else
1499: ret = xmlStrcat(ret, cur);
1500: free(cur);
1501: }
1.46 daniel 1502: q = CUR_PTR;
1.29 daniel 1503: } else
1.40 daniel 1504: NEXT;
1.50 daniel 1505: /*
1506: * Pop out finished entity references.
1507: */
1508: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1509: if (CUR_PTR != q)
1510: ret = xmlStrncat(ret, q, CUR_PTR - q);
1511: xmlPopInput(ctxt);
1512: q = CUR_PTR;
1513: }
1.29 daniel 1514: }
1.40 daniel 1515: if (!IS_CHAR(CUR)) {
1.55 daniel 1516: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1517: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.59 daniel 1518: ctxt->wellFormed = 0;
1.29 daniel 1519: } else {
1.46 daniel 1520: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1521: NEXT;
1.29 daniel 1522: }
1523: } else {
1.55 daniel 1524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1525: ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n");
1.59 daniel 1526: ctxt->wellFormed = 0;
1.29 daniel 1527: }
1528:
1529: return(ret);
1530: }
1531:
1.50 daniel 1532: /**
1533: * xmlParseSystemLiteral:
1534: * @ctxt: an XML parser context
1535: *
1536: * parse an XML Literal
1.21 daniel 1537: *
1.22 daniel 1538: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.50 daniel 1539: * return values: the SystemLiteral parsed or NULL
1.21 daniel 1540: */
1541:
1.55 daniel 1542: CHAR *
1543: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1544: const CHAR *q;
1545: CHAR *ret = NULL;
1546:
1.40 daniel 1547: if (CUR == '"') {
1548: NEXT;
1549: q = CUR_PTR;
1550: while ((IS_CHAR(CUR)) && (CUR != '"'))
1551: NEXT;
1552: if (!IS_CHAR(CUR)) {
1.55 daniel 1553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1554: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.59 daniel 1555: ctxt->wellFormed = 0;
1.21 daniel 1556: } else {
1.40 daniel 1557: ret = xmlStrndup(q, CUR_PTR - q);
1558: NEXT;
1.21 daniel 1559: }
1.40 daniel 1560: } else if (CUR == '\'') {
1561: NEXT;
1562: q = CUR_PTR;
1563: while ((IS_CHAR(CUR)) && (CUR != '\''))
1564: NEXT;
1565: if (!IS_CHAR(CUR)) {
1.55 daniel 1566: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1567: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.59 daniel 1568: ctxt->wellFormed = 0;
1.21 daniel 1569: } else {
1.40 daniel 1570: ret = xmlStrndup(q, CUR_PTR - q);
1571: NEXT;
1.21 daniel 1572: }
1573: } else {
1.55 daniel 1574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.59 daniel 1576: ctxt->wellFormed = 0;
1.21 daniel 1577: }
1578:
1579: return(ret);
1580: }
1581:
1.50 daniel 1582: /**
1583: * xmlParsePubidLiteral:
1584: * @ctxt: an XML parser context
1.21 daniel 1585: *
1.50 daniel 1586: * parse an XML public literal
1587: * return values: the PubidLiteral parsed or NULL.
1.21 daniel 1588: */
1589:
1.55 daniel 1590: CHAR *
1591: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1592: const CHAR *q;
1593: CHAR *ret = NULL;
1594: /*
1595: * Name ::= (Letter | '_') (NameChar)*
1596: */
1.40 daniel 1597: if (CUR == '"') {
1598: NEXT;
1599: q = CUR_PTR;
1600: while (IS_PUBIDCHAR(CUR)) NEXT;
1601: if (CUR != '"') {
1.55 daniel 1602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.59 daniel 1604: ctxt->wellFormed = 0;
1.21 daniel 1605: } else {
1.40 daniel 1606: ret = xmlStrndup(q, CUR_PTR - q);
1607: NEXT;
1.21 daniel 1608: }
1.40 daniel 1609: } else if (CUR == '\'') {
1610: NEXT;
1611: q = CUR_PTR;
1612: while ((IS_LETTER(CUR)) && (CUR != '\''))
1613: NEXT;
1614: if (!IS_LETTER(CUR)) {
1.55 daniel 1615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1616: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.59 daniel 1617: ctxt->wellFormed = 0;
1.21 daniel 1618: } else {
1.40 daniel 1619: ret = xmlStrndup(q, CUR_PTR - q);
1620: NEXT;
1.21 daniel 1621: }
1622: } else {
1.55 daniel 1623: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1624: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.59 daniel 1625: ctxt->wellFormed = 0;
1.21 daniel 1626: }
1627:
1628: return(ret);
1629: }
1630:
1.50 daniel 1631: /**
1632: * xmlParseCharData:
1633: * @ctxt: an XML parser context
1634: * @cdata: int indicating whether we are within a CDATA section
1635: *
1636: * parse a CharData section.
1637: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 1638: *
1639: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1.50 daniel 1640: * return values:
1.27 daniel 1641: */
1642:
1.55 daniel 1643: void
1644: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1645: const CHAR *q;
1646:
1.40 daniel 1647: q = CUR_PTR;
1648: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1649: (CUR != '&')) {
1.59 daniel 1650: if ((CUR == ']') && (NXT(1) == ']') &&
1651: (NXT(2) == '>')) {
1652: if (cdata) break;
1653: else {
1654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655: ctxt->sax->error(ctxt,
1656: "Sequence ']]>' not allowed in content\n");
1657: ctxt->wellFormed = 0;
1658: }
1659: }
1.40 daniel 1660: NEXT;
1.27 daniel 1661: }
1.45 daniel 1662: if (q == CUR_PTR) return;
1663:
1664: /*
1665: * Ok the segment [q CUR_PTR] is to be consumed as chars.
1666: */
1667: if (ctxt->sax != NULL) {
1668: if (areBlanks(ctxt, q, CUR_PTR - q))
1669: ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1670: else
1671: ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1672: }
1.27 daniel 1673: }
1674:
1.50 daniel 1675: /**
1676: * xmlParseExternalID:
1677: * @ctxt: an XML parser context
1678: * @publicID: a CHAR** receiving PubidLiteral
1679: *
1680: * Parse an External ID
1.22 daniel 1681: *
1682: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1683: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.50 daniel 1684: * return values: the function returns SystemLiteral and in the second
1685: * case publicID receives PubidLiteral
1.22 daniel 1686: */
1687:
1.55 daniel 1688: CHAR *
1689: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1.39 daniel 1690: CHAR *URI = NULL;
1.22 daniel 1691:
1.40 daniel 1692: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1693: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1694: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1695: SKIP(6);
1.59 daniel 1696: if (!IS_BLANK(CUR)) {
1697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698: ctxt->sax->error(ctxt,
1699: "Space required after 'SYSTEM'\n");
1700: ctxt->wellFormed = 0;
1701: }
1.42 daniel 1702: SKIP_BLANKS;
1.39 daniel 1703: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1704: if (URI == NULL) {
1.55 daniel 1705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706: ctxt->sax->error(ctxt,
1.39 daniel 1707: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 1708: ctxt->wellFormed = 0;
1709: }
1.40 daniel 1710: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1711: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1712: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1713: SKIP(6);
1.59 daniel 1714: if (!IS_BLANK(CUR)) {
1715: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1716: ctxt->sax->error(ctxt,
1717: "Space required after 'PUBLIC'\n");
1718: ctxt->wellFormed = 0;
1719: }
1.42 daniel 1720: SKIP_BLANKS;
1.39 daniel 1721: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 1722: if (*publicID == NULL) {
1.55 daniel 1723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1724: ctxt->sax->error(ctxt,
1.39 daniel 1725: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 1726: ctxt->wellFormed = 0;
1727: }
1728: if (!IS_BLANK(CUR)) {
1729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730: ctxt->sax->error(ctxt,
1731: "Space required after the Public Identifier\n");
1732: ctxt->wellFormed = 0;
1733: }
1.42 daniel 1734: SKIP_BLANKS;
1.39 daniel 1735: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1736: if (URI == NULL) {
1.55 daniel 1737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1738: ctxt->sax->error(ctxt,
1.39 daniel 1739: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 1740: ctxt->wellFormed = 0;
1741: }
1.22 daniel 1742: }
1.39 daniel 1743: return(URI);
1.22 daniel 1744: }
1745:
1.50 daniel 1746: /**
1747: * xmlParseComment:
1748: * @create: should we create a node
1749: *
1.3 veillard 1750: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1751: * This may or may not create a node (depending on the context)
1.38 daniel 1752: * The spec says that "For compatibility, the string "--" (double-hyphen)
1753: * must not occur within comments. "
1.22 daniel 1754: *
1755: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.50 daniel 1756: *
1757: * TODO: this should call a SAX function which will handle (or not) the
1758: * creation of the comment !
1759: * return values:
1.3 veillard 1760: */
1.31 daniel 1761: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1762: xmlNodePtr ret = NULL;
1.17 daniel 1763: const CHAR *q, *start;
1764: const CHAR *r;
1.39 daniel 1765: CHAR *val;
1.3 veillard 1766:
1767: /*
1.22 daniel 1768: * Check that there is a comment right here.
1.3 veillard 1769: */
1.40 daniel 1770: if ((CUR != '<') || (NXT(1) != '!') ||
1771: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1772:
1.40 daniel 1773: SKIP(4);
1774: start = q = CUR_PTR;
1775: NEXT;
1776: r = CUR_PTR;
1777: NEXT;
1778: while (IS_CHAR(CUR) &&
1779: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1780: (*r != '-') || (*q != '-'))) {
1.59 daniel 1781: if ((*r == '-') && (*q == '-')) {
1.55 daniel 1782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1783: ctxt->sax->error(ctxt,
1.38 daniel 1784: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 1785: ctxt->wellFormed = 0;
1786: }
1.40 daniel 1787: NEXT;r++;q++;
1.3 veillard 1788: }
1.40 daniel 1789: if (!IS_CHAR(CUR)) {
1.55 daniel 1790: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791: ctxt->sax->error(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 1792: ctxt->wellFormed = 0;
1.3 veillard 1793: } else {
1.40 daniel 1794: NEXT;
1.31 daniel 1795: if (create) {
1.39 daniel 1796: val = xmlStrndup(start, q - start);
1.50 daniel 1797: ret = xmlNewDocComment(ctxt->doc, val);
1.39 daniel 1798: free(val);
1.31 daniel 1799: }
1.3 veillard 1800: }
1.39 daniel 1801: return(ret);
1.3 veillard 1802: }
1803:
1.50 daniel 1804: /**
1805: * xmlParsePITarget:
1806: * @ctxt: an XML parser context
1807: *
1808: * parse the name of a PI
1.22 daniel 1809: *
1810: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.50 daniel 1811: * return values: the PITarget name or NULL
1.22 daniel 1812: */
1813:
1.55 daniel 1814: CHAR *
1815: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 1816: CHAR *name;
1817:
1818: name = xmlParseName(ctxt);
1819: if ((name != NULL) && (name[3] == 0) &&
1820: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1821: ((name[1] == 'm') || (name[1] == 'M')) &&
1822: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 1823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1824: ctxt->sax->error(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1825: return(NULL);
1826: }
1827: return(name);
1828: }
1829:
1.50 daniel 1830: /**
1831: * xmlParsePI:
1832: * @ctxt: an XML parser context
1833: *
1834: * parse an XML Processing Instruction.
1.22 daniel 1835: *
1836: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.50 daniel 1837: * return values: the PI name or NULL
1.3 veillard 1838: */
1839:
1.55 daniel 1840: void
1841: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1842: CHAR *target;
1843:
1.40 daniel 1844: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1845: /*
1846: * this is a Processing Instruction.
1847: */
1.40 daniel 1848: SKIP(2);
1.3 veillard 1849:
1850: /*
1.22 daniel 1851: * Parse the target name and check for special support like
1852: * namespace.
1853: *
1854: * TODO : PI handling should be dynamically redefinable using an
1855: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1856: */
1.22 daniel 1857: target = xmlParsePITarget(ctxt);
1858: if (target != NULL) {
1859: /*
1.44 daniel 1860: * Support for the old Processing Instruction related to namespace.
1.22 daniel 1861: */
1862: if ((target[0] == 'n') && (target[1] == 'a') &&
1863: (target[2] == 'm') && (target[3] == 'e') &&
1864: (target[4] == 's') && (target[5] == 'p') &&
1865: (target[6] == 'a') && (target[7] == 'c') &&
1866: (target[8] == 'e')) {
1867: xmlParseNamespace(ctxt);
1868: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1869: (target[2] == 'l') && (target[3] == ':') &&
1870: (target[4] == 'n') && (target[5] == 'a') &&
1871: (target[6] == 'm') && (target[7] == 'e') &&
1872: (target[8] == 's') && (target[9] == 'p') &&
1873: (target[10] == 'a') && (target[11] == 'c') &&
1874: (target[12] == 'e')) {
1875: xmlParseNamespace(ctxt);
1876: } else {
1.44 daniel 1877: const CHAR *q = CUR_PTR;
1878:
1.40 daniel 1879: while (IS_CHAR(CUR) &&
1880: ((CUR != '?') || (NXT(1) != '>')))
1881: NEXT;
1882: if (!IS_CHAR(CUR)) {
1.55 daniel 1883: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 1884: ctxt->sax->error(ctxt,
1885: "xmlParsePI: PI %s never end ...\n", target);
1886: ctxt->wellFormed = 0;
1.44 daniel 1887: } else {
1888: CHAR *data;
1889:
1890: data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1.40 daniel 1891: SKIP(2);
1.44 daniel 1892:
1893: /*
1894: * SAX: PI detected.
1895: */
1896: if (ctxt->sax)
1897: ctxt->sax->processingInstruction(ctxt, target, data);
1898: /*
1899: * Unknown PI, ignore it !
1900: */
1901: else
1902: xmlParserWarning(ctxt,
1903: "xmlParsePI : skipping unknown PI %s\n",
1904: target);
1905: free(data);
1906: }
1.22 daniel 1907: }
1.39 daniel 1908: free(target);
1.3 veillard 1909: } else {
1.55 daniel 1910: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1911: ctxt->sax->error(ctxt, "xmlParsePI : no target name\n");
1.59 daniel 1912: ctxt->wellFormed = 0;
1913:
1.22 daniel 1914: /********* Should we try to complete parsing the PI ???
1.40 daniel 1915: while (IS_CHAR(CUR) &&
1916: (CUR != '?') && (CUR != '>'))
1917: NEXT;
1918: if (!IS_CHAR(CUR)) {
1.22 daniel 1919: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1920: target);
1921: }
1922: ********************************************************/
1923: }
1924: }
1925: }
1926:
1.50 daniel 1927: /**
1928: * xmlParseNotationDecl:
1929: * @ctxt: an XML parser context
1930: *
1931: * parse a notation declaration
1.22 daniel 1932: *
1933: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1934: *
1935: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1936: *
1937: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1938: * 'PUBLIC' S PubidLiteral S SystemLiteral
1939: *
1940: * Hence there is actually 3 choices:
1941: * 'PUBLIC' S PubidLiteral
1942: * 'PUBLIC' S PubidLiteral S SystemLiteral
1943: * and 'SYSTEM' S SystemLiteral
1.50 daniel 1944: *
1945: * TODO: no handling of the values parsed !
1.22 daniel 1946: */
1947:
1.55 daniel 1948: void
1949: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 1950: CHAR *name;
1951:
1.40 daniel 1952: if ((CUR == '<') && (NXT(1) == '!') &&
1953: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1954: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1955: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1956: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1957: (IS_BLANK(NXT(10)))) {
1958: SKIP(10);
1.42 daniel 1959: SKIP_BLANKS;
1.22 daniel 1960:
1961: name = xmlParseName(ctxt);
1962: if (name == NULL) {
1.55 daniel 1963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964: ctxt->sax->error(ctxt,
1.31 daniel 1965: "xmlParseAttributeListDecl: no name for Element\n");
1.59 daniel 1966: ctxt->wellFormed = 0;
1.22 daniel 1967: return;
1968: }
1.42 daniel 1969: SKIP_BLANKS;
1.22 daniel 1970: /*
1.31 daniel 1971: * TODO !!!
1.22 daniel 1972: */
1.40 daniel 1973: while ((IS_CHAR(CUR)) && (CUR != '>'))
1974: NEXT;
1.22 daniel 1975: free(name);
1976: }
1977: }
1978:
1.50 daniel 1979: /**
1980: * xmlParseEntityDecl:
1981: * @ctxt: an XML parser context
1982: *
1983: * parse <!ENTITY declarations
1.22 daniel 1984: *
1985: * [70] EntityDecl ::= GEDecl | PEDecl
1986: *
1987: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1988: *
1989: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1990: *
1991: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1992: *
1993: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1994: *
1995: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1996: */
1997:
1.55 daniel 1998: void
1999: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2000: CHAR *name = NULL;
1.24 daniel 2001: CHAR *value = NULL;
1.39 daniel 2002: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2003: CHAR *ndata = NULL;
1.39 daniel 2004: int isParameter = 0;
1.22 daniel 2005:
1.40 daniel 2006: if ((CUR == '<') && (NXT(1) == '!') &&
2007: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2008: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2009: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.40 daniel 2010: SKIP(8);
1.59 daniel 2011: if (!IS_BLANK(CUR)) {
2012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013: ctxt->sax->error(ctxt, "Space required after '<!ENTITY'\n");
2014: ctxt->wellFormed = 0;
2015: }
2016: SKIP_BLANKS;
1.40 daniel 2017:
2018: if (CUR == '%') {
2019: NEXT;
1.59 daniel 2020: if (!IS_BLANK(CUR)) {
2021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2022: ctxt->sax->error(ctxt, "Space required after '%'\n");
2023: ctxt->wellFormed = 0;
2024: }
1.42 daniel 2025: SKIP_BLANKS;
1.39 daniel 2026: isParameter = 1;
1.22 daniel 2027: }
2028:
2029: name = xmlParseName(ctxt);
1.24 daniel 2030: if (name == NULL) {
1.55 daniel 2031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2032: ctxt->sax->error(ctxt, "xmlParseEntityDecl: no name\n");
1.59 daniel 2033: ctxt->wellFormed = 0;
1.24 daniel 2034: return;
2035: }
1.59 daniel 2036: if (!IS_BLANK(CUR)) {
2037: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2038: ctxt->sax->error(ctxt,
2039: "Space required after the entity name\n");
2040: ctxt->wellFormed = 0;
2041: }
1.42 daniel 2042: SKIP_BLANKS;
1.24 daniel 2043:
1.22 daniel 2044: /*
1.24 daniel 2045: * TODO handle the various case of definitions...
1.22 daniel 2046: */
1.39 daniel 2047: if (isParameter) {
1.40 daniel 2048: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 2049: value = xmlParseEntityValue(ctxt);
1.39 daniel 2050: if (value) {
2051: xmlAddDocEntity(ctxt->doc, name,
2052: XML_INTERNAL_PARAMETER_ENTITY,
2053: NULL, NULL, value);
2054: }
1.24 daniel 2055: else {
1.39 daniel 2056: URI = xmlParseExternalID(ctxt, &literal);
2057: if (URI) {
2058: xmlAddDocEntity(ctxt->doc, name,
2059: XML_EXTERNAL_PARAMETER_ENTITY,
2060: literal, URI, NULL);
2061: }
1.24 daniel 2062: }
2063: } else {
1.40 daniel 2064: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 2065: value = xmlParseEntityValue(ctxt);
1.39 daniel 2066: xmlAddDocEntity(ctxt->doc, name,
2067: XML_INTERNAL_GENERAL_ENTITY,
2068: NULL, NULL, value);
2069: } else {
2070: URI = xmlParseExternalID(ctxt, &literal);
1.59 daniel 2071: if ((CUR != '>') && (!IS_BLANK(CUR))) {
2072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073: ctxt->sax->error(ctxt,
2074: "Space required before 'NDATA'\n");
2075: ctxt->wellFormed = 0;
2076: }
1.42 daniel 2077: SKIP_BLANKS;
1.40 daniel 2078: if ((CUR == 'N') && (NXT(1) == 'D') &&
2079: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2080: (NXT(4) == 'A')) {
2081: SKIP(5);
1.59 daniel 2082: if (!IS_BLANK(CUR)) {
2083: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2084: ctxt->sax->error(ctxt,
2085: "Space required after 'NDATA'\n");
2086: ctxt->wellFormed = 0;
2087: }
1.42 daniel 2088: SKIP_BLANKS;
1.24 daniel 2089: ndata = xmlParseName(ctxt);
1.39 daniel 2090: xmlAddDocEntity(ctxt->doc, name,
2091: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
2092: literal, URI, ndata);
2093: } else {
2094: xmlAddDocEntity(ctxt->doc, name,
2095: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
2096: literal, URI, NULL);
1.24 daniel 2097: }
2098: }
2099: }
1.42 daniel 2100: SKIP_BLANKS;
1.40 daniel 2101: if (CUR != '>') {
1.55 daniel 2102: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103: ctxt->sax->error(ctxt,
1.31 daniel 2104: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 2105: ctxt->wellFormed = 0;
1.24 daniel 2106: } else
1.40 daniel 2107: NEXT;
1.39 daniel 2108: if (name != NULL) free(name);
2109: if (value != NULL) free(value);
2110: if (URI != NULL) free(URI);
2111: if (literal != NULL) free(literal);
2112: if (ndata != NULL) free(ndata);
1.22 daniel 2113: }
2114: }
2115:
1.50 daniel 2116: /**
1.59 daniel 2117: * xmlParseDefaultDecl:
2118: * @ctxt: an XML parser context
2119: * @value: Receive a possible fixed default value for the attribute
2120: *
2121: * Parse an attribute default declaration
2122: *
2123: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
2124: *
2125: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
2126: * or XML_ATTRIBUTE_FIXED.
2127: */
2128:
2129: int
2130: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
2131: int val;
2132: CHAR *ret;
2133:
2134: *value = NULL;
2135: if ((CUR == '#') && (NXT(1) == 'R') &&
2136: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
2137: (NXT(4) == 'U') && (NXT(5) == 'I') &&
2138: (NXT(6) == 'R') && (NXT(7) == 'E') &&
2139: (NXT(8) == 'D')) {
2140: SKIP(9);
2141: return(XML_ATTRIBUTE_REQUIRED);
2142: }
2143: if ((CUR == '#') && (NXT(1) == 'I') &&
2144: (NXT(2) == 'M') && (NXT(3) == 'P') &&
2145: (NXT(4) == 'L') && (NXT(5) == 'I') &&
2146: (NXT(6) == 'E') && (NXT(7) == 'D')) {
2147: SKIP(8);
2148: return(XML_ATTRIBUTE_IMPLIED);
2149: }
2150: val = XML_ATTRIBUTE_NONE;
2151: if ((CUR == '#') && (NXT(1) == 'F') &&
2152: (NXT(2) == 'I') && (NXT(3) == 'X') &&
2153: (NXT(4) == 'E') && (NXT(5) == 'D')) {
2154: SKIP(6);
2155: val = XML_ATTRIBUTE_FIXED;
2156: if (!IS_BLANK(CUR)) {
2157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2158: ctxt->sax->error(ctxt, "Space required after '#FIXED'\n");
2159: ctxt->wellFormed = 0;
2160: }
2161: SKIP_BLANKS;
2162: }
2163: ret = xmlParseAttValue(ctxt);
2164: if (ret == NULL) {
2165: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2166: ctxt->sax->error(ctxt,
2167: "Attribute default value declaration error\n");
2168: ctxt->wellFormed = 0;
2169: } else
2170: *value = ret;
2171: return(val);
2172: }
2173:
2174: /**
1.50 daniel 2175: * xmlParseEnumeratedType:
2176: * @ctxt: an XML parser context
2177: * @name: ???
2178: * @:
2179: *
2180: * parse and Enumerated attribute type.
1.22 daniel 2181: *
2182: * [57] EnumeratedType ::= NotationType | Enumeration
2183: *
2184: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2185: *
2186: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1.50 daniel 2187: *
2188: * TODO: not implemented !!!
1.22 daniel 2189: */
2190:
1.55 daniel 2191: void
2192: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.22 daniel 2193: /*
2194: * TODO !!!
2195: */
1.59 daniel 2196: fprintf(stderr, "Production [57] EnumeratedType not yet supported\n");
1.40 daniel 2197: while ((IS_CHAR(CUR)) && (CUR != '>'))
2198: NEXT;
1.22 daniel 2199: }
2200:
1.50 daniel 2201: /**
2202: * xmlParseAttributeType:
2203: * @ctxt: an XML parser context
2204: * @name: ???
2205: *
1.59 daniel 2206: * parse the Attribute list def for an element
1.22 daniel 2207: *
2208: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2209: *
2210: * [55] StringType ::= 'CDATA'
2211: *
2212: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2213: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 2214: *
1.59 daniel 2215: * Returns: the attribute type
1.22 daniel 2216: */
1.59 daniel 2217: int
1.55 daniel 2218: xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.45 daniel 2219: /* TODO !!! */
1.40 daniel 2220: if ((CUR == 'C') && (NXT(1) == 'D') &&
2221: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2222: (NXT(4) == 'A')) {
2223: SKIP(5);
1.59 daniel 2224: return(XML_ATTRIBUTE_STRING);
1.40 daniel 2225: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2226: SKIP(2);
1.59 daniel 2227: return(XML_ATTRIBUTE_ID);
1.40 daniel 2228: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2229: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2230: (NXT(4) == 'F')) {
2231: SKIP(5);
1.59 daniel 2232: return(XML_ATTRIBUTE_IDREF);
1.40 daniel 2233: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2234: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2235: (NXT(4) == 'F') && (NXT(5) == 'S')) {
2236: SKIP(6);
1.59 daniel 2237: return(XML_ATTRIBUTE_IDREFS);
1.40 daniel 2238: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2239: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2240: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2241: SKIP(6);
1.59 daniel 2242: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 2243: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2244: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2245: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2246: (NXT(6) == 'E') && (NXT(7) == 'S')) {
2247: SKIP(8);
1.59 daniel 2248: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 2249: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2250: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2251: (NXT(4) == 'K') && (NXT(5) == 'E') &&
2252: (NXT(6) == 'N')) {
2253: SKIP(7);
1.59 daniel 2254: return(XML_ATTRIBUTE_NMTOKEN);
1.40 daniel 2255: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2256: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2257: (NXT(4) == 'K') && (NXT(5) == 'E') &&
2258: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.59 daniel 2259: return(XML_ATTRIBUTE_NMTOKENS);
1.22 daniel 2260: }
1.59 daniel 2261: xmlParseEnumeratedType(ctxt, name);
2262: return(XML_ATTRIBUTE_ENUMERATED);
1.22 daniel 2263: }
2264:
1.50 daniel 2265: /**
2266: * xmlParseAttributeListDecl:
2267: * @ctxt: an XML parser context
2268: *
2269: * : parse the Attribute list def for an element
1.22 daniel 2270: *
2271: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2272: *
2273: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 2274: *
2275: * TODO: not implemented !!!
1.22 daniel 2276: */
1.55 daniel 2277: void
2278: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 2279: CHAR *elemName;
2280: CHAR *attrName;
1.22 daniel 2281:
1.45 daniel 2282: /* TODO !!! */
1.40 daniel 2283: if ((CUR == '<') && (NXT(1) == '!') &&
2284: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2285: (NXT(4) == 'T') && (NXT(5) == 'L') &&
2286: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 2287: (NXT(8) == 'T')) {
1.40 daniel 2288: SKIP(9);
1.59 daniel 2289: if (!IS_BLANK(CUR)) {
2290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2291: ctxt->sax->error(ctxt, "Space required after '<!ATTLIST'\n");
2292: ctxt->wellFormed = 0;
2293: }
1.42 daniel 2294: SKIP_BLANKS;
1.59 daniel 2295: elemName = xmlParseName(ctxt);
2296: if (elemName == NULL) {
1.55 daniel 2297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 2298: ctxt->sax->error(ctxt, "ATTLIST: no name for Element\n");
2299: ctxt->wellFormed = 0;
1.22 daniel 2300: return;
2301: }
1.42 daniel 2302: SKIP_BLANKS;
1.40 daniel 2303: while (CUR != '>') {
2304: const CHAR *check = CUR_PTR;
1.59 daniel 2305: int type;
2306: int def;
2307: CHAR *defaultValue = NULL;
2308:
2309: attrName = xmlParseName(ctxt);
2310: if (attrName == NULL) {
2311: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2312: ctxt->sax->error(ctxt, "ATTLIST: no name for Attribute\n");
2313: ctxt->wellFormed = 0;
2314: break;
2315: }
2316: if (!IS_BLANK(CUR)) {
2317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2318: ctxt->sax->error(ctxt,
2319: "Space required after the attribute name\n");
2320: ctxt->wellFormed = 0;
2321: break;
2322: }
2323: SKIP_BLANKS;
2324:
2325: type = xmlParseAttributeType(ctxt, attrName);
2326: if (type <= 0) break;
1.22 daniel 2327:
1.59 daniel 2328: if (!IS_BLANK(CUR)) {
2329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2330: ctxt->sax->error(ctxt,
2331: "Space required after the attribute type\n");
2332: ctxt->wellFormed = 0;
2333: break;
2334: }
1.42 daniel 2335: SKIP_BLANKS;
1.59 daniel 2336:
2337: def = xmlParseDefaultDecl(ctxt, &defaultValue);
2338: if (def <= 0) break;
2339:
2340: if (CUR != '>') {
2341: if (!IS_BLANK(CUR)) {
2342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2343: ctxt->sax->error(ctxt,
2344: "Space required after the attribute default value\n");
2345: ctxt->wellFormed = 0;
2346: break;
2347: }
2348: SKIP_BLANKS;
2349: }
1.40 daniel 2350: if (check == CUR_PTR) {
1.55 daniel 2351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2352: ctxt->sax->error(ctxt,
1.59 daniel 2353: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 2354: break;
2355: }
1.59 daniel 2356: if (attrName != NULL)
2357: free(attrName);
2358: if (defaultValue != NULL)
2359: free(defaultValue);
1.22 daniel 2360: }
1.40 daniel 2361: if (CUR == '>')
2362: NEXT;
1.22 daniel 2363:
1.59 daniel 2364: free(elemName);
1.22 daniel 2365: }
2366: }
2367:
1.50 daniel 2368: /**
1.61 daniel 2369: * xmlParseElementMixedContentDecl:
2370: * @ctxt: an XML parser context
2371: *
2372: * parse the declaration for a Mixed Element content
2373: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
2374: *
2375: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2376: * '(' S? '#PCDATA' S? ')'
2377: *
2378: * returns: the list of the xmlElementContentPtr describing the element choices
2379: */
2380: xmlElementContentPtr
1.62 ! daniel 2381: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.61 daniel 2382: xmlElementContentPtr ret = NULL, cur = NULL;
2383: CHAR *elem = NULL;
2384:
2385: if ((CUR == '#') && (NXT(1) == 'P') &&
2386: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2387: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2388: (NXT(6) == 'A')) {
2389: SKIP(7);
2390: SKIP_BLANKS;
2391: if ((CUR == '(') || (CUR == '|')) {
2392: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2393: if (ret == NULL) return(NULL);
2394: } else {
2395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2396: ctxt->sax->error(ctxt,
2397: "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
2398: ctxt->wellFormed = 0;
2399: return(NULL);
2400: }
2401: while (CUR == '|') {
2402: if (elem == NULL) {
2403: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2404: if (ret == NULL) return(NULL);
2405: ret->c1 = cur;
2406: } else {
2407: cur->c1 = xmlNewElementContent(elem,
2408: XML_ELEMENT_CONTENT_ELEMENT);
2409: cur->c2 = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2410: cur = cur->c2;
2411: if (cur == NULL) return(NULL);
2412: }
2413: SKIP_BLANKS;
2414: elem = xmlParseName(ctxt);
2415: if (elem == NULL) {
2416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2417: ctxt->sax->error(ctxt,
2418: "xmlParseElementMixedContentDecl : Name expected\n");
2419: ctxt->wellFormed = 0;
2420: xmlFreeElementContent(cur);
2421: return(NULL);
2422: }
2423: SKIP_BLANKS;
2424: }
2425: if (CUR == ')') {
2426: if (elem != NULL)
2427: cur->c2 = xmlNewElementContent(elem,
2428: XML_ELEMENT_CONTENT_ELEMENT);
2429: NEXT;
2430: } else {
2431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2432: ctxt->sax->error(ctxt,
2433: "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
2434: ctxt->wellFormed = 0;
2435: xmlFreeElementContent(ret);
2436: return(NULL);
2437: }
2438:
2439: } else {
2440: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2441: ctxt->sax->error(ctxt,
2442: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
2443: ctxt->wellFormed = 0;
2444: }
2445: return(ret);
2446: }
2447:
2448: /**
2449: * xmlParseElementChildrenContentDecl:
1.50 daniel 2450: * @ctxt: an XML parser context
2451: *
1.61 daniel 2452: * parse the declaration for a Mixed Element content
2453: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 2454: *
1.61 daniel 2455: *
1.22 daniel 2456: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2457: *
2458: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2459: *
2460: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2461: *
2462: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2463: *
1.62 ! daniel 2464: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 2465: * hierarchy.
2466: */
2467: xmlElementContentPtr
1.62 ! daniel 2468: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
! 2469: xmlElementContentPtr ret = NULL, cur;
! 2470: CHAR *elem;
! 2471: CHAR type = 0;
! 2472:
! 2473: SKIP_BLANKS;
! 2474: if (CUR == '(') {
! 2475: NEXT;
! 2476: SKIP_BLANKS;
! 2477: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
! 2478: SKIP_BLANKS;
! 2479: } else {
! 2480: elem = xmlParseName(ctxt);
! 2481: if (elem == NULL) {
! 2482: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2483: ctxt->sax->error(ctxt,
! 2484: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
! 2485: ctxt->wellFormed = 0;
! 2486: return(NULL);
! 2487: }
! 2488: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
! 2489: if (CUR == '?') {
! 2490: ret->ocur = XML_ELEMENT_CONTENT_OPT;
! 2491: NEXT;
! 2492: } else if (CUR == '*') {
! 2493: ret->ocur = XML_ELEMENT_CONTENT_MULT;
! 2494: NEXT;
! 2495: } else if (CUR == '+') {
! 2496: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
! 2497: NEXT;
! 2498: } else {
! 2499: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
! 2500: }
! 2501: }
! 2502: SKIP_BLANKS;
! 2503: while (CUR != ')') {
! 2504: if (CUR == ',') {
! 2505: if (type == 0) type = CUR;
! 2506:
! 2507: /*
! 2508: * Detect "Name | Name , Name" error
! 2509: */
! 2510: else if (type != CUR) {
! 2511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2512: ctxt->sax->error(ctxt,
! 2513: "xmlParseElementChildrenContentDecl : '%c' expected\n",
! 2514: type);
! 2515: ctxt->wellFormed = 0;
! 2516: xmlFreeElementContent(ret);
! 2517: return(NULL);
! 2518: }
! 2519:
! 2520: cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
! 2521: cur-> = ret;
! 2522: ret = cur;
! 2523: } else if (CUR == '|') {
! 2524: if (type == 0) type = CUR;
! 2525:
! 2526: /*
! 2527: * Detect "Name | Name , Name" error
! 2528: */
! 2529: else if (type != CUR) {
! 2530: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2531: ctxt->sax->error(ctxt,
! 2532: "xmlParseElementChildrenContentDecl : '%c' expected\n",
! 2533: type);
! 2534: ctxt->wellFormed = 0;
! 2535: xmlFreeElementContent(ret);
! 2536: return(NULL);
! 2537: }
! 2538:
! 2539: /* TODO !!!!!!! */
! 2540: } else {
! 2541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2542: ctxt->sax->error(ctxt,
! 2543: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
! 2544: ctxt->wellFormed = 0;
! 2545: xmlFreeElementContent(ret);
! 2546: return(NULL);
! 2547: }
! 2548: SKIP_BLANKS;
! 2549: if (CUR == '(') {
! 2550: NEXT;
! 2551: SKIP_BLANKS;
! 2552: cur = xmlParseElementChildrenContentDecl(ctxt);
! 2553: SKIP_BLANKS;
! 2554: } else {
! 2555: elem = xmlParseName(ctxt);
! 2556: if (elem == NULL) {
! 2557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2558: ctxt->sax->error(ctxt,
! 2559: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
! 2560: ctxt->wellFormed = 0;
! 2561: return(NULL);
! 2562: }
! 2563: cur = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
! 2564: if (CUR == '?') {
! 2565: ret->ocur = XML_ELEMENT_CONTENT_OPT;
! 2566: NEXT;
! 2567: } else if (CUR == '*') {
! 2568: ret->ocur = XML_ELEMENT_CONTENT_MULT;
! 2569: NEXT;
! 2570: } else if (CUR == '+') {
! 2571: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
! 2572: NEXT;
! 2573: } else {
! 2574: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
! 2575: }
! 2576: !!!!!!!!!!
! 2577: }
! 2578: }
! 2579: NEXT;
! 2580: if (CUR == '?') {
! 2581: ret->ocur = XML_ELEMENT_CONTENT_OPT;
! 2582: NEXT;
! 2583: } else if (CUR == '*') {
! 2584: ret->ocur = XML_ELEMENT_CONTENT_MULT;
! 2585: NEXT;
! 2586: } else if (CUR == '+') {
! 2587: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
! 2588: NEXT;
! 2589: } else {
! 2590: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
! 2591: }
! 2592: return(ret);
1.61 daniel 2593: }
2594:
2595: /**
2596: * xmlParseElementContentDecl:
2597: * @ctxt: an XML parser context
2598: * @name: the name of the element being defined.
2599: * @result: the Element Content pointer will be stored here if any
1.22 daniel 2600: *
1.61 daniel 2601: * parse the declaration for an Element content either Mixed or Children,
2602: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
2603: *
2604: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 2605: *
1.61 daniel 2606: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 2607: */
2608:
1.61 daniel 2609: int
2610: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
2611: xmlElementContentPtr *result) {
2612:
2613: xmlElementContentPtr tree = NULL;
2614: int res;
2615:
2616: *result = NULL;
2617:
2618: if (CUR != '(') {
2619: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2620: ctxt->sax->error(ctxt,
2621: "xmlParseElementContentDecl : '(' expected\n");
2622: ctxt->wellFormed = 0;
2623: return(-1);
2624: }
2625: NEXT;
2626: SKIP_BLANKS;
2627: if ((CUR == '#') && (NXT(1) == 'P') &&
2628: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2629: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2630: (NXT(6) == 'A')) {
1.62 ! daniel 2631: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 2632: res = XML_ELEMENT_TYPE_MIXED;
2633: } else {
1.62 ! daniel 2634: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 2635: res = XML_ELEMENT_TYPE_ELEMENT;
2636: }
2637: SKIP_BLANKS;
2638: if (CUR != ')') {
2639: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2640: ctxt->sax->error(ctxt,
2641: "xmlParseElementContentDecl : ')' expected\n");
2642: ctxt->wellFormed = 0;
2643: /* TODO : free tree if != NULL ... */
2644: return(-1);
2645: }
2646: return(res);
1.22 daniel 2647: }
2648:
1.50 daniel 2649: /**
2650: * xmlParseElementDecl:
2651: * @ctxt: an XML parser context
2652: *
2653: * parse an Element declaration.
1.22 daniel 2654: *
2655: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2656: *
2657: * TODO There is a check [ VC: Unique Element Type Declaration ]
2658: */
1.59 daniel 2659: int
1.55 daniel 2660: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2661: CHAR *name;
1.59 daniel 2662: int ret = -1;
1.61 daniel 2663: xmlElementContentPtr content = NULL;
1.22 daniel 2664:
1.40 daniel 2665: if ((CUR == '<') && (NXT(1) == '!') &&
2666: (NXT(2) == 'E') && (NXT(3) == 'L') &&
2667: (NXT(4) == 'E') && (NXT(5) == 'M') &&
2668: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 2669: (NXT(8) == 'T')) {
1.40 daniel 2670: SKIP(9);
1.59 daniel 2671: if (!IS_BLANK(CUR)) {
2672: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2673: ctxt->sax->error(ctxt,
2674: "Space required after 'ELEMENT'\n");
2675: ctxt->wellFormed = 0;
2676: }
1.42 daniel 2677: SKIP_BLANKS;
1.22 daniel 2678: name = xmlParseName(ctxt);
2679: if (name == NULL) {
1.55 daniel 2680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 2681: ctxt->sax->error(ctxt,
2682: "xmlParseElementDecl: no name for Element\n");
2683: ctxt->wellFormed = 0;
2684: return(-1);
2685: }
2686: if (!IS_BLANK(CUR)) {
2687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2688: ctxt->sax->error(ctxt,
2689: "Space required after the element name\n");
2690: ctxt->wellFormed = 0;
1.22 daniel 2691: }
1.42 daniel 2692: SKIP_BLANKS;
1.40 daniel 2693: if ((CUR == 'E') && (NXT(1) == 'M') &&
2694: (NXT(2) == 'P') && (NXT(3) == 'T') &&
2695: (NXT(4) == 'Y')) {
2696: SKIP(5);
1.22 daniel 2697: /*
2698: * Element must always be empty.
2699: */
1.59 daniel 2700: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 2701: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2702: (NXT(2) == 'Y')) {
2703: SKIP(3);
1.22 daniel 2704: /*
2705: * Element is a generic container.
2706: */
1.59 daniel 2707: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 2708: } else if (CUR == '(') {
2709: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 2710: } else {
1.61 daniel 2711: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2712: ctxt->sax->error(ctxt,
2713: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
2714: ctxt->wellFormed = 0;
2715: if (name != NULL) free(name);
2716: return(-1);
1.22 daniel 2717: }
1.42 daniel 2718: SKIP_BLANKS;
1.40 daniel 2719: if (CUR != '>') {
1.55 daniel 2720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2721: ctxt->sax->error(ctxt,
1.31 daniel 2722: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 2723: ctxt->wellFormed = 0;
1.61 daniel 2724: } else {
1.40 daniel 2725: NEXT;
1.61 daniel 2726: xmlAddElementDecl(ctxt->doc->intSubset, name, ret, content);
2727: }
2728: if (name != NULL) {
2729: free(name);
2730: }
1.22 daniel 2731: }
1.59 daniel 2732: return(ret);
1.22 daniel 2733: }
2734:
1.50 daniel 2735: /**
2736: * xmlParseMarkupDecl:
2737: * @ctxt: an XML parser context
2738: *
2739: * parse Markup declarations
1.22 daniel 2740: *
2741: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2742: * NotationDecl | PI | Comment
2743: *
2744: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2745: */
1.55 daniel 2746: void
2747: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2748: xmlParseElementDecl(ctxt);
2749: xmlParseAttributeListDecl(ctxt);
2750: xmlParseEntityDecl(ctxt);
2751: xmlParseNotationDecl(ctxt);
2752: xmlParsePI(ctxt);
1.31 daniel 2753: xmlParseComment(ctxt, 0);
1.22 daniel 2754: }
2755:
1.50 daniel 2756: /**
2757: * xmlParseCharRef:
2758: * @ctxt: an XML parser context
2759: *
2760: * parse Reference declarations
1.24 daniel 2761: *
2762: * [66] CharRef ::= '&#' [0-9]+ ';' |
2763: * '&#x' [0-9a-fA-F]+ ';'
1.50 daniel 2764: * return values: the value parsed
1.24 daniel 2765: */
1.55 daniel 2766: CHAR *
2767: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 2768: int val = 0;
1.44 daniel 2769: CHAR buf[2];
1.24 daniel 2770:
1.40 daniel 2771: if ((CUR == '&') && (NXT(1) == '#') &&
2772: (NXT(2) == 'x')) {
2773: SKIP(3);
2774: while (CUR != ';') {
2775: if ((CUR >= '0') && (CUR <= '9'))
2776: val = val * 16 + (CUR - '0');
2777: else if ((CUR >= 'a') && (CUR <= 'f'))
2778: val = val * 16 + (CUR - 'a') + 10;
2779: else if ((CUR >= 'A') && (CUR <= 'F'))
2780: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 2781: else {
1.55 daniel 2782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2783: ctxt->sax->error(ctxt,
1.59 daniel 2784: "xmlParseCharRef: invalid hexadecimal value\n");
2785: ctxt->wellFormed = 0;
1.29 daniel 2786: val = 0;
1.24 daniel 2787: break;
2788: }
1.47 daniel 2789: NEXT;
1.24 daniel 2790: }
1.55 daniel 2791: if (CUR == ';')
1.40 daniel 2792: NEXT;
2793: } else if ((CUR == '&') && (NXT(1) == '#')) {
2794: SKIP(2);
2795: while (CUR != ';') {
2796: if ((CUR >= '0') && (CUR <= '9'))
1.55 daniel 2797: val = val * 10 + (CUR - '0');
1.24 daniel 2798: else {
1.55 daniel 2799: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2800: ctxt->sax->error(ctxt,
2801: "xmlParseCharRef: invalid decimal value\n");
1.59 daniel 2802: ctxt->wellFormed = 0;
1.29 daniel 2803: val = 0;
1.24 daniel 2804: break;
2805: }
1.47 daniel 2806: NEXT;
1.24 daniel 2807: }
1.55 daniel 2808: if (CUR == ';')
1.40 daniel 2809: NEXT;
1.24 daniel 2810: } else {
1.55 daniel 2811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2812: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
1.59 daniel 2813: ctxt->wellFormed = 0;
1.24 daniel 2814: }
1.29 daniel 2815: /*
2816: * Check the value IS_CHAR ...
2817: */
1.44 daniel 2818: if (IS_CHAR(val)) {
2819: buf[0] = (CHAR) val;
2820: buf[1] = 0;
1.50 daniel 2821: return(xmlStrndup(buf, 1));
1.44 daniel 2822: } else {
1.55 daniel 2823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2824: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid CHAR value %d\n",
2825: val);
1.59 daniel 2826: ctxt->wellFormed = 0;
1.29 daniel 2827: }
1.46 daniel 2828: return(NULL);
1.24 daniel 2829: }
2830:
1.50 daniel 2831: /**
2832: * xmlParseEntityRef:
2833: * @ctxt: an XML parser context
2834: *
2835: * parse ENTITY references declarations
1.24 daniel 2836: *
2837: * [68] EntityRef ::= '&' Name ';'
1.52 daniel 2838: * return values: the entity ref string or NULL if directly as input stream.
1.24 daniel 2839: */
1.55 daniel 2840: CHAR *
2841: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.46 daniel 2842: CHAR *ret = NULL;
1.50 daniel 2843: const CHAR *q;
1.24 daniel 2844: CHAR *name;
1.59 daniel 2845: xmlEntityPtr ent;
1.50 daniel 2846: xmlParserInputPtr input = NULL;
1.24 daniel 2847:
1.50 daniel 2848: q = CUR_PTR;
1.40 daniel 2849: if (CUR == '&') {
2850: NEXT;
1.24 daniel 2851: name = xmlParseName(ctxt);
2852: if (name == NULL) {
1.55 daniel 2853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2854: ctxt->sax->error(ctxt, "xmlParseEntityRef: no name\n");
1.59 daniel 2855: ctxt->wellFormed = 0;
1.24 daniel 2856: } else {
1.40 daniel 2857: if (CUR == ';') {
2858: NEXT;
1.24 daniel 2859: /*
1.59 daniel 2860: * Well Formedness Constraint if:
2861: * - standalone
2862: * or
2863: * - no external subset and no external parameter entities
2864: * referenced
2865: * then
2866: * the entity referenced must have been declared
2867: *
2868: * TODO: to be double checked !!!
2869: */
2870: ent = xmlGetDocEntity(ctxt->doc, name);
2871: if ((ctxt->doc->standalone) ||
2872: ((ctxt->doc->intSubset == NULL) &&
2873: (ctxt->doc->extSubset == NULL))) {
2874: if (ent == NULL) {
2875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2876: ctxt->sax->error(ctxt,
2877: "Entity '%s' not defined\n", name);
2878: ctxt->wellFormed = 0;
2879: }
2880: }
2881:
2882: /*
2883: * Well Formedness Constraint :
2884: * The referenced entity must be a parsed entity.
2885: */
2886: if (ent != NULL) {
2887: switch (ent->type) {
2888: case XML_INTERNAL_PARAMETER_ENTITY:
2889: case XML_EXTERNAL_PARAMETER_ENTITY:
2890: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2891: ctxt->sax->error(ctxt,
2892: "Attempt to reference the parameter entity '%s'\n", name);
2893: ctxt->wellFormed = 0;
2894: break;
2895:
2896: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2897: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2898: ctxt->sax->error(ctxt,
2899: "Attempt to reference unparsed entity '%s'\n", name);
2900: ctxt->wellFormed = 0;
2901: break;
2902: }
2903: }
2904:
2905: /*
2906: * Well Formedness Constraint :
2907: * The referenced entity must not lead to recursion !
2908: */
2909:
2910: /*
1.52 daniel 2911: * We parsed the entity reference correctly, call SAX
2912: * interface for the proper behaviour:
2913: * - get a new input stream
2914: * - or keep the reference inline
1.24 daniel 2915: */
1.52 daniel 2916: if (ctxt->sax)
2917: input = ctxt->sax->resolveEntity(ctxt, NULL, name);
2918: if (input != NULL)
2919: xmlPushInput(ctxt, input);
2920: else {
2921: ret = xmlStrndup(q, CUR_PTR - q);
2922: }
1.24 daniel 2923: } else {
1.46 daniel 2924: char cst[2] = { '&', 0 };
2925:
1.55 daniel 2926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 2927: ctxt->sax->error(ctxt,
2928: "xmlParseEntityRef: expecting ';'\n");
2929: ctxt->wellFormed = 0;
1.46 daniel 2930: ret = xmlStrndup(cst, 1);
2931: ret = xmlStrcat(ret, name);
1.24 daniel 2932: }
1.45 daniel 2933: free(name);
1.24 daniel 2934: }
2935: }
1.46 daniel 2936: return(ret);
1.24 daniel 2937: }
2938:
1.50 daniel 2939: /**
2940: * xmlParseReference:
2941: * @ctxt: an XML parser context
2942: *
2943: * parse Reference declarations
1.24 daniel 2944: *
2945: * [67] Reference ::= EntityRef | CharRef
1.52 daniel 2946: * return values: the entity string or NULL if handled directly by pushing
2947: * the entity value as the input.
1.24 daniel 2948: */
1.55 daniel 2949: CHAR *
2950: xmlParseReference(xmlParserCtxtPtr ctxt) {
1.44 daniel 2951: if ((CUR == '&') && (NXT(1) == '#')) {
1.59 daniel 2952: CHAR *val = xmlParseCharRef(ctxt);
2953: xmlParserInputPtr in;
2954:
2955: if (val != NULL) {
2956: in = xmlNewStringInputStream(ctxt, val);
2957: xmlPushInput(ctxt, in);
2958: }
2959: return(NULL);
1.44 daniel 2960: } else if (CUR == '&') {
1.50 daniel 2961: return(xmlParseEntityRef(ctxt));
1.24 daniel 2962: }
1.46 daniel 2963: return(NULL);
1.24 daniel 2964: }
2965:
1.50 daniel 2966: /**
2967: * xmlParsePEReference:
2968: * @ctxt: an XML parser context
2969: *
2970: * parse PEReference declarations
1.22 daniel 2971: *
2972: * [69] PEReference ::= '%' Name ';'
1.50 daniel 2973: * return values: the entity content or NULL if handled directly.
1.22 daniel 2974: */
1.55 daniel 2975: CHAR *
2976: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.46 daniel 2977: CHAR *ret = NULL;
1.22 daniel 2978: CHAR *name;
1.45 daniel 2979: xmlEntityPtr entity;
1.50 daniel 2980: xmlParserInputPtr input;
1.22 daniel 2981:
1.40 daniel 2982: if (CUR == '%') {
2983: NEXT;
1.22 daniel 2984: name = xmlParseName(ctxt);
2985: if (name == NULL) {
1.55 daniel 2986: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987: ctxt->sax->error(ctxt, "xmlParsePEReference: no name\n");
1.59 daniel 2988: ctxt->wellFormed = 0;
1.22 daniel 2989: } else {
1.40 daniel 2990: if (CUR == ';') {
2991: NEXT;
1.45 daniel 2992: entity = xmlGetDtdEntity(ctxt->doc, name);
2993: if (entity == NULL) {
1.55 daniel 2994: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
2995: ctxt->sax->warning(ctxt,
1.59 daniel 2996: "xmlParsePEReference: %%%s; not found\n", name);
1.50 daniel 2997: } else {
2998: input = xmlNewEntityInputStream(ctxt, entity);
2999: xmlPushInput(ctxt, input);
1.45 daniel 3000: }
1.22 daniel 3001: } else {
1.50 daniel 3002: char cst[2] = { '%', 0 };
1.46 daniel 3003:
1.55 daniel 3004: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3005: ctxt->sax->error(ctxt,
3006: "xmlParsePEReference: expecting ';'\n");
3007: ctxt->wellFormed = 0;
1.46 daniel 3008: ret = xmlStrndup(cst, 1);
3009: ret = xmlStrcat(ret, name);
1.22 daniel 3010: }
1.45 daniel 3011: free(name);
1.3 veillard 3012: }
3013: }
1.46 daniel 3014: return(ret);
1.3 veillard 3015: }
3016:
1.50 daniel 3017: /**
3018: * xmlParseDocTypeDecl :
3019: * @ctxt: an XML parser context
3020: *
3021: * parse a DOCTYPE declaration
1.21 daniel 3022: *
1.22 daniel 3023: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3024: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 3025: */
3026:
1.55 daniel 3027: void
3028: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 3029: xmlDtdPtr dtd;
1.21 daniel 3030: CHAR *name;
3031: CHAR *ExternalID = NULL;
1.39 daniel 3032: CHAR *URI = NULL;
1.21 daniel 3033:
3034: /*
3035: * We know that '<!DOCTYPE' has been detected.
3036: */
1.40 daniel 3037: SKIP(9);
1.21 daniel 3038:
1.42 daniel 3039: SKIP_BLANKS;
1.21 daniel 3040:
3041: /*
3042: * Parse the DOCTYPE name.
3043: */
3044: name = xmlParseName(ctxt);
3045: if (name == NULL) {
1.55 daniel 3046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3047: ctxt->sax->error(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 3048: ctxt->wellFormed = 0;
1.21 daniel 3049: }
3050:
1.42 daniel 3051: SKIP_BLANKS;
1.21 daniel 3052:
3053: /*
1.22 daniel 3054: * Check for SystemID and ExternalID
3055: */
1.39 daniel 3056: URI = xmlParseExternalID(ctxt, &ExternalID);
1.42 daniel 3057: SKIP_BLANKS;
1.36 daniel 3058:
1.59 daniel 3059: dtd = xmlCreateIntSubset(ctxt->doc, name, ExternalID, URI);
1.22 daniel 3060:
3061: /*
3062: * Is there any DTD definition ?
3063: */
1.40 daniel 3064: if (CUR == '[') {
3065: NEXT;
1.22 daniel 3066: /*
3067: * Parse the succession of Markup declarations and
3068: * PEReferences.
3069: * Subsequence (markupdecl | PEReference | S)*
3070: */
1.40 daniel 3071: while (CUR != ']') {
3072: const CHAR *check = CUR_PTR;
1.22 daniel 3073:
1.42 daniel 3074: SKIP_BLANKS;
1.22 daniel 3075: xmlParseMarkupDecl(ctxt);
1.50 daniel 3076: xmlParsePEReference(ctxt);
1.22 daniel 3077:
1.40 daniel 3078: if (CUR_PTR == check) {
1.55 daniel 3079: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3080: ctxt->sax->error(ctxt,
1.31 daniel 3081: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 3082: ctxt->wellFormed = 0;
1.22 daniel 3083: break;
3084: }
3085: }
1.40 daniel 3086: if (CUR == ']') NEXT;
1.22 daniel 3087: }
3088:
3089: /*
3090: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 3091: */
1.40 daniel 3092: if (CUR != '>') {
1.55 daniel 3093: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094: ctxt->sax->error(ctxt, "DOCTYPE unproperly terminated\n");
1.59 daniel 3095: ctxt->wellFormed = 0;
1.22 daniel 3096: /* We shouldn't try to resynchronize ... */
1.21 daniel 3097: }
1.40 daniel 3098: NEXT;
1.22 daniel 3099:
3100: /*
3101: * Cleanup, since we don't use all those identifiers
3102: * TODO : the DOCTYPE if available should be stored !
3103: */
1.39 daniel 3104: if (URI != NULL) free(URI);
1.22 daniel 3105: if (ExternalID != NULL) free(ExternalID);
3106: if (name != NULL) free(name);
1.21 daniel 3107: }
3108:
1.50 daniel 3109: /**
3110: * xmlParseAttribute:
3111: * @ctxt: an XML parser context
3112: * @node: the node carrying the attribute
3113: *
3114: * parse an attribute
1.3 veillard 3115: *
1.22 daniel 3116: * [41] Attribute ::= Name Eq AttValue
3117: *
3118: * [25] Eq ::= S? '=' S?
3119: *
1.29 daniel 3120: * With namespace:
3121: *
3122: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 3123: *
3124: * Also the case QName == xmlns:??? is handled independently as a namespace
3125: * definition.
1.3 veillard 3126: */
3127:
1.52 daniel 3128: xmlAttrPtr xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.59 daniel 3129: CHAR *name, *val;
1.29 daniel 3130: CHAR *ns;
1.52 daniel 3131: CHAR *value = NULL;
3132: xmlAttrPtr ret;
1.3 veillard 3133:
1.29 daniel 3134: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 3135: if (name == NULL) {
1.55 daniel 3136: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3137: ctxt->sax->error(ctxt, "error parsing attribute name\n");
1.59 daniel 3138: ctxt->wellFormed = 0;
1.52 daniel 3139: return(NULL);
1.3 veillard 3140: }
3141:
3142: /*
1.29 daniel 3143: * read the value
1.3 veillard 3144: */
1.42 daniel 3145: SKIP_BLANKS;
1.40 daniel 3146: if (CUR == '=') {
3147: NEXT;
1.42 daniel 3148: SKIP_BLANKS;
1.29 daniel 3149: value = xmlParseAttValue(ctxt);
3150: } else {
1.55 daniel 3151: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3152: ctxt->sax->error(ctxt,
3153: "Specification mandate value for attribute %s\n", name);
3154: ctxt->wellFormed = 0;
1.3 veillard 3155: }
3156:
3157: /*
1.43 daniel 3158: * Check whether it's a namespace definition
3159: */
3160: if ((ns == NULL) &&
3161: (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
3162: (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
3163: /* a default namespace definition */
3164: xmlNewNs(node, value, NULL);
3165: if (name != NULL)
3166: free(name);
3167: if (value != NULL)
3168: free(value);
1.52 daniel 3169: return(NULL);
1.43 daniel 3170: }
3171: if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
3172: (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
3173: /* a standard namespace definition */
3174: xmlNewNs(node, value, name);
1.50 daniel 3175: free(ns);
1.43 daniel 3176: if (name != NULL)
3177: free(name);
3178: if (value != NULL)
3179: free(value);
1.52 daniel 3180: return(NULL);
1.43 daniel 3181: }
3182:
1.59 daniel 3183: /*
3184: * Well formedness requires at most one declaration of an attribute
3185: */
3186: if ((val = xmlGetProp(ctxt->node, name)) != NULL) {
3187: free(val);
3188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3189: ctxt->sax->error(ctxt, "Attribute %s redefined\n", name);
3190: ctxt->wellFormed = 0;
3191: ret = NULL;
3192: } else {
3193: ret = xmlNewProp(ctxt->node, name, NULL);
3194: if (ret != NULL)
3195: ret->val = xmlStringGetNodeList(ctxt->doc, value);
3196: }
1.53 daniel 3197:
3198: if (ns != NULL)
3199: free(ns);
3200: if (value != NULL)
3201: free(value);
3202: free(name);
1.52 daniel 3203: return(ret);
1.3 veillard 3204: }
3205:
1.50 daniel 3206: /**
3207: * xmlParseStartTag:
3208: * @ctxt: an XML parser context
3209: *
3210: * parse a start of tag either for rule element or
3211: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 3212: *
3213: * [40] STag ::= '<' Name (S Attribute)* S? '>'
3214: *
1.29 daniel 3215: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3216: *
3217: * With namespace:
3218: *
3219: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3220: *
3221: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.50 daniel 3222: *
3223: * return values: the XML new node or NULL.
1.2 veillard 3224: */
3225:
1.16 daniel 3226: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 3227: CHAR *namespace, *name;
3228: xmlNsPtr ns = NULL;
1.2 veillard 3229: xmlNodePtr ret = NULL;
1.50 daniel 3230: xmlNodePtr parent = ctxt->node;
1.2 veillard 3231:
1.40 daniel 3232: if (CUR != '<') return(NULL);
3233: NEXT;
1.3 veillard 3234:
1.34 daniel 3235: name = xmlNamespaceParseQName(ctxt, &namespace);
1.59 daniel 3236: if (name == NULL) {
3237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238: ctxt->sax->error(ctxt,
3239: "xmlParseStartTag: invalid element name\n");
3240: ctxt->wellFormed = 0;
3241: return(NULL);
3242: }
1.3 veillard 3243:
1.43 daniel 3244: /*
3245: * Note : the namespace resolution is deferred until the end of the
3246: * attributes parsing, since local namespace can be defined as
3247: * an attribute at this level.
3248: */
1.50 daniel 3249: ret = xmlNewDocNode(ctxt->doc, ns, name, NULL);
3250: if (ret == NULL) {
3251: if (namespace != NULL)
3252: free(namespace);
3253: free(name);
3254: return(NULL);
3255: }
3256:
3257: /*
3258: * We are parsing a new node.
3259: */
3260: nodePush(ctxt, ret);
1.2 veillard 3261:
1.3 veillard 3262: /*
3263: * Now parse the attributes, it ends up with the ending
3264: *
3265: * (S Attribute)* S?
3266: */
1.42 daniel 3267: SKIP_BLANKS;
1.40 daniel 3268: while ((IS_CHAR(CUR)) &&
3269: (CUR != '>') &&
3270: ((CUR != '/') || (NXT(1) != '>'))) {
3271: const CHAR *q = CUR_PTR;
1.29 daniel 3272:
3273: xmlParseAttribute(ctxt, ret);
1.42 daniel 3274: SKIP_BLANKS;
1.29 daniel 3275:
1.40 daniel 3276: if (q == CUR_PTR) {
1.55 daniel 3277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3278: ctxt->sax->error(ctxt,
1.31 daniel 3279: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 3280: ctxt->wellFormed = 0;
1.29 daniel 3281: break;
1.3 veillard 3282: }
3283: }
3284:
1.43 daniel 3285: /*
3286: * Search the namespace
3287: */
3288: ns = xmlSearchNs(ctxt->doc, ret, namespace);
3289: if (ns == NULL) /* ret still doesn't have a parent yet ! */
1.50 daniel 3290: ns = xmlSearchNs(ctxt->doc, parent, namespace);
1.43 daniel 3291: xmlSetNs(ret, ns);
3292: if (namespace != NULL)
3293: free(namespace);
3294:
1.44 daniel 3295: /*
3296: * SAX: Start of Element !
3297: */
3298: if (ctxt->sax != NULL)
3299: ctxt->sax->startElement(ctxt, name);
1.52 daniel 3300: free(name);
3301:
3302: /*
3303: * Link the child element
3304: */
3305: if (ctxt->nodeNr < 2) return(ret);
3306: parent = ctxt->nodeTab[ctxt->nodeNr - 2];
3307: if (parent != NULL)
3308: xmlAddChild(parent, ctxt->node);
1.44 daniel 3309:
1.3 veillard 3310: return(ret);
3311: }
3312:
1.50 daniel 3313: /**
3314: * xmlParseEndTag:
3315: * @ctxt: an XML parser context
3316: * @nsPtr: the current node namespace definition
3317: * @tagPtr: CHAR** receive the tag value
3318: *
3319: * parse an end of tag
1.27 daniel 3320: *
3321: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 3322: *
3323: * With namespace
3324: *
3325: * [9] ETag ::= '</' QName S? '>'
1.50 daniel 3326: *
3327: * return values: tagPtr receive the tag name just read
1.7 veillard 3328: */
3329:
1.55 daniel 3330: void
3331: xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
1.34 daniel 3332: CHAR *namespace, *name;
3333: xmlNsPtr ns = NULL;
1.7 veillard 3334:
1.34 daniel 3335: *nsPtr = NULL;
1.7 veillard 3336: *tagPtr = NULL;
3337:
1.40 daniel 3338: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 3339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340: ctxt->sax->error(ctxt, "xmlParseEndTag: '</' not found\n");
1.59 daniel 3341: ctxt->wellFormed = 0;
1.27 daniel 3342: return;
3343: }
1.40 daniel 3344: SKIP(2);
1.7 veillard 3345:
1.34 daniel 3346: name = xmlNamespaceParseQName(ctxt, &namespace);
1.43 daniel 3347:
3348: /*
3349: * Search the namespace
3350: */
3351: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
3352: if (namespace != NULL)
1.34 daniel 3353: free(namespace);
1.7 veillard 3354:
1.34 daniel 3355: *nsPtr = ns;
1.7 veillard 3356: *tagPtr = name;
3357:
3358: /*
3359: * We should definitely be at the ending "S? '>'" part
3360: */
1.42 daniel 3361: SKIP_BLANKS;
1.40 daniel 3362: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 3363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3364: ctxt->sax->error(ctxt, "End tag : expected '>'\n");
1.59 daniel 3365: ctxt->wellFormed = 0;
1.7 veillard 3366: } else
1.40 daniel 3367: NEXT;
1.7 veillard 3368:
3369: return;
3370: }
3371:
1.50 daniel 3372: /**
3373: * xmlParseCDSect:
3374: * @ctxt: an XML parser context
3375: *
3376: * Parse escaped pure raw content.
1.29 daniel 3377: *
3378: * [18] CDSect ::= CDStart CData CDEnd
3379: *
3380: * [19] CDStart ::= '<![CDATA['
3381: *
3382: * [20] Data ::= (Char* - (Char* ']]>' Char*))
3383: *
3384: * [21] CDEnd ::= ']]>'
1.3 veillard 3385: */
1.55 daniel 3386: void
3387: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 3388: const CHAR *r, *s, *base;
1.3 veillard 3389:
1.40 daniel 3390: if ((CUR == '<') && (NXT(1) == '!') &&
3391: (NXT(2) == '[') && (NXT(3) == 'C') &&
3392: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3393: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3394: (NXT(8) == '[')) {
3395: SKIP(9);
1.29 daniel 3396: } else
1.45 daniel 3397: return;
1.40 daniel 3398: base = CUR_PTR;
3399: if (!IS_CHAR(CUR)) {
1.55 daniel 3400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3401: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3402: ctxt->wellFormed = 0;
1.45 daniel 3403: return;
1.3 veillard 3404: }
1.40 daniel 3405: r = NEXT;
3406: if (!IS_CHAR(CUR)) {
1.55 daniel 3407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3409: ctxt->wellFormed = 0;
1.45 daniel 3410: return;
1.3 veillard 3411: }
1.40 daniel 3412: s = NEXT;
3413: while (IS_CHAR(CUR) &&
3414: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
3415: r++;s++;NEXT;
1.3 veillard 3416: }
1.40 daniel 3417: if (!IS_CHAR(CUR)) {
1.55 daniel 3418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3419: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3420: ctxt->wellFormed = 0;
1.45 daniel 3421: return;
1.3 veillard 3422: }
1.16 daniel 3423:
1.45 daniel 3424: /*
3425: * Ok the segment [base CUR_PTR] is to be consumed as chars.
3426: */
3427: if (ctxt->sax != NULL) {
3428: if (areBlanks(ctxt, base, CUR_PTR - base))
1.59 daniel 3429: ctxt->sax->ignorableWhitespace(ctxt, base, 0, (CUR_PTR - base) - 2);
1.45 daniel 3430: else
1.59 daniel 3431: ctxt->sax->characters(ctxt, base, 0, (CUR_PTR - base) - 2);
1.45 daniel 3432: }
1.2 veillard 3433: }
3434:
1.50 daniel 3435: /**
3436: * xmlParseContent:
3437: * @ctxt: an XML parser context
3438: *
3439: * Parse a content:
1.2 veillard 3440: *
1.27 daniel 3441: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 3442: */
3443:
1.55 daniel 3444: void
3445: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.2 veillard 3446: xmlNodePtr ret = NULL;
3447:
1.40 daniel 3448: while ((CUR != '<') || (NXT(1) != '/')) {
3449: const CHAR *test = CUR_PTR;
1.27 daniel 3450: ret = NULL;
3451:
3452: /*
3453: * First case : a Processing Instruction.
3454: */
1.40 daniel 3455: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 3456: xmlParsePI(ctxt);
3457: }
3458: /*
3459: * Second case : a CDSection
3460: */
1.40 daniel 3461: else if ((CUR == '<') && (NXT(1) == '!') &&
3462: (NXT(2) == '[') && (NXT(3) == 'C') &&
3463: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3464: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3465: (NXT(8) == '[')) {
1.45 daniel 3466: xmlParseCDSect(ctxt);
1.27 daniel 3467: }
3468: /*
3469: * Third case : a comment
3470: */
1.40 daniel 3471: else if ((CUR == '<') && (NXT(1) == '!') &&
3472: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 3473: ret = xmlParseComment(ctxt, 1);
1.27 daniel 3474: }
3475: /*
3476: * Fourth case : a sub-element.
3477: */
1.40 daniel 3478: else if (CUR == '<') {
1.45 daniel 3479: ret = xmlParseElement(ctxt);
3480: }
3481: /*
1.50 daniel 3482: * Fifth case : a reference. If if has not been resolved,
3483: * parsing returns it's Name, create the node
1.45 daniel 3484: */
3485: else if (CUR == '&') {
1.50 daniel 3486: CHAR *val = xmlParseReference(ctxt);
3487: if (val != NULL) {
3488: if (val[0] != '&') {
3489: /*
3490: * inline predefined entity.
3491: */
3492: if (ctxt->sax != NULL)
3493: ctxt->sax->characters(ctxt, val, 0, xmlStrlen(val));
3494: } else {
3495: /*
3496: * user defined entity, create a node.
3497: */
3498: ret = xmlNewReference(ctxt->doc, val);
3499: xmlAddChild(ctxt->node, ret);
3500: }
3501: free(val);
3502: }
1.27 daniel 3503: }
3504: /*
3505: * Last case, text. Note that References are handled directly.
3506: */
3507: else {
1.45 daniel 3508: xmlParseCharData(ctxt, 0);
1.3 veillard 3509: }
1.14 veillard 3510:
3511: /*
1.45 daniel 3512: * Pop-up of finished entities.
1.14 veillard 3513: */
1.45 daniel 3514: while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
3515:
1.40 daniel 3516: if (test == CUR_PTR) {
1.55 daniel 3517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3518: ctxt->sax->error(ctxt,
3519: "detected an error in element content\n");
3520: ctxt->wellFormed = 0;
1.29 daniel 3521: break;
3522: }
1.3 veillard 3523: }
1.2 veillard 3524: }
3525:
1.50 daniel 3526: /**
3527: * xmlParseElement:
3528: * @ctxt: an XML parser context
3529: *
3530: * parse an XML element, this is highly recursive
1.26 daniel 3531: *
3532: * [39] element ::= EmptyElemTag | STag content ETag
3533: *
3534: * [41] Attribute ::= Name Eq AttValue
1.50 daniel 3535: * return values: the XML new node or NULL
1.2 veillard 3536: */
1.26 daniel 3537:
1.2 veillard 3538:
1.45 daniel 3539: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 3540: xmlNodePtr ret;
1.40 daniel 3541: const CHAR *openTag = CUR_PTR;
1.32 daniel 3542: xmlParserNodeInfo node_info;
1.27 daniel 3543: CHAR *endTag;
1.34 daniel 3544: xmlNsPtr endNs;
1.2 veillard 3545:
1.32 daniel 3546: /* Capture start position */
1.40 daniel 3547: node_info.begin_pos = CUR_PTR - ctxt->input->base;
3548: node_info.begin_line = ctxt->input->line;
1.32 daniel 3549:
1.16 daniel 3550: ret = xmlParseStartTag(ctxt);
1.3 veillard 3551: if (ret == NULL) {
3552: return(NULL);
3553: }
1.2 veillard 3554:
3555: /*
3556: * Check for an Empty Element.
3557: */
1.40 daniel 3558: if ((CUR == '/') && (NXT(1) == '>')) {
3559: SKIP(2);
1.45 daniel 3560: if (ctxt->sax != NULL)
3561: ctxt->sax->endElement(ctxt, ret->name);
3562:
3563: /*
3564: * end of parsing of this node.
3565: */
3566: nodePop(ctxt);
3567:
1.2 veillard 3568: return(ret);
3569: }
1.40 daniel 3570: if (CUR == '>') NEXT;
1.2 veillard 3571: else {
1.55 daniel 3572: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 daniel 3573: ctxt->sax->error(ctxt, "Couldn't find end of Start Tag\n%.30s\n",
3574: openTag);
1.59 daniel 3575: ctxt->wellFormed = 0;
1.45 daniel 3576:
3577: /*
3578: * end of parsing of this node.
3579: */
3580: nodePop(ctxt);
3581:
1.16 daniel 3582: return(NULL);
1.2 veillard 3583: }
3584:
3585: /*
3586: * Parse the content of the element:
3587: */
1.45 daniel 3588: xmlParseContent(ctxt);
1.40 daniel 3589: if (!IS_CHAR(CUR)) {
1.55 daniel 3590: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 daniel 3591: ctxt->sax->error(ctxt,
3592: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 3593: ctxt->wellFormed = 0;
1.45 daniel 3594:
3595: /*
3596: * end of parsing of this node.
3597: */
3598: nodePop(ctxt);
3599:
1.16 daniel 3600: return(NULL);
1.2 veillard 3601: }
3602:
3603: /*
1.27 daniel 3604: * parse the end of tag: '</' should be here.
1.2 veillard 3605: */
1.34 daniel 3606: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 3607:
1.27 daniel 3608: /*
3609: * Check that the Name in the ETag is the same as in the STag.
3610: */
1.34 daniel 3611: if (endNs != ret->ns) {
1.55 daniel 3612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3613: ctxt->sax->error(ctxt,
1.43 daniel 3614: "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
1.31 daniel 3615: openTag, endTag);
1.59 daniel 3616: ctxt->wellFormed = 0;
1.27 daniel 3617: }
1.32 daniel 3618: if (endTag == NULL ) {
1.55 daniel 3619: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3620: ctxt->sax->error(ctxt, "The End tag has no name\n%.30s\n", openTag);
1.59 daniel 3621: ctxt->wellFormed = 0;
1.45 daniel 3622: } else if (xmlStrcmp(ret->name, endTag)) {
1.55 daniel 3623: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3624: ctxt->sax->error(ctxt,
1.31 daniel 3625: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
3626: openTag, endTag);
1.59 daniel 3627: ctxt->wellFormed = 0;
1.27 daniel 3628: }
1.44 daniel 3629: /*
3630: * SAX: End of Tag
3631: */
3632: else if (ctxt->sax != NULL)
3633: ctxt->sax->endElement(ctxt, endTag);
1.7 veillard 3634:
1.44 daniel 3635: if (endTag != NULL)
3636: free(endTag);
1.2 veillard 3637:
1.32 daniel 3638: /* Capture end position and add node */
3639: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 3640: node_info.end_pos = CUR_PTR - ctxt->input->base;
3641: node_info.end_line = ctxt->input->line;
1.32 daniel 3642: node_info.node = ret;
3643: xmlParserAddNodeInfo(ctxt, &node_info);
3644: }
1.43 daniel 3645:
3646: /*
3647: * end of parsing of this node.
3648: */
3649: nodePop(ctxt);
3650:
1.2 veillard 3651: return(ret);
3652: }
3653:
1.50 daniel 3654: /**
3655: * xmlParseVersionNum:
3656: * @ctxt: an XML parser context
3657: *
3658: * parse the XML version value.
1.29 daniel 3659: *
3660: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.50 daniel 3661: * return values: the string giving the XML version number, or NULL
1.29 daniel 3662: */
1.55 daniel 3663: CHAR *
3664: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 3665: const CHAR *q = CUR_PTR;
1.29 daniel 3666: CHAR *ret;
3667:
1.40 daniel 3668: while (IS_CHAR(CUR) &&
3669: (((CUR >= 'a') && (CUR <= 'z')) ||
3670: ((CUR >= 'A') && (CUR <= 'Z')) ||
3671: ((CUR >= '0') && (CUR <= '9')) ||
3672: (CUR == '_') || (CUR == '.') ||
3673: (CUR == ':') || (CUR == '-'))) NEXT;
3674: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3675: return(ret);
3676: }
3677:
1.50 daniel 3678: /**
3679: * xmlParseVersionInfo:
3680: * @ctxt: an XML parser context
3681: *
3682: * parse the XML version.
1.29 daniel 3683: *
3684: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3685: *
3686: * [25] Eq ::= S? '=' S?
1.50 daniel 3687: *
3688: * return values: the version string, e.g. "1.0"
1.29 daniel 3689: */
3690:
1.55 daniel 3691: CHAR *
3692: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 3693: CHAR *version = NULL;
3694: const CHAR *q;
3695:
1.40 daniel 3696: if ((CUR == 'v') && (NXT(1) == 'e') &&
3697: (NXT(2) == 'r') && (NXT(3) == 's') &&
3698: (NXT(4) == 'i') && (NXT(5) == 'o') &&
3699: (NXT(6) == 'n')) {
3700: SKIP(7);
1.42 daniel 3701: SKIP_BLANKS;
1.40 daniel 3702: if (CUR != '=') {
1.55 daniel 3703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3704: ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 3705: ctxt->wellFormed = 0;
1.31 daniel 3706: return(NULL);
3707: }
1.40 daniel 3708: NEXT;
1.42 daniel 3709: SKIP_BLANKS;
1.40 daniel 3710: if (CUR == '"') {
3711: NEXT;
3712: q = CUR_PTR;
1.29 daniel 3713: version = xmlParseVersionNum(ctxt);
1.55 daniel 3714: if (CUR != '"') {
3715: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3716: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3717: ctxt->wellFormed = 0;
1.55 daniel 3718: } else
1.40 daniel 3719: NEXT;
3720: } else if (CUR == '\''){
3721: NEXT;
3722: q = CUR_PTR;
1.29 daniel 3723: version = xmlParseVersionNum(ctxt);
1.55 daniel 3724: if (CUR != '\'') {
3725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3726: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3727: ctxt->wellFormed = 0;
1.55 daniel 3728: } else
1.40 daniel 3729: NEXT;
1.31 daniel 3730: } else {
1.55 daniel 3731: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3732: ctxt->sax->error(ctxt,
3733: "xmlParseVersionInfo : expected ' or \"\n");
3734: ctxt->wellFormed = 0;
1.29 daniel 3735: }
3736: }
3737: return(version);
3738: }
3739:
1.50 daniel 3740: /**
3741: * xmlParseEncName:
3742: * @ctxt: an XML parser context
3743: *
3744: * parse the XML encoding name
1.29 daniel 3745: *
3746: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 3747: *
3748: * return values: the encoding name value or NULL
1.29 daniel 3749: */
1.55 daniel 3750: CHAR *
3751: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 3752: const CHAR *q = CUR_PTR;
1.29 daniel 3753: CHAR *ret = NULL;
3754:
1.40 daniel 3755: if (((CUR >= 'a') && (CUR <= 'z')) ||
3756: ((CUR >= 'A') && (CUR <= 'Z'))) {
3757: NEXT;
3758: while (IS_CHAR(CUR) &&
3759: (((CUR >= 'a') && (CUR <= 'z')) ||
3760: ((CUR >= 'A') && (CUR <= 'Z')) ||
3761: ((CUR >= '0') && (CUR <= '9')) ||
3762: (CUR == '-'))) NEXT;
3763: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3764: } else {
1.55 daniel 3765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3766: ctxt->sax->error(ctxt, "Invalid XML encoding name\n");
1.59 daniel 3767: ctxt->wellFormed = 0;
1.29 daniel 3768: }
3769: return(ret);
3770: }
3771:
1.50 daniel 3772: /**
3773: * xmlParseEncodingDecl:
3774: * @ctxt: an XML parser context
3775: *
3776: * parse the XML encoding declaration
1.29 daniel 3777: *
3778: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 3779: *
3780: * TODO: this should setup the conversion filters.
3781: *
3782: * return values: the encoding value or NULL
1.29 daniel 3783: */
3784:
1.55 daniel 3785: CHAR *
3786: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 3787: CHAR *encoding = NULL;
3788: const CHAR *q;
3789:
1.42 daniel 3790: SKIP_BLANKS;
1.40 daniel 3791: if ((CUR == 'e') && (NXT(1) == 'n') &&
3792: (NXT(2) == 'c') && (NXT(3) == 'o') &&
3793: (NXT(4) == 'd') && (NXT(5) == 'i') &&
3794: (NXT(6) == 'n') && (NXT(7) == 'g')) {
3795: SKIP(8);
1.42 daniel 3796: SKIP_BLANKS;
1.40 daniel 3797: if (CUR != '=') {
1.55 daniel 3798: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3799: ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 3800: ctxt->wellFormed = 0;
1.31 daniel 3801: return(NULL);
3802: }
1.40 daniel 3803: NEXT;
1.42 daniel 3804: SKIP_BLANKS;
1.40 daniel 3805: if (CUR == '"') {
3806: NEXT;
3807: q = CUR_PTR;
1.29 daniel 3808: encoding = xmlParseEncName(ctxt);
1.55 daniel 3809: if (CUR != '"') {
3810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3811: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3812: ctxt->wellFormed = 0;
1.55 daniel 3813: } else
1.40 daniel 3814: NEXT;
3815: } else if (CUR == '\''){
3816: NEXT;
3817: q = CUR_PTR;
1.29 daniel 3818: encoding = xmlParseEncName(ctxt);
1.55 daniel 3819: if (CUR != '\'') {
3820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3821: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3822: ctxt->wellFormed = 0;
1.55 daniel 3823: } else
1.40 daniel 3824: NEXT;
3825: } else if (CUR == '"'){
1.55 daniel 3826: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3827: ctxt->sax->error(ctxt,
3828: "xmlParseEncodingDecl : expected ' or \"\n");
3829: ctxt->wellFormed = 0;
1.29 daniel 3830: }
3831: }
3832: return(encoding);
3833: }
3834:
1.50 daniel 3835: /**
3836: * xmlParseSDDecl:
3837: * @ctxt: an XML parser context
3838: *
3839: * parse the XML standalone declaration
1.29 daniel 3840: *
3841: * [32] SDDecl ::= S 'standalone' Eq
3842: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.50 daniel 3843: * return values: 1 if standalone, 0 otherwise
1.29 daniel 3844: */
3845:
1.55 daniel 3846: int
3847: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 3848: int standalone = -1;
3849:
1.42 daniel 3850: SKIP_BLANKS;
1.40 daniel 3851: if ((CUR == 's') && (NXT(1) == 't') &&
3852: (NXT(2) == 'a') && (NXT(3) == 'n') &&
3853: (NXT(4) == 'd') && (NXT(5) == 'a') &&
3854: (NXT(6) == 'l') && (NXT(7) == 'o') &&
3855: (NXT(8) == 'n') && (NXT(9) == 'e')) {
3856: SKIP(10);
3857: if (CUR != '=') {
1.55 daniel 3858: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3859: ctxt->sax->error(ctxt,
3860: "XML standalone declaration : expected '='\n");
3861: ctxt->wellFormed = 0;
1.32 daniel 3862: return(standalone);
3863: }
1.40 daniel 3864: NEXT;
1.42 daniel 3865: SKIP_BLANKS;
1.40 daniel 3866: if (CUR == '\''){
3867: NEXT;
3868: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3869: standalone = 0;
1.40 daniel 3870: SKIP(2);
3871: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3872: (NXT(2) == 's')) {
1.29 daniel 3873: standalone = 1;
1.40 daniel 3874: SKIP(3);
1.29 daniel 3875: } else {
1.55 daniel 3876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3877: ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 3878: ctxt->wellFormed = 0;
1.29 daniel 3879: }
1.55 daniel 3880: if (CUR != '\'') {
3881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3882: ctxt->sax->error(ctxt, "String not closed\n");
1.59 daniel 3883: ctxt->wellFormed = 0;
1.55 daniel 3884: } else
1.40 daniel 3885: NEXT;
3886: } else if (CUR == '"'){
3887: NEXT;
3888: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3889: standalone = 0;
1.40 daniel 3890: SKIP(2);
3891: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3892: (NXT(2) == 's')) {
1.29 daniel 3893: standalone = 1;
1.40 daniel 3894: SKIP(3);
1.29 daniel 3895: } else {
1.55 daniel 3896: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3897: ctxt->sax->error(ctxt,
3898: "standalone accepts only 'yes' or 'no'\n");
3899: ctxt->wellFormed = 0;
1.29 daniel 3900: }
1.55 daniel 3901: if (CUR != '"') {
3902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3903: ctxt->sax->error(ctxt, "String not closed\n");
1.59 daniel 3904: ctxt->wellFormed = 0;
1.55 daniel 3905: } else
1.40 daniel 3906: NEXT;
1.37 daniel 3907: } else {
1.55 daniel 3908: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3909: ctxt->sax->error(ctxt, "Standalone value not found\n");
1.59 daniel 3910: ctxt->wellFormed = 0;
1.37 daniel 3911: }
1.29 daniel 3912: }
3913: return(standalone);
3914: }
3915:
1.50 daniel 3916: /**
3917: * xmlParseXMLDecl:
3918: * @ctxt: an XML parser context
3919: *
3920: * parse an XML declaration header
1.29 daniel 3921: *
3922: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 3923: */
3924:
1.55 daniel 3925: void
3926: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 3927: CHAR *version;
3928:
3929: /*
1.19 daniel 3930: * We know that '<?xml' is here.
1.1 veillard 3931: */
1.40 daniel 3932: SKIP(5);
1.1 veillard 3933:
1.59 daniel 3934: if (!IS_BLANK(CUR)) {
3935: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3936: ctxt->sax->error(ctxt, "Blank needed after '<?xml'\n");
3937: ctxt->wellFormed = 0;
3938: }
1.42 daniel 3939: SKIP_BLANKS;
1.1 veillard 3940:
3941: /*
1.29 daniel 3942: * We should have the VersionInfo here.
1.1 veillard 3943: */
1.29 daniel 3944: version = xmlParseVersionInfo(ctxt);
3945: if (version == NULL)
1.45 daniel 3946: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3947: ctxt->doc = xmlNewDoc(version);
3948: free(version);
1.29 daniel 3949:
3950: /*
3951: * We may have the encoding declaration
3952: */
1.59 daniel 3953: if (!IS_BLANK(CUR)) {
3954: if ((CUR == '?') && (NXT(1) == '>')) {
3955: SKIP(2);
3956: return;
3957: }
3958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3959: ctxt->sax->error(ctxt, "Blank needed here\n");
3960: ctxt->wellFormed = 0;
3961: }
1.32 daniel 3962: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 3963:
3964: /*
1.29 daniel 3965: * We may have the standalone status.
1.1 veillard 3966: */
1.59 daniel 3967: if ((ctxt->doc->encoding != NULL) && (!IS_BLANK(CUR))) {
3968: if ((CUR == '?') && (NXT(1) == '>')) {
3969: SKIP(2);
3970: return;
3971: }
3972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3973: ctxt->sax->error(ctxt, "Blank needed here\n");
3974: ctxt->wellFormed = 0;
3975: }
3976: SKIP_BLANKS;
1.32 daniel 3977: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 3978:
1.42 daniel 3979: SKIP_BLANKS;
1.40 daniel 3980: if ((CUR == '?') && (NXT(1) == '>')) {
3981: SKIP(2);
3982: } else if (CUR == '>') {
1.31 daniel 3983: /* Deprecated old WD ... */
1.55 daniel 3984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3985: ctxt->sax->error(ctxt, "XML declaration must end-up with '?>'\n");
1.59 daniel 3986: ctxt->wellFormed = 0;
1.40 daniel 3987: NEXT;
1.29 daniel 3988: } else {
1.55 daniel 3989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3990: ctxt->sax->error(ctxt, "parsing XML declaration: '?>' expected\n");
1.59 daniel 3991: ctxt->wellFormed = 0;
1.40 daniel 3992: MOVETO_ENDTAG(CUR_PTR);
3993: NEXT;
1.29 daniel 3994: }
1.1 veillard 3995: }
3996:
1.50 daniel 3997: /**
3998: * xmlParseMisc:
3999: * @ctxt: an XML parser context
4000: *
4001: * parse an XML Misc* optionnal field.
1.21 daniel 4002: *
1.22 daniel 4003: * [27] Misc ::= Comment | PI | S
1.1 veillard 4004: */
4005:
1.55 daniel 4006: void
4007: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 4008: while (((CUR == '<') && (NXT(1) == '?')) ||
4009: ((CUR == '<') && (NXT(1) == '!') &&
4010: (NXT(2) == '-') && (NXT(3) == '-')) ||
4011: IS_BLANK(CUR)) {
4012: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 4013: xmlParsePI(ctxt);
1.40 daniel 4014: } else if (IS_BLANK(CUR)) {
4015: NEXT;
1.1 veillard 4016: } else
1.31 daniel 4017: xmlParseComment(ctxt, 0);
1.1 veillard 4018: }
4019: }
4020:
1.50 daniel 4021: /**
4022: * xmlParseDocument :
4023: * @ctxt: an XML parser context
4024: *
4025: * parse an XML document (and build a tree if using the standard SAX
4026: * interface).
1.21 daniel 4027: *
1.22 daniel 4028: * [1] document ::= prolog element Misc*
1.29 daniel 4029: *
4030: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 4031: *
4032: * return values: 0, -1 in case of error. the parser context is augmented
4033: * as a result of the parsing.
1.1 veillard 4034: */
4035:
1.55 daniel 4036: int
4037: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 4038: xmlDefaultSAXHandlerInit();
4039:
1.14 veillard 4040: /*
1.44 daniel 4041: * SAX: beginning of the document processing.
4042: */
4043: if (ctxt->sax)
4044: ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
4045: if (ctxt->sax)
4046: ctxt->sax->startDocument(ctxt);
4047:
4048: /*
1.14 veillard 4049: * We should check for encoding here and plug-in some
4050: * conversion code TODO !!!!
4051: */
1.1 veillard 4052:
4053: /*
4054: * Wipe out everything which is before the first '<'
4055: */
1.59 daniel 4056: if (IS_BLANK(CUR)) {
4057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4058: ctxt->sax->error(ctxt,
4059: "Extra spaces at the beginning of the document are not allowed\n");
4060: ctxt->wellFormed = 0;
4061: SKIP_BLANKS;
4062: }
4063:
4064: if (CUR == 0) {
4065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4066: ctxt->sax->error(ctxt, "Document is empty\n");
4067: ctxt->wellFormed = 0;
4068: }
1.1 veillard 4069:
4070: /*
4071: * Check for the XMLDecl in the Prolog.
4072: */
1.40 daniel 4073: if ((CUR == '<') && (NXT(1) == '?') &&
4074: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4075: (NXT(4) == 'l')) {
1.19 daniel 4076: xmlParseXMLDecl(ctxt);
4077: /* SKIP_EOL(cur); */
1.42 daniel 4078: SKIP_BLANKS;
1.40 daniel 4079: } else if ((CUR == '<') && (NXT(1) == '?') &&
4080: (NXT(2) == 'X') && (NXT(3) == 'M') &&
4081: (NXT(4) == 'L')) {
1.19 daniel 4082: /*
4083: * The first drafts were using <?XML and the final W3C REC
4084: * now use <?xml ...
4085: */
1.16 daniel 4086: xmlParseXMLDecl(ctxt);
1.1 veillard 4087: /* SKIP_EOL(cur); */
1.42 daniel 4088: SKIP_BLANKS;
1.1 veillard 4089: } else {
1.45 daniel 4090: CHAR *version;
4091:
4092: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4093: ctxt->doc = xmlNewDoc(version);
4094: free(version);
1.1 veillard 4095: }
4096:
4097: /*
4098: * The Misc part of the Prolog
4099: */
1.16 daniel 4100: xmlParseMisc(ctxt);
1.1 veillard 4101:
4102: /*
1.29 daniel 4103: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 4104: * (doctypedecl Misc*)?
4105: */
1.40 daniel 4106: if ((CUR == '<') && (NXT(1) == '!') &&
4107: (NXT(2) == 'D') && (NXT(3) == 'O') &&
4108: (NXT(4) == 'C') && (NXT(5) == 'T') &&
4109: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
4110: (NXT(8) == 'E')) {
1.22 daniel 4111: xmlParseDocTypeDecl(ctxt);
4112: xmlParseMisc(ctxt);
1.21 daniel 4113: }
4114:
4115: /*
4116: * Time to start parsing the tree itself
1.1 veillard 4117: */
1.45 daniel 4118: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 4119:
4120: /*
4121: * The Misc part at the end
4122: */
4123: xmlParseMisc(ctxt);
1.16 daniel 4124:
1.59 daniel 4125: if (CUR != 0) {
4126: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4127: ctxt->sax->error(ctxt,
4128: "Extra content at the end of the document\n");
4129: ctxt->wellFormed = 0;
4130: }
4131:
1.44 daniel 4132: /*
4133: * SAX: end of the document processing.
4134: */
4135: if (ctxt->sax)
4136: ctxt->sax->endDocument(ctxt);
1.59 daniel 4137: if (! ctxt->wellFormed) return(-1);
1.16 daniel 4138: return(0);
4139: }
4140:
1.50 daniel 4141: /**
1.55 daniel 4142: * xmlSAXParseDoc :
4143: * @sax: the SAX handler block
1.50 daniel 4144: * @cur: a pointer to an array of CHAR
1.59 daniel 4145: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4146: * documents
1.50 daniel 4147: *
4148: * parse an XML in-memory document and build a tree.
1.55 daniel 4149: * It use the given SAX function block to handle the parsing callback.
4150: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 4151: *
4152: * return values: the resulting document tree
1.16 daniel 4153: */
4154:
1.59 daniel 4155: xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
1.16 daniel 4156: xmlDocPtr ret;
4157: xmlParserCtxtPtr ctxt;
1.40 daniel 4158: xmlParserInputPtr input;
1.16 daniel 4159:
4160: if (cur == NULL) return(NULL);
1.1 veillard 4161:
1.16 daniel 4162: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4163: if (ctxt == NULL) {
4164: perror("malloc");
4165: return(NULL);
4166: }
1.40 daniel 4167: xmlInitParserCtxt(ctxt);
1.56 daniel 4168: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 4169: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4170: if (input == NULL) {
4171: perror("malloc");
4172: free(ctxt);
4173: return(NULL);
4174: }
4175:
4176: input->filename = NULL;
4177: input->line = 1;
4178: input->col = 1;
4179: input->base = cur;
4180: input->cur = cur;
4181:
4182: inputPush(ctxt, input);
1.16 daniel 4183:
4184:
4185: xmlParseDocument(ctxt);
1.59 daniel 4186: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4187: else {
4188: ret = NULL;
4189: xmlFreeDoc(ctxt->doc);
4190: ctxt->doc = NULL;
4191: }
1.50 daniel 4192: free(ctxt->nodeTab);
4193: free(ctxt->inputTab);
4194: if (input->filename != NULL)
1.51 daniel 4195: free((char *)input->filename);
1.50 daniel 4196: free(input);
1.16 daniel 4197: free(ctxt);
4198:
1.1 veillard 4199: return(ret);
4200: }
4201:
1.50 daniel 4202: /**
1.55 daniel 4203: * xmlParseDoc :
4204: * @cur: a pointer to an array of CHAR
4205: *
4206: * parse an XML in-memory document and build a tree.
4207: *
4208: * return values: the resulting document tree
4209: */
4210:
4211: xmlDocPtr xmlParseDoc(CHAR *cur) {
1.59 daniel 4212: return(xmlSAXParseDoc(NULL, cur, 0));
4213: }
4214:
4215: /**
4216: * xmlRecoverDoc :
4217: * @cur: a pointer to an array of CHAR
4218: *
4219: * parse an XML in-memory document and build a tree.
4220: * In the case the document is not Well Formed, a tree is built anyway
4221: *
4222: * return values: the resulting document tree
4223: */
4224:
4225: xmlDocPtr xmlRecoverDoc(CHAR *cur) {
4226: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 4227: }
4228:
4229: /**
4230: * xmlSAXParseFile :
4231: * @sax: the SAX handler block
1.50 daniel 4232: * @filename: the filename
1.59 daniel 4233: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4234: * documents
1.50 daniel 4235: *
4236: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4237: * compressed document is provided by default if found at compile-time.
1.55 daniel 4238: * It use the given SAX function block to handle the parsing callback.
4239: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 4240: *
4241: * return values: the resulting document tree
1.9 httpng 4242: */
4243:
1.59 daniel 4244: xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
4245: int recovery) {
1.9 httpng 4246: xmlDocPtr ret;
1.20 daniel 4247: #ifdef HAVE_ZLIB_H
4248: gzFile input;
4249: #else
1.9 httpng 4250: int input;
1.20 daniel 4251: #endif
1.9 httpng 4252: int res;
1.55 daniel 4253: int len;
1.9 httpng 4254: struct stat buf;
4255: char *buffer;
1.16 daniel 4256: xmlParserCtxtPtr ctxt;
1.40 daniel 4257: xmlParserInputPtr inputStream;
1.9 httpng 4258:
1.11 veillard 4259: res = stat(filename, &buf);
1.9 httpng 4260: if (res < 0) return(NULL);
4261:
1.20 daniel 4262: #ifdef HAVE_ZLIB_H
1.55 daniel 4263: len = (buf.st_size * 8) + 1000;
1.20 daniel 4264: retry_bigger:
1.55 daniel 4265: buffer = malloc(len);
1.20 daniel 4266: #else
1.55 daniel 4267: len = buf.st_size + 100;
4268: buffer = malloc(len);
1.20 daniel 4269: #endif
1.9 httpng 4270: if (buffer == NULL) {
4271: perror("malloc");
4272: return(NULL);
4273: }
4274:
1.55 daniel 4275: memset(buffer, 0, len);
1.20 daniel 4276: #ifdef HAVE_ZLIB_H
4277: input = gzopen (filename, "r");
4278: if (input == NULL) {
4279: fprintf (stderr, "Cannot read file %s :\n", filename);
4280: perror ("gzopen failed");
4281: return(NULL);
4282: }
4283: #else
1.9 httpng 4284: input = open (filename, O_RDONLY);
4285: if (input < 0) {
4286: fprintf (stderr, "Cannot read file %s :\n", filename);
4287: perror ("open failed");
4288: return(NULL);
4289: }
1.20 daniel 4290: #endif
4291: #ifdef HAVE_ZLIB_H
1.55 daniel 4292: res = gzread(input, buffer, len);
1.20 daniel 4293: #else
1.9 httpng 4294: res = read(input, buffer, buf.st_size);
1.20 daniel 4295: #endif
1.9 httpng 4296: if (res < 0) {
4297: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 4298: #ifdef HAVE_ZLIB_H
4299: perror ("gzread failed");
4300: #else
1.9 httpng 4301: perror ("read failed");
1.20 daniel 4302: #endif
1.9 httpng 4303: return(NULL);
4304: }
1.20 daniel 4305: #ifdef HAVE_ZLIB_H
4306: gzclose(input);
1.55 daniel 4307: if (res >= len) {
1.20 daniel 4308: free(buffer);
1.55 daniel 4309: len *= 2;
1.20 daniel 4310: goto retry_bigger;
4311: }
4312: buf.st_size = res;
4313: #else
1.9 httpng 4314: close(input);
1.20 daniel 4315: #endif
4316:
1.40 daniel 4317: buffer[buf.st_size] = '\0';
1.9 httpng 4318:
1.16 daniel 4319: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4320: if (ctxt == NULL) {
4321: perror("malloc");
4322: return(NULL);
4323: }
1.40 daniel 4324: xmlInitParserCtxt(ctxt);
1.56 daniel 4325: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 4326: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4327: if (inputStream == NULL) {
4328: perror("malloc");
4329: free(ctxt);
4330: return(NULL);
4331: }
4332:
4333: inputStream->filename = strdup(filename);
4334: inputStream->line = 1;
4335: inputStream->col = 1;
1.45 daniel 4336:
4337: /*
4338: * TODO : plug some encoding conversion routines here. !!!
4339: */
1.40 daniel 4340: inputStream->base = buffer;
4341: inputStream->cur = buffer;
1.16 daniel 4342:
1.40 daniel 4343: inputPush(ctxt, inputStream);
1.16 daniel 4344:
4345: xmlParseDocument(ctxt);
1.40 daniel 4346:
1.59 daniel 4347: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4348: else {
4349: ret = NULL;
4350: xmlFreeDoc(ctxt->doc);
4351: ctxt->doc = NULL;
4352: }
1.9 httpng 4353: free(buffer);
1.50 daniel 4354: free(ctxt->nodeTab);
4355: free(ctxt->inputTab);
4356: if (inputStream->filename != NULL)
1.51 daniel 4357: free((char *)inputStream->filename);
1.50 daniel 4358: free(inputStream);
1.20 daniel 4359: free(ctxt);
4360:
4361: return(ret);
4362: }
4363:
1.55 daniel 4364: /**
4365: * xmlParseFile :
4366: * @filename: the filename
4367: *
4368: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4369: * compressed document is provided by default if found at compile-time.
4370: *
4371: * return values: the resulting document tree
4372: */
4373:
4374: xmlDocPtr xmlParseFile(const char *filename) {
1.59 daniel 4375: return(xmlSAXParseFile(NULL, filename, 0));
4376: }
4377:
4378: /**
4379: * xmlRecoverFile :
4380: * @filename: the filename
4381: *
4382: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4383: * compressed document is provided by default if found at compile-time.
4384: * In the case the document is not Well Formed, a tree is built anyway
4385: *
4386: * return values: the resulting document tree
4387: */
4388:
4389: xmlDocPtr xmlRecoverFile(const char *filename) {
4390: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 4391: }
1.32 daniel 4392:
1.50 daniel 4393: /**
1.55 daniel 4394: * xmlSAXParseMemory :
4395: * @sax: the SAX handler block
1.50 daniel 4396: * @cur: an pointer to a char array
4397: * @size: the siwe of the array
1.59 daniel 4398: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4399: * documents
1.50 daniel 4400: *
1.55 daniel 4401: * parse an XML in-memory block and use the given SAX function block
4402: * to handle the parsing callback. If sax is NULL, fallback to the default
4403: * DOM tree building routines.
1.50 daniel 4404: *
4405: * TODO : plug some encoding conversion routines here. !!!
4406: *
4407: * return values: the resulting document tree
1.20 daniel 4408: */
1.50 daniel 4409:
1.59 daniel 4410: xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size,
4411: int recovery) {
1.20 daniel 4412: xmlDocPtr ret;
4413: xmlParserCtxtPtr ctxt;
1.40 daniel 4414: xmlParserInputPtr input;
4415:
4416: buffer[size - 1] = '\0';
4417:
1.20 daniel 4418: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4419: if (ctxt == NULL) {
4420: perror("malloc");
4421: return(NULL);
4422: }
1.40 daniel 4423: xmlInitParserCtxt(ctxt);
1.56 daniel 4424: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 4425: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4426: if (input == NULL) {
4427: perror("malloc");
1.50 daniel 4428: free(ctxt->nodeTab);
4429: free(ctxt->inputTab);
1.40 daniel 4430: free(ctxt);
4431: return(NULL);
4432: }
1.20 daniel 4433:
1.40 daniel 4434: input->filename = NULL;
4435: input->line = 1;
4436: input->col = 1;
1.45 daniel 4437:
4438: /*
4439: * TODO : plug some encoding conversion routines here. !!!
4440: */
1.40 daniel 4441: input->base = buffer;
4442: input->cur = buffer;
1.20 daniel 4443:
1.40 daniel 4444: inputPush(ctxt, input);
1.20 daniel 4445:
4446: xmlParseDocument(ctxt);
1.40 daniel 4447:
1.59 daniel 4448: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4449: else {
4450: ret = NULL;
4451: xmlFreeDoc(ctxt->doc);
4452: ctxt->doc = NULL;
4453: }
1.50 daniel 4454: free(ctxt->nodeTab);
4455: free(ctxt->inputTab);
4456: if (input->filename != NULL)
1.51 daniel 4457: free((char *)input->filename);
1.50 daniel 4458: free(input);
1.16 daniel 4459: free(ctxt);
4460:
1.9 httpng 4461: return(ret);
1.17 daniel 4462: }
4463:
1.55 daniel 4464: /**
4465: * xmlParseMemory :
4466: * @cur: an pointer to a char array
4467: * @size: the size of the array
4468: *
4469: * parse an XML in-memory block and build a tree.
4470: *
4471: * return values: the resulting document tree
4472: */
4473:
4474: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 4475: return(xmlSAXParseMemory(NULL, buffer, size, 0));
4476: }
4477:
4478: /**
4479: * xmlRecoverMemory :
4480: * @cur: an pointer to a char array
4481: * @size: the size of the array
4482: *
4483: * parse an XML in-memory block and build a tree.
4484: * In the case the document is not Well Formed, a tree is built anyway
4485: *
4486: * return values: the resulting document tree
4487: */
4488:
4489: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
4490: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.55 daniel 4491: }
1.17 daniel 4492:
1.50 daniel 4493: /**
4494: * xmlInitParserCtxt:
4495: * @ctxt: an XML parser context
4496: *
4497: * Initialize a parser context
4498: */
4499:
1.55 daniel 4500: void
4501: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 4502: {
1.40 daniel 4503: /* Allocate the Input stack */
4504: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
4505: ctxt->inputNr = 0;
4506: ctxt->inputMax = 5;
4507: ctxt->input = NULL;
4508:
1.43 daniel 4509: /* Allocate the Node stack */
4510: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
4511: ctxt->nodeNr = 0;
4512: ctxt->nodeMax = 10;
4513: ctxt->node = NULL;
4514:
1.45 daniel 4515: ctxt->sax = &xmlDefaultSAXHandler;
1.32 daniel 4516: ctxt->doc = NULL;
1.59 daniel 4517: ctxt->wellFormed = 1;
1.32 daniel 4518: ctxt->record_info = 0;
4519: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 4520: }
4521:
1.50 daniel 4522: /**
4523: * xmlClearParserCtxt:
4524: * @ctxt: an XML parser context
4525: *
4526: * Clear (release owned resources) and reinitialize a parser context
4527: */
1.17 daniel 4528:
1.55 daniel 4529: void
4530: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 4531: {
1.32 daniel 4532: xmlClearNodeInfoSeq(&ctxt->node_seq);
4533: xmlInitParserCtxt(ctxt);
1.17 daniel 4534: }
4535:
4536:
1.50 daniel 4537: /**
4538: * xmlSetupParserForBuffer:
4539: * @ctxt: an XML parser context
4540: * @buffer: a CHAR * buffer
4541: * @filename: a file name
4542: *
1.19 daniel 4543: * Setup the parser context to parse a new buffer; Clears any prior
4544: * contents from the parser context. The buffer parameter must not be
4545: * NULL, but the filename parameter can be
4546: */
1.50 daniel 4547:
1.55 daniel 4548: void
4549: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 4550: const char* filename)
4551: {
1.40 daniel 4552: xmlParserInputPtr input;
4553:
4554: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4555: if (input == NULL) {
4556: perror("malloc");
4557: free(ctxt);
4558: exit(1);
4559: }
4560:
1.17 daniel 4561: xmlClearParserCtxt(ctxt);
1.40 daniel 4562: if (input->filename != NULL)
4563: input->filename = strdup(filename);
4564: else
4565: input->filename = NULL;
4566: input->line = 1;
4567: input->col = 1;
4568: input->base = buffer;
4569: input->cur = buffer;
4570:
4571: inputPush(ctxt, input);
1.17 daniel 4572: }
4573:
1.32 daniel 4574:
1.50 daniel 4575: /**
4576: * xmlParserFindNodeInfo:
4577: * @ctxt: an XML parser context
4578: * @node: an XML node within the tree
4579: *
4580: * Find the parser node info struct for a given node
4581: *
4582: * return values: an xmlParserNodeInfo block pointer or NULL
1.32 daniel 4583: */
4584: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
4585: const xmlNode* node)
4586: {
4587: unsigned long pos;
4588:
4589: /* Find position where node should be at */
4590: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
4591: if ( ctx->node_seq.buffer[pos].node == node )
4592: return &ctx->node_seq.buffer[pos];
4593: else
4594: return NULL;
4595: }
4596:
4597:
1.50 daniel 4598: /**
4599: * xmlInitNodeInfoSeq :
4600: * @seq: a node info sequence pointer
4601: *
4602: * -- Initialize (set to initial state) node info sequence
1.32 daniel 4603: */
1.55 daniel 4604: void
4605: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 4606: {
4607: seq->length = 0;
4608: seq->maximum = 0;
4609: seq->buffer = NULL;
4610: }
4611:
1.50 daniel 4612: /**
4613: * xmlClearNodeInfoSeq :
4614: * @seq: a node info sequence pointer
4615: *
4616: * -- Clear (release memory and reinitialize) node
1.32 daniel 4617: * info sequence
4618: */
1.55 daniel 4619: void
4620: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 4621: {
4622: if ( seq->buffer != NULL )
4623: free(seq->buffer);
4624: xmlInitNodeInfoSeq(seq);
4625: }
4626:
4627:
1.50 daniel 4628: /**
4629: * xmlParserFindNodeInfoIndex:
4630: * @seq: a node info sequence pointer
4631: * @node: an XML node pointer
4632: *
4633: *
1.32 daniel 4634: * xmlParserFindNodeInfoIndex : Find the index that the info record for
4635: * the given node is or should be at in a sorted sequence
1.50 daniel 4636: * return values: a long indicating the position of the record
1.32 daniel 4637: */
4638: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
4639: const xmlNode* node)
4640: {
4641: unsigned long upper, lower, middle;
4642: int found = 0;
4643:
4644: /* Do a binary search for the key */
4645: lower = 1;
4646: upper = seq->length;
4647: middle = 0;
4648: while ( lower <= upper && !found) {
4649: middle = lower + (upper - lower) / 2;
4650: if ( node == seq->buffer[middle - 1].node )
4651: found = 1;
4652: else if ( node < seq->buffer[middle - 1].node )
4653: upper = middle - 1;
4654: else
4655: lower = middle + 1;
4656: }
4657:
4658: /* Return position */
4659: if ( middle == 0 || seq->buffer[middle - 1].node < node )
4660: return middle;
4661: else
4662: return middle - 1;
4663: }
4664:
4665:
1.50 daniel 4666: /**
4667: * xmlParserAddNodeInfo:
4668: * @ctxt: an XML parser context
4669: * @seq: a node info sequence pointer
4670: *
4671: * Insert node info record into the sorted sequence
1.32 daniel 4672: */
1.55 daniel 4673: void
4674: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.32 daniel 4675: const xmlParserNodeInfo* info)
4676: {
4677: unsigned long pos;
4678: static unsigned int block_size = 5;
4679:
4680: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 4681: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
4682: if ( pos < ctxt->node_seq.length
4683: && ctxt->node_seq.buffer[pos].node == info->node ) {
4684: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 4685: }
4686:
4687: /* Otherwise, we need to add new node to buffer */
4688: else {
4689: /* Expand buffer by 5 if needed */
1.55 daniel 4690: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 4691: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 4692: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
4693: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 4694:
1.55 daniel 4695: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 4696: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
4697: else
1.55 daniel 4698: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 4699:
4700: if ( tmp_buffer == NULL ) {
1.55 daniel 4701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 4702: ctxt->sax->error(ctxt, "Out of memory\n");
1.32 daniel 4703: return;
4704: }
1.55 daniel 4705: ctxt->node_seq.buffer = tmp_buffer;
4706: ctxt->node_seq.maximum += block_size;
1.32 daniel 4707: }
4708:
4709: /* If position is not at end, move elements out of the way */
1.55 daniel 4710: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 4711: unsigned long i;
4712:
1.55 daniel 4713: for ( i = ctxt->node_seq.length; i > pos; i-- )
4714: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 4715: }
4716:
4717: /* Copy element and increase length */
1.55 daniel 4718: ctxt->node_seq.buffer[pos] = *info;
4719: ctxt->node_seq.length++;
1.32 daniel 4720: }
4721: }
Webmaster