Annotation of XML/parser.c, revision 1.41
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.41 ! daniel 6: * $Id: parser.c,v 1.40 1998/08/07 07:33:14 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
34: /*
1.40 daniel 35: * Generic function for accessing stacks in the Parser Context
1.1 veillard 36: */
37:
1.31 daniel 38: #define PUSH_AND_POP(type, name) \
1.40 daniel 39: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 40: if (ctxt->name##Nr >= ctxt->name##Max) { \
41: ctxt->name##Max *= 2; \
1.40 daniel 42: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
43: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
44: if (ctxt->name##Tab == NULL) { \
1.31 daniel 45: fprintf(stderr, "realloc failed !\n"); \
46: exit(1); \
47: } \
48: } \
1.40 daniel 49: ctxt->name##Tab[ctxt->name##Nr] = value; \
50: ctxt->name = value; \
51: return(ctxt->name##Nr++); \
1.31 daniel 52: } \
1.40 daniel 53: type name##Pop(xmlParserCtxtPtr ctxt) { \
54: if (ctxt->name##Nr <= 0) return(0); \
55: ctxt->name##Nr--; \
56: ctxt->name = ctxt->name##Tab[ctxt->name##Nr]; \
57: return(ctxt->name); \
1.31 daniel 58: } \
59:
1.40 daniel 60: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 ! daniel 61: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 62:
/*
 * Accessors for the current position of the active input. Each of them
 * expects a variable named ctxt to be in scope.
 *
 *   CUR        character at the current position; when that character is 0
 *              the value of xmlPopInput(ctxt) is used instead
 *   CUR_PTR    the current position pointer
 *   NXT(val)   character located val positions after the current one
 *   NEXT       post-increments the position (value: the old position)
 *   SKIP(val)  advances the position by val
 */
#define CUR \
    (*(ctxt->input->cur) ? *(ctxt->input->cur) : xmlPopInput(ctxt))
#define CUR_PTR   (ctxt->input->cur)
#define NXT(val)  (ctxt->input->cur[(val)])
#define NEXT      (ctxt->input->cur++)
#define SKIP(val) (ctxt->input->cur += (val))
68:
69: /*
70: * xmlPopInput: the current input pointed by ctxt->input came to an end
71: * pop it and return the next char.
72: */
73: CHAR xmlPopInput(xmlParserCtxtPtr ctxt) {
74: if (ctxt->inputNr == 1) return(0); /* End of main Input */
75: inputPop(ctxt);
76: return(CUR);
77: }
78:
79: /*
80: * xmlPushInput: switch to a new input stream which is stacked on top
81: * of the previous one(s).
82: */
83: void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
84: if (input == NULL) return;
85: inputPush(ctxt, input);
86: }
87:
88: /*
89: * A few macros needed to help building the parser.
90: */
91:
1.1 veillard 92: #ifdef UNICODE
1.30 daniel 93: /************************************************************************
94: * *
95: * UNICODE version of the macros. *
96: * *
97: ************************************************************************/
1.1 veillard 98: /*
1.22 daniel 99: * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
100: * | [#x10000-#x10FFFF]
101: * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
1.1 veillard 102: */
/*
 * IS_CHAR(c): non-zero when c is a legal XML character per production [2].
 * The surrogate block (0xD800-0xDFFF), 0xFFFE, 0xFFFF and anything above
 * 0x10FFFF are rejected, as is 0, so the macro also stops a scan at the
 * end of a zero terminated buffer.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
106:
1.22 daniel 107: /*
108: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
109: */
/*
 * SKIP_BLANKS(p): advance p past any run of the four white space
 * characters of production [3] (0x20, 0x09, 0x0A, 0x0D). p must be a
 * modifiable pointer. The expansion carries no trailing semicolon; the
 * caller supplies it.
 */
#define SKIP_BLANKS(p)							\
    while ((*(p) == 0x20) || (*(p) == 0x09) || (*(p) == 0xa) ||	\
           (*(p) == 0xd)) (p)++
1.1 veillard 113:
1.22 daniel 114: /*
1.30 daniel 115: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 116: *
1.30 daniel 117: * VI is your friend !
118: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
119: * and
120: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 121: */
/*
 * IS_BASECHAR(c): non-zero when c equals one of the single values or lies
 * inside one of the inclusive ranges listed below. c is evaluated several
 * times.
 */
#define IS_BASECHAR(c) ( \
    ((c) >= 0x0041 && (c) <= 0x005A) || ((c) >= 0x0061 && (c) <= 0x007A) || \
    ((c) >= 0x00C0 && (c) <= 0x00D6) || ((c) >= 0x00D8 && (c) <= 0x00F6) || \
    ((c) >= 0x00F8 && (c) <= 0x00FF) || ((c) >= 0x0100 && (c) <= 0x0131) || \
    ((c) >= 0x0134 && (c) <= 0x013E) || ((c) >= 0x0141 && (c) <= 0x0148) || \
    ((c) >= 0x014A && (c) <= 0x017E) || ((c) >= 0x0180 && (c) <= 0x01C3) || \
    ((c) >= 0x01CD && (c) <= 0x01F0) || ((c) >= 0x01F4 && (c) <= 0x01F5) || \
    ((c) >= 0x01FA && (c) <= 0x0217) || ((c) >= 0x0250 && (c) <= 0x02A8) || \
    ((c) >= 0x02BB && (c) <= 0x02C1) || \
    (c) == 0x0386 || ((c) >= 0x0388 && (c) <= 0x038A) || (c) == 0x038C || \
    ((c) >= 0x038E && (c) <= 0x03A1) || ((c) >= 0x03A3 && (c) <= 0x03CE) || \
    ((c) >= 0x03D0 && (c) <= 0x03D6) || \
    (c) == 0x03DA || (c) == 0x03DC || (c) == 0x03DE || (c) == 0x03E0 || \
    ((c) >= 0x03E2 && (c) <= 0x03F3) || \
    ((c) >= 0x0401 && (c) <= 0x040C) || ((c) >= 0x040E && (c) <= 0x044F) || \
    ((c) >= 0x0451 && (c) <= 0x045C) || ((c) >= 0x045E && (c) <= 0x0481) || \
    ((c) >= 0x0490 && (c) <= 0x04C4) || ((c) >= 0x04C7 && (c) <= 0x04C8) || \
    ((c) >= 0x04CB && (c) <= 0x04CC) || ((c) >= 0x04D0 && (c) <= 0x04EB) || \
    ((c) >= 0x04EE && (c) <= 0x04F5) || ((c) >= 0x04F8 && (c) <= 0x04F9) || \
    ((c) >= 0x0531 && (c) <= 0x0556) || (c) == 0x0559 || \
    ((c) >= 0x0561 && (c) <= 0x0586) || \
    ((c) >= 0x05D0 && (c) <= 0x05EA) || ((c) >= 0x05F0 && (c) <= 0x05F2) || \
    ((c) >= 0x0621 && (c) <= 0x063A) || ((c) >= 0x0641 && (c) <= 0x064A) || \
    ((c) >= 0x0671 && (c) <= 0x06B7) || ((c) >= 0x06BA && (c) <= 0x06BE) || \
    ((c) >= 0x06C0 && (c) <= 0x06CE) || ((c) >= 0x06D0 && (c) <= 0x06D3) || \
    (c) == 0x06D5 || ((c) >= 0x06E5 && (c) <= 0x06E6) || \
    ((c) >= 0x0905 && (c) <= 0x0939) || (c) == 0x093D || \
    ((c) >= 0x0958 && (c) <= 0x0961) || \
    ((c) >= 0x0985 && (c) <= 0x098C) || ((c) >= 0x098F && (c) <= 0x0990) || \
    ((c) >= 0x0993 && (c) <= 0x09A8) || ((c) >= 0x09AA && (c) <= 0x09B0) || \
    (c) == 0x09B2 || ((c) >= 0x09B6 && (c) <= 0x09B9) || \
    ((c) >= 0x09DC && (c) <= 0x09DD) || ((c) >= 0x09DF && (c) <= 0x09E1) || \
    ((c) >= 0x09F0 && (c) <= 0x09F1) || \
    ((c) >= 0x0A05 && (c) <= 0x0A0A) || ((c) >= 0x0A0F && (c) <= 0x0A10) || \
    ((c) >= 0x0A13 && (c) <= 0x0A28) || ((c) >= 0x0A2A && (c) <= 0x0A30) || \
    ((c) >= 0x0A32 && (c) <= 0x0A33) || ((c) >= 0x0A35 && (c) <= 0x0A36) || \
    ((c) >= 0x0A38 && (c) <= 0x0A39) || ((c) >= 0x0A59 && (c) <= 0x0A5C) || \
    (c) == 0x0A5E || ((c) >= 0x0A72 && (c) <= 0x0A74) || \
    ((c) >= 0x0A85 && (c) <= 0x0A8B) || (c) == 0x0A8D || \
    ((c) >= 0x0A8F && (c) <= 0x0A91) || ((c) >= 0x0A93 && (c) <= 0x0AA8) || \
    ((c) >= 0x0AAA && (c) <= 0x0AB0) || ((c) >= 0x0AB2 && (c) <= 0x0AB3) || \
    ((c) >= 0x0AB5 && (c) <= 0x0AB9) || (c) == 0x0ABD || (c) == 0x0AE0 || \
    ((c) >= 0x0B05 && (c) <= 0x0B0C) || ((c) >= 0x0B0F && (c) <= 0x0B10) || \
    ((c) >= 0x0B13 && (c) <= 0x0B28) || ((c) >= 0x0B2A && (c) <= 0x0B30) || \
    ((c) >= 0x0B32 && (c) <= 0x0B33) || ((c) >= 0x0B36 && (c) <= 0x0B39) || \
    (c) == 0x0B3D || \
    ((c) >= 0x0B5C && (c) <= 0x0B5D) || ((c) >= 0x0B5F && (c) <= 0x0B61) || \
    ((c) >= 0x0B85 && (c) <= 0x0B8A) || ((c) >= 0x0B8E && (c) <= 0x0B90) || \
    ((c) >= 0x0B92 && (c) <= 0x0B95) || ((c) >= 0x0B99 && (c) <= 0x0B9A) || \
    (c) == 0x0B9C || \
    ((c) >= 0x0B9E && (c) <= 0x0B9F) || ((c) >= 0x0BA3 && (c) <= 0x0BA4) || \
    ((c) >= 0x0BA8 && (c) <= 0x0BAA) || ((c) >= 0x0BAE && (c) <= 0x0BB5) || \
    ((c) >= 0x0BB7 && (c) <= 0x0BB9) || \
    ((c) >= 0x0C05 && (c) <= 0x0C0C) || ((c) >= 0x0C0E && (c) <= 0x0C10) || \
    ((c) >= 0x0C12 && (c) <= 0x0C28) || ((c) >= 0x0C2A && (c) <= 0x0C33) || \
    ((c) >= 0x0C35 && (c) <= 0x0C39) || ((c) >= 0x0C60 && (c) <= 0x0C61) || \
    ((c) >= 0x0C85 && (c) <= 0x0C8C) || ((c) >= 0x0C8E && (c) <= 0x0C90) || \
    ((c) >= 0x0C92 && (c) <= 0x0CA8) || ((c) >= 0x0CAA && (c) <= 0x0CB3) || \
    ((c) >= 0x0CB5 && (c) <= 0x0CB9) || (c) == 0x0CDE || \
    ((c) >= 0x0CE0 && (c) <= 0x0CE1) || \
    ((c) >= 0x0D05 && (c) <= 0x0D0C) || ((c) >= 0x0D0E && (c) <= 0x0D10) || \
    ((c) >= 0x0D12 && (c) <= 0x0D28) || ((c) >= 0x0D2A && (c) <= 0x0D39) || \
    ((c) >= 0x0D60 && (c) <= 0x0D61) || \
    ((c) >= 0x0E01 && (c) <= 0x0E2E) || (c) == 0x0E30 || \
    ((c) >= 0x0E32 && (c) <= 0x0E33) || ((c) >= 0x0E40 && (c) <= 0x0E45) || \
    ((c) >= 0x0E81 && (c) <= 0x0E82) || (c) == 0x0E84 || \
    ((c) >= 0x0E87 && (c) <= 0x0E88) || (c) == 0x0E8A || (c) == 0x0E8D || \
    ((c) >= 0x0E94 && (c) <= 0x0E97) || ((c) >= 0x0E99 && (c) <= 0x0E9F) || \
    ((c) >= 0x0EA1 && (c) <= 0x0EA3) || (c) == 0x0EA5 || (c) == 0x0EA7 || \
    ((c) >= 0x0EAA && (c) <= 0x0EAB) || ((c) >= 0x0EAD && (c) <= 0x0EAE) || \
    (c) == 0x0EB0 || ((c) >= 0x0EB2 && (c) <= 0x0EB3) || (c) == 0x0EBD || \
    ((c) >= 0x0EC0 && (c) <= 0x0EC4) || \
    ((c) >= 0x0F40 && (c) <= 0x0F47) || ((c) >= 0x0F49 && (c) <= 0x0F69) || \
    ((c) >= 0x10A0 && (c) <= 0x10C5) || ((c) >= 0x10D0 && (c) <= 0x10F6) || \
    (c) == 0x1100 || ((c) >= 0x1102 && (c) <= 0x1103) || \
    ((c) >= 0x1105 && (c) <= 0x1107) || (c) == 0x1109 || \
    ((c) >= 0x110B && (c) <= 0x110C) || ((c) >= 0x110E && (c) <= 0x1112) || \
    (c) == 0x113C || (c) == 0x113E || (c) == 0x1140 || \
    (c) == 0x114C || (c) == 0x114E || (c) == 0x1150 || \
    ((c) >= 0x1154 && (c) <= 0x1155) || (c) == 0x1159 || \
    ((c) >= 0x115F && (c) <= 0x1161) || \
    (c) == 0x1163 || (c) == 0x1165 || (c) == 0x1167 || (c) == 0x1169 || \
    ((c) >= 0x116D && (c) <= 0x116E) || ((c) >= 0x1172 && (c) <= 0x1173) || \
    (c) == 0x1175 || (c) == 0x119E || (c) == 0x11A8 || (c) == 0x11AB || \
    ((c) >= 0x11AE && (c) <= 0x11AF) || ((c) >= 0x11B7 && (c) <= 0x11B8) || \
    (c) == 0x11BA || ((c) >= 0x11BC && (c) <= 0x11C2) || \
    (c) == 0x11EB || (c) == 0x11F0 || (c) == 0x11F9 || \
    ((c) >= 0x1E00 && (c) <= 0x1E9B) || ((c) >= 0x1EA0 && (c) <= 0x1EF9) || \
    ((c) >= 0x1F00 && (c) <= 0x1F15) || ((c) >= 0x1F18 && (c) <= 0x1F1D) || \
    ((c) >= 0x1F20 && (c) <= 0x1F45) || ((c) >= 0x1F48 && (c) <= 0x1F4D) || \
    ((c) >= 0x1F50 && (c) <= 0x1F57) || \
    (c) == 0x1F59 || (c) == 0x1F5B || (c) == 0x1F5D || \
    ((c) >= 0x1F5F && (c) <= 0x1F7D) || ((c) >= 0x1F80 && (c) <= 0x1FB4) || \
    ((c) >= 0x1FB6 && (c) <= 0x1FBC) || (c) == 0x1FBE || \
    ((c) >= 0x1FC2 && (c) <= 0x1FC4) || ((c) >= 0x1FC6 && (c) <= 0x1FCC) || \
    ((c) >= 0x1FD0 && (c) <= 0x1FD3) || ((c) >= 0x1FD6 && (c) <= 0x1FDB) || \
    ((c) >= 0x1FE0 && (c) <= 0x1FEC) || ((c) >= 0x1FF2 && (c) <= 0x1FF4) || \
    ((c) >= 0x1FF6 && (c) <= 0x1FFC) || \
    (c) == 0x2126 || ((c) >= 0x212A && (c) <= 0x212B) || (c) == 0x212E || \
    ((c) >= 0x2180 && (c) <= 0x2182) || \
    ((c) >= 0x3041 && (c) <= 0x3094) || ((c) >= 0x30A1 && (c) <= 0x30FA) || \
    ((c) >= 0x3105 && (c) <= 0x312C) || ((c) >= 0xAC00 && (c) <= 0xD7A3))
1.1 veillard 325:
1.22 daniel 326: /*
327: * [88] Digit ::= ... long list see REC ...
328: */
/*
 * IS_DIGIT(c): non-zero when c lies inside one of the inclusive ranges
 * listed below. c is evaluated several times.
 */
#define IS_DIGIT(c) ( \
    ((c) >= 0x0030 && (c) <= 0x0039) || ((c) >= 0x0660 && (c) <= 0x0669) || \
    ((c) >= 0x06F0 && (c) <= 0x06F9) || ((c) >= 0x0966 && (c) <= 0x096F) || \
    ((c) >= 0x09E6 && (c) <= 0x09EF) || ((c) >= 0x0A66 && (c) <= 0x0A6F) || \
    ((c) >= 0x0AE6 && (c) <= 0x0AEF) || ((c) >= 0x0B66 && (c) <= 0x0B6F) || \
    ((c) >= 0x0BE7 && (c) <= 0x0BEF) || ((c) >= 0x0C66 && (c) <= 0x0C6F) || \
    ((c) >= 0x0CE6 && (c) <= 0x0CEF) || ((c) >= 0x0D66 && (c) <= 0x0D6F) || \
    ((c) >= 0x0E50 && (c) <= 0x0E59) || ((c) >= 0x0ED0 && (c) <= 0x0ED9) || \
    ((c) >= 0x0F20 && (c) <= 0x0F29))
1.1 veillard 345:
1.22 daniel 346: /*
347: * [87] CombiningChar ::= ... long list see REC ...
348: */
/*
 * IS_COMBINING(c): non-zero when c equals one of the single values or lies
 * inside one of the inclusive ranges listed below. c is evaluated several
 * times.
 */
#define IS_COMBINING(c) ( \
    ((c) >= 0x0300 && (c) <= 0x0345) || ((c) >= 0x0360 && (c) <= 0x0361) || \
    ((c) >= 0x0483 && (c) <= 0x0486) || \
    ((c) >= 0x0591 && (c) <= 0x05A1) || ((c) >= 0x05A3 && (c) <= 0x05B9) || \
    ((c) >= 0x05BB && (c) <= 0x05BD) || (c) == 0x05BF || \
    ((c) >= 0x05C1 && (c) <= 0x05C2) || (c) == 0x05C4 || \
    ((c) >= 0x064B && (c) <= 0x0652) || (c) == 0x0670 || \
    ((c) >= 0x06D6 && (c) <= 0x06DC) || ((c) >= 0x06DD && (c) <= 0x06DF) || \
    ((c) >= 0x06E0 && (c) <= 0x06E4) || ((c) >= 0x06E7 && (c) <= 0x06E8) || \
    ((c) >= 0x06EA && (c) <= 0x06ED) || \
    ((c) >= 0x0901 && (c) <= 0x0903) || (c) == 0x093C || \
    ((c) >= 0x093E && (c) <= 0x094C) || (c) == 0x094D || \
    ((c) >= 0x0951 && (c) <= 0x0954) || ((c) >= 0x0962 && (c) <= 0x0963) || \
    ((c) >= 0x0981 && (c) <= 0x0983) || \
    (c) == 0x09BC || (c) == 0x09BE || (c) == 0x09BF || \
    ((c) >= 0x09C0 && (c) <= 0x09C4) || ((c) >= 0x09C7 && (c) <= 0x09C8) || \
    ((c) >= 0x09CB && (c) <= 0x09CD) || (c) == 0x09D7 || \
    ((c) >= 0x09E2 && (c) <= 0x09E3) || \
    (c) == 0x0A02 || (c) == 0x0A3C || (c) == 0x0A3E || (c) == 0x0A3F || \
    ((c) >= 0x0A40 && (c) <= 0x0A42) || ((c) >= 0x0A47 && (c) <= 0x0A48) || \
    ((c) >= 0x0A4B && (c) <= 0x0A4D) || ((c) >= 0x0A70 && (c) <= 0x0A71) || \
    ((c) >= 0x0A81 && (c) <= 0x0A83) || (c) == 0x0ABC || \
    ((c) >= 0x0ABE && (c) <= 0x0AC5) || ((c) >= 0x0AC7 && (c) <= 0x0AC9) || \
    ((c) >= 0x0ACB && (c) <= 0x0ACD) || \
    ((c) >= 0x0B01 && (c) <= 0x0B03) || (c) == 0x0B3C || \
    ((c) >= 0x0B3E && (c) <= 0x0B43) || ((c) >= 0x0B47 && (c) <= 0x0B48) || \
    ((c) >= 0x0B4B && (c) <= 0x0B4D) || ((c) >= 0x0B56 && (c) <= 0x0B57) || \
    ((c) >= 0x0B82 && (c) <= 0x0B83) || ((c) >= 0x0BBE && (c) <= 0x0BC2) || \
    ((c) >= 0x0BC6 && (c) <= 0x0BC8) || ((c) >= 0x0BCA && (c) <= 0x0BCD) || \
    (c) == 0x0BD7 || \
    ((c) >= 0x0C01 && (c) <= 0x0C03) || ((c) >= 0x0C3E && (c) <= 0x0C44) || \
    ((c) >= 0x0C46 && (c) <= 0x0C48) || ((c) >= 0x0C4A && (c) <= 0x0C4D) || \
    ((c) >= 0x0C55 && (c) <= 0x0C56) || \
    ((c) >= 0x0C82 && (c) <= 0x0C83) || ((c) >= 0x0CBE && (c) <= 0x0CC4) || \
    ((c) >= 0x0CC6 && (c) <= 0x0CC8) || ((c) >= 0x0CCA && (c) <= 0x0CCD) || \
    ((c) >= 0x0CD5 && (c) <= 0x0CD6) || \
    ((c) >= 0x0D02 && (c) <= 0x0D03) || ((c) >= 0x0D3E && (c) <= 0x0D43) || \
    ((c) >= 0x0D46 && (c) <= 0x0D48) || ((c) >= 0x0D4A && (c) <= 0x0D4D) || \
    (c) == 0x0D57 || \
    (c) == 0x0E31 || ((c) >= 0x0E34 && (c) <= 0x0E3A) || \
    ((c) >= 0x0E47 && (c) <= 0x0E4E) || \
    (c) == 0x0EB1 || ((c) >= 0x0EB4 && (c) <= 0x0EB9) || \
    ((c) >= 0x0EBB && (c) <= 0x0EBC) || ((c) >= 0x0EC8 && (c) <= 0x0ECD) || \
    ((c) >= 0x0F18 && (c) <= 0x0F19) || \
    (c) == 0x0F35 || (c) == 0x0F37 || (c) == 0x0F39 || \
    (c) == 0x0F3E || (c) == 0x0F3F || \
    ((c) >= 0x0F71 && (c) <= 0x0F84) || ((c) >= 0x0F86 && (c) <= 0x0F8B) || \
    ((c) >= 0x0F90 && (c) <= 0x0F95) || (c) == 0x0F97 || \
    ((c) >= 0x0F99 && (c) <= 0x0FAD) || ((c) >= 0x0FB1 && (c) <= 0x0FB7) || \
    (c) == 0x0FB9 || \
    ((c) >= 0x20D0 && (c) <= 0x20DC) || (c) == 0x20E1 || \
    ((c) >= 0x302A && (c) <= 0x302F) || (c) == 0x3099 || (c) == 0x309A)
1.3 veillard 445:
1.22 daniel 446: /*
447: * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
448: * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
449: * [#x309D-#x309E] | [#x30FC-#x30FE]
450: */
/*
 * IS_EXTENDER(c): non-zero when c is one of the Extender characters of
 * production [89]. c is evaluated several times.
 */
#define IS_EXTENDER(c)							\
    (((c) == 0x00B7) || ((c) == 0x02D0) || ((c) == 0x02D1) ||		\
     ((c) == 0x0387) || ((c) == 0x0640) || ((c) == 0x0E46) ||		\
     ((c) == 0x0EC6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309D) && ((c) <= 0x309E)) ||				\
     (((c) >= 0x30FC) && ((c) <= 0x30FE)))
1.3 veillard 458:
1.22 daniel 459: /*
460: * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
461: */
/*
 * IS_IDEOGRAPHIC(c): non-zero when c belongs to production [86], i.e.
 * 0x4E00-0x9FA5, 0x3007 or 0x3021-0x3029. No other range is accepted.
 */
#define IS_IDEOGRAPHIC(c)						\
    ((((c) >= 0x4E00) && ((c) <= 0x9FA5)) ||				\
     ((c) == 0x3007) ||						\
     (((c) >= 0x3021) && ((c) <= 0x3029)))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
472:
473: #else
1.30 daniel 474: /************************************************************************
475: * *
476: * 8bits / ASCII version of the macros. *
477: * *
478: ************************************************************************/
1.1 veillard 479: /*
1.22 daniel 480: * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
481: * | [#x10000-#x10FFFF]
482: * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
1.1 veillard 483: */
/*
 * IS_CHAR(c), 8 bit flavour: non-zero for tab, line feed, carriage return
 * and every value from 0x20 upwards. 0 is rejected.
 */
#define IS_CHAR(c) \
    (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
1.1 veillard 487:
1.22 daniel 488: /*
489: * [85] BaseChar ::= ... long list see REC ...
490: */
/*
 * IS_BASECHAR(c), 8 bit flavour: the part of production [85] that fits in
 * one byte, i.e. the ASCII letters plus 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF.
 * 0xAA, 0xBA, 0xD7 and 0xF7 are not BaseChar.
 */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7a)) ||				\
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||				\
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||				\
     (((c) >= 0xf8) && ((c) <= 0xff)))
499:
1.22 daniel 500: /*
501: * [88] Digit ::= ... long list see REC ...
502: */
/* [88] Digit, 8 bit flavour: only the ASCII digits 0x30-0x39. */
#define IS_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39))

/*
 * [84] Letter ::= BaseChar | Ideographic
 * Only BaseChar is tested in the 8 bit flavour.
 */
#define IS_LETTER(c) IS_BASECHAR(c)

/* [87] CombiningChar, 8 bit flavour: always 0. */
#define IS_COMBINING(c) 0

/* [89] Extender, 8 bit flavour: only 0xB7 is tested. */
#define IS_EXTENDER(c) (0xb7 == (c))
522:
1.21 daniel 523: #endif /* !UNICODE */
1.1 veillard 524:
1.22 daniel 525: /*
526: * Blank chars.
527: *
528: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
529: */
/* IS_BLANK(c): non-zero for space, tab, line feed and carriage return. */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))
532:
533: /*
534: * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
535: */
/*
 * IS_PUBIDCHAR(c): non-zero when c is space, CR, LF, an ASCII letter or
 * digit, or one of  - ' ( ) + , . / : = ? ; ! * # @ $ _ %
 */
#define IS_PUBIDCHAR(c) (						\
    (((c) >= 'a') && ((c) <= 'z')) ||					\
    (((c) >= 'A') && ((c) <= 'Z')) ||					\
    (((c) >= '0') && ((c) <= '9')) ||					\
    ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||			\
    ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||	\
    ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||	\
    ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||	\
    ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||	\
    ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 546:
/*
 * SKIP_EOL(p): advance p past one end of line marker, if any. A marker is
 * CR (0x0D) or LF (0x0A), optionally followed by the other one, so CR, LF,
 * CR LF and LF CR each count as a single marker. p must be a modifiable
 * pointer and is evaluated several times.
 */
#define SKIP_EOL(p)							\
    if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; }		\
    else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; }
550:
/*
 * Pointer scanning helpers. p must be a modifiable pointer; it is evaluated
 * several times and no trailing semicolon is supplied.
 *
 *   SKIP_BLANKS(p)      advance p while it points to an IS_BLANK character
 *   MOVETO_ENDTAG(p)    advance p until it points to '>' or to a character
 *                       rejected by IS_CHAR
 *   MOVETO_STARTTAG(p)  same, stopping on '<'
 */
#define SKIP_BLANKS(p)							\
    while (IS_BLANK(*(p))) (p)++

#define MOVETO_ENDTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++

#define MOVETO_STARTTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
1.1 veillard 559:
560: /*
1.3 veillard 561: * Forward definition for recursive behaviour.
562: */
1.16 daniel 563: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.3 veillard 564:
565: /*
566: * xmlHandleData : this routine represent's the specific application
567: * behaviour when reading a piece of text.
568: *
569: * For example in WebDav, any piece made only of blanks is eliminated
570: */
571:
572: CHAR *xmlHandleData(CHAR *in) {
573: CHAR *cur;
574:
575: if (in == NULL) return(NULL);
576: cur = in;
577: while (IS_CHAR(*cur)) {
578: if (!IS_BLANK(*cur)) goto not_blank;
579: cur++;
580: }
581: free(in);
582: return(NULL);
583:
584: not_blank:
585: return(in);
586: }
587:
1.28 daniel 588: /************************************************************************
589: * *
590: * Commodity functions to handle CHARs *
591: * *
592: ************************************************************************/
593:
1.3 veillard 594: /*
1.1 veillard 595: * xmlStrndup : a strdup for array of CHAR's
596: */
597:
1.6 httpng 598: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 599: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
600:
601: if (ret == NULL) {
602: fprintf(stderr, "malloc of %d byte failed\n",
603: (len + 1) * sizeof(CHAR));
604: return(NULL);
605: }
606: memcpy(ret, cur, len * sizeof(CHAR));
607: ret[len] = 0;
608: return(ret);
609: }
610:
611: /*
612: * xmlStrdup : a strdup for CHAR's
613: */
614:
1.6 httpng 615: CHAR *xmlStrdup(const CHAR *cur) {
616: const CHAR *p = cur;
1.1 veillard 617:
618: while (IS_CHAR(*p)) p++;
619: return(xmlStrndup(cur, p - cur));
620: }
621:
622: /*
1.14 veillard 623: * xmlStrcmp : a strcmp for CHAR's
624: */
625:
626: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
627: register int tmp;
628:
629: do {
630: tmp = *str1++ - *str2++;
631: if (tmp != 0) return(tmp);
632: } while ((*str1 != 0) && (*str2 != 0));
633: return (*str1 - *str2);
634: }
635:
636: /*
637: * xmlStrncmp : a strncmp for CHAR's
638: */
639:
640: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
641: register int tmp;
642:
643: if (len <= 0) return(0);
644: do {
645: tmp = *str1++ - *str2++;
646: if (tmp != 0) return(tmp);
647: len--;
648: if (len <= 0) return(0);
649: } while ((*str1 != 0) && (*str2 != 0));
650: return (*str1 - *str2);
651: }
652:
653: /*
654: * xmlStrchr : a strchr for CHAR's
655: */
656:
657: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
658: while (*str != 0) {
659: if (*str == val) return((CHAR *) str);
660: str++;
661: }
662: return(NULL);
663: }
1.28 daniel 664:
665: /************************************************************************
666: * *
667: * Extra stuff for namespace support *
668: * Relates to http://www.w3.org/TR/WD-xml-names *
669: * *
670: ************************************************************************/
671:
672: /*
673: * xmlNamespaceParseNCName : parse an XML namespace name.
674: *
675: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
676: *
677: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
678: * CombiningChar | Extender
679: */
680:
681: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
682: const CHAR *q;
683: CHAR *ret = NULL;
684:
1.40 daniel 685: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
686: q = NEXT;
1.28 daniel 687:
1.40 daniel 688: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
689: (CUR == '.') || (CUR == '-') ||
690: (CUR == '_') ||
691: (IS_COMBINING(CUR)) ||
692: (IS_EXTENDER(CUR)))
693: NEXT;
1.28 daniel 694:
1.40 daniel 695: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 696:
697: return(ret);
698: }
699:
700: /*
701: * xmlNamespaceParseQName : parse an XML qualified name
702: *
703: * [NS 5] QName ::= (Prefix ':')? LocalPart
704: *
705: * [NS 6] Prefix ::= NCName
706: *
707: * [NS 7] LocalPart ::= NCName
708: */
709:
710: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
711: CHAR *ret = NULL;
712:
713: *prefix = NULL;
714: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 715: if (CUR == ':') {
1.28 daniel 716: *prefix = ret;
1.40 daniel 717: NEXT;
1.28 daniel 718: ret = xmlNamespaceParseNCName(ctxt);
719: }
720:
721: return(ret);
722: }
723:
724: /*
725: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
726: *
727: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
728: *
729: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
730: */
731:
1.39 daniel 732: CHAR *xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 733: CHAR *name = NULL;
734:
1.40 daniel 735: if ((CUR == 'x') && (NXT(1) == 'm') &&
736: (NXT(2) == 'l') && (NXT(3) == 'n') &&
737: (NXT(4) == 's')) {
738: SKIP(5);
739: if (CUR == ':') {
740: NEXT;
1.28 daniel 741: name = xmlNamespaceParseNCName(ctxt);
742: }
743: }
1.39 daniel 744: return(name);
1.28 daniel 745: }
746:
747: /************************************************************************
748: * *
749: * The parser itself *
750: * Relates to http://www.w3.org/TR/REC-xml *
751: * *
752: ************************************************************************/
1.14 veillard 753:
754: /*
1.1 veillard 755: * xmlParseName : parse an XML name.
1.22 daniel 756: *
757: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
758: * CombiningChar | Extender
759: *
760: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
761: *
762: * [6] Names ::= Name (S Name)*
1.1 veillard 763: */
764:
1.16 daniel 765: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 766: const CHAR *q;
767: CHAR *ret = NULL;
1.1 veillard 768:
1.40 daniel 769: if (!IS_LETTER(CUR) && (CUR != '_') &&
770: (CUR != ':')) return(NULL);
771: q = NEXT;
772:
773: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
774: (CUR == '.') || (CUR == '-') ||
775: (CUR == '_') || (CUR == ':') ||
776: (IS_COMBINING(CUR)) ||
777: (IS_EXTENDER(CUR)))
778: NEXT;
1.22 daniel 779:
1.40 daniel 780: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 781:
782: return(ret);
783: }
784:
785: /*
786: * xmlParseNmtoken : parse an XML Nmtoken.
787: *
788: * [7] Nmtoken ::= (NameChar)+
789: *
790: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
791: */
792:
793: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
794: const CHAR *q;
795: CHAR *ret = NULL;
796:
1.40 daniel 797: q = NEXT;
1.22 daniel 798:
1.40 daniel 799: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
800: (CUR == '.') || (CUR == '-') ||
801: (CUR == '_') || (CUR == ':') ||
802: (IS_COMBINING(CUR)) ||
803: (IS_EXTENDER(CUR)))
804: NEXT;
1.3 veillard 805:
1.40 daniel 806: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 807:
1.3 veillard 808: return(ret);
1.1 veillard 809: }
810:
811: /*
1.24 daniel 812: * xmlParseEntityValue : parse a value for ENTITY decl.
813: *
814: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
815: * "'" ([^%&'] | PEReference | Reference)* "'"
816: */
817:
818: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.39 daniel 819: CHAR *ret = NULL, *dec;
1.24 daniel 820: const CHAR *q;
1.39 daniel 821: int needSubst = 0;
822: int needParam = 0;
1.24 daniel 823:
1.40 daniel 824: if (CUR == '"') {
825: NEXT;
1.24 daniel 826:
1.40 daniel 827: q = CUR_PTR;
828: while ((IS_CHAR(CUR)) && (CUR != '"')) {
829: if (CUR == '%') {
1.39 daniel 830: needParam = 1; /* TODO !!! */
1.40 daniel 831: NEXT;
832: } else if (CUR == '&') {
1.39 daniel 833: needSubst = 1;
1.40 daniel 834: NEXT;
1.24 daniel 835: } else
1.40 daniel 836: NEXT;
1.24 daniel 837: }
1.40 daniel 838: if (!IS_CHAR(CUR)) {
1.31 daniel 839: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 840: } else {
1.39 daniel 841: if (needSubst) {
1.40 daniel 842: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 843: ret = xmlStrdup(dec);
844: free(dec);
845: } else
1.40 daniel 846: ret = xmlStrndup(q, CUR_PTR - q);
847: NEXT;
1.24 daniel 848: }
1.40 daniel 849: } else if (CUR == '\'') {
850: NEXT;
851: q = CUR_PTR;
852: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
853: if (CUR == '%') {
1.39 daniel 854: needParam = 1; /* TODO !!! */
1.40 daniel 855: NEXT;
856: } else if (CUR == '&') {
1.39 daniel 857: needSubst = 1;
1.40 daniel 858: NEXT;
1.24 daniel 859: } else
1.40 daniel 860: NEXT;
1.24 daniel 861: }
1.40 daniel 862: if (!IS_CHAR(CUR)) {
1.31 daniel 863: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 864: } else {
1.39 daniel 865: if (needSubst) {
1.40 daniel 866: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 867: ret = xmlStrdup(dec);
868: free(dec);
869: } else
1.40 daniel 870: ret = xmlStrndup(q, CUR_PTR - q);
871: NEXT;
1.24 daniel 872: }
873: } else {
1.31 daniel 874: xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 875: }
876:
877: return(ret);
878: }
879:
880: /*
1.29 daniel 881: * xmlParseAttValue : parse a value for an attribute
882: *
883: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
884: * "'" ([^<&'] | Reference)* "'"
885: */
886:
887: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.39 daniel 888: CHAR *ret = NULL, *dec;
1.29 daniel 889: const CHAR *q;
1.36 daniel 890: int needSubst = 0;
1.29 daniel 891:
1.40 daniel 892: if (CUR == '"') {
893: NEXT;
1.29 daniel 894:
1.40 daniel 895: q = CUR_PTR;
896: while ((IS_CHAR(CUR)) && (CUR != '"')) {
897: if (CUR == '&') {
1.36 daniel 898: needSubst = 1;
1.40 daniel 899: NEXT;
1.29 daniel 900: } else
1.40 daniel 901: NEXT;
1.29 daniel 902: }
1.40 daniel 903: if (!IS_CHAR(CUR)) {
1.31 daniel 904: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 905: } else {
1.39 daniel 906: if (needSubst) {
1.40 daniel 907: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 908: ret = xmlStrdup(dec);
909: free(dec);
910: } else
1.40 daniel 911: ret = xmlStrndup(q, CUR_PTR - q);
912: NEXT;
1.29 daniel 913: }
1.40 daniel 914: } else if (CUR == '\'') {
915: NEXT;
916: q = CUR_PTR;
917: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
918: if (CUR == '&') {
1.36 daniel 919: needSubst = 1;
1.40 daniel 920: NEXT;
1.29 daniel 921: } else
1.40 daniel 922: NEXT;
1.29 daniel 923: }
1.40 daniel 924: if (!IS_CHAR(CUR)) {
1.31 daniel 925: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 926: } else {
1.39 daniel 927: if (needSubst) {
1.40 daniel 928: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 929: ret = xmlStrdup(dec);
930: free(dec);
931: } else
1.40 daniel 932: ret = xmlStrndup(q, CUR_PTR - q);
933: NEXT;
1.29 daniel 934: }
935: } else {
1.31 daniel 936: xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 937: }
938:
939: return(ret);
940: }
941:
942: /*
1.21 daniel 943: * xmlParseSystemLiteral : parse an XML Literal
944: *
1.22 daniel 945: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 946: */
947:
948: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
949: const CHAR *q;
950: CHAR *ret = NULL;
951:
1.40 daniel 952: if (CUR == '"') {
953: NEXT;
954: q = CUR_PTR;
955: while ((IS_CHAR(CUR)) && (CUR != '"'))
956: NEXT;
957: if (!IS_CHAR(CUR)) {
1.31 daniel 958: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 959: } else {
1.40 daniel 960: ret = xmlStrndup(q, CUR_PTR - q);
961: NEXT;
1.21 daniel 962: }
1.40 daniel 963: } else if (CUR == '\'') {
964: NEXT;
965: q = CUR_PTR;
966: while ((IS_CHAR(CUR)) && (CUR != '\''))
967: NEXT;
968: if (!IS_CHAR(CUR)) {
1.31 daniel 969: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 970: } else {
1.40 daniel 971: ret = xmlStrndup(q, CUR_PTR - q);
972: NEXT;
1.21 daniel 973: }
974: } else {
1.31 daniel 975: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 976: }
977:
978: return(ret);
979: }
980:
981: /*
1.27 daniel 982: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 983: *
1.22 daniel 984: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 985: */
986:
987: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
988: const CHAR *q;
989: CHAR *ret = NULL;
990: /*
991: * Name ::= (Letter | '_') (NameChar)*
992: */
1.40 daniel 993: if (CUR == '"') {
994: NEXT;
995: q = CUR_PTR;
996: while (IS_PUBIDCHAR(CUR)) NEXT;
997: if (CUR != '"') {
1.31 daniel 998: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 999: } else {
1.40 daniel 1000: ret = xmlStrndup(q, CUR_PTR - q);
1001: NEXT;
1.21 daniel 1002: }
1.40 daniel 1003: } else if (CUR == '\'') {
1004: NEXT;
1005: q = CUR_PTR;
1006: while ((IS_LETTER(CUR)) && (CUR != '\''))
1007: NEXT;
1008: if (!IS_LETTER(CUR)) {
1.31 daniel 1009: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1010: } else {
1.40 daniel 1011: ret = xmlStrndup(q, CUR_PTR - q);
1012: NEXT;
1.21 daniel 1013: }
1014: } else {
1.31 daniel 1015: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1016: }
1017:
1018: return(ret);
1019: }
1020:
1021: /*
1.27 daniel 1022: * xmlParseCharData: parse a CharData section.
1023: * if we are within a CDATA section ']]>' marks an end of section.
1024: *
1025: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1026: */
1027:
1028: CHAR *xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1029: const CHAR *q;
1030: CHAR *ret = NULL;
1031:
1.40 daniel 1032: q = CUR_PTR;
1033: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1034: (CUR != '&')) {
1035: NEXT;
1036: if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
1037: (NXT(2) == '>')) break;
1.27 daniel 1038: }
1.40 daniel 1039: if (q == CUR_PTR) return(NULL);
1040: ret = xmlStrndup(q, CUR_PTR - q);
1.27 daniel 1041: return(ret);
1042: }
1043:
1044: /*
1.22 daniel 1045: * xmlParseExternalID: Parse an External ID
1046: *
1047: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1048: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1049: */
1050:
1.39 daniel 1051: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1052: CHAR *URI = NULL;
1.22 daniel 1053:
1.40 daniel 1054: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1055: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1056: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1057: SKIP(6);
1058: SKIP_BLANKS(CUR_PTR);
1.39 daniel 1059: URI = xmlParseSystemLiteral(ctxt);
1060: if (URI == NULL)
1.31 daniel 1061: xmlParserError(ctxt,
1.39 daniel 1062: "xmlParseExternalID: SYSTEM, no URI\n");
1.40 daniel 1063: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1064: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1065: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1066: SKIP(6);
1067: SKIP_BLANKS(CUR_PTR);
1.39 daniel 1068: *publicID = xmlParsePubidLiteral(ctxt);
1069: if (*publicID == NULL)
1.31 daniel 1070: xmlParserError(ctxt,
1.39 daniel 1071: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.40 daniel 1072: SKIP_BLANKS(CUR_PTR);
1.39 daniel 1073: URI = xmlParseSystemLiteral(ctxt);
1074: if (URI == NULL)
1.31 daniel 1075: xmlParserError(ctxt,
1.39 daniel 1076: "xmlParseExternalID: PUBLIC, no URI\n");
1.22 daniel 1077: }
1.39 daniel 1078: return(URI);
1.22 daniel 1079: }
1080:
1081: /*
1.1 veillard 1082: * Parse and return a string between quotes or doublequotes
1083: */
1.16 daniel 1084: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.1 veillard 1085: CHAR *ret = NULL;
1.17 daniel 1086: const CHAR *q;
1.1 veillard 1087:
1.40 daniel 1088: if (CUR == '"') {
1089: NEXT;
1090: q = CUR_PTR;
1091: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1092: if (CUR != '"')
1.31 daniel 1093: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1094: else {
1.40 daniel 1095: ret = xmlStrndup(q, CUR_PTR - q);
1096: NEXT;
1.1 veillard 1097: }
1.40 daniel 1098: } else if (CUR == '\''){
1099: NEXT;
1100: q = CUR_PTR;
1101: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1102: if (CUR != '\'')
1.31 daniel 1103: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1104: else {
1.40 daniel 1105: ret = xmlStrndup(q, CUR_PTR - q);
1106: NEXT;
1.1 veillard 1107: }
1108: }
1109: return(ret);
1110: }
1111:
1112: /*
1.3 veillard 1113: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1114: * This may or may not create a node (depending on the context)
1.38 daniel 1115: * The spec says that "For compatibility, the string "--" (double-hyphen)
1116: * must not occur within comments. "
1.22 daniel 1117: *
1118: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1119: */
1.31 daniel 1120: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1121: xmlNodePtr ret = NULL;
1.17 daniel 1122: const CHAR *q, *start;
1123: const CHAR *r;
1.39 daniel 1124: CHAR *val;
1.3 veillard 1125:
1126: /*
1.22 daniel 1127: * Check that there is a comment right here.
1.3 veillard 1128: */
1.40 daniel 1129: if ((CUR != '<') || (NXT(1) != '!') ||
1130: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1131:
1.40 daniel 1132: SKIP(4);
1133: start = q = CUR_PTR;
1134: NEXT;
1135: r = CUR_PTR;
1136: NEXT;
1137: while (IS_CHAR(CUR) &&
1138: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1139: (*r != '-') || (*q != '-'))) {
1.38 daniel 1140: if ((*r == '-') && (*q == '-'))
1141: xmlParserError(ctxt,
1142: "Comment must not contain '--' (double-hyphen)`\n");
1.40 daniel 1143: NEXT;r++;q++;
1.3 veillard 1144: }
1.40 daniel 1145: if (!IS_CHAR(CUR)) {
1.31 daniel 1146: xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.3 veillard 1147: } else {
1.40 daniel 1148: NEXT;
1.31 daniel 1149: if (create) {
1.39 daniel 1150: val = xmlStrndup(start, q - start);
1151: ret = xmlNewComment(val);
1152: free(val);
1.31 daniel 1153: }
1.3 veillard 1154: }
1.39 daniel 1155: return(ret);
1.3 veillard 1156: }
1157:
1158: /*
1.13 veillard 1159: * xmlParseNamespace: parse specific '<?namespace ...' constructs.
1.22 daniel 1160: *
1.29 daniel 1161: * TODO !!!!!!!!!!
1162: *
1163: * This is what the older xml-name Working Draft specified, a bunch of
1164: * other stuff may still rely on it, so support is still here as
1165: * if ot was declared on the root of the Tree:-(
1.1 veillard 1166: */
1167:
1.16 daniel 1168: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.1 veillard 1169: CHAR *href = NULL;
1.34 daniel 1170: CHAR *prefix = NULL;
1.3 veillard 1171: int garbage = 0;
1.1 veillard 1172:
1173: /*
1.18 daniel 1174: * We just skipped "namespace" or "xml:namespace"
1.1 veillard 1175: */
1.40 daniel 1176: SKIP_BLANKS(CUR_PTR);
1.1 veillard 1177:
1.40 daniel 1178: while (IS_CHAR(CUR) && (CUR != '>')) {
1.1 veillard 1179: /*
1.18 daniel 1180: * We can have "ns" or "prefix" attributes
1181: * Old encoding as 'href' or 'AS' attributes is still supported
1.1 veillard 1182: */
1.40 daniel 1183: if ((CUR == 'n') && (NXT(1) == 's')) {
1.18 daniel 1184: garbage = 0;
1.40 daniel 1185: SKIP(2);
1186: SKIP_BLANKS(CUR_PTR);
1.18 daniel 1187:
1.40 daniel 1188: if (CUR != '=') continue;
1189: NEXT;
1190: SKIP_BLANKS(CUR_PTR);
1.18 daniel 1191:
1192: href = xmlParseQuotedString(ctxt);
1.40 daniel 1193: SKIP_BLANKS(CUR_PTR);
1194: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1195: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1.3 veillard 1196: garbage = 0;
1.40 daniel 1197: SKIP(4);
1198: SKIP_BLANKS(CUR_PTR);
1.1 veillard 1199:
1.40 daniel 1200: if (CUR != '=') continue;
1201: NEXT;
1202: SKIP_BLANKS(CUR_PTR);
1.16 daniel 1203:
1204: href = xmlParseQuotedString(ctxt);
1.40 daniel 1205: SKIP_BLANKS(CUR_PTR);
1206: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1207: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1208: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1.18 daniel 1209: garbage = 0;
1.40 daniel 1210: SKIP(6);
1211: SKIP_BLANKS(CUR_PTR);
1.18 daniel 1212:
1.40 daniel 1213: if (CUR != '=') continue;
1214: NEXT;
1215: SKIP_BLANKS(CUR_PTR);
1.18 daniel 1216:
1.34 daniel 1217: prefix = xmlParseQuotedString(ctxt);
1.40 daniel 1218: SKIP_BLANKS(CUR_PTR);
1219: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1.3 veillard 1220: garbage = 0;
1.40 daniel 1221: SKIP(2);
1222: SKIP_BLANKS(CUR_PTR);
1.1 veillard 1223:
1.40 daniel 1224: if (CUR != '=') continue;
1225: NEXT;
1226: SKIP_BLANKS(CUR_PTR);
1.16 daniel 1227:
1.34 daniel 1228: prefix = xmlParseQuotedString(ctxt);
1.40 daniel 1229: SKIP_BLANKS(CUR_PTR);
1230: } else if ((CUR == '?') && (NXT(1) == '>')) {
1.3 veillard 1231: garbage = 0;
1.40 daniel 1232: CUR_PTR ++;
1.1 veillard 1233: } else {
1.3 veillard 1234: /*
1235: * Found garbage when parsing the namespace
1236: */
1.31 daniel 1237: if (!garbage)
1238: xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
1.40 daniel 1239: NEXT;
1.1 veillard 1240: }
1241: }
1242:
1.40 daniel 1243: MOVETO_ENDTAG(CUR_PTR);
1244: NEXT;
1.1 veillard 1245:
1246: /*
1247: * Register the DTD.
1248: */
1249: if (href != NULL)
1.34 daniel 1250: xmlNewNs(ctxt->doc, href, prefix);
1.1 veillard 1251:
1.34 daniel 1252: if (prefix != NULL) free(prefix);
1.8 veillard 1253: if (href != NULL) free(href);
1.1 veillard 1254: }
1255:
1256: /*
1.22 daniel 1257: * xmlParsePITarget: parse the name of a PI
1258: *
1259: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1260: */
1261:
1262: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1263: CHAR *name;
1264:
1265: name = xmlParseName(ctxt);
1266: if ((name != NULL) && (name[3] == 0) &&
1267: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1268: ((name[1] == 'm') || (name[1] == 'M')) &&
1269: ((name[2] == 'l') || (name[2] == 'L'))) {
1270: xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1271: return(NULL);
1272: }
1273: return(name);
1274: }
1275:
1276: /*
1.3 veillard 1277: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 1278: *
1279: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 1280: */
1281:
1.16 daniel 1282: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1283: CHAR *target;
1284:
1.40 daniel 1285: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1286: /*
1287: * this is a Processing Instruction.
1288: */
1.40 daniel 1289: SKIP(2);
1.3 veillard 1290:
1291: /*
1.22 daniel 1292: * Parse the target name and check for special support like
1293: * namespace.
1294: *
1295: * TODO : PI handling should be dynamically redefinable using an
1296: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1297: */
1.22 daniel 1298: target = xmlParsePITarget(ctxt);
1299: if (target != NULL) {
1300: /*
1301: * Support for the Processing Instruction related to namespace.
1302: */
1303: if ((target[0] == 'n') && (target[1] == 'a') &&
1304: (target[2] == 'm') && (target[3] == 'e') &&
1305: (target[4] == 's') && (target[5] == 'p') &&
1306: (target[6] == 'a') && (target[7] == 'c') &&
1307: (target[8] == 'e')) {
1308: xmlParseNamespace(ctxt);
1309: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1310: (target[2] == 'l') && (target[3] == ':') &&
1311: (target[4] == 'n') && (target[5] == 'a') &&
1312: (target[6] == 'm') && (target[7] == 'e') &&
1313: (target[8] == 's') && (target[9] == 'p') &&
1314: (target[10] == 'a') && (target[11] == 'c') &&
1315: (target[12] == 'e')) {
1316: xmlParseNamespace(ctxt);
1317: } else {
1318: /* Unknown PI, ignore it ! */
1.31 daniel 1319: xmlParserError(ctxt, "xmlParsePI : skipping unknown PI %s\n",
1320: target);
1.40 daniel 1321: while (IS_CHAR(CUR) &&
1322: ((CUR != '?') || (NXT(1) != '>')))
1323: NEXT;
1324: if (!IS_CHAR(CUR)) {
1.31 daniel 1325: xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
1326: target);
1.24 daniel 1327: } else
1.40 daniel 1328: SKIP(2);
1.22 daniel 1329: }
1.39 daniel 1330: free(target);
1.3 veillard 1331: } else {
1.31 daniel 1332: xmlParserError(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1333: /********* Should we try to complete parsing the PI ???
1.40 daniel 1334: while (IS_CHAR(CUR) &&
1335: (CUR != '?') && (CUR != '>'))
1336: NEXT;
1337: if (!IS_CHAR(CUR)) {
1.22 daniel 1338: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1339: target);
1340: }
1341: ********************************************************/
1342: }
1343: }
1344: }
1345:
1346: /*
1347: * xmlParseNotationDecl: parse a notation declaration
1348: *
1349: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1350: *
1351: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1352: *
1353: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1354: * 'PUBLIC' S PubidLiteral S SystemLiteral
1355: *
1356: * Hence there is actually 3 choices:
1357: * 'PUBLIC' S PubidLiteral
1358: * 'PUBLIC' S PubidLiteral S SystemLiteral
1359: * and 'SYSTEM' S SystemLiteral
1360: */
1361:
1362: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1363: CHAR *name;
1364:
1.40 daniel 1365: if ((CUR == '<') && (NXT(1) == '!') &&
1366: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1367: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1368: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1369: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1370: (IS_BLANK(NXT(10)))) {
1371: SKIP(10);
1372: SKIP_BLANKS(CUR_PTR);
1.22 daniel 1373:
1374: name = xmlParseName(ctxt);
1375: if (name == NULL) {
1.31 daniel 1376: xmlParserError(ctxt,
1377: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1378: return;
1379: }
1.40 daniel 1380: SKIP_BLANKS(CUR_PTR);
1.22 daniel 1381: /*
1.31 daniel 1382: * TODO !!!
1.22 daniel 1383: */
1.40 daniel 1384: while ((IS_CHAR(CUR)) && (CUR != '>'))
1385: NEXT;
1.22 daniel 1386: free(name);
1387: }
1388: }
1389:
1390: /*
1391: * xmlParseEntityDecl: parse <!ENTITY declarations
1392: *
1393: * [70] EntityDecl ::= GEDecl | PEDecl
1394: *
1395: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1396: *
1397: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1398: *
1399: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1400: *
1401: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1402: *
1403: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1404: */
1405:
1406: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 1407: CHAR *name = NULL;
1.24 daniel 1408: CHAR *value = NULL;
1.39 daniel 1409: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 1410: CHAR *ndata = NULL;
1.39 daniel 1411: int isParameter = 0;
1.22 daniel 1412:
1.40 daniel 1413: if ((CUR == '<') && (NXT(1) == '!') &&
1414: (NXT(2) == 'E') && (NXT(3) == 'N') &&
1415: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1416: (NXT(6) == 'T') && (NXT(7) == 'Y') &&
1417: (IS_BLANK(NXT(8)))) {
1418: SKIP(8);
1419: SKIP_BLANKS(CUR_PTR);
1420:
1421: if (CUR == '%') {
1422: NEXT;
1423: SKIP_BLANKS(CUR_PTR);
1.39 daniel 1424: isParameter = 1;
1.22 daniel 1425: }
1426:
1427: name = xmlParseName(ctxt);
1.24 daniel 1428: if (name == NULL) {
1.31 daniel 1429: xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1430: return;
1431: }
1.40 daniel 1432: SKIP_BLANKS(CUR_PTR);
1.24 daniel 1433:
1.22 daniel 1434: /*
1.24 daniel 1435: * TODO handle the various case of definitions...
1.22 daniel 1436: */
1.39 daniel 1437: if (isParameter) {
1.40 daniel 1438: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 1439: value = xmlParseEntityValue(ctxt);
1.39 daniel 1440: if (value) {
1441: xmlAddDocEntity(ctxt->doc, name,
1442: XML_INTERNAL_PARAMETER_ENTITY,
1443: NULL, NULL, value);
1444: }
1.24 daniel 1445: else {
1.39 daniel 1446: URI = xmlParseExternalID(ctxt, &literal);
1447: if (URI) {
1448: xmlAddDocEntity(ctxt->doc, name,
1449: XML_EXTERNAL_PARAMETER_ENTITY,
1450: literal, URI, NULL);
1451: }
1.24 daniel 1452: }
1453: } else {
1.40 daniel 1454: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 1455: value = xmlParseEntityValue(ctxt);
1.39 daniel 1456: xmlAddDocEntity(ctxt->doc, name,
1457: XML_INTERNAL_GENERAL_ENTITY,
1458: NULL, NULL, value);
1459: } else {
1460: URI = xmlParseExternalID(ctxt, &literal);
1.40 daniel 1461: SKIP_BLANKS(CUR_PTR);
1462: if ((CUR == 'N') && (NXT(1) == 'D') &&
1463: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1464: (NXT(4) == 'A')) {
1465: SKIP(5);
1466: SKIP_BLANKS(CUR_PTR);
1.24 daniel 1467: ndata = xmlParseName(ctxt);
1.39 daniel 1468: xmlAddDocEntity(ctxt->doc, name,
1469: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1470: literal, URI, ndata);
1471: } else {
1472: xmlAddDocEntity(ctxt->doc, name,
1473: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1474: literal, URI, NULL);
1.24 daniel 1475: }
1476: }
1477: }
1.40 daniel 1478: SKIP_BLANKS(CUR_PTR);
1479: if (CUR != '>') {
1.31 daniel 1480: xmlParserError(ctxt,
1481: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1482: } else
1.40 daniel 1483: NEXT;
1.39 daniel 1484: if (name != NULL) free(name);
1485: if (value != NULL) free(value);
1486: if (URI != NULL) free(URI);
1487: if (literal != NULL) free(literal);
1488: if (ndata != NULL) free(ndata);
1.22 daniel 1489: }
1490: }
1491:
1492: /*
1493: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1494: *
1495: * [57] EnumeratedType ::= NotationType | Enumeration
1496: *
1497: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1498: *
1499: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1500: */
1501:
1502: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1503: /*
1504: * TODO !!!
1505: */
1.40 daniel 1506: while ((IS_CHAR(CUR)) && (CUR != '>'))
1507: NEXT;
1.22 daniel 1508: }
1509:
1510: /*
1511: * xmlParseAttributeType: parse the Attribute list def for an element
1512: *
1513: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1514: *
1515: * [55] StringType ::= 'CDATA'
1516: *
1517: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1518: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1519: */
1520: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.40 daniel 1521: if ((CUR == 'C') && (NXT(1) == 'D') &&
1522: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1523: (NXT(4) == 'A')) {
1524: SKIP(5);
1525: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
1526: SKIP(2);
1527: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1528: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1529: (NXT(4) == 'F')) {
1530: SKIP(5);
1531: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1532: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1533: (NXT(4) == 'F') && (NXT(5) == 'S')) {
1534: SKIP(6);
1535: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1536: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1537: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
1538: SKIP(6);
1539: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1540: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1541: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1542: (NXT(6) == 'E') && (NXT(7) == 'S')) {
1543: SKIP(8);
1544: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1545: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1546: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1547: (NXT(6) == 'N')) {
1548: SKIP(7);
1549: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1550: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1551: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1552: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.22 daniel 1553: } else {
1554: xmlParseEnumeratedType(ctxt, name);
1555: }
1556: }
1557:
1558: /*
1559: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1560: *
1561: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1562: *
1563: * [53] AttDef ::= S Name S AttType S DefaultDecl
1564: */
1565: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1566: CHAR *name;
1567:
1.40 daniel 1568: if ((CUR == '<') && (NXT(1) == '!') &&
1569: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1570: (NXT(4) == 'T') && (NXT(5) == 'L') &&
1571: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1572: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1573: SKIP(9);
1574: SKIP_BLANKS(CUR_PTR);
1.22 daniel 1575: name = xmlParseName(ctxt);
1576: if (name == NULL) {
1.31 daniel 1577: xmlParserError(ctxt,
1578: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1579: return;
1580: }
1.40 daniel 1581: SKIP_BLANKS(CUR_PTR);
1582: while (CUR != '>') {
1583: const CHAR *check = CUR_PTR;
1.22 daniel 1584:
1585: xmlParseAttributeType(ctxt, name);
1.40 daniel 1586: SKIP_BLANKS(CUR_PTR);
1587: if (check == CUR_PTR) {
1.31 daniel 1588: xmlParserError(ctxt,
1589: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 1590: break;
1591: }
1592: }
1.40 daniel 1593: if (CUR == '>')
1594: NEXT;
1.22 daniel 1595:
1596: free(name);
1597: }
1598: }
1599:
1600: /*
1601: * xmlParseElementContentDecl: parse the declaration for an Element content
1602: * either Mixed or Children, the cases EMPTY and ANY being handled
1603: * int xmlParseElementDecl.
1604: *
1605: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1606: *
1607: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1608: *
1609: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1610: *
1611: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1612: *
1613: * or
1614: *
1615: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1616: * '(' S? '#PCDATA' S? ')'
1617: */
1618:
1619: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1620: /*
1621: * TODO This has to be parsed correctly, currently we just skip until
1622: * we reach the first '>'.
1.31 daniel 1623: * !!!
1.22 daniel 1624: */
1.40 daniel 1625: while ((IS_CHAR(CUR)) && (CUR != '>'))
1626: NEXT;
1.22 daniel 1627: }
1628:
1629: /*
1630: * xmlParseElementDecl: parse an Element declaration.
1631: *
1632: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1633: *
1634: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1635: *
1636: * TODO There is a check [ VC: Unique Element Type Declaration ]
1637: */
1638: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1639: CHAR *name;
1640:
1.40 daniel 1641: if ((CUR == '<') && (NXT(1) == '!') &&
1642: (NXT(2) == 'E') && (NXT(3) == 'L') &&
1643: (NXT(4) == 'E') && (NXT(5) == 'M') &&
1644: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1645: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1646: SKIP(9);
1647: SKIP_BLANKS(CUR_PTR);
1.22 daniel 1648: name = xmlParseName(ctxt);
1649: if (name == NULL) {
1.31 daniel 1650: xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 1651: return;
1652: }
1.40 daniel 1653: SKIP_BLANKS(CUR_PTR);
1654: if ((CUR == 'E') && (NXT(1) == 'M') &&
1655: (NXT(2) == 'P') && (NXT(3) == 'T') &&
1656: (NXT(4) == 'Y')) {
1657: SKIP(5);
1.22 daniel 1658: /*
1659: * Element must always be empty.
1660: */
1.40 daniel 1661: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
1662: (NXT(2) == 'Y')) {
1663: SKIP(3);
1.22 daniel 1664: /*
1665: * Element is a generic container.
1666: */
1667: } else {
1668: xmlParseElementContentDecl(ctxt, name);
1669: }
1.40 daniel 1670: SKIP_BLANKS(CUR_PTR);
1671: if (CUR != '>') {
1.31 daniel 1672: xmlParserError(ctxt,
1673: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 1674: } else
1.40 daniel 1675: NEXT;
1.22 daniel 1676: }
1677: }
1678:
1679: /*
1680: * xmlParseMarkupDecl: parse Markup declarations
1681: *
1682: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1683: * NotationDecl | PI | Comment
1684: *
1685: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1686: */
1687: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1688: xmlParseElementDecl(ctxt);
1689: xmlParseAttributeListDecl(ctxt);
1690: xmlParseEntityDecl(ctxt);
1691: xmlParseNotationDecl(ctxt);
1692: xmlParsePI(ctxt);
1.31 daniel 1693: xmlParseComment(ctxt, 0);
1.22 daniel 1694: }
1695:
1696: /*
1.24 daniel 1697: * xmlParseCharRef: parse Reference declarations
1698: *
1699: * [66] CharRef ::= '&#' [0-9]+ ';' |
1700: * '&#x' [0-9a-fA-F]+ ';'
1701: */
1702: CHAR xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 1703: int val = 0;
1.24 daniel 1704: CHAR ret = 0;
1705:
1.40 daniel 1706: if ((CUR == '&') && (NXT(1) == '#') &&
1707: (NXT(2) == 'x')) {
1708: SKIP(3);
1709: while (CUR != ';') {
1710: if ((CUR >= '0') && (CUR <= '9'))
1711: val = val * 16 + (CUR - '0');
1712: else if ((CUR >= 'a') && (CUR <= 'f'))
1713: val = val * 16 + (CUR - 'a') + 10;
1714: else if ((CUR >= 'A') && (CUR <= 'F'))
1715: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 1716: else {
1.31 daniel 1717: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1718: val = 0;
1.24 daniel 1719: break;
1720: }
1721: }
1.40 daniel 1722: if (CUR != ';')
1723: NEXT;
1724: } else if ((CUR == '&') && (NXT(1) == '#')) {
1725: SKIP(2);
1726: while (CUR != ';') {
1727: if ((CUR >= '0') && (CUR <= '9'))
1728: val = val * 16 + (CUR - '0');
1.24 daniel 1729: else {
1.31 daniel 1730: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1731: val = 0;
1.24 daniel 1732: break;
1733: }
1734: }
1.40 daniel 1735: if (CUR != ';')
1736: NEXT;
1.24 daniel 1737: } else {
1.31 daniel 1738: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 1739: }
1.29 daniel 1740: /*
1741: * Check the value IS_CHAR ...
1742: */
1743: if (IS_CHAR(val))
1744: ret = (CHAR) val;
1745: else {
1.39 daniel 1746: xmlParserError(ctxt, "xmlParseCharRef: invalid value");
1.29 daniel 1747: ret = '?';
1748: }
1.24 daniel 1749: return(ret);
1750: }
1751:
1752: /*
1753: * xmlParseEntityRef: parse ENTITY references declarations
1754: *
1755: * [68] EntityRef ::= '&' Name ';'
1756: */
1757: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1758: CHAR *name;
1759:
1.40 daniel 1760: if (CUR == '&') {
1761: NEXT;
1.24 daniel 1762: name = xmlParseName(ctxt);
1763: if (name == NULL) {
1.31 daniel 1764: xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 1765: } else {
1.40 daniel 1766: if (CUR == ';') {
1767: NEXT;
1.24 daniel 1768: /*
1769: * TODO there is a VC check here !!!
1770: * [ VC: Entity Declared ]
1771: */
1772: free(name);
1773: } else {
1.31 daniel 1774: xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.24 daniel 1775: }
1776: }
1777: }
1.25 daniel 1778: return(NULL); /* TODO !!!! */
1.24 daniel 1779: }
1780:
1781: /*
1782: * xmlParseReference: parse Reference declarations
1783: *
1784: * [67] Reference ::= EntityRef | CharRef
1785: */
1786: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
1787: CHAR *name;
1788:
1.40 daniel 1789: if (CUR == '&') {
1.24 daniel 1790: return(xmlParseEntityRef(ctxt));
1791: } else {
1.40 daniel 1792: NEXT;
1.24 daniel 1793: name = xmlParseName(ctxt);
1794: if (name == NULL) {
1.31 daniel 1795: xmlParserError(ctxt, "xmlParseReference: no name\n");
1.24 daniel 1796: } else {
1.40 daniel 1797: if (CUR == ';') {
1798: NEXT;
1.24 daniel 1799: /*
1800: * TODO there is a VC check here !!!
1801: * [ VC: Entity Declared ]
1802: */
1803: free(name);
1804: } else {
1.31 daniel 1805: xmlParserError(ctxt, "xmlParseReference: expecting ';'\n");
1.24 daniel 1806: }
1807: }
1808: }
1.25 daniel 1809: return(NULL); /* TODO !!!! */
1.24 daniel 1810: }
1811:
1812: /*
1.22 daniel 1813: * xmlParsePEReference: parse PEReference declarations
1814: *
1815: * [69] PEReference ::= '%' Name ';'
1816: */
1.24 daniel 1817: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 1818: CHAR *name;
1819:
1.40 daniel 1820: if (CUR == '%') {
1821: NEXT;
1.22 daniel 1822: name = xmlParseName(ctxt);
1823: if (name == NULL) {
1.31 daniel 1824: xmlParserError(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 1825: } else {
1.40 daniel 1826: if (CUR == ';') {
1827: NEXT;
1.22 daniel 1828: /*
1829: * TODO there is a VC check here !!!
1830: * [ VC: Entity Declared ]
1831: */
1832: free(name);
1833: } else {
1.31 daniel 1834: xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
1.22 daniel 1835: }
1.3 veillard 1836: }
1837: }
1.25 daniel 1838: return(NULL); /* TODO !!!! */
1.3 veillard 1839: }
1840:
1841: /*
1.21 daniel 1842: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
1843: *
1.22 daniel 1844: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1845: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 1846: */
1847:
1848: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 1849: xmlDtdPtr dtd;
1.21 daniel 1850: CHAR *name;
1851: CHAR *ExternalID = NULL;
1.39 daniel 1852: CHAR *URI = NULL;
1.21 daniel 1853:
1854: /*
1855: * We know that '<!DOCTYPE' has been detected.
1856: */
1.40 daniel 1857: SKIP(9);
1.21 daniel 1858:
1.40 daniel 1859: SKIP_BLANKS(CUR_PTR);
1.21 daniel 1860:
1861: /*
1862: * Parse the DOCTYPE name.
1863: */
1864: name = xmlParseName(ctxt);
1865: if (name == NULL) {
1.31 daniel 1866: xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 1867: }
1868:
1.40 daniel 1869: SKIP_BLANKS(CUR_PTR);
1.21 daniel 1870:
1871: /*
1.22 daniel 1872: * Check for SystemID and ExternalID
1873: */
1.39 daniel 1874: URI = xmlParseExternalID(ctxt, &ExternalID);
1.40 daniel 1875: SKIP_BLANKS(CUR_PTR);
1.36 daniel 1876:
1.39 daniel 1877: dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
1.22 daniel 1878:
1879: /*
1880: * Is there any DTD definition ?
1881: */
1.40 daniel 1882: if (CUR == '[') {
1883: NEXT;
1.22 daniel 1884: /*
1885: * Parse the succession of Markup declarations and
1886: * PEReferences.
1887: * Subsequence (markupdecl | PEReference | S)*
1888: */
1.40 daniel 1889: while (CUR != ']') {
1890: const CHAR *check = CUR_PTR;
1.22 daniel 1891:
1.40 daniel 1892: SKIP_BLANKS(CUR_PTR);
1.22 daniel 1893: xmlParseMarkupDecl(ctxt);
1894: xmlParsePEReference(ctxt);
1895:
1.40 daniel 1896: if (CUR_PTR == check) {
1.31 daniel 1897: xmlParserError(ctxt,
1898: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 1899: break;
1900: }
1901: }
1.40 daniel 1902: if (CUR == ']') NEXT;
1.22 daniel 1903: }
1904:
1905: /*
1906: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 1907: */
1.40 daniel 1908: if (CUR != '>') {
1.31 daniel 1909: xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 1910: /* We shouldn't try to resynchronize ... */
1.21 daniel 1911: }
1.40 daniel 1912: NEXT;
1.22 daniel 1913:
1914: /*
1915: * Cleanup, since we don't use all those identifiers
1916: * TODO : the DOCTYPE if available should be stored !
1917: */
1.39 daniel 1918: if (URI != NULL) free(URI);
1.22 daniel 1919: if (ExternalID != NULL) free(ExternalID);
1920: if (name != NULL) free(name);
1.21 daniel 1921: }
1922:
1923: /*
1.3 veillard 1924: * xmlParseAttribute: parse a start of tag.
1925: *
1.22 daniel 1926: * [41] Attribute ::= Name Eq AttValue
1927: *
1928: * [25] Eq ::= S? '=' S?
1929: *
1.29 daniel 1930: * With namespace:
1931: *
1932: * [NS 11] Attribute ::= QName Eq AttValue
1.3 veillard 1933: */
1934:
1.16 daniel 1935: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1936: CHAR *name, *value = NULL;
1.29 daniel 1937: CHAR *ns;
1.3 veillard 1938:
1.29 daniel 1939: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 1940: if (name == NULL) {
1.31 daniel 1941: xmlParserError(ctxt, "error parsing attribute name\n");
1.29 daniel 1942: return;
1.3 veillard 1943: }
1.22 daniel 1944: /*
1945: * TODO: Check for Namespace ...
1946: */
1.29 daniel 1947: if (ns != NULL) {
1948: free(ns);
1949: }
1.3 veillard 1950:
1951: /*
1.29 daniel 1952: * read the value
1.3 veillard 1953: */
1.40 daniel 1954: SKIP_BLANKS(CUR_PTR);
1955: if (CUR == '=') {
1956: NEXT;
1957: SKIP_BLANKS(CUR_PTR);
1.29 daniel 1958: value = xmlParseAttValue(ctxt);
1959: } else {
1.31 daniel 1960: xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
1961: name);
1.3 veillard 1962: }
1963:
1964: /*
1965: * Add the attribute to the node.
1966: */
1.17 daniel 1967: if (name != NULL) {
1.3 veillard 1968: xmlNewProp(node, name, value);
1.17 daniel 1969: free(name);
1970: }
1.29 daniel 1971: if (value != NULL)
1.17 daniel 1972: free(value);
1.3 veillard 1973: }
1974:
1975: /*
1.29 daniel 1976: * xmlParseStartTag: parse a start of tag either for rule element or
1977: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 1978: *
1979: * [40] STag ::= '<' Name (S Attribute)* S? '>'
1980: *
1.29 daniel 1981: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1982: *
1983: * With namespace:
1984: *
1985: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
1986: *
1987: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.2 veillard 1988: */
1989:
1.16 daniel 1990: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 1991: CHAR *namespace, *name;
1992: xmlNsPtr ns = NULL;
1.2 veillard 1993: xmlNodePtr ret = NULL;
1994:
1.40 daniel 1995: if (CUR != '<') return(NULL);
1996: NEXT;
1.3 veillard 1997:
1.34 daniel 1998: name = xmlNamespaceParseQName(ctxt, &namespace);
1999: if (namespace != NULL) {
1.3 veillard 2000: /*
2001: * Search the DTD associated to ns.
2002: */
1.34 daniel 2003: ns = xmlSearchNs(ctxt->doc, namespace);
2004: if (ns == NULL)
2005: xmlParserError(ctxt, "Start tag : Couldn't find namespace %s\n",
2006: namespace);
2007: free(namespace);
1.29 daniel 2008: }
1.3 veillard 2009:
1.34 daniel 2010: ret = xmlNewNode(ns, name, NULL);
1.2 veillard 2011:
1.3 veillard 2012: /*
2013: * Now parse the attributes, it ends up with the ending
2014: *
2015: * (S Attribute)* S?
2016: */
1.40 daniel 2017: SKIP_BLANKS(CUR_PTR);
2018: while ((IS_CHAR(CUR)) &&
2019: (CUR != '>') &&
2020: ((CUR != '/') || (NXT(1) != '>'))) {
2021: const CHAR *q = CUR_PTR;
1.29 daniel 2022:
2023: xmlParseAttribute(ctxt, ret);
1.40 daniel 2024: SKIP_BLANKS(CUR_PTR);
1.29 daniel 2025:
1.40 daniel 2026: if (q == CUR_PTR) {
1.31 daniel 2027: xmlParserError(ctxt,
2028: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 2029: break;
1.3 veillard 2030: }
2031: }
2032:
2033: return(ret);
2034: }
2035:
2036: /*
1.27 daniel 2037: * xmlParseEndTag: parse an end of tag
2038: *
2039: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 2040: *
2041: * With namespace
2042: *
2043: * [9] ETag ::= '</' QName S? '>'
1.7 veillard 2044: */
2045:
1.34 daniel 2046: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
2047: CHAR *namespace, *name;
2048: xmlNsPtr ns = NULL;
1.7 veillard 2049:
1.34 daniel 2050: *nsPtr = NULL;
1.7 veillard 2051: *tagPtr = NULL;
2052:
1.40 daniel 2053: if ((CUR != '<') || (NXT(1) != '/')) {
1.31 daniel 2054: xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 2055: return;
2056: }
1.40 daniel 2057: SKIP(2);
1.7 veillard 2058:
1.34 daniel 2059: name = xmlNamespaceParseQName(ctxt, &namespace);
2060: if (namespace != NULL) {
1.7 veillard 2061: /*
2062: * Search the DTD associated to ns.
2063: */
1.34 daniel 2064: ns = xmlSearchNs(ctxt->doc, namespace);
2065: if (ns == NULL)
2066: xmlParserError(ctxt, "End tag : Couldn't find namespace %s\n",
2067: namespace);
2068: free(namespace);
1.29 daniel 2069: }
1.7 veillard 2070:
1.34 daniel 2071: *nsPtr = ns;
1.7 veillard 2072: *tagPtr = name;
2073:
2074: /*
2075: * We should definitely be at the ending "S? '>'" part
2076: */
1.40 daniel 2077: SKIP_BLANKS(CUR_PTR);
2078: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.31 daniel 2079: xmlParserError(ctxt, "End tag : expected '>'\n");
1.7 veillard 2080: } else
1.40 daniel 2081: NEXT;
1.7 veillard 2082:
2083: return;
2084: }
2085:
2086: /*
1.3 veillard 2087: * xmlParseCDSect: escaped pure raw content.
1.29 daniel 2088: *
2089: * [18] CDSect ::= CDStart CData CDEnd
2090: *
2091: * [19] CDStart ::= '<![CDATA['
2092: *
2093: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2094: *
2095: * [21] CDEnd ::= ']]>'
1.3 veillard 2096: */
1.16 daniel 2097: CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2098: const CHAR *r, *s, *base;
2099: CHAR *ret;
1.3 veillard 2100:
1.40 daniel 2101: if ((CUR == '<') && (NXT(1) == '!') &&
2102: (NXT(2) == '[') && (NXT(3) == 'C') &&
2103: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2104: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2105: (NXT(8) == '[')) {
2106: SKIP(9);
1.29 daniel 2107: } else
2108: return(NULL);
1.40 daniel 2109: base = CUR_PTR;
2110: if (!IS_CHAR(CUR)) {
1.31 daniel 2111: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2112: return(NULL);
2113: }
1.40 daniel 2114: r = NEXT;
2115: if (!IS_CHAR(CUR)) {
1.31 daniel 2116: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2117: return(NULL);
2118: }
1.40 daniel 2119: s = NEXT;
2120: while (IS_CHAR(CUR) &&
2121: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
2122: r++;s++;NEXT;
1.3 veillard 2123: }
1.40 daniel 2124: if (!IS_CHAR(CUR)) {
1.31 daniel 2125: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2126: return(NULL);
2127: }
1.40 daniel 2128: ret = xmlStrndup(base, CUR_PTR - base);
1.16 daniel 2129:
1.2 veillard 2130: return(ret);
2131: }
2132:
2133: /*
2134: * xmlParseContent: a content is
2135: * (element | PCData | Reference | CDSect | PI | Comment)
2136: *
1.27 daniel 2137: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2138: */
2139:
1.27 daniel 2140: void xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.39 daniel 2141: const CHAR *q, *dec;
1.17 daniel 2142: CHAR *data = NULL;
1.2 veillard 2143: xmlNodePtr ret = NULL;
2144:
1.40 daniel 2145: while ((CUR != '<') || (NXT(1) != '/')) {
2146: const CHAR *test = CUR_PTR;
1.27 daniel 2147: ret = NULL;
2148: data = NULL;
2149:
2150: /*
2151: * First case : a Processing Instruction.
2152: */
1.40 daniel 2153: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 2154: xmlParsePI(ctxt);
2155: }
2156: /*
2157: * Second case : a CDSection
2158: */
1.40 daniel 2159: else if ((CUR == '<') && (NXT(1) == '!') &&
2160: (NXT(2) == '[') && (NXT(3) == 'C') &&
2161: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2162: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2163: (NXT(8) == '[')) {
1.27 daniel 2164: data = xmlParseCDSect(ctxt);
2165: }
2166: /*
2167: * Third case : a comment
2168: */
1.40 daniel 2169: else if ((CUR == '<') && (NXT(1) == '!') &&
2170: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 2171: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2172: }
2173: /*
2174: * Fourth case : a sub-element.
2175: */
1.40 daniel 2176: else if (CUR == '<') {
1.27 daniel 2177: ret = xmlParseElement(ctxt);
2178: }
2179: /*
2180: * Last case, text. Note that References are handled directly.
2181: */
2182: else {
1.40 daniel 2183: q = CUR_PTR;
2184: while (IS_CHAR(CUR) && (CUR != '<')) NEXT;
1.27 daniel 2185:
1.40 daniel 2186: if (!IS_CHAR(CUR)) {
1.31 daniel 2187: xmlParserError(ctxt, "Truncated content\n%.50s\n", q);
1.27 daniel 2188: return;
2189: }
1.3 veillard 2190:
1.27 daniel 2191: /*
2192: * Do the Entities decoding...
2193: */
1.40 daniel 2194: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 2195: data = xmlStrdup(dec);
1.40 daniel 2196: free((char *) dec);
1.3 veillard 2197: }
1.14 veillard 2198:
2199: /*
1.27 daniel 2200: * Handle the data if any. If there is no child
2201: * add it as content, otherwise create a new node of type text.
1.14 veillard 2202: */
1.27 daniel 2203: if (data != NULL)
2204: data = xmlHandleData(data);
2205: if (data != NULL) {
2206: if (node->childs == NULL)
2207: xmlNodeSetContent(node, data);
2208: else
2209: ret = xmlNewText(data);
2210: free(data);
2211: }
2212: if (ret != NULL)
2213: xmlAddChild(node, ret);
1.40 daniel 2214: if (test == CUR_PTR) {
1.31 daniel 2215: xmlParserError(ctxt, "detected an error in element content\n");
1.29 daniel 2216: break;
2217: }
1.3 veillard 2218: }
1.2 veillard 2219: }
2220:
2221: /*
2222: * xmlParseElement: parse an XML element
1.26 daniel 2223: *
2224: * [39] element ::= EmptyElemTag | STag content ETag
2225: *
2226: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 2227: */
1.26 daniel 2228:
1.2 veillard 2229:
1.16 daniel 2230: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2231: xmlNodePtr ret;
1.40 daniel 2232: const CHAR *openTag = CUR_PTR;
1.32 daniel 2233: xmlParserNodeInfo node_info;
1.27 daniel 2234: CHAR *endTag;
1.34 daniel 2235: xmlNsPtr endNs;
1.2 veillard 2236:
1.32 daniel 2237: /* Capture start position */
1.40 daniel 2238: node_info.begin_pos = CUR_PTR - ctxt->input->base;
2239: node_info.begin_line = ctxt->input->line;
1.32 daniel 2240:
1.16 daniel 2241: ret = xmlParseStartTag(ctxt);
1.3 veillard 2242: if (ret == NULL) {
2243: return(NULL);
2244: }
1.2 veillard 2245:
2246: /*
2247: * Check for an Empty Element.
2248: */
1.40 daniel 2249: if ((CUR == '/') && (NXT(1) == '>')) {
2250: SKIP(2);
1.2 veillard 2251: return(ret);
2252: }
1.40 daniel 2253: if (CUR == '>') NEXT;
1.2 veillard 2254: else {
1.31 daniel 2255: xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
1.16 daniel 2256: return(NULL);
1.2 veillard 2257: }
2258:
2259: /*
2260: * Parse the content of the element:
2261: */
1.27 daniel 2262: xmlParseContent(ctxt, ret);
1.40 daniel 2263: if (!IS_CHAR(CUR)) {
1.31 daniel 2264: xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
2265: openTag);
1.16 daniel 2266: return(NULL);
1.2 veillard 2267: }
2268:
2269: /*
1.27 daniel 2270: * parse the end of tag: '</' should be here.
1.2 veillard 2271: */
1.34 daniel 2272: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 2273:
1.27 daniel 2274: /*
2275: * Check that the Name in the ETag is the same as in the STag.
2276: */
1.34 daniel 2277: if (endNs != ret->ns) {
1.31 daniel 2278: xmlParserError(ctxt,
2279: "Start and End tags don't use the same DTD\n%.30s\n%.30s\n",
2280: openTag, endTag);
1.27 daniel 2281: }
1.32 daniel 2282: if (endTag == NULL ) {
2283: xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
2284: } else if (strcmp(ret->name, endTag)) {
1.31 daniel 2285: xmlParserError(ctxt,
2286: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2287: openTag, endTag);
1.27 daniel 2288: }
1.7 veillard 2289:
1.27 daniel 2290: if ( endTag != NULL )
2291: free(endTag);
1.2 veillard 2292:
1.32 daniel 2293: /* Capture end position and add node */
2294: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 2295: node_info.end_pos = CUR_PTR - ctxt->input->base;
2296: node_info.end_line = ctxt->input->line;
1.32 daniel 2297: node_info.node = ret;
2298: xmlParserAddNodeInfo(ctxt, &node_info);
2299: }
1.2 veillard 2300: return(ret);
2301: }
2302:
2303: /*
1.29 daniel 2304: * xmlParseVersionNum: parse the XML version value.
2305: *
2306: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2307: */
2308: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 2309: const CHAR *q = CUR_PTR;
1.29 daniel 2310: CHAR *ret;
2311:
1.40 daniel 2312: while (IS_CHAR(CUR) &&
2313: (((CUR >= 'a') && (CUR <= 'z')) ||
2314: ((CUR >= 'A') && (CUR <= 'Z')) ||
2315: ((CUR >= '0') && (CUR <= '9')) ||
2316: (CUR == '_') || (CUR == '.') ||
2317: (CUR == ':') || (CUR == '-'))) NEXT;
2318: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2319: return(ret);
2320: }
2321:
2322: /*
2323: * xmlParseVersionInfo: parse the XML version.
2324: *
2325: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2326: *
2327: * [25] Eq ::= S? '=' S?
2328: */
2329:
2330: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2331: CHAR *version = NULL;
2332: const CHAR *q;
2333:
1.40 daniel 2334: if ((CUR == 'v') && (NXT(1) == 'e') &&
2335: (NXT(2) == 'r') && (NXT(3) == 's') &&
2336: (NXT(4) == 'i') && (NXT(5) == 'o') &&
2337: (NXT(6) == 'n')) {
2338: SKIP(7);
2339: SKIP_BLANKS(CUR_PTR);
2340: if (CUR != '=') {
1.31 daniel 2341: xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
2342: return(NULL);
2343: }
1.40 daniel 2344: NEXT;
2345: SKIP_BLANKS(CUR_PTR);
2346: if (CUR == '"') {
2347: NEXT;
2348: q = CUR_PTR;
1.29 daniel 2349: version = xmlParseVersionNum(ctxt);
1.40 daniel 2350: if (CUR != '"')
1.31 daniel 2351: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2352: else
1.40 daniel 2353: NEXT;
2354: } else if (CUR == '\''){
2355: NEXT;
2356: q = CUR_PTR;
1.29 daniel 2357: version = xmlParseVersionNum(ctxt);
1.40 daniel 2358: if (CUR != '\'')
1.31 daniel 2359: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2360: else
1.40 daniel 2361: NEXT;
1.31 daniel 2362: } else {
2363: xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 2364: }
2365: }
2366: return(version);
2367: }
2368:
2369: /*
2370: * xmlParseEncName: parse the XML encoding name
2371: *
2372: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2373: */
2374: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 2375: const CHAR *q = CUR_PTR;
1.29 daniel 2376: CHAR *ret = NULL;
2377:
1.40 daniel 2378: if (((CUR >= 'a') && (CUR <= 'z')) ||
2379: ((CUR >= 'A') && (CUR <= 'Z'))) {
2380: NEXT;
2381: while (IS_CHAR(CUR) &&
2382: (((CUR >= 'a') && (CUR <= 'z')) ||
2383: ((CUR >= 'A') && (CUR <= 'Z')) ||
2384: ((CUR >= '0') && (CUR <= '9')) ||
2385: (CUR == '-'))) NEXT;
2386: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2387: } else {
1.31 daniel 2388: xmlParserError(ctxt, "Invalid XML encoding name\n");
1.29 daniel 2389: }
2390: return(ret);
2391: }
2392:
2393: /*
2394: * xmlParseEncodingDecl: parse the XML encoding declaration
2395: *
2396: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2397: */
2398:
2399: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2400: CHAR *encoding = NULL;
2401: const CHAR *q;
2402:
1.40 daniel 2403: SKIP_BLANKS(CUR_PTR);
2404: if ((CUR == 'e') && (NXT(1) == 'n') &&
2405: (NXT(2) == 'c') && (NXT(3) == 'o') &&
2406: (NXT(4) == 'd') && (NXT(5) == 'i') &&
2407: (NXT(6) == 'n') && (NXT(7) == 'g')) {
2408: SKIP(8);
2409: SKIP_BLANKS(CUR_PTR);
2410: if (CUR != '=') {
1.31 daniel 2411: xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
2412: return(NULL);
2413: }
1.40 daniel 2414: NEXT;
2415: SKIP_BLANKS(CUR_PTR);
2416: if (CUR == '"') {
2417: NEXT;
2418: q = CUR_PTR;
1.29 daniel 2419: encoding = xmlParseEncName(ctxt);
1.40 daniel 2420: if (CUR != '"')
1.31 daniel 2421: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2422: else
1.40 daniel 2423: NEXT;
2424: } else if (CUR == '\''){
2425: NEXT;
2426: q = CUR_PTR;
1.29 daniel 2427: encoding = xmlParseEncName(ctxt);
1.40 daniel 2428: if (CUR != '\'')
1.31 daniel 2429: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2430: else
1.40 daniel 2431: NEXT;
2432: } else if (CUR == '"'){
1.31 daniel 2433: xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 2434: }
2435: }
2436: return(encoding);
2437: }
2438:
2439: /*
2440: * xmlParseSDDecl: parse the XML standalone declaration
2441: *
2442: * [32] SDDecl ::= S 'standalone' Eq
2443: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2444: */
2445:
2446: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2447: int standalone = -1;
2448:
1.40 daniel 2449: SKIP_BLANKS(CUR_PTR);
2450: if ((CUR == 's') && (NXT(1) == 't') &&
2451: (NXT(2) == 'a') && (NXT(3) == 'n') &&
2452: (NXT(4) == 'd') && (NXT(5) == 'a') &&
2453: (NXT(6) == 'l') && (NXT(7) == 'o') &&
2454: (NXT(8) == 'n') && (NXT(9) == 'e')) {
2455: SKIP(10);
2456: if (CUR != '=') {
1.32 daniel 2457: xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
2458: return(standalone);
2459: }
1.40 daniel 2460: NEXT;
2461: SKIP_BLANKS(CUR_PTR);
2462: if (CUR == '\''){
2463: NEXT;
2464: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 2465: standalone = 0;
1.40 daniel 2466: SKIP(2);
2467: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2468: (NXT(2) == 's')) {
1.29 daniel 2469: standalone = 1;
1.40 daniel 2470: SKIP(3);
1.29 daniel 2471: } else {
1.31 daniel 2472: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2473: }
1.40 daniel 2474: if (CUR != '\'')
1.31 daniel 2475: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2476: else
1.40 daniel 2477: NEXT;
2478: } else if (CUR == '"'){
2479: NEXT;
2480: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 2481: standalone = 0;
1.40 daniel 2482: SKIP(2);
2483: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2484: (NXT(2) == 's')) {
1.29 daniel 2485: standalone = 1;
1.40 daniel 2486: SKIP(3);
1.29 daniel 2487: } else {
1.31 daniel 2488: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2489: }
1.40 daniel 2490: if (CUR != '"')
1.31 daniel 2491: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2492: else
1.40 daniel 2493: NEXT;
1.37 daniel 2494: } else {
2495: xmlParserError(ctxt, "Standalone value not found\n");
2496: }
1.29 daniel 2497: }
2498: return(standalone);
2499: }
2500:
2501: /*
1.1 veillard 2502: * xmlParseXMLDecl: parse an XML declaration header
1.29 daniel 2503: *
2504: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 2505: */
2506:
1.16 daniel 2507: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 2508: CHAR *version;
2509:
2510: /*
1.19 daniel 2511: * We know that '<?xml' is here.
1.1 veillard 2512: */
1.40 daniel 2513: SKIP(5);
1.1 veillard 2514:
1.40 daniel 2515: SKIP_BLANKS(CUR_PTR);
1.1 veillard 2516:
2517: /*
1.29 daniel 2518: * We should have the VersionInfo here.
1.1 veillard 2519: */
1.29 daniel 2520: version = xmlParseVersionInfo(ctxt);
2521: if (version == NULL)
1.16 daniel 2522: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.29 daniel 2523: else {
2524: ctxt->doc = xmlNewDoc(version);
2525: free(version);
2526: }
2527:
2528: /*
2529: * We may have the encoding declaration
2530: */
1.32 daniel 2531: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 2532:
2533: /*
1.29 daniel 2534: * We may have the standalone status.
1.1 veillard 2535: */
1.32 daniel 2536: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 2537:
1.40 daniel 2538: SKIP_BLANKS(CUR_PTR);
2539: if ((CUR == '?') && (NXT(1) == '>')) {
2540: SKIP(2);
2541: } else if (CUR == '>') {
1.31 daniel 2542: /* Deprecated old WD ... */
2543: xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
1.40 daniel 2544: NEXT;
1.29 daniel 2545: } else {
1.31 daniel 2546: xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
1.40 daniel 2547: MOVETO_ENDTAG(CUR_PTR);
2548: NEXT;
1.29 daniel 2549: }
1.1 veillard 2550: }
2551:
2552: /*
1.22 daniel 2553: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 2554: * Misc*
2555: *
1.22 daniel 2556: * [27] Misc ::= Comment | PI | S
1.1 veillard 2557: */
2558:
1.16 daniel 2559: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 2560: while (((CUR == '<') && (NXT(1) == '?')) ||
2561: ((CUR == '<') && (NXT(1) == '!') &&
2562: (NXT(2) == '-') && (NXT(3) == '-')) ||
2563: IS_BLANK(CUR)) {
2564: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 2565: xmlParsePI(ctxt);
1.40 daniel 2566: } else if (IS_BLANK(CUR)) {
2567: NEXT;
1.1 veillard 2568: } else
1.31 daniel 2569: xmlParseComment(ctxt, 0);
1.1 veillard 2570: }
2571: }
2572:
2573: /*
1.16 daniel 2574: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 2575: *
1.22 daniel 2576: * [1] document ::= prolog element Misc*
1.29 daniel 2577: *
2578: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 2579: */
2580:
1.16 daniel 2581: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.14 veillard 2582: /*
2583: * We should check for encoding here and plug-in some
2584: * conversion code TODO !!!!
2585: */
1.1 veillard 2586:
2587: /*
2588: * Wipe out everything which is before the first '<'
2589: */
1.40 daniel 2590: SKIP_BLANKS(CUR_PTR);
1.1 veillard 2591:
2592: /*
2593: * Check for the XMLDecl in the Prolog.
2594: */
1.40 daniel 2595: if ((CUR == '<') && (NXT(1) == '?') &&
2596: (NXT(2) == 'x') && (NXT(3) == 'm') &&
2597: (NXT(4) == 'l')) {
1.19 daniel 2598: xmlParseXMLDecl(ctxt);
2599: /* SKIP_EOL(cur); */
1.40 daniel 2600: SKIP_BLANKS(CUR_PTR);
2601: } else if ((CUR == '<') && (NXT(1) == '?') &&
2602: (NXT(2) == 'X') && (NXT(3) == 'M') &&
2603: (NXT(4) == 'L')) {
1.19 daniel 2604: /*
2605: * The first drafts were using <?XML and the final W3C REC
2606: * now use <?xml ...
2607: */
1.16 daniel 2608: xmlParseXMLDecl(ctxt);
1.1 veillard 2609: /* SKIP_EOL(cur); */
1.40 daniel 2610: SKIP_BLANKS(CUR_PTR);
1.1 veillard 2611: } else {
1.16 daniel 2612: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 2613: }
2614:
2615: /*
2616: * The Misc part of the Prolog
2617: */
1.16 daniel 2618: xmlParseMisc(ctxt);
1.1 veillard 2619:
2620: /*
1.29 daniel 2621: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 2622: * (doctypedecl Misc*)?
2623: */
1.40 daniel 2624: if ((CUR == '<') && (NXT(1) == '!') &&
2625: (NXT(2) == 'D') && (NXT(3) == 'O') &&
2626: (NXT(4) == 'C') && (NXT(5) == 'T') &&
2627: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
2628: (NXT(8) == 'E')) {
1.22 daniel 2629: xmlParseDocTypeDecl(ctxt);
2630: xmlParseMisc(ctxt);
1.21 daniel 2631: }
2632:
2633: /*
2634: * Time to start parsing the tree itself
1.1 veillard 2635: */
1.16 daniel 2636: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 2637:
2638: /*
2639: * The Misc part at the end
2640: */
2641: xmlParseMisc(ctxt);
1.16 daniel 2642:
2643: return(0);
2644: }
2645:
2646: /*
2647: * xmlParseDoc : parse an XML in-memory document and build a tree.
2648: */
2649:
2650: xmlDocPtr xmlParseDoc(CHAR *cur) {
2651: xmlDocPtr ret;
2652: xmlParserCtxtPtr ctxt;
1.40 daniel 2653: xmlParserInputPtr input;
1.16 daniel 2654:
2655: if (cur == NULL) return(NULL);
1.1 veillard 2656:
1.16 daniel 2657: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2658: if (ctxt == NULL) {
2659: perror("malloc");
2660: return(NULL);
2661: }
1.40 daniel 2662: xmlInitParserCtxt(ctxt);
2663: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2664: if (input == NULL) {
2665: perror("malloc");
2666: free(ctxt);
2667: return(NULL);
2668: }
2669:
2670: input->filename = NULL;
2671: input->line = 1;
2672: input->col = 1;
2673: input->base = cur;
2674: input->cur = cur;
2675:
2676: inputPush(ctxt, input);
1.16 daniel 2677:
2678:
2679: xmlParseDocument(ctxt);
2680: ret = ctxt->doc;
1.20 daniel 2681: free(ctxt->nodes);
1.16 daniel 2682: free(ctxt);
2683:
1.1 veillard 2684: return(ret);
2685: }
2686:
1.9 httpng 2687: /*
2688: * xmlParseFile : parse an XML file and build a tree.
2689: */
2690:
2691: xmlDocPtr xmlParseFile(const char *filename) {
2692: xmlDocPtr ret;
1.20 daniel 2693: #ifdef HAVE_ZLIB_H
2694: gzFile input;
2695: #else
1.9 httpng 2696: int input;
1.20 daniel 2697: #endif
1.9 httpng 2698: int res;
2699: struct stat buf;
2700: char *buffer;
1.16 daniel 2701: xmlParserCtxtPtr ctxt;
1.40 daniel 2702: xmlParserInputPtr inputStream;
1.9 httpng 2703:
1.11 veillard 2704: res = stat(filename, &buf);
1.9 httpng 2705: if (res < 0) return(NULL);
2706:
1.20 daniel 2707: #ifdef HAVE_ZLIB_H
2708: retry_bigger:
2709: buffer = malloc((buf.st_size * 20) + 100);
2710: #else
1.9 httpng 2711: buffer = malloc(buf.st_size + 100);
1.20 daniel 2712: #endif
1.9 httpng 2713: if (buffer == NULL) {
2714: perror("malloc");
2715: return(NULL);
2716: }
2717:
2718: memset(buffer, 0, sizeof(buffer));
1.20 daniel 2719: #ifdef HAVE_ZLIB_H
2720: input = gzopen (filename, "r");
2721: if (input == NULL) {
2722: fprintf (stderr, "Cannot read file %s :\n", filename);
2723: perror ("gzopen failed");
2724: return(NULL);
2725: }
2726: #else
1.9 httpng 2727: input = open (filename, O_RDONLY);
2728: if (input < 0) {
2729: fprintf (stderr, "Cannot read file %s :\n", filename);
2730: perror ("open failed");
2731: return(NULL);
2732: }
1.20 daniel 2733: #endif
2734: #ifdef HAVE_ZLIB_H
2735: res = gzread(input, buffer, 20 * buf.st_size);
2736: #else
1.9 httpng 2737: res = read(input, buffer, buf.st_size);
1.20 daniel 2738: #endif
1.9 httpng 2739: if (res < 0) {
2740: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 2741: #ifdef HAVE_ZLIB_H
2742: perror ("gzread failed");
2743: #else
1.9 httpng 2744: perror ("read failed");
1.20 daniel 2745: #endif
1.9 httpng 2746: return(NULL);
2747: }
1.20 daniel 2748: #ifdef HAVE_ZLIB_H
2749: gzclose(input);
2750: if (res >= 20 * buf.st_size) {
2751: free(buffer);
2752: buf.st_size *= 2;
2753: goto retry_bigger;
2754: }
2755: buf.st_size = res;
2756: #else
1.9 httpng 2757: close(input);
1.20 daniel 2758: #endif
2759:
1.40 daniel 2760: buffer[buf.st_size] = '\0';
1.9 httpng 2761:
1.16 daniel 2762: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2763: if (ctxt == NULL) {
2764: perror("malloc");
2765: return(NULL);
2766: }
1.40 daniel 2767: xmlInitParserCtxt(ctxt);
2768: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2769: if (inputStream == NULL) {
2770: perror("malloc");
2771: free(ctxt);
2772: return(NULL);
2773: }
2774:
2775: inputStream->filename = strdup(filename);
2776: inputStream->line = 1;
2777: inputStream->col = 1;
2778: inputStream->base = buffer;
2779: inputStream->cur = buffer;
1.16 daniel 2780:
1.40 daniel 2781: inputPush(ctxt, inputStream);
1.16 daniel 2782:
2783: xmlParseDocument(ctxt);
1.40 daniel 2784:
1.16 daniel 2785: ret = ctxt->doc;
1.9 httpng 2786: free(buffer);
1.20 daniel 2787: free(ctxt->nodes);
2788: free(ctxt);
2789:
2790: return(ret);
2791: }
2792:
1.32 daniel 2793:
1.20 daniel 2794: /*
1.32 daniel 2795: * xmlParseMemory : parse an XML memory block and build a tree.
1.20 daniel 2796: */
2797: xmlDocPtr xmlParseMemory(char *buffer, int size) {
2798: xmlDocPtr ret;
2799: xmlParserCtxtPtr ctxt;
1.40 daniel 2800: xmlParserInputPtr input;
2801:
2802: buffer[size - 1] = '\0';
2803:
1.20 daniel 2804: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2805: if (ctxt == NULL) {
2806: perror("malloc");
2807: return(NULL);
2808: }
1.40 daniel 2809: xmlInitParserCtxt(ctxt);
2810: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2811: if (input == NULL) {
2812: perror("malloc");
2813: free(ctxt);
2814: return(NULL);
2815: }
1.20 daniel 2816:
1.40 daniel 2817: input->filename = NULL;
2818: input->line = 1;
2819: input->col = 1;
2820: input->base = buffer;
2821: input->cur = buffer;
1.20 daniel 2822:
1.40 daniel 2823: inputPush(ctxt, input);
1.20 daniel 2824:
2825: xmlParseDocument(ctxt);
1.40 daniel 2826:
1.20 daniel 2827: ret = ctxt->doc;
2828: free(ctxt->nodes);
1.16 daniel 2829: free(ctxt);
2830:
1.9 httpng 2831: return(ret);
1.17 daniel 2832: }
2833:
2834:
2835: /* Initialize parser context */
2836: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2837: {
1.32 daniel 2838: int i;
1.19 daniel 2839:
1.40 daniel 2840: /* Allocate the Input stack */
2841: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
2842: ctxt->inputNr = 0;
2843: ctxt->inputMax = 5;
2844: ctxt->input = NULL;
2845:
1.32 daniel 2846: ctxt->doc = NULL;
2847: ctxt->depth = 0;
2848: ctxt->max_depth = 10;
2849: ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
2850: if (ctxt->nodes == NULL) {
2851: fprintf(stderr, "malloc of %d byte failed\n",
2852: ctxt->max_depth * sizeof(xmlNodePtr));
2853: ctxt->max_depth = 0;
2854: } else {
2855: for (i = 0;i < ctxt->max_depth;i++)
2856: ctxt->nodes[i] = NULL;
2857: }
2858: ctxt->record_info = 0;
2859: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 2860: }
2861:
2862:
1.19 daniel 2863: /*
2864: * Clear (release owned resources) and reinitialize context
2865: */
1.32 daniel 2866: void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 2867: {
1.32 daniel 2868: if ( ctxt->nodes != NULL )
2869: free(ctxt->nodes);
2870: xmlClearNodeInfoSeq(&ctxt->node_seq);
2871: xmlInitParserCtxt(ctxt);
1.17 daniel 2872: }
2873:
2874:
1.19 daniel 2875: /*
2876: * Setup the parser context to parse a new buffer; Clears any prior
2877: * contents from the parser context. The buffer parameter must not be
2878: * NULL, but the filename parameter can be
2879: */
1.17 daniel 2880: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
2881: const char* filename)
2882: {
1.40 daniel 2883: xmlParserInputPtr input;
2884:
2885: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2886: if (input == NULL) {
2887: perror("malloc");
2888: free(ctxt);
2889: exit(1);
2890: }
2891:
1.17 daniel 2892: xmlClearParserCtxt(ctxt);
1.40 daniel 2893: if (input->filename != NULL)
2894: input->filename = strdup(filename);
2895: else
2896: input->filename = NULL;
2897: input->line = 1;
2898: input->col = 1;
2899: input->base = buffer;
2900: input->cur = buffer;
2901:
2902: inputPush(ctxt, input);
1.17 daniel 2903: }
2904:
1.32 daniel 2905:
2906: /*
2907: * xmlParserFindNodeInfo : Find the parser node info struct for a given node
2908: */
2909: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2910: const xmlNode* node)
2911: {
2912: unsigned long pos;
2913:
2914: /* Find position where node should be at */
2915: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2916: if ( ctx->node_seq.buffer[pos].node == node )
2917: return &ctx->node_seq.buffer[pos];
2918: else
2919: return NULL;
2920: }
2921:
2922:
2923: /*
2924: * xmlInitNodeInfoSeq -- Initialize (set to initial state) node info sequence
2925: */
2926: void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2927: {
2928: seq->length = 0;
2929: seq->maximum = 0;
2930: seq->buffer = NULL;
2931: }
2932:
2933: /*
2934: * xmlClearNodeInfoSeq -- Clear (release memory and reinitialize) node
2935: * info sequence
2936: */
2937: void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2938: {
2939: if ( seq->buffer != NULL )
2940: free(seq->buffer);
2941: xmlInitNodeInfoSeq(seq);
2942: }
2943:
2944:
2945: /*
2946: * xmlParserFindNodeInfoIndex : Find the index that the info record for
2947: * the given node is or should be at in a sorted sequence
2948: */
2949: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2950: const xmlNode* node)
2951: {
2952: unsigned long upper, lower, middle;
2953: int found = 0;
2954:
2955: /* Do a binary search for the key */
2956: lower = 1;
2957: upper = seq->length;
2958: middle = 0;
2959: while ( lower <= upper && !found) {
2960: middle = lower + (upper - lower) / 2;
2961: if ( node == seq->buffer[middle - 1].node )
2962: found = 1;
2963: else if ( node < seq->buffer[middle - 1].node )
2964: upper = middle - 1;
2965: else
2966: lower = middle + 1;
2967: }
2968:
2969: /* Return position */
2970: if ( middle == 0 || seq->buffer[middle - 1].node < node )
2971: return middle;
2972: else
2973: return middle - 1;
2974: }
2975:
2976:
2977: /*
2978: * xmlParserAddNodeInfo : Insert node info record into sorted sequence
2979: */
2980: void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
2981: const xmlParserNodeInfo* info)
2982: {
2983: unsigned long pos;
2984: static unsigned int block_size = 5;
2985:
2986: /* Find pos and check to see if node is already in the sequence */
2987: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
2988: if ( pos < ctx->node_seq.length
2989: && ctx->node_seq.buffer[pos].node == info->node ) {
2990: ctx->node_seq.buffer[pos] = *info;
2991: }
2992:
2993: /* Otherwise, we need to add new node to buffer */
2994: else {
2995: /* Expand buffer by 5 if needed */
2996: if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
2997: xmlParserNodeInfo* tmp_buffer;
2998: unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
2999: *(ctx->node_seq.maximum + block_size));
3000:
3001: if ( ctx->node_seq.buffer == NULL )
3002: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
3003: else
3004: tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
3005:
3006: if ( tmp_buffer == NULL ) {
3007: xmlParserError(ctx, "Out of memory");
3008: return;
3009: }
3010: ctx->node_seq.buffer = tmp_buffer;
3011: ctx->node_seq.maximum += block_size;
3012: }
3013:
3014: /* If position is not at end, move elements out of the way */
3015: if ( pos != ctx->node_seq.length ) {
3016: unsigned long i;
3017:
3018: for ( i = ctx->node_seq.length; i > pos; i-- )
3019: ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
3020: }
3021:
3022: /* Copy element and increase length */
3023: ctx->node_seq.buffer[pos] = *info;
3024: ctx->node_seq.length++;
3025: }
3026: }
Webmaster