Annotation of XML/parser.c, revision 1.33
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.33 ! daniel 6: * $Id: parser.c,v 1.32 1998/08/05 19:13:27 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
34: /*
35: * A few macros needed to help building the parser.
36: */
37:
/*
 * PUSH_AND_POP(type, name): generate a pair of stack accessors,
 * name##Push() and name##Pop(), working on three fields of the parser
 * context reachable through an identifier called `ctxt` that must be in
 * scope where the macro is instantiated:
 *     ctxt->name##tab   array holding the stacked values
 *     ctxt->name##Nr    number of values currently stacked
 *     ctxt->name##Max   allocated capacity of name##tab
 *
 * name##Push(value) grows the array (doubling it) when full, stores the
 * value and returns the index it was stored at.  On allocation failure a
 * message is printed on stderr and the process exits.
 * name##Pop() returns the last pushed value, or 0 if the stack is empty.
 *
 * Every counter access uses name##Nr; pasting `type` there would reference
 * a field named after the C type instead of the stack.
 */
#define PUSH_AND_POP(type, name)					\
int name##Push(type value) {						\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        /* doubling a zero capacity would never make room */		\
        if (ctxt->name##Max <= 0)					\
            ctxt->name##Max = 4;					\
        else								\
            ctxt->name##Max *= 2;					\
        ctxt->name##tab = (void *) realloc(ctxt->name##tab,		\
                     ctxt->name##Max * sizeof(ctxt->name##tab[0]));	\
        if (ctxt->name##tab == NULL) {					\
            fprintf(stderr, "realloc failed !\n");			\
            exit(1);							\
        }								\
    }									\
    ctxt->name##tab[ctxt->name##Nr] = value;				\
    return(ctxt->name##Nr++);						\
}									\
type name##Pop() {							\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    return(ctxt->name##tab[ctxt->name##Nr]);				\
}
57:
1.1 veillard 58: #ifdef UNICODE
1.30 daniel 59: /************************************************************************
60: * *
61: * UNICODE version of the macros. *
62: * *
63: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * IS_CHAR(c) is non-zero for TAB, LF, CR and for every value >= 0x20
 * other than 0xFFFE and 0xFFFF.
 * NOTE(review): values in the surrogate range are accepted here although
 * the production excludes them - confirm whether CHAR carries UTF-16 code
 * units before tightening this.
 */
#define IS_CHAR(c)							\
    (((c) >= 0x20) ?							\
        (((c) != 0xFFFE) && ((c) != 0xFFFF)) :				\
        (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D)))
72:
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * SKIP_BLANKS(p) advances the pointer p past any run of the four white
 * space characters of production [3].  U+3000 (ideographic space) is not
 * part of S and therefore is not skipped.
 * The expansion already ends with ';'.
 */
#define SKIP_BLANKS(p)							\
    while ((*(p) == 0x20) || (*(p) == 0x09) || (*(p) == 0x0A) ||	\
           (*(p) == 0x0D)) (p)++;
1.1 veillard 79:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * IS_BASECHAR(c) is non-zero when c falls in one of the ranges, or equals
 * one of the single code points, listed below.  The list can be
 * regenerated from the text of the REC with two vi substitutions:
 *   :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 *   :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
 *
 * XML_IN_RANGE(c, lo, hi) is a local helper: lo <= c <= hi, bounds included.
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_BASECHAR(c)							\
    (XML_IN_RANGE(c, 0x0041, 0x005A) || XML_IN_RANGE(c, 0x0061, 0x007A) || \
     XML_IN_RANGE(c, 0x00C0, 0x00D6) || XML_IN_RANGE(c, 0x00D8, 0x00F6) || \
     XML_IN_RANGE(c, 0x00F8, 0x00FF) || XML_IN_RANGE(c, 0x0100, 0x0131) || \
     XML_IN_RANGE(c, 0x0134, 0x013E) || XML_IN_RANGE(c, 0x0141, 0x0148) || \
     XML_IN_RANGE(c, 0x014A, 0x017E) || XML_IN_RANGE(c, 0x0180, 0x01C3) || \
     XML_IN_RANGE(c, 0x01CD, 0x01F0) || XML_IN_RANGE(c, 0x01F4, 0x01F5) || \
     XML_IN_RANGE(c, 0x01FA, 0x0217) || XML_IN_RANGE(c, 0x0250, 0x02A8) || \
     XML_IN_RANGE(c, 0x02BB, 0x02C1) || ((c) == 0x0386) ||		\
     XML_IN_RANGE(c, 0x0388, 0x038A) || ((c) == 0x038C) ||		\
     XML_IN_RANGE(c, 0x038E, 0x03A1) || XML_IN_RANGE(c, 0x03A3, 0x03CE) || \
     XML_IN_RANGE(c, 0x03D0, 0x03D6) || ((c) == 0x03DA) ||		\
     ((c) == 0x03DC) || ((c) == 0x03DE) || ((c) == 0x03E0) ||		\
     XML_IN_RANGE(c, 0x03E2, 0x03F3) || XML_IN_RANGE(c, 0x0401, 0x040C) || \
     XML_IN_RANGE(c, 0x040E, 0x044F) || XML_IN_RANGE(c, 0x0451, 0x045C) || \
     XML_IN_RANGE(c, 0x045E, 0x0481) || XML_IN_RANGE(c, 0x0490, 0x04C4) || \
     XML_IN_RANGE(c, 0x04C7, 0x04C8) || XML_IN_RANGE(c, 0x04CB, 0x04CC) || \
     XML_IN_RANGE(c, 0x04D0, 0x04EB) || XML_IN_RANGE(c, 0x04EE, 0x04F5) || \
     XML_IN_RANGE(c, 0x04F8, 0x04F9) || XML_IN_RANGE(c, 0x0531, 0x0556) || \
     ((c) == 0x0559) || XML_IN_RANGE(c, 0x0561, 0x0586) ||		\
     XML_IN_RANGE(c, 0x05D0, 0x05EA) || XML_IN_RANGE(c, 0x05F0, 0x05F2) || \
     XML_IN_RANGE(c, 0x0621, 0x063A) || XML_IN_RANGE(c, 0x0641, 0x064A) || \
     XML_IN_RANGE(c, 0x0671, 0x06B7) || XML_IN_RANGE(c, 0x06BA, 0x06BE) || \
     XML_IN_RANGE(c, 0x06C0, 0x06CE) || XML_IN_RANGE(c, 0x06D0, 0x06D3) || \
     ((c) == 0x06D5) || XML_IN_RANGE(c, 0x06E5, 0x06E6) ||		\
     XML_IN_RANGE(c, 0x0905, 0x0939) || ((c) == 0x093D) ||		\
     XML_IN_RANGE(c, 0x0958, 0x0961) || XML_IN_RANGE(c, 0x0985, 0x098C) || \
     XML_IN_RANGE(c, 0x098F, 0x0990) || XML_IN_RANGE(c, 0x0993, 0x09A8) || \
     XML_IN_RANGE(c, 0x09AA, 0x09B0) || ((c) == 0x09B2) ||		\
     XML_IN_RANGE(c, 0x09B6, 0x09B9) || XML_IN_RANGE(c, 0x09DC, 0x09DD) || \
     XML_IN_RANGE(c, 0x09DF, 0x09E1) || XML_IN_RANGE(c, 0x09F0, 0x09F1) || \
     XML_IN_RANGE(c, 0x0A05, 0x0A0A) || XML_IN_RANGE(c, 0x0A0F, 0x0A10) || \
     XML_IN_RANGE(c, 0x0A13, 0x0A28) || XML_IN_RANGE(c, 0x0A2A, 0x0A30) || \
     XML_IN_RANGE(c, 0x0A32, 0x0A33) || XML_IN_RANGE(c, 0x0A35, 0x0A36) || \
     XML_IN_RANGE(c, 0x0A38, 0x0A39) || XML_IN_RANGE(c, 0x0A59, 0x0A5C) || \
     ((c) == 0x0A5E) || XML_IN_RANGE(c, 0x0A72, 0x0A74) ||		\
     XML_IN_RANGE(c, 0x0A85, 0x0A8B) || ((c) == 0x0A8D) ||		\
     XML_IN_RANGE(c, 0x0A8F, 0x0A91) || XML_IN_RANGE(c, 0x0A93, 0x0AA8) || \
     XML_IN_RANGE(c, 0x0AAA, 0x0AB0) || XML_IN_RANGE(c, 0x0AB2, 0x0AB3) || \
     XML_IN_RANGE(c, 0x0AB5, 0x0AB9) || ((c) == 0x0ABD) ||		\
     ((c) == 0x0AE0) || XML_IN_RANGE(c, 0x0B05, 0x0B0C) ||		\
     XML_IN_RANGE(c, 0x0B0F, 0x0B10) || XML_IN_RANGE(c, 0x0B13, 0x0B28) || \
     XML_IN_RANGE(c, 0x0B2A, 0x0B30) || XML_IN_RANGE(c, 0x0B32, 0x0B33) || \
     XML_IN_RANGE(c, 0x0B36, 0x0B39) || ((c) == 0x0B3D) ||		\
     XML_IN_RANGE(c, 0x0B5C, 0x0B5D) || XML_IN_RANGE(c, 0x0B5F, 0x0B61) || \
     XML_IN_RANGE(c, 0x0B85, 0x0B8A) || XML_IN_RANGE(c, 0x0B8E, 0x0B90) || \
     XML_IN_RANGE(c, 0x0B92, 0x0B95) || XML_IN_RANGE(c, 0x0B99, 0x0B9A) || \
     ((c) == 0x0B9C) || XML_IN_RANGE(c, 0x0B9E, 0x0B9F) ||		\
     XML_IN_RANGE(c, 0x0BA3, 0x0BA4) || XML_IN_RANGE(c, 0x0BA8, 0x0BAA) || \
     XML_IN_RANGE(c, 0x0BAE, 0x0BB5) || XML_IN_RANGE(c, 0x0BB7, 0x0BB9) || \
     XML_IN_RANGE(c, 0x0C05, 0x0C0C) || XML_IN_RANGE(c, 0x0C0E, 0x0C10) || \
     XML_IN_RANGE(c, 0x0C12, 0x0C28) || XML_IN_RANGE(c, 0x0C2A, 0x0C33) || \
     XML_IN_RANGE(c, 0x0C35, 0x0C39) || XML_IN_RANGE(c, 0x0C60, 0x0C61) || \
     XML_IN_RANGE(c, 0x0C85, 0x0C8C) || XML_IN_RANGE(c, 0x0C8E, 0x0C90) || \
     XML_IN_RANGE(c, 0x0C92, 0x0CA8) || XML_IN_RANGE(c, 0x0CAA, 0x0CB3) || \
     XML_IN_RANGE(c, 0x0CB5, 0x0CB9) || ((c) == 0x0CDE) ||		\
     XML_IN_RANGE(c, 0x0CE0, 0x0CE1) || XML_IN_RANGE(c, 0x0D05, 0x0D0C) || \
     XML_IN_RANGE(c, 0x0D0E, 0x0D10) || XML_IN_RANGE(c, 0x0D12, 0x0D28) || \
     XML_IN_RANGE(c, 0x0D2A, 0x0D39) || XML_IN_RANGE(c, 0x0D60, 0x0D61) || \
     XML_IN_RANGE(c, 0x0E01, 0x0E2E) || ((c) == 0x0E30) ||		\
     XML_IN_RANGE(c, 0x0E32, 0x0E33) || XML_IN_RANGE(c, 0x0E40, 0x0E45) || \
     XML_IN_RANGE(c, 0x0E81, 0x0E82) || ((c) == 0x0E84) ||		\
     XML_IN_RANGE(c, 0x0E87, 0x0E88) || ((c) == 0x0E8A) ||		\
     ((c) == 0x0E8D) || XML_IN_RANGE(c, 0x0E94, 0x0E97) ||		\
     XML_IN_RANGE(c, 0x0E99, 0x0E9F) || XML_IN_RANGE(c, 0x0EA1, 0x0EA3) || \
     ((c) == 0x0EA5) || ((c) == 0x0EA7) ||				\
     XML_IN_RANGE(c, 0x0EAA, 0x0EAB) || XML_IN_RANGE(c, 0x0EAD, 0x0EAE) || \
     ((c) == 0x0EB0) || XML_IN_RANGE(c, 0x0EB2, 0x0EB3) ||		\
     ((c) == 0x0EBD) || XML_IN_RANGE(c, 0x0EC0, 0x0EC4) ||		\
     XML_IN_RANGE(c, 0x0F40, 0x0F47) || XML_IN_RANGE(c, 0x0F49, 0x0F69) || \
     XML_IN_RANGE(c, 0x10A0, 0x10C5) || XML_IN_RANGE(c, 0x10D0, 0x10F6) || \
     ((c) == 0x1100) || XML_IN_RANGE(c, 0x1102, 0x1103) ||		\
     XML_IN_RANGE(c, 0x1105, 0x1107) || ((c) == 0x1109) ||		\
     XML_IN_RANGE(c, 0x110B, 0x110C) || XML_IN_RANGE(c, 0x110E, 0x1112) || \
     ((c) == 0x113C) || ((c) == 0x113E) || ((c) == 0x1140) ||		\
     ((c) == 0x114C) || ((c) == 0x114E) || ((c) == 0x1150) ||		\
     XML_IN_RANGE(c, 0x1154, 0x1155) || ((c) == 0x1159) ||		\
     XML_IN_RANGE(c, 0x115F, 0x1161) || ((c) == 0x1163) ||		\
     ((c) == 0x1165) || ((c) == 0x1167) || ((c) == 0x1169) ||		\
     XML_IN_RANGE(c, 0x116D, 0x116E) || XML_IN_RANGE(c, 0x1172, 0x1173) || \
     ((c) == 0x1175) || ((c) == 0x119E) || ((c) == 0x11A8) ||		\
     ((c) == 0x11AB) || XML_IN_RANGE(c, 0x11AE, 0x11AF) ||		\
     XML_IN_RANGE(c, 0x11B7, 0x11B8) || ((c) == 0x11BA) ||		\
     XML_IN_RANGE(c, 0x11BC, 0x11C2) || ((c) == 0x11EB) ||		\
     ((c) == 0x11F0) || ((c) == 0x11F9) ||				\
     XML_IN_RANGE(c, 0x1E00, 0x1E9B) || XML_IN_RANGE(c, 0x1EA0, 0x1EF9) || \
     XML_IN_RANGE(c, 0x1F00, 0x1F15) || XML_IN_RANGE(c, 0x1F18, 0x1F1D) || \
     XML_IN_RANGE(c, 0x1F20, 0x1F45) || XML_IN_RANGE(c, 0x1F48, 0x1F4D) || \
     XML_IN_RANGE(c, 0x1F50, 0x1F57) || ((c) == 0x1F59) ||		\
     ((c) == 0x1F5B) || ((c) == 0x1F5D) ||				\
     XML_IN_RANGE(c, 0x1F5F, 0x1F7D) || XML_IN_RANGE(c, 0x1F80, 0x1FB4) || \
     XML_IN_RANGE(c, 0x1FB6, 0x1FBC) || ((c) == 0x1FBE) ||		\
     XML_IN_RANGE(c, 0x1FC2, 0x1FC4) || XML_IN_RANGE(c, 0x1FC6, 0x1FCC) || \
     XML_IN_RANGE(c, 0x1FD0, 0x1FD3) || XML_IN_RANGE(c, 0x1FD6, 0x1FDB) || \
     XML_IN_RANGE(c, 0x1FE0, 0x1FEC) || XML_IN_RANGE(c, 0x1FF2, 0x1FF4) || \
     XML_IN_RANGE(c, 0x1FF6, 0x1FFC) || ((c) == 0x2126) ||		\
     XML_IN_RANGE(c, 0x212A, 0x212B) || ((c) == 0x212E) ||		\
     XML_IN_RANGE(c, 0x2180, 0x2182) || XML_IN_RANGE(c, 0x3041, 0x3094) || \
     XML_IN_RANGE(c, 0x30A1, 0x30FA) || XML_IN_RANGE(c, 0x3105, 0x312C) || \
     XML_IN_RANGE(c, 0xAC00, 0xD7A3))
1.1 veillard 291:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * IS_DIGIT(c) is non-zero when c lies in one of the fifteen ten-digit
 * (or, for 0x0BE7, nine-digit) blocks listed below.
 *
 * XML_IN_RANGE(c, lo, hi) is a local helper: lo <= c <= hi, bounds included.
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_DIGIT(c)							\
    (XML_IN_RANGE(c, 0x0030, 0x0039) || XML_IN_RANGE(c, 0x0660, 0x0669) || \
     XML_IN_RANGE(c, 0x06F0, 0x06F9) || XML_IN_RANGE(c, 0x0966, 0x096F) || \
     XML_IN_RANGE(c, 0x09E6, 0x09EF) || XML_IN_RANGE(c, 0x0A66, 0x0A6F) || \
     XML_IN_RANGE(c, 0x0AE6, 0x0AEF) || XML_IN_RANGE(c, 0x0B66, 0x0B6F) || \
     XML_IN_RANGE(c, 0x0BE7, 0x0BEF) || XML_IN_RANGE(c, 0x0C66, 0x0C6F) || \
     XML_IN_RANGE(c, 0x0CE6, 0x0CEF) || XML_IN_RANGE(c, 0x0D66, 0x0D6F) || \
     XML_IN_RANGE(c, 0x0E50, 0x0E59) || XML_IN_RANGE(c, 0x0ED0, 0x0ED9) || \
     XML_IN_RANGE(c, 0x0F20, 0x0F29))
1.1 veillard 311:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * IS_COMBINING(c) is non-zero when c falls in one of the ranges, or equals
 * one of the single code points, listed below.
 *
 * XML_IN_RANGE(c, lo, hi) is a local helper: lo <= c <= hi, bounds included.
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_COMBINING(c)							\
    (XML_IN_RANGE(c, 0x0300, 0x0345) || XML_IN_RANGE(c, 0x0360, 0x0361) || \
     XML_IN_RANGE(c, 0x0483, 0x0486) || XML_IN_RANGE(c, 0x0591, 0x05A1) || \
     XML_IN_RANGE(c, 0x05A3, 0x05B9) || XML_IN_RANGE(c, 0x05BB, 0x05BD) || \
     ((c) == 0x05BF) || XML_IN_RANGE(c, 0x05C1, 0x05C2) ||		\
     ((c) == 0x05C4) || XML_IN_RANGE(c, 0x064B, 0x0652) ||		\
     ((c) == 0x0670) || XML_IN_RANGE(c, 0x06D6, 0x06DC) ||		\
     XML_IN_RANGE(c, 0x06DD, 0x06DF) || XML_IN_RANGE(c, 0x06E0, 0x06E4) || \
     XML_IN_RANGE(c, 0x06E7, 0x06E8) || XML_IN_RANGE(c, 0x06EA, 0x06ED) || \
     XML_IN_RANGE(c, 0x0901, 0x0903) || ((c) == 0x093C) ||		\
     XML_IN_RANGE(c, 0x093E, 0x094C) || ((c) == 0x094D) ||		\
     XML_IN_RANGE(c, 0x0951, 0x0954) || XML_IN_RANGE(c, 0x0962, 0x0963) || \
     XML_IN_RANGE(c, 0x0981, 0x0983) || ((c) == 0x09BC) ||		\
     ((c) == 0x09BE) || ((c) == 0x09BF) ||				\
     XML_IN_RANGE(c, 0x09C0, 0x09C4) || XML_IN_RANGE(c, 0x09C7, 0x09C8) || \
     XML_IN_RANGE(c, 0x09CB, 0x09CD) || ((c) == 0x09D7) ||		\
     XML_IN_RANGE(c, 0x09E2, 0x09E3) || ((c) == 0x0A02) ||		\
     ((c) == 0x0A3C) || ((c) == 0x0A3E) || ((c) == 0x0A3F) ||		\
     XML_IN_RANGE(c, 0x0A40, 0x0A42) || XML_IN_RANGE(c, 0x0A47, 0x0A48) || \
     XML_IN_RANGE(c, 0x0A4B, 0x0A4D) || XML_IN_RANGE(c, 0x0A70, 0x0A71) || \
     XML_IN_RANGE(c, 0x0A81, 0x0A83) || ((c) == 0x0ABC) ||		\
     XML_IN_RANGE(c, 0x0ABE, 0x0AC5) || XML_IN_RANGE(c, 0x0AC7, 0x0AC9) || \
     XML_IN_RANGE(c, 0x0ACB, 0x0ACD) || XML_IN_RANGE(c, 0x0B01, 0x0B03) || \
     ((c) == 0x0B3C) || XML_IN_RANGE(c, 0x0B3E, 0x0B43) ||		\
     XML_IN_RANGE(c, 0x0B47, 0x0B48) || XML_IN_RANGE(c, 0x0B4B, 0x0B4D) || \
     XML_IN_RANGE(c, 0x0B56, 0x0B57) || XML_IN_RANGE(c, 0x0B82, 0x0B83) || \
     XML_IN_RANGE(c, 0x0BBE, 0x0BC2) || XML_IN_RANGE(c, 0x0BC6, 0x0BC8) || \
     XML_IN_RANGE(c, 0x0BCA, 0x0BCD) || ((c) == 0x0BD7) ||		\
     XML_IN_RANGE(c, 0x0C01, 0x0C03) || XML_IN_RANGE(c, 0x0C3E, 0x0C44) || \
     XML_IN_RANGE(c, 0x0C46, 0x0C48) || XML_IN_RANGE(c, 0x0C4A, 0x0C4D) || \
     XML_IN_RANGE(c, 0x0C55, 0x0C56) || XML_IN_RANGE(c, 0x0C82, 0x0C83) || \
     XML_IN_RANGE(c, 0x0CBE, 0x0CC4) || XML_IN_RANGE(c, 0x0CC6, 0x0CC8) || \
     XML_IN_RANGE(c, 0x0CCA, 0x0CCD) || XML_IN_RANGE(c, 0x0CD5, 0x0CD6) || \
     XML_IN_RANGE(c, 0x0D02, 0x0D03) || XML_IN_RANGE(c, 0x0D3E, 0x0D43) || \
     XML_IN_RANGE(c, 0x0D46, 0x0D48) || XML_IN_RANGE(c, 0x0D4A, 0x0D4D) || \
     ((c) == 0x0D57) || ((c) == 0x0E31) ||				\
     XML_IN_RANGE(c, 0x0E34, 0x0E3A) || XML_IN_RANGE(c, 0x0E47, 0x0E4E) || \
     ((c) == 0x0EB1) || XML_IN_RANGE(c, 0x0EB4, 0x0EB9) ||		\
     XML_IN_RANGE(c, 0x0EBB, 0x0EBC) || XML_IN_RANGE(c, 0x0EC8, 0x0ECD) || \
     XML_IN_RANGE(c, 0x0F18, 0x0F19) || ((c) == 0x0F35) ||		\
     ((c) == 0x0F37) || ((c) == 0x0F39) || ((c) == 0x0F3E) ||		\
     ((c) == 0x0F3F) || XML_IN_RANGE(c, 0x0F71, 0x0F84) ||		\
     XML_IN_RANGE(c, 0x0F86, 0x0F8B) || XML_IN_RANGE(c, 0x0F90, 0x0F95) || \
     ((c) == 0x0F97) || XML_IN_RANGE(c, 0x0F99, 0x0FAD) ||		\
     XML_IN_RANGE(c, 0x0FB1, 0x0FB7) || ((c) == 0x0FB9) ||		\
     XML_IN_RANGE(c, 0x20D0, 0x20DC) || ((c) == 0x20E1) ||		\
     XML_IN_RANGE(c, 0x302A, 0x302F) || ((c) == 0x3099) ||		\
     ((c) == 0x309A))
1.3 veillard 411:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * IS_EXTENDER(c) is non-zero when c is one of the code points above.
 * Every alternative is joined with ||, and the Hiragana iteration-mark
 * range starts at 0x309D as in the production (0x309B and 0x309C are not
 * extenders).
 */
#define IS_EXTENDER(c)							\
    (((c) == 0x00B7) || ((c) == 0x02D0) || ((c) == 0x02D1) ||		\
     ((c) == 0x0387) || ((c) == 0x0640) || ((c) == 0x0E46) ||		\
     ((c) == 0x0EC6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309D) && ((c) <= 0x309E)) ||				\
     (((c) >= 0x30FC) && ((c) <= 0x30FE)))
1.3 veillard 424:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * IS_IDEOGRAPHIC(c) is non-zero exactly for the code points of the
 * production.  The compatibility area (0xF900 and above) is not part of
 * it and is therefore rejected.
 */
#define IS_IDEOGRAPHIC(c)						\
    ((((c) >= 0x4E00) && ((c) <= 0x9FA5)) ||				\
     ((c) == 0x3007) ||							\
     (((c) >= 0x3021) && ((c) <= 0x3029)))
433:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * IS_LETTER(c) is non-zero when either IS_BASECHAR(c) or
 * IS_IDEOGRAPHIC(c) is.
 */
#define IS_LETTER(c)							\
    (IS_BASECHAR(c) ||							\
     IS_IDEOGRAPHIC(c))
438:
439: #else
1.30 daniel 440: /************************************************************************
441: * *
442: * 8bits / ASCII version of the macros. *
443: * *
444: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * 8 bits version: IS_CHAR(c) is non-zero for TAB, LF, CR and for any
 * value greater than or equal to 0x20.
 */
#define IS_CHAR(c)							\
    (((c) >= 0x20) ||							\
     ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))
1.1 veillard 453:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8 bits version, restricted to the part of the list below 0x100:
 *   [#x41-#x5A] | [#x61-#x7A] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#xFF]
 * 0xAA and 0xBA are not in the BaseChar production, so they are not
 * accepted here; this keeps the 8 bits table identical to the low part of
 * the UNICODE one.
 */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7a)) ||				\
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||				\
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||				\
     (((c) >= 0xf8) && ((c) <= 0xff)))
465:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8 bits version: only the ASCII digits 0x30 to 0x39 qualify.
 */
#define IS_DIGIT(c) (!(((c) < 0x30) || ((c) > 0x39)))
470:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8 bits version: there is no Ideographic test in this branch, so a
 * Letter is simply whatever IS_BASECHAR accepts.
 */
#define IS_LETTER(c)							\
    IS_BASECHAR(c)
475:
1.22 daniel 476:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8 bits version: always 0, the argument is not evaluated.
 */
#define IS_COMBINING(c)							\
    0
481:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8 bits version: 0xB7 is the only alternative below 0x100.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
488:
1.21 daniel 489: #endif /* !UNICODE */
1.1 veillard 490:
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK(c) is non-zero when c is TAB, LF, CR or SPACE.
 */
#define IS_BLANK(c)							\
    (((c) == 0x09) || ((c) == 0x0A) ||					\
     ((c) == 0x0D) || ((c) == 0x20))
498:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * IS_PUBIDCHAR(c) is non-zero when c is allowed inside a public
 * identifier literal: ASCII letters and digits, space, CR, LF and the
 * punctuation listed in the production (the single quote included, the
 * double quote not).
 */
#define IS_PUBIDCHAR(c)							\
    ((((c) >= 'a') && ((c) <= 'z')) ||					\
     (((c) >= 'A') && ((c) <= 'Z')) ||					\
     (((c) >= '0') && ((c) <= '9')) ||					\
     ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||			\
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||	\
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||	\
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||	\
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||	\
     ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 512:
/*
 * SKIP_EOL(p): advance the pointer p past ONE end of line, if there is
 * one at the current position.  An end of line is CR, LF, CR LF or LF CR;
 * CR is 0x0D and LF is 0x0A (13 and 10 are their decimal values).
 * The two cases are exclusive so that a single expansion never swallows
 * two consecutive line ends.
 */
#define SKIP_EOL(p)							\
    if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; }		\
    else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; }
516:
/*
 * SKIP_BLANKS(p): advance the pointer p past any run of IS_BLANK items.
 * The expansion already ends with ';'.
 * The #undef drops any earlier definition of the same name (one exists in
 * the UNICODE branch above) so that this definition is never an
 * incompatible redefinition.
 */
#undef SKIP_BLANKS
#define SKIP_BLANKS(p)							\
    while (IS_BLANK(*(p))) (p)++;
519:
/*
 * MOVETO_ENDTAG(p): advance the pointer p until it designates a '>' or
 * an item rejected by IS_CHAR.  The argument is parenthesised on every
 * use so that an expression argument expands as intended.
 * The expansion already ends with ';'.
 */
#define MOVETO_ENDTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++;
522:
/*
 * MOVETO_STARTTAG(p): advance the pointer p until it designates a '<' or
 * an item rejected by IS_CHAR.  The argument is parenthesised on every
 * use so that an expression argument expands as intended.
 * The expansion already ends with ';'.
 */
#define MOVETO_STARTTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++;
525:
526: /*
1.3 veillard 527: * Forward declaration for recursive behaviour.
528: */
1.16 daniel 529: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.3 veillard 530:
531: /*
532: * xmlHandleData : this routine represent's the specific application
533: * behaviour when reading a piece of text.
534: *
535: * For example in WebDav, any piece made only of blanks is eliminated
536: */
537:
538: CHAR *xmlHandleData(CHAR *in) {
539: CHAR *cur;
540:
541: if (in == NULL) return(NULL);
542: cur = in;
543: while (IS_CHAR(*cur)) {
544: if (!IS_BLANK(*cur)) goto not_blank;
545: cur++;
546: }
547: free(in);
548: return(NULL);
549:
550: not_blank:
551: return(in);
552: }
553:
1.28 daniel 554: /************************************************************************
555: * *
556: * Commodity functions to handle CHARs *
557: * *
558: ************************************************************************/
559:
1.3 veillard 560: /*
1.1 veillard 561: * xmlStrndup : a strdup for array of CHAR's
562: */
563:
1.6 httpng 564: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 565: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
566:
567: if (ret == NULL) {
568: fprintf(stderr, "malloc of %d byte failed\n",
569: (len + 1) * sizeof(CHAR));
570: return(NULL);
571: }
572: memcpy(ret, cur, len * sizeof(CHAR));
573: ret[len] = 0;
574: return(ret);
575: }
576:
577: /*
578: * xmlStrdup : a strdup for CHAR's
579: */
580:
1.6 httpng 581: CHAR *xmlStrdup(const CHAR *cur) {
582: const CHAR *p = cur;
1.1 veillard 583:
584: while (IS_CHAR(*p)) p++;
585: return(xmlStrndup(cur, p - cur));
586: }
587:
588: /*
1.14 veillard 589: * xmlStrcmp : a strcmp for CHAR's
590: */
591:
592: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
593: register int tmp;
594:
595: do {
596: tmp = *str1++ - *str2++;
597: if (tmp != 0) return(tmp);
598: } while ((*str1 != 0) && (*str2 != 0));
599: return (*str1 - *str2);
600: }
601:
602: /*
603: * xmlStrncmp : a strncmp for CHAR's
604: */
605:
606: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
607: register int tmp;
608:
609: if (len <= 0) return(0);
610: do {
611: tmp = *str1++ - *str2++;
612: if (tmp != 0) return(tmp);
613: len--;
614: if (len <= 0) return(0);
615: } while ((*str1 != 0) && (*str2 != 0));
616: return (*str1 - *str2);
617: }
618:
619: /*
620: * xmlStrchr : a strchr for CHAR's
621: */
622:
623: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
624: while (*str != 0) {
625: if (*str == val) return((CHAR *) str);
626: str++;
627: }
628: return(NULL);
629: }
1.28 daniel 630:
631: /************************************************************************
632: * *
633: * Extra stuff for namespace support *
634: * Relates to http://www.w3.org/TR/WD-xml-names *
635: * *
636: ************************************************************************/
637:
638: /*
639: * xmlNamespaceParseNCName : parse an XML namespace name.
640: *
641: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
642: *
643: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
644: * CombiningChar | Extender
645: */
646:
647: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
648: const CHAR *q;
649: CHAR *ret = NULL;
650:
651: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
652: q = ctxt->cur++;
653:
654: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
655: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
656: (ctxt->cur[0] == '_') ||
657: (IS_COMBINING(ctxt->cur[0])) ||
658: (IS_EXTENDER(ctxt->cur[0])))
659: ctxt->cur++;
660:
661: ret = xmlStrndup(q, ctxt->cur - q);
662:
663: return(ret);
664: }
665:
666: /*
667: * xmlNamespaceParseQName : parse an XML qualified name
668: *
669: * [NS 5] QName ::= (Prefix ':')? LocalPart
670: *
671: * [NS 6] Prefix ::= NCName
672: *
673: * [NS 7] LocalPart ::= NCName
674: */
675:
676: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
677: CHAR *ret = NULL;
678:
679: *prefix = NULL;
680: ret = xmlNamespaceParseNCName(ctxt);
681: if (ctxt->cur[0] == ':') {
682: *prefix = ret;
683: ctxt->cur++;
684: ret = xmlNamespaceParseNCName(ctxt);
685: }
686:
687: return(ret);
688: }
689:
690: /*
691: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
692: *
693: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
694: *
695: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
696: */
697:
698: void xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
699: CHAR *name = NULL;
700:
701: if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') &&
702: (ctxt->cur[2] == 'l') && (ctxt->cur[3] == 'n') &&
703: (ctxt->cur[4] == 's')) {
704: ctxt->cur += 5;
705: if (ctxt->cur[0] == ':') {
706: ctxt->cur++;
707: name = xmlNamespaceParseNCName(ctxt);
708: }
709: }
710: }
711:
712: /************************************************************************
713: * *
714: * The parser itself *
715: * Relates to http://www.w3.org/TR/REC-xml *
716: * *
717: ************************************************************************/
1.14 veillard 718:
719: /*
1.1 veillard 720: * xmlParseName : parse an XML name.
1.22 daniel 721: *
722: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
723: * CombiningChar | Extender
724: *
725: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
726: *
727: * [6] Names ::= Name (S Name)*
1.1 veillard 728: */
729:
1.16 daniel 730: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 731: const CHAR *q;
732: CHAR *ret = NULL;
1.1 veillard 733:
1.22 daniel 734: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_') &&
735: (ctxt->cur[0] != ':')) return(NULL);
736: q = ctxt->cur++;
737:
738: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
739: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
740: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
741: (IS_COMBINING(ctxt->cur[0])) ||
742: (IS_EXTENDER(ctxt->cur[0])))
743: ctxt->cur++;
744:
745: ret = xmlStrndup(q, ctxt->cur - q);
746:
747: return(ret);
748: }
749:
750: /*
751: * xmlParseNmtoken : parse an XML Nmtoken.
752: *
753: * [7] Nmtoken ::= (NameChar)+
754: *
755: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
756: */
757:
758: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
759: const CHAR *q;
760: CHAR *ret = NULL;
761:
1.16 daniel 762: q = ctxt->cur++;
1.22 daniel 763:
1.16 daniel 764: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1.22 daniel 765: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
766: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
767: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 768: (IS_EXTENDER(ctxt->cur[0])))
769: ctxt->cur++;
1.3 veillard 770:
1.16 daniel 771: ret = xmlStrndup(q, ctxt->cur - q);
1.1 veillard 772:
1.3 veillard 773: return(ret);
1.1 veillard 774: }
775:
776: /*
1.24 daniel 777: * xmlParseEntityValue : parse a value for ENTITY decl.
778: *
779: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
780: * "'" ([^%&'] | PEReference | Reference)* "'"
781: */
782:
783: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
784: CHAR *ret = NULL;
785: const CHAR *q;
786: int needSubst;
787:
788: if (ctxt->cur[0] == '"') {
789: ctxt->cur++;
790:
791: q = ctxt->cur;
792: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
793: if (ctxt->cur[0] == '%') {
794: needSubst = 1; /* TODO !!! */
795: ctxt->cur++;
1.29 daniel 796: } else if (ctxt->cur[0] == '&') {
1.24 daniel 797: needSubst = 1; /* TODO !!! */
798: ctxt->cur++;
799: } else
800: ctxt->cur++;
801: }
802: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 803: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 804: } else {
805: ret = xmlStrndup(q, ctxt->cur - q);
806: ctxt->cur++;
807: }
808: } else if (ctxt->cur[0] == '\'') {
809: ctxt->cur++;
810: q = ctxt->cur;
811: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
812: if (ctxt->cur[0] == '%') {
813: needSubst = 1; /* TODO !!! */
814: ctxt->cur++;
1.29 daniel 815: } else if (ctxt->cur[0] == '&') {
1.24 daniel 816: needSubst = 1; /* TODO !!! */
817: ctxt->cur++;
818: } else
819: ctxt->cur++;
820: }
821: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 822: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 823: } else {
824: ret = xmlStrndup(q, ctxt->cur - q);
825: ctxt->cur++;
826: }
827: } else {
1.31 daniel 828: xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 829: }
830:
831: return(ret);
832: }
833:
834: /*
1.29 daniel 835: * xmlParseAttValue : parse a value for an attribute
836: *
837: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
838: * "'" ([^<&'] | Reference)* "'"
839: */
840:
841: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
842: CHAR *ret = NULL;
843: const CHAR *q;
844: int needSubst;
845:
846: if (ctxt->cur[0] == '"') {
847: ctxt->cur++;
848:
849: q = ctxt->cur;
850: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
851: if (ctxt->cur[0] == '%') {
852: needSubst = 1; /* TODO !!! */
853: ctxt->cur++;
854: } else if (ctxt->cur[0] == '&') {
855: needSubst = 1; /* TODO !!! */
856: ctxt->cur++;
857: } else
858: ctxt->cur++;
859: }
860: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 861: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 862: } else {
863: ret = xmlStrndup(q, ctxt->cur - q);
864: ctxt->cur++;
865: }
866: } else if (ctxt->cur[0] == '\'') {
867: ctxt->cur++;
868: q = ctxt->cur;
869: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
870: if (ctxt->cur[0] == '%') {
871: needSubst = 1; /* TODO !!! */
872: ctxt->cur++;
873: } else if (ctxt->cur[0] == '&') {
874: needSubst = 1; /* TODO !!! */
875: ctxt->cur++;
876: } else
877: ctxt->cur++;
878: }
879: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 880: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 881: } else {
882: ret = xmlStrndup(q, ctxt->cur - q);
883: ctxt->cur++;
884: }
885: } else {
1.31 daniel 886: xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 887: }
888:
889: return(ret);
890: }
891:
892: /*
1.21 daniel 893: * xmlParseSystemLiteral : parse an XML Literal
894: *
1.22 daniel 895: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 896: */
897:
898: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
899: const CHAR *q;
900: CHAR *ret = NULL;
901:
902: if (ctxt->cur[0] == '"') {
903: ctxt->cur++;
904: q = ctxt->cur;
1.22 daniel 905: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"'))
1.21 daniel 906: ctxt->cur++;
1.22 daniel 907: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 908: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 909: } else {
910: ret = xmlStrndup(q, ctxt->cur - q);
911: ctxt->cur++;
912: }
913: } else if (ctxt->cur[0] == '\'') {
914: ctxt->cur++;
915: q = ctxt->cur;
1.22 daniel 916: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
1.21 daniel 917: ctxt->cur++;
1.22 daniel 918: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 919: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 920: } else {
921: ret = xmlStrndup(q, ctxt->cur - q);
922: ctxt->cur++;
923: }
924: } else {
1.31 daniel 925: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 926: }
927:
928: return(ret);
929: }
930:
931: /*
1.27 daniel 932: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 933: *
1.22 daniel 934: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 935: */
936:
937: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
938: const CHAR *q;
939: CHAR *ret = NULL;
940: /*
941: * Name ::= (Letter | '_') (NameChar)*
942: */
943: if (ctxt->cur[0] == '"') {
944: ctxt->cur++;
945: q = ctxt->cur;
946: while (IS_PUBIDCHAR(ctxt->cur[0])) ctxt->cur++;
947: if (ctxt->cur[0] != '"') {
1.31 daniel 948: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 949: } else {
950: ret = xmlStrndup(q, ctxt->cur - q);
951: ctxt->cur++;
952: }
953: } else if (ctxt->cur[0] == '\'') {
954: ctxt->cur++;
955: q = ctxt->cur;
956: while ((IS_LETTER(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
957: ctxt->cur++;
958: if (!IS_LETTER(ctxt->cur[0])) {
1.31 daniel 959: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 960: } else {
961: ret = xmlStrndup(q, ctxt->cur - q);
962: ctxt->cur++;
963: }
964: } else {
1.31 daniel 965: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 966: }
967:
968: return(ret);
969: }
970:
971: /*
1.27 daniel 972: * xmlParseCharData: parse a CharData section.
973: * if we are within a CDATA section ']]>' marks an end of section.
974: *
975: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
976: */
977:
978: CHAR *xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
979: const CHAR *q;
980: CHAR *ret = NULL;
981:
982: q = ctxt->cur;
983: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '<') &&
984: (ctxt->cur[0] != '&')) {
985: ctxt->cur++;
986: if ((cdata) && (ctxt->cur[0] == ']') && (ctxt->cur[1] == ']') &&
987: (ctxt->cur[2] == '>')) break;
988: }
989: if (q == ctxt->cur) return(NULL);
990: ret = xmlStrndup(q, ctxt->cur - q);
991: return(ret);
992: }
993:
994: /*
1.22 daniel 995: * xmlParseExternalID: Parse an External ID
996: *
997: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
998: * | 'PUBLIC' S PubidLiteral S SystemLiteral
999: */
1000:
1001: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **PubidLiteral) {
1002: CHAR *ExternalID = NULL;
1003:
1004: *PubidLiteral = NULL;
1005: if ((ctxt->cur[0] == 'S') && (ctxt->cur[1] == 'Y') &&
1006: (ctxt->cur[2] == 'S') && (ctxt->cur[3] == 'T') &&
1007: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M')) {
1008: ctxt->cur += 6;
1009: SKIP_BLANKS(ctxt->cur);
1010: ExternalID = xmlParseSystemLiteral(ctxt);
1011: if (ExternalID == NULL)
1.31 daniel 1012: xmlParserError(ctxt,
1013: "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
1.22 daniel 1014: } else if ((ctxt->cur[0] == 'P') && (ctxt->cur[1] == 'U') &&
1015: (ctxt->cur[2] == 'B') && (ctxt->cur[3] == 'L') &&
1016: (ctxt->cur[4] == 'I') && (ctxt->cur[5] == 'C')) {
1017: ctxt->cur += 6;
1018: SKIP_BLANKS(ctxt->cur);
1019: *PubidLiteral = xmlParsePubidLiteral(ctxt);
1020: if (*PubidLiteral == NULL)
1.31 daniel 1021: xmlParserError(ctxt,
1022: "xmlParseExternalID: PUBLIC, no PubidLiteral\n");
1.22 daniel 1023: SKIP_BLANKS(ctxt->cur);
1024: ExternalID = xmlParseSystemLiteral(ctxt);
1025: if (ExternalID == NULL)
1.31 daniel 1026: xmlParserError(ctxt,
1027: "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
1.22 daniel 1028: }
1029: return(ExternalID);
1030: }
1031:
1032: /*
1.1 veillard 1033: * Parse and return a string between quotes or doublequotes
1034: */
1.16 daniel 1035: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.1 veillard 1036: CHAR *ret = NULL;
1.17 daniel 1037: const CHAR *q;
1.1 veillard 1038:
1.16 daniel 1039: if (ctxt->cur[0] == '"') {
1040: ctxt->cur++;
1041: q = ctxt->cur;
1042: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++;
1043: if (ctxt->cur[0] != '"')
1.31 daniel 1044: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1045: else {
1.16 daniel 1046: ret = xmlStrndup(q, ctxt->cur - q);
1047: ctxt->cur++;
1.1 veillard 1048: }
1.16 daniel 1049: } else if (ctxt->cur[0] == '\''){
1050: ctxt->cur++;
1051: q = ctxt->cur;
1052: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++;
1053: if (ctxt->cur[0] != '\'')
1.31 daniel 1054: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1055: else {
1.16 daniel 1056: ret = xmlStrndup(q, ctxt->cur - q);
1057: ctxt->cur++;
1.1 veillard 1058: }
1059: }
1060: return(ret);
1061: }
1062:
1063: /*
1.3 veillard 1064: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1065: * This may or may not create a node (depending on the context)
1.22 daniel 1066: *
1067: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1068: */
1.31 daniel 1069: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1070: xmlNodePtr ret = NULL;
1.17 daniel 1071: const CHAR *q, *start;
1072: const CHAR *r;
1.3 veillard 1073:
1074: /*
1.22 daniel 1075: * Check that there is a comment right here.
1.3 veillard 1076: */
1.16 daniel 1077: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
1.31 daniel 1078: (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return(NULL);
1.3 veillard 1079:
1.16 daniel 1080: ctxt->cur += 4;
1081: start = q = ctxt->cur;
1082: ctxt->cur++;
1083: r = ctxt->cur;
1084: ctxt->cur++;
1085: while (IS_CHAR(ctxt->cur[0]) &&
1086: ((ctxt->cur[0] == ':') || (ctxt->cur[0] != '>') ||
1087: (*r != '-') || (*q != '-'))) {
1088: ctxt->cur++;r++;q++;
1.3 veillard 1089: }
1.16 daniel 1090: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 1091: xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.16 daniel 1092: ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
1.3 veillard 1093: } else {
1.16 daniel 1094: ctxt->cur++;
1.31 daniel 1095: if (create) {
1096: ret = xmlNewComment(xmlStrndup(start, q - start));
1097: }
1.3 veillard 1098: }
1099: }
1100:
1101: /*
1.13 veillard 1102: * xmlParseNamespace: parse specific '<?namespace ...' constructs.
1.22 daniel 1103: *
1.29 daniel 1104: * TODO !!!!!!!!!!
1105: *
1106: * This is what the older xml-name Working Draft specified, a bunch of
1107: * other stuff may still rely on it, so support is still here as
1108: * if ot was declared on the root of the Tree:-(
1.1 veillard 1109: */
1110:
1.16 daniel 1111: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.1 veillard 1112: CHAR *href = NULL;
1113: CHAR *AS = NULL;
1.3 veillard 1114: int garbage = 0;
1.1 veillard 1115:
1116: /*
1.18 daniel 1117: * We just skipped "namespace" or "xml:namespace"
1.1 veillard 1118: */
1.16 daniel 1119: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1120:
1.16 daniel 1121: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
1.1 veillard 1122: /*
1.18 daniel 1123: * We can have "ns" or "prefix" attributes
1124: * Old encoding as 'href' or 'AS' attributes is still supported
1.1 veillard 1125: */
1.18 daniel 1126: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
1127: garbage = 0;
1128: ctxt->cur += 2;
1129: SKIP_BLANKS(ctxt->cur);
1130:
1131: if (ctxt->cur[0] != '=') continue;
1132: ctxt->cur++;
1133: SKIP_BLANKS(ctxt->cur);
1134:
1135: href = xmlParseQuotedString(ctxt);
1136: SKIP_BLANKS(ctxt->cur);
1137: } else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
1.16 daniel 1138: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
1.3 veillard 1139: garbage = 0;
1.16 daniel 1140: ctxt->cur += 4;
1141: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1142:
1.16 daniel 1143: if (ctxt->cur[0] != '=') continue;
1144: ctxt->cur++;
1145: SKIP_BLANKS(ctxt->cur);
1146:
1147: href = xmlParseQuotedString(ctxt);
1148: SKIP_BLANKS(ctxt->cur);
1.18 daniel 1149: } else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
1150: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
1151: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
1152: garbage = 0;
1153: ctxt->cur += 6;
1154: SKIP_BLANKS(ctxt->cur);
1155:
1156: if (ctxt->cur[0] != '=') continue;
1157: ctxt->cur++;
1158: SKIP_BLANKS(ctxt->cur);
1159:
1160: AS = xmlParseQuotedString(ctxt);
1161: SKIP_BLANKS(ctxt->cur);
1.16 daniel 1162: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
1.3 veillard 1163: garbage = 0;
1.16 daniel 1164: ctxt->cur += 2;
1165: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1166:
1.16 daniel 1167: if (ctxt->cur[0] != '=') continue;
1168: ctxt->cur++;
1169: SKIP_BLANKS(ctxt->cur);
1170:
1171: AS = xmlParseQuotedString(ctxt);
1172: SKIP_BLANKS(ctxt->cur);
1173: } else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
1.3 veillard 1174: garbage = 0;
1.16 daniel 1175: ctxt->cur ++;
1.1 veillard 1176: } else {
1.3 veillard 1177: /*
1178: * Found garbage when parsing the namespace
1179: */
1.31 daniel 1180: if (!garbage)
1181: xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
1.16 daniel 1182: ctxt->cur++;
1.1 veillard 1183: }
1184: }
1185:
1.16 daniel 1186: MOVETO_ENDTAG(ctxt->cur);
1187: ctxt->cur++;
1.1 veillard 1188:
1189: /*
1190: * Register the DTD.
1191: */
1192: if (href != NULL)
1.16 daniel 1193: xmlNewDtd(ctxt->doc, href, AS);
1.1 veillard 1194:
1.8 veillard 1195: if (AS != NULL) free(AS);
1196: if (href != NULL) free(href);
1.1 veillard 1197: }
1198:
1199: /*
1.22 daniel 1200: * xmlParsePITarget: parse the name of a PI
1201: *
1202: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1203: */
1204:
1205: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1206: CHAR *name;
1207:
1208: name = xmlParseName(ctxt);
1209: if ((name != NULL) && (name[3] == 0) &&
1210: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1211: ((name[1] == 'm') || (name[1] == 'M')) &&
1212: ((name[2] == 'l') || (name[2] == 'L'))) {
1213: xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1214: return(NULL);
1215: }
1216: return(name);
1217: }
1218:
1219: /*
1.3 veillard 1220: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 1221: *
1222: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 1223: */
1224:
1.16 daniel 1225: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1226: CHAR *target;
1227:
1.16 daniel 1228: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1.3 veillard 1229: /*
1230: * this is a Processing Instruction.
1231: */
1.16 daniel 1232: ctxt->cur += 2;
1.3 veillard 1233:
1234: /*
1.22 daniel 1235: * Parse the target name and check for special support like
1236: * namespace.
1237: *
1238: * TODO : PI handling should be dynamically redefinable using an
1239: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1240: */
1.22 daniel 1241: target = xmlParsePITarget(ctxt);
1242: if (target != NULL) {
1243: /*
1244: * Support for the Processing Instruction related to namespace.
1245: */
1246: if ((target[0] == 'n') && (target[1] == 'a') &&
1247: (target[2] == 'm') && (target[3] == 'e') &&
1248: (target[4] == 's') && (target[5] == 'p') &&
1249: (target[6] == 'a') && (target[7] == 'c') &&
1250: (target[8] == 'e')) {
1251: xmlParseNamespace(ctxt);
1252: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1253: (target[2] == 'l') && (target[3] == ':') &&
1254: (target[4] == 'n') && (target[5] == 'a') &&
1255: (target[6] == 'm') && (target[7] == 'e') &&
1256: (target[8] == 's') && (target[9] == 'p') &&
1257: (target[10] == 'a') && (target[11] == 'c') &&
1258: (target[12] == 'e')) {
1259: xmlParseNamespace(ctxt);
1260: } else {
1261: /* Unknown PI, ignore it ! */
1.31 daniel 1262: xmlParserError(ctxt, "xmlParsePI : skipping unknown PI %s\n",
1263: target);
1.22 daniel 1264: while (IS_CHAR(ctxt->cur[0]) &&
1.24 daniel 1265: ((ctxt->cur[0] != '?') || (ctxt->cur[1] != '>')))
1.22 daniel 1266: ctxt->cur++;
1267: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 1268: xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
1269: target);
1.24 daniel 1270: } else
1271: ctxt->cur += 2;
1.22 daniel 1272: }
1.3 veillard 1273: } else {
1.31 daniel 1274: xmlParserError(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1275: /********* Should we try to complete parsing the PI ???
1276: while (IS_CHAR(ctxt->cur[0]) &&
1277: (ctxt->cur[0] != '?') && (ctxt->cur[0] != '>'))
1278: ctxt->cur++;
1279: if (!IS_CHAR(ctxt->cur[0])) {
1280: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1281: target);
1282: }
1283: ********************************************************/
1284: }
1285: }
1286: }
1287:
1288: /*
1289: * xmlParseNotationDecl: parse a notation declaration
1290: *
1291: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1292: *
1293: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1294: *
1295: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1296: * 'PUBLIC' S PubidLiteral S SystemLiteral
1297: *
1298: * Hence there is actually 3 choices:
1299: * 'PUBLIC' S PubidLiteral
1300: * 'PUBLIC' S PubidLiteral S SystemLiteral
1301: * and 'SYSTEM' S SystemLiteral
1302: */
1303:
1304: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1305: CHAR *name;
1306:
1307: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1308: (ctxt->cur[2] == 'N') && (ctxt->cur[3] == 'O') &&
1309: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'A') &&
1310: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'I') &&
1311: (ctxt->cur[8] == 'O') && (ctxt->cur[9] == 'N') &&
1312: (IS_BLANK(ctxt->cur[10]))) {
1313: ctxt->cur += 10;
1314: SKIP_BLANKS(ctxt->cur);
1315:
1316: name = xmlParseName(ctxt);
1317: if (name == NULL) {
1.31 daniel 1318: xmlParserError(ctxt,
1319: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1320: return;
1321: }
1322: SKIP_BLANKS(ctxt->cur);
1323: /*
1.31 daniel 1324: * TODO !!!
1.22 daniel 1325: */
1326: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1327: ctxt->cur++;
1328: free(name);
1329: }
1330: }
1331:
1332: /*
1333: * xmlParseEntityDecl: parse <!ENTITY declarations
1334: *
1335: * [70] EntityDecl ::= GEDecl | PEDecl
1336: *
1337: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1338: *
1339: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1340: *
1341: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1342: *
1343: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1344: *
1345: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1346: */
1347:
1348: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1349: CHAR *name;
1.24 daniel 1350: CHAR *value = NULL;
1351: CHAR *id = NULL, *literal = NULL;
1352: CHAR *ndata = NULL;
1.22 daniel 1353: int typePEDef = 0;
1354:
1355: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1356: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'N') &&
1357: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1358: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'Y') &&
1359: (IS_BLANK(ctxt->cur[8]))) {
1360: ctxt->cur += 8;
1361: SKIP_BLANKS(ctxt->cur);
1362:
1363: if (ctxt->cur[0] == '%') {
1.16 daniel 1364: ctxt->cur++;
1.22 daniel 1365: SKIP_BLANKS(ctxt->cur);
1366: typePEDef = 1;
1367: }
1368:
1369: name = xmlParseName(ctxt);
1.24 daniel 1370: if (name == NULL) {
1.31 daniel 1371: xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1372: return;
1373: }
1374: SKIP_BLANKS(ctxt->cur);
1375:
1.22 daniel 1376: /*
1.24 daniel 1377: * TODO handle the various case of definitions...
1.22 daniel 1378: */
1.24 daniel 1379: if (typePEDef) {
1380: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1381: value = xmlParseEntityValue(ctxt);
1382: else {
1383: id = xmlParseExternalID(ctxt, &literal);
1384: }
1385: } else {
1386: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1387: value = xmlParseEntityValue(ctxt);
1388: else {
1389: id = xmlParseExternalID(ctxt, &literal);
1390: SKIP_BLANKS(ctxt->cur);
1391: if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'D') &&
1392: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1393: (ctxt->cur[4] == 'A')) {
1394: ndata = xmlParseName(ctxt);
1395: }
1396: }
1397: }
1398: SKIP_BLANKS(ctxt->cur);
1399: if (ctxt->cur[0] != '>') {
1.31 daniel 1400: xmlParserError(ctxt,
1401: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1402: } else
1.22 daniel 1403: ctxt->cur++;
1404: }
1405: }
1406:
1407: /*
1408: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1409: *
1410: * [57] EnumeratedType ::= NotationType | Enumeration
1411: *
1412: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1413: *
1414: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1415: */
1416:
1417: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1418: /*
1419: * TODO !!!
1420: */
1421: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1422: ctxt->cur++;
1423: }
1424:
1425: /*
1426: * xmlParseAttributeType: parse the Attribute list def for an element
1427: *
1428: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1429: *
1430: * [55] StringType ::= 'CDATA'
1431: *
1432: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1433: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1434: */
1435: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1436: if ((ctxt->cur[0] == 'C') && (ctxt->cur[1] == 'D') &&
1437: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1438: (ctxt->cur[4] == 'A')) {
1439: ctxt->cur += 5;
1440: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D')) {
1441: ctxt->cur += 2;
1442: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1443: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1444: (ctxt->cur[4] == 'F')) {
1445: ctxt->cur += 5;
1446: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1447: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1448: (ctxt->cur[4] == 'F') && (ctxt->cur[5] == 'S')) {
1449: ctxt->cur += 6;
1450: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1451: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1452: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'Y')) {
1453: ctxt->cur += 6;
1454: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1455: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1456: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1457: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'S')) {
1458: ctxt->cur += 8;
1459: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1460: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1461: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1462: (ctxt->cur[6] == 'N')) {
1463: ctxt->cur += 7;
1464: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1465: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1466: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1467: (ctxt->cur[6] == 'N') && (ctxt->cur[7] == 'S')) {
1468: } else {
1469: xmlParseEnumeratedType(ctxt, name);
1470: }
1471: }
1472:
1473: /*
1474: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1475: *
1476: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1477: *
1478: * [53] AttDef ::= S Name S AttType S DefaultDecl
1479: */
1480: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1481: CHAR *name;
1482:
1483: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1484: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1485: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'L') &&
1486: (ctxt->cur[6] == 'I') && (ctxt->cur[7] == 'S') &&
1487: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1488: ctxt->cur += 9;
1489: SKIP_BLANKS(ctxt->cur);
1490: name = xmlParseName(ctxt);
1491: if (name == NULL) {
1.31 daniel 1492: xmlParserError(ctxt,
1493: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1494: return;
1495: }
1496: SKIP_BLANKS(ctxt->cur);
1497: while (ctxt->cur[0] != '>') {
1498: const CHAR *check = ctxt->cur;
1499:
1500: xmlParseAttributeType(ctxt, name);
1501: SKIP_BLANKS(ctxt->cur);
1502: if (check == ctxt->cur) {
1.31 daniel 1503: xmlParserError(ctxt,
1504: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 1505: break;
1506: }
1507: }
1508: if (ctxt->cur[0] == '>')
1509: ctxt->cur++;
1510:
1511: free(name);
1512: }
1513: }
1514:
1515: /*
1516: * xmlParseElementContentDecl: parse the declaration for an Element content
1517: * either Mixed or Children, the cases EMPTY and ANY being handled
1518: * int xmlParseElementDecl.
1519: *
1520: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1521: *
1522: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1523: *
1524: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1525: *
1526: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1527: *
1528: * or
1529: *
1530: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1531: * '(' S? '#PCDATA' S? ')'
1532: */
1533:
1534: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1535: /*
1536: * TODO This has to be parsed correctly, currently we just skip until
1537: * we reach the first '>'.
1.31 daniel 1538: * !!!
1.22 daniel 1539: */
1540: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1541: ctxt->cur++;
1542: }
1543:
1544: /*
1545: * xmlParseElementDecl: parse an Element declaration.
1546: *
1547: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1548: *
1549: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1550: *
1551: * TODO There is a check [ VC: Unique Element Type Declaration ]
1552: */
1553: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1554: CHAR *name;
1555:
1556: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1557: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'L') &&
1558: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M') &&
1559: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'N') &&
1560: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1561: ctxt->cur += 9;
1562: SKIP_BLANKS(ctxt->cur);
1563: name = xmlParseName(ctxt);
1564: if (name == NULL) {
1.31 daniel 1565: xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 1566: return;
1567: }
1568: SKIP_BLANKS(ctxt->cur);
1569: if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'M') &&
1570: (ctxt->cur[2] == 'P') && (ctxt->cur[3] == 'T') &&
1571: (ctxt->cur[4] == 'Y')) {
1572: ctxt->cur += 5;
1573: /*
1574: * Element must always be empty.
1575: */
1576: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'N') &&
1577: (ctxt->cur[2] == 'Y')) {
1578: ctxt->cur += 3;
1579: /*
1580: * Element is a generic container.
1581: */
1582: } else {
1583: xmlParseElementContentDecl(ctxt, name);
1584: }
1585: SKIP_BLANKS(ctxt->cur);
1586: if (ctxt->cur[0] != '>') {
1.31 daniel 1587: xmlParserError(ctxt,
1588: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 1589: } else
1590: ctxt->cur++;
1591: }
1592: }
1593:
1594: /*
1595: * xmlParseMarkupDecl: parse Markup declarations
1596: *
1597: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1598: * NotationDecl | PI | Comment
1599: *
1600: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1601: */
1602: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1603: xmlParseElementDecl(ctxt);
1604: xmlParseAttributeListDecl(ctxt);
1605: xmlParseEntityDecl(ctxt);
1606: xmlParseNotationDecl(ctxt);
1607: xmlParsePI(ctxt);
1.31 daniel 1608: xmlParseComment(ctxt, 0);
1.22 daniel 1609: }
1610:
1611: /*
1.24 daniel 1612: * xmlParseCharRef: parse Reference declarations
1613: *
1614: * [66] CharRef ::= '&#' [0-9]+ ';' |
1615: * '&#x' [0-9a-fA-F]+ ';'
1616: */
1617: CHAR xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 1618: int val = 0;
1.24 daniel 1619: CHAR ret = 0;
1620:
1621: if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#') &&
1622: (ctxt->cur[2] == 'x')) {
1623: ctxt->cur += 3;
1624: while (ctxt->cur[0] != ';') {
1625: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1.29 daniel 1626: val = val * 16 + (ctxt->cur[0] - '0');
1.24 daniel 1627: else if ((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'f'))
1.29 daniel 1628: val = val * 16 + (ctxt->cur[0] - 'a') + 10;
1.24 daniel 1629: else if ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'F'))
1.29 daniel 1630: val = val * 16 + (ctxt->cur[0] - 'A') + 10;
1.24 daniel 1631: else {
1.31 daniel 1632: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1633: val = 0;
1.24 daniel 1634: break;
1635: }
1636: }
1637: if (ctxt->cur[0] != ';')
1638: ctxt->cur++;
1639: } else if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#')) {
1640: ctxt->cur += 2;
1641: while (ctxt->cur[0] != ';') {
1642: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1.29 daniel 1643: val = val * 16 + (ctxt->cur[0] - '0');
1.24 daniel 1644: else {
1.31 daniel 1645: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1646: val = 0;
1.24 daniel 1647: break;
1648: }
1649: }
1650: if (ctxt->cur[0] != ';')
1651: ctxt->cur++;
1652: } else {
1.31 daniel 1653: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 1654: }
1.29 daniel 1655: /*
1656: * Check the value IS_CHAR ...
1657: */
1658: if (IS_CHAR(val))
1659: ret = (CHAR) val;
1660: else {
1661: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1662: ctxt->cur - 10);
1663:
1664: ret = '?';
1665: }
1.24 daniel 1666: return(ret);
1667: }
1668:
1669: /*
1670: * xmlParseEntityRef: parse ENTITY references declarations
1671: *
1672: * [68] EntityRef ::= '&' Name ';'
1673: */
1674: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1675: CHAR *name;
1676:
1677: if (ctxt->cur[0] == '&') {
1678: ctxt->cur++;
1679: name = xmlParseName(ctxt);
1680: if (name == NULL) {
1.31 daniel 1681: xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 1682: } else {
1683: if (ctxt->cur[0] == ';') {
1684: ctxt->cur++;
1685: /*
1686: * TODO there is a VC check here !!!
1687: * [ VC: Entity Declared ]
1688: */
1689: free(name);
1690: } else {
1.31 daniel 1691: xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.24 daniel 1692: }
1693: }
1694: }
1.25 daniel 1695: return(NULL); /* TODO !!!! */
1.24 daniel 1696: }
1697:
1698: /*
1699: * xmlParseReference: parse Reference declarations
1700: *
1701: * [67] Reference ::= EntityRef | CharRef
1702: */
1703: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
1704: CHAR *name;
1705:
1706: if (ctxt->cur[0] == '&') {
1707: return(xmlParseEntityRef(ctxt));
1708: } else {
1709: ctxt->cur++;
1710: name = xmlParseName(ctxt);
1711: if (name == NULL) {
1.31 daniel 1712: xmlParserError(ctxt, "xmlParseReference: no name\n");
1.24 daniel 1713: } else {
1714: if (ctxt->cur[0] == ';') {
1715: ctxt->cur++;
1716: /*
1717: * TODO there is a VC check here !!!
1718: * [ VC: Entity Declared ]
1719: */
1720: free(name);
1721: } else {
1.31 daniel 1722: xmlParserError(ctxt, "xmlParseReference: expecting ';'\n");
1.24 daniel 1723: }
1724: }
1725: }
1.25 daniel 1726: return(NULL); /* TODO !!!! */
1.24 daniel 1727: }
1728:
1729: /*
1.22 daniel 1730: * xmlParsePEReference: parse PEReference declarations
1731: *
1732: * [69] PEReference ::= '%' Name ';'
1733: */
1.24 daniel 1734: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 1735: CHAR *name;
1736:
1737: if (ctxt->cur[0] == '%') {
1738: ctxt->cur++;
1739: name = xmlParseName(ctxt);
1740: if (name == NULL) {
1.31 daniel 1741: xmlParserError(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 1742: } else {
1743: if (ctxt->cur[0] == ';') {
1744: ctxt->cur++;
1745: /*
1746: * TODO there is a VC check here !!!
1747: * [ VC: Entity Declared ]
1748: */
1749: free(name);
1750: } else {
1.31 daniel 1751: xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
1.22 daniel 1752: }
1.3 veillard 1753: }
1754: }
1.25 daniel 1755: return(NULL); /* TODO !!!! */
1.3 veillard 1756: }
1757:
1758: /*
1.21 daniel 1759: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
1760: *
1.22 daniel 1761: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1762: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 1763: */
1764:
1765: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1766: CHAR *name;
1767: CHAR *ExternalID = NULL;
1.22 daniel 1768: CHAR *SystemID = NULL;
1.21 daniel 1769:
1770: /*
1771: * We know that '<!DOCTYPE' has been detected.
1772: */
1773: ctxt->cur += 9;
1774:
1775: SKIP_BLANKS(ctxt->cur);
1776:
1777: /*
1778: * Parse the DOCTYPE name.
1779: */
1780: name = xmlParseName(ctxt);
1781: if (name == NULL) {
1.31 daniel 1782: xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 1783: }
1784:
1785: SKIP_BLANKS(ctxt->cur);
1786:
1787: /*
1.22 daniel 1788: * Check for SystemID and ExternalID
1789: */
1790: SystemID = xmlParseExternalID(ctxt, &ExternalID);
1791: SKIP_BLANKS(ctxt->cur);
1792:
1793: /*
1794: * Is there any DTD definition ?
1795: */
1796: if (ctxt->cur[0] == '[') {
1797: ctxt->cur++;
1798: /*
1799: * Parse the succession of Markup declarations and
1800: * PEReferences.
1801: * Subsequence (markupdecl | PEReference | S)*
1802: */
1803: while (ctxt->cur[0] != ']') {
1804: const CHAR *check = ctxt->cur;
1805:
1806: SKIP_BLANKS(ctxt->cur);
1807: xmlParseMarkupDecl(ctxt);
1808: xmlParsePEReference(ctxt);
1809:
1810: if (ctxt->cur == check) {
1.31 daniel 1811: xmlParserError(ctxt,
1812: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 1813: break;
1814: }
1815: }
1816: if (ctxt->cur[0] == ']') ctxt->cur++;
1817: }
1818:
1819: /*
1820: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 1821: */
1.22 daniel 1822: if (ctxt->cur[0] != '>') {
1.31 daniel 1823: xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 1824: /* We shouldn't try to resynchronize ... */
1.21 daniel 1825: }
1.22 daniel 1826: ctxt->cur++;
1827:
1828: /*
1829: * Cleanup, since we don't use all those identifiers
1830: * TODO : the DOCTYPE if available should be stored !
1831: */
1832: if (SystemID != NULL) free(SystemID);
1833: if (ExternalID != NULL) free(ExternalID);
1834: if (name != NULL) free(name);
1.21 daniel 1835: }
1836:
1837: /*
1.3 veillard 1838: * xmlParseAttribute: parse a start of tag.
1839: *
1.22 daniel 1840: * [41] Attribute ::= Name Eq AttValue
1841: *
1842: * [25] Eq ::= S? '=' S?
1843: *
1.29 daniel 1844: * With namespace:
1845: *
1846: * [NS 11] Attribute ::= QName Eq AttValue
1.3 veillard 1847: */
1848:
1.16 daniel 1849: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1850: CHAR *name, *value = NULL;
1.29 daniel 1851: CHAR *ns;
1.3 veillard 1852:
1.29 daniel 1853: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 1854: if (name == NULL) {
1.31 daniel 1855: xmlParserError(ctxt, "error parsing attribute name\n");
1.29 daniel 1856: return;
1.3 veillard 1857: }
1.22 daniel 1858: /*
1859: * TODO: Check for Namespace ...
1860: */
1.29 daniel 1861: if (ns != NULL) {
1.31 daniel 1862: xmlParserError(ctxt,
1863: "Internal: xmlParseAttribute: don't handle attributes namespace\n");
1.29 daniel 1864: free(ns);
1865: }
1.3 veillard 1866:
1867: /*
1.29 daniel 1868: * read the value
1.3 veillard 1869: */
1.16 daniel 1870: SKIP_BLANKS(ctxt->cur);
1871: if (ctxt->cur[0] == '=') {
1872: ctxt->cur++;
1873: SKIP_BLANKS(ctxt->cur);
1.29 daniel 1874: value = xmlParseAttValue(ctxt);
1875: } else {
1.31 daniel 1876: xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
1877: name);
1.3 veillard 1878: }
1879:
1880: /*
1881: * Add the attribute to the node.
1882: */
1.17 daniel 1883: if (name != NULL) {
1.3 veillard 1884: xmlNewProp(node, name, value);
1.17 daniel 1885: free(name);
1886: }
1.29 daniel 1887: if (value != NULL)
1.17 daniel 1888: free(value);
1.3 veillard 1889: }
1890:
1891: /*
1.29 daniel 1892: * xmlParseStartTag: parse a start of tag either for rule element or
1893: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 1894: *
1895: * [40] STag ::= '<' Name (S Attribute)* S? '>'
1896: *
1.29 daniel 1897: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1898: *
1899: * With namespace:
1900: *
1901: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
1902: *
1903: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.2 veillard 1904: */
1905:
1.16 daniel 1906: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.17 daniel 1907: const CHAR *q;
1908: CHAR *ns, *name;
1.3 veillard 1909: xmlDtdPtr dtd = NULL;
1.2 veillard 1910: xmlNodePtr ret = NULL;
1911:
1.16 daniel 1912: if (ctxt->cur[0] != '<') return(NULL);
1913: ctxt->cur++;
1.3 veillard 1914:
1.29 daniel 1915: name = xmlNamespaceParseQName(ctxt, &ns);
1916: if (ns != NULL) {
1.3 veillard 1917: /*
1918: * Search the DTD associated to ns.
1919: */
1.16 daniel 1920: dtd = xmlSearchDtd(ctxt->doc, ns);
1.3 veillard 1921: if (dtd == NULL)
1.31 daniel 1922: xmlParserError(ctxt, "Start tag : Couldn't find namespace %s\n",ns);
1.3 veillard 1923: free(ns);
1.29 daniel 1924: }
1.3 veillard 1925:
1926: ret = xmlNewNode(dtd, name, NULL);
1.2 veillard 1927:
1.3 veillard 1928: /*
1929: * Now parse the attributes, it ends up with the ending
1930: *
1931: * (S Attribute)* S?
1932: */
1.16 daniel 1933: SKIP_BLANKS(ctxt->cur);
1934: while ((IS_CHAR(ctxt->cur[0])) &&
1935: (ctxt->cur[0] != '>') &&
1936: ((ctxt->cur[0] != '/') || (ctxt->cur[1] != '>'))) {
1.29 daniel 1937: const CHAR *q = ctxt->cur;
1938:
1939: xmlParseAttribute(ctxt, ret);
1940: SKIP_BLANKS(ctxt->cur);
1941:
1942: if (q == ctxt->cur) {
1.31 daniel 1943: xmlParserError(ctxt,
1944: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 1945: break;
1.3 veillard 1946: }
1947: }
1948:
1949: return(ret);
1950: }
1951:
1952: /*
1.27 daniel 1953: * xmlParseEndTag: parse an end of tag
1954: *
1955: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 1956: *
1957: * With namespace
1958: *
1959: * [9] ETag ::= '</' QName S? '>'
1.7 veillard 1960: */
1961:
1.16 daniel 1962: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
1.17 daniel 1963: const CHAR *q;
1964: CHAR *ns, *name;
1.7 veillard 1965: xmlDtdPtr dtd = NULL;
1966:
1967: *dtdPtr = NULL;
1968: *tagPtr = NULL;
1969:
1.27 daniel 1970: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1.31 daniel 1971: xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 1972: return;
1973: }
1974: ctxt->cur += 2;
1.7 veillard 1975:
1.29 daniel 1976: name = xmlNamespaceParseQName(ctxt, &ns);
1977: if (ns != NULL) {
1.7 veillard 1978: /*
1979: * Search the DTD associated to ns.
1980: */
1.16 daniel 1981: dtd = xmlSearchDtd(ctxt->doc, ns);
1.7 veillard 1982: if (dtd == NULL)
1.31 daniel 1983: xmlParserError(ctxt, "End tag : Couldn't find namespace %s\n", ns);
1.7 veillard 1984: free(ns);
1.29 daniel 1985: }
1.7 veillard 1986:
1987: *dtdPtr = dtd;
1988: *tagPtr = name;
1989:
1990: /*
1991: * We should definitely be at the ending "S? '>'" part
1992: */
1.16 daniel 1993: SKIP_BLANKS(ctxt->cur);
1994: if ((!IS_CHAR(ctxt->cur[0])) || (ctxt->cur[0] != '>')) {
1.31 daniel 1995: xmlParserError(ctxt, "End tag : expected '>'\n");
1.7 veillard 1996: } else
1.16 daniel 1997: ctxt->cur++;
1.7 veillard 1998:
1999: return;
2000: }
2001:
2002: /*
1.3 veillard 2003: * xmlParseCDSect: escaped pure raw content.
1.29 daniel 2004: *
2005: * [18] CDSect ::= CDStart CData CDEnd
2006: *
2007: * [19] CDStart ::= '<![CDATA['
2008: *
2009: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2010: *
2011: * [21] CDEnd ::= ']]>'
1.3 veillard 2012: */
1.16 daniel 2013: CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2014: const CHAR *r, *s, *base;
2015: CHAR *ret;
1.3 veillard 2016:
1.29 daniel 2017: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2018: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
2019: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
2020: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
2021: (ctxt->cur[8] == '[')) {
2022: ctxt->cur += 9;
2023: } else
2024: return(NULL);
1.16 daniel 2025: base = ctxt->cur;
2026: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2027: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2028: return(NULL);
2029: }
1.16 daniel 2030: r = ctxt->cur++;
2031: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2032: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2033: return(NULL);
2034: }
1.16 daniel 2035: s = ctxt->cur++;
2036: while (IS_CHAR(ctxt->cur[0]) &&
2037: ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) {
2038: r++;s++;ctxt->cur++;
1.3 veillard 2039: }
1.16 daniel 2040: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2041: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2042: return(NULL);
2043: }
1.16 daniel 2044: ret = xmlStrndup(base, ctxt->cur-base);
2045:
1.2 veillard 2046: return(ret);
2047: }
2048:
2049: /*
2050: * xmlParseContent: a content is
2051: * (element | PCData | Reference | CDSect | PI | Comment)
2052: *
1.27 daniel 2053: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2054: */
2055:
1.27 daniel 2056: void xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 2057: const CHAR *q;
2058: CHAR *data = NULL;
1.2 veillard 2059: xmlNodePtr ret = NULL;
2060:
1.27 daniel 2061: while ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1.29 daniel 2062: const CHAR *test;
1.27 daniel 2063: ret = NULL;
2064: data = NULL;
2065:
2066: /*
2067: * First case : a Processing Instruction.
2068: */
2069: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
2070: xmlParsePI(ctxt);
2071: }
2072: /*
2073: * Second case : a CDSection
2074: */
2075: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2076: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
2077: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
2078: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
2079: (ctxt->cur[8] == '[')) {
2080: data = xmlParseCDSect(ctxt);
2081: }
2082: /*
2083: * Third case : a comment
2084: */
2085: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2086: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
1.31 daniel 2087: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2088: }
2089: /*
2090: * Fourth case : a sub-element.
2091: */
2092: else if (ctxt->cur[0] == '<') {
2093: ret = xmlParseElement(ctxt);
2094: }
2095: /*
2096: * Last case, text. Note that References are handled directly.
2097: */
2098: else {
2099: q = ctxt->cur;
2100: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
2101:
2102: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2103: xmlParserError(ctxt, "Truncated content\n%.50s\n", q);
1.27 daniel 2104: return;
2105: }
1.3 veillard 2106:
1.27 daniel 2107: /*
2108: * Do the Entities decoding...
2109: */
2110: data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
1.3 veillard 2111: }
1.14 veillard 2112:
2113: /*
1.27 daniel 2114: * Handle the data if any. If there is no child
2115: * add it as content, otherwise create a new node of type text.
1.14 veillard 2116: */
1.27 daniel 2117: if (data != NULL)
2118: data = xmlHandleData(data);
2119: if (data != NULL) {
2120: if (node->childs == NULL)
2121: xmlNodeSetContent(node, data);
2122: else
2123: ret = xmlNewText(data);
2124: free(data);
2125: }
2126: if (ret != NULL)
2127: xmlAddChild(node, ret);
1.29 daniel 2128: if (test == ctxt->cur) {
1.31 daniel 2129: xmlParserError(ctxt, "detected an error in element content\n");
1.29 daniel 2130: break;
2131: }
1.3 veillard 2132: }
1.2 veillard 2133: }
2134:
2135: /*
2136: * xmlParseElement: parse an XML element
1.26 daniel 2137: *
2138: * [39] element ::= EmptyElemTag | STag content ETag
2139: *
2140: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 2141: */
1.26 daniel 2142:
1.2 veillard 2143:
1.16 daniel 2144: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2145: xmlNodePtr ret;
1.17 daniel 2146: const CHAR *openTag = ctxt->cur;
1.32 daniel 2147: xmlParserNodeInfo node_info;
1.27 daniel 2148: CHAR *endTag;
2149: xmlDtdPtr endDtd;
1.2 veillard 2150:
1.32 daniel 2151: /* Capture start position */
2152: node_info.begin_pos = ctxt->cur - ctxt->base;
2153: node_info.begin_line = ctxt->line;
2154:
1.16 daniel 2155: ret = xmlParseStartTag(ctxt);
1.3 veillard 2156: if (ret == NULL) {
2157: return(NULL);
2158: }
1.2 veillard 2159:
2160: /*
2161: * Check for an Empty Element.
2162: */
1.16 daniel 2163: if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
2164: ctxt->cur += 2;
1.2 veillard 2165: return(ret);
2166: }
1.16 daniel 2167: if (ctxt->cur[0] == '>') ctxt->cur++;
1.2 veillard 2168: else {
1.31 daniel 2169: xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
1.16 daniel 2170: return(NULL);
1.2 veillard 2171: }
2172:
2173: /*
2174: * Parse the content of the element:
2175: */
1.27 daniel 2176: xmlParseContent(ctxt, ret);
1.16 daniel 2177: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2178: xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
2179: openTag);
1.16 daniel 2180: return(NULL);
1.2 veillard 2181: }
2182:
2183: /*
1.27 daniel 2184: * parse the end of tag: '</' should be here.
1.2 veillard 2185: */
1.27 daniel 2186: xmlParseEndTag(ctxt, &endDtd, &endTag);
1.7 veillard 2187:
1.27 daniel 2188: /*
2189: * Check that the Name in the ETag is the same as in the STag.
2190: */
2191: if (endDtd != ret->dtd) {
1.31 daniel 2192: xmlParserError(ctxt,
2193: "Start and End tags don't use the same DTD\n%.30s\n%.30s\n",
2194: openTag, endTag);
1.27 daniel 2195: }
1.32 daniel 2196: if (endTag == NULL ) {
2197: xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
2198: } else if (strcmp(ret->name, endTag)) {
1.31 daniel 2199: xmlParserError(ctxt,
2200: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2201: openTag, endTag);
1.27 daniel 2202: }
1.7 veillard 2203:
1.27 daniel 2204: if ( endTag != NULL )
2205: free(endTag);
1.2 veillard 2206:
1.32 daniel 2207: /* Capture end position and add node */
2208: if ( ret != NULL && ctxt->record_info ) {
2209: node_info.end_pos = ctxt->cur - ctxt->base;
2210: node_info.end_line = ctxt->line;
2211: node_info.node = ret;
2212: xmlParserAddNodeInfo(ctxt, &node_info);
2213: }
1.2 veillard 2214: return(ret);
2215: }
2216:
2217: /*
1.29 daniel 2218: * xmlParseVersionNum: parse the XML version value.
2219: *
2220: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2221: */
2222: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
2223: const CHAR *q = ctxt->cur;
2224: CHAR *ret;
2225:
2226: while (IS_CHAR(ctxt->cur[0]) &&
2227: (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2228: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z')) ||
2229: ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9')) ||
1.31 daniel 2230: (ctxt->cur[0] == '_') || (ctxt->cur[0] == '.') ||
2231: (ctxt->cur[0] == ':') || (ctxt->cur[0] == '-'))) ctxt->cur++;
1.29 daniel 2232: ret = xmlStrndup(q, ctxt->cur - q);
2233: return(ret);
2234: }
2235:
2236: /*
2237: * xmlParseVersionInfo: parse the XML version.
2238: *
2239: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2240: *
2241: * [25] Eq ::= S? '=' S?
2242: */
2243:
2244: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2245: CHAR *version = NULL;
2246: const CHAR *q;
2247:
2248: if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
2249: (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
2250: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
2251: (ctxt->cur[6] == 'n')) {
2252: ctxt->cur += 7;
2253: SKIP_BLANKS(ctxt->cur);
1.31 daniel 2254: if (ctxt->cur[0] != '=') {
2255: xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
2256: return(NULL);
2257: }
2258: ctxt->cur++;
2259: SKIP_BLANKS(ctxt->cur);
1.29 daniel 2260: if (ctxt->cur[0] == '"') {
2261: ctxt->cur++;
2262: q = ctxt->cur;
2263: version = xmlParseVersionNum(ctxt);
1.31 daniel 2264: if (ctxt->cur[0] != '"')
2265: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2266: else
2267: ctxt->cur++;
1.31 daniel 2268: } else if (ctxt->cur[0] == '\''){
1.29 daniel 2269: ctxt->cur++;
2270: q = ctxt->cur;
2271: version = xmlParseVersionNum(ctxt);
1.31 daniel 2272: if (ctxt->cur[0] != '\'')
2273: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2274: else
2275: ctxt->cur++;
1.31 daniel 2276: } else {
2277: xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 2278: }
2279: }
2280: return(version);
2281: }
2282:
2283: /*
2284: * xmlParseEncName: parse the XML encoding name
2285: *
2286: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2287: */
2288: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
2289: const CHAR *q = ctxt->cur;
2290: CHAR *ret = NULL;
2291:
2292: if (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2293: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z'))) {
2294: ctxt->cur++;
2295: while (IS_CHAR(ctxt->cur[0]) &&
2296: (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2297: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z')) ||
2298: ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9')) ||
2299: (ctxt->cur[0] == '-'))) ctxt->cur++;
2300: ret = xmlStrndup(q, ctxt->cur - q);
2301: } else {
1.31 daniel 2302: xmlParserError(ctxt, "Invalid XML encoding name\n");
1.29 daniel 2303: }
2304: return(ret);
2305: }
2306:
2307: /*
2308: * xmlParseEncodingDecl: parse the XML encoding declaration
2309: *
2310: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2311: */
2312:
2313: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2314: CHAR *encoding = NULL;
2315: const CHAR *q;
2316:
2317: SKIP_BLANKS(ctxt->cur);
2318: if ((ctxt->cur[0] == 'e') && (ctxt->cur[1] == 'n') &&
2319: (ctxt->cur[2] == 'c') && (ctxt->cur[3] == 'o') &&
2320: (ctxt->cur[4] == 'd') && (ctxt->cur[5] == 'i') &&
2321: (ctxt->cur[6] == 'n') && (ctxt->cur[7] == 'g')) {
2322: ctxt->cur += 8;
2323: SKIP_BLANKS(ctxt->cur);
1.31 daniel 2324: if (ctxt->cur[0] != '=') {
2325: xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
2326: return(NULL);
2327: }
2328: ctxt->cur++;
2329: SKIP_BLANKS(ctxt->cur);
1.29 daniel 2330: if (ctxt->cur[0] == '"') {
2331: ctxt->cur++;
2332: q = ctxt->cur;
2333: encoding = xmlParseEncName(ctxt);
1.31 daniel 2334: if (ctxt->cur[0] != '"')
2335: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2336: else
2337: ctxt->cur++;
1.31 daniel 2338: } else if (ctxt->cur[0] == '\''){
1.29 daniel 2339: ctxt->cur++;
2340: q = ctxt->cur;
2341: encoding = xmlParseEncName(ctxt);
1.31 daniel 2342: if (ctxt->cur[0] != '\'')
2343: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2344: else
2345: ctxt->cur++;
1.31 daniel 2346: } else if (ctxt->cur[0] == '"'){
2347: xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 2348: }
2349: }
2350: return(encoding);
2351: }
2352:
2353: /*
2354: * xmlParseSDDecl: parse the XML standalone declaration
2355: *
2356: * [32] SDDecl ::= S 'standalone' Eq
2357: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2358: */
2359:
2360: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2361: int standalone = -1;
2362:
2363: SKIP_BLANKS(ctxt->cur);
2364: if ((ctxt->cur[0] == 's') && (ctxt->cur[1] == 't') &&
2365: (ctxt->cur[2] == 'a') && (ctxt->cur[3] == 'n') &&
2366: (ctxt->cur[4] == 'd') && (ctxt->cur[5] == 'a') &&
2367: (ctxt->cur[6] == 'l') && (ctxt->cur[7] == 'o') &&
2368: (ctxt->cur[8] == 'n') && (ctxt->cur[9] == 'e')) {
2369: ctxt->cur += 10;
2370: SKIP_BLANKS(ctxt->cur);
1.32 daniel 2371: if (ctxt->cur[0] != '=') {
2372: xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
2373: return(standalone);
2374: }
2375: ctxt->cur++;
2376: SKIP_BLANKS(ctxt->cur);
1.29 daniel 2377: if (ctxt->cur[0] == '"') {
2378: ctxt->cur++;
2379: } else if (ctxt->cur[0] == '\''){
2380: ctxt->cur++;
2381: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'o')) {
2382: standalone = 0;
1.32 daniel 2383: ctxt->cur += 2;
1.29 daniel 2384: } else if ((ctxt->cur[0] == 'y') && (ctxt->cur[1] == 'e') &&
2385: (ctxt->cur[2] == 's')) {
2386: standalone = 1;
1.32 daniel 2387: ctxt->cur += 3;
1.29 daniel 2388: } else {
1.31 daniel 2389: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2390: }
2391: if (ctxt->cur[0] != '\'')
1.31 daniel 2392: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2393: else
2394: ctxt->cur++;
2395: } else if (ctxt->cur[0] == '"'){
2396: ctxt->cur++;
2397: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'o')) {
2398: standalone = 0;
1.32 daniel 2399: ctxt->cur += 2;
1.29 daniel 2400: } else if ((ctxt->cur[0] == 'y') && (ctxt->cur[1] == 'e') &&
2401: (ctxt->cur[2] == 's')) {
2402: standalone = 1;
1.32 daniel 2403: ctxt->cur += 3;
1.29 daniel 2404: } else {
1.31 daniel 2405: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2406: }
2407: if (ctxt->cur[0] != '"')
1.31 daniel 2408: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2409: else
2410: ctxt->cur++;
2411: }
2412: }
2413: return(standalone);
2414: }
2415:
2416: /*
1.1 veillard 2417: * xmlParseXMLDecl: parse an XML declaration header
1.29 daniel 2418: *
2419: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 2420: */
2421:
1.16 daniel 2422: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 2423: CHAR *version;
2424:
2425: /*
1.19 daniel 2426: * We know that '<?xml' is here.
1.1 veillard 2427: */
1.16 daniel 2428: ctxt->cur += 5;
1.1 veillard 2429:
1.16 daniel 2430: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2431:
2432: /*
1.29 daniel 2433: * We should have the VersionInfo here.
1.1 veillard 2434: */
1.29 daniel 2435: version = xmlParseVersionInfo(ctxt);
2436: if (version == NULL)
1.16 daniel 2437: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.29 daniel 2438: else {
2439: ctxt->doc = xmlNewDoc(version);
2440: free(version);
2441: }
2442:
2443: /*
2444: * We may have the encoding declaration
2445: */
1.32 daniel 2446: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 2447:
2448: /*
1.29 daniel 2449: * We may have the standalone status.
1.1 veillard 2450: */
1.32 daniel 2451: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 2452:
1.29 daniel 2453: SKIP_BLANKS(ctxt->cur);
2454: if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
2455: ctxt->cur += 2;
1.31 daniel 2456: } else if (ctxt->cur[0] == '>') {
2457: /* Deprecated old WD ... */
2458: xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
2459: ctxt->cur++;
1.29 daniel 2460: } else {
1.31 daniel 2461: xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
1.29 daniel 2462: MOVETO_ENDTAG(ctxt->cur);
1.31 daniel 2463: ctxt->cur++;
1.29 daniel 2464: }
1.1 veillard 2465: }
2466:
2467: /*
1.22 daniel 2468: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 2469: * Misc*
2470: *
1.22 daniel 2471: * [27] Misc ::= Comment | PI | S
1.1 veillard 2472: */
2473:
1.16 daniel 2474: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
2475: while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) ||
2476: ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1.21 daniel 2477: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) ||
1.16 daniel 2478: IS_BLANK(ctxt->cur[0])) {
2479: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
2480: xmlParsePI(ctxt);
2481: } else if (IS_BLANK(ctxt->cur[0])) {
2482: ctxt->cur++;
1.1 veillard 2483: } else
1.31 daniel 2484: xmlParseComment(ctxt, 0);
1.1 veillard 2485: }
2486: }
2487:
2488: /*
1.16 daniel 2489: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 2490: *
1.22 daniel 2491: * [1] document ::= prolog element Misc*
1.29 daniel 2492: *
2493: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 2494: */
2495:
1.16 daniel 2496: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.14 veillard 2497: /*
2498: * We should check for encoding here and plug-in some
2499: * conversion code TODO !!!!
2500: */
1.1 veillard 2501:
2502: /*
2503: * Wipe out everything which is before the first '<'
2504: */
1.16 daniel 2505: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2506:
2507: /*
2508: * Check for the XMLDecl in the Prolog.
2509: */
1.16 daniel 2510: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.19 daniel 2511: (ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
2512: (ctxt->cur[4] == 'l')) {
2513: xmlParseXMLDecl(ctxt);
2514: /* SKIP_EOL(cur); */
2515: SKIP_BLANKS(ctxt->cur);
2516: } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.16 daniel 2517: (ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
2518: (ctxt->cur[4] == 'L')) {
1.19 daniel 2519: /*
2520: * The first drafts were using <?XML and the final W3C REC
2521: * now use <?xml ...
2522: */
1.16 daniel 2523: xmlParseXMLDecl(ctxt);
1.1 veillard 2524: /* SKIP_EOL(cur); */
1.16 daniel 2525: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2526: } else {
1.16 daniel 2527: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 2528: }
2529:
2530: /*
2531: * The Misc part of the Prolog
2532: */
1.16 daniel 2533: xmlParseMisc(ctxt);
1.1 veillard 2534:
2535: /*
1.29 daniel 2536: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 2537: * (doctypedecl Misc*)?
2538: */
1.22 daniel 2539: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2540: (ctxt->cur[2] == 'D') && (ctxt->cur[3] == 'O') &&
2541: (ctxt->cur[4] == 'C') && (ctxt->cur[5] == 'T') &&
2542: (ctxt->cur[6] == 'Y') && (ctxt->cur[7] == 'P') &&
2543: (ctxt->cur[8] == 'E')) {
2544: xmlParseDocTypeDecl(ctxt);
2545: xmlParseMisc(ctxt);
1.21 daniel 2546: }
2547:
2548: /*
2549: * Time to start parsing the tree itself
1.1 veillard 2550: */
1.16 daniel 2551: ctxt->doc->root = xmlParseElement(ctxt);
1.33 ! daniel 2552:
! 2553: /*
! 2554: * The Misc part at the end
! 2555: */
! 2556: xmlParseMisc(ctxt);
1.16 daniel 2557:
2558: return(0);
2559: }
2560:
2561: /*
2562: * xmlParseDoc : parse an XML in-memory document and build a tree.
2563: */
2564:
2565: xmlDocPtr xmlParseDoc(CHAR *cur) {
2566: xmlDocPtr ret;
2567: xmlParserCtxtPtr ctxt;
2568:
2569: if (cur == NULL) return(NULL);
1.1 veillard 2570:
1.16 daniel 2571: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2572: if (ctxt == NULL) {
2573: perror("malloc");
2574: return(NULL);
2575: }
2576:
1.19 daniel 2577: xmlInitParserCtxt(ctxt);
1.16 daniel 2578: ctxt->base = cur;
2579: ctxt->cur = cur;
2580:
2581: xmlParseDocument(ctxt);
2582: ret = ctxt->doc;
1.20 daniel 2583: free(ctxt->nodes);
1.16 daniel 2584: free(ctxt);
2585:
1.1 veillard 2586: return(ret);
2587: }
2588:
1.9 httpng 2589: /*
2590: * xmlParseFile : parse an XML file and build a tree.
2591: */
2592:
2593: xmlDocPtr xmlParseFile(const char *filename) {
2594: xmlDocPtr ret;
1.20 daniel 2595: #ifdef HAVE_ZLIB_H
2596: gzFile input;
2597: #else
1.9 httpng 2598: int input;
1.20 daniel 2599: #endif
1.9 httpng 2600: int res;
2601: struct stat buf;
2602: char *buffer;
1.16 daniel 2603: xmlParserCtxtPtr ctxt;
1.9 httpng 2604:
1.11 veillard 2605: res = stat(filename, &buf);
1.9 httpng 2606: if (res < 0) return(NULL);
2607:
1.20 daniel 2608: #ifdef HAVE_ZLIB_H
2609: retry_bigger:
2610: buffer = malloc((buf.st_size * 20) + 100);
2611: #else
1.9 httpng 2612: buffer = malloc(buf.st_size + 100);
1.20 daniel 2613: #endif
1.9 httpng 2614: if (buffer == NULL) {
2615: perror("malloc");
2616: return(NULL);
2617: }
2618:
2619: memset(buffer, 0, sizeof(buffer));
1.20 daniel 2620: #ifdef HAVE_ZLIB_H
2621: input = gzopen (filename, "r");
2622: if (input == NULL) {
2623: fprintf (stderr, "Cannot read file %s :\n", filename);
2624: perror ("gzopen failed");
2625: return(NULL);
2626: }
2627: #else
1.9 httpng 2628: input = open (filename, O_RDONLY);
2629: if (input < 0) {
2630: fprintf (stderr, "Cannot read file %s :\n", filename);
2631: perror ("open failed");
2632: return(NULL);
2633: }
1.20 daniel 2634: #endif
2635: #ifdef HAVE_ZLIB_H
2636: res = gzread(input, buffer, 20 * buf.st_size);
2637: #else
1.9 httpng 2638: res = read(input, buffer, buf.st_size);
1.20 daniel 2639: #endif
1.9 httpng 2640: if (res < 0) {
2641: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 2642: #ifdef HAVE_ZLIB_H
2643: perror ("gzread failed");
2644: #else
1.9 httpng 2645: perror ("read failed");
1.20 daniel 2646: #endif
1.9 httpng 2647: return(NULL);
2648: }
1.20 daniel 2649: #ifdef HAVE_ZLIB_H
2650: gzclose(input);
2651: if (res >= 20 * buf.st_size) {
2652: free(buffer);
2653: buf.st_size *= 2;
2654: goto retry_bigger;
2655: }
2656: buf.st_size = res;
2657: #else
1.9 httpng 2658: close(input);
1.20 daniel 2659: #endif
2660:
1.9 httpng 2661:
1.16 daniel 2662: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2663: if (ctxt == NULL) {
2664: perror("malloc");
2665: return(NULL);
2666: }
1.9 httpng 2667: buffer[buf.st_size] = '\0';
1.16 daniel 2668:
1.19 daniel 2669: xmlInitParserCtxt(ctxt);
1.17 daniel 2670: ctxt->filename = filename;
1.16 daniel 2671: ctxt->base = buffer;
2672: ctxt->cur = buffer;
2673:
2674: xmlParseDocument(ctxt);
2675: ret = ctxt->doc;
1.9 httpng 2676: free(buffer);
1.20 daniel 2677: free(ctxt->nodes);
2678: free(ctxt);
2679:
2680: return(ret);
2681: }
2682:
1.32 daniel 2683:
1.20 daniel 2684: /*
1.32 daniel 2685: * xmlParseMemory : parse an XML memory block and build a tree.
1.20 daniel 2686: */
2687: xmlDocPtr xmlParseMemory(char *buffer, int size) {
2688: xmlDocPtr ret;
2689: xmlParserCtxtPtr ctxt;
2690: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2691: if (ctxt == NULL) {
2692: perror("malloc");
2693: return(NULL);
2694: }
2695:
2696: buffer[size - 1] = '\0';
2697:
2698: xmlInitParserCtxt(ctxt);
2699: ctxt->base = buffer;
2700: ctxt->cur = buffer;
2701:
2702: xmlParseDocument(ctxt);
2703: ret = ctxt->doc;
2704: free(ctxt->nodes);
1.16 daniel 2705: free(ctxt);
2706:
1.9 httpng 2707: return(ret);
1.17 daniel 2708: }
2709:
2710:
2711: /* Initialize parser context */
2712: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2713: {
1.32 daniel 2714: int i;
1.19 daniel 2715:
1.32 daniel 2716: ctxt->filename = NULL;
2717: ctxt->base = NULL;
2718: ctxt->cur = NULL;
2719: ctxt->line = 1;
2720: ctxt->col = 1;
2721: ctxt->doc = NULL;
2722: ctxt->depth = 0;
2723: ctxt->max_depth = 10;
2724: ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
2725: if (ctxt->nodes == NULL) {
2726: fprintf(stderr, "malloc of %d byte failed\n",
2727: ctxt->max_depth * sizeof(xmlNodePtr));
2728: ctxt->max_depth = 0;
2729: } else {
2730: for (i = 0;i < ctxt->max_depth;i++)
2731: ctxt->nodes[i] = NULL;
2732: }
2733: ctxt->record_info = 0;
2734: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 2735: }
2736:
2737:
1.19 daniel 2738: /*
2739: * Clear (release owned resources) and reinitialize context
2740: */
1.32 daniel 2741: void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 2742: {
1.32 daniel 2743: if ( ctxt->nodes != NULL )
2744: free(ctxt->nodes);
2745: xmlClearNodeInfoSeq(&ctxt->node_seq);
2746: xmlInitParserCtxt(ctxt);
1.17 daniel 2747: }
2748:
2749:
1.19 daniel 2750: /*
2751: * Setup the parser context to parse a new buffer; Clears any prior
2752: * contents from the parser context. The buffer parameter must not be
2753: * NULL, but the filename parameter can be
2754: */
1.17 daniel 2755: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
2756: const char* filename)
2757: {
2758: xmlClearParserCtxt(ctxt);
2759: ctxt->base = buffer;
2760: ctxt->cur = buffer;
2761: ctxt->filename = filename;
2762: }
2763:
1.32 daniel 2764:
2765: /*
2766: * xmlParserFindNodeInfo : Find the parser node info struct for a given node
2767: */
2768: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2769: const xmlNode* node)
2770: {
2771: unsigned long pos;
2772:
2773: /* Find position where node should be at */
2774: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2775: if ( ctx->node_seq.buffer[pos].node == node )
2776: return &ctx->node_seq.buffer[pos];
2777: else
2778: return NULL;
2779: }
2780:
2781:
2782: /*
2783: * xmlInitNodeInfoSeq -- Initialize (set to initial state) node info sequence
2784: */
2785: void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2786: {
2787: seq->length = 0;
2788: seq->maximum = 0;
2789: seq->buffer = NULL;
2790: }
2791:
2792: /*
2793: * xmlClearNodeInfoSeq -- Clear (release memory and reinitialize) node
2794: * info sequence
2795: */
2796: void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2797: {
2798: if ( seq->buffer != NULL )
2799: free(seq->buffer);
2800: xmlInitNodeInfoSeq(seq);
2801: }
2802:
2803:
2804: /*
2805: * xmlParserFindNodeInfoIndex : Find the index that the info record for
2806: * the given node is or should be at in a sorted sequence
2807: */
2808: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2809: const xmlNode* node)
2810: {
2811: unsigned long upper, lower, middle;
2812: int found = 0;
2813:
2814: /* Do a binary search for the key */
2815: lower = 1;
2816: upper = seq->length;
2817: middle = 0;
2818: while ( lower <= upper && !found) {
2819: middle = lower + (upper - lower) / 2;
2820: if ( node == seq->buffer[middle - 1].node )
2821: found = 1;
2822: else if ( node < seq->buffer[middle - 1].node )
2823: upper = middle - 1;
2824: else
2825: lower = middle + 1;
2826: }
2827:
2828: /* Return position */
2829: if ( middle == 0 || seq->buffer[middle - 1].node < node )
2830: return middle;
2831: else
2832: return middle - 1;
2833: }
2834:
2835:
2836: /*
2837: * xmlParserAddNodeInfo : Insert node info record into sorted sequence
2838: */
2839: void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
2840: const xmlParserNodeInfo* info)
2841: {
2842: unsigned long pos;
2843: static unsigned int block_size = 5;
2844:
2845: /* Find pos and check to see if node is already in the sequence */
2846: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
2847: if ( pos < ctx->node_seq.length
2848: && ctx->node_seq.buffer[pos].node == info->node ) {
2849: ctx->node_seq.buffer[pos] = *info;
2850: }
2851:
2852: /* Otherwise, we need to add new node to buffer */
2853: else {
2854: /* Expand buffer by 5 if needed */
2855: if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
2856: xmlParserNodeInfo* tmp_buffer;
2857: unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
2858: *(ctx->node_seq.maximum + block_size));
2859:
2860: if ( ctx->node_seq.buffer == NULL )
2861: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
2862: else
2863: tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
2864:
2865: if ( tmp_buffer == NULL ) {
2866: xmlParserError(ctx, "Out of memory");
2867: return;
2868: }
2869: ctx->node_seq.buffer = tmp_buffer;
2870: ctx->node_seq.maximum += block_size;
2871: }
2872:
2873: /* If position is not at end, move elements out of the way */
2874: if ( pos != ctx->node_seq.length ) {
2875: unsigned long i;
2876:
2877: for ( i = ctx->node_seq.length; i > pos; i-- )
2878: ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
2879: }
2880:
2881: /* Copy element and increase length */
2882: ctx->node_seq.buffer[pos] = *info;
2883: ctx->node_seq.length++;
2884: }
2885: }
Webmaster