Annotation of XML/parser.c, revision 1.31
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.31 ! daniel 6: * $Id: parser.c,v 1.30 1998/08/04 20:35:40 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
34: /*
35: * A few macros needed to help building the parser.
36: */
37:
/*
 * PUSH_AND_POP(type, name): generate a pair of stack helpers, name##Push()
 * and name##Pop(), working on three fields of the parser context:
 *   ctxt->name##tab : growable array of `type`
 *   ctxt->name##Nr  : number of entries currently stored
 *   ctxt->name##Max : allocated capacity, doubled when the array is full
 *
 * name##Push() returns the index at which the value was stored.
 * name##Pop() returns the removed value, or 0 when the stack is empty.
 * A failed realloc() is fatal: a message is printed and exit(1) is called.
 *
 * The generated functions use a variable called `ctxt` that is not one of
 * their parameters, so it has to be visible where the macro is expanded.
 * Doubling a capacity of 0 yields 0, so name##Max is expected to start
 * non-zero -- NOTE(review): confirm against the context initialisation.
 */
#define PUSH_AND_POP(type, name)					\
int name##Push(type value) {						\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        ctxt->name##Max *= 2;						\
        ctxt->name##tab = (void *) realloc(ctxt->name##tab,		\
                ctxt->name##Max * sizeof(ctxt->name##tab[0]));		\
        if (ctxt->name##tab == NULL) {					\
            fprintf(stderr, "realloc failed !\n");			\
            exit(1);							\
        }								\
    }									\
    ctxt->name##tab[ctxt->name##Nr] = value;				\
    return(ctxt->name##Nr++);						\
}									\
type name##Pop() {							\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    return(ctxt->name##tab[ctxt->name##Nr]);				\
}

1.1 veillard 58: #ifdef UNICODE
1.30 daniel 59: /************************************************************************
60: * *
61: * UNICODE version of the macros. *
62: * *
63: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *              | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Evaluates to non-zero when c is a legal XML character. The value 0 is
 * never a Char, so the test doubles as an end-of-buffer check.
 * The argument is evaluated several times: pass no side effects.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
72:
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * SKIP_BLANKS(p): advance the pointer p past any run of the four white
 * space characters of production [3]. Only those four are skipped;
 * U+3000 (ideographic space) is not part of S and is left in place.
 * The expansion is a complete statement that already ends with ';'.
 */
#define SKIP_BLANKS(p) 							\
    while ((*(p) == 0x20) || (*(p) == 0x09) || (*(p) == 0xa) ||		\
           (*(p) == 0xd)) (p)++;
1.1 veillard 79:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * IS_BASECHAR(c) is non-zero when the code point c belongs to the BaseChar
 * production. The list was derived mechanically from the REC, turning each
 * [#xAAAA-#xBBBB] range and each #xAAAA single value into one test.
 * The argument is evaluated many times: pass no side effects.
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(v, lo, hi) (((v) >= (lo)) && ((v) <= (hi)))
#endif

#define IS_BASECHAR(c)							\
    (/* Latin */							\
     XML_IN_RANGE(c, 0x0041, 0x005A) || XML_IN_RANGE(c, 0x0061, 0x007A) || \
     XML_IN_RANGE(c, 0x00C0, 0x00D6) || XML_IN_RANGE(c, 0x00D8, 0x00F6) || \
     XML_IN_RANGE(c, 0x00F8, 0x00FF) || XML_IN_RANGE(c, 0x0100, 0x0131) || \
     XML_IN_RANGE(c, 0x0134, 0x013E) || XML_IN_RANGE(c, 0x0141, 0x0148) || \
     XML_IN_RANGE(c, 0x014A, 0x017E) || XML_IN_RANGE(c, 0x0180, 0x01C3) || \
     XML_IN_RANGE(c, 0x01CD, 0x01F0) || XML_IN_RANGE(c, 0x01F4, 0x01F5) || \
     XML_IN_RANGE(c, 0x01FA, 0x0217) ||					\
     /* IPA extensions, spacing modifier letters */			\
     XML_IN_RANGE(c, 0x0250, 0x02A8) || XML_IN_RANGE(c, 0x02BB, 0x02C1) || \
     /* Greek */							\
     ((c) == 0x0386) || XML_IN_RANGE(c, 0x0388, 0x038A) ||		\
     ((c) == 0x038C) || XML_IN_RANGE(c, 0x038E, 0x03A1) ||		\
     XML_IN_RANGE(c, 0x03A3, 0x03CE) || XML_IN_RANGE(c, 0x03D0, 0x03D6) || \
     ((c) == 0x03DA) || ((c) == 0x03DC) || ((c) == 0x03DE) ||		\
     ((c) == 0x03E0) || XML_IN_RANGE(c, 0x03E2, 0x03F3) ||		\
     /* Cyrillic */							\
     XML_IN_RANGE(c, 0x0401, 0x040C) || XML_IN_RANGE(c, 0x040E, 0x044F) || \
     XML_IN_RANGE(c, 0x0451, 0x045C) || XML_IN_RANGE(c, 0x045E, 0x0481) || \
     XML_IN_RANGE(c, 0x0490, 0x04C4) || XML_IN_RANGE(c, 0x04C7, 0x04C8) || \
     XML_IN_RANGE(c, 0x04CB, 0x04CC) || XML_IN_RANGE(c, 0x04D0, 0x04EB) || \
     XML_IN_RANGE(c, 0x04EE, 0x04F5) || XML_IN_RANGE(c, 0x04F8, 0x04F9) || \
     /* Armenian */							\
     XML_IN_RANGE(c, 0x0531, 0x0556) || ((c) == 0x0559) ||		\
     XML_IN_RANGE(c, 0x0561, 0x0586) ||					\
     /* Hebrew */							\
     XML_IN_RANGE(c, 0x05D0, 0x05EA) || XML_IN_RANGE(c, 0x05F0, 0x05F2) || \
     /* Arabic */							\
     XML_IN_RANGE(c, 0x0621, 0x063A) || XML_IN_RANGE(c, 0x0641, 0x064A) || \
     XML_IN_RANGE(c, 0x0671, 0x06B7) || XML_IN_RANGE(c, 0x06BA, 0x06BE) || \
     XML_IN_RANGE(c, 0x06C0, 0x06CE) || XML_IN_RANGE(c, 0x06D0, 0x06D3) || \
     ((c) == 0x06D5) || XML_IN_RANGE(c, 0x06E5, 0x06E6) ||		\
     /* Devanagari */							\
     XML_IN_RANGE(c, 0x0905, 0x0939) || ((c) == 0x093D) ||		\
     XML_IN_RANGE(c, 0x0958, 0x0961) ||					\
     /* Bengali */							\
     XML_IN_RANGE(c, 0x0985, 0x098C) || XML_IN_RANGE(c, 0x098F, 0x0990) || \
     XML_IN_RANGE(c, 0x0993, 0x09A8) || XML_IN_RANGE(c, 0x09AA, 0x09B0) || \
     ((c) == 0x09B2) || XML_IN_RANGE(c, 0x09B6, 0x09B9) ||		\
     XML_IN_RANGE(c, 0x09DC, 0x09DD) || XML_IN_RANGE(c, 0x09DF, 0x09E1) || \
     XML_IN_RANGE(c, 0x09F0, 0x09F1) ||					\
     /* Gurmukhi */							\
     XML_IN_RANGE(c, 0x0A05, 0x0A0A) || XML_IN_RANGE(c, 0x0A0F, 0x0A10) || \
     XML_IN_RANGE(c, 0x0A13, 0x0A28) || XML_IN_RANGE(c, 0x0A2A, 0x0A30) || \
     XML_IN_RANGE(c, 0x0A32, 0x0A33) || XML_IN_RANGE(c, 0x0A35, 0x0A36) || \
     XML_IN_RANGE(c, 0x0A38, 0x0A39) || XML_IN_RANGE(c, 0x0A59, 0x0A5C) || \
     ((c) == 0x0A5E) || XML_IN_RANGE(c, 0x0A72, 0x0A74) ||		\
     /* Gujarati */							\
     XML_IN_RANGE(c, 0x0A85, 0x0A8B) || ((c) == 0x0A8D) ||		\
     XML_IN_RANGE(c, 0x0A8F, 0x0A91) || XML_IN_RANGE(c, 0x0A93, 0x0AA8) || \
     XML_IN_RANGE(c, 0x0AAA, 0x0AB0) || XML_IN_RANGE(c, 0x0AB2, 0x0AB3) || \
     XML_IN_RANGE(c, 0x0AB5, 0x0AB9) || ((c) == 0x0ABD) ||		\
     ((c) == 0x0AE0) ||							\
     /* Oriya */							\
     XML_IN_RANGE(c, 0x0B05, 0x0B0C) || XML_IN_RANGE(c, 0x0B0F, 0x0B10) || \
     XML_IN_RANGE(c, 0x0B13, 0x0B28) || XML_IN_RANGE(c, 0x0B2A, 0x0B30) || \
     XML_IN_RANGE(c, 0x0B32, 0x0B33) || XML_IN_RANGE(c, 0x0B36, 0x0B39) || \
     ((c) == 0x0B3D) || XML_IN_RANGE(c, 0x0B5C, 0x0B5D) ||		\
     XML_IN_RANGE(c, 0x0B5F, 0x0B61) ||					\
     /* Tamil */							\
     XML_IN_RANGE(c, 0x0B85, 0x0B8A) || XML_IN_RANGE(c, 0x0B8E, 0x0B90) || \
     XML_IN_RANGE(c, 0x0B92, 0x0B95) || XML_IN_RANGE(c, 0x0B99, 0x0B9A) || \
     ((c) == 0x0B9C) || XML_IN_RANGE(c, 0x0B9E, 0x0B9F) ||		\
     XML_IN_RANGE(c, 0x0BA3, 0x0BA4) || XML_IN_RANGE(c, 0x0BA8, 0x0BAA) || \
     XML_IN_RANGE(c, 0x0BAE, 0x0BB5) || XML_IN_RANGE(c, 0x0BB7, 0x0BB9) || \
     /* Telugu */							\
     XML_IN_RANGE(c, 0x0C05, 0x0C0C) || XML_IN_RANGE(c, 0x0C0E, 0x0C10) || \
     XML_IN_RANGE(c, 0x0C12, 0x0C28) || XML_IN_RANGE(c, 0x0C2A, 0x0C33) || \
     XML_IN_RANGE(c, 0x0C35, 0x0C39) || XML_IN_RANGE(c, 0x0C60, 0x0C61) || \
     /* Kannada */							\
     XML_IN_RANGE(c, 0x0C85, 0x0C8C) || XML_IN_RANGE(c, 0x0C8E, 0x0C90) || \
     XML_IN_RANGE(c, 0x0C92, 0x0CA8) || XML_IN_RANGE(c, 0x0CAA, 0x0CB3) || \
     XML_IN_RANGE(c, 0x0CB5, 0x0CB9) || ((c) == 0x0CDE) ||		\
     XML_IN_RANGE(c, 0x0CE0, 0x0CE1) ||					\
     /* Malayalam */							\
     XML_IN_RANGE(c, 0x0D05, 0x0D0C) || XML_IN_RANGE(c, 0x0D0E, 0x0D10) || \
     XML_IN_RANGE(c, 0x0D12, 0x0D28) || XML_IN_RANGE(c, 0x0D2A, 0x0D39) || \
     XML_IN_RANGE(c, 0x0D60, 0x0D61) ||					\
     /* Thai */								\
     XML_IN_RANGE(c, 0x0E01, 0x0E2E) || ((c) == 0x0E30) ||		\
     XML_IN_RANGE(c, 0x0E32, 0x0E33) || XML_IN_RANGE(c, 0x0E40, 0x0E45) || \
     /* Lao */								\
     XML_IN_RANGE(c, 0x0E81, 0x0E82) || ((c) == 0x0E84) ||		\
     XML_IN_RANGE(c, 0x0E87, 0x0E88) || ((c) == 0x0E8A) ||		\
     ((c) == 0x0E8D) || XML_IN_RANGE(c, 0x0E94, 0x0E97) ||		\
     XML_IN_RANGE(c, 0x0E99, 0x0E9F) || XML_IN_RANGE(c, 0x0EA1, 0x0EA3) || \
     ((c) == 0x0EA5) || ((c) == 0x0EA7) ||				\
     XML_IN_RANGE(c, 0x0EAA, 0x0EAB) || XML_IN_RANGE(c, 0x0EAD, 0x0EAE) || \
     ((c) == 0x0EB0) || XML_IN_RANGE(c, 0x0EB2, 0x0EB3) ||		\
     ((c) == 0x0EBD) || XML_IN_RANGE(c, 0x0EC0, 0x0EC4) ||		\
     /* Tibetan */							\
     XML_IN_RANGE(c, 0x0F40, 0x0F47) || XML_IN_RANGE(c, 0x0F49, 0x0F69) || \
     /* Georgian */							\
     XML_IN_RANGE(c, 0x10A0, 0x10C5) || XML_IN_RANGE(c, 0x10D0, 0x10F6) || \
     /* Hangul Jamo */							\
     ((c) == 0x1100) || XML_IN_RANGE(c, 0x1102, 0x1103) ||		\
     XML_IN_RANGE(c, 0x1105, 0x1107) || ((c) == 0x1109) ||		\
     XML_IN_RANGE(c, 0x110B, 0x110C) || XML_IN_RANGE(c, 0x110E, 0x1112) || \
     ((c) == 0x113C) || ((c) == 0x113E) || ((c) == 0x1140) ||		\
     ((c) == 0x114C) || ((c) == 0x114E) || ((c) == 0x1150) ||		\
     XML_IN_RANGE(c, 0x1154, 0x1155) || ((c) == 0x1159) ||		\
     XML_IN_RANGE(c, 0x115F, 0x1161) || ((c) == 0x1163) ||		\
     ((c) == 0x1165) || ((c) == 0x1167) || ((c) == 0x1169) ||		\
     XML_IN_RANGE(c, 0x116D, 0x116E) || XML_IN_RANGE(c, 0x1172, 0x1173) || \
     ((c) == 0x1175) || ((c) == 0x119E) || ((c) == 0x11A8) ||		\
     ((c) == 0x11AB) || XML_IN_RANGE(c, 0x11AE, 0x11AF) ||		\
     XML_IN_RANGE(c, 0x11B7, 0x11B8) || ((c) == 0x11BA) ||		\
     XML_IN_RANGE(c, 0x11BC, 0x11C2) || ((c) == 0x11EB) ||		\
     ((c) == 0x11F0) || ((c) == 0x11F9) ||				\
     /* Latin extended additional */					\
     XML_IN_RANGE(c, 0x1E00, 0x1E9B) || XML_IN_RANGE(c, 0x1EA0, 0x1EF9) || \
     /* Greek extended */						\
     XML_IN_RANGE(c, 0x1F00, 0x1F15) || XML_IN_RANGE(c, 0x1F18, 0x1F1D) || \
     XML_IN_RANGE(c, 0x1F20, 0x1F45) || XML_IN_RANGE(c, 0x1F48, 0x1F4D) || \
     XML_IN_RANGE(c, 0x1F50, 0x1F57) || ((c) == 0x1F59) ||		\
     ((c) == 0x1F5B) || ((c) == 0x1F5D) ||				\
     XML_IN_RANGE(c, 0x1F5F, 0x1F7D) || XML_IN_RANGE(c, 0x1F80, 0x1FB4) || \
     XML_IN_RANGE(c, 0x1FB6, 0x1FBC) || ((c) == 0x1FBE) ||		\
     XML_IN_RANGE(c, 0x1FC2, 0x1FC4) || XML_IN_RANGE(c, 0x1FC6, 0x1FCC) || \
     XML_IN_RANGE(c, 0x1FD0, 0x1FD3) || XML_IN_RANGE(c, 0x1FD6, 0x1FDB) || \
     XML_IN_RANGE(c, 0x1FE0, 0x1FEC) || XML_IN_RANGE(c, 0x1FF2, 0x1FF4) || \
     XML_IN_RANGE(c, 0x1FF6, 0x1FFC) ||					\
     /* Letterlike symbols, Roman numerals */				\
     ((c) == 0x2126) || XML_IN_RANGE(c, 0x212A, 0x212B) ||		\
     ((c) == 0x212E) || XML_IN_RANGE(c, 0x2180, 0x2182) ||		\
     /* Hiragana, Katakana, Bopomofo, Hangul syllables */		\
     XML_IN_RANGE(c, 0x3041, 0x3094) || XML_IN_RANGE(c, 0x30A1, 0x30FA) || \
     XML_IN_RANGE(c, 0x3105, 0x312C) || XML_IN_RANGE(c, 0xAC00, 0xD7A3))
1.1 veillard 291:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * IS_DIGIT(c) is non-zero when the code point c is a decimal digit of one
 * of the scripts listed in production [88].
 * The argument is evaluated many times: pass no side effects.
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(v, lo, hi) (((v) >= (lo)) && ((v) <= (hi)))
#endif

#define IS_DIGIT(c)							\
    (XML_IN_RANGE(c, 0x0030, 0x0039) ||	/* ASCII */			\
     XML_IN_RANGE(c, 0x0660, 0x0669) ||	/* Arabic-Indic */		\
     XML_IN_RANGE(c, 0x06F0, 0x06F9) ||	/* Extended Arabic-Indic */	\
     XML_IN_RANGE(c, 0x0966, 0x096F) ||	/* Devanagari */		\
     XML_IN_RANGE(c, 0x09E6, 0x09EF) ||	/* Bengali */			\
     XML_IN_RANGE(c, 0x0A66, 0x0A6F) ||	/* Gurmukhi */			\
     XML_IN_RANGE(c, 0x0AE6, 0x0AEF) ||	/* Gujarati */			\
     XML_IN_RANGE(c, 0x0B66, 0x0B6F) ||	/* Oriya */			\
     XML_IN_RANGE(c, 0x0BE7, 0x0BEF) ||	/* Tamil */			\
     XML_IN_RANGE(c, 0x0C66, 0x0C6F) ||	/* Telugu */			\
     XML_IN_RANGE(c, 0x0CE6, 0x0CEF) ||	/* Kannada */			\
     XML_IN_RANGE(c, 0x0D66, 0x0D6F) ||	/* Malayalam */			\
     XML_IN_RANGE(c, 0x0E50, 0x0E59) ||	/* Thai */			\
     XML_IN_RANGE(c, 0x0ED0, 0x0ED9) ||	/* Lao */			\
     XML_IN_RANGE(c, 0x0F20, 0x0F29))	/* Tibetan */
1.1 veillard 311:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * IS_COMBINING(c) is non-zero when the code point c belongs to the
 * CombiningChar production.
 * The argument is evaluated many times: pass no side effects.
 */
#ifndef XML_IN_RANGE
#define XML_IN_RANGE(v, lo, hi) (((v) >= (lo)) && ((v) <= (hi)))
#endif

#define IS_COMBINING(c)							\
    (/* Combining diacritical marks */					\
     XML_IN_RANGE(c, 0x0300, 0x0345) || XML_IN_RANGE(c, 0x0360, 0x0361) || \
     /* Cyrillic */							\
     XML_IN_RANGE(c, 0x0483, 0x0486) ||					\
     /* Hebrew */							\
     XML_IN_RANGE(c, 0x0591, 0x05A1) || XML_IN_RANGE(c, 0x05A3, 0x05B9) || \
     XML_IN_RANGE(c, 0x05BB, 0x05BD) || ((c) == 0x05BF) ||		\
     XML_IN_RANGE(c, 0x05C1, 0x05C2) || ((c) == 0x05C4) ||		\
     /* Arabic */							\
     XML_IN_RANGE(c, 0x064B, 0x0652) || ((c) == 0x0670) ||		\
     XML_IN_RANGE(c, 0x06D6, 0x06DC) || XML_IN_RANGE(c, 0x06DD, 0x06DF) || \
     XML_IN_RANGE(c, 0x06E0, 0x06E4) || XML_IN_RANGE(c, 0x06E7, 0x06E8) || \
     XML_IN_RANGE(c, 0x06EA, 0x06ED) ||					\
     /* Devanagari */							\
     XML_IN_RANGE(c, 0x0901, 0x0903) || ((c) == 0x093C) ||		\
     XML_IN_RANGE(c, 0x093E, 0x094C) || ((c) == 0x094D) ||		\
     XML_IN_RANGE(c, 0x0951, 0x0954) || XML_IN_RANGE(c, 0x0962, 0x0963) || \
     /* Bengali */							\
     XML_IN_RANGE(c, 0x0981, 0x0983) || ((c) == 0x09BC) ||		\
     ((c) == 0x09BE) || ((c) == 0x09BF) ||				\
     XML_IN_RANGE(c, 0x09C0, 0x09C4) || XML_IN_RANGE(c, 0x09C7, 0x09C8) || \
     XML_IN_RANGE(c, 0x09CB, 0x09CD) || ((c) == 0x09D7) ||		\
     XML_IN_RANGE(c, 0x09E2, 0x09E3) ||					\
     /* Gurmukhi */							\
     ((c) == 0x0A02) || ((c) == 0x0A3C) || ((c) == 0x0A3E) ||		\
     ((c) == 0x0A3F) || XML_IN_RANGE(c, 0x0A40, 0x0A42) ||		\
     XML_IN_RANGE(c, 0x0A47, 0x0A48) || XML_IN_RANGE(c, 0x0A4B, 0x0A4D) || \
     XML_IN_RANGE(c, 0x0A70, 0x0A71) ||					\
     /* Gujarati */							\
     XML_IN_RANGE(c, 0x0A81, 0x0A83) || ((c) == 0x0ABC) ||		\
     XML_IN_RANGE(c, 0x0ABE, 0x0AC5) || XML_IN_RANGE(c, 0x0AC7, 0x0AC9) || \
     XML_IN_RANGE(c, 0x0ACB, 0x0ACD) ||					\
     /* Oriya */							\
     XML_IN_RANGE(c, 0x0B01, 0x0B03) || ((c) == 0x0B3C) ||		\
     XML_IN_RANGE(c, 0x0B3E, 0x0B43) || XML_IN_RANGE(c, 0x0B47, 0x0B48) || \
     XML_IN_RANGE(c, 0x0B4B, 0x0B4D) || XML_IN_RANGE(c, 0x0B56, 0x0B57) || \
     /* Tamil */							\
     XML_IN_RANGE(c, 0x0B82, 0x0B83) || XML_IN_RANGE(c, 0x0BBE, 0x0BC2) || \
     XML_IN_RANGE(c, 0x0BC6, 0x0BC8) || XML_IN_RANGE(c, 0x0BCA, 0x0BCD) || \
     ((c) == 0x0BD7) ||							\
     /* Telugu */							\
     XML_IN_RANGE(c, 0x0C01, 0x0C03) || XML_IN_RANGE(c, 0x0C3E, 0x0C44) || \
     XML_IN_RANGE(c, 0x0C46, 0x0C48) || XML_IN_RANGE(c, 0x0C4A, 0x0C4D) || \
     XML_IN_RANGE(c, 0x0C55, 0x0C56) ||					\
     /* Kannada */							\
     XML_IN_RANGE(c, 0x0C82, 0x0C83) || XML_IN_RANGE(c, 0x0CBE, 0x0CC4) || \
     XML_IN_RANGE(c, 0x0CC6, 0x0CC8) || XML_IN_RANGE(c, 0x0CCA, 0x0CCD) || \
     XML_IN_RANGE(c, 0x0CD5, 0x0CD6) ||					\
     /* Malayalam */							\
     XML_IN_RANGE(c, 0x0D02, 0x0D03) || XML_IN_RANGE(c, 0x0D3E, 0x0D43) || \
     XML_IN_RANGE(c, 0x0D46, 0x0D48) || XML_IN_RANGE(c, 0x0D4A, 0x0D4D) || \
     ((c) == 0x0D57) ||							\
     /* Thai */								\
     ((c) == 0x0E31) || XML_IN_RANGE(c, 0x0E34, 0x0E3A) ||		\
     XML_IN_RANGE(c, 0x0E47, 0x0E4E) ||					\
     /* Lao */								\
     ((c) == 0x0EB1) || XML_IN_RANGE(c, 0x0EB4, 0x0EB9) ||		\
     XML_IN_RANGE(c, 0x0EBB, 0x0EBC) || XML_IN_RANGE(c, 0x0EC8, 0x0ECD) || \
     /* Tibetan */							\
     XML_IN_RANGE(c, 0x0F18, 0x0F19) || ((c) == 0x0F35) ||		\
     ((c) == 0x0F37) || ((c) == 0x0F39) || ((c) == 0x0F3E) ||		\
     ((c) == 0x0F3F) || XML_IN_RANGE(c, 0x0F71, 0x0F84) ||		\
     XML_IN_RANGE(c, 0x0F86, 0x0F8B) || XML_IN_RANGE(c, 0x0F90, 0x0F95) || \
     ((c) == 0x0F97) || XML_IN_RANGE(c, 0x0F99, 0x0FAD) ||		\
     XML_IN_RANGE(c, 0x0FB1, 0x0FB7) || ((c) == 0x0FB9) ||		\
     /* Combining marks for symbols */					\
     XML_IN_RANGE(c, 0x20D0, 0x20DC) || ((c) == 0x20E1) ||		\
     /* CJK tone marks, kana voicing marks */				\
     XML_IN_RANGE(c, 0x302A, 0x302F) || ((c) == 0x3099) ||		\
     ((c) == 0x309A))
1.3 veillard 411:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * IS_EXTENDER(c) is non-zero when the code point c belongs to the
 * Extender production above.
 * The argument is evaluated several times: pass no side effects.
 */
#define IS_EXTENDER(c)							\
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||		\
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||		\
     ((c) == 0xec6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||				\
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 424:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * IS_IDEOGRAPHIC(c) is non-zero for the code points of production [86].
 * NOTE(review): the range 0xF900-0xFA2D is accepted as well although it
 * is not listed in the production -- confirm whether this is intended.
 */
#define IS_IDEOGRAPHIC(c)						\
    (((c) == 0x3007) ||							\
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||				\
     (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||				\
     (((c) >= 0xf900) && ((c) <= 0xfa2d)))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) ((IS_BASECHAR(c)) || (IS_IDEOGRAPHIC(c)))
438:
439: #else
1.30 daniel 440: /************************************************************************
441: * *
442: * 8bits / ASCII version of the macros. *
443: * *
444: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *              | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * 8-bit flavour: every value from 0x20 upwards is accepted, plus TAB, LF
 * and CR. The value 0 is rejected, so the test also detects the end of a
 * zero-terminated buffer.
 */
#define IS_CHAR(c)							\
    (((c) >= 0x20) ||							\
     ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
1.1 veillard 453:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8-bit flavour: the ASCII letters, the accented Latin-1 letters
 * (0xC0-0xFF without the multiplication sign 0xD7 and the division sign
 * 0xF7), and 0xBA.
 *
 * An earlier form of this macro also tested ((c) >= 0xaa) && ((c) <= 0x5b),
 * a range that can never match. It has been dropped, so 0xAA is rejected
 * exactly as before. NOTE(review): a test for the single value 0xAA may
 * have been intended, but neither 0xAA nor 0xBA is listed in production
 * [85] -- confirm before adding or removing either one.
 */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7a)) ||				\
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||				\
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||				\
     (((c) >= 0xf8) && ((c) <= 0xff)) ||				\
      ((c) == 0xba))
465:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8-bit flavour: only the ASCII digits 0x30-0x39.
 */
#define IS_DIGIT(c) (((c) <= 0x39) && ((c) >= 0x30))

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8-bit flavour: identical to IS_BASECHAR.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))


/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8-bit flavour: never matches.
 */
#define IS_COMBINING(c) (0)

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8-bit flavour: only 0xB7 fits in the value range.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
488:
1.21 daniel 489: #endif /* !UNICODE */
1.1 veillard 490:
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK(c) is non-zero when c is TAB, LF, CR or SPACE.
 */
#define IS_BLANK(c)							\
    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || ((c) == 0x20))
498:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * IS_PUBIDCHAR(c) is non-zero when c may appear in a public identifier.
 * TAB and the double quote are not part of the set.
 */
#define IS_PUBIDCHAR(c)							\
    ((((c) >= '0') && ((c) <= '9')) ||					\
     (((c) >= 'A') && ((c) <= 'Z')) ||					\
     (((c) >= 'a') && ((c) <= 'z')) ||					\
     ((c) == 0x0A) || ((c) == 0x0D) || ((c) == 0x20) ||		\
     ((c) == '!') || ((c) == '#') || ((c) == '$') || ((c) == '%') ||	\
     ((c) == '\'') || ((c) == '(') || ((c) == ')') || ((c) == '*') ||	\
     ((c) == '+') || ((c) == ',') || ((c) == '-') || ((c) == '.') ||	\
     ((c) == '/') || ((c) == ':') || ((c) == ';') || ((c) == '=') ||	\
     ((c) == '?') || ((c) == '@') || ((c) == '_'))
1.1 veillard 512:
/*
 * SKIP_EOL(p): step the pointer p over one end-of-line sequence, which may
 * be CR, LF, CR LF or LF CR. p is left untouched when it points at neither
 * CR (0x0D) nor LF (0x0A). Only one line end is consumed per use.
 *
 * The expansion is an if / else-if statement without a trailing ';': do
 * not use it as the unbraced body of another if that has an else branch.
 */
#define SKIP_EOL(p) 							\
    if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; }		\
    else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; }
516:
/*
 * SKIP_BLANKS(p): advance p past any run of blank characters (IS_BLANK).
 * This definition is used in both builds; the #undef discards the one
 * made in the UNICODE section so that the macro is not redefined with a
 * different body.
 * The expansion is a complete statement that already ends with ';'.
 */
#undef SKIP_BLANKS
#define SKIP_BLANKS(p) 							\
    while (IS_BLANK(*(p))) (p)++;

/*
 * MOVETO_ENDTAG(p): advance p to the next '>' or to the first value that
 * is not a legal character (IS_CHAR), whichever comes first.
 */
#define MOVETO_ENDTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++;

/*
 * MOVETO_STARTTAG(p): advance p to the next '<' or to the first value
 * that is not a legal character (IS_CHAR), whichever comes first.
 */
#define MOVETO_STARTTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++;
525:
/*
 * Forward declaration for recursive behaviour.
 */
1.16 daniel 529: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.3 veillard 530:
531: /*
532: * xmlHandleData : this routine represent's the specific application
533: * behaviour when reading a piece of text.
534: *
535: * For example in WebDav, any piece made only of blanks is eliminated
536: */
537:
538: CHAR *xmlHandleData(CHAR *in) {
539: CHAR *cur;
540:
541: if (in == NULL) return(NULL);
542: cur = in;
543: while (IS_CHAR(*cur)) {
544: if (!IS_BLANK(*cur)) goto not_blank;
545: cur++;
546: }
547: free(in);
548: return(NULL);
549:
550: not_blank:
551: return(in);
552: }
553:
1.28 daniel 554: /************************************************************************
555: * *
556: * Commodity functions to handle CHARs *
557: * *
558: ************************************************************************/
559:
1.3 veillard 560: /*
1.1 veillard 561: * xmlStrndup : a strdup for array of CHAR's
562: */
563:
1.6 httpng 564: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 565: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
566:
567: if (ret == NULL) {
568: fprintf(stderr, "malloc of %d byte failed\n",
569: (len + 1) * sizeof(CHAR));
570: return(NULL);
571: }
572: memcpy(ret, cur, len * sizeof(CHAR));
573: ret[len] = 0;
574: return(ret);
575: }
576:
577: /*
578: * xmlStrdup : a strdup for CHAR's
579: */
580:
1.6 httpng 581: CHAR *xmlStrdup(const CHAR *cur) {
582: const CHAR *p = cur;
1.1 veillard 583:
584: while (IS_CHAR(*p)) p++;
585: return(xmlStrndup(cur, p - cur));
586: }
587:
588: /*
1.14 veillard 589: * xmlStrcmp : a strcmp for CHAR's
590: */
591:
592: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
593: register int tmp;
594:
595: do {
596: tmp = *str1++ - *str2++;
597: if (tmp != 0) return(tmp);
598: } while ((*str1 != 0) && (*str2 != 0));
599: return (*str1 - *str2);
600: }
601:
602: /*
603: * xmlStrncmp : a strncmp for CHAR's
604: */
605:
606: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
607: register int tmp;
608:
609: if (len <= 0) return(0);
610: do {
611: tmp = *str1++ - *str2++;
612: if (tmp != 0) return(tmp);
613: len--;
614: if (len <= 0) return(0);
615: } while ((*str1 != 0) && (*str2 != 0));
616: return (*str1 - *str2);
617: }
618:
619: /*
620: * xmlStrchr : a strchr for CHAR's
621: */
622:
623: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
624: while (*str != 0) {
625: if (*str == val) return((CHAR *) str);
626: str++;
627: }
628: return(NULL);
629: }
1.28 daniel 630:
631: /************************************************************************
632: * *
633: * Extra stuff for namespace support *
634: * Relates to http://www.w3.org/TR/WD-xml-names *
635: * *
636: ************************************************************************/
637:
638: /*
639: * xmlNamespaceParseNCName : parse an XML namespace name.
640: *
641: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
642: *
643: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
644: * CombiningChar | Extender
645: */
646:
647: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
648: const CHAR *q;
649: CHAR *ret = NULL;
650:
651: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
652: q = ctxt->cur++;
653:
654: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
655: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
656: (ctxt->cur[0] == '_') ||
657: (IS_COMBINING(ctxt->cur[0])) ||
658: (IS_EXTENDER(ctxt->cur[0])))
659: ctxt->cur++;
660:
661: ret = xmlStrndup(q, ctxt->cur - q);
662:
663: return(ret);
664: }
665:
666: /*
667: * xmlNamespaceParseQName : parse an XML qualified name
668: *
669: * [NS 5] QName ::= (Prefix ':')? LocalPart
670: *
671: * [NS 6] Prefix ::= NCName
672: *
673: * [NS 7] LocalPart ::= NCName
674: */
675:
676: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
677: CHAR *ret = NULL;
678:
679: *prefix = NULL;
680: ret = xmlNamespaceParseNCName(ctxt);
681: if (ctxt->cur[0] == ':') {
682: *prefix = ret;
683: ctxt->cur++;
684: ret = xmlNamespaceParseNCName(ctxt);
685: }
686:
687: return(ret);
688: }
689:
690: /*
691: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
692: *
693: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
694: *
695: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
696: */
697:
698: void xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
699: CHAR *name = NULL;
700:
701: if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') &&
702: (ctxt->cur[2] == 'l') && (ctxt->cur[3] == 'n') &&
703: (ctxt->cur[4] == 's')) {
704: ctxt->cur += 5;
705: if (ctxt->cur[0] == ':') {
706: ctxt->cur++;
707: name = xmlNamespaceParseNCName(ctxt);
708: }
709: }
710: }
711:
712: /************************************************************************
713: * *
714: * The parser itself *
715: * Relates to http://www.w3.org/TR/REC-xml *
716: * *
717: ************************************************************************/
1.14 veillard 718:
719: /*
1.1 veillard 720: * xmlParseName : parse an XML name.
1.22 daniel 721: *
722: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
723: * CombiningChar | Extender
724: *
725: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
726: *
727: * [6] Names ::= Name (S Name)*
1.1 veillard 728: */
729:
1.16 daniel 730: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 731: const CHAR *q;
732: CHAR *ret = NULL;
1.1 veillard 733:
1.22 daniel 734: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_') &&
735: (ctxt->cur[0] != ':')) return(NULL);
736: q = ctxt->cur++;
737:
738: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
739: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
740: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
741: (IS_COMBINING(ctxt->cur[0])) ||
742: (IS_EXTENDER(ctxt->cur[0])))
743: ctxt->cur++;
744:
745: ret = xmlStrndup(q, ctxt->cur - q);
746:
747: return(ret);
748: }
749:
750: /*
751: * xmlParseNmtoken : parse an XML Nmtoken.
752: *
753: * [7] Nmtoken ::= (NameChar)+
754: *
755: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
756: */
757:
758: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
759: const CHAR *q;
760: CHAR *ret = NULL;
761:
1.16 daniel 762: q = ctxt->cur++;
1.22 daniel 763:
1.16 daniel 764: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1.22 daniel 765: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
766: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
767: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 768: (IS_EXTENDER(ctxt->cur[0])))
769: ctxt->cur++;
1.3 veillard 770:
1.16 daniel 771: ret = xmlStrndup(q, ctxt->cur - q);
1.1 veillard 772:
1.3 veillard 773: return(ret);
1.1 veillard 774: }
775:
776: /*
1.24 daniel 777: * xmlParseEntityValue : parse a value for ENTITY decl.
778: *
779: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
780: * "'" ([^%&'] | PEReference | Reference)* "'"
781: */
782:
783: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
784: CHAR *ret = NULL;
785: const CHAR *q;
786: int needSubst;
787:
788: if (ctxt->cur[0] == '"') {
789: ctxt->cur++;
790:
791: q = ctxt->cur;
792: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
793: if (ctxt->cur[0] == '%') {
794: needSubst = 1; /* TODO !!! */
795: ctxt->cur++;
1.29 daniel 796: } else if (ctxt->cur[0] == '&') {
1.24 daniel 797: needSubst = 1; /* TODO !!! */
798: ctxt->cur++;
799: } else
800: ctxt->cur++;
801: }
802: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 803: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 804: } else {
805: ret = xmlStrndup(q, ctxt->cur - q);
806: ctxt->cur++;
807: }
808: } else if (ctxt->cur[0] == '\'') {
809: ctxt->cur++;
810: q = ctxt->cur;
811: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
812: if (ctxt->cur[0] == '%') {
813: needSubst = 1; /* TODO !!! */
814: ctxt->cur++;
1.29 daniel 815: } else if (ctxt->cur[0] == '&') {
1.24 daniel 816: needSubst = 1; /* TODO !!! */
817: ctxt->cur++;
818: } else
819: ctxt->cur++;
820: }
821: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 822: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 823: } else {
824: ret = xmlStrndup(q, ctxt->cur - q);
825: ctxt->cur++;
826: }
827: } else {
1.31 ! daniel 828: xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 829: }
830:
831: return(ret);
832: }
833:
834: /*
1.29 daniel 835: * xmlParseAttValue : parse a value for an attribute
836: *
837: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
838: * "'" ([^<&'] | Reference)* "'"
839: */
840:
841: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
842: CHAR *ret = NULL;
843: const CHAR *q;
844: int needSubst;
845:
846: if (ctxt->cur[0] == '"') {
847: ctxt->cur++;
848:
849: q = ctxt->cur;
850: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
851: if (ctxt->cur[0] == '%') {
852: needSubst = 1; /* TODO !!! */
853: ctxt->cur++;
854: } else if (ctxt->cur[0] == '&') {
855: needSubst = 1; /* TODO !!! */
856: ctxt->cur++;
857: } else
858: ctxt->cur++;
859: }
860: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 861: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 862: } else {
863: ret = xmlStrndup(q, ctxt->cur - q);
864: ctxt->cur++;
865: }
866: } else if (ctxt->cur[0] == '\'') {
867: ctxt->cur++;
868: q = ctxt->cur;
869: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
870: if (ctxt->cur[0] == '%') {
871: needSubst = 1; /* TODO !!! */
872: ctxt->cur++;
873: } else if (ctxt->cur[0] == '&') {
874: needSubst = 1; /* TODO !!! */
875: ctxt->cur++;
876: } else
877: ctxt->cur++;
878: }
879: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 880: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 881: } else {
882: ret = xmlStrndup(q, ctxt->cur - q);
883: ctxt->cur++;
884: }
885: } else {
1.31 ! daniel 886: xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 887: }
888:
889: return(ret);
890: }
891:
892: /*
1.21 daniel 893: * xmlParseSystemLiteral : parse an XML Literal
894: *
1.22 daniel 895: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 896: */
897:
898: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
899: const CHAR *q;
900: CHAR *ret = NULL;
901:
902: if (ctxt->cur[0] == '"') {
903: ctxt->cur++;
904: q = ctxt->cur;
1.22 daniel 905: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"'))
1.21 daniel 906: ctxt->cur++;
1.22 daniel 907: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 908: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 909: } else {
910: ret = xmlStrndup(q, ctxt->cur - q);
911: ctxt->cur++;
912: }
913: } else if (ctxt->cur[0] == '\'') {
914: ctxt->cur++;
915: q = ctxt->cur;
1.22 daniel 916: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
1.21 daniel 917: ctxt->cur++;
1.22 daniel 918: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 919: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 920: } else {
921: ret = xmlStrndup(q, ctxt->cur - q);
922: ctxt->cur++;
923: }
924: } else {
1.31 ! daniel 925: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 926: }
927:
928: return(ret);
929: }
930:
931: /*
1.27 daniel 932: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 933: *
1.22 daniel 934: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 935: */
936:
937: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
938: const CHAR *q;
939: CHAR *ret = NULL;
940: /*
941: * Name ::= (Letter | '_') (NameChar)*
942: */
943: if (ctxt->cur[0] == '"') {
944: ctxt->cur++;
945: q = ctxt->cur;
946: while (IS_PUBIDCHAR(ctxt->cur[0])) ctxt->cur++;
947: if (ctxt->cur[0] != '"') {
1.31 ! daniel 948: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 949: } else {
950: ret = xmlStrndup(q, ctxt->cur - q);
951: ctxt->cur++;
952: }
953: } else if (ctxt->cur[0] == '\'') {
954: ctxt->cur++;
955: q = ctxt->cur;
956: while ((IS_LETTER(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
957: ctxt->cur++;
958: if (!IS_LETTER(ctxt->cur[0])) {
1.31 ! daniel 959: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 960: } else {
961: ret = xmlStrndup(q, ctxt->cur - q);
962: ctxt->cur++;
963: }
964: } else {
1.31 ! daniel 965: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 966: }
967:
968: return(ret);
969: }
970:
971: /*
1.27 daniel 972: * xmlParseCharData: parse a CharData section.
973: * if we are within a CDATA section ']]>' marks an end of section.
974: *
975: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
976: */
977:
978: CHAR *xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
979: const CHAR *q;
980: CHAR *ret = NULL;
981:
982: q = ctxt->cur;
983: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '<') &&
984: (ctxt->cur[0] != '&')) {
985: ctxt->cur++;
986: if ((cdata) && (ctxt->cur[0] == ']') && (ctxt->cur[1] == ']') &&
987: (ctxt->cur[2] == '>')) break;
988: }
989: if (q == ctxt->cur) return(NULL);
990: ret = xmlStrndup(q, ctxt->cur - q);
991: return(ret);
992: }
993:
994: /*
1.22 daniel 995: * xmlParseExternalID: Parse an External ID
996: *
997: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
998: * | 'PUBLIC' S PubidLiteral S SystemLiteral
999: */
1000:
1001: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **PubidLiteral) {
1002: CHAR *ExternalID = NULL;
1003:
1004: *PubidLiteral = NULL;
1005: if ((ctxt->cur[0] == 'S') && (ctxt->cur[1] == 'Y') &&
1006: (ctxt->cur[2] == 'S') && (ctxt->cur[3] == 'T') &&
1007: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M')) {
1008: ctxt->cur += 6;
1009: SKIP_BLANKS(ctxt->cur);
1010: ExternalID = xmlParseSystemLiteral(ctxt);
1011: if (ExternalID == NULL)
1.31 ! daniel 1012: xmlParserError(ctxt,
! 1013: "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
1.22 daniel 1014: } else if ((ctxt->cur[0] == 'P') && (ctxt->cur[1] == 'U') &&
1015: (ctxt->cur[2] == 'B') && (ctxt->cur[3] == 'L') &&
1016: (ctxt->cur[4] == 'I') && (ctxt->cur[5] == 'C')) {
1017: ctxt->cur += 6;
1018: SKIP_BLANKS(ctxt->cur);
1019: *PubidLiteral = xmlParsePubidLiteral(ctxt);
1020: if (*PubidLiteral == NULL)
1.31 ! daniel 1021: xmlParserError(ctxt,
! 1022: "xmlParseExternalID: PUBLIC, no PubidLiteral\n");
1.22 daniel 1023: SKIP_BLANKS(ctxt->cur);
1024: ExternalID = xmlParseSystemLiteral(ctxt);
1025: if (ExternalID == NULL)
1.31 ! daniel 1026: xmlParserError(ctxt,
! 1027: "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
1.22 daniel 1028: }
1029: return(ExternalID);
1030: }
1031:
1032: /*
1.1 veillard 1033: * Parse and return a string between quotes or doublequotes
1034: */
1.16 daniel 1035: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.1 veillard 1036: CHAR *ret = NULL;
1.17 daniel 1037: const CHAR *q;
1.1 veillard 1038:
1.16 daniel 1039: if (ctxt->cur[0] == '"') {
1040: ctxt->cur++;
1041: q = ctxt->cur;
1042: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++;
1043: if (ctxt->cur[0] != '"')
1.31 ! daniel 1044: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1045: else {
1.16 daniel 1046: ret = xmlStrndup(q, ctxt->cur - q);
1047: ctxt->cur++;
1.1 veillard 1048: }
1.16 daniel 1049: } else if (ctxt->cur[0] == '\''){
1050: ctxt->cur++;
1051: q = ctxt->cur;
1052: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++;
1053: if (ctxt->cur[0] != '\'')
1.31 ! daniel 1054: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1055: else {
1.16 daniel 1056: ret = xmlStrndup(q, ctxt->cur - q);
1057: ctxt->cur++;
1.1 veillard 1058: }
1059: }
1060: return(ret);
1061: }
1062:
1063: /*
1.3 veillard 1064: * Skip an XML (SGML) comment <!-- .... -->
1.31 ! daniel 1065: * This may or may not create a node (depending on the context)
1.22 daniel 1066: *
1067: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1068: */
1.31 ! daniel 1069: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
! 1070: xmlNodePtr ret = NULL;
1.17 daniel 1071: const CHAR *q, *start;
1072: const CHAR *r;
1.3 veillard 1073:
1074: /*
1.22 daniel 1075: * Check that there is a comment right here.
1.3 veillard 1076: */
1.16 daniel 1077: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
1.31 ! daniel 1078: (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return(NULL);
1.3 veillard 1079:
1.16 daniel 1080: ctxt->cur += 4;
1081: start = q = ctxt->cur;
1082: ctxt->cur++;
1083: r = ctxt->cur;
1084: ctxt->cur++;
1085: while (IS_CHAR(ctxt->cur[0]) &&
1086: ((ctxt->cur[0] == ':') || (ctxt->cur[0] != '>') ||
1087: (*r != '-') || (*q != '-'))) {
1088: ctxt->cur++;r++;q++;
1.3 veillard 1089: }
1.16 daniel 1090: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 1091: xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.16 daniel 1092: ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
1.3 veillard 1093: } else {
1.16 daniel 1094: ctxt->cur++;
1.31 ! daniel 1095: if (create) {
! 1096: ret = xmlNewComment(xmlStrndup(start, q - start));
! 1097: }
1.3 veillard 1098: }
1099: }
1100:
1101: /*
1.13 veillard 1102: * xmlParseNamespace: parse specific '<?namespace ...' constructs.
1.22 daniel 1103: *
1.29 daniel 1104: * TODO !!!!!!!!!!
1105: *
1106: * This is what the older xml-name Working Draft specified, a bunch of
1107: * other stuff may still rely on it, so support is still here as
1108: * if ot was declared on the root of the Tree:-(
1.1 veillard 1109: */
1110:
1.16 daniel 1111: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.1 veillard 1112: CHAR *href = NULL;
1113: CHAR *AS = NULL;
1.3 veillard 1114: int garbage = 0;
1.1 veillard 1115:
1116: /*
1.18 daniel 1117: * We just skipped "namespace" or "xml:namespace"
1.1 veillard 1118: */
1.16 daniel 1119: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1120:
1.16 daniel 1121: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
1.1 veillard 1122: /*
1.18 daniel 1123: * We can have "ns" or "prefix" attributes
1124: * Old encoding as 'href' or 'AS' attributes is still supported
1.1 veillard 1125: */
1.18 daniel 1126: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
1127: garbage = 0;
1128: ctxt->cur += 2;
1129: SKIP_BLANKS(ctxt->cur);
1130:
1131: if (ctxt->cur[0] != '=') continue;
1132: ctxt->cur++;
1133: SKIP_BLANKS(ctxt->cur);
1134:
1135: href = xmlParseQuotedString(ctxt);
1136: SKIP_BLANKS(ctxt->cur);
1137: } else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
1.16 daniel 1138: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
1.3 veillard 1139: garbage = 0;
1.16 daniel 1140: ctxt->cur += 4;
1141: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1142:
1.16 daniel 1143: if (ctxt->cur[0] != '=') continue;
1144: ctxt->cur++;
1145: SKIP_BLANKS(ctxt->cur);
1146:
1147: href = xmlParseQuotedString(ctxt);
1148: SKIP_BLANKS(ctxt->cur);
1.18 daniel 1149: } else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
1150: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
1151: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
1152: garbage = 0;
1153: ctxt->cur += 6;
1154: SKIP_BLANKS(ctxt->cur);
1155:
1156: if (ctxt->cur[0] != '=') continue;
1157: ctxt->cur++;
1158: SKIP_BLANKS(ctxt->cur);
1159:
1160: AS = xmlParseQuotedString(ctxt);
1161: SKIP_BLANKS(ctxt->cur);
1.16 daniel 1162: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
1.3 veillard 1163: garbage = 0;
1.16 daniel 1164: ctxt->cur += 2;
1165: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1166:
1.16 daniel 1167: if (ctxt->cur[0] != '=') continue;
1168: ctxt->cur++;
1169: SKIP_BLANKS(ctxt->cur);
1170:
1171: AS = xmlParseQuotedString(ctxt);
1172: SKIP_BLANKS(ctxt->cur);
1173: } else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
1.3 veillard 1174: garbage = 0;
1.16 daniel 1175: ctxt->cur ++;
1.1 veillard 1176: } else {
1.3 veillard 1177: /*
1178: * Found garbage when parsing the namespace
1179: */
1.31 ! daniel 1180: if (!garbage)
! 1181: xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
1.16 daniel 1182: ctxt->cur++;
1.1 veillard 1183: }
1184: }
1185:
1.16 daniel 1186: MOVETO_ENDTAG(ctxt->cur);
1187: ctxt->cur++;
1.1 veillard 1188:
1189: /*
1190: * Register the DTD.
1191: */
1192: if (href != NULL)
1.16 daniel 1193: xmlNewDtd(ctxt->doc, href, AS);
1.1 veillard 1194:
1.8 veillard 1195: if (AS != NULL) free(AS);
1196: if (href != NULL) free(href);
1.1 veillard 1197: }
1198:
1199: /*
1.22 daniel 1200: * xmlParsePITarget: parse the name of a PI
1201: *
1202: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1203: */
1204:
1205: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1206: CHAR *name;
1207:
1208: name = xmlParseName(ctxt);
1209: if ((name != NULL) && (name[3] == 0) &&
1210: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 ! daniel 1211: ((name[1] == 'm') || (name[1] == 'M')) &&
! 1212: ((name[2] == 'l') || (name[2] == 'L'))) {
! 1213: xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1214: return(NULL);
1215: }
1216: return(name);
1217: }
1218:
1219: /*
1.3 veillard 1220: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 1221: *
1222: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 1223: */
1224:
1.16 daniel 1225: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1226: CHAR *target;
1227:
1.16 daniel 1228: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1.3 veillard 1229: /*
1230: * this is a Processing Instruction.
1231: */
1.16 daniel 1232: ctxt->cur += 2;
1.3 veillard 1233:
1234: /*
1.22 daniel 1235: * Parse the target name and check for special support like
1236: * namespace.
1237: *
1238: * TODO : PI handling should be dynamically redefinable using an
1239: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1240: */
1.22 daniel 1241: target = xmlParsePITarget(ctxt);
1242: if (target != NULL) {
1243: /*
1244: * Support for the Processing Instruction related to namespace.
1245: */
1246: if ((target[0] == 'n') && (target[1] == 'a') &&
1247: (target[2] == 'm') && (target[3] == 'e') &&
1248: (target[4] == 's') && (target[5] == 'p') &&
1249: (target[6] == 'a') && (target[7] == 'c') &&
1250: (target[8] == 'e')) {
1251: xmlParseNamespace(ctxt);
1252: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1253: (target[2] == 'l') && (target[3] == ':') &&
1254: (target[4] == 'n') && (target[5] == 'a') &&
1255: (target[6] == 'm') && (target[7] == 'e') &&
1256: (target[8] == 's') && (target[9] == 'p') &&
1257: (target[10] == 'a') && (target[11] == 'c') &&
1258: (target[12] == 'e')) {
1259: xmlParseNamespace(ctxt);
1260: } else {
1261: /* Unknown PI, ignore it ! */
1.31 ! daniel 1262: xmlParserError(ctxt, "xmlParsePI : skipping unknown PI %s\n",
! 1263: target);
1.22 daniel 1264: while (IS_CHAR(ctxt->cur[0]) &&
1.24 daniel 1265: ((ctxt->cur[0] != '?') || (ctxt->cur[1] != '>')))
1.22 daniel 1266: ctxt->cur++;
1267: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 1268: xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
! 1269: target);
1.24 daniel 1270: } else
1271: ctxt->cur += 2;
1.22 daniel 1272: }
1.3 veillard 1273: } else {
1.31 ! daniel 1274: xmlParserError(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1275: /********* Should we try to complete parsing the PI ???
1276: while (IS_CHAR(ctxt->cur[0]) &&
1277: (ctxt->cur[0] != '?') && (ctxt->cur[0] != '>'))
1278: ctxt->cur++;
1279: if (!IS_CHAR(ctxt->cur[0])) {
1280: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1281: target);
1282: }
1283: ********************************************************/
1284: }
1285: }
1286: }
1287:
1288: /*
1289: * xmlParseNotationDecl: parse a notation declaration
1290: *
1291: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1292: *
1293: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1294: *
1295: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1296: * 'PUBLIC' S PubidLiteral S SystemLiteral
1297: *
1298: * Hence there is actually 3 choices:
1299: * 'PUBLIC' S PubidLiteral
1300: * 'PUBLIC' S PubidLiteral S SystemLiteral
1301: * and 'SYSTEM' S SystemLiteral
1302: */
1303:
1304: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1305: CHAR *name;
1306:
1307: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1308: (ctxt->cur[2] == 'N') && (ctxt->cur[3] == 'O') &&
1309: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'A') &&
1310: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'I') &&
1311: (ctxt->cur[8] == 'O') && (ctxt->cur[9] == 'N') &&
1312: (IS_BLANK(ctxt->cur[10]))) {
1313: ctxt->cur += 10;
1314: SKIP_BLANKS(ctxt->cur);
1315:
1316: name = xmlParseName(ctxt);
1317: if (name == NULL) {
1.31 ! daniel 1318: xmlParserError(ctxt,
! 1319: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1320: return;
1321: }
1322: SKIP_BLANKS(ctxt->cur);
1323: /*
1.31 ! daniel 1324: * TODO !!!
1.22 daniel 1325: */
1326: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1327: ctxt->cur++;
1328: free(name);
1329: }
1330: }
1331:
1332: /*
1333: * xmlParseEntityDecl: parse <!ENTITY declarations
1334: *
1335: * [70] EntityDecl ::= GEDecl | PEDecl
1336: *
1337: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1338: *
1339: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1340: *
1341: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1342: *
1343: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1344: *
1345: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1346: */
1347:
1348: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1349: CHAR *name;
1.24 daniel 1350: CHAR *value = NULL;
1351: CHAR *id = NULL, *literal = NULL;
1352: CHAR *ndata = NULL;
1.22 daniel 1353: int typePEDef = 0;
1354:
1355: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1356: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'N') &&
1357: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1358: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'Y') &&
1359: (IS_BLANK(ctxt->cur[8]))) {
1360: ctxt->cur += 8;
1361: SKIP_BLANKS(ctxt->cur);
1362:
1363: if (ctxt->cur[0] == '%') {
1.16 daniel 1364: ctxt->cur++;
1.22 daniel 1365: SKIP_BLANKS(ctxt->cur);
1366: typePEDef = 1;
1367: }
1368:
1369: name = xmlParseName(ctxt);
1.24 daniel 1370: if (name == NULL) {
1.31 ! daniel 1371: xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1372: return;
1373: }
1374: SKIP_BLANKS(ctxt->cur);
1375:
1.22 daniel 1376: /*
1.24 daniel 1377: * TODO handle the various case of definitions...
1.22 daniel 1378: */
1.24 daniel 1379: if (typePEDef) {
1380: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1381: value = xmlParseEntityValue(ctxt);
1382: else {
1383: id = xmlParseExternalID(ctxt, &literal);
1384: }
1385: } else {
1386: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1387: value = xmlParseEntityValue(ctxt);
1388: else {
1389: id = xmlParseExternalID(ctxt, &literal);
1390: SKIP_BLANKS(ctxt->cur);
1391: if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'D') &&
1392: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1393: (ctxt->cur[4] == 'A')) {
1394: ndata = xmlParseName(ctxt);
1395: }
1396: }
1397: }
1398: SKIP_BLANKS(ctxt->cur);
1399: if (ctxt->cur[0] != '>') {
1.31 ! daniel 1400: xmlParserError(ctxt,
! 1401: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1402: } else
1.22 daniel 1403: ctxt->cur++;
1404: }
1405: }
1406:
1407: /*
1408: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1409: *
1410: * [57] EnumeratedType ::= NotationType | Enumeration
1411: *
1412: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1413: *
1414: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1415: */
1416:
1417: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1418: /*
1419: * TODO !!!
1420: */
1421: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1422: ctxt->cur++;
1423: }
1424:
1425: /*
1426: * xmlParseAttributeType: parse the Attribute list def for an element
1427: *
1428: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1429: *
1430: * [55] StringType ::= 'CDATA'
1431: *
1432: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1433: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1434: */
1435: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1436: if ((ctxt->cur[0] == 'C') && (ctxt->cur[1] == 'D') &&
1437: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1438: (ctxt->cur[4] == 'A')) {
1439: ctxt->cur += 5;
1440: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D')) {
1441: ctxt->cur += 2;
1442: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1443: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1444: (ctxt->cur[4] == 'F')) {
1445: ctxt->cur += 5;
1446: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1447: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1448: (ctxt->cur[4] == 'F') && (ctxt->cur[5] == 'S')) {
1449: ctxt->cur += 6;
1450: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1451: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1452: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'Y')) {
1453: ctxt->cur += 6;
1454: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1455: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1456: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1457: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'S')) {
1458: ctxt->cur += 8;
1459: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1460: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1461: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1462: (ctxt->cur[6] == 'N')) {
1463: ctxt->cur += 7;
1464: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1465: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1466: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1467: (ctxt->cur[6] == 'N') && (ctxt->cur[7] == 'S')) {
1468: } else {
1469: xmlParseEnumeratedType(ctxt, name);
1470: }
1471: }
1472:
1473: /*
1474: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1475: *
1476: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1477: *
1478: * [53] AttDef ::= S Name S AttType S DefaultDecl
1479: */
1480: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1481: CHAR *name;
1482:
1483: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1484: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1485: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'L') &&
1486: (ctxt->cur[6] == 'I') && (ctxt->cur[7] == 'S') &&
1487: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1488: ctxt->cur += 9;
1489: SKIP_BLANKS(ctxt->cur);
1490: name = xmlParseName(ctxt);
1491: if (name == NULL) {
1.31 ! daniel 1492: xmlParserError(ctxt,
! 1493: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1494: return;
1495: }
1496: SKIP_BLANKS(ctxt->cur);
1497: while (ctxt->cur[0] != '>') {
1498: const CHAR *check = ctxt->cur;
1499:
1500: xmlParseAttributeType(ctxt, name);
1501: SKIP_BLANKS(ctxt->cur);
1502: if (check == ctxt->cur) {
1.31 ! daniel 1503: xmlParserError(ctxt,
! 1504: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 1505: break;
1506: }
1507: }
1508: if (ctxt->cur[0] == '>')
1509: ctxt->cur++;
1510:
1511: free(name);
1512: }
1513: }
1514:
1515: /*
1516: * xmlParseElementContentDecl: parse the declaration for an Element content
1517: * either Mixed or Children, the cases EMPTY and ANY being handled
1518: * int xmlParseElementDecl.
1519: *
1520: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1521: *
1522: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1523: *
1524: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1525: *
1526: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1527: *
1528: * or
1529: *
1530: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1531: * '(' S? '#PCDATA' S? ')'
1532: */
1533:
1534: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1535: /*
1536: * TODO This has to be parsed correctly, currently we just skip until
1537: * we reach the first '>'.
1.31 ! daniel 1538: * !!!
1.22 daniel 1539: */
1540: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1541: ctxt->cur++;
1542: }
1543:
1544: /*
1545: * xmlParseElementDecl: parse an Element declaration.
1546: *
1547: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1548: *
1549: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1550: *
1551: * TODO There is a check [ VC: Unique Element Type Declaration ]
1552: */
1553: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1554: CHAR *name;
1555:
1556: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1557: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'L') &&
1558: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M') &&
1559: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'N') &&
1560: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1561: ctxt->cur += 9;
1562: SKIP_BLANKS(ctxt->cur);
1563: name = xmlParseName(ctxt);
1564: if (name == NULL) {
1.31 ! daniel 1565: xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 1566: return;
1567: }
1568: SKIP_BLANKS(ctxt->cur);
1569: if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'M') &&
1570: (ctxt->cur[2] == 'P') && (ctxt->cur[3] == 'T') &&
1571: (ctxt->cur[4] == 'Y')) {
1572: ctxt->cur += 5;
1573: /*
1574: * Element must always be empty.
1575: */
1576: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'N') &&
1577: (ctxt->cur[2] == 'Y')) {
1578: ctxt->cur += 3;
1579: /*
1580: * Element is a generic container.
1581: */
1582: } else {
1583: xmlParseElementContentDecl(ctxt, name);
1584: }
1585: SKIP_BLANKS(ctxt->cur);
1586: if (ctxt->cur[0] != '>') {
1.31 ! daniel 1587: xmlParserError(ctxt,
! 1588: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 1589: } else
1590: ctxt->cur++;
1591: }
1592: }
1593:
1594: /*
1595: * xmlParseMarkupDecl: parse Markup declarations
1596: *
1597: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1598: * NotationDecl | PI | Comment
1599: *
1600: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1601: */
1602: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1603: xmlParseElementDecl(ctxt);
1604: xmlParseAttributeListDecl(ctxt);
1605: xmlParseEntityDecl(ctxt);
1606: xmlParseNotationDecl(ctxt);
1607: xmlParsePI(ctxt);
1.31 ! daniel 1608: xmlParseComment(ctxt, 0);
1.22 daniel 1609: }
1610:
1611: /*
1.24 daniel 1612: * xmlParseCharRef: parse Reference declarations
1613: *
1614: * [66] CharRef ::= '&#' [0-9]+ ';' |
1615: * '&#x' [0-9a-fA-F]+ ';'
1616: */
1617: CHAR xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 1618: int val = 0;
1.24 daniel 1619: CHAR ret = 0;
1620:
1621: if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#') &&
1622: (ctxt->cur[2] == 'x')) {
1623: ctxt->cur += 3;
1624: while (ctxt->cur[0] != ';') {
1625: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1.29 daniel 1626: val = val * 16 + (ctxt->cur[0] - '0');
1.24 daniel 1627: else if ((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'f'))
1.29 daniel 1628: val = val * 16 + (ctxt->cur[0] - 'a') + 10;
1.24 daniel 1629: else if ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'F'))
1.29 daniel 1630: val = val * 16 + (ctxt->cur[0] - 'A') + 10;
1.24 daniel 1631: else {
1.31 ! daniel 1632: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1633: val = 0;
1.24 daniel 1634: break;
1635: }
1636: }
1637: if (ctxt->cur[0] != ';')
1638: ctxt->cur++;
1639: } else if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#')) {
1640: ctxt->cur += 2;
1641: while (ctxt->cur[0] != ';') {
1642: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1.29 daniel 1643: val = val * 16 + (ctxt->cur[0] - '0');
1.24 daniel 1644: else {
1.31 ! daniel 1645: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1646: val = 0;
1.24 daniel 1647: break;
1648: }
1649: }
1650: if (ctxt->cur[0] != ';')
1651: ctxt->cur++;
1652: } else {
1.31 ! daniel 1653: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 1654: }
1.29 daniel 1655: /*
1656: * Check the value IS_CHAR ...
1657: */
1658: if (IS_CHAR(val))
1659: ret = (CHAR) val;
1660: else {
1661: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1662: ctxt->cur - 10);
1663:
1664: ret = '?';
1665: }
1.24 daniel 1666: return(ret);
1667: }
1668:
1669: /*
1670: * xmlParseEntityRef: parse ENTITY references declarations
1671: *
1672: * [68] EntityRef ::= '&' Name ';'
1673: */
1674: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1675: CHAR *name;
1676:
1677: if (ctxt->cur[0] == '&') {
1678: ctxt->cur++;
1679: name = xmlParseName(ctxt);
1680: if (name == NULL) {
1.31 ! daniel 1681: xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 1682: } else {
1683: if (ctxt->cur[0] == ';') {
1684: ctxt->cur++;
1685: /*
1686: * TODO there is a VC check here !!!
1687: * [ VC: Entity Declared ]
1688: */
1689: free(name);
1690: } else {
1.31 ! daniel 1691: xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.24 daniel 1692: }
1693: }
1694: }
1.25 daniel 1695: return(NULL); /* TODO !!!! */
1.24 daniel 1696: }
1697:
1698: /*
1699: * xmlParseReference: parse Reference declarations
1700: *
1701: * [67] Reference ::= EntityRef | CharRef
1702: */
1703: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
1704: CHAR *name;
1705:
1706: if (ctxt->cur[0] == '&') {
1707: return(xmlParseEntityRef(ctxt));
1708: } else {
1709: ctxt->cur++;
1710: name = xmlParseName(ctxt);
1711: if (name == NULL) {
1.31 ! daniel 1712: xmlParserError(ctxt, "xmlParseReference: no name\n");
1.24 daniel 1713: } else {
1714: if (ctxt->cur[0] == ';') {
1715: ctxt->cur++;
1716: /*
1717: * TODO there is a VC check here !!!
1718: * [ VC: Entity Declared ]
1719: */
1720: free(name);
1721: } else {
1.31 ! daniel 1722: xmlParserError(ctxt, "xmlParseReference: expecting ';'\n");
1.24 daniel 1723: }
1724: }
1725: }
1.25 daniel 1726: return(NULL); /* TODO !!!! */
1.24 daniel 1727: }
1728:
1729: /*
1.22 daniel 1730: * xmlParsePEReference: parse PEReference declarations
1731: *
1732: * [69] PEReference ::= '%' Name ';'
1733: */
1.24 daniel 1734: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 1735: CHAR *name;
1736:
1737: if (ctxt->cur[0] == '%') {
1738: ctxt->cur++;
1739: name = xmlParseName(ctxt);
1740: if (name == NULL) {
1.31 ! daniel 1741: xmlParserError(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 1742: } else {
1743: if (ctxt->cur[0] == ';') {
1744: ctxt->cur++;
1745: /*
1746: * TODO there is a VC check here !!!
1747: * [ VC: Entity Declared ]
1748: */
1749: free(name);
1750: } else {
1.31 ! daniel 1751: xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
1.22 daniel 1752: }
1.3 veillard 1753: }
1754: }
1.25 daniel 1755: return(NULL); /* TODO !!!! */
1.3 veillard 1756: }
1757:
1758: /*
1.21 daniel 1759: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
1760: *
1.22 daniel 1761: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1762: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 1763: */
1764:
1765: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1766: CHAR *name;
1767: CHAR *ExternalID = NULL;
1.22 daniel 1768: CHAR *SystemID = NULL;
1.21 daniel 1769:
1770: /*
1771: * We know that '<!DOCTYPE' has been detected.
1772: */
1773: ctxt->cur += 9;
1774:
1775: SKIP_BLANKS(ctxt->cur);
1776:
1777: /*
1778: * Parse the DOCTYPE name.
1779: */
1780: name = xmlParseName(ctxt);
1781: if (name == NULL) {
1.31 ! daniel 1782: xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 1783: }
1784:
1785: SKIP_BLANKS(ctxt->cur);
1786:
1787: /*
1.22 daniel 1788: * Check for SystemID and ExternalID
1789: */
1790: SystemID = xmlParseExternalID(ctxt, &ExternalID);
1791: SKIP_BLANKS(ctxt->cur);
1792:
1793: /*
1794: * Is there any DTD definition ?
1795: */
1796: if (ctxt->cur[0] == '[') {
1797: ctxt->cur++;
1798: /*
1799: * Parse the succession of Markup declarations and
1800: * PEReferences.
1801: * Subsequence (markupdecl | PEReference | S)*
1802: */
1803: while (ctxt->cur[0] != ']') {
1804: const CHAR *check = ctxt->cur;
1805:
1806: SKIP_BLANKS(ctxt->cur);
1807: xmlParseMarkupDecl(ctxt);
1808: xmlParsePEReference(ctxt);
1809:
1810: if (ctxt->cur == check) {
1.31 ! daniel 1811: xmlParserError(ctxt,
! 1812: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 1813: break;
1814: }
1815: }
1816: if (ctxt->cur[0] == ']') ctxt->cur++;
1817: }
1818:
1819: /*
1820: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 1821: */
1.22 daniel 1822: if (ctxt->cur[0] != '>') {
1.31 ! daniel 1823: xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 1824: /* We shouldn't try to resynchronize ... */
1.21 daniel 1825: }
1.22 daniel 1826: ctxt->cur++;
1827:
1828: /*
1829: * Cleanup, since we don't use all those identifiers
1830: * TODO : the DOCTYPE if available should be stored !
1831: */
1832: if (SystemID != NULL) free(SystemID);
1833: if (ExternalID != NULL) free(ExternalID);
1834: if (name != NULL) free(name);
1.21 daniel 1835: }
1836:
1837: /*
1.3 veillard 1838: * xmlParseAttribute: parse a start of tag.
1839: *
1.22 daniel 1840: * [41] Attribute ::= Name Eq AttValue
1841: *
1842: * [25] Eq ::= S? '=' S?
1843: *
1.29 daniel 1844: * With namespace:
1845: *
1846: * [NS 11] Attribute ::= QName Eq AttValue
1.3 veillard 1847: */
1848:
1.16 daniel 1849: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1850: CHAR *name, *value = NULL;
1.29 daniel 1851: CHAR *ns;
1.3 veillard 1852:
1.29 daniel 1853: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 1854: if (name == NULL) {
1.31 ! daniel 1855: xmlParserError(ctxt, "error parsing attribute name\n");
1.29 daniel 1856: return;
1.3 veillard 1857: }
1.22 daniel 1858: /*
1859: * TODO: Check for Namespace ...
1860: */
1.29 daniel 1861: if (ns != NULL) {
1.31 ! daniel 1862: xmlParserError(ctxt,
! 1863: "Internal: xmlParseAttribute: don't handle attributes namespace\n");
1.29 daniel 1864: free(ns);
1865: }
1.3 veillard 1866:
1867: /*
1.29 daniel 1868: * read the value
1.3 veillard 1869: */
1.16 daniel 1870: SKIP_BLANKS(ctxt->cur);
1871: if (ctxt->cur[0] == '=') {
1872: ctxt->cur++;
1873: SKIP_BLANKS(ctxt->cur);
1.29 daniel 1874: value = xmlParseAttValue(ctxt);
1875: } else {
1.31 ! daniel 1876: xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
! 1877: name);
1.3 veillard 1878: }
1879:
1880: /*
1881: * Add the attribute to the node.
1882: */
1.17 daniel 1883: if (name != NULL) {
1.3 veillard 1884: xmlNewProp(node, name, value);
1.17 daniel 1885: free(name);
1886: }
1.29 daniel 1887: if (value != NULL)
1.17 daniel 1888: free(value);
1.3 veillard 1889: }
1890:
1891: /*
1.29 daniel 1892: * xmlParseStartTag: parse a start of tag either for rule element or
1893: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 1894: *
1895: * [40] STag ::= '<' Name (S Attribute)* S? '>'
1896: *
1.29 daniel 1897: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1898: *
1899: * With namespace:
1900: *
1901: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
1902: *
1903: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.2 veillard 1904: */
1905:
1.16 daniel 1906: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.17 daniel 1907: const CHAR *q;
1908: CHAR *ns, *name;
1.3 veillard 1909: xmlDtdPtr dtd = NULL;
1.2 veillard 1910: xmlNodePtr ret = NULL;
1911:
1.16 daniel 1912: if (ctxt->cur[0] != '<') return(NULL);
1913: ctxt->cur++;
1.3 veillard 1914:
1.29 daniel 1915: name = xmlNamespaceParseQName(ctxt, &ns);
1916: if (ns != NULL) {
1.3 veillard 1917: /*
1918: * Search the DTD associated to ns.
1919: */
1.16 daniel 1920: dtd = xmlSearchDtd(ctxt->doc, ns);
1.3 veillard 1921: if (dtd == NULL)
1.31 ! daniel 1922: xmlParserError(ctxt, "Start tag : Couldn't find namespace %s\n",ns);
1.3 veillard 1923: free(ns);
1.29 daniel 1924: }
1.3 veillard 1925:
1926: ret = xmlNewNode(dtd, name, NULL);
1.2 veillard 1927:
1.3 veillard 1928: /*
1929: * Now parse the attributes, it ends up with the ending
1930: *
1931: * (S Attribute)* S?
1932: */
1.16 daniel 1933: SKIP_BLANKS(ctxt->cur);
1934: while ((IS_CHAR(ctxt->cur[0])) &&
1935: (ctxt->cur[0] != '>') &&
1936: ((ctxt->cur[0] != '/') || (ctxt->cur[1] != '>'))) {
1.29 daniel 1937: const CHAR *q = ctxt->cur;
1938:
1939: xmlParseAttribute(ctxt, ret);
1940: SKIP_BLANKS(ctxt->cur);
1941:
1942: if (q == ctxt->cur) {
1.31 ! daniel 1943: xmlParserError(ctxt,
! 1944: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 1945: break;
1.3 veillard 1946: }
1947: }
1948:
1949: return(ret);
1950: }
1951:
1952: /*
1.27 daniel 1953: * xmlParseEndTag: parse an end of tag
1954: *
1955: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 1956: *
1957: * With namespace
1958: *
1959: * [9] ETag ::= '</' QName S? '>'
1.7 veillard 1960: */
1961:
1.16 daniel 1962: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
1.17 daniel 1963: const CHAR *q;
1964: CHAR *ns, *name;
1.7 veillard 1965: xmlDtdPtr dtd = NULL;
1966:
1967: *dtdPtr = NULL;
1968: *tagPtr = NULL;
1969:
1.27 daniel 1970: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1.31 ! daniel 1971: xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 1972: return;
1973: }
1974: ctxt->cur += 2;
1.7 veillard 1975:
1.29 daniel 1976: name = xmlNamespaceParseQName(ctxt, &ns);
1977: if (ns != NULL) {
1.7 veillard 1978: /*
1979: * Search the DTD associated to ns.
1980: */
1.16 daniel 1981: dtd = xmlSearchDtd(ctxt->doc, ns);
1.7 veillard 1982: if (dtd == NULL)
1.31 ! daniel 1983: xmlParserError(ctxt, "End tag : Couldn't find namespace %s\n", ns);
1.7 veillard 1984: free(ns);
1.29 daniel 1985: }
1.7 veillard 1986:
1987: *dtdPtr = dtd;
1988: *tagPtr = name;
1989:
1990: /*
1991: * We should definitely be at the ending "S? '>'" part
1992: */
1.16 daniel 1993: SKIP_BLANKS(ctxt->cur);
1994: if ((!IS_CHAR(ctxt->cur[0])) || (ctxt->cur[0] != '>')) {
1.31 ! daniel 1995: xmlParserError(ctxt, "End tag : expected '>'\n");
1.7 veillard 1996: } else
1.16 daniel 1997: ctxt->cur++;
1.7 veillard 1998:
1999: return;
2000: }
2001:
2002: /*
1.3 veillard 2003: * xmlParseCDSect: escaped pure raw content.
1.29 daniel 2004: *
2005: * [18] CDSect ::= CDStart CData CDEnd
2006: *
2007: * [19] CDStart ::= '<![CDATA['
2008: *
2009: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2010: *
2011: * [21] CDEnd ::= ']]>'
1.3 veillard 2012: */
1.16 daniel 2013: CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2014: const CHAR *r, *s, *base;
2015: CHAR *ret;
1.3 veillard 2016:
1.29 daniel 2017: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2018: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
2019: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
2020: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
2021: (ctxt->cur[8] == '[')) {
2022: ctxt->cur += 9;
2023: } else
2024: return(NULL);
1.16 daniel 2025: base = ctxt->cur;
2026: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 2027: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2028: return(NULL);
2029: }
1.16 daniel 2030: r = ctxt->cur++;
2031: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 2032: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2033: return(NULL);
2034: }
1.16 daniel 2035: s = ctxt->cur++;
2036: while (IS_CHAR(ctxt->cur[0]) &&
2037: ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) {
2038: r++;s++;ctxt->cur++;
1.3 veillard 2039: }
1.16 daniel 2040: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 2041: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2042: return(NULL);
2043: }
1.16 daniel 2044: ret = xmlStrndup(base, ctxt->cur-base);
2045:
1.2 veillard 2046: return(ret);
2047: }
2048:
2049: /*
2050: * xmlParseContent: a content is
2051: * (element | PCData | Reference | CDSect | PI | Comment)
2052: *
1.27 daniel 2053: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2054: */
2055:
1.27 daniel 2056: void xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 2057: const CHAR *q;
2058: CHAR *data = NULL;
1.2 veillard 2059: xmlNodePtr ret = NULL;
2060:
1.27 daniel 2061: while ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1.29 daniel 2062: const CHAR *test;
1.27 daniel 2063: ret = NULL;
2064: data = NULL;
2065:
2066: /*
2067: * First case : a Processing Instruction.
2068: */
2069: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
2070: xmlParsePI(ctxt);
2071: }
2072: /*
2073: * Second case : a CDSection
2074: */
2075: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2076: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
2077: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
2078: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
2079: (ctxt->cur[8] == '[')) {
2080: data = xmlParseCDSect(ctxt);
2081: }
2082: /*
2083: * Third case : a comment
2084: */
2085: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2086: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
1.31 ! daniel 2087: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2088: }
2089: /*
2090: * Fourth case : a sub-element.
2091: */
2092: else if (ctxt->cur[0] == '<') {
2093: ret = xmlParseElement(ctxt);
2094: }
2095: /*
2096: * Last case, text. Note that References are handled directly.
2097: */
2098: else {
2099: q = ctxt->cur;
2100: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
2101:
2102: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 2103: xmlParserError(ctxt, "Truncated content\n%.50s\n", q);
1.27 daniel 2104: return;
2105: }
1.3 veillard 2106:
1.27 daniel 2107: /*
2108: * Do the Entities decoding...
2109: */
2110: data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
1.3 veillard 2111: }
1.14 veillard 2112:
2113: /*
1.27 daniel 2114: * Handle the data if any. If there is no child
2115: * add it as content, otherwise create a new node of type text.
1.14 veillard 2116: */
1.27 daniel 2117: if (data != NULL)
2118: data = xmlHandleData(data);
2119: if (data != NULL) {
2120: if (node->childs == NULL)
2121: xmlNodeSetContent(node, data);
2122: else
2123: ret = xmlNewText(data);
2124: free(data);
2125: }
2126: if (ret != NULL)
2127: xmlAddChild(node, ret);
1.29 daniel 2128: if (test == ctxt->cur) {
1.31 ! daniel 2129: xmlParserError(ctxt, "detected an error in element content\n");
1.29 daniel 2130: break;
2131: }
1.3 veillard 2132: }
1.2 veillard 2133: }
2134:
2135: /*
2136: * xmlParseElement: parse an XML element
1.26 daniel 2137: *
2138: * [39] element ::= EmptyElemTag | STag content ETag
2139: *
2140: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 2141: */
1.26 daniel 2142:
1.2 veillard 2143:
1.16 daniel 2144: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2145: xmlNodePtr ret;
1.17 daniel 2146: const CHAR *openTag = ctxt->cur;
1.27 daniel 2147: CHAR *endTag;
2148: xmlDtdPtr endDtd;
1.2 veillard 2149:
1.16 daniel 2150: ret = xmlParseStartTag(ctxt);
1.3 veillard 2151: if (ret == NULL) {
2152: return(NULL);
2153: }
1.2 veillard 2154:
2155: /*
2156: * Check for an Empty Element.
2157: */
1.16 daniel 2158: if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
2159: ctxt->cur += 2;
1.2 veillard 2160: return(ret);
2161: }
1.16 daniel 2162: if (ctxt->cur[0] == '>') ctxt->cur++;
1.2 veillard 2163: else {
1.31 ! daniel 2164: xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
1.16 daniel 2165: return(NULL);
1.2 veillard 2166: }
2167:
2168: /*
2169: * Parse the content of the element:
2170: */
1.27 daniel 2171: xmlParseContent(ctxt, ret);
1.16 daniel 2172: if (!IS_CHAR(ctxt->cur[0])) {
1.31 ! daniel 2173: xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
! 2174: openTag);
1.16 daniel 2175: return(NULL);
1.2 veillard 2176: }
2177:
2178: /*
1.27 daniel 2179: * parse the end of tag: '</' should be here.
1.2 veillard 2180: */
1.27 daniel 2181: xmlParseEndTag(ctxt, &endDtd, &endTag);
1.7 veillard 2182:
1.27 daniel 2183: /*
2184: * Check that the Name in the ETag is the same as in the STag.
2185: */
2186: if (endDtd != ret->dtd) {
1.31 ! daniel 2187: xmlParserError(ctxt,
! 2188: "Start and End tags don't use the same DTD\n%.30s\n%.30s\n",
! 2189: openTag, endTag);
1.27 daniel 2190: }
2191: if (strcmp(ret->name, endTag)) {
1.31 ! daniel 2192: xmlParserError(ctxt,
! 2193: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
! 2194: openTag, endTag);
1.27 daniel 2195: }
1.7 veillard 2196:
1.27 daniel 2197: if ( endTag != NULL )
2198: free(endTag);
1.2 veillard 2199:
2200: return(ret);
2201: }
2202:
2203: /*
1.29 daniel 2204: * xmlParseVersionNum: parse the XML version value.
2205: *
2206: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2207: */
2208: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
2209: const CHAR *q = ctxt->cur;
2210: CHAR *ret;
2211:
2212: while (IS_CHAR(ctxt->cur[0]) &&
2213: (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2214: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z')) ||
2215: ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9')) ||
1.31 ! daniel 2216: (ctxt->cur[0] == '_') || (ctxt->cur[0] == '.') ||
! 2217: (ctxt->cur[0] == ':') || (ctxt->cur[0] == '-'))) ctxt->cur++;
1.29 daniel 2218: ret = xmlStrndup(q, ctxt->cur - q);
2219: return(ret);
2220: }
2221:
2222: /*
2223: * xmlParseVersionInfo: parse the XML version.
2224: *
2225: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2226: *
2227: * [25] Eq ::= S? '=' S?
2228: */
2229:
2230: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2231: CHAR *version = NULL;
2232: const CHAR *q;
2233:
2234: if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
2235: (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
2236: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
2237: (ctxt->cur[6] == 'n')) {
2238: ctxt->cur += 7;
2239: SKIP_BLANKS(ctxt->cur);
1.31 ! daniel 2240: if (ctxt->cur[0] != '=') {
! 2241: xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
! 2242: return(NULL);
! 2243: }
! 2244: ctxt->cur++;
! 2245: SKIP_BLANKS(ctxt->cur);
1.29 daniel 2246: if (ctxt->cur[0] == '"') {
2247: ctxt->cur++;
2248: q = ctxt->cur;
2249: version = xmlParseVersionNum(ctxt);
1.31 ! daniel 2250: if (ctxt->cur[0] != '"')
! 2251: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2252: else
2253: ctxt->cur++;
1.31 ! daniel 2254: } else if (ctxt->cur[0] == '\''){
1.29 daniel 2255: ctxt->cur++;
2256: q = ctxt->cur;
2257: version = xmlParseVersionNum(ctxt);
1.31 ! daniel 2258: if (ctxt->cur[0] != '\'')
! 2259: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2260: else
2261: ctxt->cur++;
1.31 ! daniel 2262: } else {
! 2263: xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 2264: }
2265: }
2266: return(version);
2267: }
2268:
2269: /*
2270: * xmlParseEncName: parse the XML encoding name
2271: *
2272: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2273: */
2274: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
2275: const CHAR *q = ctxt->cur;
2276: CHAR *ret = NULL;
2277:
2278: if (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2279: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z'))) {
2280: ctxt->cur++;
2281: while (IS_CHAR(ctxt->cur[0]) &&
2282: (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2283: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z')) ||
2284: ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9')) ||
2285: (ctxt->cur[0] == '-'))) ctxt->cur++;
2286: ret = xmlStrndup(q, ctxt->cur - q);
2287: } else {
1.31 ! daniel 2288: xmlParserError(ctxt, "Invalid XML encoding name\n");
1.29 daniel 2289: }
2290: return(ret);
2291: }
2292:
2293: /*
2294: * xmlParseEncodingDecl: parse the XML encoding declaration
2295: *
2296: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2297: */
2298:
2299: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2300: CHAR *encoding = NULL;
2301: const CHAR *q;
2302:
2303: SKIP_BLANKS(ctxt->cur);
2304: if ((ctxt->cur[0] == 'e') && (ctxt->cur[1] == 'n') &&
2305: (ctxt->cur[2] == 'c') && (ctxt->cur[3] == 'o') &&
2306: (ctxt->cur[4] == 'd') && (ctxt->cur[5] == 'i') &&
2307: (ctxt->cur[6] == 'n') && (ctxt->cur[7] == 'g')) {
2308: ctxt->cur += 8;
2309: SKIP_BLANKS(ctxt->cur);
1.31 ! daniel 2310: if (ctxt->cur[0] != '=') {
! 2311: xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
! 2312: return(NULL);
! 2313: }
! 2314: ctxt->cur++;
! 2315: SKIP_BLANKS(ctxt->cur);
1.29 daniel 2316: if (ctxt->cur[0] == '"') {
2317: ctxt->cur++;
2318: q = ctxt->cur;
2319: encoding = xmlParseEncName(ctxt);
1.31 ! daniel 2320: if (ctxt->cur[0] != '"')
! 2321: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2322: else
2323: ctxt->cur++;
1.31 ! daniel 2324: } else if (ctxt->cur[0] == '\''){
1.29 daniel 2325: ctxt->cur++;
2326: q = ctxt->cur;
2327: encoding = xmlParseEncName(ctxt);
1.31 ! daniel 2328: if (ctxt->cur[0] != '\'')
! 2329: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2330: else
2331: ctxt->cur++;
1.31 ! daniel 2332: } else if (ctxt->cur[0] == '"'){
! 2333: xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 2334: }
2335: }
2336: return(encoding);
2337: }
2338:
2339: /*
2340: * xmlParseSDDecl: parse the XML standalone declaration
2341: *
2342: * [32] SDDecl ::= S 'standalone' Eq
2343: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2344: */
2345:
2346: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2347: int standalone = -1;
2348:
2349: SKIP_BLANKS(ctxt->cur);
2350: if ((ctxt->cur[0] == 's') && (ctxt->cur[1] == 't') &&
2351: (ctxt->cur[2] == 'a') && (ctxt->cur[3] == 'n') &&
2352: (ctxt->cur[4] == 'd') && (ctxt->cur[5] == 'a') &&
2353: (ctxt->cur[6] == 'l') && (ctxt->cur[7] == 'o') &&
2354: (ctxt->cur[8] == 'n') && (ctxt->cur[9] == 'e')) {
2355: ctxt->cur += 10;
2356: SKIP_BLANKS(ctxt->cur);
2357: if (ctxt->cur[0] == '"') {
2358: ctxt->cur++;
2359: } else if (ctxt->cur[0] == '\''){
2360: ctxt->cur++;
2361: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'o')) {
2362: standalone = 0;
2363: } else if ((ctxt->cur[0] == 'y') && (ctxt->cur[1] == 'e') &&
2364: (ctxt->cur[2] == 's')) {
2365: standalone = 1;
2366: } else {
1.31 ! daniel 2367: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2368: }
2369: if (ctxt->cur[0] != '\'')
1.31 ! daniel 2370: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2371: else
2372: ctxt->cur++;
2373: } else if (ctxt->cur[0] == '"'){
2374: ctxt->cur++;
2375: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'o')) {
2376: standalone = 0;
2377: } else if ((ctxt->cur[0] == 'y') && (ctxt->cur[1] == 'e') &&
2378: (ctxt->cur[2] == 's')) {
2379: standalone = 1;
2380: } else {
1.31 ! daniel 2381: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2382: }
2383: if (ctxt->cur[0] != '"')
1.31 ! daniel 2384: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2385: else
2386: ctxt->cur++;
2387: }
2388: }
2389: return(standalone);
2390: }
2391:
2392: /*
1.1 veillard 2393: * xmlParseXMLDecl: parse an XML declaration header
1.29 daniel 2394: *
2395: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 2396: */
2397:
1.16 daniel 2398: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 2399: CHAR *version;
1.29 daniel 2400: CHAR *encoding;
2401: int standalone;
1.1 veillard 2402:
2403: /*
1.19 daniel 2404: * We know that '<?xml' is here.
1.1 veillard 2405: */
1.16 daniel 2406: ctxt->cur += 5;
1.1 veillard 2407:
1.16 daniel 2408: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2409:
2410: /*
1.29 daniel 2411: * We should have the VersionInfo here.
1.1 veillard 2412: */
1.29 daniel 2413: version = xmlParseVersionInfo(ctxt);
2414: if (version == NULL)
1.16 daniel 2415: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.29 daniel 2416: else {
2417: ctxt->doc = xmlNewDoc(version);
2418: free(version);
2419: }
2420:
2421: /*
2422: * We may have the encoding declaration
2423: */
2424: encoding = xmlParseEncodingDecl(ctxt);
2425: if (encoding != NULL) {
1.31 ! daniel 2426: /* TODO !!!! encoding support ... */
1.29 daniel 2427: free(encoding);
1.1 veillard 2428: }
2429:
2430: /*
1.29 daniel 2431: * We may have the standalone status.
1.1 veillard 2432: */
1.29 daniel 2433: standalone = xmlParseSDDecl(ctxt);
1.1 veillard 2434:
1.29 daniel 2435: SKIP_BLANKS(ctxt->cur);
2436: if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
2437: ctxt->cur += 2;
1.31 ! daniel 2438: } else if (ctxt->cur[0] == '>') {
! 2439: /* Deprecated old WD ... */
! 2440: xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
! 2441: ctxt->cur++;
1.29 daniel 2442: } else {
1.31 ! daniel 2443: xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
1.29 daniel 2444: MOVETO_ENDTAG(ctxt->cur);
1.31 ! daniel 2445: ctxt->cur++;
1.29 daniel 2446: }
1.1 veillard 2447: }
2448:
2449: /*
1.22 daniel 2450: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 2451: * Misc*
2452: *
1.22 daniel 2453: * [27] Misc ::= Comment | PI | S
1.1 veillard 2454: */
2455:
1.16 daniel 2456: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
2457: while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) ||
2458: ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1.21 daniel 2459: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) ||
1.16 daniel 2460: IS_BLANK(ctxt->cur[0])) {
2461: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
2462: xmlParsePI(ctxt);
2463: } else if (IS_BLANK(ctxt->cur[0])) {
2464: ctxt->cur++;
1.1 veillard 2465: } else
1.31 ! daniel 2466: xmlParseComment(ctxt, 0);
1.1 veillard 2467: }
2468: }
2469:
2470: /*
1.16 daniel 2471: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 2472: *
1.22 daniel 2473: * [1] document ::= prolog element Misc*
1.29 daniel 2474: *
2475: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 2476: */
2477:
1.16 daniel 2478: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.14 veillard 2479: /*
2480: * We should check for encoding here and plug-in some
2481: * conversion code TODO !!!!
2482: */
1.1 veillard 2483:
2484: /*
2485: * Wipe out everything which is before the first '<'
2486: */
1.16 daniel 2487: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2488:
2489: /*
2490: * Check for the XMLDecl in the Prolog.
2491: */
1.16 daniel 2492: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.19 daniel 2493: (ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
2494: (ctxt->cur[4] == 'l')) {
2495: xmlParseXMLDecl(ctxt);
2496: /* SKIP_EOL(cur); */
2497: SKIP_BLANKS(ctxt->cur);
2498: } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.16 daniel 2499: (ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
2500: (ctxt->cur[4] == 'L')) {
1.19 daniel 2501: /*
2502: * The first drafts were using <?XML and the final W3C REC
2503: * now use <?xml ...
2504: */
1.16 daniel 2505: xmlParseXMLDecl(ctxt);
1.1 veillard 2506: /* SKIP_EOL(cur); */
1.16 daniel 2507: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2508: } else {
1.16 daniel 2509: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 2510: }
2511:
2512: /*
2513: * The Misc part of the Prolog
1.21 daniel 2514: * Misc*
2515: * Misc ::= Comment | PI | S
1.1 veillard 2516: */
1.16 daniel 2517: xmlParseMisc(ctxt);
1.1 veillard 2518:
2519: /*
1.29 daniel 2520: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 2521: * (doctypedecl Misc*)?
2522: */
1.22 daniel 2523: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2524: (ctxt->cur[2] == 'D') && (ctxt->cur[3] == 'O') &&
2525: (ctxt->cur[4] == 'C') && (ctxt->cur[5] == 'T') &&
2526: (ctxt->cur[6] == 'Y') && (ctxt->cur[7] == 'P') &&
2527: (ctxt->cur[8] == 'E')) {
2528: xmlParseDocTypeDecl(ctxt);
2529: xmlParseMisc(ctxt);
1.21 daniel 2530: }
2531:
2532: /*
2533: * Time to start parsing the tree itself
1.1 veillard 2534: */
1.16 daniel 2535: ctxt->doc->root = xmlParseElement(ctxt);
2536:
2537: return(0);
2538: }
2539:
2540: /*
2541: * xmlParseDoc : parse an XML in-memory document and build a tree.
2542: */
2543:
2544: xmlDocPtr xmlParseDoc(CHAR *cur) {
2545: xmlDocPtr ret;
2546: xmlParserCtxtPtr ctxt;
2547:
2548: if (cur == NULL) return(NULL);
1.1 veillard 2549:
1.16 daniel 2550: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2551: if (ctxt == NULL) {
2552: perror("malloc");
2553: return(NULL);
2554: }
2555:
1.19 daniel 2556: xmlInitParserCtxt(ctxt);
1.16 daniel 2557: ctxt->base = cur;
2558: ctxt->cur = cur;
2559:
2560: xmlParseDocument(ctxt);
2561: ret = ctxt->doc;
1.20 daniel 2562: free(ctxt->nodes);
1.16 daniel 2563: free(ctxt);
2564:
1.1 veillard 2565: return(ret);
2566: }
2567:
1.9 httpng 2568: /*
2569: * xmlParseFile : parse an XML file and build a tree.
2570: */
2571:
2572: xmlDocPtr xmlParseFile(const char *filename) {
2573: xmlDocPtr ret;
1.20 daniel 2574: #ifdef HAVE_ZLIB_H
2575: gzFile input;
2576: #else
1.9 httpng 2577: int input;
1.20 daniel 2578: #endif
1.9 httpng 2579: int res;
2580: struct stat buf;
2581: char *buffer;
1.16 daniel 2582: xmlParserCtxtPtr ctxt;
1.9 httpng 2583:
1.11 veillard 2584: res = stat(filename, &buf);
1.9 httpng 2585: if (res < 0) return(NULL);
2586:
1.20 daniel 2587: #ifdef HAVE_ZLIB_H
2588: retry_bigger:
2589: buffer = malloc((buf.st_size * 20) + 100);
2590: #else
1.9 httpng 2591: buffer = malloc(buf.st_size + 100);
1.20 daniel 2592: #endif
1.9 httpng 2593: if (buffer == NULL) {
2594: perror("malloc");
2595: return(NULL);
2596: }
2597:
2598: memset(buffer, 0, sizeof(buffer));
1.20 daniel 2599: #ifdef HAVE_ZLIB_H
2600: input = gzopen (filename, "r");
2601: if (input == NULL) {
2602: fprintf (stderr, "Cannot read file %s :\n", filename);
2603: perror ("gzopen failed");
2604: return(NULL);
2605: }
2606: #else
1.9 httpng 2607: input = open (filename, O_RDONLY);
2608: if (input < 0) {
2609: fprintf (stderr, "Cannot read file %s :\n", filename);
2610: perror ("open failed");
2611: return(NULL);
2612: }
1.20 daniel 2613: #endif
2614: #ifdef HAVE_ZLIB_H
2615: res = gzread(input, buffer, 20 * buf.st_size);
2616: #else
1.9 httpng 2617: res = read(input, buffer, buf.st_size);
1.20 daniel 2618: #endif
1.9 httpng 2619: if (res < 0) {
2620: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 2621: #ifdef HAVE_ZLIB_H
2622: perror ("gzread failed");
2623: #else
1.9 httpng 2624: perror ("read failed");
1.20 daniel 2625: #endif
1.9 httpng 2626: return(NULL);
2627: }
1.20 daniel 2628: #ifdef HAVE_ZLIB_H
2629: gzclose(input);
2630: if (res >= 20 * buf.st_size) {
2631: free(buffer);
2632: buf.st_size *= 2;
2633: goto retry_bigger;
2634: }
2635: buf.st_size = res;
2636: #else
1.9 httpng 2637: close(input);
1.20 daniel 2638: #endif
2639:
1.9 httpng 2640:
1.16 daniel 2641: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2642: if (ctxt == NULL) {
2643: perror("malloc");
2644: return(NULL);
2645: }
1.9 httpng 2646: buffer[buf.st_size] = '\0';
1.16 daniel 2647:
1.19 daniel 2648: xmlInitParserCtxt(ctxt);
1.17 daniel 2649: ctxt->filename = filename;
1.16 daniel 2650: ctxt->base = buffer;
2651: ctxt->cur = buffer;
2652:
2653: xmlParseDocument(ctxt);
2654: ret = ctxt->doc;
1.9 httpng 2655: free(buffer);
1.20 daniel 2656: free(ctxt->nodes);
2657: free(ctxt);
2658:
2659: return(ret);
2660: }
2661:
2662: /*
2663: * xmlParseFile : parse an XML memory block and build a tree.
2664: */
2665:
2666: xmlDocPtr xmlParseMemory(char *buffer, int size) {
2667: xmlDocPtr ret;
2668: xmlParserCtxtPtr ctxt;
2669:
2670: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2671: if (ctxt == NULL) {
2672: perror("malloc");
2673: return(NULL);
2674: }
2675:
2676: buffer[size - 1] = '\0';
2677:
2678: xmlInitParserCtxt(ctxt);
2679: ctxt->base = buffer;
2680: ctxt->cur = buffer;
2681:
2682: xmlParseDocument(ctxt);
2683: ret = ctxt->doc;
2684: free(ctxt->nodes);
1.16 daniel 2685: free(ctxt);
2686:
1.9 httpng 2687: return(ret);
1.17 daniel 2688: }
2689:
2690:
2691:
2692:
2693: /* Initialize parser context */
2694: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2695: {
1.19 daniel 2696: int i;
2697:
2698: ctxt->filename = NULL;
2699: ctxt->base = NULL;
2700: ctxt->cur = NULL;
2701: ctxt->line = 1;
2702: ctxt->col = 1;
2703: ctxt->doc = NULL;
2704: ctxt->depth = 0;
2705: ctxt->max_depth = 10;
2706: ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
2707: if (ctxt->nodes == NULL) {
2708: fprintf(stderr, "malloc of %d byte failed\n",
2709: ctxt->max_depth * sizeof(xmlNodePtr));
2710: ctxt->max_depth = 0;
2711: } else {
2712: for (i = 0;i < ctxt->max_depth;i++)
2713: ctxt->nodes[i] = NULL;
2714: }
1.17 daniel 2715: }
2716:
2717:
1.19 daniel 2718: /*
2719: * Clear (release owned resources) and reinitialize context
2720: */
1.17 daniel 2721: void xmlClearParserCtxt(xmlParserCtxtPtr ctx)
2722: {
1.19 daniel 2723: xmlInitParserCtxt(ctx);
1.17 daniel 2724: }
2725:
2726:
1.19 daniel 2727: /*
2728: * Setup the parser context to parse a new buffer; Clears any prior
2729: * contents from the parser context. The buffer parameter must not be
2730: * NULL, but the filename parameter can be
2731: */
1.17 daniel 2732: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
2733: const char* filename)
2734: {
2735: xmlClearParserCtxt(ctxt);
2736: ctxt->base = buffer;
2737: ctxt->cur = buffer;
2738: ctxt->filename = filename;
2739: }
2740:
Webmaster