Annotation of XML/parser.c, revision 1.34
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.34 ! daniel 6: * $Id: parser.c,v 1.33 1998/08/06 01:30:55 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
34: /*
35: * A few macros needed to help building the parser.
36: */
37:
/*
 * PUSH_AND_POP(type, name): generate the two stack helpers name##Push()
 * and name##Pop(). They work on three members of the parser context:
 *   ctxt->name##tab  the storage array,
 *   ctxt->name##Nr   the number of entries in use,
 *   ctxt->name##Max  the allocated capacity.
 *
 * name##Push(value) doubles the storage when it is full, stores value on
 * top and returns the index it was stored at; the process exits when
 * realloc() fails. name##Pop() removes and returns the top entry, or
 * returns 0 when the stack is empty.
 *
 * All three members are derived from `name' only; `type' is used solely as
 * the element type. The generated functions refer to an identifier called
 * `ctxt' that is not one of their parameters, so it has to be visible at
 * the point where the macro is expanded.
 */
#define PUSH_AND_POP(type, name)					\
int name##Push(type value) {						\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        ctxt->name##Max *= 2;						\
        ctxt->name##tab = (void *) realloc(ctxt->name##tab,		\
                     ctxt->name##Max * sizeof(ctxt->name##tab[0]));	\
        if (ctxt->name##tab == NULL) {					\
            fprintf(stderr, "realloc failed !\n");			\
            exit(1);							\
        }								\
    }									\
    ctxt->name##tab[ctxt->name##Nr] = value;				\
    return(ctxt->name##Nr++);						\
}									\
type name##Pop() {							\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    return(ctxt->name##tab[ctxt->name##Nr]);				\
}
57:
1.1 veillard 58: #ifdef UNICODE
1.30 daniel 59: /************************************************************************
60: * *
61: * UNICODE version of the macros. *
62: * *
63: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The three legal ranges are spelled out explicitly so that the surrogate
 * block (0xD800-0xDFFF) and values above 0x10FFFF are rejected.
 * The argument is evaluated several times: pass a side-effect-free
 * expression.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
72:
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * SKIP_BLANKS(p): advance the pointer p past a run of the four white
 * space characters of production [3]. No other code point is skipped.
 * p must be a modifiable lvalue; it is evaluated several times.
 */
#define SKIP_BLANKS(p)							\
    while ((*(p) == 0x20) || (*(p) == 0x09) || (*(p) == 0xa) ||	\
           (*(p) == 0xd)) (p)++;
1.1 veillard 79:
1.22 daniel 80: /*
1.30 daniel 81: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 82: *
1.30 daniel 83: * VI is your friend !
84: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
85: * and
86: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 87: */
/*
 * IS_BASECHAR(c) expands to one parenthesised boolean expression: an ||
 * chain of closed ranges and single code points, from 0x0041-0x005A up to
 * 0xAC00-0xD7A3. The argument is substituted into every comparison, so it
 * is evaluated many times: pass a side-effect-free expression.
 */
1.1 veillard 88: #define IS_BASECHAR(c) \
1.30 daniel 89: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
90: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
91: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
92: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
93: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
94: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
95: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
96: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
97: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
98: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
99: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
100: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
101: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
102: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
103: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
104: ((c) == 0x0386) || \
105: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
106: ((c) == 0x038C) || \
107: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
108: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
109: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
110: ((c) == 0x03DA) || \
111: ((c) == 0x03DC) || \
112: ((c) == 0x03DE) || \
113: ((c) == 0x03E0) || \
114: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
115: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
116: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
117: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
118: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
119: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
120: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
121: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
122: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
123: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
124: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
125: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
126: ((c) == 0x0559) || \
127: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
128: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
129: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
130: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
131: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
132: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
133: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
134: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
135: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
136: ((c) == 0x06D5) || \
137: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
138: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
139: ((c) == 0x093D) || \
140: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
141: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
142: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
143: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
144: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
145: ((c) == 0x09B2) || \
146: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
147: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
148: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
149: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
150: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
151: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
152: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
153: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
154: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
155: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
156: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
157: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
158: ((c) == 0x0A5E) || \
159: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
160: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
161: ((c) == 0x0A8D) || \
162: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
163: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
164: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
165: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
166: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
167: ((c) == 0x0ABD) || \
168: ((c) == 0x0AE0) || \
169: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
170: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
171: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
172: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
173: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
174: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
175: ((c) == 0x0B3D) || \
176: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
177: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
178: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
179: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
180: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
181: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
182: ((c) == 0x0B9C) || \
183: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
184: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
185: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
186: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
187: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
188: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
189: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
190: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
191: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
192: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
193: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
194: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
195: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
196: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
197: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
198: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
199: ((c) == 0x0CDE) || \
200: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
201: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
202: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
203: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
204: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
205: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
206: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
207: ((c) == 0x0E30) || \
208: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
209: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
210: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
211: ((c) == 0x0E84) || \
212: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
213: ((c) == 0x0E8A) || \
214: ((c) == 0x0E8D) || \
215: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
216: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
217: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
218: ((c) == 0x0EA5) || \
219: ((c) == 0x0EA7) || \
220: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
221: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
222: ((c) == 0x0EB0) || \
223: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
224: ((c) == 0x0EBD) || \
225: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
226: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
227: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
228: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
229: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
230: ((c) == 0x1100) || \
231: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
232: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
233: ((c) == 0x1109) || \
234: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
235: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
236: ((c) == 0x113C) || \
237: ((c) == 0x113E) || \
238: ((c) == 0x1140) || \
239: ((c) == 0x114C) || \
240: ((c) == 0x114E) || \
241: ((c) == 0x1150) || \
242: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
243: ((c) == 0x1159) || \
244: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
245: ((c) == 0x1163) || \
246: ((c) == 0x1165) || \
247: ((c) == 0x1167) || \
248: ((c) == 0x1169) || \
249: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
250: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
251: ((c) == 0x1175) || \
252: ((c) == 0x119E) || \
253: ((c) == 0x11A8) || \
254: ((c) == 0x11AB) || \
255: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
256: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
257: ((c) == 0x11BA) || \
258: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
259: ((c) == 0x11EB) || \
260: ((c) == 0x11F0) || \
261: ((c) == 0x11F9) || \
262: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
263: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
264: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
265: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
266: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
267: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
268: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
269: ((c) == 0x1F59) || \
270: ((c) == 0x1F5B) || \
271: ((c) == 0x1F5D) || \
272: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
273: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
274: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
275: ((c) == 0x1FBE) || \
276: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
277: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
278: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
279: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
280: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
281: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
282: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
283: ((c) == 0x2126) || \
284: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
285: ((c) == 0x212E) || \
286: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
287: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
288: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
289: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
290: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 291:
1.22 daniel 292: /*
293: * [88] Digit ::= ... long list see REC ...
294: */
/*
 * IS_DIGIT(c) expands to one parenthesised boolean expression that is true
 * when c lies in any of the closed ranges listed below, the first being
 * 0x0030-0x0039. The argument appears in every comparison, so it is
 * evaluated repeatedly: pass a side-effect-free expression.
 */
1.30 daniel 295: #define IS_DIGIT(c) \
296: ((((c) >= 0x0030) && ((c) <= 0x0039)) || \
297: (((c) >= 0x0660) && ((c) <= 0x0669)) || \
298: (((c) >= 0x06F0) && ((c) <= 0x06F9)) || \
299: (((c) >= 0x0966) && ((c) <= 0x096F)) || \
300: (((c) >= 0x09E6) && ((c) <= 0x09EF)) || \
301: (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || \
302: (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || \
303: (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || \
304: (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || \
305: (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || \
306: (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || \
307: (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || \
308: (((c) >= 0x0E50) && ((c) <= 0x0E59)) || \
309: (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || \
310: (((c) >= 0x0F20) && ((c) <= 0x0F29)))
1.1 veillard 311:
1.22 daniel 312: /*
313: * [87] CombiningChar ::= ... long list see REC ...
314: */
/*
 * IS_COMBINING(c) expands to one parenthesised boolean expression: an ||
 * chain of closed ranges and single code points, from 0x0300-0x0345 up to
 * 0x309A. The argument appears in every comparison, so it is evaluated
 * repeatedly: pass a side-effect-free expression.
 */
1.30 daniel 315: #define IS_COMBINING(c) \
316: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
317: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
318: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
319: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
320: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
321: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
322: ((c) == 0x05BF) || \
323: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
324: ((c) == 0x05C4) || \
325: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
326: ((c) == 0x0670) || \
327: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
328: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
329: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
330: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
331: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
332: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
333: ((c) == 0x093C) || \
334: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
335: ((c) == 0x094D) || \
336: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
337: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
338: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
339: ((c) == 0x09BC) || \
340: ((c) == 0x09BE) || \
341: ((c) == 0x09BF) || \
342: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
343: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
344: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
345: ((c) == 0x09D7) || \
346: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
347: ((c) == 0x0A02) || \
348: ((c) == 0x0A3C) || \
349: ((c) == 0x0A3E) || \
350: ((c) == 0x0A3F) || \
351: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
352: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
353: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
354: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
355: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
356: ((c) == 0x0ABC) || \
357: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
358: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
359: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
360: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
361: ((c) == 0x0B3C) || \
362: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
363: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
364: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
365: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
366: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
367: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
368: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
369: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
370: ((c) == 0x0BD7) || \
371: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
372: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
373: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
374: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
375: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
376: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
377: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
378: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
379: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
380: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
381: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
382: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
383: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
384: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
385: ((c) == 0x0D57) || \
386: ((c) == 0x0E31) || \
387: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
388: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
389: ((c) == 0x0EB1) || \
390: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
391: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
392: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
393: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
394: ((c) == 0x0F35) || \
395: ((c) == 0x0F37) || \
396: ((c) == 0x0F39) || \
397: ((c) == 0x0F3E) || \
398: ((c) == 0x0F3F) || \
399: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
400: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
401: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
402: ((c) == 0x0F97) || \
403: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
404: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
405: ((c) == 0x0FB9) || \
406: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
407: ((c) == 0x20E1) || \
408: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
409: ((c) == 0x3099) || \
410: ((c) == 0x309A))
1.3 veillard 411:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * IS_EXTENDER(c) is true for exactly the code points of production [89].
 * The argument is evaluated several times: pass a side-effect-free
 * expression.
 */
#define IS_EXTENDER(c)							\
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||		\
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||		\
     ((c) == 0xec6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||				\
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 424:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * IS_IDEOGRAPHIC(c) is true for exactly the code points of production
 * [86]; no other range is accepted.
 * The argument is evaluated several times: pass a side-effect-free
 * expression.
 */
#define IS_IDEOGRAPHIC(c)						\
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||				\
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||				\
      ((c) == 0x3007))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
438:
439: #else
1.30 daniel 440: /************************************************************************
441: * *
442: * 8bits / ASCII version of the macros. *
443: * *
444: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * 8-bit version: tab, line feed, carriage return, or any value >= 0x20.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))

/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8-bit version: the same five ranges below 0x100 that the UNICODE
 * version of this macro starts with (A-Z, a-z, 0xC0-0xD6, 0xD8-0xF6,
 * 0xF8-0xFF).
 */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7a)) ||				\
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||				\
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||				\
     (((c) >= 0xf8) && ((c) <= 0xff)))

/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8-bit version: ASCII '0'..'9' only.
 */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8-bit version: there is no ideographic range, so Letter is BaseChar.
 */
#define IS_LETTER(c) IS_BASECHAR(c)


/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8-bit version: always false.
 */
#define IS_COMBINING(c) 0

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8-bit version: only #xB7 is tested.
 */
#define IS_EXTENDER(c) ((c) == 0xb7)
488:
1.21 daniel 489: #endif /* !UNICODE */
1.1 veillard 490:
/*
 * White space test.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK(c) is true for space, tab, line feed and carriage return.
 */
#define IS_BLANK(c)							\
    (((c) == 0x20) ||							\
     ((c) == 0x09) ||							\
     ((c) == 0xa)  ||							\
     ((c) == 0x0D))
498:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * IS_PUBIDCHAR(c) is true for the characters allowed inside a public
 * identifier literal: space, CR, LF, ASCII letters and digits, and the
 * punctuation listed in the production.
 */
#define IS_PUBIDCHAR(c)							\
    (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||			\
     (((c) >= 'a') && ((c) <= 'z')) ||					\
     (((c) >= 'A') && ((c) <= 'Z')) ||					\
     (((c) >= '0') && ((c) <= '9')) ||					\
     ((c) == '-') || ((c) == '\'') ||					\
     ((c) == '(') || ((c) == ')')  ||					\
     ((c) == '+') || ((c) == ',')  ||					\
     ((c) == '.') || ((c) == '/')  ||					\
     ((c) == ':') || ((c) == '=')  ||					\
     ((c) == '?') || ((c) == ';')  ||					\
     ((c) == '!') || ((c) == '*')  ||					\
     ((c) == '#') || ((c) == '@')  ||					\
     ((c) == '$') || ((c) == '_')  ||					\
     ((c) == '%'))
1.1 veillard 512:
513: #define SKIP_EOL(p) \
514: if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \
515: if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
516:
/*
 * SKIP_BLANKS(p): advance p while it points at a blank (see IS_BLANK).
 *
 * The #undef drops any SKIP_BLANKS defined earlier in the file (the
 * UNICODE section defines one) so that this definition is not a
 * conflicting macro redefinition.
 */
#undef SKIP_BLANKS
#define SKIP_BLANKS(p)							\
    while (IS_BLANK(*(p))) (p)++;

/*
 * MOVETO_ENDTAG(p): advance p until it points at '>' or at a value that
 * is not accepted by IS_CHAR (for instance the terminating 0).
 */
#define MOVETO_ENDTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++;

/*
 * MOVETO_STARTTAG(p): advance p until it points at '<' or at a value that
 * is not accepted by IS_CHAR.
 */
#define MOVETO_STARTTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++;
525:
526: /*
1.3 veillard 527: * Forward declaration for recursive behaviour.
528: */
1.16 daniel 529: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.3 veillard 530:
531: /*
532: * xmlHandleData : this routine represent's the specific application
533: * behaviour when reading a piece of text.
534: *
535: * For example in WebDav, any piece made only of blanks is eliminated
536: */
537:
538: CHAR *xmlHandleData(CHAR *in) {
539: CHAR *cur;
540:
541: if (in == NULL) return(NULL);
542: cur = in;
543: while (IS_CHAR(*cur)) {
544: if (!IS_BLANK(*cur)) goto not_blank;
545: cur++;
546: }
547: free(in);
548: return(NULL);
549:
550: not_blank:
551: return(in);
552: }
553:
1.28 daniel 554: /************************************************************************
555: * *
556: * Commodity functions to handle CHARs *
557: * *
558: ************************************************************************/
559:
1.3 veillard 560: /*
1.1 veillard 561: * xmlStrndup : a strdup for array of CHAR's
562: */
563:
1.6 httpng 564: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 565: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
566:
567: if (ret == NULL) {
568: fprintf(stderr, "malloc of %d byte failed\n",
569: (len + 1) * sizeof(CHAR));
570: return(NULL);
571: }
572: memcpy(ret, cur, len * sizeof(CHAR));
573: ret[len] = 0;
574: return(ret);
575: }
576:
577: /*
578: * xmlStrdup : a strdup for CHAR's
579: */
580:
1.6 httpng 581: CHAR *xmlStrdup(const CHAR *cur) {
582: const CHAR *p = cur;
1.1 veillard 583:
584: while (IS_CHAR(*p)) p++;
585: return(xmlStrndup(cur, p - cur));
586: }
587:
588: /*
1.14 veillard 589: * xmlStrcmp : a strcmp for CHAR's
590: */
591:
592: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
593: register int tmp;
594:
595: do {
596: tmp = *str1++ - *str2++;
597: if (tmp != 0) return(tmp);
598: } while ((*str1 != 0) && (*str2 != 0));
599: return (*str1 - *str2);
600: }
601:
602: /*
603: * xmlStrncmp : a strncmp for CHAR's
604: */
605:
606: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
607: register int tmp;
608:
609: if (len <= 0) return(0);
610: do {
611: tmp = *str1++ - *str2++;
612: if (tmp != 0) return(tmp);
613: len--;
614: if (len <= 0) return(0);
615: } while ((*str1 != 0) && (*str2 != 0));
616: return (*str1 - *str2);
617: }
618:
619: /*
620: * xmlStrchr : a strchr for CHAR's
621: */
622:
623: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
624: while (*str != 0) {
625: if (*str == val) return((CHAR *) str);
626: str++;
627: }
628: return(NULL);
629: }
1.28 daniel 630:
631: /************************************************************************
632: * *
633: * Extra stuff for namespace support *
634: * Relates to http://www.w3.org/TR/WD-xml-names *
635: * *
636: ************************************************************************/
637:
638: /*
639: * xmlNamespaceParseNCName : parse an XML namespace name.
640: *
641: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
642: *
643: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
644: * CombiningChar | Extender
645: */
646:
647: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
648: const CHAR *q;
649: CHAR *ret = NULL;
650:
651: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
652: q = ctxt->cur++;
653:
654: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
655: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
656: (ctxt->cur[0] == '_') ||
657: (IS_COMBINING(ctxt->cur[0])) ||
658: (IS_EXTENDER(ctxt->cur[0])))
659: ctxt->cur++;
660:
661: ret = xmlStrndup(q, ctxt->cur - q);
662:
663: return(ret);
664: }
665:
666: /*
667: * xmlNamespaceParseQName : parse an XML qualified name
668: *
669: * [NS 5] QName ::= (Prefix ':')? LocalPart
670: *
671: * [NS 6] Prefix ::= NCName
672: *
673: * [NS 7] LocalPart ::= NCName
674: */
675:
676: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
677: CHAR *ret = NULL;
678:
679: *prefix = NULL;
680: ret = xmlNamespaceParseNCName(ctxt);
681: if (ctxt->cur[0] == ':') {
682: *prefix = ret;
683: ctxt->cur++;
684: ret = xmlNamespaceParseNCName(ctxt);
685: }
686:
687: return(ret);
688: }
689:
690: /*
691: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
692: *
693: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
694: *
695: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
696: */
697:
698: void xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
699: CHAR *name = NULL;
700:
701: if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') &&
702: (ctxt->cur[2] == 'l') && (ctxt->cur[3] == 'n') &&
703: (ctxt->cur[4] == 's')) {
704: ctxt->cur += 5;
705: if (ctxt->cur[0] == ':') {
706: ctxt->cur++;
707: name = xmlNamespaceParseNCName(ctxt);
708: }
709: }
710: }
711:
712: /************************************************************************
713: * *
714: * The parser itself *
715: * Relates to http://www.w3.org/TR/REC-xml *
716: * *
717: ************************************************************************/
1.14 veillard 718:
719: /*
1.1 veillard 720: * xmlParseName : parse an XML name.
1.22 daniel 721: *
722: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
723: * CombiningChar | Extender
724: *
725: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
726: *
727: * [6] Names ::= Name (S Name)*
1.1 veillard 728: */
729:
1.16 daniel 730: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 731: const CHAR *q;
732: CHAR *ret = NULL;
1.1 veillard 733:
1.22 daniel 734: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_') &&
735: (ctxt->cur[0] != ':')) return(NULL);
736: q = ctxt->cur++;
737:
738: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
739: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
740: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
741: (IS_COMBINING(ctxt->cur[0])) ||
742: (IS_EXTENDER(ctxt->cur[0])))
743: ctxt->cur++;
744:
745: ret = xmlStrndup(q, ctxt->cur - q);
746:
747: return(ret);
748: }
749:
750: /*
751: * xmlParseNmtoken : parse an XML Nmtoken.
752: *
753: * [7] Nmtoken ::= (NameChar)+
754: *
755: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
756: */
757:
758: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
759: const CHAR *q;
760: CHAR *ret = NULL;
761:
1.16 daniel 762: q = ctxt->cur++;
1.22 daniel 763:
1.16 daniel 764: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1.22 daniel 765: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
766: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
767: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 768: (IS_EXTENDER(ctxt->cur[0])))
769: ctxt->cur++;
1.3 veillard 770:
1.16 daniel 771: ret = xmlStrndup(q, ctxt->cur - q);
1.1 veillard 772:
1.3 veillard 773: return(ret);
1.1 veillard 774: }
775:
776: /*
1.24 daniel 777: * xmlParseEntityValue : parse a value for ENTITY decl.
778: *
779: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
780: * "'" ([^%&'] | PEReference | Reference)* "'"
781: */
782:
783: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
784: CHAR *ret = NULL;
785: const CHAR *q;
786: int needSubst;
787:
788: if (ctxt->cur[0] == '"') {
789: ctxt->cur++;
790:
791: q = ctxt->cur;
792: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
793: if (ctxt->cur[0] == '%') {
794: needSubst = 1; /* TODO !!! */
795: ctxt->cur++;
1.29 daniel 796: } else if (ctxt->cur[0] == '&') {
1.24 daniel 797: needSubst = 1; /* TODO !!! */
798: ctxt->cur++;
799: } else
800: ctxt->cur++;
801: }
802: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 803: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 804: } else {
805: ret = xmlStrndup(q, ctxt->cur - q);
806: ctxt->cur++;
807: }
808: } else if (ctxt->cur[0] == '\'') {
809: ctxt->cur++;
810: q = ctxt->cur;
811: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
812: if (ctxt->cur[0] == '%') {
813: needSubst = 1; /* TODO !!! */
814: ctxt->cur++;
1.29 daniel 815: } else if (ctxt->cur[0] == '&') {
1.24 daniel 816: needSubst = 1; /* TODO !!! */
817: ctxt->cur++;
818: } else
819: ctxt->cur++;
820: }
821: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 822: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 823: } else {
824: ret = xmlStrndup(q, ctxt->cur - q);
825: ctxt->cur++;
826: }
827: } else {
1.31 daniel 828: xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 829: }
830:
831: return(ret);
832: }
833:
834: /*
1.29 daniel 835: * xmlParseAttValue : parse a value for an attribute
836: *
837: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
838: * "'" ([^<&'] | Reference)* "'"
839: */
840:
841: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
842: CHAR *ret = NULL;
843: const CHAR *q;
844: int needSubst;
845:
846: if (ctxt->cur[0] == '"') {
847: ctxt->cur++;
848:
849: q = ctxt->cur;
850: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
851: if (ctxt->cur[0] == '%') {
852: needSubst = 1; /* TODO !!! */
853: ctxt->cur++;
854: } else if (ctxt->cur[0] == '&') {
855: needSubst = 1; /* TODO !!! */
856: ctxt->cur++;
857: } else
858: ctxt->cur++;
859: }
860: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 861: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 862: } else {
863: ret = xmlStrndup(q, ctxt->cur - q);
864: ctxt->cur++;
865: }
866: } else if (ctxt->cur[0] == '\'') {
867: ctxt->cur++;
868: q = ctxt->cur;
869: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
870: if (ctxt->cur[0] == '%') {
871: needSubst = 1; /* TODO !!! */
872: ctxt->cur++;
873: } else if (ctxt->cur[0] == '&') {
874: needSubst = 1; /* TODO !!! */
875: ctxt->cur++;
876: } else
877: ctxt->cur++;
878: }
879: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 880: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 881: } else {
882: ret = xmlStrndup(q, ctxt->cur - q);
883: ctxt->cur++;
884: }
885: } else {
1.31 daniel 886: xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 887: }
888:
889: return(ret);
890: }
891:
892: /*
1.21 daniel 893: * xmlParseSystemLiteral : parse an XML Literal
894: *
1.22 daniel 895: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 896: */
897:
898: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
899: const CHAR *q;
900: CHAR *ret = NULL;
901:
902: if (ctxt->cur[0] == '"') {
903: ctxt->cur++;
904: q = ctxt->cur;
1.22 daniel 905: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"'))
1.21 daniel 906: ctxt->cur++;
1.22 daniel 907: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 908: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 909: } else {
910: ret = xmlStrndup(q, ctxt->cur - q);
911: ctxt->cur++;
912: }
913: } else if (ctxt->cur[0] == '\'') {
914: ctxt->cur++;
915: q = ctxt->cur;
1.22 daniel 916: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
1.21 daniel 917: ctxt->cur++;
1.22 daniel 918: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 919: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 920: } else {
921: ret = xmlStrndup(q, ctxt->cur - q);
922: ctxt->cur++;
923: }
924: } else {
1.31 daniel 925: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 926: }
927:
928: return(ret);
929: }
930:
931: /*
1.27 daniel 932: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 933: *
1.22 daniel 934: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 935: */
936:
937: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
938: const CHAR *q;
939: CHAR *ret = NULL;
940: /*
941: * Name ::= (Letter | '_') (NameChar)*
942: */
943: if (ctxt->cur[0] == '"') {
944: ctxt->cur++;
945: q = ctxt->cur;
946: while (IS_PUBIDCHAR(ctxt->cur[0])) ctxt->cur++;
947: if (ctxt->cur[0] != '"') {
1.31 daniel 948: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 949: } else {
950: ret = xmlStrndup(q, ctxt->cur - q);
951: ctxt->cur++;
952: }
953: } else if (ctxt->cur[0] == '\'') {
954: ctxt->cur++;
955: q = ctxt->cur;
956: while ((IS_LETTER(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
957: ctxt->cur++;
958: if (!IS_LETTER(ctxt->cur[0])) {
1.31 daniel 959: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 960: } else {
961: ret = xmlStrndup(q, ctxt->cur - q);
962: ctxt->cur++;
963: }
964: } else {
1.31 daniel 965: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 966: }
967:
968: return(ret);
969: }
970:
971: /*
1.27 daniel 972: * xmlParseCharData: parse a CharData section.
973: * if we are within a CDATA section ']]>' marks an end of section.
974: *
975: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
976: */
977:
978: CHAR *xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
979: const CHAR *q;
980: CHAR *ret = NULL;
981:
982: q = ctxt->cur;
983: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '<') &&
984: (ctxt->cur[0] != '&')) {
985: ctxt->cur++;
986: if ((cdata) && (ctxt->cur[0] == ']') && (ctxt->cur[1] == ']') &&
987: (ctxt->cur[2] == '>')) break;
988: }
989: if (q == ctxt->cur) return(NULL);
990: ret = xmlStrndup(q, ctxt->cur - q);
991: return(ret);
992: }
993:
994: /*
1.22 daniel 995: * xmlParseExternalID: Parse an External ID
996: *
997: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
998: * | 'PUBLIC' S PubidLiteral S SystemLiteral
999: */
1000:
1001: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **PubidLiteral) {
1002: CHAR *ExternalID = NULL;
1003:
1004: *PubidLiteral = NULL;
1005: if ((ctxt->cur[0] == 'S') && (ctxt->cur[1] == 'Y') &&
1006: (ctxt->cur[2] == 'S') && (ctxt->cur[3] == 'T') &&
1007: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M')) {
1008: ctxt->cur += 6;
1009: SKIP_BLANKS(ctxt->cur);
1010: ExternalID = xmlParseSystemLiteral(ctxt);
1011: if (ExternalID == NULL)
1.31 daniel 1012: xmlParserError(ctxt,
1013: "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
1.22 daniel 1014: } else if ((ctxt->cur[0] == 'P') && (ctxt->cur[1] == 'U') &&
1015: (ctxt->cur[2] == 'B') && (ctxt->cur[3] == 'L') &&
1016: (ctxt->cur[4] == 'I') && (ctxt->cur[5] == 'C')) {
1017: ctxt->cur += 6;
1018: SKIP_BLANKS(ctxt->cur);
1019: *PubidLiteral = xmlParsePubidLiteral(ctxt);
1020: if (*PubidLiteral == NULL)
1.31 daniel 1021: xmlParserError(ctxt,
1022: "xmlParseExternalID: PUBLIC, no PubidLiteral\n");
1.22 daniel 1023: SKIP_BLANKS(ctxt->cur);
1024: ExternalID = xmlParseSystemLiteral(ctxt);
1025: if (ExternalID == NULL)
1.31 daniel 1026: xmlParserError(ctxt,
1027: "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
1.22 daniel 1028: }
1029: return(ExternalID);
1030: }
1031:
1032: /*
1.1 veillard 1033: * Parse and return a string between quotes or doublequotes
1034: */
1.16 daniel 1035: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.1 veillard 1036: CHAR *ret = NULL;
1.17 daniel 1037: const CHAR *q;
1.1 veillard 1038:
1.16 daniel 1039: if (ctxt->cur[0] == '"') {
1040: ctxt->cur++;
1041: q = ctxt->cur;
1042: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++;
1043: if (ctxt->cur[0] != '"')
1.31 daniel 1044: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1045: else {
1.16 daniel 1046: ret = xmlStrndup(q, ctxt->cur - q);
1047: ctxt->cur++;
1.1 veillard 1048: }
1.16 daniel 1049: } else if (ctxt->cur[0] == '\''){
1050: ctxt->cur++;
1051: q = ctxt->cur;
1052: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++;
1053: if (ctxt->cur[0] != '\'')
1.31 daniel 1054: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1055: else {
1.16 daniel 1056: ret = xmlStrndup(q, ctxt->cur - q);
1057: ctxt->cur++;
1.1 veillard 1058: }
1059: }
1060: return(ret);
1061: }
1062:
1063: /*
1.3 veillard 1064: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1065: * This may or may not create a node (depending on the context)
1.22 daniel 1066: *
1067: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1068: */
1.31 daniel 1069: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1070: xmlNodePtr ret = NULL;
1.17 daniel 1071: const CHAR *q, *start;
1072: const CHAR *r;
1.3 veillard 1073:
1074: /*
1.22 daniel 1075: * Check that there is a comment right here.
1.3 veillard 1076: */
1.16 daniel 1077: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
1.31 daniel 1078: (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return(NULL);
1.3 veillard 1079:
1.16 daniel 1080: ctxt->cur += 4;
1081: start = q = ctxt->cur;
1082: ctxt->cur++;
1083: r = ctxt->cur;
1084: ctxt->cur++;
1085: while (IS_CHAR(ctxt->cur[0]) &&
1086: ((ctxt->cur[0] == ':') || (ctxt->cur[0] != '>') ||
1087: (*r != '-') || (*q != '-'))) {
1088: ctxt->cur++;r++;q++;
1.3 veillard 1089: }
1.16 daniel 1090: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 1091: xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.16 daniel 1092: ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
1.3 veillard 1093: } else {
1.16 daniel 1094: ctxt->cur++;
1.31 daniel 1095: if (create) {
1096: ret = xmlNewComment(xmlStrndup(start, q - start));
1097: }
1.3 veillard 1098: }
1.34 ! daniel 1099: return(NULL);
1.3 veillard 1100: }
1101:
1102: /*
1.13 veillard 1103: * xmlParseNamespace: parse specific '<?namespace ...' constructs.
1.22 daniel 1104: *
1.29 daniel 1105: * TODO !!!!!!!!!!
1106: *
1107: * This is what the older xml-name Working Draft specified, a bunch of
1108: * other stuff may still rely on it, so support is still here as
1109: * if ot was declared on the root of the Tree:-(
1.1 veillard 1110: */
1111:
1.16 daniel 1112: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.1 veillard 1113: CHAR *href = NULL;
1.34 ! daniel 1114: CHAR *prefix = NULL;
1.3 veillard 1115: int garbage = 0;
1.1 veillard 1116:
1117: /*
1.18 daniel 1118: * We just skipped "namespace" or "xml:namespace"
1.1 veillard 1119: */
1.16 daniel 1120: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1121:
1.16 daniel 1122: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
1.1 veillard 1123: /*
1.18 daniel 1124: * We can have "ns" or "prefix" attributes
1125: * Old encoding as 'href' or 'AS' attributes is still supported
1.1 veillard 1126: */
1.18 daniel 1127: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
1128: garbage = 0;
1129: ctxt->cur += 2;
1130: SKIP_BLANKS(ctxt->cur);
1131:
1132: if (ctxt->cur[0] != '=') continue;
1133: ctxt->cur++;
1134: SKIP_BLANKS(ctxt->cur);
1135:
1136: href = xmlParseQuotedString(ctxt);
1137: SKIP_BLANKS(ctxt->cur);
1138: } else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
1.16 daniel 1139: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
1.3 veillard 1140: garbage = 0;
1.16 daniel 1141: ctxt->cur += 4;
1142: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1143:
1.16 daniel 1144: if (ctxt->cur[0] != '=') continue;
1145: ctxt->cur++;
1146: SKIP_BLANKS(ctxt->cur);
1147:
1148: href = xmlParseQuotedString(ctxt);
1149: SKIP_BLANKS(ctxt->cur);
1.18 daniel 1150: } else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
1151: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
1152: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
1153: garbage = 0;
1154: ctxt->cur += 6;
1155: SKIP_BLANKS(ctxt->cur);
1156:
1157: if (ctxt->cur[0] != '=') continue;
1158: ctxt->cur++;
1159: SKIP_BLANKS(ctxt->cur);
1160:
1.34 ! daniel 1161: prefix = xmlParseQuotedString(ctxt);
1.18 daniel 1162: SKIP_BLANKS(ctxt->cur);
1.16 daniel 1163: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
1.3 veillard 1164: garbage = 0;
1.16 daniel 1165: ctxt->cur += 2;
1166: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1167:
1.16 daniel 1168: if (ctxt->cur[0] != '=') continue;
1169: ctxt->cur++;
1170: SKIP_BLANKS(ctxt->cur);
1171:
1.34 ! daniel 1172: prefix = xmlParseQuotedString(ctxt);
1.16 daniel 1173: SKIP_BLANKS(ctxt->cur);
1174: } else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
1.3 veillard 1175: garbage = 0;
1.16 daniel 1176: ctxt->cur ++;
1.1 veillard 1177: } else {
1.3 veillard 1178: /*
1179: * Found garbage when parsing the namespace
1180: */
1.31 daniel 1181: if (!garbage)
1182: xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
1.16 daniel 1183: ctxt->cur++;
1.1 veillard 1184: }
1185: }
1186:
1.16 daniel 1187: MOVETO_ENDTAG(ctxt->cur);
1188: ctxt->cur++;
1.1 veillard 1189:
1190: /*
1191: * Register the DTD.
1192: */
1193: if (href != NULL)
1.34 ! daniel 1194: xmlNewNs(ctxt->doc, href, prefix);
1.1 veillard 1195:
1.34 ! daniel 1196: if (prefix != NULL) free(prefix);
1.8 veillard 1197: if (href != NULL) free(href);
1.1 veillard 1198: }
1199:
1200: /*
1.22 daniel 1201: * xmlParsePITarget: parse the name of a PI
1202: *
1203: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1204: */
1205:
1206: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1207: CHAR *name;
1208:
1209: name = xmlParseName(ctxt);
1210: if ((name != NULL) && (name[3] == 0) &&
1211: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1212: ((name[1] == 'm') || (name[1] == 'M')) &&
1213: ((name[2] == 'l') || (name[2] == 'L'))) {
1214: xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1215: return(NULL);
1216: }
1217: return(name);
1218: }
1219:
1220: /*
1.3 veillard 1221: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 1222: *
1223: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 1224: */
1225:
1.16 daniel 1226: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1227: CHAR *target;
1228:
1.16 daniel 1229: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1.3 veillard 1230: /*
1231: * this is a Processing Instruction.
1232: */
1.16 daniel 1233: ctxt->cur += 2;
1.3 veillard 1234:
1235: /*
1.22 daniel 1236: * Parse the target name and check for special support like
1237: * namespace.
1238: *
1239: * TODO : PI handling should be dynamically redefinable using an
1240: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1241: */
1.22 daniel 1242: target = xmlParsePITarget(ctxt);
1243: if (target != NULL) {
1244: /*
1245: * Support for the Processing Instruction related to namespace.
1246: */
1247: if ((target[0] == 'n') && (target[1] == 'a') &&
1248: (target[2] == 'm') && (target[3] == 'e') &&
1249: (target[4] == 's') && (target[5] == 'p') &&
1250: (target[6] == 'a') && (target[7] == 'c') &&
1251: (target[8] == 'e')) {
1252: xmlParseNamespace(ctxt);
1253: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1254: (target[2] == 'l') && (target[3] == ':') &&
1255: (target[4] == 'n') && (target[5] == 'a') &&
1256: (target[6] == 'm') && (target[7] == 'e') &&
1257: (target[8] == 's') && (target[9] == 'p') &&
1258: (target[10] == 'a') && (target[11] == 'c') &&
1259: (target[12] == 'e')) {
1260: xmlParseNamespace(ctxt);
1261: } else {
1262: /* Unknown PI, ignore it ! */
1.31 daniel 1263: xmlParserError(ctxt, "xmlParsePI : skipping unknown PI %s\n",
1264: target);
1.22 daniel 1265: while (IS_CHAR(ctxt->cur[0]) &&
1.24 daniel 1266: ((ctxt->cur[0] != '?') || (ctxt->cur[1] != '>')))
1.22 daniel 1267: ctxt->cur++;
1268: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 1269: xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
1270: target);
1.24 daniel 1271: } else
1272: ctxt->cur += 2;
1.22 daniel 1273: }
1.3 veillard 1274: } else {
1.31 daniel 1275: xmlParserError(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1276: /********* Should we try to complete parsing the PI ???
1277: while (IS_CHAR(ctxt->cur[0]) &&
1278: (ctxt->cur[0] != '?') && (ctxt->cur[0] != '>'))
1279: ctxt->cur++;
1280: if (!IS_CHAR(ctxt->cur[0])) {
1281: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1282: target);
1283: }
1284: ********************************************************/
1285: }
1286: }
1287: }
1288:
1289: /*
1290: * xmlParseNotationDecl: parse a notation declaration
1291: *
1292: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1293: *
1294: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1295: *
1296: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1297: * 'PUBLIC' S PubidLiteral S SystemLiteral
1298: *
1299: * Hence there is actually 3 choices:
1300: * 'PUBLIC' S PubidLiteral
1301: * 'PUBLIC' S PubidLiteral S SystemLiteral
1302: * and 'SYSTEM' S SystemLiteral
1303: */
1304:
1305: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1306: CHAR *name;
1307:
1308: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1309: (ctxt->cur[2] == 'N') && (ctxt->cur[3] == 'O') &&
1310: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'A') &&
1311: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'I') &&
1312: (ctxt->cur[8] == 'O') && (ctxt->cur[9] == 'N') &&
1313: (IS_BLANK(ctxt->cur[10]))) {
1314: ctxt->cur += 10;
1315: SKIP_BLANKS(ctxt->cur);
1316:
1317: name = xmlParseName(ctxt);
1318: if (name == NULL) {
1.31 daniel 1319: xmlParserError(ctxt,
1320: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1321: return;
1322: }
1323: SKIP_BLANKS(ctxt->cur);
1324: /*
1.31 daniel 1325: * TODO !!!
1.22 daniel 1326: */
1327: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1328: ctxt->cur++;
1329: free(name);
1330: }
1331: }
1332:
1333: /*
1334: * xmlParseEntityDecl: parse <!ENTITY declarations
1335: *
1336: * [70] EntityDecl ::= GEDecl | PEDecl
1337: *
1338: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1339: *
1340: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1341: *
1342: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1343: *
1344: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1345: *
1346: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1347: */
1348:
1349: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1350: CHAR *name;
1.24 daniel 1351: CHAR *value = NULL;
1352: CHAR *id = NULL, *literal = NULL;
1353: CHAR *ndata = NULL;
1.22 daniel 1354: int typePEDef = 0;
1355:
1356: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1357: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'N') &&
1358: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1359: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'Y') &&
1360: (IS_BLANK(ctxt->cur[8]))) {
1361: ctxt->cur += 8;
1362: SKIP_BLANKS(ctxt->cur);
1363:
1364: if (ctxt->cur[0] == '%') {
1.16 daniel 1365: ctxt->cur++;
1.22 daniel 1366: SKIP_BLANKS(ctxt->cur);
1367: typePEDef = 1;
1368: }
1369:
1370: name = xmlParseName(ctxt);
1.24 daniel 1371: if (name == NULL) {
1.31 daniel 1372: xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1373: return;
1374: }
1375: SKIP_BLANKS(ctxt->cur);
1376:
1.22 daniel 1377: /*
1.24 daniel 1378: * TODO handle the various case of definitions...
1.22 daniel 1379: */
1.24 daniel 1380: if (typePEDef) {
1381: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1382: value = xmlParseEntityValue(ctxt);
1383: else {
1384: id = xmlParseExternalID(ctxt, &literal);
1385: }
1386: } else {
1387: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1388: value = xmlParseEntityValue(ctxt);
1389: else {
1390: id = xmlParseExternalID(ctxt, &literal);
1391: SKIP_BLANKS(ctxt->cur);
1392: if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'D') &&
1393: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1394: (ctxt->cur[4] == 'A')) {
1395: ndata = xmlParseName(ctxt);
1396: }
1397: }
1398: }
1399: SKIP_BLANKS(ctxt->cur);
1400: if (ctxt->cur[0] != '>') {
1.31 daniel 1401: xmlParserError(ctxt,
1402: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1403: } else
1.22 daniel 1404: ctxt->cur++;
1405: }
1406: }
1407:
1408: /*
1409: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1410: *
1411: * [57] EnumeratedType ::= NotationType | Enumeration
1412: *
1413: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1414: *
1415: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1416: */
1417:
1418: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1419: /*
1420: * TODO !!!
1421: */
1422: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1423: ctxt->cur++;
1424: }
1425:
1426: /*
1427: * xmlParseAttributeType: parse the Attribute list def for an element
1428: *
1429: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1430: *
1431: * [55] StringType ::= 'CDATA'
1432: *
1433: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1434: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1435: */
1436: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1437: if ((ctxt->cur[0] == 'C') && (ctxt->cur[1] == 'D') &&
1438: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1439: (ctxt->cur[4] == 'A')) {
1440: ctxt->cur += 5;
1441: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D')) {
1442: ctxt->cur += 2;
1443: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1444: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1445: (ctxt->cur[4] == 'F')) {
1446: ctxt->cur += 5;
1447: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1448: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1449: (ctxt->cur[4] == 'F') && (ctxt->cur[5] == 'S')) {
1450: ctxt->cur += 6;
1451: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1452: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1453: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'Y')) {
1454: ctxt->cur += 6;
1455: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1456: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1457: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1458: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'S')) {
1459: ctxt->cur += 8;
1460: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1461: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1462: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1463: (ctxt->cur[6] == 'N')) {
1464: ctxt->cur += 7;
1465: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1466: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1467: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1468: (ctxt->cur[6] == 'N') && (ctxt->cur[7] == 'S')) {
1469: } else {
1470: xmlParseEnumeratedType(ctxt, name);
1471: }
1472: }
1473:
1474: /*
1475: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1476: *
1477: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1478: *
1479: * [53] AttDef ::= S Name S AttType S DefaultDecl
1480: */
1481: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1482: CHAR *name;
1483:
1484: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1485: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1486: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'L') &&
1487: (ctxt->cur[6] == 'I') && (ctxt->cur[7] == 'S') &&
1488: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1489: ctxt->cur += 9;
1490: SKIP_BLANKS(ctxt->cur);
1491: name = xmlParseName(ctxt);
1492: if (name == NULL) {
1.31 daniel 1493: xmlParserError(ctxt,
1494: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1495: return;
1496: }
1497: SKIP_BLANKS(ctxt->cur);
1498: while (ctxt->cur[0] != '>') {
1499: const CHAR *check = ctxt->cur;
1500:
1501: xmlParseAttributeType(ctxt, name);
1502: SKIP_BLANKS(ctxt->cur);
1503: if (check == ctxt->cur) {
1.31 daniel 1504: xmlParserError(ctxt,
1505: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 1506: break;
1507: }
1508: }
1509: if (ctxt->cur[0] == '>')
1510: ctxt->cur++;
1511:
1512: free(name);
1513: }
1514: }
1515:
1516: /*
1517: * xmlParseElementContentDecl: parse the declaration for an Element content
1518: * either Mixed or Children, the cases EMPTY and ANY being handled
1519: * int xmlParseElementDecl.
1520: *
1521: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1522: *
1523: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1524: *
1525: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1526: *
1527: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1528: *
1529: * or
1530: *
1531: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1532: * '(' S? '#PCDATA' S? ')'
1533: */
1534:
1535: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1536: /*
1537: * TODO This has to be parsed correctly, currently we just skip until
1538: * we reach the first '>'.
1.31 daniel 1539: * !!!
1.22 daniel 1540: */
1541: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1542: ctxt->cur++;
1543: }
1544:
1545: /*
1546: * xmlParseElementDecl: parse an Element declaration.
1547: *
1548: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1549: *
1550: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1551: *
1552: * TODO There is a check [ VC: Unique Element Type Declaration ]
1553: */
1554: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1555: CHAR *name;
1556:
1557: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1558: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'L') &&
1559: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M') &&
1560: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'N') &&
1561: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1562: ctxt->cur += 9;
1563: SKIP_BLANKS(ctxt->cur);
1564: name = xmlParseName(ctxt);
1565: if (name == NULL) {
1.31 daniel 1566: xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 1567: return;
1568: }
1569: SKIP_BLANKS(ctxt->cur);
1570: if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'M') &&
1571: (ctxt->cur[2] == 'P') && (ctxt->cur[3] == 'T') &&
1572: (ctxt->cur[4] == 'Y')) {
1573: ctxt->cur += 5;
1574: /*
1575: * Element must always be empty.
1576: */
1577: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'N') &&
1578: (ctxt->cur[2] == 'Y')) {
1579: ctxt->cur += 3;
1580: /*
1581: * Element is a generic container.
1582: */
1583: } else {
1584: xmlParseElementContentDecl(ctxt, name);
1585: }
1586: SKIP_BLANKS(ctxt->cur);
1587: if (ctxt->cur[0] != '>') {
1.31 daniel 1588: xmlParserError(ctxt,
1589: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 1590: } else
1591: ctxt->cur++;
1592: }
1593: }
1594:
1595: /*
1596: * xmlParseMarkupDecl: parse Markup declarations
1597: *
1598: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1599: * NotationDecl | PI | Comment
1600: *
1601: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1602: */
1603: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1604: xmlParseElementDecl(ctxt);
1605: xmlParseAttributeListDecl(ctxt);
1606: xmlParseEntityDecl(ctxt);
1607: xmlParseNotationDecl(ctxt);
1608: xmlParsePI(ctxt);
1.31 daniel 1609: xmlParseComment(ctxt, 0);
1.22 daniel 1610: }
1611:
1612: /*
1.24 daniel 1613: * xmlParseCharRef: parse Reference declarations
1614: *
1615: * [66] CharRef ::= '&#' [0-9]+ ';' |
1616: * '&#x' [0-9a-fA-F]+ ';'
1617: */
1618: CHAR xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 1619: int val = 0;
1.24 daniel 1620: CHAR ret = 0;
1621:
1622: if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#') &&
1623: (ctxt->cur[2] == 'x')) {
1624: ctxt->cur += 3;
1625: while (ctxt->cur[0] != ';') {
1626: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1.29 daniel 1627: val = val * 16 + (ctxt->cur[0] - '0');
1.24 daniel 1628: else if ((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'f'))
1.29 daniel 1629: val = val * 16 + (ctxt->cur[0] - 'a') + 10;
1.24 daniel 1630: else if ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'F'))
1.29 daniel 1631: val = val * 16 + (ctxt->cur[0] - 'A') + 10;
1.24 daniel 1632: else {
1.31 daniel 1633: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1634: val = 0;
1.24 daniel 1635: break;
1636: }
1637: }
1638: if (ctxt->cur[0] != ';')
1639: ctxt->cur++;
1640: } else if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#')) {
1641: ctxt->cur += 2;
1642: while (ctxt->cur[0] != ';') {
1643: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1.29 daniel 1644: val = val * 16 + (ctxt->cur[0] - '0');
1.24 daniel 1645: else {
1.31 daniel 1646: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1647: val = 0;
1.24 daniel 1648: break;
1649: }
1650: }
1651: if (ctxt->cur[0] != ';')
1652: ctxt->cur++;
1653: } else {
1.31 daniel 1654: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 1655: }
1.29 daniel 1656: /*
1657: * Check the value IS_CHAR ...
1658: */
1659: if (IS_CHAR(val))
1660: ret = (CHAR) val;
1661: else {
1662: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1663: ctxt->cur - 10);
1664:
1665: ret = '?';
1666: }
1.24 daniel 1667: return(ret);
1668: }
1669:
1670: /*
1671: * xmlParseEntityRef: parse ENTITY references declarations
1672: *
1673: * [68] EntityRef ::= '&' Name ';'
1674: */
1675: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1676: CHAR *name;
1677:
1678: if (ctxt->cur[0] == '&') {
1679: ctxt->cur++;
1680: name = xmlParseName(ctxt);
1681: if (name == NULL) {
1.31 daniel 1682: xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 1683: } else {
1684: if (ctxt->cur[0] == ';') {
1685: ctxt->cur++;
1686: /*
1687: * TODO there is a VC check here !!!
1688: * [ VC: Entity Declared ]
1689: */
1690: free(name);
1691: } else {
1.31 daniel 1692: xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.24 daniel 1693: }
1694: }
1695: }
1.25 daniel 1696: return(NULL); /* TODO !!!! */
1.24 daniel 1697: }
1698:
1699: /*
1700: * xmlParseReference: parse Reference declarations
1701: *
1702: * [67] Reference ::= EntityRef | CharRef
1703: */
1704: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
1705: CHAR *name;
1706:
1707: if (ctxt->cur[0] == '&') {
1708: return(xmlParseEntityRef(ctxt));
1709: } else {
1710: ctxt->cur++;
1711: name = xmlParseName(ctxt);
1712: if (name == NULL) {
1.31 daniel 1713: xmlParserError(ctxt, "xmlParseReference: no name\n");
1.24 daniel 1714: } else {
1715: if (ctxt->cur[0] == ';') {
1716: ctxt->cur++;
1717: /*
1718: * TODO there is a VC check here !!!
1719: * [ VC: Entity Declared ]
1720: */
1721: free(name);
1722: } else {
1.31 daniel 1723: xmlParserError(ctxt, "xmlParseReference: expecting ';'\n");
1.24 daniel 1724: }
1725: }
1726: }
1.25 daniel 1727: return(NULL); /* TODO !!!! */
1.24 daniel 1728: }
1729:
1730: /*
1.22 daniel 1731: * xmlParsePEReference: parse PEReference declarations
1732: *
1733: * [69] PEReference ::= '%' Name ';'
1734: */
1.24 daniel 1735: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 1736: CHAR *name;
1737:
1738: if (ctxt->cur[0] == '%') {
1739: ctxt->cur++;
1740: name = xmlParseName(ctxt);
1741: if (name == NULL) {
1.31 daniel 1742: xmlParserError(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 1743: } else {
1744: if (ctxt->cur[0] == ';') {
1745: ctxt->cur++;
1746: /*
1747: * TODO there is a VC check here !!!
1748: * [ VC: Entity Declared ]
1749: */
1750: free(name);
1751: } else {
1.31 daniel 1752: xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
1.22 daniel 1753: }
1.3 veillard 1754: }
1755: }
1.25 daniel 1756: return(NULL); /* TODO !!!! */
1.3 veillard 1757: }
1758:
1759: /*
1.21 daniel 1760: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
1761: *
1.22 daniel 1762: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1763: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 1764: */
1765:
1766: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1767: CHAR *name;
1768: CHAR *ExternalID = NULL;
1.22 daniel 1769: CHAR *SystemID = NULL;
1.21 daniel 1770:
1771: /*
1772: * We know that '<!DOCTYPE' has been detected.
1773: */
1774: ctxt->cur += 9;
1775:
1776: SKIP_BLANKS(ctxt->cur);
1777:
1778: /*
1779: * Parse the DOCTYPE name.
1780: */
1781: name = xmlParseName(ctxt);
1782: if (name == NULL) {
1.31 daniel 1783: xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 1784: }
1785:
1786: SKIP_BLANKS(ctxt->cur);
1787:
1788: /*
1.22 daniel 1789: * Check for SystemID and ExternalID
1790: */
1791: SystemID = xmlParseExternalID(ctxt, &ExternalID);
1792: SKIP_BLANKS(ctxt->cur);
1793:
1794: /*
1795: * Is there any DTD definition ?
1796: */
1797: if (ctxt->cur[0] == '[') {
1798: ctxt->cur++;
1799: /*
1800: * Parse the succession of Markup declarations and
1801: * PEReferences.
1802: * Subsequence (markupdecl | PEReference | S)*
1803: */
1804: while (ctxt->cur[0] != ']') {
1805: const CHAR *check = ctxt->cur;
1806:
1807: SKIP_BLANKS(ctxt->cur);
1808: xmlParseMarkupDecl(ctxt);
1809: xmlParsePEReference(ctxt);
1810:
1811: if (ctxt->cur == check) {
1.31 daniel 1812: xmlParserError(ctxt,
1813: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 1814: break;
1815: }
1816: }
1817: if (ctxt->cur[0] == ']') ctxt->cur++;
1818: }
1819:
1820: /*
1821: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 1822: */
1.22 daniel 1823: if (ctxt->cur[0] != '>') {
1.31 daniel 1824: xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 1825: /* We shouldn't try to resynchronize ... */
1.21 daniel 1826: }
1.22 daniel 1827: ctxt->cur++;
1828:
1829: /*
1830: * Cleanup, since we don't use all those identifiers
1831: * TODO : the DOCTYPE if available should be stored !
1832: */
1833: if (SystemID != NULL) free(SystemID);
1834: if (ExternalID != NULL) free(ExternalID);
1835: if (name != NULL) free(name);
1.21 daniel 1836: }
1837:
1838: /*
1.3 veillard 1839: * xmlParseAttribute: parse a start of tag.
1840: *
1.22 daniel 1841: * [41] Attribute ::= Name Eq AttValue
1842: *
1843: * [25] Eq ::= S? '=' S?
1844: *
1.29 daniel 1845: * With namespace:
1846: *
1847: * [NS 11] Attribute ::= QName Eq AttValue
1.3 veillard 1848: */
1849:
1.16 daniel 1850: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1851: CHAR *name, *value = NULL;
1.29 daniel 1852: CHAR *ns;
1.3 veillard 1853:
1.29 daniel 1854: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 1855: if (name == NULL) {
1.31 daniel 1856: xmlParserError(ctxt, "error parsing attribute name\n");
1.29 daniel 1857: return;
1.3 veillard 1858: }
1.22 daniel 1859: /*
1860: * TODO: Check for Namespace ...
1861: */
1.29 daniel 1862: if (ns != NULL) {
1.31 daniel 1863: xmlParserError(ctxt,
1864: "Internal: xmlParseAttribute: don't handle attributes namespace\n");
1.29 daniel 1865: free(ns);
1866: }
1.3 veillard 1867:
1868: /*
1.29 daniel 1869: * read the value
1.3 veillard 1870: */
1.16 daniel 1871: SKIP_BLANKS(ctxt->cur);
1872: if (ctxt->cur[0] == '=') {
1873: ctxt->cur++;
1874: SKIP_BLANKS(ctxt->cur);
1.29 daniel 1875: value = xmlParseAttValue(ctxt);
1876: } else {
1.31 daniel 1877: xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
1878: name);
1.3 veillard 1879: }
1880:
1881: /*
1882: * Add the attribute to the node.
1883: */
1.17 daniel 1884: if (name != NULL) {
1.3 veillard 1885: xmlNewProp(node, name, value);
1.17 daniel 1886: free(name);
1887: }
1.29 daniel 1888: if (value != NULL)
1.17 daniel 1889: free(value);
1.3 veillard 1890: }
1891:
1892: /*
1.29 daniel 1893: * xmlParseStartTag: parse a start of tag either for rule element or
1894: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 1895: *
1896: * [40] STag ::= '<' Name (S Attribute)* S? '>'
1897: *
1.29 daniel 1898: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1899: *
1900: * With namespace:
1901: *
1902: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
1903: *
1904: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.2 veillard 1905: */
1906:
1.16 daniel 1907: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 ! daniel 1908: CHAR *namespace, *name;
! 1909: xmlNsPtr ns = NULL;
1.2 veillard 1910: xmlNodePtr ret = NULL;
1911:
1.16 daniel 1912: if (ctxt->cur[0] != '<') return(NULL);
1913: ctxt->cur++;
1.3 veillard 1914:
1.34 ! daniel 1915: name = xmlNamespaceParseQName(ctxt, &namespace);
! 1916: if (namespace != NULL) {
1.3 veillard 1917: /*
1918: * Search the DTD associated to ns.
1919: */
1.34 ! daniel 1920: ns = xmlSearchNs(ctxt->doc, namespace);
! 1921: if (ns == NULL)
! 1922: xmlParserError(ctxt, "Start tag : Couldn't find namespace %s\n",
! 1923: namespace);
! 1924: free(namespace);
1.29 daniel 1925: }
1.3 veillard 1926:
1.34 ! daniel 1927: ret = xmlNewNode(ns, name, NULL);
1.2 veillard 1928:
1.3 veillard 1929: /*
1930: * Now parse the attributes, it ends up with the ending
1931: *
1932: * (S Attribute)* S?
1933: */
1.16 daniel 1934: SKIP_BLANKS(ctxt->cur);
1935: while ((IS_CHAR(ctxt->cur[0])) &&
1936: (ctxt->cur[0] != '>') &&
1937: ((ctxt->cur[0] != '/') || (ctxt->cur[1] != '>'))) {
1.29 daniel 1938: const CHAR *q = ctxt->cur;
1939:
1940: xmlParseAttribute(ctxt, ret);
1941: SKIP_BLANKS(ctxt->cur);
1942:
1943: if (q == ctxt->cur) {
1.31 daniel 1944: xmlParserError(ctxt,
1945: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 1946: break;
1.3 veillard 1947: }
1948: }
1949:
1950: return(ret);
1951: }
1952:
1953: /*
1.27 daniel 1954: * xmlParseEndTag: parse an end of tag
1955: *
1956: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 1957: *
1958: * With namespace
1959: *
1960: * [9] ETag ::= '</' QName S? '>'
1.7 veillard 1961: */
1962:
1.34 ! daniel 1963: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
! 1964: CHAR *namespace, *name;
! 1965: xmlNsPtr ns = NULL;
1.7 veillard 1966:
1.34 ! daniel 1967: *nsPtr = NULL;
1.7 veillard 1968: *tagPtr = NULL;
1969:
1.27 daniel 1970: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1.31 daniel 1971: xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 1972: return;
1973: }
1974: ctxt->cur += 2;
1.7 veillard 1975:
1.34 ! daniel 1976: name = xmlNamespaceParseQName(ctxt, &namespace);
! 1977: if (namespace != NULL) {
1.7 veillard 1978: /*
1979: * Search the DTD associated to ns.
1980: */
1.34 ! daniel 1981: ns = xmlSearchNs(ctxt->doc, namespace);
! 1982: if (ns == NULL)
! 1983: xmlParserError(ctxt, "End tag : Couldn't find namespace %s\n",
! 1984: namespace);
! 1985: free(namespace);
1.29 daniel 1986: }
1.7 veillard 1987:
1.34 ! daniel 1988: *nsPtr = ns;
1.7 veillard 1989: *tagPtr = name;
1990:
1991: /*
1992: * We should definitely be at the ending "S? '>'" part
1993: */
1.16 daniel 1994: SKIP_BLANKS(ctxt->cur);
1995: if ((!IS_CHAR(ctxt->cur[0])) || (ctxt->cur[0] != '>')) {
1.31 daniel 1996: xmlParserError(ctxt, "End tag : expected '>'\n");
1.7 veillard 1997: } else
1.16 daniel 1998: ctxt->cur++;
1.7 veillard 1999:
2000: return;
2001: }
2002:
2003: /*
1.3 veillard 2004: * xmlParseCDSect: escaped pure raw content.
1.29 daniel 2005: *
2006: * [18] CDSect ::= CDStart CData CDEnd
2007: *
2008: * [19] CDStart ::= '<![CDATA['
2009: *
2010: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2011: *
2012: * [21] CDEnd ::= ']]>'
1.3 veillard 2013: */
1.16 daniel 2014: CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2015: const CHAR *r, *s, *base;
2016: CHAR *ret;
1.3 veillard 2017:
1.29 daniel 2018: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2019: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
2020: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
2021: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
2022: (ctxt->cur[8] == '[')) {
2023: ctxt->cur += 9;
2024: } else
2025: return(NULL);
1.16 daniel 2026: base = ctxt->cur;
2027: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2028: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2029: return(NULL);
2030: }
1.16 daniel 2031: r = ctxt->cur++;
2032: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2033: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2034: return(NULL);
2035: }
1.16 daniel 2036: s = ctxt->cur++;
2037: while (IS_CHAR(ctxt->cur[0]) &&
2038: ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) {
2039: r++;s++;ctxt->cur++;
1.3 veillard 2040: }
1.16 daniel 2041: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2042: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2043: return(NULL);
2044: }
1.16 daniel 2045: ret = xmlStrndup(base, ctxt->cur-base);
2046:
1.2 veillard 2047: return(ret);
2048: }
2049:
2050: /*
2051: * xmlParseContent: a content is
2052: * (element | PCData | Reference | CDSect | PI | Comment)
2053: *
1.27 daniel 2054: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2055: */
2056:
1.27 daniel 2057: void xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 2058: const CHAR *q;
2059: CHAR *data = NULL;
1.2 veillard 2060: xmlNodePtr ret = NULL;
2061:
1.27 daniel 2062: while ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1.34 ! daniel 2063: const CHAR *test = ctxt->cur;
1.27 daniel 2064: ret = NULL;
2065: data = NULL;
2066:
2067: /*
2068: * First case : a Processing Instruction.
2069: */
2070: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
2071: xmlParsePI(ctxt);
2072: }
2073: /*
2074: * Second case : a CDSection
2075: */
2076: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2077: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
2078: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
2079: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
2080: (ctxt->cur[8] == '[')) {
2081: data = xmlParseCDSect(ctxt);
2082: }
2083: /*
2084: * Third case : a comment
2085: */
2086: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2087: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
1.31 daniel 2088: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2089: }
2090: /*
2091: * Fourth case : a sub-element.
2092: */
2093: else if (ctxt->cur[0] == '<') {
2094: ret = xmlParseElement(ctxt);
2095: }
2096: /*
2097: * Last case, text. Note that References are handled directly.
2098: */
2099: else {
2100: q = ctxt->cur;
2101: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
2102:
2103: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2104: xmlParserError(ctxt, "Truncated content\n%.50s\n", q);
1.27 daniel 2105: return;
2106: }
1.3 veillard 2107:
1.27 daniel 2108: /*
2109: * Do the Entities decoding...
2110: */
2111: data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
1.3 veillard 2112: }
1.14 veillard 2113:
2114: /*
1.27 daniel 2115: * Handle the data if any. If there is no child
2116: * add it as content, otherwise create a new node of type text.
1.14 veillard 2117: */
1.27 daniel 2118: if (data != NULL)
2119: data = xmlHandleData(data);
2120: if (data != NULL) {
2121: if (node->childs == NULL)
2122: xmlNodeSetContent(node, data);
2123: else
2124: ret = xmlNewText(data);
2125: free(data);
2126: }
2127: if (ret != NULL)
2128: xmlAddChild(node, ret);
1.29 daniel 2129: if (test == ctxt->cur) {
1.31 daniel 2130: xmlParserError(ctxt, "detected an error in element content\n");
1.29 daniel 2131: break;
2132: }
1.3 veillard 2133: }
1.2 veillard 2134: }
2135:
2136: /*
2137: * xmlParseElement: parse an XML element
1.26 daniel 2138: *
2139: * [39] element ::= EmptyElemTag | STag content ETag
2140: *
2141: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 2142: */
1.26 daniel 2143:
1.2 veillard 2144:
1.16 daniel 2145: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2146: xmlNodePtr ret;
1.17 daniel 2147: const CHAR *openTag = ctxt->cur;
1.32 daniel 2148: xmlParserNodeInfo node_info;
1.27 daniel 2149: CHAR *endTag;
1.34 ! daniel 2150: xmlNsPtr endNs;
1.2 veillard 2151:
1.32 daniel 2152: /* Capture start position */
2153: node_info.begin_pos = ctxt->cur - ctxt->base;
2154: node_info.begin_line = ctxt->line;
2155:
1.16 daniel 2156: ret = xmlParseStartTag(ctxt);
1.3 veillard 2157: if (ret == NULL) {
2158: return(NULL);
2159: }
1.2 veillard 2160:
2161: /*
2162: * Check for an Empty Element.
2163: */
1.16 daniel 2164: if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
2165: ctxt->cur += 2;
1.2 veillard 2166: return(ret);
2167: }
1.16 daniel 2168: if (ctxt->cur[0] == '>') ctxt->cur++;
1.2 veillard 2169: else {
1.31 daniel 2170: xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
1.16 daniel 2171: return(NULL);
1.2 veillard 2172: }
2173:
2174: /*
2175: * Parse the content of the element:
2176: */
1.27 daniel 2177: xmlParseContent(ctxt, ret);
1.16 daniel 2178: if (!IS_CHAR(ctxt->cur[0])) {
1.31 daniel 2179: xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
2180: openTag);
1.16 daniel 2181: return(NULL);
1.2 veillard 2182: }
2183:
2184: /*
1.27 daniel 2185: * parse the end of tag: '</' should be here.
1.2 veillard 2186: */
1.34 ! daniel 2187: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 2188:
1.27 daniel 2189: /*
2190: * Check that the Name in the ETag is the same as in the STag.
2191: */
1.34 ! daniel 2192: if (endNs != ret->ns) {
1.31 daniel 2193: xmlParserError(ctxt,
2194: "Start and End tags don't use the same DTD\n%.30s\n%.30s\n",
2195: openTag, endTag);
1.27 daniel 2196: }
1.32 daniel 2197: if (endTag == NULL ) {
2198: xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
2199: } else if (strcmp(ret->name, endTag)) {
1.31 daniel 2200: xmlParserError(ctxt,
2201: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2202: openTag, endTag);
1.27 daniel 2203: }
1.7 veillard 2204:
1.27 daniel 2205: if ( endTag != NULL )
2206: free(endTag);
1.2 veillard 2207:
1.32 daniel 2208: /* Capture end position and add node */
2209: if ( ret != NULL && ctxt->record_info ) {
2210: node_info.end_pos = ctxt->cur - ctxt->base;
2211: node_info.end_line = ctxt->line;
2212: node_info.node = ret;
2213: xmlParserAddNodeInfo(ctxt, &node_info);
2214: }
1.2 veillard 2215: return(ret);
2216: }
2217:
2218: /*
1.29 daniel 2219: * xmlParseVersionNum: parse the XML version value.
2220: *
2221: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2222: */
2223: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
2224: const CHAR *q = ctxt->cur;
2225: CHAR *ret;
2226:
2227: while (IS_CHAR(ctxt->cur[0]) &&
2228: (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2229: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z')) ||
2230: ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9')) ||
1.31 daniel 2231: (ctxt->cur[0] == '_') || (ctxt->cur[0] == '.') ||
2232: (ctxt->cur[0] == ':') || (ctxt->cur[0] == '-'))) ctxt->cur++;
1.29 daniel 2233: ret = xmlStrndup(q, ctxt->cur - q);
2234: return(ret);
2235: }
2236:
2237: /*
2238: * xmlParseVersionInfo: parse the XML version.
2239: *
2240: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2241: *
2242: * [25] Eq ::= S? '=' S?
2243: */
2244:
2245: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2246: CHAR *version = NULL;
2247: const CHAR *q;
2248:
2249: if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
2250: (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
2251: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
2252: (ctxt->cur[6] == 'n')) {
2253: ctxt->cur += 7;
2254: SKIP_BLANKS(ctxt->cur);
1.31 daniel 2255: if (ctxt->cur[0] != '=') {
2256: xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
2257: return(NULL);
2258: }
2259: ctxt->cur++;
2260: SKIP_BLANKS(ctxt->cur);
1.29 daniel 2261: if (ctxt->cur[0] == '"') {
2262: ctxt->cur++;
2263: q = ctxt->cur;
2264: version = xmlParseVersionNum(ctxt);
1.31 daniel 2265: if (ctxt->cur[0] != '"')
2266: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2267: else
2268: ctxt->cur++;
1.31 daniel 2269: } else if (ctxt->cur[0] == '\''){
1.29 daniel 2270: ctxt->cur++;
2271: q = ctxt->cur;
2272: version = xmlParseVersionNum(ctxt);
1.31 daniel 2273: if (ctxt->cur[0] != '\'')
2274: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2275: else
2276: ctxt->cur++;
1.31 daniel 2277: } else {
2278: xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 2279: }
2280: }
2281: return(version);
2282: }
2283:
2284: /*
2285: * xmlParseEncName: parse the XML encoding name
2286: *
2287: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2288: */
2289: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
2290: const CHAR *q = ctxt->cur;
2291: CHAR *ret = NULL;
2292:
2293: if (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2294: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z'))) {
2295: ctxt->cur++;
2296: while (IS_CHAR(ctxt->cur[0]) &&
2297: (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2298: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z')) ||
2299: ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9')) ||
2300: (ctxt->cur[0] == '-'))) ctxt->cur++;
2301: ret = xmlStrndup(q, ctxt->cur - q);
2302: } else {
1.31 daniel 2303: xmlParserError(ctxt, "Invalid XML encoding name\n");
1.29 daniel 2304: }
2305: return(ret);
2306: }
2307:
2308: /*
2309: * xmlParseEncodingDecl: parse the XML encoding declaration
2310: *
2311: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2312: */
2313:
2314: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2315: CHAR *encoding = NULL;
2316: const CHAR *q;
2317:
2318: SKIP_BLANKS(ctxt->cur);
2319: if ((ctxt->cur[0] == 'e') && (ctxt->cur[1] == 'n') &&
2320: (ctxt->cur[2] == 'c') && (ctxt->cur[3] == 'o') &&
2321: (ctxt->cur[4] == 'd') && (ctxt->cur[5] == 'i') &&
2322: (ctxt->cur[6] == 'n') && (ctxt->cur[7] == 'g')) {
2323: ctxt->cur += 8;
2324: SKIP_BLANKS(ctxt->cur);
1.31 daniel 2325: if (ctxt->cur[0] != '=') {
2326: xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
2327: return(NULL);
2328: }
2329: ctxt->cur++;
2330: SKIP_BLANKS(ctxt->cur);
1.29 daniel 2331: if (ctxt->cur[0] == '"') {
2332: ctxt->cur++;
2333: q = ctxt->cur;
2334: encoding = xmlParseEncName(ctxt);
1.31 daniel 2335: if (ctxt->cur[0] != '"')
2336: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2337: else
2338: ctxt->cur++;
1.31 daniel 2339: } else if (ctxt->cur[0] == '\''){
1.29 daniel 2340: ctxt->cur++;
2341: q = ctxt->cur;
2342: encoding = xmlParseEncName(ctxt);
1.31 daniel 2343: if (ctxt->cur[0] != '\'')
2344: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2345: else
2346: ctxt->cur++;
1.31 daniel 2347: } else if (ctxt->cur[0] == '"'){
2348: xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 2349: }
2350: }
2351: return(encoding);
2352: }
2353:
2354: /*
2355: * xmlParseSDDecl: parse the XML standalone declaration
2356: *
2357: * [32] SDDecl ::= S 'standalone' Eq
2358: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2359: */
2360:
2361: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2362: int standalone = -1;
2363:
2364: SKIP_BLANKS(ctxt->cur);
2365: if ((ctxt->cur[0] == 's') && (ctxt->cur[1] == 't') &&
2366: (ctxt->cur[2] == 'a') && (ctxt->cur[3] == 'n') &&
2367: (ctxt->cur[4] == 'd') && (ctxt->cur[5] == 'a') &&
2368: (ctxt->cur[6] == 'l') && (ctxt->cur[7] == 'o') &&
2369: (ctxt->cur[8] == 'n') && (ctxt->cur[9] == 'e')) {
2370: ctxt->cur += 10;
2371: SKIP_BLANKS(ctxt->cur);
1.32 daniel 2372: if (ctxt->cur[0] != '=') {
2373: xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
2374: return(standalone);
2375: }
2376: ctxt->cur++;
2377: SKIP_BLANKS(ctxt->cur);
1.29 daniel 2378: if (ctxt->cur[0] == '"') {
2379: ctxt->cur++;
2380: } else if (ctxt->cur[0] == '\''){
2381: ctxt->cur++;
2382: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'o')) {
2383: standalone = 0;
1.32 daniel 2384: ctxt->cur += 2;
1.29 daniel 2385: } else if ((ctxt->cur[0] == 'y') && (ctxt->cur[1] == 'e') &&
2386: (ctxt->cur[2] == 's')) {
2387: standalone = 1;
1.32 daniel 2388: ctxt->cur += 3;
1.29 daniel 2389: } else {
1.31 daniel 2390: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2391: }
2392: if (ctxt->cur[0] != '\'')
1.31 daniel 2393: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2394: else
2395: ctxt->cur++;
2396: } else if (ctxt->cur[0] == '"'){
2397: ctxt->cur++;
2398: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'o')) {
2399: standalone = 0;
1.32 daniel 2400: ctxt->cur += 2;
1.29 daniel 2401: } else if ((ctxt->cur[0] == 'y') && (ctxt->cur[1] == 'e') &&
2402: (ctxt->cur[2] == 's')) {
2403: standalone = 1;
1.32 daniel 2404: ctxt->cur += 3;
1.29 daniel 2405: } else {
1.31 daniel 2406: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2407: }
2408: if (ctxt->cur[0] != '"')
1.31 daniel 2409: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2410: else
2411: ctxt->cur++;
2412: }
2413: }
2414: return(standalone);
2415: }
2416:
2417: /*
1.1 veillard 2418: * xmlParseXMLDecl: parse an XML declaration header
1.29 daniel 2419: *
2420: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 2421: */
2422:
1.16 daniel 2423: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 2424: CHAR *version;
2425:
2426: /*
1.19 daniel 2427: * We know that '<?xml' is here.
1.1 veillard 2428: */
1.16 daniel 2429: ctxt->cur += 5;
1.1 veillard 2430:
1.16 daniel 2431: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2432:
2433: /*
1.29 daniel 2434: * We should have the VersionInfo here.
1.1 veillard 2435: */
1.29 daniel 2436: version = xmlParseVersionInfo(ctxt);
2437: if (version == NULL)
1.16 daniel 2438: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.29 daniel 2439: else {
2440: ctxt->doc = xmlNewDoc(version);
2441: free(version);
2442: }
2443:
2444: /*
2445: * We may have the encoding declaration
2446: */
1.32 daniel 2447: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 2448:
2449: /*
1.29 daniel 2450: * We may have the standalone status.
1.1 veillard 2451: */
1.32 daniel 2452: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 2453:
1.29 daniel 2454: SKIP_BLANKS(ctxt->cur);
2455: if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
2456: ctxt->cur += 2;
1.31 daniel 2457: } else if (ctxt->cur[0] == '>') {
2458: /* Deprecated old WD ... */
2459: xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
2460: ctxt->cur++;
1.29 daniel 2461: } else {
1.31 daniel 2462: xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
1.29 daniel 2463: MOVETO_ENDTAG(ctxt->cur);
1.31 daniel 2464: ctxt->cur++;
1.29 daniel 2465: }
1.1 veillard 2466: }
2467:
2468: /*
1.22 daniel 2469: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 2470: * Misc*
2471: *
1.22 daniel 2472: * [27] Misc ::= Comment | PI | S
1.1 veillard 2473: */
2474:
1.16 daniel 2475: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
2476: while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) ||
2477: ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1.21 daniel 2478: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) ||
1.16 daniel 2479: IS_BLANK(ctxt->cur[0])) {
2480: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
2481: xmlParsePI(ctxt);
2482: } else if (IS_BLANK(ctxt->cur[0])) {
2483: ctxt->cur++;
1.1 veillard 2484: } else
1.31 daniel 2485: xmlParseComment(ctxt, 0);
1.1 veillard 2486: }
2487: }
2488:
2489: /*
1.16 daniel 2490: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 2491: *
1.22 daniel 2492: * [1] document ::= prolog element Misc*
1.29 daniel 2493: *
2494: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 2495: */
2496:
1.16 daniel 2497: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.14 veillard 2498: /*
2499: * We should check for encoding here and plug-in some
2500: * conversion code TODO !!!!
2501: */
1.1 veillard 2502:
2503: /*
2504: * Wipe out everything which is before the first '<'
2505: */
1.16 daniel 2506: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2507:
2508: /*
2509: * Check for the XMLDecl in the Prolog.
2510: */
1.16 daniel 2511: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.19 daniel 2512: (ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
2513: (ctxt->cur[4] == 'l')) {
2514: xmlParseXMLDecl(ctxt);
2515: /* SKIP_EOL(cur); */
2516: SKIP_BLANKS(ctxt->cur);
2517: } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.16 daniel 2518: (ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
2519: (ctxt->cur[4] == 'L')) {
1.19 daniel 2520: /*
2521: * The first drafts were using <?XML and the final W3C REC
2522: * now use <?xml ...
2523: */
1.16 daniel 2524: xmlParseXMLDecl(ctxt);
1.1 veillard 2525: /* SKIP_EOL(cur); */
1.16 daniel 2526: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2527: } else {
1.16 daniel 2528: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 2529: }
2530:
2531: /*
2532: * The Misc part of the Prolog
2533: */
1.16 daniel 2534: xmlParseMisc(ctxt);
1.1 veillard 2535:
2536: /*
1.29 daniel 2537: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 2538: * (doctypedecl Misc*)?
2539: */
1.22 daniel 2540: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2541: (ctxt->cur[2] == 'D') && (ctxt->cur[3] == 'O') &&
2542: (ctxt->cur[4] == 'C') && (ctxt->cur[5] == 'T') &&
2543: (ctxt->cur[6] == 'Y') && (ctxt->cur[7] == 'P') &&
2544: (ctxt->cur[8] == 'E')) {
2545: xmlParseDocTypeDecl(ctxt);
2546: xmlParseMisc(ctxt);
1.21 daniel 2547: }
2548:
2549: /*
2550: * Time to start parsing the tree itself
1.1 veillard 2551: */
1.16 daniel 2552: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 2553:
2554: /*
2555: * The Misc part at the end
2556: */
2557: xmlParseMisc(ctxt);
1.16 daniel 2558:
2559: return(0);
2560: }
2561:
2562: /*
2563: * xmlParseDoc : parse an XML in-memory document and build a tree.
2564: */
2565:
2566: xmlDocPtr xmlParseDoc(CHAR *cur) {
2567: xmlDocPtr ret;
2568: xmlParserCtxtPtr ctxt;
2569:
2570: if (cur == NULL) return(NULL);
1.1 veillard 2571:
1.16 daniel 2572: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2573: if (ctxt == NULL) {
2574: perror("malloc");
2575: return(NULL);
2576: }
2577:
1.19 daniel 2578: xmlInitParserCtxt(ctxt);
1.16 daniel 2579: ctxt->base = cur;
2580: ctxt->cur = cur;
2581:
2582: xmlParseDocument(ctxt);
2583: ret = ctxt->doc;
1.20 daniel 2584: free(ctxt->nodes);
1.16 daniel 2585: free(ctxt);
2586:
1.1 veillard 2587: return(ret);
2588: }
2589:
1.9 httpng 2590: /*
2591: * xmlParseFile : parse an XML file and build a tree.
2592: */
2593:
2594: xmlDocPtr xmlParseFile(const char *filename) {
2595: xmlDocPtr ret;
1.20 daniel 2596: #ifdef HAVE_ZLIB_H
2597: gzFile input;
2598: #else
1.9 httpng 2599: int input;
1.20 daniel 2600: #endif
1.9 httpng 2601: int res;
2602: struct stat buf;
2603: char *buffer;
1.16 daniel 2604: xmlParserCtxtPtr ctxt;
1.9 httpng 2605:
1.11 veillard 2606: res = stat(filename, &buf);
1.9 httpng 2607: if (res < 0) return(NULL);
2608:
1.20 daniel 2609: #ifdef HAVE_ZLIB_H
2610: retry_bigger:
2611: buffer = malloc((buf.st_size * 20) + 100);
2612: #else
1.9 httpng 2613: buffer = malloc(buf.st_size + 100);
1.20 daniel 2614: #endif
1.9 httpng 2615: if (buffer == NULL) {
2616: perror("malloc");
2617: return(NULL);
2618: }
2619:
2620: memset(buffer, 0, sizeof(buffer));
1.20 daniel 2621: #ifdef HAVE_ZLIB_H
2622: input = gzopen (filename, "r");
2623: if (input == NULL) {
2624: fprintf (stderr, "Cannot read file %s :\n", filename);
2625: perror ("gzopen failed");
2626: return(NULL);
2627: }
2628: #else
1.9 httpng 2629: input = open (filename, O_RDONLY);
2630: if (input < 0) {
2631: fprintf (stderr, "Cannot read file %s :\n", filename);
2632: perror ("open failed");
2633: return(NULL);
2634: }
1.20 daniel 2635: #endif
2636: #ifdef HAVE_ZLIB_H
2637: res = gzread(input, buffer, 20 * buf.st_size);
2638: #else
1.9 httpng 2639: res = read(input, buffer, buf.st_size);
1.20 daniel 2640: #endif
1.9 httpng 2641: if (res < 0) {
2642: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 2643: #ifdef HAVE_ZLIB_H
2644: perror ("gzread failed");
2645: #else
1.9 httpng 2646: perror ("read failed");
1.20 daniel 2647: #endif
1.9 httpng 2648: return(NULL);
2649: }
1.20 daniel 2650: #ifdef HAVE_ZLIB_H
2651: gzclose(input);
2652: if (res >= 20 * buf.st_size) {
2653: free(buffer);
2654: buf.st_size *= 2;
2655: goto retry_bigger;
2656: }
2657: buf.st_size = res;
2658: #else
1.9 httpng 2659: close(input);
1.20 daniel 2660: #endif
2661:
1.9 httpng 2662:
1.16 daniel 2663: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2664: if (ctxt == NULL) {
2665: perror("malloc");
2666: return(NULL);
2667: }
1.9 httpng 2668: buffer[buf.st_size] = '\0';
1.16 daniel 2669:
1.19 daniel 2670: xmlInitParserCtxt(ctxt);
1.17 daniel 2671: ctxt->filename = filename;
1.16 daniel 2672: ctxt->base = buffer;
2673: ctxt->cur = buffer;
2674:
2675: xmlParseDocument(ctxt);
2676: ret = ctxt->doc;
1.9 httpng 2677: free(buffer);
1.20 daniel 2678: free(ctxt->nodes);
2679: free(ctxt);
2680:
2681: return(ret);
2682: }
2683:
1.32 daniel 2684:
1.20 daniel 2685: /*
1.32 daniel 2686: * xmlParseMemory : parse an XML memory block and build a tree.
1.20 daniel 2687: */
2688: xmlDocPtr xmlParseMemory(char *buffer, int size) {
2689: xmlDocPtr ret;
2690: xmlParserCtxtPtr ctxt;
2691: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2692: if (ctxt == NULL) {
2693: perror("malloc");
2694: return(NULL);
2695: }
2696:
2697: buffer[size - 1] = '\0';
2698:
2699: xmlInitParserCtxt(ctxt);
2700: ctxt->base = buffer;
2701: ctxt->cur = buffer;
2702:
2703: xmlParseDocument(ctxt);
2704: ret = ctxt->doc;
2705: free(ctxt->nodes);
1.16 daniel 2706: free(ctxt);
2707:
1.9 httpng 2708: return(ret);
1.17 daniel 2709: }
2710:
2711:
2712: /* Initialize parser context */
2713: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2714: {
1.32 daniel 2715: int i;
1.19 daniel 2716:
1.32 daniel 2717: ctxt->filename = NULL;
2718: ctxt->base = NULL;
2719: ctxt->cur = NULL;
2720: ctxt->line = 1;
2721: ctxt->col = 1;
2722: ctxt->doc = NULL;
2723: ctxt->depth = 0;
2724: ctxt->max_depth = 10;
2725: ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
2726: if (ctxt->nodes == NULL) {
2727: fprintf(stderr, "malloc of %d byte failed\n",
2728: ctxt->max_depth * sizeof(xmlNodePtr));
2729: ctxt->max_depth = 0;
2730: } else {
2731: for (i = 0;i < ctxt->max_depth;i++)
2732: ctxt->nodes[i] = NULL;
2733: }
2734: ctxt->record_info = 0;
2735: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 2736: }
2737:
2738:
1.19 daniel 2739: /*
2740: * Clear (release owned resources) and reinitialize context
2741: */
1.32 daniel 2742: void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 2743: {
1.32 daniel 2744: if ( ctxt->nodes != NULL )
2745: free(ctxt->nodes);
2746: xmlClearNodeInfoSeq(&ctxt->node_seq);
2747: xmlInitParserCtxt(ctxt);
1.17 daniel 2748: }
2749:
2750:
1.19 daniel 2751: /*
2752: * Setup the parser context to parse a new buffer; Clears any prior
2753: * contents from the parser context. The buffer parameter must not be
2754: * NULL, but the filename parameter can be
2755: */
1.17 daniel 2756: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
2757: const char* filename)
2758: {
2759: xmlClearParserCtxt(ctxt);
2760: ctxt->base = buffer;
2761: ctxt->cur = buffer;
2762: ctxt->filename = filename;
2763: }
2764:
1.32 daniel 2765:
2766: /*
2767: * xmlParserFindNodeInfo : Find the parser node info struct for a given node
2768: */
2769: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2770: const xmlNode* node)
2771: {
2772: unsigned long pos;
2773:
2774: /* Find position where node should be at */
2775: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2776: if ( ctx->node_seq.buffer[pos].node == node )
2777: return &ctx->node_seq.buffer[pos];
2778: else
2779: return NULL;
2780: }
2781:
2782:
2783: /*
2784: * xmlInitNodeInfoSeq -- Initialize (set to initial state) node info sequence
2785: */
2786: void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2787: {
2788: seq->length = 0;
2789: seq->maximum = 0;
2790: seq->buffer = NULL;
2791: }
2792:
2793: /*
2794: * xmlClearNodeInfoSeq -- Clear (release memory and reinitialize) node
2795: * info sequence
2796: */
2797: void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2798: {
2799: if ( seq->buffer != NULL )
2800: free(seq->buffer);
2801: xmlInitNodeInfoSeq(seq);
2802: }
2803:
2804:
2805: /*
2806: * xmlParserFindNodeInfoIndex : Find the index that the info record for
2807: * the given node is or should be at in a sorted sequence
2808: */
2809: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2810: const xmlNode* node)
2811: {
2812: unsigned long upper, lower, middle;
2813: int found = 0;
2814:
2815: /* Do a binary search for the key */
2816: lower = 1;
2817: upper = seq->length;
2818: middle = 0;
2819: while ( lower <= upper && !found) {
2820: middle = lower + (upper - lower) / 2;
2821: if ( node == seq->buffer[middle - 1].node )
2822: found = 1;
2823: else if ( node < seq->buffer[middle - 1].node )
2824: upper = middle - 1;
2825: else
2826: lower = middle + 1;
2827: }
2828:
2829: /* Return position */
2830: if ( middle == 0 || seq->buffer[middle - 1].node < node )
2831: return middle;
2832: else
2833: return middle - 1;
2834: }
2835:
2836:
2837: /*
2838: * xmlParserAddNodeInfo : Insert node info record into sorted sequence
2839: */
2840: void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
2841: const xmlParserNodeInfo* info)
2842: {
2843: unsigned long pos;
2844: static unsigned int block_size = 5;
2845:
2846: /* Find pos and check to see if node is already in the sequence */
2847: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
2848: if ( pos < ctx->node_seq.length
2849: && ctx->node_seq.buffer[pos].node == info->node ) {
2850: ctx->node_seq.buffer[pos] = *info;
2851: }
2852:
2853: /* Otherwise, we need to add new node to buffer */
2854: else {
2855: /* Expand buffer by 5 if needed */
2856: if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
2857: xmlParserNodeInfo* tmp_buffer;
2858: unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
2859: *(ctx->node_seq.maximum + block_size));
2860:
2861: if ( ctx->node_seq.buffer == NULL )
2862: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
2863: else
2864: tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
2865:
2866: if ( tmp_buffer == NULL ) {
2867: xmlParserError(ctx, "Out of memory");
2868: return;
2869: }
2870: ctx->node_seq.buffer = tmp_buffer;
2871: ctx->node_seq.maximum += block_size;
2872: }
2873:
2874: /* If position is not at end, move elements out of the way */
2875: if ( pos != ctx->node_seq.length ) {
2876: unsigned long i;
2877:
2878: for ( i = ctx->node_seq.length; i > pos; i-- )
2879: ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
2880: }
2881:
2882: /* Copy element and increase length */
2883: ctx->node_seq.buffer[pos] = *info;
2884: ctx->node_seq.length++;
2885: }
2886: }
Webmaster