Annotation of XML/parser.c, revision 1.42
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.42 ! daniel 6: * $Id: parser.c,v 1.41 1998/08/08 02:45:38 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
34: /*
1.40 daniel 35: * Generic function for accessing stacks in the Parser Context
1.1 veillard 36: */
37:
1.31 daniel 38: #define PUSH_AND_POP(type, name) \
1.40 daniel 39: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 40: if (ctxt->name##Nr >= ctxt->name##Max) { \
41: ctxt->name##Max *= 2; \
1.40 daniel 42: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
43: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
44: if (ctxt->name##Tab == NULL) { \
1.31 daniel 45: fprintf(stderr, "realloc failed !\n"); \
46: exit(1); \
47: } \
48: } \
1.40 daniel 49: ctxt->name##Tab[ctxt->name##Nr] = value; \
50: ctxt->name = value; \
51: return(ctxt->name##Nr++); \
1.31 daniel 52: } \
1.40 daniel 53: type name##Pop(xmlParserCtxtPtr ctxt) { \
54: if (ctxt->name##Nr <= 0) return(0); \
55: ctxt->name##Nr--; \
56: ctxt->name = ctxt->name##Tab[ctxt->name##Nr]; \
57: return(ctxt->name); \
1.31 daniel 58: } \
59:
1.40 daniel 60: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 61: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 62:
/*
 * Input access macros. They all expect a variable named ctxt, holding the
 * parser context, to be in scope where they are used.
 *
 *   CUR         the current character; when it is 0 the value returned by
 *               xmlPopInput(ctxt) is used instead.
 *   CUR_PTR     the pointer to the current character.
 *   NXT(val)    the character val positions after the current one.
 *   NEXT        advances by one character, keeping line/col up to date;
 *               its value is the pointer *before* the move.
 *   SKIP(val)   advances by val characters and adds val to col so that the
 *               column stays consistent with NEXT (val is expanded twice
 *               and the skipped characters are not checked for '\n').
 *   SKIP_BLANKS advances while the current character is an IS_BLANK one.
 */
#define CUR (*(ctxt->input->cur) ? *(ctxt->input->cur) : xmlPopInput(ctxt))

#define CUR_PTR (ctxt->input->cur)

#define NXT(val) (ctxt->input->cur[(val)])
#define NEXT (((*(ctxt->input->cur) == '\n') ?                          \
                (ctxt->input->line++, ctxt->input->col = 1) :           \
                (ctxt->input->col++)), ctxt->input->cur++)

#define SKIP(val) (ctxt->input->col += (val), ctxt->input->cur += (val))
#define SKIP_BLANKS                                                     \
    do {                                                                \
        while (IS_BLANK(*(ctxt->input->cur))) NEXT;                     \
    } while (0)
! 75:
1.40 daniel 76:
77: /*
78: * xmlPopInput: the current input pointed by ctxt->input came to an end
79: * pop it and return the next char.
80: */
81: CHAR xmlPopInput(xmlParserCtxtPtr ctxt) {
82: if (ctxt->inputNr == 1) return(0); /* End of main Input */
83: inputPop(ctxt);
84: return(CUR);
85: }
86:
87: /*
88: * xmlPushInput: switch to a new input stream which is stacked on top
89: * of the previous one(s).
90: */
91: void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
92: if (input == NULL) return;
93: inputPush(ctxt, input);
94: }
95:
96: /*
97: * A few macros needed to help building the parser.
98: */
99:
1.1 veillard 100: #ifdef UNICODE
1.30 daniel 101: /************************************************************************
102: * *
103: * UNICODE version of the macros. *
104: * *
105: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 *
 * IS_CHAR(c) accepts tab, line feed, carriage return and every value from
 * 0x20 upward except 0xFFFE and 0xFFFF. Values in the surrogate blocks
 * are therefore accepted by this macro. The argument is expanded several
 * times.
 */
#define IS_CHAR(c)                                                      \
    ((((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)) ||           \
     ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
114:
/*
 * IS_BLANK(c): true when c is space (0x20), tab (0x09), line feed (0xa)
 * or carriage return (0x0D), i.e. one of the characters of production [3].
 * The argument is expanded up to four times.
 * NOTE: IS_BLANK is defined again, with the same tokens, after the end of
 * the UNICODE conditional; keep both definitions token-for-token identical
 * so that the redefinition stays legal when UNICODE is defined.
 */
1.22 daniel 115: /*
116: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
117: */
1.42 ! daniel 118: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
! 119: ((c) == 0x0D))
1.1 veillard 120:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * IS_BASECHAR(c) is true when c is one of the code points, or lies in one
 * of the inclusive ranges, listed below (one table entry per line). The
 * argument is expanded once per comparison, so it is evaluated many times.
 *
 * XML_RNG(c, lo, hi) tests lo <= c <= hi, XML_ONE(c, v) tests c == v.
 */
#ifndef XML_RNG
#define XML_RNG(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif
#ifndef XML_ONE
#define XML_ONE(c, v) ((c) == (v))
#endif

#define IS_BASECHAR(c)                                                  \
    (XML_RNG(c, 0x0041, 0x005A) ||                                      \
     XML_RNG(c, 0x0061, 0x007A) ||                                      \
     XML_RNG(c, 0x00C0, 0x00D6) ||                                      \
     XML_RNG(c, 0x00D8, 0x00F6) ||                                      \
     XML_RNG(c, 0x00F8, 0x00FF) ||                                      \
     XML_RNG(c, 0x0100, 0x0131) ||                                      \
     XML_RNG(c, 0x0134, 0x013E) ||                                      \
     XML_RNG(c, 0x0141, 0x0148) ||                                      \
     XML_RNG(c, 0x014A, 0x017E) ||                                      \
     XML_RNG(c, 0x0180, 0x01C3) ||                                      \
     XML_RNG(c, 0x01CD, 0x01F0) ||                                      \
     XML_RNG(c, 0x01F4, 0x01F5) ||                                      \
     XML_RNG(c, 0x01FA, 0x0217) ||                                      \
     XML_RNG(c, 0x0250, 0x02A8) ||                                      \
     XML_RNG(c, 0x02BB, 0x02C1) ||                                      \
     XML_ONE(c, 0x0386) ||                                              \
     XML_RNG(c, 0x0388, 0x038A) ||                                      \
     XML_ONE(c, 0x038C) ||                                              \
     XML_RNG(c, 0x038E, 0x03A1) ||                                      \
     XML_RNG(c, 0x03A3, 0x03CE) ||                                      \
     XML_RNG(c, 0x03D0, 0x03D6) ||                                      \
     XML_ONE(c, 0x03DA) ||                                              \
     XML_ONE(c, 0x03DC) ||                                              \
     XML_ONE(c, 0x03DE) ||                                              \
     XML_ONE(c, 0x03E0) ||                                              \
     XML_RNG(c, 0x03E2, 0x03F3) ||                                      \
     XML_RNG(c, 0x0401, 0x040C) ||                                      \
     XML_RNG(c, 0x040E, 0x044F) ||                                      \
     XML_RNG(c, 0x0451, 0x045C) ||                                      \
     XML_RNG(c, 0x045E, 0x0481) ||                                      \
     XML_RNG(c, 0x0490, 0x04C4) ||                                      \
     XML_RNG(c, 0x04C7, 0x04C8) ||                                      \
     XML_RNG(c, 0x04CB, 0x04CC) ||                                      \
     XML_RNG(c, 0x04D0, 0x04EB) ||                                      \
     XML_RNG(c, 0x04EE, 0x04F5) ||                                      \
     XML_RNG(c, 0x04F8, 0x04F9) ||                                      \
     XML_RNG(c, 0x0531, 0x0556) ||                                      \
     XML_ONE(c, 0x0559) ||                                              \
     XML_RNG(c, 0x0561, 0x0586) ||                                      \
     XML_RNG(c, 0x05D0, 0x05EA) ||                                      \
     XML_RNG(c, 0x05F0, 0x05F2) ||                                      \
     XML_RNG(c, 0x0621, 0x063A) ||                                      \
     XML_RNG(c, 0x0641, 0x064A) ||                                      \
     XML_RNG(c, 0x0671, 0x06B7) ||                                      \
     XML_RNG(c, 0x06BA, 0x06BE) ||                                      \
     XML_RNG(c, 0x06C0, 0x06CE) ||                                      \
     XML_RNG(c, 0x06D0, 0x06D3) ||                                      \
     XML_ONE(c, 0x06D5) ||                                              \
     XML_RNG(c, 0x06E5, 0x06E6) ||                                      \
     XML_RNG(c, 0x0905, 0x0939) ||                                      \
     XML_ONE(c, 0x093D) ||                                              \
     XML_RNG(c, 0x0958, 0x0961) ||                                      \
     XML_RNG(c, 0x0985, 0x098C) ||                                      \
     XML_RNG(c, 0x098F, 0x0990) ||                                      \
     XML_RNG(c, 0x0993, 0x09A8) ||                                      \
     XML_RNG(c, 0x09AA, 0x09B0) ||                                      \
     XML_ONE(c, 0x09B2) ||                                              \
     XML_RNG(c, 0x09B6, 0x09B9) ||                                      \
     XML_RNG(c, 0x09DC, 0x09DD) ||                                      \
     XML_RNG(c, 0x09DF, 0x09E1) ||                                      \
     XML_RNG(c, 0x09F0, 0x09F1) ||                                      \
     XML_RNG(c, 0x0A05, 0x0A0A) ||                                      \
     XML_RNG(c, 0x0A0F, 0x0A10) ||                                      \
     XML_RNG(c, 0x0A13, 0x0A28) ||                                      \
     XML_RNG(c, 0x0A2A, 0x0A30) ||                                      \
     XML_RNG(c, 0x0A32, 0x0A33) ||                                      \
     XML_RNG(c, 0x0A35, 0x0A36) ||                                      \
     XML_RNG(c, 0x0A38, 0x0A39) ||                                      \
     XML_RNG(c, 0x0A59, 0x0A5C) ||                                      \
     XML_ONE(c, 0x0A5E) ||                                              \
     XML_RNG(c, 0x0A72, 0x0A74) ||                                      \
     XML_RNG(c, 0x0A85, 0x0A8B) ||                                      \
     XML_ONE(c, 0x0A8D) ||                                              \
     XML_RNG(c, 0x0A8F, 0x0A91) ||                                      \
     XML_RNG(c, 0x0A93, 0x0AA8) ||                                      \
     XML_RNG(c, 0x0AAA, 0x0AB0) ||                                      \
     XML_RNG(c, 0x0AB2, 0x0AB3) ||                                      \
     XML_RNG(c, 0x0AB5, 0x0AB9) ||                                      \
     XML_ONE(c, 0x0ABD) ||                                              \
     XML_ONE(c, 0x0AE0) ||                                              \
     XML_RNG(c, 0x0B05, 0x0B0C) ||                                      \
     XML_RNG(c, 0x0B0F, 0x0B10) ||                                      \
     XML_RNG(c, 0x0B13, 0x0B28) ||                                      \
     XML_RNG(c, 0x0B2A, 0x0B30) ||                                      \
     XML_RNG(c, 0x0B32, 0x0B33) ||                                      \
     XML_RNG(c, 0x0B36, 0x0B39) ||                                      \
     XML_ONE(c, 0x0B3D) ||                                              \
     XML_RNG(c, 0x0B5C, 0x0B5D) ||                                      \
     XML_RNG(c, 0x0B5F, 0x0B61) ||                                      \
     XML_RNG(c, 0x0B85, 0x0B8A) ||                                      \
     XML_RNG(c, 0x0B8E, 0x0B90) ||                                      \
     XML_RNG(c, 0x0B92, 0x0B95) ||                                      \
     XML_RNG(c, 0x0B99, 0x0B9A) ||                                      \
     XML_ONE(c, 0x0B9C) ||                                              \
     XML_RNG(c, 0x0B9E, 0x0B9F) ||                                      \
     XML_RNG(c, 0x0BA3, 0x0BA4) ||                                      \
     XML_RNG(c, 0x0BA8, 0x0BAA) ||                                      \
     XML_RNG(c, 0x0BAE, 0x0BB5) ||                                      \
     XML_RNG(c, 0x0BB7, 0x0BB9) ||                                      \
     XML_RNG(c, 0x0C05, 0x0C0C) ||                                      \
     XML_RNG(c, 0x0C0E, 0x0C10) ||                                      \
     XML_RNG(c, 0x0C12, 0x0C28) ||                                      \
     XML_RNG(c, 0x0C2A, 0x0C33) ||                                      \
     XML_RNG(c, 0x0C35, 0x0C39) ||                                      \
     XML_RNG(c, 0x0C60, 0x0C61) ||                                      \
     XML_RNG(c, 0x0C85, 0x0C8C) ||                                      \
     XML_RNG(c, 0x0C8E, 0x0C90) ||                                      \
     XML_RNG(c, 0x0C92, 0x0CA8) ||                                      \
     XML_RNG(c, 0x0CAA, 0x0CB3) ||                                      \
     XML_RNG(c, 0x0CB5, 0x0CB9) ||                                      \
     XML_ONE(c, 0x0CDE) ||                                              \
     XML_RNG(c, 0x0CE0, 0x0CE1) ||                                      \
     XML_RNG(c, 0x0D05, 0x0D0C) ||                                      \
     XML_RNG(c, 0x0D0E, 0x0D10) ||                                      \
     XML_RNG(c, 0x0D12, 0x0D28) ||                                      \
     XML_RNG(c, 0x0D2A, 0x0D39) ||                                      \
     XML_RNG(c, 0x0D60, 0x0D61) ||                                      \
     XML_RNG(c, 0x0E01, 0x0E2E) ||                                      \
     XML_ONE(c, 0x0E30) ||                                              \
     XML_RNG(c, 0x0E32, 0x0E33) ||                                      \
     XML_RNG(c, 0x0E40, 0x0E45) ||                                      \
     XML_RNG(c, 0x0E81, 0x0E82) ||                                      \
     XML_ONE(c, 0x0E84) ||                                              \
     XML_RNG(c, 0x0E87, 0x0E88) ||                                      \
     XML_ONE(c, 0x0E8A) ||                                              \
     XML_ONE(c, 0x0E8D) ||                                              \
     XML_RNG(c, 0x0E94, 0x0E97) ||                                      \
     XML_RNG(c, 0x0E99, 0x0E9F) ||                                      \
     XML_RNG(c, 0x0EA1, 0x0EA3) ||                                      \
     XML_ONE(c, 0x0EA5) ||                                              \
     XML_ONE(c, 0x0EA7) ||                                              \
     XML_RNG(c, 0x0EAA, 0x0EAB) ||                                      \
     XML_RNG(c, 0x0EAD, 0x0EAE) ||                                      \
     XML_ONE(c, 0x0EB0) ||                                              \
     XML_RNG(c, 0x0EB2, 0x0EB3) ||                                      \
     XML_ONE(c, 0x0EBD) ||                                              \
     XML_RNG(c, 0x0EC0, 0x0EC4) ||                                      \
     XML_RNG(c, 0x0F40, 0x0F47) ||                                      \
     XML_RNG(c, 0x0F49, 0x0F69) ||                                      \
     XML_RNG(c, 0x10A0, 0x10C5) ||                                      \
     XML_RNG(c, 0x10D0, 0x10F6) ||                                      \
     XML_ONE(c, 0x1100) ||                                              \
     XML_RNG(c, 0x1102, 0x1103) ||                                      \
     XML_RNG(c, 0x1105, 0x1107) ||                                      \
     XML_ONE(c, 0x1109) ||                                              \
     XML_RNG(c, 0x110B, 0x110C) ||                                      \
     XML_RNG(c, 0x110E, 0x1112) ||                                      \
     XML_ONE(c, 0x113C) ||                                              \
     XML_ONE(c, 0x113E) ||                                              \
     XML_ONE(c, 0x1140) ||                                              \
     XML_ONE(c, 0x114C) ||                                              \
     XML_ONE(c, 0x114E) ||                                              \
     XML_ONE(c, 0x1150) ||                                              \
     XML_RNG(c, 0x1154, 0x1155) ||                                      \
     XML_ONE(c, 0x1159) ||                                              \
     XML_RNG(c, 0x115F, 0x1161) ||                                      \
     XML_ONE(c, 0x1163) ||                                              \
     XML_ONE(c, 0x1165) ||                                              \
     XML_ONE(c, 0x1167) ||                                              \
     XML_ONE(c, 0x1169) ||                                              \
     XML_RNG(c, 0x116D, 0x116E) ||                                      \
     XML_RNG(c, 0x1172, 0x1173) ||                                      \
     XML_ONE(c, 0x1175) ||                                              \
     XML_ONE(c, 0x119E) ||                                              \
     XML_ONE(c, 0x11A8) ||                                              \
     XML_ONE(c, 0x11AB) ||                                              \
     XML_RNG(c, 0x11AE, 0x11AF) ||                                      \
     XML_RNG(c, 0x11B7, 0x11B8) ||                                      \
     XML_ONE(c, 0x11BA) ||                                              \
     XML_RNG(c, 0x11BC, 0x11C2) ||                                      \
     XML_ONE(c, 0x11EB) ||                                              \
     XML_ONE(c, 0x11F0) ||                                              \
     XML_ONE(c, 0x11F9) ||                                              \
     XML_RNG(c, 0x1E00, 0x1E9B) ||                                      \
     XML_RNG(c, 0x1EA0, 0x1EF9) ||                                      \
     XML_RNG(c, 0x1F00, 0x1F15) ||                                      \
     XML_RNG(c, 0x1F18, 0x1F1D) ||                                      \
     XML_RNG(c, 0x1F20, 0x1F45) ||                                      \
     XML_RNG(c, 0x1F48, 0x1F4D) ||                                      \
     XML_RNG(c, 0x1F50, 0x1F57) ||                                      \
     XML_ONE(c, 0x1F59) ||                                              \
     XML_ONE(c, 0x1F5B) ||                                              \
     XML_ONE(c, 0x1F5D) ||                                              \
     XML_RNG(c, 0x1F5F, 0x1F7D) ||                                      \
     XML_RNG(c, 0x1F80, 0x1FB4) ||                                      \
     XML_RNG(c, 0x1FB6, 0x1FBC) ||                                      \
     XML_ONE(c, 0x1FBE) ||                                              \
     XML_RNG(c, 0x1FC2, 0x1FC4) ||                                      \
     XML_RNG(c, 0x1FC6, 0x1FCC) ||                                      \
     XML_RNG(c, 0x1FD0, 0x1FD3) ||                                      \
     XML_RNG(c, 0x1FD6, 0x1FDB) ||                                      \
     XML_RNG(c, 0x1FE0, 0x1FEC) ||                                      \
     XML_RNG(c, 0x1FF2, 0x1FF4) ||                                      \
     XML_RNG(c, 0x1FF6, 0x1FFC) ||                                      \
     XML_ONE(c, 0x2126) ||                                              \
     XML_RNG(c, 0x212A, 0x212B) ||                                      \
     XML_ONE(c, 0x212E) ||                                              \
     XML_RNG(c, 0x2180, 0x2182) ||                                      \
     XML_RNG(c, 0x3041, 0x3094) ||                                      \
     XML_RNG(c, 0x30A1, 0x30FA) ||                                      \
     XML_RNG(c, 0x3105, 0x312C) ||                                      \
     XML_RNG(c, 0xAC00, 0xD7A3))
1.1 veillard 332:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * IS_DIGIT(c) is true when c lies in one of the inclusive ranges below.
 * The argument is expanded once per comparison.
 *
 * XML_RNG(c, lo, hi) tests lo <= c <= hi.
 */
#ifndef XML_RNG
#define XML_RNG(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_DIGIT(c)                                                     \
    (XML_RNG(c, 0x0030, 0x0039) ||                                      \
     XML_RNG(c, 0x0660, 0x0669) ||                                      \
     XML_RNG(c, 0x06F0, 0x06F9) ||                                      \
     XML_RNG(c, 0x0966, 0x096F) ||                                      \
     XML_RNG(c, 0x09E6, 0x09EF) ||                                      \
     XML_RNG(c, 0x0A66, 0x0A6F) ||                                      \
     XML_RNG(c, 0x0AE6, 0x0AEF) ||                                      \
     XML_RNG(c, 0x0B66, 0x0B6F) ||                                      \
     XML_RNG(c, 0x0BE7, 0x0BEF) ||                                      \
     XML_RNG(c, 0x0C66, 0x0C6F) ||                                      \
     XML_RNG(c, 0x0CE6, 0x0CEF) ||                                      \
     XML_RNG(c, 0x0D66, 0x0D6F) ||                                      \
     XML_RNG(c, 0x0E50, 0x0E59) ||                                      \
     XML_RNG(c, 0x0ED0, 0x0ED9) ||                                      \
     XML_RNG(c, 0x0F20, 0x0F29))
1.1 veillard 352:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * IS_COMBINING(c) is true when c is one of the code points, or lies in
 * one of the inclusive ranges, listed below (one table entry per line).
 * The argument is expanded once per comparison.
 *
 * XML_RNG(c, lo, hi) tests lo <= c <= hi, XML_ONE(c, v) tests c == v.
 */
#ifndef XML_RNG
#define XML_RNG(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif
#ifndef XML_ONE
#define XML_ONE(c, v) ((c) == (v))
#endif

#define IS_COMBINING(c)                                                 \
    (XML_RNG(c, 0x0300, 0x0345) ||                                      \
     XML_RNG(c, 0x0360, 0x0361) ||                                      \
     XML_RNG(c, 0x0483, 0x0486) ||                                      \
     XML_RNG(c, 0x0591, 0x05A1) ||                                      \
     XML_RNG(c, 0x05A3, 0x05B9) ||                                      \
     XML_RNG(c, 0x05BB, 0x05BD) ||                                      \
     XML_ONE(c, 0x05BF) ||                                              \
     XML_RNG(c, 0x05C1, 0x05C2) ||                                      \
     XML_ONE(c, 0x05C4) ||                                              \
     XML_RNG(c, 0x064B, 0x0652) ||                                      \
     XML_ONE(c, 0x0670) ||                                              \
     XML_RNG(c, 0x06D6, 0x06DC) ||                                      \
     XML_RNG(c, 0x06DD, 0x06DF) ||                                      \
     XML_RNG(c, 0x06E0, 0x06E4) ||                                      \
     XML_RNG(c, 0x06E7, 0x06E8) ||                                      \
     XML_RNG(c, 0x06EA, 0x06ED) ||                                      \
     XML_RNG(c, 0x0901, 0x0903) ||                                      \
     XML_ONE(c, 0x093C) ||                                              \
     XML_RNG(c, 0x093E, 0x094C) ||                                      \
     XML_ONE(c, 0x094D) ||                                              \
     XML_RNG(c, 0x0951, 0x0954) ||                                      \
     XML_RNG(c, 0x0962, 0x0963) ||                                      \
     XML_RNG(c, 0x0981, 0x0983) ||                                      \
     XML_ONE(c, 0x09BC) ||                                              \
     XML_ONE(c, 0x09BE) ||                                              \
     XML_ONE(c, 0x09BF) ||                                              \
     XML_RNG(c, 0x09C0, 0x09C4) ||                                      \
     XML_RNG(c, 0x09C7, 0x09C8) ||                                      \
     XML_RNG(c, 0x09CB, 0x09CD) ||                                      \
     XML_ONE(c, 0x09D7) ||                                              \
     XML_RNG(c, 0x09E2, 0x09E3) ||                                      \
     XML_ONE(c, 0x0A02) ||                                              \
     XML_ONE(c, 0x0A3C) ||                                              \
     XML_ONE(c, 0x0A3E) ||                                              \
     XML_ONE(c, 0x0A3F) ||                                              \
     XML_RNG(c, 0x0A40, 0x0A42) ||                                      \
     XML_RNG(c, 0x0A47, 0x0A48) ||                                      \
     XML_RNG(c, 0x0A4B, 0x0A4D) ||                                      \
     XML_RNG(c, 0x0A70, 0x0A71) ||                                      \
     XML_RNG(c, 0x0A81, 0x0A83) ||                                      \
     XML_ONE(c, 0x0ABC) ||                                              \
     XML_RNG(c, 0x0ABE, 0x0AC5) ||                                      \
     XML_RNG(c, 0x0AC7, 0x0AC9) ||                                      \
     XML_RNG(c, 0x0ACB, 0x0ACD) ||                                      \
     XML_RNG(c, 0x0B01, 0x0B03) ||                                      \
     XML_ONE(c, 0x0B3C) ||                                              \
     XML_RNG(c, 0x0B3E, 0x0B43) ||                                      \
     XML_RNG(c, 0x0B47, 0x0B48) ||                                      \
     XML_RNG(c, 0x0B4B, 0x0B4D) ||                                      \
     XML_RNG(c, 0x0B56, 0x0B57) ||                                      \
     XML_RNG(c, 0x0B82, 0x0B83) ||                                      \
     XML_RNG(c, 0x0BBE, 0x0BC2) ||                                      \
     XML_RNG(c, 0x0BC6, 0x0BC8) ||                                      \
     XML_RNG(c, 0x0BCA, 0x0BCD) ||                                      \
     XML_ONE(c, 0x0BD7) ||                                              \
     XML_RNG(c, 0x0C01, 0x0C03) ||                                      \
     XML_RNG(c, 0x0C3E, 0x0C44) ||                                      \
     XML_RNG(c, 0x0C46, 0x0C48) ||                                      \
     XML_RNG(c, 0x0C4A, 0x0C4D) ||                                      \
     XML_RNG(c, 0x0C55, 0x0C56) ||                                      \
     XML_RNG(c, 0x0C82, 0x0C83) ||                                      \
     XML_RNG(c, 0x0CBE, 0x0CC4) ||                                      \
     XML_RNG(c, 0x0CC6, 0x0CC8) ||                                      \
     XML_RNG(c, 0x0CCA, 0x0CCD) ||                                      \
     XML_RNG(c, 0x0CD5, 0x0CD6) ||                                      \
     XML_RNG(c, 0x0D02, 0x0D03) ||                                      \
     XML_RNG(c, 0x0D3E, 0x0D43) ||                                      \
     XML_RNG(c, 0x0D46, 0x0D48) ||                                      \
     XML_RNG(c, 0x0D4A, 0x0D4D) ||                                      \
     XML_ONE(c, 0x0D57) ||                                              \
     XML_ONE(c, 0x0E31) ||                                              \
     XML_RNG(c, 0x0E34, 0x0E3A) ||                                      \
     XML_RNG(c, 0x0E47, 0x0E4E) ||                                      \
     XML_ONE(c, 0x0EB1) ||                                              \
     XML_RNG(c, 0x0EB4, 0x0EB9) ||                                      \
     XML_RNG(c, 0x0EBB, 0x0EBC) ||                                      \
     XML_RNG(c, 0x0EC8, 0x0ECD) ||                                      \
     XML_RNG(c, 0x0F18, 0x0F19) ||                                      \
     XML_ONE(c, 0x0F35) ||                                              \
     XML_ONE(c, 0x0F37) ||                                              \
     XML_ONE(c, 0x0F39) ||                                              \
     XML_ONE(c, 0x0F3E) ||                                              \
     XML_ONE(c, 0x0F3F) ||                                              \
     XML_RNG(c, 0x0F71, 0x0F84) ||                                      \
     XML_RNG(c, 0x0F86, 0x0F8B) ||                                      \
     XML_RNG(c, 0x0F90, 0x0F95) ||                                      \
     XML_ONE(c, 0x0F97) ||                                              \
     XML_RNG(c, 0x0F99, 0x0FAD) ||                                      \
     XML_RNG(c, 0x0FB1, 0x0FB7) ||                                      \
     XML_ONE(c, 0x0FB9) ||                                              \
     XML_RNG(c, 0x20D0, 0x20DC) ||                                      \
     XML_ONE(c, 0x20E1) ||                                              \
     XML_RNG(c, 0x302A, 0x302F) ||                                      \
     XML_ONE(c, 0x3099) ||                                              \
     XML_ONE(c, 0x309A))
1.3 veillard 452:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * IS_EXTENDER(c) is true exactly for the code points of the production
 * above. The argument is expanded once per comparison.
 */
#define IS_EXTENDER(c)                                                  \
    (((c) == 0x00B7) || ((c) == 0x02D0) || ((c) == 0x02D1) ||           \
     ((c) == 0x0387) || ((c) == 0x0640) || ((c) == 0x0E46) ||           \
     ((c) == 0x0EC6) || ((c) == 0x3005) ||                              \
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||                            \
     (((c) >= 0x309D) && ((c) <= 0x309E)) ||                            \
     (((c) >= 0x30FC) && ((c) <= 0x30FE)))
1.3 veillard 465:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * IS_IDEOGRAPHIC(c) is true exactly for the code points of the production
 * above; the range 0xF900-0xFA2D, which is not part of the production, is
 * no longer accepted. The argument is expanded once per comparison.
 */
#define IS_IDEOGRAPHIC(c)                                               \
    ((((c) >= 0x4E00) && ((c) <= 0x9FA5)) ||                            \
     ((c) == 0x3007) ||                                                 \
     (((c) >= 0x3021) && ((c) <= 0x3029)))
474:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * IS_LETTER(c) is true when either IS_BASECHAR(c) or IS_IDEOGRAPHIC(c)
 * is; IS_BASECHAR is tried first.
 */
#define IS_LETTER(c)                                                    \
    ((IS_BASECHAR(c)) || (IS_IDEOGRAPHIC(c)))
479:
480: #else
1.30 daniel 481: /************************************************************************
482: * *
483: * 8bits / ASCII version of the macros. *
484: * *
485: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 *
 * 8 bits version: IS_CHAR(c) accepts tab (0x09), line feed (0x0a),
 * carriage return (0x0d) and any value greater than or equal to 0x20.
 * The argument is expanded up to four times.
 */
#define IS_CHAR(c)                                                      \
    (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
1.1 veillard 494:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8 bits version, restricted to the values below 0x100 of the production:
 *   [#x41-#x5A] | [#x61-#x7A] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#xFF]
 *
 * The former test "(c) >= 0xaa && (c) <= 0x5b" could never be true and
 * has been removed; 0xba is not part of the production and is no longer
 * accepted. The argument is expanded once per comparison.
 */
#define IS_BASECHAR(c)                                                  \
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||                                \
     (((c) >= 0x61) && ((c) <= 0x7a)) ||                                \
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||                                \
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||                                \
     (((c) >= 0xf8) && ((c) <= 0xff)))
506:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8 bits version: only the values 0x30 to 0x39 are digits.
 */
#define IS_DIGIT(c) (!(((c) < 0x30) || ((c) > 0x39)))
511:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8 bits version: a letter is simply whatever IS_BASECHAR accepts.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))
516:
1.22 daniel 517:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8 bits version: always false, the argument is not evaluated.
 */
#define IS_COMBINING(c) (0)
522:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8 bits version: 0xb7 is the only value accepted.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
529:
1.21 daniel 530: #endif /* !UNICODE */
1.1 veillard 531:
/*
 * IS_BLANK(c): true when c is space (0x20), tab (0x09), line feed (0xa)
 * or carriage return (0x0D). The argument is expanded up to four times.
 * NOTE: when UNICODE is defined this is the second definition of IS_BLANK
 * in the file (the first one sits with the UNICODE macros); keep both
 * token-for-token identical so that the redefinition stays legal.
 */
1.22 daniel 532: /*
533: * Blank chars.
534: *
535: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
536: */
537: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
538: ((c) == 0x0D))
539:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * IS_PUBIDCHAR(c) is true for the alphanumeric characters first, then the
 * three white space values, then each punctuation character of the
 * production. The argument is expanded once per comparison.
 */
#define IS_PUBIDCHAR(c)                                                 \
    ((((c) >= 'a') && ((c) <= 'z')) ||                                  \
     (((c) >= 'A') && ((c) <= 'Z')) ||                                  \
     (((c) >= '0') && ((c) <= '9')) ||                                  \
     ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||                 \
     ((c) == '-')  || ((c) == '\'') ||                                  \
     ((c) == '(')  || ((c) == ')')  ||                                  \
     ((c) == '+')  || ((c) == ',')  ||                                  \
     ((c) == '.')  || ((c) == '/')  ||                                  \
     ((c) == ':')  || ((c) == '=')  ||                                  \
     ((c) == '?')  || ((c) == ';')  ||                                  \
     ((c) == '!')  || ((c) == '*')  ||                                  \
     ((c) == '#')  || ((c) == '@')  ||                                  \
     ((c) == '$')  || ((c) == '_')  ||                                  \
     ((c) == '%'))
1.1 veillard 553:
/*
 * SKIP_EOL(p): if p points to an end of line, move p past it.
 *
 * A carriage return (0x0D) optionally followed by a line feed (0x0A), or
 * a line feed optionally followed by a carriage return, counts as a single
 * end of line; at most one is skipped per use. The values tested used to
 * be 0x13 and 0x10, i.e. 13 and 10 written as hexadecimal.
 * This expands to an if / else if statement, so enclose it in braces when
 * it is the body of another if that has an else branch.
 */
#define SKIP_EOL(p)                                                     \
    if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; }               \
    else if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }
557:
/*
 * MOVETO_ENDTAG(p): advance p until it points to a '>' or to a value
 * rejected by IS_CHAR (for instance the terminating 0).
 */
#define MOVETO_ENDTAG(p)                                                \
    for (; IS_CHAR(*(p)) && (*(p) != '>'); (p)++)
1.1 veillard 560:
/*
 * MOVETO_STARTTAG(p): advance p until it points to a '<' or to a value
 * rejected by IS_CHAR (for instance the terminating 0).
 */
#define MOVETO_STARTTAG(p)                                              \
    for (; IS_CHAR(*(p)) && (*(p) != '<'); (p)++)
1.1 veillard 563:
564: /*
1.3 veillard 565: * Forward declaration, needed because element parsing is recursive.
566: */
1.16 daniel 567: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.3 veillard 568:
569: /*
570: * xmlHandleData : this routine represent's the specific application
571: * behaviour when reading a piece of text.
572: *
573: * For example in WebDav, any piece made only of blanks is eliminated
574: */
575:
576: CHAR *xmlHandleData(CHAR *in) {
577: CHAR *cur;
578:
579: if (in == NULL) return(NULL);
580: cur = in;
581: while (IS_CHAR(*cur)) {
582: if (!IS_BLANK(*cur)) goto not_blank;
583: cur++;
584: }
585: free(in);
586: return(NULL);
587:
588: not_blank:
589: return(in);
590: }
591:
1.28 daniel 592: /************************************************************************
593: * *
594: * Commodity functions to handle CHARs *
595: * *
596: ************************************************************************/
597:
1.3 veillard 598: /*
1.1 veillard 599: * xmlStrndup : a strdup for array of CHAR's
600: */
601:
1.6 httpng 602: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 603: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
604:
605: if (ret == NULL) {
606: fprintf(stderr, "malloc of %d byte failed\n",
607: (len + 1) * sizeof(CHAR));
608: return(NULL);
609: }
610: memcpy(ret, cur, len * sizeof(CHAR));
611: ret[len] = 0;
612: return(ret);
613: }
614:
615: /*
616: * xmlStrdup : a strdup for CHAR's
617: */
618:
1.6 httpng 619: CHAR *xmlStrdup(const CHAR *cur) {
620: const CHAR *p = cur;
1.1 veillard 621:
622: while (IS_CHAR(*p)) p++;
623: return(xmlStrndup(cur, p - cur));
624: }
625:
626: /*
1.14 veillard 627: * xmlStrcmp : a strcmp for CHAR's
628: */
629:
630: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
631: register int tmp;
632:
633: do {
634: tmp = *str1++ - *str2++;
635: if (tmp != 0) return(tmp);
636: } while ((*str1 != 0) && (*str2 != 0));
637: return (*str1 - *str2);
638: }
639:
640: /*
641: * xmlStrncmp : a strncmp for CHAR's
642: */
643:
644: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
645: register int tmp;
646:
647: if (len <= 0) return(0);
648: do {
649: tmp = *str1++ - *str2++;
650: if (tmp != 0) return(tmp);
651: len--;
652: if (len <= 0) return(0);
653: } while ((*str1 != 0) && (*str2 != 0));
654: return (*str1 - *str2);
655: }
656:
657: /*
658: * xmlStrchr : a strchr for CHAR's
659: */
660:
661: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
662: while (*str != 0) {
663: if (*str == val) return((CHAR *) str);
664: str++;
665: }
666: return(NULL);
667: }
1.28 daniel 668:
669: /************************************************************************
670: * *
671: * Extra stuff for namespace support *
672: * Relates to http://www.w3.org/TR/WD-xml-names *
673: * *
674: ************************************************************************/
675:
676: /*
677: * xmlNamespaceParseNCName : parse an XML namespace name.
678: *
679: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
680: *
681: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
682: * CombiningChar | Extender
683: */
684:
685: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
686: const CHAR *q;
687: CHAR *ret = NULL;
688:
1.40 daniel 689: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
690: q = NEXT;
1.28 daniel 691:
1.40 daniel 692: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
693: (CUR == '.') || (CUR == '-') ||
694: (CUR == '_') ||
695: (IS_COMBINING(CUR)) ||
696: (IS_EXTENDER(CUR)))
697: NEXT;
1.28 daniel 698:
1.40 daniel 699: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 700:
701: return(ret);
702: }
703:
704: /*
705: * xmlNamespaceParseQName : parse an XML qualified name
706: *
707: * [NS 5] QName ::= (Prefix ':')? LocalPart
708: *
709: * [NS 6] Prefix ::= NCName
710: *
711: * [NS 7] LocalPart ::= NCName
712: */
713:
714: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
715: CHAR *ret = NULL;
716:
717: *prefix = NULL;
718: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 719: if (CUR == ':') {
1.28 daniel 720: *prefix = ret;
1.40 daniel 721: NEXT;
1.28 daniel 722: ret = xmlNamespaceParseNCName(ctxt);
723: }
724:
725: return(ret);
726: }
727:
728: /*
729: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
730: *
731: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
732: *
733: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
734: */
735:
1.39 daniel 736: CHAR *xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 737: CHAR *name = NULL;
738:
1.40 daniel 739: if ((CUR == 'x') && (NXT(1) == 'm') &&
740: (NXT(2) == 'l') && (NXT(3) == 'n') &&
741: (NXT(4) == 's')) {
742: SKIP(5);
743: if (CUR == ':') {
744: NEXT;
1.28 daniel 745: name = xmlNamespaceParseNCName(ctxt);
746: }
747: }
1.39 daniel 748: return(name);
1.28 daniel 749: }
750:
751: /************************************************************************
752: * *
753: * The parser itself *
754: * Relates to http://www.w3.org/TR/REC-xml *
755: * *
756: ************************************************************************/
1.14 veillard 757:
758: /*
1.1 veillard 759: * xmlParseName : parse an XML name.
1.22 daniel 760: *
761: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
762: * CombiningChar | Extender
763: *
764: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
765: *
766: * [6] Names ::= Name (S Name)*
1.1 veillard 767: */
768:
1.16 daniel 769: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 770: const CHAR *q;
771: CHAR *ret = NULL;
1.1 veillard 772:
1.40 daniel 773: if (!IS_LETTER(CUR) && (CUR != '_') &&
774: (CUR != ':')) return(NULL);
775: q = NEXT;
776:
777: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
778: (CUR == '.') || (CUR == '-') ||
779: (CUR == '_') || (CUR == ':') ||
780: (IS_COMBINING(CUR)) ||
781: (IS_EXTENDER(CUR)))
782: NEXT;
1.22 daniel 783:
1.40 daniel 784: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 785:
786: return(ret);
787: }
788:
789: /*
790: * xmlParseNmtoken : parse an XML Nmtoken.
791: *
792: * [7] Nmtoken ::= (NameChar)+
793: *
794: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
795: */
796:
797: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
798: const CHAR *q;
799: CHAR *ret = NULL;
800:
1.40 daniel 801: q = NEXT;
1.22 daniel 802:
1.40 daniel 803: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
804: (CUR == '.') || (CUR == '-') ||
805: (CUR == '_') || (CUR == ':') ||
806: (IS_COMBINING(CUR)) ||
807: (IS_EXTENDER(CUR)))
808: NEXT;
1.3 veillard 809:
1.40 daniel 810: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 811:
1.3 veillard 812: return(ret);
1.1 veillard 813: }
814:
815: /*
1.24 daniel 816: * xmlParseEntityValue : parse a value for ENTITY decl.
817: *
818: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
819: * "'" ([^%&'] | PEReference | Reference)* "'"
820: */
821:
822: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.39 daniel 823: CHAR *ret = NULL, *dec;
1.24 daniel 824: const CHAR *q;
1.39 daniel 825: int needSubst = 0;
826: int needParam = 0;
1.24 daniel 827:
1.40 daniel 828: if (CUR == '"') {
829: NEXT;
1.24 daniel 830:
1.40 daniel 831: q = CUR_PTR;
832: while ((IS_CHAR(CUR)) && (CUR != '"')) {
833: if (CUR == '%') {
1.39 daniel 834: needParam = 1; /* TODO !!! */
1.40 daniel 835: NEXT;
836: } else if (CUR == '&') {
1.39 daniel 837: needSubst = 1;
1.40 daniel 838: NEXT;
1.24 daniel 839: } else
1.40 daniel 840: NEXT;
1.24 daniel 841: }
1.40 daniel 842: if (!IS_CHAR(CUR)) {
1.31 daniel 843: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 844: } else {
1.39 daniel 845: if (needSubst) {
1.40 daniel 846: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 847: ret = xmlStrdup(dec);
848: free(dec);
849: } else
1.40 daniel 850: ret = xmlStrndup(q, CUR_PTR - q);
851: NEXT;
1.24 daniel 852: }
1.40 daniel 853: } else if (CUR == '\'') {
854: NEXT;
855: q = CUR_PTR;
856: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
857: if (CUR == '%') {
1.39 daniel 858: needParam = 1; /* TODO !!! */
1.40 daniel 859: NEXT;
860: } else if (CUR == '&') {
1.39 daniel 861: needSubst = 1;
1.40 daniel 862: NEXT;
1.24 daniel 863: } else
1.40 daniel 864: NEXT;
1.24 daniel 865: }
1.40 daniel 866: if (!IS_CHAR(CUR)) {
1.31 daniel 867: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 868: } else {
1.39 daniel 869: if (needSubst) {
1.40 daniel 870: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 871: ret = xmlStrdup(dec);
872: free(dec);
873: } else
1.40 daniel 874: ret = xmlStrndup(q, CUR_PTR - q);
875: NEXT;
1.24 daniel 876: }
877: } else {
1.31 daniel 878: xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 879: }
880:
881: return(ret);
882: }
883:
884: /*
1.29 daniel 885: * xmlParseAttValue : parse a value for an attribute
886: *
887: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
888: * "'" ([^<&'] | Reference)* "'"
889: */
890:
891: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.39 daniel 892: CHAR *ret = NULL, *dec;
1.29 daniel 893: const CHAR *q;
1.36 daniel 894: int needSubst = 0;
1.29 daniel 895:
1.40 daniel 896: if (CUR == '"') {
897: NEXT;
1.29 daniel 898:
1.40 daniel 899: q = CUR_PTR;
900: while ((IS_CHAR(CUR)) && (CUR != '"')) {
901: if (CUR == '&') {
1.36 daniel 902: needSubst = 1;
1.40 daniel 903: NEXT;
1.29 daniel 904: } else
1.40 daniel 905: NEXT;
1.29 daniel 906: }
1.40 daniel 907: if (!IS_CHAR(CUR)) {
1.31 daniel 908: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 909: } else {
1.39 daniel 910: if (needSubst) {
1.40 daniel 911: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 912: ret = xmlStrdup(dec);
913: free(dec);
914: } else
1.40 daniel 915: ret = xmlStrndup(q, CUR_PTR - q);
916: NEXT;
1.29 daniel 917: }
1.40 daniel 918: } else if (CUR == '\'') {
919: NEXT;
920: q = CUR_PTR;
921: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
922: if (CUR == '&') {
1.36 daniel 923: needSubst = 1;
1.40 daniel 924: NEXT;
1.29 daniel 925: } else
1.40 daniel 926: NEXT;
1.29 daniel 927: }
1.40 daniel 928: if (!IS_CHAR(CUR)) {
1.31 daniel 929: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 930: } else {
1.39 daniel 931: if (needSubst) {
1.40 daniel 932: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 933: ret = xmlStrdup(dec);
934: free(dec);
935: } else
1.40 daniel 936: ret = xmlStrndup(q, CUR_PTR - q);
937: NEXT;
1.29 daniel 938: }
939: } else {
1.31 daniel 940: xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 941: }
942:
943: return(ret);
944: }
945:
946: /*
1.21 daniel 947: * xmlParseSystemLiteral : parse an XML Literal
948: *
1.22 daniel 949: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 950: */
951:
952: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
953: const CHAR *q;
954: CHAR *ret = NULL;
955:
1.40 daniel 956: if (CUR == '"') {
957: NEXT;
958: q = CUR_PTR;
959: while ((IS_CHAR(CUR)) && (CUR != '"'))
960: NEXT;
961: if (!IS_CHAR(CUR)) {
1.31 daniel 962: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 963: } else {
1.40 daniel 964: ret = xmlStrndup(q, CUR_PTR - q);
965: NEXT;
1.21 daniel 966: }
1.40 daniel 967: } else if (CUR == '\'') {
968: NEXT;
969: q = CUR_PTR;
970: while ((IS_CHAR(CUR)) && (CUR != '\''))
971: NEXT;
972: if (!IS_CHAR(CUR)) {
1.31 daniel 973: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 974: } else {
1.40 daniel 975: ret = xmlStrndup(q, CUR_PTR - q);
976: NEXT;
1.21 daniel 977: }
978: } else {
1.31 daniel 979: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 980: }
981:
982: return(ret);
983: }
984:
985: /*
1.27 daniel 986: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 987: *
1.22 daniel 988: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 989: */
990:
991: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
992: const CHAR *q;
993: CHAR *ret = NULL;
994: /*
995: * Name ::= (Letter | '_') (NameChar)*
996: */
1.40 daniel 997: if (CUR == '"') {
998: NEXT;
999: q = CUR_PTR;
1000: while (IS_PUBIDCHAR(CUR)) NEXT;
1001: if (CUR != '"') {
1.31 daniel 1002: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1003: } else {
1.40 daniel 1004: ret = xmlStrndup(q, CUR_PTR - q);
1005: NEXT;
1.21 daniel 1006: }
1.40 daniel 1007: } else if (CUR == '\'') {
1008: NEXT;
1009: q = CUR_PTR;
1010: while ((IS_LETTER(CUR)) && (CUR != '\''))
1011: NEXT;
1012: if (!IS_LETTER(CUR)) {
1.31 daniel 1013: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1014: } else {
1.40 daniel 1015: ret = xmlStrndup(q, CUR_PTR - q);
1016: NEXT;
1.21 daniel 1017: }
1018: } else {
1.31 daniel 1019: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1020: }
1021:
1022: return(ret);
1023: }
1024:
1025: /*
1.27 daniel 1026: * xmlParseCharData: parse a CharData section.
1027: * if we are within a CDATA section ']]>' marks an end of section.
1028: *
1029: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1030: */
1031:
1032: CHAR *xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1033: const CHAR *q;
1034: CHAR *ret = NULL;
1035:
1.40 daniel 1036: q = CUR_PTR;
1037: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1038: (CUR != '&')) {
1039: NEXT;
1040: if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
1041: (NXT(2) == '>')) break;
1.27 daniel 1042: }
1.40 daniel 1043: if (q == CUR_PTR) return(NULL);
1044: ret = xmlStrndup(q, CUR_PTR - q);
1.27 daniel 1045: return(ret);
1046: }
1047:
1048: /*
1.22 daniel 1049: * xmlParseExternalID: Parse an External ID
1050: *
1051: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1052: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1053: */
1054:
1.39 daniel 1055: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1056: CHAR *URI = NULL;
1.22 daniel 1057:
1.40 daniel 1058: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1059: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1060: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1061: SKIP(6);
1.42 ! daniel 1062: SKIP_BLANKS;
1.39 daniel 1063: URI = xmlParseSystemLiteral(ctxt);
1064: if (URI == NULL)
1.31 daniel 1065: xmlParserError(ctxt,
1.39 daniel 1066: "xmlParseExternalID: SYSTEM, no URI\n");
1.40 daniel 1067: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1068: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1069: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1070: SKIP(6);
1.42 ! daniel 1071: SKIP_BLANKS;
1.39 daniel 1072: *publicID = xmlParsePubidLiteral(ctxt);
1073: if (*publicID == NULL)
1.31 daniel 1074: xmlParserError(ctxt,
1.39 daniel 1075: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.42 ! daniel 1076: SKIP_BLANKS;
1.39 daniel 1077: URI = xmlParseSystemLiteral(ctxt);
1078: if (URI == NULL)
1.31 daniel 1079: xmlParserError(ctxt,
1.39 daniel 1080: "xmlParseExternalID: PUBLIC, no URI\n");
1.22 daniel 1081: }
1.39 daniel 1082: return(URI);
1.22 daniel 1083: }
1084:
1085: /*
1.1 veillard 1086: * Parse and return a string between quotes or doublequotes
1087: */
1.16 daniel 1088: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.1 veillard 1089: CHAR *ret = NULL;
1.17 daniel 1090: const CHAR *q;
1.1 veillard 1091:
1.40 daniel 1092: if (CUR == '"') {
1093: NEXT;
1094: q = CUR_PTR;
1095: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1096: if (CUR != '"')
1.31 daniel 1097: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1098: else {
1.40 daniel 1099: ret = xmlStrndup(q, CUR_PTR - q);
1100: NEXT;
1.1 veillard 1101: }
1.40 daniel 1102: } else if (CUR == '\''){
1103: NEXT;
1104: q = CUR_PTR;
1105: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1106: if (CUR != '\'')
1.31 daniel 1107: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1.1 veillard 1108: else {
1.40 daniel 1109: ret = xmlStrndup(q, CUR_PTR - q);
1110: NEXT;
1.1 veillard 1111: }
1112: }
1113: return(ret);
1114: }
1115:
1116: /*
1.3 veillard 1117: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1118: * This may or may not create a node (depending on the context)
1.38 daniel 1119: * The spec says that "For compatibility, the string "--" (double-hyphen)
1120: * must not occur within comments. "
1.22 daniel 1121: *
1122: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1123: */
1.31 daniel 1124: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1125: xmlNodePtr ret = NULL;
1.17 daniel 1126: const CHAR *q, *start;
1127: const CHAR *r;
1.39 daniel 1128: CHAR *val;
1.3 veillard 1129:
1130: /*
1.22 daniel 1131: * Check that there is a comment right here.
1.3 veillard 1132: */
1.40 daniel 1133: if ((CUR != '<') || (NXT(1) != '!') ||
1134: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1135:
1.40 daniel 1136: SKIP(4);
1137: start = q = CUR_PTR;
1138: NEXT;
1139: r = CUR_PTR;
1140: NEXT;
1141: while (IS_CHAR(CUR) &&
1142: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1143: (*r != '-') || (*q != '-'))) {
1.38 daniel 1144: if ((*r == '-') && (*q == '-'))
1145: xmlParserError(ctxt,
1146: "Comment must not contain '--' (double-hyphen)`\n");
1.40 daniel 1147: NEXT;r++;q++;
1.3 veillard 1148: }
1.40 daniel 1149: if (!IS_CHAR(CUR)) {
1.31 daniel 1150: xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.3 veillard 1151: } else {
1.40 daniel 1152: NEXT;
1.31 daniel 1153: if (create) {
1.39 daniel 1154: val = xmlStrndup(start, q - start);
1155: ret = xmlNewComment(val);
1156: free(val);
1.31 daniel 1157: }
1.3 veillard 1158: }
1.39 daniel 1159: return(ret);
1.3 veillard 1160: }
1161:
1162: /*
1.13 veillard 1163: * xmlParseNamespace: parse specific '<?namespace ...' constructs.
1.22 daniel 1164: *
1.29 daniel 1165: * TODO !!!!!!!!!!
1166: *
1167: * This is what the older xml-name Working Draft specified, a bunch of
1168: * other stuff may still rely on it, so support is still here as
1169: * if ot was declared on the root of the Tree:-(
1.1 veillard 1170: */
1171:
1.16 daniel 1172: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.1 veillard 1173: CHAR *href = NULL;
1.34 daniel 1174: CHAR *prefix = NULL;
1.3 veillard 1175: int garbage = 0;
1.1 veillard 1176:
1177: /*
1.18 daniel 1178: * We just skipped "namespace" or "xml:namespace"
1.1 veillard 1179: */
1.42 ! daniel 1180: SKIP_BLANKS;
1.1 veillard 1181:
1.40 daniel 1182: while (IS_CHAR(CUR) && (CUR != '>')) {
1.1 veillard 1183: /*
1.18 daniel 1184: * We can have "ns" or "prefix" attributes
1185: * Old encoding as 'href' or 'AS' attributes is still supported
1.1 veillard 1186: */
1.40 daniel 1187: if ((CUR == 'n') && (NXT(1) == 's')) {
1.18 daniel 1188: garbage = 0;
1.40 daniel 1189: SKIP(2);
1.42 ! daniel 1190: SKIP_BLANKS;
1.18 daniel 1191:
1.40 daniel 1192: if (CUR != '=') continue;
1193: NEXT;
1.42 ! daniel 1194: SKIP_BLANKS;
1.18 daniel 1195:
1196: href = xmlParseQuotedString(ctxt);
1.42 ! daniel 1197: SKIP_BLANKS;
1.40 daniel 1198: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1199: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1.3 veillard 1200: garbage = 0;
1.40 daniel 1201: SKIP(4);
1.42 ! daniel 1202: SKIP_BLANKS;
1.1 veillard 1203:
1.40 daniel 1204: if (CUR != '=') continue;
1205: NEXT;
1.42 ! daniel 1206: SKIP_BLANKS;
1.16 daniel 1207:
1208: href = xmlParseQuotedString(ctxt);
1.42 ! daniel 1209: SKIP_BLANKS;
1.40 daniel 1210: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1211: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1212: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1.18 daniel 1213: garbage = 0;
1.40 daniel 1214: SKIP(6);
1.42 ! daniel 1215: SKIP_BLANKS;
1.18 daniel 1216:
1.40 daniel 1217: if (CUR != '=') continue;
1218: NEXT;
1.42 ! daniel 1219: SKIP_BLANKS;
1.18 daniel 1220:
1.34 daniel 1221: prefix = xmlParseQuotedString(ctxt);
1.42 ! daniel 1222: SKIP_BLANKS;
1.40 daniel 1223: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1.3 veillard 1224: garbage = 0;
1.40 daniel 1225: SKIP(2);
1.42 ! daniel 1226: SKIP_BLANKS;
1.1 veillard 1227:
1.40 daniel 1228: if (CUR != '=') continue;
1229: NEXT;
1.42 ! daniel 1230: SKIP_BLANKS;
1.16 daniel 1231:
1.34 daniel 1232: prefix = xmlParseQuotedString(ctxt);
1.42 ! daniel 1233: SKIP_BLANKS;
1.40 daniel 1234: } else if ((CUR == '?') && (NXT(1) == '>')) {
1.3 veillard 1235: garbage = 0;
1.40 daniel 1236: CUR_PTR ++;
1.1 veillard 1237: } else {
1.3 veillard 1238: /*
1239: * Found garbage when parsing the namespace
1240: */
1.31 daniel 1241: if (!garbage)
1242: xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
1.40 daniel 1243: NEXT;
1.1 veillard 1244: }
1245: }
1246:
1.40 daniel 1247: MOVETO_ENDTAG(CUR_PTR);
1248: NEXT;
1.1 veillard 1249:
1250: /*
1251: * Register the DTD.
1252: */
1253: if (href != NULL)
1.34 daniel 1254: xmlNewNs(ctxt->doc, href, prefix);
1.1 veillard 1255:
1.34 daniel 1256: if (prefix != NULL) free(prefix);
1.8 veillard 1257: if (href != NULL) free(href);
1.1 veillard 1258: }
1259:
1260: /*
1.22 daniel 1261: * xmlParsePITarget: parse the name of a PI
1262: *
1263: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1264: */
1265:
1266: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1267: CHAR *name;
1268:
1269: name = xmlParseName(ctxt);
1270: if ((name != NULL) && (name[3] == 0) &&
1271: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1272: ((name[1] == 'm') || (name[1] == 'M')) &&
1273: ((name[2] == 'l') || (name[2] == 'L'))) {
1274: xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1275: return(NULL);
1276: }
1277: return(name);
1278: }
1279:
1280: /*
1.3 veillard 1281: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 1282: *
1283: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 1284: */
1285:
1.16 daniel 1286: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1287: CHAR *target;
1288:
1.40 daniel 1289: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1290: /*
1291: * this is a Processing Instruction.
1292: */
1.40 daniel 1293: SKIP(2);
1.3 veillard 1294:
1295: /*
1.22 daniel 1296: * Parse the target name and check for special support like
1297: * namespace.
1298: *
1299: * TODO : PI handling should be dynamically redefinable using an
1300: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1301: */
1.22 daniel 1302: target = xmlParsePITarget(ctxt);
1303: if (target != NULL) {
1304: /*
1305: * Support for the Processing Instruction related to namespace.
1306: */
1307: if ((target[0] == 'n') && (target[1] == 'a') &&
1308: (target[2] == 'm') && (target[3] == 'e') &&
1309: (target[4] == 's') && (target[5] == 'p') &&
1310: (target[6] == 'a') && (target[7] == 'c') &&
1311: (target[8] == 'e')) {
1312: xmlParseNamespace(ctxt);
1313: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1314: (target[2] == 'l') && (target[3] == ':') &&
1315: (target[4] == 'n') && (target[5] == 'a') &&
1316: (target[6] == 'm') && (target[7] == 'e') &&
1317: (target[8] == 's') && (target[9] == 'p') &&
1318: (target[10] == 'a') && (target[11] == 'c') &&
1319: (target[12] == 'e')) {
1320: xmlParseNamespace(ctxt);
1321: } else {
1322: /* Unknown PI, ignore it ! */
1.31 daniel 1323: xmlParserError(ctxt, "xmlParsePI : skipping unknown PI %s\n",
1324: target);
1.40 daniel 1325: while (IS_CHAR(CUR) &&
1326: ((CUR != '?') || (NXT(1) != '>')))
1327: NEXT;
1328: if (!IS_CHAR(CUR)) {
1.31 daniel 1329: xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
1330: target);
1.24 daniel 1331: } else
1.40 daniel 1332: SKIP(2);
1.22 daniel 1333: }
1.39 daniel 1334: free(target);
1.3 veillard 1335: } else {
1.31 daniel 1336: xmlParserError(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1337: /********* Should we try to complete parsing the PI ???
1.40 daniel 1338: while (IS_CHAR(CUR) &&
1339: (CUR != '?') && (CUR != '>'))
1340: NEXT;
1341: if (!IS_CHAR(CUR)) {
1.22 daniel 1342: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1343: target);
1344: }
1345: ********************************************************/
1346: }
1347: }
1348: }
1349:
1350: /*
1351: * xmlParseNotationDecl: parse a notation declaration
1352: *
1353: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1354: *
1355: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1356: *
1357: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1358: * 'PUBLIC' S PubidLiteral S SystemLiteral
1359: *
1360: * Hence there is actually 3 choices:
1361: * 'PUBLIC' S PubidLiteral
1362: * 'PUBLIC' S PubidLiteral S SystemLiteral
1363: * and 'SYSTEM' S SystemLiteral
1364: */
1365:
1366: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1367: CHAR *name;
1368:
1.40 daniel 1369: if ((CUR == '<') && (NXT(1) == '!') &&
1370: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1371: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1372: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1373: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1374: (IS_BLANK(NXT(10)))) {
1375: SKIP(10);
1.42 ! daniel 1376: SKIP_BLANKS;
1.22 daniel 1377:
1378: name = xmlParseName(ctxt);
1379: if (name == NULL) {
1.31 daniel 1380: xmlParserError(ctxt,
1381: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1382: return;
1383: }
1.42 ! daniel 1384: SKIP_BLANKS;
1.22 daniel 1385: /*
1.31 daniel 1386: * TODO !!!
1.22 daniel 1387: */
1.40 daniel 1388: while ((IS_CHAR(CUR)) && (CUR != '>'))
1389: NEXT;
1.22 daniel 1390: free(name);
1391: }
1392: }
1393:
1394: /*
1395: * xmlParseEntityDecl: parse <!ENTITY declarations
1396: *
1397: * [70] EntityDecl ::= GEDecl | PEDecl
1398: *
1399: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1400: *
1401: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1402: *
1403: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1404: *
1405: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1406: *
1407: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1408: */
1409:
1410: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 1411: CHAR *name = NULL;
1.24 daniel 1412: CHAR *value = NULL;
1.39 daniel 1413: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 1414: CHAR *ndata = NULL;
1.39 daniel 1415: int isParameter = 0;
1.22 daniel 1416:
1.40 daniel 1417: if ((CUR == '<') && (NXT(1) == '!') &&
1418: (NXT(2) == 'E') && (NXT(3) == 'N') &&
1419: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1420: (NXT(6) == 'T') && (NXT(7) == 'Y') &&
1421: (IS_BLANK(NXT(8)))) {
1422: SKIP(8);
1.42 ! daniel 1423: SKIP_BLANKS;
1.40 daniel 1424:
1425: if (CUR == '%') {
1426: NEXT;
1.42 ! daniel 1427: SKIP_BLANKS;
1.39 daniel 1428: isParameter = 1;
1.22 daniel 1429: }
1430:
1431: name = xmlParseName(ctxt);
1.24 daniel 1432: if (name == NULL) {
1.31 daniel 1433: xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1434: return;
1435: }
1.42 ! daniel 1436: SKIP_BLANKS;
1.24 daniel 1437:
1.22 daniel 1438: /*
1.24 daniel 1439: * TODO handle the various case of definitions...
1.22 daniel 1440: */
1.39 daniel 1441: if (isParameter) {
1.40 daniel 1442: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 1443: value = xmlParseEntityValue(ctxt);
1.39 daniel 1444: if (value) {
1445: xmlAddDocEntity(ctxt->doc, name,
1446: XML_INTERNAL_PARAMETER_ENTITY,
1447: NULL, NULL, value);
1448: }
1.24 daniel 1449: else {
1.39 daniel 1450: URI = xmlParseExternalID(ctxt, &literal);
1451: if (URI) {
1452: xmlAddDocEntity(ctxt->doc, name,
1453: XML_EXTERNAL_PARAMETER_ENTITY,
1454: literal, URI, NULL);
1455: }
1.24 daniel 1456: }
1457: } else {
1.40 daniel 1458: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 1459: value = xmlParseEntityValue(ctxt);
1.39 daniel 1460: xmlAddDocEntity(ctxt->doc, name,
1461: XML_INTERNAL_GENERAL_ENTITY,
1462: NULL, NULL, value);
1463: } else {
1464: URI = xmlParseExternalID(ctxt, &literal);
1.42 ! daniel 1465: SKIP_BLANKS;
1.40 daniel 1466: if ((CUR == 'N') && (NXT(1) == 'D') &&
1467: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1468: (NXT(4) == 'A')) {
1469: SKIP(5);
1.42 ! daniel 1470: SKIP_BLANKS;
1.24 daniel 1471: ndata = xmlParseName(ctxt);
1.39 daniel 1472: xmlAddDocEntity(ctxt->doc, name,
1473: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1474: literal, URI, ndata);
1475: } else {
1476: xmlAddDocEntity(ctxt->doc, name,
1477: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1478: literal, URI, NULL);
1.24 daniel 1479: }
1480: }
1481: }
1.42 ! daniel 1482: SKIP_BLANKS;
1.40 daniel 1483: if (CUR != '>') {
1.31 daniel 1484: xmlParserError(ctxt,
1485: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1486: } else
1.40 daniel 1487: NEXT;
1.39 daniel 1488: if (name != NULL) free(name);
1489: if (value != NULL) free(value);
1490: if (URI != NULL) free(URI);
1491: if (literal != NULL) free(literal);
1492: if (ndata != NULL) free(ndata);
1.22 daniel 1493: }
1494: }
1495:
1496: /*
1497: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1498: *
1499: * [57] EnumeratedType ::= NotationType | Enumeration
1500: *
1501: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1502: *
1503: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1504: */
1505:
1506: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1507: /*
1508: * TODO !!!
1509: */
1.40 daniel 1510: while ((IS_CHAR(CUR)) && (CUR != '>'))
1511: NEXT;
1.22 daniel 1512: }
1513:
1514: /*
1515: * xmlParseAttributeType: parse the Attribute list def for an element
1516: *
1517: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1518: *
1519: * [55] StringType ::= 'CDATA'
1520: *
1521: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1522: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1523: */
1524: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.40 daniel 1525: if ((CUR == 'C') && (NXT(1) == 'D') &&
1526: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1527: (NXT(4) == 'A')) {
1528: SKIP(5);
1529: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
1530: SKIP(2);
1531: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1532: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1533: (NXT(4) == 'F')) {
1534: SKIP(5);
1535: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1536: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1537: (NXT(4) == 'F') && (NXT(5) == 'S')) {
1538: SKIP(6);
1539: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1540: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1541: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
1542: SKIP(6);
1543: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1544: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1545: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1546: (NXT(6) == 'E') && (NXT(7) == 'S')) {
1547: SKIP(8);
1548: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1549: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1550: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1551: (NXT(6) == 'N')) {
1552: SKIP(7);
1553: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1554: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1555: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1556: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.22 daniel 1557: } else {
1558: xmlParseEnumeratedType(ctxt, name);
1559: }
1560: }
1561:
1562: /*
1563: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1564: *
1565: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1566: *
1567: * [53] AttDef ::= S Name S AttType S DefaultDecl
1568: */
1569: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1570: CHAR *name;
1571:
1.40 daniel 1572: if ((CUR == '<') && (NXT(1) == '!') &&
1573: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1574: (NXT(4) == 'T') && (NXT(5) == 'L') &&
1575: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1576: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1577: SKIP(9);
1.42 ! daniel 1578: SKIP_BLANKS;
1.22 daniel 1579: name = xmlParseName(ctxt);
1580: if (name == NULL) {
1.31 daniel 1581: xmlParserError(ctxt,
1582: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1583: return;
1584: }
1.42 ! daniel 1585: SKIP_BLANKS;
1.40 daniel 1586: while (CUR != '>') {
1587: const CHAR *check = CUR_PTR;
1.22 daniel 1588:
1589: xmlParseAttributeType(ctxt, name);
1.42 ! daniel 1590: SKIP_BLANKS;
1.40 daniel 1591: if (check == CUR_PTR) {
1.31 daniel 1592: xmlParserError(ctxt,
1593: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 1594: break;
1595: }
1596: }
1.40 daniel 1597: if (CUR == '>')
1598: NEXT;
1.22 daniel 1599:
1600: free(name);
1601: }
1602: }
1603:
1604: /*
1605: * xmlParseElementContentDecl: parse the declaration for an Element content
1606: * either Mixed or Children, the cases EMPTY and ANY being handled
1607: * int xmlParseElementDecl.
1608: *
1609: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1610: *
1611: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1612: *
1613: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1614: *
1615: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1616: *
1617: * or
1618: *
1619: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1620: * '(' S? '#PCDATA' S? ')'
1621: */
1622:
1623: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1624: /*
1625: * TODO This has to be parsed correctly, currently we just skip until
1626: * we reach the first '>'.
1.31 daniel 1627: * !!!
1.22 daniel 1628: */
1.40 daniel 1629: while ((IS_CHAR(CUR)) && (CUR != '>'))
1630: NEXT;
1.22 daniel 1631: }
1632:
1633: /*
1634: * xmlParseElementDecl: parse an Element declaration.
1635: *
1636: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1637: *
1638: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1639: *
1640: * TODO There is a check [ VC: Unique Element Type Declaration ]
1641: */
1642: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1643: CHAR *name;
1644:
1.40 daniel 1645: if ((CUR == '<') && (NXT(1) == '!') &&
1646: (NXT(2) == 'E') && (NXT(3) == 'L') &&
1647: (NXT(4) == 'E') && (NXT(5) == 'M') &&
1648: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1649: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1650: SKIP(9);
1.42 ! daniel 1651: SKIP_BLANKS;
1.22 daniel 1652: name = xmlParseName(ctxt);
1653: if (name == NULL) {
1.31 daniel 1654: xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 1655: return;
1656: }
1.42 ! daniel 1657: SKIP_BLANKS;
1.40 daniel 1658: if ((CUR == 'E') && (NXT(1) == 'M') &&
1659: (NXT(2) == 'P') && (NXT(3) == 'T') &&
1660: (NXT(4) == 'Y')) {
1661: SKIP(5);
1.22 daniel 1662: /*
1663: * Element must always be empty.
1664: */
1.40 daniel 1665: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
1666: (NXT(2) == 'Y')) {
1667: SKIP(3);
1.22 daniel 1668: /*
1669: * Element is a generic container.
1670: */
1671: } else {
1672: xmlParseElementContentDecl(ctxt, name);
1673: }
1.42 ! daniel 1674: SKIP_BLANKS;
1.40 daniel 1675: if (CUR != '>') {
1.31 daniel 1676: xmlParserError(ctxt,
1677: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 1678: } else
1.40 daniel 1679: NEXT;
1.22 daniel 1680: }
1681: }
1682:
1683: /*
1684: * xmlParseMarkupDecl: parse Markup declarations
1685: *
1686: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1687: * NotationDecl | PI | Comment
1688: *
1689: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1690: */
1691: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1692: xmlParseElementDecl(ctxt);
1693: xmlParseAttributeListDecl(ctxt);
1694: xmlParseEntityDecl(ctxt);
1695: xmlParseNotationDecl(ctxt);
1696: xmlParsePI(ctxt);
1.31 daniel 1697: xmlParseComment(ctxt, 0);
1.22 daniel 1698: }
1699:
1700: /*
1.24 daniel 1701: * xmlParseCharRef: parse Reference declarations
1702: *
1703: * [66] CharRef ::= '&#' [0-9]+ ';' |
1704: * '&#x' [0-9a-fA-F]+ ';'
1705: */
1706: CHAR xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 1707: int val = 0;
1.24 daniel 1708: CHAR ret = 0;
1709:
1.40 daniel 1710: if ((CUR == '&') && (NXT(1) == '#') &&
1711: (NXT(2) == 'x')) {
1712: SKIP(3);
1713: while (CUR != ';') {
1714: if ((CUR >= '0') && (CUR <= '9'))
1715: val = val * 16 + (CUR - '0');
1716: else if ((CUR >= 'a') && (CUR <= 'f'))
1717: val = val * 16 + (CUR - 'a') + 10;
1718: else if ((CUR >= 'A') && (CUR <= 'F'))
1719: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 1720: else {
1.31 daniel 1721: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1722: val = 0;
1.24 daniel 1723: break;
1724: }
1725: }
1.40 daniel 1726: if (CUR != ';')
1727: NEXT;
1728: } else if ((CUR == '&') && (NXT(1) == '#')) {
1729: SKIP(2);
1730: while (CUR != ';') {
1731: if ((CUR >= '0') && (CUR <= '9'))
1732: val = val * 16 + (CUR - '0');
1.24 daniel 1733: else {
1.31 daniel 1734: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1735: val = 0;
1.24 daniel 1736: break;
1737: }
1738: }
1.40 daniel 1739: if (CUR != ';')
1740: NEXT;
1.24 daniel 1741: } else {
1.31 daniel 1742: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 1743: }
1.29 daniel 1744: /*
1745: * Check the value IS_CHAR ...
1746: */
1747: if (IS_CHAR(val))
1748: ret = (CHAR) val;
1749: else {
1.39 daniel 1750: xmlParserError(ctxt, "xmlParseCharRef: invalid value");
1.29 daniel 1751: ret = '?';
1752: }
1.24 daniel 1753: return(ret);
1754: }
1755:
1756: /*
1757: * xmlParseEntityRef: parse ENTITY references declarations
1758: *
1759: * [68] EntityRef ::= '&' Name ';'
1760: */
1761: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1762: CHAR *name;
1763:
1.40 daniel 1764: if (CUR == '&') {
1765: NEXT;
1.24 daniel 1766: name = xmlParseName(ctxt);
1767: if (name == NULL) {
1.31 daniel 1768: xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 1769: } else {
1.40 daniel 1770: if (CUR == ';') {
1771: NEXT;
1.24 daniel 1772: /*
1773: * TODO there is a VC check here !!!
1774: * [ VC: Entity Declared ]
1775: */
1776: free(name);
1777: } else {
1.31 daniel 1778: xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.24 daniel 1779: }
1780: }
1781: }
1.25 daniel 1782: return(NULL); /* TODO !!!! */
1.24 daniel 1783: }
1784:
1785: /*
1786: * xmlParseReference: parse Reference declarations
1787: *
1788: * [67] Reference ::= EntityRef | CharRef
1789: */
1790: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
1791: CHAR *name;
1792:
1.40 daniel 1793: if (CUR == '&') {
1.24 daniel 1794: return(xmlParseEntityRef(ctxt));
1795: } else {
1.40 daniel 1796: NEXT;
1.24 daniel 1797: name = xmlParseName(ctxt);
1798: if (name == NULL) {
1.31 daniel 1799: xmlParserError(ctxt, "xmlParseReference: no name\n");
1.24 daniel 1800: } else {
1.40 daniel 1801: if (CUR == ';') {
1802: NEXT;
1.24 daniel 1803: /*
1804: * TODO there is a VC check here !!!
1805: * [ VC: Entity Declared ]
1806: */
1807: free(name);
1808: } else {
1.31 daniel 1809: xmlParserError(ctxt, "xmlParseReference: expecting ';'\n");
1.24 daniel 1810: }
1811: }
1812: }
1.25 daniel 1813: return(NULL); /* TODO !!!! */
1.24 daniel 1814: }
1815:
1816: /*
1.22 daniel 1817: * xmlParsePEReference: parse PEReference declarations
1818: *
1819: * [69] PEReference ::= '%' Name ';'
1820: */
1.24 daniel 1821: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 1822: CHAR *name;
1823:
1.40 daniel 1824: if (CUR == '%') {
1825: NEXT;
1.22 daniel 1826: name = xmlParseName(ctxt);
1827: if (name == NULL) {
1.31 daniel 1828: xmlParserError(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 1829: } else {
1.40 daniel 1830: if (CUR == ';') {
1831: NEXT;
1.22 daniel 1832: /*
1833: * TODO there is a VC check here !!!
1834: * [ VC: Entity Declared ]
1835: */
1836: free(name);
1837: } else {
1.31 daniel 1838: xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
1.22 daniel 1839: }
1.3 veillard 1840: }
1841: }
1.25 daniel 1842: return(NULL); /* TODO !!!! */
1.3 veillard 1843: }
1844:
1845: /*
1.21 daniel 1846: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
1847: *
1.22 daniel 1848: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1849: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 1850: */
1851:
1852: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 1853: xmlDtdPtr dtd;
1.21 daniel 1854: CHAR *name;
1855: CHAR *ExternalID = NULL;
1.39 daniel 1856: CHAR *URI = NULL;
1.21 daniel 1857:
1858: /*
1859: * We know that '<!DOCTYPE' has been detected.
1860: */
1.40 daniel 1861: SKIP(9);
1.21 daniel 1862:
1.42 ! daniel 1863: SKIP_BLANKS;
1.21 daniel 1864:
1865: /*
1866: * Parse the DOCTYPE name.
1867: */
1868: name = xmlParseName(ctxt);
1869: if (name == NULL) {
1.31 daniel 1870: xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 1871: }
1872:
1.42 ! daniel 1873: SKIP_BLANKS;
1.21 daniel 1874:
1875: /*
1.22 daniel 1876: * Check for SystemID and ExternalID
1877: */
1.39 daniel 1878: URI = xmlParseExternalID(ctxt, &ExternalID);
1.42 ! daniel 1879: SKIP_BLANKS;
1.36 daniel 1880:
1.39 daniel 1881: dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
1.22 daniel 1882:
1883: /*
1884: * Is there any DTD definition ?
1885: */
1.40 daniel 1886: if (CUR == '[') {
1887: NEXT;
1.22 daniel 1888: /*
1889: * Parse the succession of Markup declarations and
1890: * PEReferences.
1891: * Subsequence (markupdecl | PEReference | S)*
1892: */
1.40 daniel 1893: while (CUR != ']') {
1894: const CHAR *check = CUR_PTR;
1.22 daniel 1895:
1.42 ! daniel 1896: SKIP_BLANKS;
1.22 daniel 1897: xmlParseMarkupDecl(ctxt);
1898: xmlParsePEReference(ctxt);
1899:
1.40 daniel 1900: if (CUR_PTR == check) {
1.31 daniel 1901: xmlParserError(ctxt,
1902: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 1903: break;
1904: }
1905: }
1.40 daniel 1906: if (CUR == ']') NEXT;
1.22 daniel 1907: }
1908:
1909: /*
1910: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 1911: */
1.40 daniel 1912: if (CUR != '>') {
1.31 daniel 1913: xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 1914: /* We shouldn't try to resynchronize ... */
1.21 daniel 1915: }
1.40 daniel 1916: NEXT;
1.22 daniel 1917:
1918: /*
1919: * Cleanup, since we don't use all those identifiers
1920: * TODO : the DOCTYPE if available should be stored !
1921: */
1.39 daniel 1922: if (URI != NULL) free(URI);
1.22 daniel 1923: if (ExternalID != NULL) free(ExternalID);
1924: if (name != NULL) free(name);
1.21 daniel 1925: }
1926:
1927: /*
1.3 veillard 1928: * xmlParseAttribute: parse a start of tag.
1929: *
1.22 daniel 1930: * [41] Attribute ::= Name Eq AttValue
1931: *
1932: * [25] Eq ::= S? '=' S?
1933: *
1.29 daniel 1934: * With namespace:
1935: *
1936: * [NS 11] Attribute ::= QName Eq AttValue
1.3 veillard 1937: */
1938:
1.16 daniel 1939: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1940: CHAR *name, *value = NULL;
1.29 daniel 1941: CHAR *ns;
1.3 veillard 1942:
1.29 daniel 1943: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 1944: if (name == NULL) {
1.31 daniel 1945: xmlParserError(ctxt, "error parsing attribute name\n");
1.29 daniel 1946: return;
1.3 veillard 1947: }
1.22 daniel 1948: /*
1949: * TODO: Check for Namespace ...
1950: */
1.29 daniel 1951: if (ns != NULL) {
1952: free(ns);
1953: }
1.3 veillard 1954:
1955: /*
1.29 daniel 1956: * read the value
1.3 veillard 1957: */
1.42 ! daniel 1958: SKIP_BLANKS;
1.40 daniel 1959: if (CUR == '=') {
1960: NEXT;
1.42 ! daniel 1961: SKIP_BLANKS;
1.29 daniel 1962: value = xmlParseAttValue(ctxt);
1963: } else {
1.31 daniel 1964: xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
1965: name);
1.3 veillard 1966: }
1967:
1968: /*
1969: * Add the attribute to the node.
1970: */
1.17 daniel 1971: if (name != NULL) {
1.3 veillard 1972: xmlNewProp(node, name, value);
1.17 daniel 1973: free(name);
1974: }
1.29 daniel 1975: if (value != NULL)
1.17 daniel 1976: free(value);
1.3 veillard 1977: }
1978:
1979: /*
1.29 daniel 1980: * xmlParseStartTag: parse a start of tag either for rule element or
1981: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 1982: *
1983: * [40] STag ::= '<' Name (S Attribute)* S? '>'
1984: *
1.29 daniel 1985: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1986: *
1987: * With namespace:
1988: *
1989: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
1990: *
1991: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.2 veillard 1992: */
1993:
1.16 daniel 1994: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 1995: CHAR *namespace, *name;
1996: xmlNsPtr ns = NULL;
1.2 veillard 1997: xmlNodePtr ret = NULL;
1998:
1.40 daniel 1999: if (CUR != '<') return(NULL);
2000: NEXT;
1.3 veillard 2001:
1.34 daniel 2002: name = xmlNamespaceParseQName(ctxt, &namespace);
2003: if (namespace != NULL) {
1.3 veillard 2004: /*
2005: * Search the DTD associated to ns.
2006: */
1.34 daniel 2007: ns = xmlSearchNs(ctxt->doc, namespace);
2008: if (ns == NULL)
2009: xmlParserError(ctxt, "Start tag : Couldn't find namespace %s\n",
2010: namespace);
2011: free(namespace);
1.29 daniel 2012: }
1.3 veillard 2013:
1.34 daniel 2014: ret = xmlNewNode(ns, name, NULL);
1.2 veillard 2015:
1.3 veillard 2016: /*
2017: * Now parse the attributes, it ends up with the ending
2018: *
2019: * (S Attribute)* S?
2020: */
1.42 ! daniel 2021: SKIP_BLANKS;
1.40 daniel 2022: while ((IS_CHAR(CUR)) &&
2023: (CUR != '>') &&
2024: ((CUR != '/') || (NXT(1) != '>'))) {
2025: const CHAR *q = CUR_PTR;
1.29 daniel 2026:
2027: xmlParseAttribute(ctxt, ret);
1.42 ! daniel 2028: SKIP_BLANKS;
1.29 daniel 2029:
1.40 daniel 2030: if (q == CUR_PTR) {
1.31 daniel 2031: xmlParserError(ctxt,
2032: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 2033: break;
1.3 veillard 2034: }
2035: }
2036:
2037: return(ret);
2038: }
2039:
2040: /*
1.27 daniel 2041: * xmlParseEndTag: parse an end of tag
2042: *
2043: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 2044: *
2045: * With namespace
2046: *
2047: * [9] ETag ::= '</' QName S? '>'
1.7 veillard 2048: */
2049:
1.34 daniel 2050: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
2051: CHAR *namespace, *name;
2052: xmlNsPtr ns = NULL;
1.7 veillard 2053:
1.34 daniel 2054: *nsPtr = NULL;
1.7 veillard 2055: *tagPtr = NULL;
2056:
1.40 daniel 2057: if ((CUR != '<') || (NXT(1) != '/')) {
1.31 daniel 2058: xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 2059: return;
2060: }
1.40 daniel 2061: SKIP(2);
1.7 veillard 2062:
1.34 daniel 2063: name = xmlNamespaceParseQName(ctxt, &namespace);
2064: if (namespace != NULL) {
1.7 veillard 2065: /*
2066: * Search the DTD associated to ns.
2067: */
1.34 daniel 2068: ns = xmlSearchNs(ctxt->doc, namespace);
2069: if (ns == NULL)
2070: xmlParserError(ctxt, "End tag : Couldn't find namespace %s\n",
2071: namespace);
2072: free(namespace);
1.29 daniel 2073: }
1.7 veillard 2074:
1.34 daniel 2075: *nsPtr = ns;
1.7 veillard 2076: *tagPtr = name;
2077:
2078: /*
2079: * We should definitely be at the ending "S? '>'" part
2080: */
1.42 ! daniel 2081: SKIP_BLANKS;
1.40 daniel 2082: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.31 daniel 2083: xmlParserError(ctxt, "End tag : expected '>'\n");
1.7 veillard 2084: } else
1.40 daniel 2085: NEXT;
1.7 veillard 2086:
2087: return;
2088: }
2089:
2090: /*
1.3 veillard 2091: * xmlParseCDSect: escaped pure raw content.
1.29 daniel 2092: *
2093: * [18] CDSect ::= CDStart CData CDEnd
2094: *
2095: * [19] CDStart ::= '<![CDATA['
2096: *
2097: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2098: *
2099: * [21] CDEnd ::= ']]>'
1.3 veillard 2100: */
1.16 daniel 2101: CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2102: const CHAR *r, *s, *base;
2103: CHAR *ret;
1.3 veillard 2104:
1.40 daniel 2105: if ((CUR == '<') && (NXT(1) == '!') &&
2106: (NXT(2) == '[') && (NXT(3) == 'C') &&
2107: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2108: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2109: (NXT(8) == '[')) {
2110: SKIP(9);
1.29 daniel 2111: } else
2112: return(NULL);
1.40 daniel 2113: base = CUR_PTR;
2114: if (!IS_CHAR(CUR)) {
1.31 daniel 2115: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2116: return(NULL);
2117: }
1.40 daniel 2118: r = NEXT;
2119: if (!IS_CHAR(CUR)) {
1.31 daniel 2120: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2121: return(NULL);
2122: }
1.40 daniel 2123: s = NEXT;
2124: while (IS_CHAR(CUR) &&
2125: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
2126: r++;s++;NEXT;
1.3 veillard 2127: }
1.40 daniel 2128: if (!IS_CHAR(CUR)) {
1.31 daniel 2129: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.3 veillard 2130: return(NULL);
2131: }
1.40 daniel 2132: ret = xmlStrndup(base, CUR_PTR - base);
1.16 daniel 2133:
1.2 veillard 2134: return(ret);
2135: }
2136:
2137: /*
2138: * xmlParseContent: a content is
2139: * (element | PCData | Reference | CDSect | PI | Comment)
2140: *
1.27 daniel 2141: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2142: */
2143:
1.27 daniel 2144: void xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.39 daniel 2145: const CHAR *q, *dec;
1.17 daniel 2146: CHAR *data = NULL;
1.2 veillard 2147: xmlNodePtr ret = NULL;
2148:
1.40 daniel 2149: while ((CUR != '<') || (NXT(1) != '/')) {
2150: const CHAR *test = CUR_PTR;
1.27 daniel 2151: ret = NULL;
2152: data = NULL;
2153:
2154: /*
2155: * First case : a Processing Instruction.
2156: */
1.40 daniel 2157: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 2158: xmlParsePI(ctxt);
2159: }
2160: /*
2161: * Second case : a CDSection
2162: */
1.40 daniel 2163: else if ((CUR == '<') && (NXT(1) == '!') &&
2164: (NXT(2) == '[') && (NXT(3) == 'C') &&
2165: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2166: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2167: (NXT(8) == '[')) {
1.27 daniel 2168: data = xmlParseCDSect(ctxt);
2169: }
2170: /*
2171: * Third case : a comment
2172: */
1.40 daniel 2173: else if ((CUR == '<') && (NXT(1) == '!') &&
2174: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 2175: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2176: }
2177: /*
2178: * Fourth case : a sub-element.
2179: */
1.40 daniel 2180: else if (CUR == '<') {
1.27 daniel 2181: ret = xmlParseElement(ctxt);
2182: }
2183: /*
2184: * Last case, text. Note that References are handled directly.
2185: */
2186: else {
1.40 daniel 2187: q = CUR_PTR;
2188: while (IS_CHAR(CUR) && (CUR != '<')) NEXT;
1.27 daniel 2189:
1.40 daniel 2190: if (!IS_CHAR(CUR)) {
1.31 daniel 2191: xmlParserError(ctxt, "Truncated content\n%.50s\n", q);
1.27 daniel 2192: return;
2193: }
1.3 veillard 2194:
1.27 daniel 2195: /*
2196: * Do the Entities decoding...
2197: */
1.40 daniel 2198: dec = xmlDecodeEntities(ctxt->doc, q, CUR_PTR - q);
1.39 daniel 2199: data = xmlStrdup(dec);
1.40 daniel 2200: free((char *) dec);
1.3 veillard 2201: }
1.14 veillard 2202:
2203: /*
1.27 daniel 2204: * Handle the data if any. If there is no child
2205: * add it as content, otherwise create a new node of type text.
1.14 veillard 2206: */
1.27 daniel 2207: if (data != NULL)
2208: data = xmlHandleData(data);
2209: if (data != NULL) {
2210: if (node->childs == NULL)
2211: xmlNodeSetContent(node, data);
2212: else
2213: ret = xmlNewText(data);
2214: free(data);
2215: }
2216: if (ret != NULL)
2217: xmlAddChild(node, ret);
1.40 daniel 2218: if (test == CUR_PTR) {
1.31 daniel 2219: xmlParserError(ctxt, "detected an error in element content\n");
1.29 daniel 2220: break;
2221: }
1.3 veillard 2222: }
1.2 veillard 2223: }
2224:
2225: /*
2226: * xmlParseElement: parse an XML element
1.26 daniel 2227: *
2228: * [39] element ::= EmptyElemTag | STag content ETag
2229: *
2230: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 2231: */
1.26 daniel 2232:
1.2 veillard 2233:
1.16 daniel 2234: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2235: xmlNodePtr ret;
1.40 daniel 2236: const CHAR *openTag = CUR_PTR;
1.32 daniel 2237: xmlParserNodeInfo node_info;
1.27 daniel 2238: CHAR *endTag;
1.34 daniel 2239: xmlNsPtr endNs;
1.2 veillard 2240:
1.32 daniel 2241: /* Capture start position */
1.40 daniel 2242: node_info.begin_pos = CUR_PTR - ctxt->input->base;
2243: node_info.begin_line = ctxt->input->line;
1.32 daniel 2244:
1.16 daniel 2245: ret = xmlParseStartTag(ctxt);
1.3 veillard 2246: if (ret == NULL) {
2247: return(NULL);
2248: }
1.2 veillard 2249:
2250: /*
2251: * Check for an Empty Element.
2252: */
1.40 daniel 2253: if ((CUR == '/') && (NXT(1) == '>')) {
2254: SKIP(2);
1.2 veillard 2255: return(ret);
2256: }
1.40 daniel 2257: if (CUR == '>') NEXT;
1.2 veillard 2258: else {
1.31 daniel 2259: xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
1.16 daniel 2260: return(NULL);
1.2 veillard 2261: }
2262:
2263: /*
2264: * Parse the content of the element:
2265: */
1.27 daniel 2266: xmlParseContent(ctxt, ret);
1.40 daniel 2267: if (!IS_CHAR(CUR)) {
1.31 daniel 2268: xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
2269: openTag);
1.16 daniel 2270: return(NULL);
1.2 veillard 2271: }
2272:
2273: /*
1.27 daniel 2274: * parse the end of tag: '</' should be here.
1.2 veillard 2275: */
1.34 daniel 2276: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 2277:
1.27 daniel 2278: /*
2279: * Check that the Name in the ETag is the same as in the STag.
2280: */
1.34 daniel 2281: if (endNs != ret->ns) {
1.31 daniel 2282: xmlParserError(ctxt,
2283: "Start and End tags don't use the same DTD\n%.30s\n%.30s\n",
2284: openTag, endTag);
1.27 daniel 2285: }
1.32 daniel 2286: if (endTag == NULL ) {
2287: xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
2288: } else if (strcmp(ret->name, endTag)) {
1.31 daniel 2289: xmlParserError(ctxt,
2290: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2291: openTag, endTag);
1.27 daniel 2292: }
1.7 veillard 2293:
1.27 daniel 2294: if ( endTag != NULL )
2295: free(endTag);
1.2 veillard 2296:
1.32 daniel 2297: /* Capture end position and add node */
2298: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 2299: node_info.end_pos = CUR_PTR - ctxt->input->base;
2300: node_info.end_line = ctxt->input->line;
1.32 daniel 2301: node_info.node = ret;
2302: xmlParserAddNodeInfo(ctxt, &node_info);
2303: }
1.2 veillard 2304: return(ret);
2305: }
2306:
2307: /*
1.29 daniel 2308: * xmlParseVersionNum: parse the XML version value.
2309: *
2310: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2311: */
2312: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 2313: const CHAR *q = CUR_PTR;
1.29 daniel 2314: CHAR *ret;
2315:
1.40 daniel 2316: while (IS_CHAR(CUR) &&
2317: (((CUR >= 'a') && (CUR <= 'z')) ||
2318: ((CUR >= 'A') && (CUR <= 'Z')) ||
2319: ((CUR >= '0') && (CUR <= '9')) ||
2320: (CUR == '_') || (CUR == '.') ||
2321: (CUR == ':') || (CUR == '-'))) NEXT;
2322: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2323: return(ret);
2324: }
2325:
2326: /*
2327: * xmlParseVersionInfo: parse the XML version.
2328: *
2329: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2330: *
2331: * [25] Eq ::= S? '=' S?
2332: */
2333:
2334: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2335: CHAR *version = NULL;
2336: const CHAR *q;
2337:
1.40 daniel 2338: if ((CUR == 'v') && (NXT(1) == 'e') &&
2339: (NXT(2) == 'r') && (NXT(3) == 's') &&
2340: (NXT(4) == 'i') && (NXT(5) == 'o') &&
2341: (NXT(6) == 'n')) {
2342: SKIP(7);
1.42 ! daniel 2343: SKIP_BLANKS;
1.40 daniel 2344: if (CUR != '=') {
1.31 daniel 2345: xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
2346: return(NULL);
2347: }
1.40 daniel 2348: NEXT;
1.42 ! daniel 2349: SKIP_BLANKS;
1.40 daniel 2350: if (CUR == '"') {
2351: NEXT;
2352: q = CUR_PTR;
1.29 daniel 2353: version = xmlParseVersionNum(ctxt);
1.40 daniel 2354: if (CUR != '"')
1.31 daniel 2355: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2356: else
1.40 daniel 2357: NEXT;
2358: } else if (CUR == '\''){
2359: NEXT;
2360: q = CUR_PTR;
1.29 daniel 2361: version = xmlParseVersionNum(ctxt);
1.40 daniel 2362: if (CUR != '\'')
1.31 daniel 2363: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2364: else
1.40 daniel 2365: NEXT;
1.31 daniel 2366: } else {
2367: xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 2368: }
2369: }
2370: return(version);
2371: }
2372:
2373: /*
2374: * xmlParseEncName: parse the XML encoding name
2375: *
2376: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2377: */
2378: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 2379: const CHAR *q = CUR_PTR;
1.29 daniel 2380: CHAR *ret = NULL;
2381:
1.40 daniel 2382: if (((CUR >= 'a') && (CUR <= 'z')) ||
2383: ((CUR >= 'A') && (CUR <= 'Z'))) {
2384: NEXT;
2385: while (IS_CHAR(CUR) &&
2386: (((CUR >= 'a') && (CUR <= 'z')) ||
2387: ((CUR >= 'A') && (CUR <= 'Z')) ||
2388: ((CUR >= '0') && (CUR <= '9')) ||
2389: (CUR == '-'))) NEXT;
2390: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2391: } else {
1.31 daniel 2392: xmlParserError(ctxt, "Invalid XML encoding name\n");
1.29 daniel 2393: }
2394: return(ret);
2395: }
2396:
2397: /*
2398: * xmlParseEncodingDecl: parse the XML encoding declaration
2399: *
2400: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2401: */
2402:
2403: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2404: CHAR *encoding = NULL;
2405: const CHAR *q;
2406:
1.42 ! daniel 2407: SKIP_BLANKS;
1.40 daniel 2408: if ((CUR == 'e') && (NXT(1) == 'n') &&
2409: (NXT(2) == 'c') && (NXT(3) == 'o') &&
2410: (NXT(4) == 'd') && (NXT(5) == 'i') &&
2411: (NXT(6) == 'n') && (NXT(7) == 'g')) {
2412: SKIP(8);
1.42 ! daniel 2413: SKIP_BLANKS;
1.40 daniel 2414: if (CUR != '=') {
1.31 daniel 2415: xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
2416: return(NULL);
2417: }
1.40 daniel 2418: NEXT;
1.42 ! daniel 2419: SKIP_BLANKS;
1.40 daniel 2420: if (CUR == '"') {
2421: NEXT;
2422: q = CUR_PTR;
1.29 daniel 2423: encoding = xmlParseEncName(ctxt);
1.40 daniel 2424: if (CUR != '"')
1.31 daniel 2425: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2426: else
1.40 daniel 2427: NEXT;
2428: } else if (CUR == '\''){
2429: NEXT;
2430: q = CUR_PTR;
1.29 daniel 2431: encoding = xmlParseEncName(ctxt);
1.40 daniel 2432: if (CUR != '\'')
1.31 daniel 2433: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2434: else
1.40 daniel 2435: NEXT;
2436: } else if (CUR == '"'){
1.31 daniel 2437: xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 2438: }
2439: }
2440: return(encoding);
2441: }
2442:
2443: /*
2444: * xmlParseSDDecl: parse the XML standalone declaration
2445: *
2446: * [32] SDDecl ::= S 'standalone' Eq
2447: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2448: */
2449:
2450: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2451: int standalone = -1;
2452:
1.42 ! daniel 2453: SKIP_BLANKS;
1.40 daniel 2454: if ((CUR == 's') && (NXT(1) == 't') &&
2455: (NXT(2) == 'a') && (NXT(3) == 'n') &&
2456: (NXT(4) == 'd') && (NXT(5) == 'a') &&
2457: (NXT(6) == 'l') && (NXT(7) == 'o') &&
2458: (NXT(8) == 'n') && (NXT(9) == 'e')) {
2459: SKIP(10);
2460: if (CUR != '=') {
1.32 daniel 2461: xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
2462: return(standalone);
2463: }
1.40 daniel 2464: NEXT;
1.42 ! daniel 2465: SKIP_BLANKS;
1.40 daniel 2466: if (CUR == '\''){
2467: NEXT;
2468: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 2469: standalone = 0;
1.40 daniel 2470: SKIP(2);
2471: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2472: (NXT(2) == 's')) {
1.29 daniel 2473: standalone = 1;
1.40 daniel 2474: SKIP(3);
1.29 daniel 2475: } else {
1.31 daniel 2476: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2477: }
1.40 daniel 2478: if (CUR != '\'')
1.31 daniel 2479: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2480: else
1.40 daniel 2481: NEXT;
2482: } else if (CUR == '"'){
2483: NEXT;
2484: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 2485: standalone = 0;
1.40 daniel 2486: SKIP(2);
2487: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2488: (NXT(2) == 's')) {
1.29 daniel 2489: standalone = 1;
1.40 daniel 2490: SKIP(3);
1.29 daniel 2491: } else {
1.31 daniel 2492: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2493: }
1.40 daniel 2494: if (CUR != '"')
1.31 daniel 2495: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2496: else
1.40 daniel 2497: NEXT;
1.37 daniel 2498: } else {
2499: xmlParserError(ctxt, "Standalone value not found\n");
2500: }
1.29 daniel 2501: }
2502: return(standalone);
2503: }
2504:
2505: /*
1.1 veillard 2506: * xmlParseXMLDecl: parse an XML declaration header
1.29 daniel 2507: *
2508: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 2509: */
2510:
1.16 daniel 2511: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 2512: CHAR *version;
2513:
2514: /*
1.19 daniel 2515: * We know that '<?xml' is here.
1.1 veillard 2516: */
1.40 daniel 2517: SKIP(5);
1.1 veillard 2518:
1.42 ! daniel 2519: SKIP_BLANKS;
1.1 veillard 2520:
2521: /*
1.29 daniel 2522: * We should have the VersionInfo here.
1.1 veillard 2523: */
1.29 daniel 2524: version = xmlParseVersionInfo(ctxt);
2525: if (version == NULL)
1.16 daniel 2526: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.29 daniel 2527: else {
2528: ctxt->doc = xmlNewDoc(version);
2529: free(version);
2530: }
2531:
2532: /*
2533: * We may have the encoding declaration
2534: */
1.32 daniel 2535: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 2536:
2537: /*
1.29 daniel 2538: * We may have the standalone status.
1.1 veillard 2539: */
1.32 daniel 2540: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 2541:
1.42 ! daniel 2542: SKIP_BLANKS;
1.40 daniel 2543: if ((CUR == '?') && (NXT(1) == '>')) {
2544: SKIP(2);
2545: } else if (CUR == '>') {
1.31 daniel 2546: /* Deprecated old WD ... */
2547: xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
1.40 daniel 2548: NEXT;
1.29 daniel 2549: } else {
1.31 daniel 2550: xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
1.40 daniel 2551: MOVETO_ENDTAG(CUR_PTR);
2552: NEXT;
1.29 daniel 2553: }
1.1 veillard 2554: }
2555:
2556: /*
1.22 daniel 2557: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 2558: * Misc*
2559: *
1.22 daniel 2560: * [27] Misc ::= Comment | PI | S
1.1 veillard 2561: */
2562:
1.16 daniel 2563: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 2564: while (((CUR == '<') && (NXT(1) == '?')) ||
2565: ((CUR == '<') && (NXT(1) == '!') &&
2566: (NXT(2) == '-') && (NXT(3) == '-')) ||
2567: IS_BLANK(CUR)) {
2568: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 2569: xmlParsePI(ctxt);
1.40 daniel 2570: } else if (IS_BLANK(CUR)) {
2571: NEXT;
1.1 veillard 2572: } else
1.31 daniel 2573: xmlParseComment(ctxt, 0);
1.1 veillard 2574: }
2575: }
2576:
2577: /*
1.16 daniel 2578: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 2579: *
1.22 daniel 2580: * [1] document ::= prolog element Misc*
1.29 daniel 2581: *
2582: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 2583: */
2584:
1.16 daniel 2585: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.14 veillard 2586: /*
2587: * We should check for encoding here and plug-in some
2588: * conversion code TODO !!!!
2589: */
1.1 veillard 2590:
2591: /*
2592: * Wipe out everything which is before the first '<'
2593: */
1.42 ! daniel 2594: SKIP_BLANKS;
1.1 veillard 2595:
2596: /*
2597: * Check for the XMLDecl in the Prolog.
2598: */
1.40 daniel 2599: if ((CUR == '<') && (NXT(1) == '?') &&
2600: (NXT(2) == 'x') && (NXT(3) == 'm') &&
2601: (NXT(4) == 'l')) {
1.19 daniel 2602: xmlParseXMLDecl(ctxt);
2603: /* SKIP_EOL(cur); */
1.42 ! daniel 2604: SKIP_BLANKS;
1.40 daniel 2605: } else if ((CUR == '<') && (NXT(1) == '?') &&
2606: (NXT(2) == 'X') && (NXT(3) == 'M') &&
2607: (NXT(4) == 'L')) {
1.19 daniel 2608: /*
2609: * The first drafts were using <?XML and the final W3C REC
2610: * now use <?xml ...
2611: */
1.16 daniel 2612: xmlParseXMLDecl(ctxt);
1.1 veillard 2613: /* SKIP_EOL(cur); */
1.42 ! daniel 2614: SKIP_BLANKS;
1.1 veillard 2615: } else {
1.16 daniel 2616: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 2617: }
2618:
2619: /*
2620: * The Misc part of the Prolog
2621: */
1.16 daniel 2622: xmlParseMisc(ctxt);
1.1 veillard 2623:
2624: /*
1.29 daniel 2625: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 2626: * (doctypedecl Misc*)?
2627: */
1.40 daniel 2628: if ((CUR == '<') && (NXT(1) == '!') &&
2629: (NXT(2) == 'D') && (NXT(3) == 'O') &&
2630: (NXT(4) == 'C') && (NXT(5) == 'T') &&
2631: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
2632: (NXT(8) == 'E')) {
1.22 daniel 2633: xmlParseDocTypeDecl(ctxt);
2634: xmlParseMisc(ctxt);
1.21 daniel 2635: }
2636:
2637: /*
2638: * Time to start parsing the tree itself
1.1 veillard 2639: */
1.16 daniel 2640: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 2641:
2642: /*
2643: * The Misc part at the end
2644: */
2645: xmlParseMisc(ctxt);
1.16 daniel 2646:
2647: return(0);
2648: }
2649:
2650: /*
2651: * xmlParseDoc : parse an XML in-memory document and build a tree.
2652: */
2653:
2654: xmlDocPtr xmlParseDoc(CHAR *cur) {
2655: xmlDocPtr ret;
2656: xmlParserCtxtPtr ctxt;
1.40 daniel 2657: xmlParserInputPtr input;
1.16 daniel 2658:
2659: if (cur == NULL) return(NULL);
1.1 veillard 2660:
1.16 daniel 2661: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2662: if (ctxt == NULL) {
2663: perror("malloc");
2664: return(NULL);
2665: }
1.40 daniel 2666: xmlInitParserCtxt(ctxt);
2667: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2668: if (input == NULL) {
2669: perror("malloc");
2670: free(ctxt);
2671: return(NULL);
2672: }
2673:
2674: input->filename = NULL;
2675: input->line = 1;
2676: input->col = 1;
2677: input->base = cur;
2678: input->cur = cur;
2679:
2680: inputPush(ctxt, input);
1.16 daniel 2681:
2682:
2683: xmlParseDocument(ctxt);
2684: ret = ctxt->doc;
1.20 daniel 2685: free(ctxt->nodes);
1.16 daniel 2686: free(ctxt);
2687:
1.1 veillard 2688: return(ret);
2689: }
2690:
1.9 httpng 2691: /*
2692: * xmlParseFile : parse an XML file and build a tree.
2693: */
2694:
2695: xmlDocPtr xmlParseFile(const char *filename) {
2696: xmlDocPtr ret;
1.20 daniel 2697: #ifdef HAVE_ZLIB_H
2698: gzFile input;
2699: #else
1.9 httpng 2700: int input;
1.20 daniel 2701: #endif
1.9 httpng 2702: int res;
2703: struct stat buf;
2704: char *buffer;
1.16 daniel 2705: xmlParserCtxtPtr ctxt;
1.40 daniel 2706: xmlParserInputPtr inputStream;
1.9 httpng 2707:
1.11 veillard 2708: res = stat(filename, &buf);
1.9 httpng 2709: if (res < 0) return(NULL);
2710:
1.20 daniel 2711: #ifdef HAVE_ZLIB_H
2712: retry_bigger:
2713: buffer = malloc((buf.st_size * 20) + 100);
2714: #else
1.9 httpng 2715: buffer = malloc(buf.st_size + 100);
1.20 daniel 2716: #endif
1.9 httpng 2717: if (buffer == NULL) {
2718: perror("malloc");
2719: return(NULL);
2720: }
2721:
2722: memset(buffer, 0, sizeof(buffer));
1.20 daniel 2723: #ifdef HAVE_ZLIB_H
2724: input = gzopen (filename, "r");
2725: if (input == NULL) {
2726: fprintf (stderr, "Cannot read file %s :\n", filename);
2727: perror ("gzopen failed");
2728: return(NULL);
2729: }
2730: #else
1.9 httpng 2731: input = open (filename, O_RDONLY);
2732: if (input < 0) {
2733: fprintf (stderr, "Cannot read file %s :\n", filename);
2734: perror ("open failed");
2735: return(NULL);
2736: }
1.20 daniel 2737: #endif
2738: #ifdef HAVE_ZLIB_H
2739: res = gzread(input, buffer, 20 * buf.st_size);
2740: #else
1.9 httpng 2741: res = read(input, buffer, buf.st_size);
1.20 daniel 2742: #endif
1.9 httpng 2743: if (res < 0) {
2744: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 2745: #ifdef HAVE_ZLIB_H
2746: perror ("gzread failed");
2747: #else
1.9 httpng 2748: perror ("read failed");
1.20 daniel 2749: #endif
1.9 httpng 2750: return(NULL);
2751: }
1.20 daniel 2752: #ifdef HAVE_ZLIB_H
2753: gzclose(input);
2754: if (res >= 20 * buf.st_size) {
2755: free(buffer);
2756: buf.st_size *= 2;
2757: goto retry_bigger;
2758: }
2759: buf.st_size = res;
2760: #else
1.9 httpng 2761: close(input);
1.20 daniel 2762: #endif
2763:
1.40 daniel 2764: buffer[buf.st_size] = '\0';
1.9 httpng 2765:
1.16 daniel 2766: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2767: if (ctxt == NULL) {
2768: perror("malloc");
2769: return(NULL);
2770: }
1.40 daniel 2771: xmlInitParserCtxt(ctxt);
2772: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2773: if (inputStream == NULL) {
2774: perror("malloc");
2775: free(ctxt);
2776: return(NULL);
2777: }
2778:
2779: inputStream->filename = strdup(filename);
2780: inputStream->line = 1;
2781: inputStream->col = 1;
2782: inputStream->base = buffer;
2783: inputStream->cur = buffer;
1.16 daniel 2784:
1.40 daniel 2785: inputPush(ctxt, inputStream);
1.16 daniel 2786:
2787: xmlParseDocument(ctxt);
1.40 daniel 2788:
1.16 daniel 2789: ret = ctxt->doc;
1.9 httpng 2790: free(buffer);
1.20 daniel 2791: free(ctxt->nodes);
2792: free(ctxt);
2793:
2794: return(ret);
2795: }
2796:
1.32 daniel 2797:
1.20 daniel 2798: /*
1.32 daniel 2799: * xmlParseMemory : parse an XML memory block and build a tree.
1.20 daniel 2800: */
2801: xmlDocPtr xmlParseMemory(char *buffer, int size) {
2802: xmlDocPtr ret;
2803: xmlParserCtxtPtr ctxt;
1.40 daniel 2804: xmlParserInputPtr input;
2805:
2806: buffer[size - 1] = '\0';
2807:
1.20 daniel 2808: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2809: if (ctxt == NULL) {
2810: perror("malloc");
2811: return(NULL);
2812: }
1.40 daniel 2813: xmlInitParserCtxt(ctxt);
2814: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2815: if (input == NULL) {
2816: perror("malloc");
2817: free(ctxt);
2818: return(NULL);
2819: }
1.20 daniel 2820:
1.40 daniel 2821: input->filename = NULL;
2822: input->line = 1;
2823: input->col = 1;
2824: input->base = buffer;
2825: input->cur = buffer;
1.20 daniel 2826:
1.40 daniel 2827: inputPush(ctxt, input);
1.20 daniel 2828:
2829: xmlParseDocument(ctxt);
1.40 daniel 2830:
1.20 daniel 2831: ret = ctxt->doc;
2832: free(ctxt->nodes);
1.16 daniel 2833: free(ctxt);
2834:
1.9 httpng 2835: return(ret);
1.17 daniel 2836: }
2837:
2838:
2839: /* Initialize parser context */
2840: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2841: {
1.32 daniel 2842: int i;
1.19 daniel 2843:
1.40 daniel 2844: /* Allocate the Input stack */
2845: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
2846: ctxt->inputNr = 0;
2847: ctxt->inputMax = 5;
2848: ctxt->input = NULL;
2849:
1.32 daniel 2850: ctxt->doc = NULL;
2851: ctxt->depth = 0;
2852: ctxt->max_depth = 10;
2853: ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
2854: if (ctxt->nodes == NULL) {
2855: fprintf(stderr, "malloc of %d byte failed\n",
2856: ctxt->max_depth * sizeof(xmlNodePtr));
2857: ctxt->max_depth = 0;
2858: } else {
2859: for (i = 0;i < ctxt->max_depth;i++)
2860: ctxt->nodes[i] = NULL;
2861: }
2862: ctxt->record_info = 0;
2863: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 2864: }
2865:
2866:
1.19 daniel 2867: /*
2868: * Clear (release owned resources) and reinitialize context
2869: */
1.32 daniel 2870: void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 2871: {
1.32 daniel 2872: if ( ctxt->nodes != NULL )
2873: free(ctxt->nodes);
2874: xmlClearNodeInfoSeq(&ctxt->node_seq);
2875: xmlInitParserCtxt(ctxt);
1.17 daniel 2876: }
2877:
2878:
1.19 daniel 2879: /*
2880: * Setup the parser context to parse a new buffer; Clears any prior
2881: * contents from the parser context. The buffer parameter must not be
2882: * NULL, but the filename parameter can be
2883: */
1.17 daniel 2884: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
2885: const char* filename)
2886: {
1.40 daniel 2887: xmlParserInputPtr input;
2888:
2889: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2890: if (input == NULL) {
2891: perror("malloc");
2892: free(ctxt);
2893: exit(1);
2894: }
2895:
1.17 daniel 2896: xmlClearParserCtxt(ctxt);
1.40 daniel 2897: if (input->filename != NULL)
2898: input->filename = strdup(filename);
2899: else
2900: input->filename = NULL;
2901: input->line = 1;
2902: input->col = 1;
2903: input->base = buffer;
2904: input->cur = buffer;
2905:
2906: inputPush(ctxt, input);
1.17 daniel 2907: }
2908:
1.32 daniel 2909:
2910: /*
2911: * xmlParserFindNodeInfo : Find the parser node info struct for a given node
2912: */
2913: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2914: const xmlNode* node)
2915: {
2916: unsigned long pos;
2917:
2918: /* Find position where node should be at */
2919: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2920: if ( ctx->node_seq.buffer[pos].node == node )
2921: return &ctx->node_seq.buffer[pos];
2922: else
2923: return NULL;
2924: }
2925:
2926:
2927: /*
2928: * xmlInitNodeInfoSeq -- Initialize (set to initial state) node info sequence
2929: */
2930: void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2931: {
2932: seq->length = 0;
2933: seq->maximum = 0;
2934: seq->buffer = NULL;
2935: }
2936:
2937: /*
2938: * xmlClearNodeInfoSeq -- Clear (release memory and reinitialize) node
2939: * info sequence
2940: */
2941: void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2942: {
2943: if ( seq->buffer != NULL )
2944: free(seq->buffer);
2945: xmlInitNodeInfoSeq(seq);
2946: }
2947:
2948:
2949: /*
2950: * xmlParserFindNodeInfoIndex : Find the index that the info record for
2951: * the given node is or should be at in a sorted sequence
2952: */
2953: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2954: const xmlNode* node)
2955: {
2956: unsigned long upper, lower, middle;
2957: int found = 0;
2958:
2959: /* Do a binary search for the key */
2960: lower = 1;
2961: upper = seq->length;
2962: middle = 0;
2963: while ( lower <= upper && !found) {
2964: middle = lower + (upper - lower) / 2;
2965: if ( node == seq->buffer[middle - 1].node )
2966: found = 1;
2967: else if ( node < seq->buffer[middle - 1].node )
2968: upper = middle - 1;
2969: else
2970: lower = middle + 1;
2971: }
2972:
2973: /* Return position */
2974: if ( middle == 0 || seq->buffer[middle - 1].node < node )
2975: return middle;
2976: else
2977: return middle - 1;
2978: }
2979:
2980:
2981: /*
2982: * xmlParserAddNodeInfo : Insert node info record into sorted sequence
2983: */
2984: void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
2985: const xmlParserNodeInfo* info)
2986: {
2987: unsigned long pos;
2988: static unsigned int block_size = 5;
2989:
2990: /* Find pos and check to see if node is already in the sequence */
2991: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
2992: if ( pos < ctx->node_seq.length
2993: && ctx->node_seq.buffer[pos].node == info->node ) {
2994: ctx->node_seq.buffer[pos] = *info;
2995: }
2996:
2997: /* Otherwise, we need to add new node to buffer */
2998: else {
2999: /* Expand buffer by 5 if needed */
3000: if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
3001: xmlParserNodeInfo* tmp_buffer;
3002: unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
3003: *(ctx->node_seq.maximum + block_size));
3004:
3005: if ( ctx->node_seq.buffer == NULL )
3006: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
3007: else
3008: tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
3009:
3010: if ( tmp_buffer == NULL ) {
3011: xmlParserError(ctx, "Out of memory");
3012: return;
3013: }
3014: ctx->node_seq.buffer = tmp_buffer;
3015: ctx->node_seq.maximum += block_size;
3016: }
3017:
3018: /* If position is not at end, move elements out of the way */
3019: if ( pos != ctx->node_seq.length ) {
3020: unsigned long i;
3021:
3022: for ( i = ctx->node_seq.length; i > pos; i-- )
3023: ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
3024: }
3025:
3026: /* Copy element and increase length */
3027: ctx->node_seq.buffer[pos] = *info;
3028: ctx->node_seq.length++;
3029: }
3030: }
Webmaster