Annotation of XML/parser.c, revision 1.46
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.46 ! daniel 6: * $Id: parser.c,v 1.45 1998/08/12 21:53:36 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
1.45 daniel 34: /************************************************************************
35: * *
36: * Parser stacks related functions and macros *
37: * *
38: ************************************************************************/
1.1 veillard 39: /*
1.40 daniel 40: * Generic function for accessing stacks in the Parser Context
1.1 veillard 41: */
42:
1.31 daniel 43: #define PUSH_AND_POP(type, name) \
1.40 daniel 44: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 45: if (ctxt->name##Nr >= ctxt->name##Max) { \
46: ctxt->name##Max *= 2; \
1.40 daniel 47: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
48: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
49: if (ctxt->name##Tab == NULL) { \
1.31 daniel 50: fprintf(stderr, "realloc failed !\n"); \
51: exit(1); \
52: } \
53: } \
1.40 daniel 54: ctxt->name##Tab[ctxt->name##Nr] = value; \
55: ctxt->name = value; \
56: return(ctxt->name##Nr++); \
1.31 daniel 57: } \
1.40 daniel 58: type name##Pop(xmlParserCtxtPtr ctxt) { \
59: if (ctxt->name##Nr <= 0) return(0); \
60: ctxt->name##Nr--; \
1.45 daniel 61: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
1.40 daniel 62: return(ctxt->name); \
1.31 daniel 63: } \
64:
1.40 daniel 65: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 66: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 67:
1.45 daniel 68: /*************
1.40 daniel 69: #define CUR (*(ctxt->input->cur) ? *(ctxt->input->cur) : xmlPopInput(ctxt))
1.45 daniel 70: #define NEXT (((*(ctxt->input->cur) == '\n') ? \
71: (ctxt->input->line++, ctxt->input->col = 1) : \
72: (ctxt->input->col++)), ctxt->input->cur++)
73: *************/
74:
/*
 * Shorthands used by the parsing routines to walk the current input.
 * They all expect a variable named `ctxt` (the parser context) to be
 * visible where they are used.
 *
 *   CUR         the character at the current position
 *   NEXT        if the current character is not 0: update the line/col
 *               counters (a '\n' starts a new line at column 1) and step
 *               over the character; the value of the expression is the
 *               position before the step. If the current character is 0:
 *               call xmlPopInput() and yield the current position of
 *               whatever input is on top afterwards, without stepping.
 *   CUR_PTR     the current position itself (usable as an lvalue)
 *   NXT(val)    the character `val` positions after the current one
 *   SKIP(val)   move the current position by `val` characters; line/col
 *               are NOT updated
 *   SKIP_BLANKS step (with NEXT) over a run of blank characters
 *
 * Every expansion is parenthesized (or wrapped in do/while(0) for the
 * statement form) so that the macros can be combined with surrounding
 * operators without surprises.
 */
#define CUR (*ctxt->input->cur)
#define NEXT ((*ctxt->input->cur) ? \
                (((*(ctxt->input->cur) == '\n') ? \
                    (ctxt->input->line++, ctxt->input->col = 1) : \
                    (ctxt->input->col++)), ctxt->input->cur++) : \
                (xmlPopInput(ctxt), ctxt->input->cur))

#define CUR_PTR (ctxt->input->cur)

#define NXT(val) (ctxt->input->cur[(val)])

#define SKIP(val) (ctxt->input->cur += (val))
#define SKIP_BLANKS \
    do { \
        while (IS_BLANK(*(ctxt->input->cur))) NEXT; \
    } while (0)
89:
1.40 daniel 90:
91: /*
92: * xmlPopInput: the current input pointed by ctxt->input came to an end
93: * pop it and return the next char.
1.45 daniel 94: *
95: * TODO A deallocation of the popped Input structure is needed
1.40 daniel 96: */
97: CHAR xmlPopInput(xmlParserCtxtPtr ctxt) {
98: if (ctxt->inputNr == 1) return(0); /* End of main Input */
99: inputPop(ctxt);
100: return(CUR);
101: }
102:
103: /*
104: * xmlPushInput: switch to a new input stream which is stacked on top
105: * of the previous one(s).
106: */
107: void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
108: if (input == NULL) return;
109: inputPush(ctxt, input);
110: }
111:
112: /*
1.45 daniel 113: * Create a new input stream based on a memory buffer.
114: */
115: void xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
116: xmlParserInputPtr input;
117:
118: if (entity == NULL) {
119: xmlParserError(ctxt,
120: "internal: xmlNewEntityInputStream entity = NULL\n");
121: return;
122: }
123: if (entity->content == NULL) {
124: xmlParserError(ctxt,
125: "internal: xmlNewEntityInputStream entity->input = NULL\n");
126: return;
127: }
128: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
129: if (input == NULL) {
130: xmlParserError(ctxt, "malloc: couldn't allocate a new input stream\n");
131: return;
132: }
133: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
134: input->base = entity->content;
135: input->cur = entity->content;
136: input->line = 1;
137: input->col = 1;
138: xmlPushInput(ctxt, input);
139: }
140:
141: /*
1.40 daniel 142: * A few macros needed to help building the parser.
143: */
144:
1.1 veillard 145: #ifdef UNICODE
1.30 daniel 146: /************************************************************************
147: * *
148: * UNICODE version of the macros. *
149: * *
150: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The test below is looser than the production: from 0x20 upward only
 * 0xFFFE and 0xFFFF are rejected, so values in the surrogate blocks are
 * accepted. Below 0x20 only tab, line feed and carriage return pass.
 */
#define IS_CHAR(c) \
    (((c) >= 0x20) ? \
        (((c) != 0xFFFE) && ((c) != 0xFFFF)) : \
        (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d)))
159:
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK() does not depend on the character width: it is defined once,
 * for both builds, after the UNICODE / 8bits sections below. Defining it
 * here as well only duplicated that definition.
 */
1.1 veillard 165:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * The entries below follow the order of the production, one range or
 * single code point per line, so that they can be checked against the
 * REC side by side.
 *
 * XML_RANGE(c, lo, hi) is true when lo <= c <= hi. Like the macros built
 * on it, it evaluates its first argument more than once.
 */
#ifndef XML_RANGE
#define XML_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_BASECHAR(c) \
    (XML_RANGE((c), 0x0041, 0x005A) || \
     XML_RANGE((c), 0x0061, 0x007A) || \
     XML_RANGE((c), 0x00C0, 0x00D6) || \
     XML_RANGE((c), 0x00D8, 0x00F6) || \
     XML_RANGE((c), 0x00F8, 0x00FF) || \
     XML_RANGE((c), 0x0100, 0x0131) || \
     XML_RANGE((c), 0x0134, 0x013E) || \
     XML_RANGE((c), 0x0141, 0x0148) || \
     XML_RANGE((c), 0x014A, 0x017E) || \
     XML_RANGE((c), 0x0180, 0x01C3) || \
     XML_RANGE((c), 0x01CD, 0x01F0) || \
     XML_RANGE((c), 0x01F4, 0x01F5) || \
     XML_RANGE((c), 0x01FA, 0x0217) || \
     XML_RANGE((c), 0x0250, 0x02A8) || \
     XML_RANGE((c), 0x02BB, 0x02C1) || \
     ((c) == 0x0386) || \
     XML_RANGE((c), 0x0388, 0x038A) || \
     ((c) == 0x038C) || \
     XML_RANGE((c), 0x038E, 0x03A1) || \
     XML_RANGE((c), 0x03A3, 0x03CE) || \
     XML_RANGE((c), 0x03D0, 0x03D6) || \
     ((c) == 0x03DA) || \
     ((c) == 0x03DC) || \
     ((c) == 0x03DE) || \
     ((c) == 0x03E0) || \
     XML_RANGE((c), 0x03E2, 0x03F3) || \
     XML_RANGE((c), 0x0401, 0x040C) || \
     XML_RANGE((c), 0x040E, 0x044F) || \
     XML_RANGE((c), 0x0451, 0x045C) || \
     XML_RANGE((c), 0x045E, 0x0481) || \
     XML_RANGE((c), 0x0490, 0x04C4) || \
     XML_RANGE((c), 0x04C7, 0x04C8) || \
     XML_RANGE((c), 0x04CB, 0x04CC) || \
     XML_RANGE((c), 0x04D0, 0x04EB) || \
     XML_RANGE((c), 0x04EE, 0x04F5) || \
     XML_RANGE((c), 0x04F8, 0x04F9) || \
     XML_RANGE((c), 0x0531, 0x0556) || \
     ((c) == 0x0559) || \
     XML_RANGE((c), 0x0561, 0x0586) || \
     XML_RANGE((c), 0x05D0, 0x05EA) || \
     XML_RANGE((c), 0x05F0, 0x05F2) || \
     XML_RANGE((c), 0x0621, 0x063A) || \
     XML_RANGE((c), 0x0641, 0x064A) || \
     XML_RANGE((c), 0x0671, 0x06B7) || \
     XML_RANGE((c), 0x06BA, 0x06BE) || \
     XML_RANGE((c), 0x06C0, 0x06CE) || \
     XML_RANGE((c), 0x06D0, 0x06D3) || \
     ((c) == 0x06D5) || \
     XML_RANGE((c), 0x06E5, 0x06E6) || \
     XML_RANGE((c), 0x0905, 0x0939) || \
     ((c) == 0x093D) || \
     XML_RANGE((c), 0x0958, 0x0961) || \
     XML_RANGE((c), 0x0985, 0x098C) || \
     XML_RANGE((c), 0x098F, 0x0990) || \
     XML_RANGE((c), 0x0993, 0x09A8) || \
     XML_RANGE((c), 0x09AA, 0x09B0) || \
     ((c) == 0x09B2) || \
     XML_RANGE((c), 0x09B6, 0x09B9) || \
     XML_RANGE((c), 0x09DC, 0x09DD) || \
     XML_RANGE((c), 0x09DF, 0x09E1) || \
     XML_RANGE((c), 0x09F0, 0x09F1) || \
     XML_RANGE((c), 0x0A05, 0x0A0A) || \
     XML_RANGE((c), 0x0A0F, 0x0A10) || \
     XML_RANGE((c), 0x0A13, 0x0A28) || \
     XML_RANGE((c), 0x0A2A, 0x0A30) || \
     XML_RANGE((c), 0x0A32, 0x0A33) || \
     XML_RANGE((c), 0x0A35, 0x0A36) || \
     XML_RANGE((c), 0x0A38, 0x0A39) || \
     XML_RANGE((c), 0x0A59, 0x0A5C) || \
     ((c) == 0x0A5E) || \
     XML_RANGE((c), 0x0A72, 0x0A74) || \
     XML_RANGE((c), 0x0A85, 0x0A8B) || \
     ((c) == 0x0A8D) || \
     XML_RANGE((c), 0x0A8F, 0x0A91) || \
     XML_RANGE((c), 0x0A93, 0x0AA8) || \
     XML_RANGE((c), 0x0AAA, 0x0AB0) || \
     XML_RANGE((c), 0x0AB2, 0x0AB3) || \
     XML_RANGE((c), 0x0AB5, 0x0AB9) || \
     ((c) == 0x0ABD) || \
     ((c) == 0x0AE0) || \
     XML_RANGE((c), 0x0B05, 0x0B0C) || \
     XML_RANGE((c), 0x0B0F, 0x0B10) || \
     XML_RANGE((c), 0x0B13, 0x0B28) || \
     XML_RANGE((c), 0x0B2A, 0x0B30) || \
     XML_RANGE((c), 0x0B32, 0x0B33) || \
     XML_RANGE((c), 0x0B36, 0x0B39) || \
     ((c) == 0x0B3D) || \
     XML_RANGE((c), 0x0B5C, 0x0B5D) || \
     XML_RANGE((c), 0x0B5F, 0x0B61) || \
     XML_RANGE((c), 0x0B85, 0x0B8A) || \
     XML_RANGE((c), 0x0B8E, 0x0B90) || \
     XML_RANGE((c), 0x0B92, 0x0B95) || \
     XML_RANGE((c), 0x0B99, 0x0B9A) || \
     ((c) == 0x0B9C) || \
     XML_RANGE((c), 0x0B9E, 0x0B9F) || \
     XML_RANGE((c), 0x0BA3, 0x0BA4) || \
     XML_RANGE((c), 0x0BA8, 0x0BAA) || \
     XML_RANGE((c), 0x0BAE, 0x0BB5) || \
     XML_RANGE((c), 0x0BB7, 0x0BB9) || \
     XML_RANGE((c), 0x0C05, 0x0C0C) || \
     XML_RANGE((c), 0x0C0E, 0x0C10) || \
     XML_RANGE((c), 0x0C12, 0x0C28) || \
     XML_RANGE((c), 0x0C2A, 0x0C33) || \
     XML_RANGE((c), 0x0C35, 0x0C39) || \
     XML_RANGE((c), 0x0C60, 0x0C61) || \
     XML_RANGE((c), 0x0C85, 0x0C8C) || \
     XML_RANGE((c), 0x0C8E, 0x0C90) || \
     XML_RANGE((c), 0x0C92, 0x0CA8) || \
     XML_RANGE((c), 0x0CAA, 0x0CB3) || \
     XML_RANGE((c), 0x0CB5, 0x0CB9) || \
     ((c) == 0x0CDE) || \
     XML_RANGE((c), 0x0CE0, 0x0CE1) || \
     XML_RANGE((c), 0x0D05, 0x0D0C) || \
     XML_RANGE((c), 0x0D0E, 0x0D10) || \
     XML_RANGE((c), 0x0D12, 0x0D28) || \
     XML_RANGE((c), 0x0D2A, 0x0D39) || \
     XML_RANGE((c), 0x0D60, 0x0D61) || \
     XML_RANGE((c), 0x0E01, 0x0E2E) || \
     ((c) == 0x0E30) || \
     XML_RANGE((c), 0x0E32, 0x0E33) || \
     XML_RANGE((c), 0x0E40, 0x0E45) || \
     XML_RANGE((c), 0x0E81, 0x0E82) || \
     ((c) == 0x0E84) || \
     XML_RANGE((c), 0x0E87, 0x0E88) || \
     ((c) == 0x0E8A) || \
     ((c) == 0x0E8D) || \
     XML_RANGE((c), 0x0E94, 0x0E97) || \
     XML_RANGE((c), 0x0E99, 0x0E9F) || \
     XML_RANGE((c), 0x0EA1, 0x0EA3) || \
     ((c) == 0x0EA5) || \
     ((c) == 0x0EA7) || \
     XML_RANGE((c), 0x0EAA, 0x0EAB) || \
     XML_RANGE((c), 0x0EAD, 0x0EAE) || \
     ((c) == 0x0EB0) || \
     XML_RANGE((c), 0x0EB2, 0x0EB3) || \
     ((c) == 0x0EBD) || \
     XML_RANGE((c), 0x0EC0, 0x0EC4) || \
     XML_RANGE((c), 0x0F40, 0x0F47) || \
     XML_RANGE((c), 0x0F49, 0x0F69) || \
     XML_RANGE((c), 0x10A0, 0x10C5) || \
     XML_RANGE((c), 0x10D0, 0x10F6) || \
     ((c) == 0x1100) || \
     XML_RANGE((c), 0x1102, 0x1103) || \
     XML_RANGE((c), 0x1105, 0x1107) || \
     ((c) == 0x1109) || \
     XML_RANGE((c), 0x110B, 0x110C) || \
     XML_RANGE((c), 0x110E, 0x1112) || \
     ((c) == 0x113C) || \
     ((c) == 0x113E) || \
     ((c) == 0x1140) || \
     ((c) == 0x114C) || \
     ((c) == 0x114E) || \
     ((c) == 0x1150) || \
     XML_RANGE((c), 0x1154, 0x1155) || \
     ((c) == 0x1159) || \
     XML_RANGE((c), 0x115F, 0x1161) || \
     ((c) == 0x1163) || \
     ((c) == 0x1165) || \
     ((c) == 0x1167) || \
     ((c) == 0x1169) || \
     XML_RANGE((c), 0x116D, 0x116E) || \
     XML_RANGE((c), 0x1172, 0x1173) || \
     ((c) == 0x1175) || \
     ((c) == 0x119E) || \
     ((c) == 0x11A8) || \
     ((c) == 0x11AB) || \
     XML_RANGE((c), 0x11AE, 0x11AF) || \
     XML_RANGE((c), 0x11B7, 0x11B8) || \
     ((c) == 0x11BA) || \
     XML_RANGE((c), 0x11BC, 0x11C2) || \
     ((c) == 0x11EB) || \
     ((c) == 0x11F0) || \
     ((c) == 0x11F9) || \
     XML_RANGE((c), 0x1E00, 0x1E9B) || \
     XML_RANGE((c), 0x1EA0, 0x1EF9) || \
     XML_RANGE((c), 0x1F00, 0x1F15) || \
     XML_RANGE((c), 0x1F18, 0x1F1D) || \
     XML_RANGE((c), 0x1F20, 0x1F45) || \
     XML_RANGE((c), 0x1F48, 0x1F4D) || \
     XML_RANGE((c), 0x1F50, 0x1F57) || \
     ((c) == 0x1F59) || \
     ((c) == 0x1F5B) || \
     ((c) == 0x1F5D) || \
     XML_RANGE((c), 0x1F5F, 0x1F7D) || \
     XML_RANGE((c), 0x1F80, 0x1FB4) || \
     XML_RANGE((c), 0x1FB6, 0x1FBC) || \
     ((c) == 0x1FBE) || \
     XML_RANGE((c), 0x1FC2, 0x1FC4) || \
     XML_RANGE((c), 0x1FC6, 0x1FCC) || \
     XML_RANGE((c), 0x1FD0, 0x1FD3) || \
     XML_RANGE((c), 0x1FD6, 0x1FDB) || \
     XML_RANGE((c), 0x1FE0, 0x1FEC) || \
     XML_RANGE((c), 0x1FF2, 0x1FF4) || \
     XML_RANGE((c), 0x1FF6, 0x1FFC) || \
     ((c) == 0x2126) || \
     XML_RANGE((c), 0x212A, 0x212B) || \
     ((c) == 0x212E) || \
     XML_RANGE((c), 0x2180, 0x2182) || \
     XML_RANGE((c), 0x3041, 0x3094) || \
     XML_RANGE((c), 0x30A1, 0x30FA) || \
     XML_RANGE((c), 0x3105, 0x312C) || \
     XML_RANGE((c), 0xAC00, 0xD7A3))
1.1 veillard 377:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * One decimal digit block per line, in the order of the production.
 * XML_RANGE(c, lo, hi) is true when lo <= c <= hi; the argument is
 * evaluated more than once.
 */
#ifndef XML_RANGE
#define XML_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_DIGIT(c) \
    (XML_RANGE((c), 0x0030, 0x0039) || \
     XML_RANGE((c), 0x0660, 0x0669) || \
     XML_RANGE((c), 0x06F0, 0x06F9) || \
     XML_RANGE((c), 0x0966, 0x096F) || \
     XML_RANGE((c), 0x09E6, 0x09EF) || \
     XML_RANGE((c), 0x0A66, 0x0A6F) || \
     XML_RANGE((c), 0x0AE6, 0x0AEF) || \
     XML_RANGE((c), 0x0B66, 0x0B6F) || \
     XML_RANGE((c), 0x0BE7, 0x0BEF) || \
     XML_RANGE((c), 0x0C66, 0x0C6F) || \
     XML_RANGE((c), 0x0CE6, 0x0CEF) || \
     XML_RANGE((c), 0x0D66, 0x0D6F) || \
     XML_RANGE((c), 0x0E50, 0x0E59) || \
     XML_RANGE((c), 0x0ED0, 0x0ED9) || \
     XML_RANGE((c), 0x0F20, 0x0F29))
1.1 veillard 397:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The entries follow the order of the production, one range or single
 * code point per line.
 * XML_RANGE(c, lo, hi) is true when lo <= c <= hi; the argument is
 * evaluated more than once.
 */
#ifndef XML_RANGE
#define XML_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))
#endif

#define IS_COMBINING(c) \
    (XML_RANGE((c), 0x0300, 0x0345) || \
     XML_RANGE((c), 0x0360, 0x0361) || \
     XML_RANGE((c), 0x0483, 0x0486) || \
     XML_RANGE((c), 0x0591, 0x05A1) || \
     XML_RANGE((c), 0x05A3, 0x05B9) || \
     XML_RANGE((c), 0x05BB, 0x05BD) || \
     ((c) == 0x05BF) || \
     XML_RANGE((c), 0x05C1, 0x05C2) || \
     ((c) == 0x05C4) || \
     XML_RANGE((c), 0x064B, 0x0652) || \
     ((c) == 0x0670) || \
     XML_RANGE((c), 0x06D6, 0x06DC) || \
     XML_RANGE((c), 0x06DD, 0x06DF) || \
     XML_RANGE((c), 0x06E0, 0x06E4) || \
     XML_RANGE((c), 0x06E7, 0x06E8) || \
     XML_RANGE((c), 0x06EA, 0x06ED) || \
     XML_RANGE((c), 0x0901, 0x0903) || \
     ((c) == 0x093C) || \
     XML_RANGE((c), 0x093E, 0x094C) || \
     ((c) == 0x094D) || \
     XML_RANGE((c), 0x0951, 0x0954) || \
     XML_RANGE((c), 0x0962, 0x0963) || \
     XML_RANGE((c), 0x0981, 0x0983) || \
     ((c) == 0x09BC) || \
     ((c) == 0x09BE) || \
     ((c) == 0x09BF) || \
     XML_RANGE((c), 0x09C0, 0x09C4) || \
     XML_RANGE((c), 0x09C7, 0x09C8) || \
     XML_RANGE((c), 0x09CB, 0x09CD) || \
     ((c) == 0x09D7) || \
     XML_RANGE((c), 0x09E2, 0x09E3) || \
     ((c) == 0x0A02) || \
     ((c) == 0x0A3C) || \
     ((c) == 0x0A3E) || \
     ((c) == 0x0A3F) || \
     XML_RANGE((c), 0x0A40, 0x0A42) || \
     XML_RANGE((c), 0x0A47, 0x0A48) || \
     XML_RANGE((c), 0x0A4B, 0x0A4D) || \
     XML_RANGE((c), 0x0A70, 0x0A71) || \
     XML_RANGE((c), 0x0A81, 0x0A83) || \
     ((c) == 0x0ABC) || \
     XML_RANGE((c), 0x0ABE, 0x0AC5) || \
     XML_RANGE((c), 0x0AC7, 0x0AC9) || \
     XML_RANGE((c), 0x0ACB, 0x0ACD) || \
     XML_RANGE((c), 0x0B01, 0x0B03) || \
     ((c) == 0x0B3C) || \
     XML_RANGE((c), 0x0B3E, 0x0B43) || \
     XML_RANGE((c), 0x0B47, 0x0B48) || \
     XML_RANGE((c), 0x0B4B, 0x0B4D) || \
     XML_RANGE((c), 0x0B56, 0x0B57) || \
     XML_RANGE((c), 0x0B82, 0x0B83) || \
     XML_RANGE((c), 0x0BBE, 0x0BC2) || \
     XML_RANGE((c), 0x0BC6, 0x0BC8) || \
     XML_RANGE((c), 0x0BCA, 0x0BCD) || \
     ((c) == 0x0BD7) || \
     XML_RANGE((c), 0x0C01, 0x0C03) || \
     XML_RANGE((c), 0x0C3E, 0x0C44) || \
     XML_RANGE((c), 0x0C46, 0x0C48) || \
     XML_RANGE((c), 0x0C4A, 0x0C4D) || \
     XML_RANGE((c), 0x0C55, 0x0C56) || \
     XML_RANGE((c), 0x0C82, 0x0C83) || \
     XML_RANGE((c), 0x0CBE, 0x0CC4) || \
     XML_RANGE((c), 0x0CC6, 0x0CC8) || \
     XML_RANGE((c), 0x0CCA, 0x0CCD) || \
     XML_RANGE((c), 0x0CD5, 0x0CD6) || \
     XML_RANGE((c), 0x0D02, 0x0D03) || \
     XML_RANGE((c), 0x0D3E, 0x0D43) || \
     XML_RANGE((c), 0x0D46, 0x0D48) || \
     XML_RANGE((c), 0x0D4A, 0x0D4D) || \
     ((c) == 0x0D57) || \
     ((c) == 0x0E31) || \
     XML_RANGE((c), 0x0E34, 0x0E3A) || \
     XML_RANGE((c), 0x0E47, 0x0E4E) || \
     ((c) == 0x0EB1) || \
     XML_RANGE((c), 0x0EB4, 0x0EB9) || \
     XML_RANGE((c), 0x0EBB, 0x0EBC) || \
     XML_RANGE((c), 0x0EC8, 0x0ECD) || \
     XML_RANGE((c), 0x0F18, 0x0F19) || \
     ((c) == 0x0F35) || \
     ((c) == 0x0F37) || \
     ((c) == 0x0F39) || \
     ((c) == 0x0F3E) || \
     ((c) == 0x0F3F) || \
     XML_RANGE((c), 0x0F71, 0x0F84) || \
     XML_RANGE((c), 0x0F86, 0x0F8B) || \
     XML_RANGE((c), 0x0F90, 0x0F95) || \
     ((c) == 0x0F97) || \
     XML_RANGE((c), 0x0F99, 0x0FAD) || \
     XML_RANGE((c), 0x0FB1, 0x0FB7) || \
     ((c) == 0x0FB9) || \
     XML_RANGE((c), 0x20D0, 0x20DC) || \
     ((c) == 0x20E1) || \
     XML_RANGE((c), 0x302A, 0x302F) || \
     ((c) == 0x3099) || \
     ((c) == 0x309A))
1.3 veillard 497:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * The argument is evaluated more than once.
 */
#define IS_EXTENDER(c) \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
     ((c) == 0xec6) || ((c) == 0x3005) || \
     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
     (((c) >= 0x309d) && ((c) <= 0x309e)) || \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 510:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE(review): the range 0xF900-0xFA2D is accepted below although it is
 * not part of the production quoted above - confirm this is intended.
 * The argument is evaluated more than once.
 */
#define IS_IDEOGRAPHIC(c) \
    (((0x4e00 <= (c)) && ((c) <= 0x9fa5)) || \
     ((0xf900 <= (c)) && ((c) <= 0xfa2d)) || \
     ((0x3021 <= (c)) && ((c) <= 0x3029)) || \
     (0x3007 == (c)))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) \
    (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
524:
525: #else
1.30 daniel 526: /************************************************************************
527: * *
528: * 8bits / ASCII version of the macros. *
529: * *
530: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * 8 bits version: everything from 0x20 upward is accepted, plus tab,
 * line feed and carriage return. The argument is evaluated more than once.
 */
#define IS_CHAR(c) \
    (((c) >= 0x20) || \
     ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
1.1 veillard 539:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8 bits version: only the part of the list below 0x100 is tested.
 *
 * A former clause "(c) >= 0xaa && (c) <= 0x5b" was dropped: its lower
 * bound is above its upper bound, so it could never match anything.
 * NOTE(review): 0xba is accepted although the BaseChar production does
 * not list it; it is kept so that the accepted set does not change.
 * The argument is evaluated more than once.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)) || \
     ((c) == 0xba))
551:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8 bits version: the ASCII digits only.
 */
#define IS_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39))

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8 bits version: there is no ideographic character in this range.
 */
#define IS_LETTER(c) IS_BASECHAR(c)

/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8 bits version: never true, the argument is not evaluated.
 */
#define IS_COMBINING(c) 0

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8 bits version: 0xB7 is the only value of the list below 0x100.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
574:
1.21 daniel 575: #endif /* !UNICODE */
1.1 veillard 576:
/*
 * Blank chars: space, tab, line feed and carriage return.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * The replacement text is kept token for token as it was, since the
 * UNICODE section may define the very same macro and two definitions
 * of a macro must not differ. The argument is evaluated more than once.
 */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || ((c) == 0x0D))
584:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * True for the characters allowed in a public identifier literal.
 * The argument is evaluated more than once.
 */
#define IS_PUBIDCHAR(c) \
    (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || \
     (('a' <= (c)) && ((c) <= 'z')) || \
     (('A' <= (c)) && ((c) <= 'Z')) || \
     (('0' <= (c)) && ((c) <= '9')) || \
     ((c) == '-') || ((c) == '\'') || \
     ((c) == '(') || ((c) == ')') || \
     ((c) == '+') || ((c) == ',') || \
     ((c) == '.') || ((c) == '/') || \
     ((c) == ':') || ((c) == '=') || \
     ((c) == '?') || ((c) == ';') || \
     ((c) == '!') || ((c) == '*') || \
     ((c) == '#') || ((c) == '@') || \
     ((c) == '$') || ((c) == '_') || \
     ((c) == '%'))
1.1 veillard 598:
/*
 * SKIP_EOL(p): if `p` points at an end of line, move it past exactly one
 * line ending: a lone CR, a lone LF, or a CR LF / LF CR pair.
 * `p` is left untouched otherwise.
 *
 * CR is 0x0D and LF is 0x0A (13 and 10 in decimal).
 * `p` must be a modifiable pointer; it is evaluated several times.
 */
#define SKIP_EOL(p) \
    do { \
        if (*(p) == 0x0D) { \
            (p)++; \
            if (*(p) == 0x0A) (p)++; \
        } else if (*(p) == 0x0A) { \
            (p)++; \
            if (*(p) == 0x0D) (p)++; \
        } \
    } while (0)
602:
/*
 * MOVETO_ENDTAG(p) / MOVETO_STARTTAG(p): advance the pointer `p` until it
 * points at a '>' (resp. a '<') or at something which is not an IS_CHAR()
 * character. `p` must be a modifiable pointer; it is evaluated several
 * times. The expansion is a `while` statement without its final ';'.
 */
#define MOVETO_ENDTAG(p) \
    while (IS_CHAR(*(p)) && ('>' != *(p))) (p)++

#define MOVETO_STARTTAG(p) \
    while (IS_CHAR(*(p)) && ('<' != *(p))) (p)++
1.1 veillard 608:
1.28 daniel 609: /************************************************************************
610: * *
611: * Commodity functions to handle CHARs *
612: * *
613: ************************************************************************/
614:
1.3 veillard 615: /*
1.45 daniel 616: * xmlStrndup : a strndup for array of CHAR's
1.1 veillard 617: */
618:
1.6 httpng 619: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 620: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
621:
622: if (ret == NULL) {
623: fprintf(stderr, "malloc of %d byte failed\n",
624: (len + 1) * sizeof(CHAR));
625: return(NULL);
626: }
627: memcpy(ret, cur, len * sizeof(CHAR));
628: ret[len] = 0;
629: return(ret);
630: }
631:
632: /*
633: * xmlStrdup : a strdup for CHAR's
634: */
635:
1.6 httpng 636: CHAR *xmlStrdup(const CHAR *cur) {
637: const CHAR *p = cur;
1.1 veillard 638:
639: while (IS_CHAR(*p)) p++;
640: return(xmlStrndup(cur, p - cur));
641: }
642:
643: /*
1.45 daniel 644: * xmlCharStrndup : a strndup for char's to CHAR's
645: */
646:
647: CHAR *xmlCharStrndup(const char *cur, int len) {
648: int i;
649: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
650:
651: if (ret == NULL) {
652: fprintf(stderr, "malloc of %d byte failed\n",
653: (len + 1) * sizeof(CHAR));
654: return(NULL);
655: }
656: for (i = 0;i < len;i++)
657: ret[i] = (CHAR) cur[i];
658: ret[len] = 0;
659: return(ret);
660: }
661:
662: /*
663: * xmlCharStrdup : a strdup for char's to CHAR's
664: */
665:
666: CHAR *xmlCharStrdup(const char *cur) {
667: const char *p = cur;
668:
669: while (*p != '\0') p++;
670: return(xmlCharStrndup(cur, p - cur));
671: }
672:
673: /*
1.14 veillard 674: * xmlStrcmp : a strcmp for CHAR's
675: */
676:
677: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
678: register int tmp;
679:
680: do {
681: tmp = *str1++ - *str2++;
682: if (tmp != 0) return(tmp);
683: } while ((*str1 != 0) && (*str2 != 0));
684: return (*str1 - *str2);
685: }
686:
687: /*
688: * xmlStrncmp : a strncmp for CHAR's
689: */
690:
691: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
692: register int tmp;
693:
694: if (len <= 0) return(0);
695: do {
696: tmp = *str1++ - *str2++;
697: if (tmp != 0) return(tmp);
698: len--;
699: if (len <= 0) return(0);
700: } while ((*str1 != 0) && (*str2 != 0));
701: return (*str1 - *str2);
702: }
703:
704: /*
705: * xmlStrchr : a strchr for CHAR's
706: */
707:
708: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
709: while (*str != 0) {
710: if (*str == val) return((CHAR *) str);
711: str++;
712: }
713: return(NULL);
714: }
1.28 daniel 715:
1.45 daniel 716: /*
717: * xmlStrlen : lenght of a CHAR's string
718: */
719:
720: int xmlStrlen(const CHAR *str) {
721: int len = 0;
722:
723: if (str == NULL) return(0);
724: while (*str != 0) {
725: str++;
726: len++;
727: }
728: return(len);
729: }
730:
731: /*
732: * xmlStrncat : a strncat for array of CHAR's
733: */
734:
735: CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len) {
736: int size;
737: CHAR *ret;
738:
739: if ((add == NULL) || (len == 0))
740: return(cur);
741: if (cur == NULL)
742: return(xmlStrndup(add, len));
743:
744: size = xmlStrlen(cur);
745: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
746: if (ret == NULL) {
747: fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
748: (size + len + 1) * sizeof(CHAR));
749: return(cur);
750: }
751: memcpy(&ret[size], add, len * sizeof(CHAR));
752: ret[size + len] = 0;
753: return(ret);
754: }
755:
756: /*
757: * xmlStrcat : a strcat for CHAR's
758: */
759:
760: CHAR *xmlStrcat(CHAR *cur, const CHAR *add) {
761: const CHAR *p = add;
762:
763: if (add == NULL) return(cur);
764: if (cur == NULL)
765: return(xmlStrdup(add));
766:
767: while (IS_CHAR(*p)) p++;
768: return(xmlStrncat(cur, add, p - add));
769: }
770:
771: /************************************************************************
772: * *
773: * Commodity functions, cleanup needed ? *
774: * *
775: ************************************************************************/
776:
777: /*
778: * Is this a sequence of blank chars that one can ignore ?
779: */
780:
781: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
782: int i;
783: xmlNodePtr lastChild;
784:
785: for (i = 0;i < len;i++)
786: if (!(IS_BLANK(str[i]))) return(0);
787:
788: if (CUR != '<') return(0);
789: lastChild = xmlGetLastChild(ctxt->node);
790: if (lastChild == NULL) {
791: if (ctxt->node->content != NULL) return(0);
792: } else if (xmlNodeIsText(lastChild))
793: return(0);
794: return(1);
795: }
796:
797: /*
798: * Handling of defined entities, when should we define a new input
799: * stream ? When do we just handle that as a set of chars ?
800: */
801:
802: void xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
803: int len;
804:
805: if (entity->content == NULL) {
806: xmlParserError(ctxt, "xmlHandleEntity %s: content == NULL\n",
807: entity->name);
808: return;
809: }
810: len = xmlStrlen(entity->content);
811: if (len <= 2) goto handle_as_char;
812:
813: /*
814: * Redefine its content as an input stream.
815: */
816: xmlNewEntityInputStream(ctxt, entity);
817: return;
818:
819: handle_as_char:
820: /*
821: * Just handle the content as a set of chars.
822: */
823: if (ctxt->sax != NULL)
824: ctxt->sax->characters(ctxt, entity->content, 0, len);
825:
826: }
827:
828: /*
 * Forward declarations for recursive behaviour.
830: */
831: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.46 ! daniel 832: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt, int inLine);
! 833: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt, int inLine);
1.45 daniel 834:
1.28 daniel 835: /************************************************************************
836: * *
837: * Extra stuff for namespace support *
838: * Relates to http://www.w3.org/TR/WD-xml-names *
839: * *
840: ************************************************************************/
841:
842: /*
843: * xmlNamespaceParseNCName : parse an XML namespace name.
844: *
845: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
846: *
847: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
848: * CombiningChar | Extender
849: */
850:
851: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
852: const CHAR *q;
853: CHAR *ret = NULL;
854:
1.40 daniel 855: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
856: q = NEXT;
1.28 daniel 857:
1.40 daniel 858: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
859: (CUR == '.') || (CUR == '-') ||
860: (CUR == '_') ||
861: (IS_COMBINING(CUR)) ||
862: (IS_EXTENDER(CUR)))
863: NEXT;
1.28 daniel 864:
1.40 daniel 865: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 866:
867: return(ret);
868: }
869:
870: /*
871: * xmlNamespaceParseQName : parse an XML qualified name
872: *
873: * [NS 5] QName ::= (Prefix ':')? LocalPart
874: *
875: * [NS 6] Prefix ::= NCName
876: *
877: * [NS 7] LocalPart ::= NCName
878: */
879:
880: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
881: CHAR *ret = NULL;
882:
883: *prefix = NULL;
884: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 885: if (CUR == ':') {
1.28 daniel 886: *prefix = ret;
1.40 daniel 887: NEXT;
1.28 daniel 888: ret = xmlNamespaceParseNCName(ctxt);
889: }
890:
891: return(ret);
892: }
893:
894: /*
895: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
896: *
897: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
898: *
899: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
900: */
901:
1.39 daniel 902: CHAR *xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 903: CHAR *name = NULL;
904:
1.40 daniel 905: if ((CUR == 'x') && (NXT(1) == 'm') &&
906: (NXT(2) == 'l') && (NXT(3) == 'n') &&
907: (NXT(4) == 's')) {
908: SKIP(5);
909: if (CUR == ':') {
910: NEXT;
1.28 daniel 911: name = xmlNamespaceParseNCName(ctxt);
912: }
913: }
1.39 daniel 914: return(name);
1.28 daniel 915: }
916:
1.45 daniel 917: /*
918: * [OLD] Parse and return a string between quotes or doublequotes
919: */
920: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
921: CHAR *ret = NULL;
922: const CHAR *q;
923:
924: if (CUR == '"') {
925: NEXT;
926: q = CUR_PTR;
927: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
928: if (CUR != '"')
929: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
930: else {
931: ret = xmlStrndup(q, CUR_PTR - q);
932: NEXT;
933: }
934: } else if (CUR == '\''){
935: NEXT;
936: q = CUR_PTR;
937: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
938: if (CUR != '\'')
939: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
940: else {
941: ret = xmlStrndup(q, CUR_PTR - q);
942: NEXT;
943: }
944: }
945: return(ret);
946: }
947:
948: /*
949: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
950: *
951: * This is what the older xml-name Working Draft specified, a bunch of
952: * other stuff may still rely on it, so support is still here as
953: * if ot was declared on the root of the Tree:-(
954: */
955:
956: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
957: CHAR *href = NULL;
958: CHAR *prefix = NULL;
959: int garbage = 0;
960:
961: /*
962: * We just skipped "namespace" or "xml:namespace"
963: */
964: SKIP_BLANKS;
965:
966: while (IS_CHAR(CUR) && (CUR != '>')) {
967: /*
968: * We can have "ns" or "prefix" attributes
969: * Old encoding as 'href' or 'AS' attributes is still supported
970: */
971: if ((CUR == 'n') && (NXT(1) == 's')) {
972: garbage = 0;
973: SKIP(2);
974: SKIP_BLANKS;
975:
976: if (CUR != '=') continue;
977: NEXT;
978: SKIP_BLANKS;
979:
980: href = xmlParseQuotedString(ctxt);
981: SKIP_BLANKS;
982: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
983: (NXT(2) == 'e') && (NXT(3) == 'f')) {
984: garbage = 0;
985: SKIP(4);
986: SKIP_BLANKS;
987:
988: if (CUR != '=') continue;
989: NEXT;
990: SKIP_BLANKS;
991:
992: href = xmlParseQuotedString(ctxt);
993: SKIP_BLANKS;
994: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
995: (NXT(2) == 'e') && (NXT(3) == 'f') &&
996: (NXT(4) == 'i') && (NXT(5) == 'x')) {
997: garbage = 0;
998: SKIP(6);
999: SKIP_BLANKS;
1000:
1001: if (CUR != '=') continue;
1002: NEXT;
1003: SKIP_BLANKS;
1004:
1005: prefix = xmlParseQuotedString(ctxt);
1006: SKIP_BLANKS;
1007: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1008: garbage = 0;
1009: SKIP(2);
1010: SKIP_BLANKS;
1011:
1012: if (CUR != '=') continue;
1013: NEXT;
1014: SKIP_BLANKS;
1015:
1016: prefix = xmlParseQuotedString(ctxt);
1017: SKIP_BLANKS;
1018: } else if ((CUR == '?') && (NXT(1) == '>')) {
1019: garbage = 0;
1020: CUR_PTR ++;
1021: } else {
1022: /*
1023: * Found garbage when parsing the namespace
1024: */
1025: if (!garbage)
1026: xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
1027: NEXT;
1028: }
1029: }
1030:
1031: MOVETO_ENDTAG(CUR_PTR);
1032: NEXT;
1033:
1034: /*
1035: * Register the DTD.
1036: */
1037: if (href != NULL)
1038: xmlNewGlobalNs(ctxt->doc, href, prefix);
1039:
1040: if (prefix != NULL) free(prefix);
1041: if (href != NULL) free(href);
1042: }
1043:
1.28 daniel 1044: /************************************************************************
1045: * *
1046: * The parser itself *
1047: * Relates to http://www.w3.org/TR/REC-xml *
1048: * *
1049: ************************************************************************/
1.14 veillard 1050:
1051: /*
1.1 veillard 1052: * xmlParseName : parse an XML name.
1.22 daniel 1053: *
1054: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1055: * CombiningChar | Extender
1056: *
1057: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1058: *
1059: * [6] Names ::= Name (S Name)*
1.1 veillard 1060: */
1061:
1.16 daniel 1062: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1063: const CHAR *q;
1064: CHAR *ret = NULL;
1.1 veillard 1065:
1.40 daniel 1066: if (!IS_LETTER(CUR) && (CUR != '_') &&
1067: (CUR != ':')) return(NULL);
1068: q = NEXT;
1069:
1070: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1071: (CUR == '.') || (CUR == '-') ||
1072: (CUR == '_') || (CUR == ':') ||
1073: (IS_COMBINING(CUR)) ||
1074: (IS_EXTENDER(CUR)))
1075: NEXT;
1.22 daniel 1076:
1.40 daniel 1077: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1078:
1079: return(ret);
1080: }
1081:
1082: /*
1083: * xmlParseNmtoken : parse an XML Nmtoken.
1084: *
1085: * [7] Nmtoken ::= (NameChar)+
1086: *
1087: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1088: */
1089:
1090: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1091: const CHAR *q;
1092: CHAR *ret = NULL;
1093:
1.40 daniel 1094: q = NEXT;
1.22 daniel 1095:
1.40 daniel 1096: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1097: (CUR == '.') || (CUR == '-') ||
1098: (CUR == '_') || (CUR == ':') ||
1099: (IS_COMBINING(CUR)) ||
1100: (IS_EXTENDER(CUR)))
1101: NEXT;
1.3 veillard 1102:
1.40 daniel 1103: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1104:
1.3 veillard 1105: return(ret);
1.1 veillard 1106: }
1107:
1108: /*
1.24 daniel 1109: * xmlParseEntityValue : parse a value for ENTITY decl.
1110: *
1111: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1112: * "'" ([^%&'] | PEReference | Reference)* "'"
1113: */
1114:
1115: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.46 ! daniel 1116: CHAR *ret = NULL, *cur;
1.24 daniel 1117: const CHAR *q;
1118:
1.40 daniel 1119: if (CUR == '"') {
1120: NEXT;
1.24 daniel 1121:
1.40 daniel 1122: q = CUR_PTR;
1123: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1124: if (CUR == '%') {
1.46 ! daniel 1125: ret = xmlStrncat(ret, q, CUR_PTR - q);
! 1126: cur = xmlParsePEReference(ctxt, 1);
! 1127: ret = xmlStrcat(ret, cur);
! 1128: q = CUR_PTR;
1.40 daniel 1129: } else if (CUR == '&') {
1.46 ! daniel 1130: ret = xmlStrncat(ret, q, CUR_PTR - q);
! 1131: cur = xmlParseReference(ctxt, 1);
! 1132: ret = xmlStrcat(ret, cur);
! 1133: q = CUR_PTR;
1.24 daniel 1134: } else
1.40 daniel 1135: NEXT;
1.24 daniel 1136: }
1.40 daniel 1137: if (!IS_CHAR(CUR)) {
1.31 daniel 1138: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 1139: } else {
1.46 ! daniel 1140: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1141: NEXT;
1.24 daniel 1142: }
1.40 daniel 1143: } else if (CUR == '\'') {
1144: NEXT;
1145: q = CUR_PTR;
1146: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1147: if (CUR == '%') {
1.46 ! daniel 1148: ret = xmlStrncat(ret, q, CUR_PTR - q);
! 1149: cur = xmlParsePEReference(ctxt, 1);
! 1150: ret = xmlStrcat(ret, cur);
! 1151: q = CUR_PTR;
1.40 daniel 1152: } else if (CUR == '&') {
1.46 ! daniel 1153: ret = xmlStrncat(ret, q, CUR_PTR - q);
! 1154: cur = xmlParseReference(ctxt, 1);
! 1155: ret = xmlStrcat(ret, cur);
! 1156: q = CUR_PTR;
1.24 daniel 1157: } else
1.40 daniel 1158: NEXT;
1.24 daniel 1159: }
1.40 daniel 1160: if (!IS_CHAR(CUR)) {
1.31 daniel 1161: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 1162: } else {
1.46 ! daniel 1163: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1164: NEXT;
1.24 daniel 1165: }
1166: } else {
1.31 daniel 1167: xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 1168: }
1169:
1170: return(ret);
1171: }
1172:
1173: /*
1.29 daniel 1174: * xmlParseAttValue : parse a value for an attribute
1175: *
1176: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1177: * "'" ([^<&'] | Reference)* "'"
1178: */
1179:
1180: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.46 ! daniel 1181: CHAR *ret = NULL, *cur;
1.29 daniel 1182: const CHAR *q;
1183:
1.40 daniel 1184: if (CUR == '"') {
1185: NEXT;
1.29 daniel 1186:
1.40 daniel 1187: q = CUR_PTR;
1188: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1189: if (CUR == '&') {
1.46 ! daniel 1190: ret = xmlStrncat(ret, q, CUR_PTR - q);
! 1191: cur = xmlParseReference(ctxt, 1);
! 1192: ret = xmlStrcat(ret, cur);
! 1193: q = CUR_PTR;
1.29 daniel 1194: } else
1.40 daniel 1195: NEXT;
1.29 daniel 1196: }
1.40 daniel 1197: if (!IS_CHAR(CUR)) {
1.31 daniel 1198: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 1199: } else {
1.46 ! daniel 1200: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1201: NEXT;
1.29 daniel 1202: }
1.40 daniel 1203: } else if (CUR == '\'') {
1204: NEXT;
1205: q = CUR_PTR;
1206: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1207: if (CUR == '&') {
1.46 ! daniel 1208: ret = xmlStrncat(ret, q, CUR_PTR - q);
! 1209: cur = xmlParseReference(ctxt, 1);
! 1210: ret = xmlStrcat(ret, cur);
! 1211: q = CUR_PTR;
1.29 daniel 1212: } else
1.40 daniel 1213: NEXT;
1.29 daniel 1214: }
1.40 daniel 1215: if (!IS_CHAR(CUR)) {
1.31 daniel 1216: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 1217: } else {
1.46 ! daniel 1218: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1219: NEXT;
1.29 daniel 1220: }
1221: } else {
1.31 daniel 1222: xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 1223: }
1224:
1225: return(ret);
1226: }
1227:
1228: /*
1.21 daniel 1229: * xmlParseSystemLiteral : parse an XML Literal
1230: *
1.22 daniel 1231: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 1232: */
1233:
1234: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1235: const CHAR *q;
1236: CHAR *ret = NULL;
1237:
1.40 daniel 1238: if (CUR == '"') {
1239: NEXT;
1240: q = CUR_PTR;
1241: while ((IS_CHAR(CUR)) && (CUR != '"'))
1242: NEXT;
1243: if (!IS_CHAR(CUR)) {
1.31 daniel 1244: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 1245: } else {
1.40 daniel 1246: ret = xmlStrndup(q, CUR_PTR - q);
1247: NEXT;
1.21 daniel 1248: }
1.40 daniel 1249: } else if (CUR == '\'') {
1250: NEXT;
1251: q = CUR_PTR;
1252: while ((IS_CHAR(CUR)) && (CUR != '\''))
1253: NEXT;
1254: if (!IS_CHAR(CUR)) {
1.31 daniel 1255: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 1256: } else {
1.40 daniel 1257: ret = xmlStrndup(q, CUR_PTR - q);
1258: NEXT;
1.21 daniel 1259: }
1260: } else {
1.31 daniel 1261: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1262: }
1263:
1264: return(ret);
1265: }
1266:
1267: /*
1.27 daniel 1268: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 1269: *
1.22 daniel 1270: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 1271: */
1272:
1273: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1274: const CHAR *q;
1275: CHAR *ret = NULL;
1276: /*
1277: * Name ::= (Letter | '_') (NameChar)*
1278: */
1.40 daniel 1279: if (CUR == '"') {
1280: NEXT;
1281: q = CUR_PTR;
1282: while (IS_PUBIDCHAR(CUR)) NEXT;
1283: if (CUR != '"') {
1.31 daniel 1284: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1285: } else {
1.40 daniel 1286: ret = xmlStrndup(q, CUR_PTR - q);
1287: NEXT;
1.21 daniel 1288: }
1.40 daniel 1289: } else if (CUR == '\'') {
1290: NEXT;
1291: q = CUR_PTR;
1292: while ((IS_LETTER(CUR)) && (CUR != '\''))
1293: NEXT;
1294: if (!IS_LETTER(CUR)) {
1.31 daniel 1295: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1296: } else {
1.40 daniel 1297: ret = xmlStrndup(q, CUR_PTR - q);
1298: NEXT;
1.21 daniel 1299: }
1300: } else {
1.31 daniel 1301: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1302: }
1303:
1304: return(ret);
1305: }
1306:
1307: /*
1.27 daniel 1308: * xmlParseCharData: parse a CharData section.
1309: * if we are within a CDATA section ']]>' marks an end of section.
1310: *
1311: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1312: */
1313:
1.45 daniel 1314: void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1315: const CHAR *q;
1316:
1.40 daniel 1317: q = CUR_PTR;
1318: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1319: (CUR != '&')) {
1320: NEXT;
1321: if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
1322: (NXT(2) == '>')) break;
1.27 daniel 1323: }
1.45 daniel 1324: if (q == CUR_PTR) return;
1325:
1326: /*
1327: * Ok the segment [q CUR_PTR] is to be consumed as chars.
1328: */
1329: if (ctxt->sax != NULL) {
1330: if (areBlanks(ctxt, q, CUR_PTR - q))
1331: ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1332: else
1333: ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1334: }
1.27 daniel 1335: }
1336:
1337: /*
1.22 daniel 1338: * xmlParseExternalID: Parse an External ID
1339: *
1340: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1341: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1342: */
1343:
1.39 daniel 1344: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1345: CHAR *URI = NULL;
1.22 daniel 1346:
1.40 daniel 1347: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1348: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1349: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1350: SKIP(6);
1.42 daniel 1351: SKIP_BLANKS;
1.39 daniel 1352: URI = xmlParseSystemLiteral(ctxt);
1353: if (URI == NULL)
1.31 daniel 1354: xmlParserError(ctxt,
1.39 daniel 1355: "xmlParseExternalID: SYSTEM, no URI\n");
1.40 daniel 1356: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1357: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1358: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1359: SKIP(6);
1.42 daniel 1360: SKIP_BLANKS;
1.39 daniel 1361: *publicID = xmlParsePubidLiteral(ctxt);
1362: if (*publicID == NULL)
1.31 daniel 1363: xmlParserError(ctxt,
1.39 daniel 1364: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.42 daniel 1365: SKIP_BLANKS;
1.39 daniel 1366: URI = xmlParseSystemLiteral(ctxt);
1367: if (URI == NULL)
1.31 daniel 1368: xmlParserError(ctxt,
1.39 daniel 1369: "xmlParseExternalID: PUBLIC, no URI\n");
1.22 daniel 1370: }
1.39 daniel 1371: return(URI);
1.22 daniel 1372: }
1373:
1374: /*
1.3 veillard 1375: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1376: * This may or may not create a node (depending on the context)
1.38 daniel 1377: * The spec says that "For compatibility, the string "--" (double-hyphen)
1378: * must not occur within comments. "
1.22 daniel 1379: *
1380: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1381: */
1.31 daniel 1382: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1383: xmlNodePtr ret = NULL;
1.17 daniel 1384: const CHAR *q, *start;
1385: const CHAR *r;
1.39 daniel 1386: CHAR *val;
1.3 veillard 1387:
1388: /*
1.22 daniel 1389: * Check that there is a comment right here.
1.3 veillard 1390: */
1.40 daniel 1391: if ((CUR != '<') || (NXT(1) != '!') ||
1392: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1393:
1.40 daniel 1394: SKIP(4);
1395: start = q = CUR_PTR;
1396: NEXT;
1397: r = CUR_PTR;
1398: NEXT;
1399: while (IS_CHAR(CUR) &&
1400: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1401: (*r != '-') || (*q != '-'))) {
1.38 daniel 1402: if ((*r == '-') && (*q == '-'))
1403: xmlParserError(ctxt,
1404: "Comment must not contain '--' (double-hyphen)`\n");
1.40 daniel 1405: NEXT;r++;q++;
1.3 veillard 1406: }
1.40 daniel 1407: if (!IS_CHAR(CUR)) {
1.31 daniel 1408: xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.3 veillard 1409: } else {
1.40 daniel 1410: NEXT;
1.31 daniel 1411: if (create) {
1.39 daniel 1412: val = xmlStrndup(start, q - start);
1413: ret = xmlNewComment(val);
1414: free(val);
1.31 daniel 1415: }
1.3 veillard 1416: }
1.39 daniel 1417: return(ret);
1.3 veillard 1418: }
1419:
1420: /*
1.22 daniel 1421: * xmlParsePITarget: parse the name of a PI
1422: *
1423: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1424: */
1425:
1426: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1427: CHAR *name;
1428:
1429: name = xmlParseName(ctxt);
1430: if ((name != NULL) && (name[3] == 0) &&
1431: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1432: ((name[1] == 'm') || (name[1] == 'M')) &&
1433: ((name[2] == 'l') || (name[2] == 'L'))) {
1434: xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1435: return(NULL);
1436: }
1437: return(name);
1438: }
1439:
1440: /*
1.3 veillard 1441: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 1442: *
1443: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 1444: */
1445:
1.16 daniel 1446: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1447: CHAR *target;
1448:
1.40 daniel 1449: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1450: /*
1451: * this is a Processing Instruction.
1452: */
1.40 daniel 1453: SKIP(2);
1.3 veillard 1454:
1455: /*
1.22 daniel 1456: * Parse the target name and check for special support like
1457: * namespace.
1458: *
1459: * TODO : PI handling should be dynamically redefinable using an
1460: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1461: */
1.22 daniel 1462: target = xmlParsePITarget(ctxt);
1463: if (target != NULL) {
1464: /*
1.44 daniel 1465: * Support for the old Processing Instruction related to namespace.
1.22 daniel 1466: */
1467: if ((target[0] == 'n') && (target[1] == 'a') &&
1468: (target[2] == 'm') && (target[3] == 'e') &&
1469: (target[4] == 's') && (target[5] == 'p') &&
1470: (target[6] == 'a') && (target[7] == 'c') &&
1471: (target[8] == 'e')) {
1472: xmlParseNamespace(ctxt);
1473: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1474: (target[2] == 'l') && (target[3] == ':') &&
1475: (target[4] == 'n') && (target[5] == 'a') &&
1476: (target[6] == 'm') && (target[7] == 'e') &&
1477: (target[8] == 's') && (target[9] == 'p') &&
1478: (target[10] == 'a') && (target[11] == 'c') &&
1479: (target[12] == 'e')) {
1480: xmlParseNamespace(ctxt);
1481: } else {
1.44 daniel 1482: const CHAR *q = CUR_PTR;
1483:
1.40 daniel 1484: while (IS_CHAR(CUR) &&
1485: ((CUR != '?') || (NXT(1) != '>')))
1486: NEXT;
1487: if (!IS_CHAR(CUR)) {
1.31 daniel 1488: xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
1489: target);
1.44 daniel 1490: } else {
1491: CHAR *data;
1492:
1493: data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1.40 daniel 1494: SKIP(2);
1.44 daniel 1495:
1496: /*
1497: * SAX: PI detected.
1498: */
1499: if (ctxt->sax)
1500: ctxt->sax->processingInstruction(ctxt, target, data);
1501: /*
1502: * Unknown PI, ignore it !
1503: */
1504: else
1505: xmlParserWarning(ctxt,
1506: "xmlParsePI : skipping unknown PI %s\n",
1507: target);
1508: free(data);
1509: }
1.22 daniel 1510: }
1.39 daniel 1511: free(target);
1.3 veillard 1512: } else {
1.31 daniel 1513: xmlParserError(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1514: /********* Should we try to complete parsing the PI ???
1.40 daniel 1515: while (IS_CHAR(CUR) &&
1516: (CUR != '?') && (CUR != '>'))
1517: NEXT;
1518: if (!IS_CHAR(CUR)) {
1.22 daniel 1519: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1520: target);
1521: }
1522: ********************************************************/
1523: }
1524: }
1525: }
1526:
1527: /*
1528: * xmlParseNotationDecl: parse a notation declaration
1529: *
1530: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1531: *
1532: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1533: *
1534: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1535: * 'PUBLIC' S PubidLiteral S SystemLiteral
1536: *
1537: * Hence there is actually 3 choices:
1538: * 'PUBLIC' S PubidLiteral
1539: * 'PUBLIC' S PubidLiteral S SystemLiteral
1540: * and 'SYSTEM' S SystemLiteral
1541: */
1542:
1543: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1544: CHAR *name;
1545:
1.40 daniel 1546: if ((CUR == '<') && (NXT(1) == '!') &&
1547: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1548: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1549: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1550: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1551: (IS_BLANK(NXT(10)))) {
1552: SKIP(10);
1.42 daniel 1553: SKIP_BLANKS;
1.22 daniel 1554:
1555: name = xmlParseName(ctxt);
1556: if (name == NULL) {
1.31 daniel 1557: xmlParserError(ctxt,
1558: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1559: return;
1560: }
1.42 daniel 1561: SKIP_BLANKS;
1.22 daniel 1562: /*
1.31 daniel 1563: * TODO !!!
1.22 daniel 1564: */
1.40 daniel 1565: while ((IS_CHAR(CUR)) && (CUR != '>'))
1566: NEXT;
1.22 daniel 1567: free(name);
1568: }
1569: }
1570:
1571: /*
1572: * xmlParseEntityDecl: parse <!ENTITY declarations
1573: *
1574: * [70] EntityDecl ::= GEDecl | PEDecl
1575: *
1576: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1577: *
1578: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1579: *
1580: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1581: *
1582: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1583: *
1584: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1585: */
1586:
1587: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 1588: CHAR *name = NULL;
1.24 daniel 1589: CHAR *value = NULL;
1.39 daniel 1590: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 1591: CHAR *ndata = NULL;
1.39 daniel 1592: int isParameter = 0;
1.22 daniel 1593:
1.40 daniel 1594: if ((CUR == '<') && (NXT(1) == '!') &&
1595: (NXT(2) == 'E') && (NXT(3) == 'N') &&
1596: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1597: (NXT(6) == 'T') && (NXT(7) == 'Y') &&
1598: (IS_BLANK(NXT(8)))) {
1599: SKIP(8);
1.42 daniel 1600: SKIP_BLANKS;
1.40 daniel 1601:
1602: if (CUR == '%') {
1603: NEXT;
1.42 daniel 1604: SKIP_BLANKS;
1.39 daniel 1605: isParameter = 1;
1.22 daniel 1606: }
1607:
1608: name = xmlParseName(ctxt);
1.24 daniel 1609: if (name == NULL) {
1.31 daniel 1610: xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1611: return;
1612: }
1.42 daniel 1613: SKIP_BLANKS;
1.24 daniel 1614:
1.22 daniel 1615: /*
1.24 daniel 1616: * TODO handle the various case of definitions...
1.22 daniel 1617: */
1.39 daniel 1618: if (isParameter) {
1.40 daniel 1619: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 1620: value = xmlParseEntityValue(ctxt);
1.39 daniel 1621: if (value) {
1622: xmlAddDocEntity(ctxt->doc, name,
1623: XML_INTERNAL_PARAMETER_ENTITY,
1624: NULL, NULL, value);
1625: }
1.24 daniel 1626: else {
1.39 daniel 1627: URI = xmlParseExternalID(ctxt, &literal);
1628: if (URI) {
1629: xmlAddDocEntity(ctxt->doc, name,
1630: XML_EXTERNAL_PARAMETER_ENTITY,
1631: literal, URI, NULL);
1632: }
1.24 daniel 1633: }
1634: } else {
1.40 daniel 1635: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 1636: value = xmlParseEntityValue(ctxt);
1.39 daniel 1637: xmlAddDocEntity(ctxt->doc, name,
1638: XML_INTERNAL_GENERAL_ENTITY,
1639: NULL, NULL, value);
1640: } else {
1641: URI = xmlParseExternalID(ctxt, &literal);
1.42 daniel 1642: SKIP_BLANKS;
1.40 daniel 1643: if ((CUR == 'N') && (NXT(1) == 'D') &&
1644: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1645: (NXT(4) == 'A')) {
1646: SKIP(5);
1.42 daniel 1647: SKIP_BLANKS;
1.24 daniel 1648: ndata = xmlParseName(ctxt);
1.39 daniel 1649: xmlAddDocEntity(ctxt->doc, name,
1650: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1651: literal, URI, ndata);
1652: } else {
1653: xmlAddDocEntity(ctxt->doc, name,
1654: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1655: literal, URI, NULL);
1.24 daniel 1656: }
1657: }
1658: }
1.42 daniel 1659: SKIP_BLANKS;
1.40 daniel 1660: if (CUR != '>') {
1.31 daniel 1661: xmlParserError(ctxt,
1662: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1663: } else
1.40 daniel 1664: NEXT;
1.39 daniel 1665: if (name != NULL) free(name);
1666: if (value != NULL) free(value);
1667: if (URI != NULL) free(URI);
1668: if (literal != NULL) free(literal);
1669: if (ndata != NULL) free(ndata);
1.22 daniel 1670: }
1671: }
1672:
1673: /*
1674: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1675: *
1676: * [57] EnumeratedType ::= NotationType | Enumeration
1677: *
1678: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1679: *
1680: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1681: */
1682:
1683: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1684: /*
1685: * TODO !!!
1686: */
1.40 daniel 1687: while ((IS_CHAR(CUR)) && (CUR != '>'))
1688: NEXT;
1.22 daniel 1689: }
1690:
1691: /*
1692: * xmlParseAttributeType: parse the Attribute list def for an element
1693: *
1694: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1695: *
1696: * [55] StringType ::= 'CDATA'
1697: *
1698: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1699: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1700: */
1701: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.45 daniel 1702: /* TODO !!! */
1.40 daniel 1703: if ((CUR == 'C') && (NXT(1) == 'D') &&
1704: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1705: (NXT(4) == 'A')) {
1706: SKIP(5);
1707: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
1708: SKIP(2);
1709: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1710: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1711: (NXT(4) == 'F')) {
1712: SKIP(5);
1713: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1714: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1715: (NXT(4) == 'F') && (NXT(5) == 'S')) {
1716: SKIP(6);
1717: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1718: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1719: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
1720: SKIP(6);
1721: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1722: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1723: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1724: (NXT(6) == 'E') && (NXT(7) == 'S')) {
1725: SKIP(8);
1726: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1727: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1728: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1729: (NXT(6) == 'N')) {
1730: SKIP(7);
1731: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1732: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1733: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1734: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.22 daniel 1735: } else {
1736: xmlParseEnumeratedType(ctxt, name);
1737: }
1738: }
1739:
1740: /*
1741: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1742: *
1743: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1744: *
1745: * [53] AttDef ::= S Name S AttType S DefaultDecl
1746: */
1747: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1748: CHAR *name;
1749:
1.45 daniel 1750: /* TODO !!! */
1.40 daniel 1751: if ((CUR == '<') && (NXT(1) == '!') &&
1752: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1753: (NXT(4) == 'T') && (NXT(5) == 'L') &&
1754: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1755: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1756: SKIP(9);
1.42 daniel 1757: SKIP_BLANKS;
1.22 daniel 1758: name = xmlParseName(ctxt);
1759: if (name == NULL) {
1.31 daniel 1760: xmlParserError(ctxt,
1761: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1762: return;
1763: }
1.42 daniel 1764: SKIP_BLANKS;
1.40 daniel 1765: while (CUR != '>') {
1766: const CHAR *check = CUR_PTR;
1.22 daniel 1767:
1768: xmlParseAttributeType(ctxt, name);
1.42 daniel 1769: SKIP_BLANKS;
1.40 daniel 1770: if (check == CUR_PTR) {
1.31 daniel 1771: xmlParserError(ctxt,
1772: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 1773: break;
1774: }
1775: }
1.40 daniel 1776: if (CUR == '>')
1777: NEXT;
1.22 daniel 1778:
1779: free(name);
1780: }
1781: }
1782:
1783: /*
1784: * xmlParseElementContentDecl: parse the declaration for an Element content
1785: * either Mixed or Children, the cases EMPTY and ANY being handled
1786: * int xmlParseElementDecl.
1787: *
1788: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1789: *
1790: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1791: *
1792: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1793: *
1794: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1795: *
1796: * or
1797: *
1798: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1799: * '(' S? '#PCDATA' S? ')'
1800: */
1801:
1802: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1803: /*
1804: * TODO This has to be parsed correctly, currently we just skip until
1805: * we reach the first '>'.
1.31 daniel 1806: * !!!
1.22 daniel 1807: */
1.40 daniel 1808: while ((IS_CHAR(CUR)) && (CUR != '>'))
1809: NEXT;
1.22 daniel 1810: }
1811:
1812: /*
1813: * xmlParseElementDecl: parse an Element declaration.
1814: *
1815: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1816: *
1817: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1818: *
1819: * TODO There is a check [ VC: Unique Element Type Declaration ]
1820: */
1821: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1822: CHAR *name;
1823:
1.40 daniel 1824: if ((CUR == '<') && (NXT(1) == '!') &&
1825: (NXT(2) == 'E') && (NXT(3) == 'L') &&
1826: (NXT(4) == 'E') && (NXT(5) == 'M') &&
1827: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1828: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1829: SKIP(9);
1.42 daniel 1830: SKIP_BLANKS;
1.22 daniel 1831: name = xmlParseName(ctxt);
1832: if (name == NULL) {
1.31 daniel 1833: xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 1834: return;
1835: }
1.42 daniel 1836: SKIP_BLANKS;
1.40 daniel 1837: if ((CUR == 'E') && (NXT(1) == 'M') &&
1838: (NXT(2) == 'P') && (NXT(3) == 'T') &&
1839: (NXT(4) == 'Y')) {
1840: SKIP(5);
1.22 daniel 1841: /*
1842: * Element must always be empty.
1843: */
1.40 daniel 1844: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
1845: (NXT(2) == 'Y')) {
1846: SKIP(3);
1.22 daniel 1847: /*
1848: * Element is a generic container.
1849: */
1850: } else {
1851: xmlParseElementContentDecl(ctxt, name);
1852: }
1.42 daniel 1853: SKIP_BLANKS;
1.40 daniel 1854: if (CUR != '>') {
1.31 daniel 1855: xmlParserError(ctxt,
1856: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 1857: } else
1.40 daniel 1858: NEXT;
1.22 daniel 1859: }
1860: }
1861:
1862: /*
1863: * xmlParseMarkupDecl: parse Markup declarations
1864: *
1865: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1866: * NotationDecl | PI | Comment
1867: *
1868: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1869: */
1870: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1871: xmlParseElementDecl(ctxt);
1872: xmlParseAttributeListDecl(ctxt);
1873: xmlParseEntityDecl(ctxt);
1874: xmlParseNotationDecl(ctxt);
1875: xmlParsePI(ctxt);
1.31 daniel 1876: xmlParseComment(ctxt, 0);
1.22 daniel 1877: }
1878:
1879: /*
1.24 daniel 1880: * xmlParseCharRef: parse Reference declarations
1881: *
1882: * [66] CharRef ::= '&#' [0-9]+ ';' |
1883: * '&#x' [0-9a-fA-F]+ ';'
1884: */
1.46 ! daniel 1885: CHAR *xmlParseCharRef(xmlParserCtxtPtr ctxt, int inLine) {
1.29 daniel 1886: int val = 0;
1.44 daniel 1887: CHAR buf[2];
1.24 daniel 1888:
1.40 daniel 1889: if ((CUR == '&') && (NXT(1) == '#') &&
1890: (NXT(2) == 'x')) {
1891: SKIP(3);
1892: while (CUR != ';') {
1893: if ((CUR >= '0') && (CUR <= '9'))
1894: val = val * 16 + (CUR - '0');
1895: else if ((CUR >= 'a') && (CUR <= 'f'))
1896: val = val * 16 + (CUR - 'a') + 10;
1897: else if ((CUR >= 'A') && (CUR <= 'F'))
1898: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 1899: else {
1.31 daniel 1900: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1901: val = 0;
1.24 daniel 1902: break;
1903: }
1904: }
1.40 daniel 1905: if (CUR != ';')
1906: NEXT;
1907: } else if ((CUR == '&') && (NXT(1) == '#')) {
1908: SKIP(2);
1909: while (CUR != ';') {
1910: if ((CUR >= '0') && (CUR <= '9'))
1911: val = val * 16 + (CUR - '0');
1.24 daniel 1912: else {
1.31 daniel 1913: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1914: val = 0;
1.24 daniel 1915: break;
1916: }
1917: }
1.40 daniel 1918: if (CUR != ';')
1919: NEXT;
1.24 daniel 1920: } else {
1.31 daniel 1921: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 1922: }
1.29 daniel 1923: /*
1924: * Check the value IS_CHAR ...
1925: */
1.44 daniel 1926: if (IS_CHAR(val)) {
1927: buf[0] = (CHAR) val;
1928: buf[1] = 0;
1.46 ! daniel 1929: if (inLine)
! 1930: return(xmlStrndup(buf, 1));
! 1931: else if (ctxt->sax != NULL)
1.45 daniel 1932: ctxt->sax->characters(ctxt, buf, 0, 1);
1.44 daniel 1933: } else {
1.39 daniel 1934: xmlParserError(ctxt, "xmlParseCharRef: invalid value");
1.29 daniel 1935: }
1.46 ! daniel 1936: return(NULL);
1.24 daniel 1937: }
1938:
1939: /*
1940: * xmlParseEntityRef: parse ENTITY references declarations
1941: *
1942: * [68] EntityRef ::= '&' Name ';'
1943: */
1.46 ! daniel 1944: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt, int inLine) {
! 1945: CHAR *ret = NULL;
1.24 daniel 1946: CHAR *name;
1.44 daniel 1947: xmlEntityPtr entity;
1.24 daniel 1948:
1.40 daniel 1949: if (CUR == '&') {
1950: NEXT;
1.24 daniel 1951: name = xmlParseName(ctxt);
1952: if (name == NULL) {
1.31 daniel 1953: xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 1954: } else {
1.40 daniel 1955: if (CUR == ';') {
1956: NEXT;
1.45 daniel 1957: entity = xmlGetDocEntity(ctxt->doc, name);
1958: if (entity == NULL) {
1.46 ! daniel 1959: /* TODO !!! Create a reference ! */
1.45 daniel 1960: xmlParserWarning(ctxt,
1961: "xmlParseEntityRef: &%s; not found\n", name);
1962: }
1.24 daniel 1963: /*
1.45 daniel 1964: * If we can get the content, push the entity content
1965: * as the next input stream.
1.24 daniel 1966: */
1.45 daniel 1967: else {
1968: switch (entity->type) {
1969: case XML_INTERNAL_PARAMETER_ENTITY:
1970: case XML_EXTERNAL_PARAMETER_ENTITY:
1971: xmlParserError(ctxt,
1972: "internal: xmlGetDtdEntity returned a general entity\n");
1973: break;
1974: case XML_INTERNAL_GENERAL_ENTITY:
1.46 ! daniel 1975: if (inLine)
! 1976: ret = entity->content;
! 1977: else
! 1978: xmlHandleEntity(ctxt, entity);
1.45 daniel 1979: break;
1980: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1981: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1982: xmlParserWarning(ctxt,
1983: "xmlParseEntityRef: external entity &%s; not supported\n",
1984: name);
1985: break;
1986: default:
1987: xmlParserError(ctxt,
1988: "internal: xmlParseEntityRef: unknown entity type %d\n",
1989: entity->type);
1990: }
1.44 daniel 1991: }
1.24 daniel 1992: } else {
1.46 ! daniel 1993: char cst[2] = { '&', 0 };
! 1994:
1.31 daniel 1995: xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.46 ! daniel 1996: ret = xmlStrndup(cst, 1);
! 1997: ret = xmlStrcat(ret, name);
1.24 daniel 1998: }
1.45 daniel 1999: free(name);
1.24 daniel 2000: }
2001: }
1.46 ! daniel 2002: return(ret);
1.24 daniel 2003: }
2004:
2005: /*
2006: * xmlParseReference: parse Reference declarations
2007: *
2008: * [67] Reference ::= EntityRef | CharRef
2009: */
1.46 ! daniel 2010: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt, int inLine) {
1.44 daniel 2011: if ((CUR == '&') && (NXT(1) == '#')) {
1.46 ! daniel 2012: return(xmlParseCharRef(ctxt, inLine));
1.44 daniel 2013: } else if (CUR == '&') {
1.46 ! daniel 2014: return(xmlParseEntityRef(ctxt, inLine));
1.24 daniel 2015: }
1.46 ! daniel 2016: return(NULL);
1.24 daniel 2017: }
2018:
2019: /*
1.22 daniel 2020: * xmlParsePEReference: parse PEReference declarations
2021: *
2022: * [69] PEReference ::= '%' Name ';'
2023: */
1.46 ! daniel 2024: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt, int inLine) {
! 2025: CHAR *ret = NULL;
1.22 daniel 2026: CHAR *name;
1.45 daniel 2027: xmlEntityPtr entity;
1.22 daniel 2028:
1.40 daniel 2029: if (CUR == '%') {
2030: NEXT;
1.22 daniel 2031: name = xmlParseName(ctxt);
2032: if (name == NULL) {
1.31 daniel 2033: xmlParserError(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 2034: } else {
1.40 daniel 2035: if (CUR == ';') {
2036: NEXT;
1.45 daniel 2037: entity = xmlGetDtdEntity(ctxt->doc, name);
2038: if (entity == NULL) {
2039: xmlParserWarning(ctxt,
2040: "xmlParsePEReference: %%%s; not found\n");
2041: }
1.22 daniel 2042: /*
1.45 daniel 2043: * If we can get the content, push the entity content
2044: * as the next input stream.
1.22 daniel 2045: */
1.45 daniel 2046: else {
2047: switch (entity->type) {
2048: case XML_INTERNAL_PARAMETER_ENTITY:
1.46 ! daniel 2049: if (inLine)
! 2050: ret = entity->content;
! 2051: else
! 2052: xmlNewEntityInputStream(ctxt, entity);
1.45 daniel 2053: break;
2054: case XML_EXTERNAL_PARAMETER_ENTITY:
2055: xmlParserWarning(ctxt,
2056: "xmlParsePEReference: external entity %%%s; not supported\n");
2057: break;
2058: case XML_INTERNAL_GENERAL_ENTITY:
2059: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
2060: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2061: xmlParserError(ctxt,
2062: "internal: xmlGetDtdEntity returned a general entity\n");
2063: break;
2064: default:
2065: xmlParserError(ctxt,
2066: "internal: xmlParsePEReference: unknown entity type %d\n",
2067: entity->type);
2068: }
2069: }
1.22 daniel 2070: } else {
1.46 ! daniel 2071: char cst[2] = { '&', 0 };
! 2072:
1.31 daniel 2073: xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
1.46 ! daniel 2074: ret = xmlStrndup(cst, 1);
! 2075: ret = xmlStrcat(ret, name);
1.22 daniel 2076: }
1.45 daniel 2077: free(name);
1.3 veillard 2078: }
2079: }
1.46 ! daniel 2080: return(ret);
1.3 veillard 2081: }
2082:
2083: /*
1.21 daniel 2084: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
2085: *
1.22 daniel 2086: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
2087: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 2088: */
2089:
2090: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 2091: xmlDtdPtr dtd;
1.21 daniel 2092: CHAR *name;
2093: CHAR *ExternalID = NULL;
1.39 daniel 2094: CHAR *URI = NULL;
1.21 daniel 2095:
2096: /*
2097: * We know that '<!DOCTYPE' has been detected.
2098: */
1.40 daniel 2099: SKIP(9);
1.21 daniel 2100:
1.42 daniel 2101: SKIP_BLANKS;
1.21 daniel 2102:
2103: /*
2104: * Parse the DOCTYPE name.
2105: */
2106: name = xmlParseName(ctxt);
2107: if (name == NULL) {
1.31 daniel 2108: xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 2109: }
2110:
1.42 daniel 2111: SKIP_BLANKS;
1.21 daniel 2112:
2113: /*
1.22 daniel 2114: * Check for SystemID and ExternalID
2115: */
1.39 daniel 2116: URI = xmlParseExternalID(ctxt, &ExternalID);
1.42 daniel 2117: SKIP_BLANKS;
1.36 daniel 2118:
1.39 daniel 2119: dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
1.22 daniel 2120:
2121: /*
2122: * Is there any DTD definition ?
2123: */
1.40 daniel 2124: if (CUR == '[') {
2125: NEXT;
1.22 daniel 2126: /*
2127: * Parse the succession of Markup declarations and
2128: * PEReferences.
2129: * Subsequence (markupdecl | PEReference | S)*
2130: */
1.40 daniel 2131: while (CUR != ']') {
2132: const CHAR *check = CUR_PTR;
1.22 daniel 2133:
1.42 daniel 2134: SKIP_BLANKS;
1.22 daniel 2135: xmlParseMarkupDecl(ctxt);
1.46 ! daniel 2136: xmlParsePEReference(ctxt, 0);
1.22 daniel 2137:
1.40 daniel 2138: if (CUR_PTR == check) {
1.31 daniel 2139: xmlParserError(ctxt,
2140: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 2141: break;
2142: }
2143: }
1.40 daniel 2144: if (CUR == ']') NEXT;
1.22 daniel 2145: }
2146:
2147: /*
2148: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 2149: */
1.40 daniel 2150: if (CUR != '>') {
1.31 daniel 2151: xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 2152: /* We shouldn't try to resynchronize ... */
1.21 daniel 2153: }
1.40 daniel 2154: NEXT;
1.22 daniel 2155:
2156: /*
2157: * Cleanup, since we don't use all those identifiers
2158: * TODO : the DOCTYPE if available should be stored !
2159: */
1.39 daniel 2160: if (URI != NULL) free(URI);
1.22 daniel 2161: if (ExternalID != NULL) free(ExternalID);
2162: if (name != NULL) free(name);
1.21 daniel 2163: }
2164:
2165: /*
1.3 veillard 2166: * xmlParseAttribute: parse a start of tag.
2167: *
1.22 daniel 2168: * [41] Attribute ::= Name Eq AttValue
2169: *
2170: * [25] Eq ::= S? '=' S?
2171: *
1.29 daniel 2172: * With namespace:
2173: *
2174: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 2175: *
2176: * Also the case QName == xmlns:??? is handled independently as a namespace
2177: * definition.
1.3 veillard 2178: */
2179:
1.16 daniel 2180: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 2181: CHAR *name, *value = NULL;
1.29 daniel 2182: CHAR *ns;
1.3 veillard 2183:
1.29 daniel 2184: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 2185: if (name == NULL) {
1.31 daniel 2186: xmlParserError(ctxt, "error parsing attribute name\n");
1.29 daniel 2187: return;
1.3 veillard 2188: }
2189:
2190: /*
1.29 daniel 2191: * read the value
1.3 veillard 2192: */
1.42 daniel 2193: SKIP_BLANKS;
1.40 daniel 2194: if (CUR == '=') {
2195: NEXT;
1.42 daniel 2196: SKIP_BLANKS;
1.29 daniel 2197: value = xmlParseAttValue(ctxt);
2198: } else {
1.31 daniel 2199: xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
2200: name);
1.3 veillard 2201: }
2202:
2203: /*
1.43 daniel 2204: * Check whether it's a namespace definition
2205: */
2206: if ((ns == NULL) &&
2207: (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
2208: (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
2209: /* a default namespace definition */
2210: xmlNewNs(node, value, NULL);
2211: if (name != NULL)
2212: free(name);
2213: if (value != NULL)
2214: free(value);
2215: return;
2216: }
2217: if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
2218: (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
2219: /* a standard namespace definition */
2220: xmlNewNs(node, value, name);
2221: if (name != NULL)
2222: free(name);
2223: if (value != NULL)
2224: free(value);
2225: return;
2226: }
2227:
2228: /*
1.3 veillard 2229: * Add the attribute to the node.
2230: */
1.17 daniel 2231: if (name != NULL) {
1.3 veillard 2232: xmlNewProp(node, name, value);
1.17 daniel 2233: free(name);
2234: }
1.29 daniel 2235: if (value != NULL)
1.17 daniel 2236: free(value);
1.3 veillard 2237: }
2238:
2239: /*
1.29 daniel 2240: * xmlParseStartTag: parse a start of tag either for rule element or
2241: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 2242: *
2243: * [40] STag ::= '<' Name (S Attribute)* S? '>'
2244: *
1.29 daniel 2245: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
2246: *
2247: * With namespace:
2248: *
2249: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
2250: *
2251: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.2 veillard 2252: */
2253:
1.16 daniel 2254: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 2255: CHAR *namespace, *name;
2256: xmlNsPtr ns = NULL;
1.2 veillard 2257: xmlNodePtr ret = NULL;
2258:
1.40 daniel 2259: if (CUR != '<') return(NULL);
2260: NEXT;
1.3 veillard 2261:
1.34 daniel 2262: name = xmlNamespaceParseQName(ctxt, &namespace);
1.3 veillard 2263:
1.43 daniel 2264: /*
2265: * Note : the namespace resolution is deferred until the end of the
2266: * attributes parsing, since local namespace can be defined as
2267: * an attribute at this level.
2268: */
1.34 daniel 2269: ret = xmlNewNode(ns, name, NULL);
1.2 veillard 2270:
1.3 veillard 2271: /*
2272: * Now parse the attributes, it ends up with the ending
2273: *
2274: * (S Attribute)* S?
2275: */
1.42 daniel 2276: SKIP_BLANKS;
1.40 daniel 2277: while ((IS_CHAR(CUR)) &&
2278: (CUR != '>') &&
2279: ((CUR != '/') || (NXT(1) != '>'))) {
2280: const CHAR *q = CUR_PTR;
1.29 daniel 2281:
2282: xmlParseAttribute(ctxt, ret);
1.42 daniel 2283: SKIP_BLANKS;
1.29 daniel 2284:
1.40 daniel 2285: if (q == CUR_PTR) {
1.31 daniel 2286: xmlParserError(ctxt,
2287: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 2288: break;
1.3 veillard 2289: }
2290: }
2291:
1.43 daniel 2292: /*
2293: * Search the namespace
2294: */
2295: ns = xmlSearchNs(ctxt->doc, ret, namespace);
2296: if (ns == NULL) /* ret still doesn't have a parent yet ! */
2297: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2298: xmlSetNs(ret, ns);
2299: if (namespace != NULL)
2300: free(namespace);
2301:
1.44 daniel 2302: /*
1.45 daniel 2303: * We are parsing a new node.
2304: */
2305: nodePush(ctxt, ret);
2306:
2307: /*
1.44 daniel 2308: * SAX: Start of Element !
2309: */
2310: if (ctxt->sax != NULL)
2311: ctxt->sax->startElement(ctxt, name);
2312:
1.3 veillard 2313: return(ret);
2314: }
2315:
2316: /*
1.27 daniel 2317: * xmlParseEndTag: parse an end of tag
2318: *
2319: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 2320: *
2321: * With namespace
2322: *
2323: * [9] ETag ::= '</' QName S? '>'
1.7 veillard 2324: */
2325:
1.34 daniel 2326: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
2327: CHAR *namespace, *name;
2328: xmlNsPtr ns = NULL;
1.7 veillard 2329:
1.34 daniel 2330: *nsPtr = NULL;
1.7 veillard 2331: *tagPtr = NULL;
2332:
1.40 daniel 2333: if ((CUR != '<') || (NXT(1) != '/')) {
1.31 daniel 2334: xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 2335: return;
2336: }
1.40 daniel 2337: SKIP(2);
1.7 veillard 2338:
1.34 daniel 2339: name = xmlNamespaceParseQName(ctxt, &namespace);
1.43 daniel 2340:
2341: /*
2342: * Search the namespace
2343: */
2344: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2345: if (namespace != NULL)
1.34 daniel 2346: free(namespace);
1.7 veillard 2347:
1.34 daniel 2348: *nsPtr = ns;
1.7 veillard 2349: *tagPtr = name;
2350:
2351: /*
2352: * We should definitely be at the ending "S? '>'" part
2353: */
1.42 daniel 2354: SKIP_BLANKS;
1.40 daniel 2355: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.31 daniel 2356: xmlParserError(ctxt, "End tag : expected '>'\n");
1.7 veillard 2357: } else
1.40 daniel 2358: NEXT;
1.7 veillard 2359:
2360: return;
2361: }
2362:
2363: /*
1.3 veillard 2364: * xmlParseCDSect: escaped pure raw content.
1.29 daniel 2365: *
2366: * [18] CDSect ::= CDStart CData CDEnd
2367: *
2368: * [19] CDStart ::= '<![CDATA['
2369: *
2370: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2371: *
2372: * [21] CDEnd ::= ']]>'
1.3 veillard 2373: */
1.45 daniel 2374: void xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2375: const CHAR *r, *s, *base;
1.3 veillard 2376:
1.40 daniel 2377: if ((CUR == '<') && (NXT(1) == '!') &&
2378: (NXT(2) == '[') && (NXT(3) == 'C') &&
2379: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2380: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2381: (NXT(8) == '[')) {
2382: SKIP(9);
1.29 daniel 2383: } else
1.45 daniel 2384: return;
1.40 daniel 2385: base = CUR_PTR;
2386: if (!IS_CHAR(CUR)) {
1.31 daniel 2387: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2388: return;
1.3 veillard 2389: }
1.40 daniel 2390: r = NEXT;
2391: if (!IS_CHAR(CUR)) {
1.31 daniel 2392: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2393: return;
1.3 veillard 2394: }
1.40 daniel 2395: s = NEXT;
2396: while (IS_CHAR(CUR) &&
2397: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
2398: r++;s++;NEXT;
1.3 veillard 2399: }
1.40 daniel 2400: if (!IS_CHAR(CUR)) {
1.31 daniel 2401: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2402: return;
1.3 veillard 2403: }
1.16 daniel 2404:
1.45 daniel 2405: /*
2406: * Ok the segment [base CUR_PTR] is to be consumed as chars.
2407: */
2408: if (ctxt->sax != NULL) {
2409: if (areBlanks(ctxt, base, CUR_PTR - base))
2410: ctxt->sax->ignorableWhitespace(ctxt, base, 0, CUR_PTR - base);
2411: else
2412: ctxt->sax->characters(ctxt, base, 0, CUR_PTR - base);
2413: }
1.2 veillard 2414: }
2415:
2416: /*
2417: * xmlParseContent: a content is
2418: * (element | PCData | Reference | CDSect | PI | Comment)
2419: *
1.27 daniel 2420: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2421: */
2422:
1.45 daniel 2423: void xmlParseContent(xmlParserCtxtPtr ctxt) {
1.2 veillard 2424: xmlNodePtr ret = NULL;
2425:
1.40 daniel 2426: while ((CUR != '<') || (NXT(1) != '/')) {
2427: const CHAR *test = CUR_PTR;
1.27 daniel 2428: ret = NULL;
2429:
2430: /*
2431: * First case : a Processing Instruction.
2432: */
1.40 daniel 2433: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 2434: xmlParsePI(ctxt);
2435: }
2436: /*
2437: * Second case : a CDSection
2438: */
1.40 daniel 2439: else if ((CUR == '<') && (NXT(1) == '!') &&
2440: (NXT(2) == '[') && (NXT(3) == 'C') &&
2441: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2442: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2443: (NXT(8) == '[')) {
1.45 daniel 2444: xmlParseCDSect(ctxt);
1.27 daniel 2445: }
2446: /*
2447: * Third case : a comment
2448: */
1.40 daniel 2449: else if ((CUR == '<') && (NXT(1) == '!') &&
2450: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 2451: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2452: }
2453: /*
2454: * Fourth case : a sub-element.
2455: */
1.40 daniel 2456: else if (CUR == '<') {
1.45 daniel 2457: ret = xmlParseElement(ctxt);
2458: }
2459: /*
2460: * Fifth case : a reference.
2461: */
2462: else if (CUR == '&') {
1.46 ! daniel 2463: xmlParseReference(ctxt, 0);
1.27 daniel 2464: }
2465: /*
2466: * Last case, text. Note that References are handled directly.
2467: */
2468: else {
1.45 daniel 2469: xmlParseCharData(ctxt, 0);
1.3 veillard 2470: }
1.14 veillard 2471:
2472: /*
1.45 daniel 2473: * Pop-up of finished entities.
1.14 veillard 2474: */
1.45 daniel 2475: while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
2476:
1.40 daniel 2477: if (test == CUR_PTR) {
1.31 daniel 2478: xmlParserError(ctxt, "detected an error in element content\n");
1.29 daniel 2479: break;
2480: }
1.3 veillard 2481: }
1.2 veillard 2482: }
2483:
2484: /*
2485: * xmlParseElement: parse an XML element
1.26 daniel 2486: *
2487: * [39] element ::= EmptyElemTag | STag content ETag
2488: *
2489: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 2490: */
1.26 daniel 2491:
1.2 veillard 2492:
1.45 daniel 2493: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2494: xmlNodePtr ret;
1.40 daniel 2495: const CHAR *openTag = CUR_PTR;
1.32 daniel 2496: xmlParserNodeInfo node_info;
1.27 daniel 2497: CHAR *endTag;
1.34 daniel 2498: xmlNsPtr endNs;
1.2 veillard 2499:
1.32 daniel 2500: /* Capture start position */
1.40 daniel 2501: node_info.begin_pos = CUR_PTR - ctxt->input->base;
2502: node_info.begin_line = ctxt->input->line;
1.32 daniel 2503:
1.16 daniel 2504: ret = xmlParseStartTag(ctxt);
1.3 veillard 2505: if (ret == NULL) {
2506: return(NULL);
2507: }
1.2 veillard 2508:
2509: /*
2510: * Check for an Empty Element.
2511: */
1.40 daniel 2512: if ((CUR == '/') && (NXT(1) == '>')) {
2513: SKIP(2);
1.45 daniel 2514: if (ctxt->sax != NULL)
2515: ctxt->sax->endElement(ctxt, ret->name);
2516:
2517: /*
2518: * end of parsing of this node.
2519: */
2520: nodePop(ctxt);
2521:
1.2 veillard 2522: return(ret);
2523: }
1.40 daniel 2524: if (CUR == '>') NEXT;
1.2 veillard 2525: else {
1.31 daniel 2526: xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
1.45 daniel 2527:
2528: /*
2529: * end of parsing of this node.
2530: */
2531: nodePop(ctxt);
2532:
1.16 daniel 2533: return(NULL);
1.2 veillard 2534: }
2535:
2536: /*
2537: * Parse the content of the element:
2538: */
1.45 daniel 2539: xmlParseContent(ctxt);
1.40 daniel 2540: if (!IS_CHAR(CUR)) {
1.31 daniel 2541: xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
2542: openTag);
1.45 daniel 2543:
2544: /*
2545: * end of parsing of this node.
2546: */
2547: nodePop(ctxt);
2548:
1.16 daniel 2549: return(NULL);
1.2 veillard 2550: }
2551:
2552: /*
1.27 daniel 2553: * parse the end of tag: '</' should be here.
1.2 veillard 2554: */
1.34 daniel 2555: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 2556:
1.27 daniel 2557: /*
2558: * Check that the Name in the ETag is the same as in the STag.
2559: */
1.34 daniel 2560: if (endNs != ret->ns) {
1.31 daniel 2561: xmlParserError(ctxt,
1.43 daniel 2562: "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
1.31 daniel 2563: openTag, endTag);
1.27 daniel 2564: }
1.32 daniel 2565: if (endTag == NULL ) {
2566: xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
1.45 daniel 2567: } else if (xmlStrcmp(ret->name, endTag)) {
1.31 daniel 2568: xmlParserError(ctxt,
2569: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2570: openTag, endTag);
1.27 daniel 2571: }
1.44 daniel 2572: /*
2573: * SAX: End of Tag
2574: */
2575: else if (ctxt->sax != NULL)
2576: ctxt->sax->endElement(ctxt, endTag);
1.7 veillard 2577:
1.44 daniel 2578: if (endTag != NULL)
2579: free(endTag);
1.2 veillard 2580:
1.32 daniel 2581: /* Capture end position and add node */
2582: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 2583: node_info.end_pos = CUR_PTR - ctxt->input->base;
2584: node_info.end_line = ctxt->input->line;
1.32 daniel 2585: node_info.node = ret;
2586: xmlParserAddNodeInfo(ctxt, &node_info);
2587: }
1.43 daniel 2588:
2589: /*
2590: * end of parsing of this node.
2591: */
2592: nodePop(ctxt);
2593:
1.2 veillard 2594: return(ret);
2595: }
2596:
2597: /*
1.29 daniel 2598: * xmlParseVersionNum: parse the XML version value.
2599: *
2600: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2601: */
2602: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 2603: const CHAR *q = CUR_PTR;
1.29 daniel 2604: CHAR *ret;
2605:
1.40 daniel 2606: while (IS_CHAR(CUR) &&
2607: (((CUR >= 'a') && (CUR <= 'z')) ||
2608: ((CUR >= 'A') && (CUR <= 'Z')) ||
2609: ((CUR >= '0') && (CUR <= '9')) ||
2610: (CUR == '_') || (CUR == '.') ||
2611: (CUR == ':') || (CUR == '-'))) NEXT;
2612: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2613: return(ret);
2614: }
2615:
2616: /*
2617: * xmlParseVersionInfo: parse the XML version.
2618: *
2619: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2620: *
2621: * [25] Eq ::= S? '=' S?
2622: */
2623:
2624: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2625: CHAR *version = NULL;
2626: const CHAR *q;
2627:
1.40 daniel 2628: if ((CUR == 'v') && (NXT(1) == 'e') &&
2629: (NXT(2) == 'r') && (NXT(3) == 's') &&
2630: (NXT(4) == 'i') && (NXT(5) == 'o') &&
2631: (NXT(6) == 'n')) {
2632: SKIP(7);
1.42 daniel 2633: SKIP_BLANKS;
1.40 daniel 2634: if (CUR != '=') {
1.31 daniel 2635: xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
2636: return(NULL);
2637: }
1.40 daniel 2638: NEXT;
1.42 daniel 2639: SKIP_BLANKS;
1.40 daniel 2640: if (CUR == '"') {
2641: NEXT;
2642: q = CUR_PTR;
1.29 daniel 2643: version = xmlParseVersionNum(ctxt);
1.40 daniel 2644: if (CUR != '"')
1.31 daniel 2645: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2646: else
1.40 daniel 2647: NEXT;
2648: } else if (CUR == '\''){
2649: NEXT;
2650: q = CUR_PTR;
1.29 daniel 2651: version = xmlParseVersionNum(ctxt);
1.40 daniel 2652: if (CUR != '\'')
1.31 daniel 2653: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2654: else
1.40 daniel 2655: NEXT;
1.31 daniel 2656: } else {
2657: xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 2658: }
2659: }
2660: return(version);
2661: }
2662:
2663: /*
2664: * xmlParseEncName: parse the XML encoding name
2665: *
2666: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2667: */
2668: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 2669: const CHAR *q = CUR_PTR;
1.29 daniel 2670: CHAR *ret = NULL;
2671:
1.40 daniel 2672: if (((CUR >= 'a') && (CUR <= 'z')) ||
2673: ((CUR >= 'A') && (CUR <= 'Z'))) {
2674: NEXT;
2675: while (IS_CHAR(CUR) &&
2676: (((CUR >= 'a') && (CUR <= 'z')) ||
2677: ((CUR >= 'A') && (CUR <= 'Z')) ||
2678: ((CUR >= '0') && (CUR <= '9')) ||
2679: (CUR == '-'))) NEXT;
2680: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2681: } else {
1.31 daniel 2682: xmlParserError(ctxt, "Invalid XML encoding name\n");
1.29 daniel 2683: }
2684: return(ret);
2685: }
2686:
2687: /*
2688: * xmlParseEncodingDecl: parse the XML encoding declaration
2689: *
2690: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2691: */
2692:
2693: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2694: CHAR *encoding = NULL;
2695: const CHAR *q;
2696:
1.42 daniel 2697: SKIP_BLANKS;
1.40 daniel 2698: if ((CUR == 'e') && (NXT(1) == 'n') &&
2699: (NXT(2) == 'c') && (NXT(3) == 'o') &&
2700: (NXT(4) == 'd') && (NXT(5) == 'i') &&
2701: (NXT(6) == 'n') && (NXT(7) == 'g')) {
2702: SKIP(8);
1.42 daniel 2703: SKIP_BLANKS;
1.40 daniel 2704: if (CUR != '=') {
1.31 daniel 2705: xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
2706: return(NULL);
2707: }
1.40 daniel 2708: NEXT;
1.42 daniel 2709: SKIP_BLANKS;
1.40 daniel 2710: if (CUR == '"') {
2711: NEXT;
2712: q = CUR_PTR;
1.29 daniel 2713: encoding = xmlParseEncName(ctxt);
1.40 daniel 2714: if (CUR != '"')
1.31 daniel 2715: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2716: else
1.40 daniel 2717: NEXT;
2718: } else if (CUR == '\''){
2719: NEXT;
2720: q = CUR_PTR;
1.29 daniel 2721: encoding = xmlParseEncName(ctxt);
1.40 daniel 2722: if (CUR != '\'')
1.31 daniel 2723: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2724: else
1.40 daniel 2725: NEXT;
2726: } else if (CUR == '"'){
1.31 daniel 2727: xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 2728: }
2729: }
2730: return(encoding);
2731: }
2732:
2733: /*
2734: * xmlParseSDDecl: parse the XML standalone declaration
2735: *
2736: * [32] SDDecl ::= S 'standalone' Eq
2737: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2738: */
2739:
2740: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2741: int standalone = -1;
2742:
1.42 daniel 2743: SKIP_BLANKS;
1.40 daniel 2744: if ((CUR == 's') && (NXT(1) == 't') &&
2745: (NXT(2) == 'a') && (NXT(3) == 'n') &&
2746: (NXT(4) == 'd') && (NXT(5) == 'a') &&
2747: (NXT(6) == 'l') && (NXT(7) == 'o') &&
2748: (NXT(8) == 'n') && (NXT(9) == 'e')) {
2749: SKIP(10);
2750: if (CUR != '=') {
1.32 daniel 2751: xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
2752: return(standalone);
2753: }
1.40 daniel 2754: NEXT;
1.42 daniel 2755: SKIP_BLANKS;
1.40 daniel 2756: if (CUR == '\''){
2757: NEXT;
2758: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 2759: standalone = 0;
1.40 daniel 2760: SKIP(2);
2761: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2762: (NXT(2) == 's')) {
1.29 daniel 2763: standalone = 1;
1.40 daniel 2764: SKIP(3);
1.29 daniel 2765: } else {
1.31 daniel 2766: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2767: }
1.40 daniel 2768: if (CUR != '\'')
1.31 daniel 2769: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2770: else
1.40 daniel 2771: NEXT;
2772: } else if (CUR == '"'){
2773: NEXT;
2774: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 2775: standalone = 0;
1.40 daniel 2776: SKIP(2);
2777: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2778: (NXT(2) == 's')) {
1.29 daniel 2779: standalone = 1;
1.40 daniel 2780: SKIP(3);
1.29 daniel 2781: } else {
1.31 daniel 2782: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2783: }
1.40 daniel 2784: if (CUR != '"')
1.31 daniel 2785: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2786: else
1.40 daniel 2787: NEXT;
1.37 daniel 2788: } else {
2789: xmlParserError(ctxt, "Standalone value not found\n");
2790: }
1.29 daniel 2791: }
2792: return(standalone);
2793: }
2794:
2795: /*
1.1 veillard 2796: * xmlParseXMLDecl: parse an XML declaration header
1.29 daniel 2797: *
2798: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 2799: */
2800:
1.16 daniel 2801: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 2802: CHAR *version;
2803:
2804: /*
1.19 daniel 2805: * We know that '<?xml' is here.
1.1 veillard 2806: */
1.40 daniel 2807: SKIP(5);
1.1 veillard 2808:
1.42 daniel 2809: SKIP_BLANKS;
1.1 veillard 2810:
2811: /*
1.29 daniel 2812: * We should have the VersionInfo here.
1.1 veillard 2813: */
1.29 daniel 2814: version = xmlParseVersionInfo(ctxt);
2815: if (version == NULL)
1.45 daniel 2816: version = xmlCharStrdup(XML_DEFAULT_VERSION);
2817: ctxt->doc = xmlNewDoc(version);
2818: free(version);
1.29 daniel 2819:
2820: /*
2821: * We may have the encoding declaration
2822: */
1.32 daniel 2823: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 2824:
2825: /*
1.29 daniel 2826: * We may have the standalone status.
1.1 veillard 2827: */
1.32 daniel 2828: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 2829:
1.42 daniel 2830: SKIP_BLANKS;
1.40 daniel 2831: if ((CUR == '?') && (NXT(1) == '>')) {
2832: SKIP(2);
2833: } else if (CUR == '>') {
1.31 daniel 2834: /* Deprecated old WD ... */
2835: xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
1.40 daniel 2836: NEXT;
1.29 daniel 2837: } else {
1.31 daniel 2838: xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
1.40 daniel 2839: MOVETO_ENDTAG(CUR_PTR);
2840: NEXT;
1.29 daniel 2841: }
1.1 veillard 2842: }
2843:
2844: /*
1.22 daniel 2845: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 2846: * Misc*
2847: *
1.22 daniel 2848: * [27] Misc ::= Comment | PI | S
1.1 veillard 2849: */
2850:
1.16 daniel 2851: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 2852: while (((CUR == '<') && (NXT(1) == '?')) ||
2853: ((CUR == '<') && (NXT(1) == '!') &&
2854: (NXT(2) == '-') && (NXT(3) == '-')) ||
2855: IS_BLANK(CUR)) {
2856: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 2857: xmlParsePI(ctxt);
1.40 daniel 2858: } else if (IS_BLANK(CUR)) {
2859: NEXT;
1.1 veillard 2860: } else
1.31 daniel 2861: xmlParseComment(ctxt, 0);
1.1 veillard 2862: }
2863: }
2864:
2865: /*
1.16 daniel 2866: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 2867: *
1.22 daniel 2868: * [1] document ::= prolog element Misc*
1.29 daniel 2869: *
2870: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 2871: */
2872:
1.16 daniel 2873: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 2874: xmlDefaultSAXHandlerInit();
2875:
1.14 veillard 2876: /*
1.44 daniel 2877: * SAX: beginning of the document processing.
2878: */
2879: if (ctxt->sax)
2880: ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
2881: if (ctxt->sax)
2882: ctxt->sax->startDocument(ctxt);
2883:
2884: /*
1.14 veillard 2885: * We should check for encoding here and plug-in some
2886: * conversion code TODO !!!!
2887: */
1.1 veillard 2888:
2889: /*
2890: * Wipe out everything which is before the first '<'
2891: */
1.42 daniel 2892: SKIP_BLANKS;
1.1 veillard 2893:
2894: /*
2895: * Check for the XMLDecl in the Prolog.
2896: */
1.40 daniel 2897: if ((CUR == '<') && (NXT(1) == '?') &&
2898: (NXT(2) == 'x') && (NXT(3) == 'm') &&
2899: (NXT(4) == 'l')) {
1.19 daniel 2900: xmlParseXMLDecl(ctxt);
2901: /* SKIP_EOL(cur); */
1.42 daniel 2902: SKIP_BLANKS;
1.40 daniel 2903: } else if ((CUR == '<') && (NXT(1) == '?') &&
2904: (NXT(2) == 'X') && (NXT(3) == 'M') &&
2905: (NXT(4) == 'L')) {
1.19 daniel 2906: /*
2907: * The first drafts were using <?XML and the final W3C REC
2908: * now use <?xml ...
2909: */
1.16 daniel 2910: xmlParseXMLDecl(ctxt);
1.1 veillard 2911: /* SKIP_EOL(cur); */
1.42 daniel 2912: SKIP_BLANKS;
1.1 veillard 2913: } else {
1.45 daniel 2914: CHAR *version;
2915:
2916: version = xmlCharStrdup(XML_DEFAULT_VERSION);
2917: ctxt->doc = xmlNewDoc(version);
2918: free(version);
1.1 veillard 2919: }
2920:
2921: /*
2922: * The Misc part of the Prolog
2923: */
1.16 daniel 2924: xmlParseMisc(ctxt);
1.1 veillard 2925:
2926: /*
1.29 daniel 2927: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 2928: * (doctypedecl Misc*)?
2929: */
1.40 daniel 2930: if ((CUR == '<') && (NXT(1) == '!') &&
2931: (NXT(2) == 'D') && (NXT(3) == 'O') &&
2932: (NXT(4) == 'C') && (NXT(5) == 'T') &&
2933: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
2934: (NXT(8) == 'E')) {
1.22 daniel 2935: xmlParseDocTypeDecl(ctxt);
2936: xmlParseMisc(ctxt);
1.21 daniel 2937: }
2938:
2939: /*
2940: * Time to start parsing the tree itself
1.1 veillard 2941: */
1.45 daniel 2942: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 2943:
2944: /*
2945: * The Misc part at the end
2946: */
2947: xmlParseMisc(ctxt);
1.16 daniel 2948:
1.44 daniel 2949: /*
2950: * SAX: end of the document processing.
2951: */
2952: if (ctxt->sax)
2953: ctxt->sax->endDocument(ctxt);
1.16 daniel 2954: return(0);
2955: }
2956:
2957: /*
2958: * xmlParseDoc : parse an XML in-memory document and build a tree.
2959: */
2960:
2961: xmlDocPtr xmlParseDoc(CHAR *cur) {
2962: xmlDocPtr ret;
2963: xmlParserCtxtPtr ctxt;
1.40 daniel 2964: xmlParserInputPtr input;
1.16 daniel 2965:
2966: if (cur == NULL) return(NULL);
1.1 veillard 2967:
1.16 daniel 2968: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2969: if (ctxt == NULL) {
2970: perror("malloc");
2971: return(NULL);
2972: }
1.40 daniel 2973: xmlInitParserCtxt(ctxt);
2974: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2975: if (input == NULL) {
2976: perror("malloc");
2977: free(ctxt);
2978: return(NULL);
2979: }
2980:
2981: input->filename = NULL;
2982: input->line = 1;
2983: input->col = 1;
2984: input->base = cur;
2985: input->cur = cur;
2986:
2987: inputPush(ctxt, input);
1.16 daniel 2988:
2989:
2990: xmlParseDocument(ctxt);
2991: ret = ctxt->doc;
2992: free(ctxt);
2993:
1.1 veillard 2994: return(ret);
2995: }
2996:
1.9 httpng 2997: /*
2998: * xmlParseFile : parse an XML file and build a tree.
2999: */
3000:
3001: xmlDocPtr xmlParseFile(const char *filename) {
3002: xmlDocPtr ret;
1.20 daniel 3003: #ifdef HAVE_ZLIB_H
3004: gzFile input;
3005: #else
1.9 httpng 3006: int input;
1.20 daniel 3007: #endif
1.9 httpng 3008: int res;
3009: struct stat buf;
3010: char *buffer;
1.16 daniel 3011: xmlParserCtxtPtr ctxt;
1.40 daniel 3012: xmlParserInputPtr inputStream;
1.9 httpng 3013:
1.11 veillard 3014: res = stat(filename, &buf);
1.9 httpng 3015: if (res < 0) return(NULL);
3016:
1.20 daniel 3017: #ifdef HAVE_ZLIB_H
3018: retry_bigger:
3019: buffer = malloc((buf.st_size * 20) + 100);
3020: #else
1.9 httpng 3021: buffer = malloc(buf.st_size + 100);
1.20 daniel 3022: #endif
1.9 httpng 3023: if (buffer == NULL) {
3024: perror("malloc");
3025: return(NULL);
3026: }
3027:
3028: memset(buffer, 0, sizeof(buffer));
1.20 daniel 3029: #ifdef HAVE_ZLIB_H
3030: input = gzopen (filename, "r");
3031: if (input == NULL) {
3032: fprintf (stderr, "Cannot read file %s :\n", filename);
3033: perror ("gzopen failed");
3034: return(NULL);
3035: }
3036: #else
1.9 httpng 3037: input = open (filename, O_RDONLY);
3038: if (input < 0) {
3039: fprintf (stderr, "Cannot read file %s :\n", filename);
3040: perror ("open failed");
3041: return(NULL);
3042: }
1.20 daniel 3043: #endif
3044: #ifdef HAVE_ZLIB_H
3045: res = gzread(input, buffer, 20 * buf.st_size);
3046: #else
1.9 httpng 3047: res = read(input, buffer, buf.st_size);
1.20 daniel 3048: #endif
1.9 httpng 3049: if (res < 0) {
3050: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 3051: #ifdef HAVE_ZLIB_H
3052: perror ("gzread failed");
3053: #else
1.9 httpng 3054: perror ("read failed");
1.20 daniel 3055: #endif
1.9 httpng 3056: return(NULL);
3057: }
1.20 daniel 3058: #ifdef HAVE_ZLIB_H
3059: gzclose(input);
3060: if (res >= 20 * buf.st_size) {
3061: free(buffer);
3062: buf.st_size *= 2;
3063: goto retry_bigger;
3064: }
3065: buf.st_size = res;
3066: #else
1.9 httpng 3067: close(input);
1.20 daniel 3068: #endif
3069:
1.40 daniel 3070: buffer[buf.st_size] = '\0';
1.9 httpng 3071:
1.16 daniel 3072: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3073: if (ctxt == NULL) {
3074: perror("malloc");
3075: return(NULL);
3076: }
1.40 daniel 3077: xmlInitParserCtxt(ctxt);
3078: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3079: if (inputStream == NULL) {
3080: perror("malloc");
3081: free(ctxt);
3082: return(NULL);
3083: }
3084:
3085: inputStream->filename = strdup(filename);
3086: inputStream->line = 1;
3087: inputStream->col = 1;
1.45 daniel 3088:
3089: /*
3090: * TODO : plug some encoding conversion routines here. !!!
3091: */
1.40 daniel 3092: inputStream->base = buffer;
3093: inputStream->cur = buffer;
1.16 daniel 3094:
1.40 daniel 3095: inputPush(ctxt, inputStream);
1.16 daniel 3096:
3097: xmlParseDocument(ctxt);
1.40 daniel 3098:
1.16 daniel 3099: ret = ctxt->doc;
1.9 httpng 3100: free(buffer);
1.20 daniel 3101: free(ctxt);
3102:
3103: return(ret);
3104: }
3105:
1.32 daniel 3106:
1.20 daniel 3107: /*
1.32 daniel 3108: * xmlParseMemory : parse an XML memory block and build a tree.
1.20 daniel 3109: */
3110: xmlDocPtr xmlParseMemory(char *buffer, int size) {
3111: xmlDocPtr ret;
3112: xmlParserCtxtPtr ctxt;
1.40 daniel 3113: xmlParserInputPtr input;
3114:
3115: buffer[size - 1] = '\0';
3116:
1.20 daniel 3117: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3118: if (ctxt == NULL) {
3119: perror("malloc");
3120: return(NULL);
3121: }
1.40 daniel 3122: xmlInitParserCtxt(ctxt);
3123: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3124: if (input == NULL) {
3125: perror("malloc");
3126: free(ctxt);
3127: return(NULL);
3128: }
1.20 daniel 3129:
1.40 daniel 3130: input->filename = NULL;
3131: input->line = 1;
3132: input->col = 1;
1.45 daniel 3133:
3134: /*
3135: * TODO : plug some encoding conversion routines here. !!!
3136: */
1.40 daniel 3137: input->base = buffer;
3138: input->cur = buffer;
1.20 daniel 3139:
1.40 daniel 3140: inputPush(ctxt, input);
1.20 daniel 3141:
3142: xmlParseDocument(ctxt);
1.40 daniel 3143:
1.20 daniel 3144: ret = ctxt->doc;
1.16 daniel 3145: free(ctxt);
3146:
1.9 httpng 3147: return(ret);
1.17 daniel 3148: }
3149:
3150:
3151: /* Initialize parser context */
3152: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
3153: {
1.40 daniel 3154: /* Allocate the Input stack */
3155: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
3156: ctxt->inputNr = 0;
3157: ctxt->inputMax = 5;
3158: ctxt->input = NULL;
3159:
1.43 daniel 3160: /* Allocate the Node stack */
3161: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
3162: ctxt->nodeNr = 0;
3163: ctxt->nodeMax = 10;
3164: ctxt->node = NULL;
3165:
1.45 daniel 3166: ctxt->sax = &xmlDefaultSAXHandler;
1.32 daniel 3167: ctxt->doc = NULL;
3168: ctxt->record_info = 0;
3169: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 3170: }
3171:
3172:
1.19 daniel 3173: /*
3174: * Clear (release owned resources) and reinitialize context
3175: */
1.32 daniel 3176: void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 3177: {
1.32 daniel 3178: xmlClearNodeInfoSeq(&ctxt->node_seq);
3179: xmlInitParserCtxt(ctxt);
1.17 daniel 3180: }
3181:
3182:
1.19 daniel 3183: /*
3184: * Setup the parser context to parse a new buffer; Clears any prior
3185: * contents from the parser context. The buffer parameter must not be
3186: * NULL, but the filename parameter can be
3187: */
1.17 daniel 3188: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
3189: const char* filename)
3190: {
1.40 daniel 3191: xmlParserInputPtr input;
3192:
3193: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3194: if (input == NULL) {
3195: perror("malloc");
3196: free(ctxt);
3197: exit(1);
3198: }
3199:
1.17 daniel 3200: xmlClearParserCtxt(ctxt);
1.40 daniel 3201: if (input->filename != NULL)
3202: input->filename = strdup(filename);
3203: else
3204: input->filename = NULL;
3205: input->line = 1;
3206: input->col = 1;
3207: input->base = buffer;
3208: input->cur = buffer;
3209:
3210: inputPush(ctxt, input);
1.17 daniel 3211: }
3212:
1.32 daniel 3213:
3214: /*
3215: * xmlParserFindNodeInfo : Find the parser node info struct for a given node
3216: */
3217: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
3218: const xmlNode* node)
3219: {
3220: unsigned long pos;
3221:
3222: /* Find position where node should be at */
3223: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3224: if ( ctx->node_seq.buffer[pos].node == node )
3225: return &ctx->node_seq.buffer[pos];
3226: else
3227: return NULL;
3228: }
3229:
3230:
3231: /*
3232: * xmlInitNodeInfoSeq -- Initialize (set to initial state) node info sequence
3233: */
3234: void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3235: {
3236: seq->length = 0;
3237: seq->maximum = 0;
3238: seq->buffer = NULL;
3239: }
3240:
3241: /*
3242: * xmlClearNodeInfoSeq -- Clear (release memory and reinitialize) node
3243: * info sequence
3244: */
3245: void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3246: {
3247: if ( seq->buffer != NULL )
3248: free(seq->buffer);
3249: xmlInitNodeInfoSeq(seq);
3250: }
3251:
3252:
3253: /*
3254: * xmlParserFindNodeInfoIndex : Find the index that the info record for
3255: * the given node is or should be at in a sorted sequence
3256: */
3257: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
3258: const xmlNode* node)
3259: {
3260: unsigned long upper, lower, middle;
3261: int found = 0;
3262:
3263: /* Do a binary search for the key */
3264: lower = 1;
3265: upper = seq->length;
3266: middle = 0;
3267: while ( lower <= upper && !found) {
3268: middle = lower + (upper - lower) / 2;
3269: if ( node == seq->buffer[middle - 1].node )
3270: found = 1;
3271: else if ( node < seq->buffer[middle - 1].node )
3272: upper = middle - 1;
3273: else
3274: lower = middle + 1;
3275: }
3276:
3277: /* Return position */
3278: if ( middle == 0 || seq->buffer[middle - 1].node < node )
3279: return middle;
3280: else
3281: return middle - 1;
3282: }
3283:
3284:
3285: /*
3286: * xmlParserAddNodeInfo : Insert node info record into sorted sequence
3287: */
3288: void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
3289: const xmlParserNodeInfo* info)
3290: {
3291: unsigned long pos;
3292: static unsigned int block_size = 5;
3293:
3294: /* Find pos and check to see if node is already in the sequence */
3295: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
3296: if ( pos < ctx->node_seq.length
3297: && ctx->node_seq.buffer[pos].node == info->node ) {
3298: ctx->node_seq.buffer[pos] = *info;
3299: }
3300:
3301: /* Otherwise, we need to add new node to buffer */
3302: else {
3303: /* Expand buffer by 5 if needed */
3304: if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
3305: xmlParserNodeInfo* tmp_buffer;
3306: unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
3307: *(ctx->node_seq.maximum + block_size));
3308:
3309: if ( ctx->node_seq.buffer == NULL )
3310: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
3311: else
3312: tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
3313:
3314: if ( tmp_buffer == NULL ) {
3315: xmlParserError(ctx, "Out of memory");
3316: return;
3317: }
3318: ctx->node_seq.buffer = tmp_buffer;
3319: ctx->node_seq.maximum += block_size;
3320: }
3321:
3322: /* If position is not at end, move elements out of the way */
3323: if ( pos != ctx->node_seq.length ) {
3324: unsigned long i;
3325:
3326: for ( i = ctx->node_seq.length; i > pos; i-- )
3327: ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
3328: }
3329:
3330: /* Copy element and increase length */
3331: ctx->node_seq.buffer[pos] = *info;
3332: ctx->node_seq.length++;
3333: }
3334: }
Webmaster