Annotation of XML/parser.c, revision 1.45
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.45 ! daniel 6: * $Id: parser.c,v 1.44 1998/08/09 04:52:34 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
1.45 ! daniel 34: /************************************************************************
! 35: * *
! 36: * Parser stacks related functions and macros *
! 37: * *
! 38: ************************************************************************/
/*
 * Generic functions for accessing stacks in the Parser Context.
 *
 * PUSH_AND_POP(type, name) expands to two functions operating on the
 * context fields name##Tab (storage), name##Nr (number of entries),
 * name##Max (capacity) and name (cached top of the stack):
 *
 *   int  name##Push(ctxt, value)  grows the table by doubling when it is
 *                                 full (exits the process if realloc
 *                                 fails), stores value, makes it the
 *                                 current top and returns its index.
 *   type name##Pop(ctxt)          drops the top entry and returns the new
 *                                 top of the stack, or 0 when the stack
 *                                 was already empty or has just become
 *                                 empty.
 */

#define PUSH_AND_POP(type, name)                                        \
int name##Push(xmlParserCtxtPtr ctxt, type value) {                     \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        ctxt->name##Max *= 2;                                           \
        ctxt->name##Tab = (void *) realloc(ctxt->name##Tab,             \
                     ctxt->name##Max * sizeof(ctxt->name##Tab[0]));     \
        if (ctxt->name##Tab == NULL) {                                  \
            fprintf(stderr, "realloc failed !\n");                      \
            exit(1);                                                    \
        }                                                               \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
type name##Pop(xmlParserCtxtPtr ctxt) {                                 \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    /* Popping the last entry must not read name##Tab[-1]. */           \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = 0;                                                 \
    return(ctxt->name);                                                 \
}
64:
1.40 daniel 65: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 66: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 67:
1.45 ! daniel 68: /*************
1.40 daniel 69: #define CUR (*(ctxt->input->cur) ? *(ctxt->input->cur) : xmlPopInput(ctxt))
1.45 ! daniel 70: #define NEXT (((*(ctxt->input->cur) == '\n') ? \
! 71: (ctxt->input->line++, ctxt->input->col = 1) : \
! 72: (ctxt->input->col++)), ctxt->input->cur++)
! 73: *************/
! 74:
/*
 * Cursor helpers; every one of them expects a variable named `ctxt`
 * (the parser context) to be in scope at the point of use.
 *
 * CUR         current character of the current input.
 * NEXT        advance by one character, keeping line/col up to date; on
 *             the terminating 0 it calls xmlPopInput() instead of
 *             advancing.  Evaluates to the position before the advance,
 *             or to the current position after the xmlPopInput() call.
 * CUR_PTR     current read pointer.
 * NXT(val)    character `val` positions after the current one.
 * SKIP(val)   move the read pointer by `val` without touching line/col.
 * SKIP_BLANKS advance (with NEXT) past any run of blank characters.
 */
#define CUR (ctxt->input->cur[0])
#define NEXT ((CUR == 0) ?                                              \
    (xmlPopInput(ctxt), ctxt->input->cur) :                             \
    (((CUR == '\n') ?                                                   \
        (ctxt->input->line += 1, ctxt->input->col = 1) :                \
        (ctxt->input->col += 1)),                                       \
     ctxt->input->cur++))

#define CUR_PTR (ctxt->input->cur)

#define NXT(val) (*(ctxt->input->cur + (val)))

#define SKIP(val) (ctxt->input->cur += (val))
#define SKIP_BLANKS                                                     \
    while (IS_BLANK(CUR)) NEXT
89:
1.40 daniel 90:
91: /*
92: * xmlPopInput: the current input pointed by ctxt->input came to an end
93: * pop it and return the next char.
1.45 ! daniel 94: *
! 95: * TODO A deallocation of the popped Input structure is needed
1.40 daniel 96: */
97: CHAR xmlPopInput(xmlParserCtxtPtr ctxt) {
98: if (ctxt->inputNr == 1) return(0); /* End of main Input */
99: inputPop(ctxt);
100: return(CUR);
101: }
102:
103: /*
104: * xmlPushInput: switch to a new input stream which is stacked on top
105: * of the previous one(s).
106: */
107: void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
108: if (input == NULL) return;
109: inputPush(ctxt, input);
110: }
111:
112: /*
1.45 ! daniel 113: * Create a new input stream based on a memory buffer.
! 114: */
! 115: void xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
! 116: xmlParserInputPtr input;
! 117:
! 118: if (entity == NULL) {
! 119: xmlParserError(ctxt,
! 120: "internal: xmlNewEntityInputStream entity = NULL\n");
! 121: return;
! 122: }
! 123: if (entity->content == NULL) {
! 124: xmlParserError(ctxt,
! 125: "internal: xmlNewEntityInputStream entity->input = NULL\n");
! 126: return;
! 127: }
! 128: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
! 129: if (input == NULL) {
! 130: xmlParserError(ctxt, "malloc: couldn't allocate a new input stream\n");
! 131: return;
! 132: }
! 133: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
! 134: input->base = entity->content;
! 135: input->cur = entity->content;
! 136: input->line = 1;
! 137: input->col = 1;
! 138: xmlPushInput(ctxt, input);
! 139: }
! 140:
! 141: /*
1.40 daniel 142: * A few macros needed to help building the parser.
143: */
144:
1.1 veillard 145: #ifdef UNICODE
1.30 daniel 146: /************************************************************************
147: * *
148: * UNICODE version of the macros. *
149: * *
150: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * NOTE(review): this test only rejects control characters below #x20
 * (other than tab, LF, CR) and the two values FFFE / FFFF; values in the
 * surrogate blocks and values above #x10FFFF are accepted.  Confirm
 * whether that is intended for the CHAR width in use.
 */
#define IS_CHAR(c)                                                      \
    (((c) >= 0x20) ?                                                    \
        (((c) != 0xFFFE) && ((c) != 0xFFFF)) :                          \
        (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D)))
159:
1.22 daniel 160: /*
161: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
162: */
1.42 daniel 163: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
164: ((c) == 0x0D))
1.1 veillard 165:
1.22 daniel 166: /*
1.30 daniel 167: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 168: *
1.30 daniel 169: * VI is your friend !
170: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
171: * and
172: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 173: */
1.1 veillard 174: #define IS_BASECHAR(c) \
1.30 daniel 175: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
176: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
177: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
178: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
179: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
180: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
181: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
182: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
183: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
184: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
185: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
186: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
187: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
188: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
189: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
190: ((c) == 0x0386) || \
191: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
192: ((c) == 0x038C) || \
193: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
194: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
195: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
196: ((c) == 0x03DA) || \
197: ((c) == 0x03DC) || \
198: ((c) == 0x03DE) || \
199: ((c) == 0x03E0) || \
200: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
201: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
202: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
203: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
204: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
205: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
206: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
207: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
208: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
209: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
210: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
211: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
212: ((c) == 0x0559) || \
213: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
214: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
215: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
216: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
217: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
218: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
219: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
220: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
221: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
222: ((c) == 0x06D5) || \
223: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
224: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
225: ((c) == 0x093D) || \
226: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
227: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
228: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
229: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
230: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
231: ((c) == 0x09B2) || \
232: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
233: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
234: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
235: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
236: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
237: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
238: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
239: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
240: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
241: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
242: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
243: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
244: ((c) == 0x0A5E) || \
245: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
246: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
247: ((c) == 0x0A8D) || \
248: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
249: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
250: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
251: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
252: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
253: ((c) == 0x0ABD) || \
254: ((c) == 0x0AE0) || \
255: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
256: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
257: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
258: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
259: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
260: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
261: ((c) == 0x0B3D) || \
262: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
263: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
264: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
265: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
266: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
267: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
268: ((c) == 0x0B9C) || \
269: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
270: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
271: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
272: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
273: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
274: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
275: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
276: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
277: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
278: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
279: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
280: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
281: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
282: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
283: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
284: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
285: ((c) == 0x0CDE) || \
286: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
287: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
288: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
289: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
290: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
291: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
292: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
293: ((c) == 0x0E30) || \
294: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
295: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
296: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
297: ((c) == 0x0E84) || \
298: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
299: ((c) == 0x0E8A) || \
300: ((c) == 0x0E8D) || \
301: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
302: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
303: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
304: ((c) == 0x0EA5) || \
305: ((c) == 0x0EA7) || \
306: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
307: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
308: ((c) == 0x0EB0) || \
309: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
310: ((c) == 0x0EBD) || \
311: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
312: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
313: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
314: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
315: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
316: ((c) == 0x1100) || \
317: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
318: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
319: ((c) == 0x1109) || \
320: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
321: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
322: ((c) == 0x113C) || \
323: ((c) == 0x113E) || \
324: ((c) == 0x1140) || \
325: ((c) == 0x114C) || \
326: ((c) == 0x114E) || \
327: ((c) == 0x1150) || \
328: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
329: ((c) == 0x1159) || \
330: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
331: ((c) == 0x1163) || \
332: ((c) == 0x1165) || \
333: ((c) == 0x1167) || \
334: ((c) == 0x1169) || \
335: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
336: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
337: ((c) == 0x1175) || \
338: ((c) == 0x119E) || \
339: ((c) == 0x11A8) || \
340: ((c) == 0x11AB) || \
341: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
342: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
343: ((c) == 0x11BA) || \
344: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
345: ((c) == 0x11EB) || \
346: ((c) == 0x11F0) || \
347: ((c) == 0x11F9) || \
348: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
349: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
350: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
351: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
352: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
353: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
354: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
355: ((c) == 0x1F59) || \
356: ((c) == 0x1F5B) || \
357: ((c) == 0x1F5D) || \
358: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
359: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
360: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
361: ((c) == 0x1FBE) || \
362: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
363: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
364: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
365: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
366: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
367: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
368: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
369: ((c) == 0x2126) || \
370: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
371: ((c) == 0x212E) || \
372: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
373: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
374: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
375: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
376: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 377:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * True when c lies in one of the fifteen ten-value (nine for #x0BE7)
 * digit ranges listed below.
 */
#define IS_DIGIT(c)                                                     \
    (((0x0030 <= (c)) && ((c) <= 0x0039)) ||                            \
     ((0x0660 <= (c)) && ((c) <= 0x0669)) ||                            \
     ((0x06F0 <= (c)) && ((c) <= 0x06F9)) ||                            \
     ((0x0966 <= (c)) && ((c) <= 0x096F)) ||                            \
     ((0x09E6 <= (c)) && ((c) <= 0x09EF)) ||                            \
     ((0x0A66 <= (c)) && ((c) <= 0x0A6F)) ||                            \
     ((0x0AE6 <= (c)) && ((c) <= 0x0AEF)) ||                            \
     ((0x0B66 <= (c)) && ((c) <= 0x0B6F)) ||                            \
     ((0x0BE7 <= (c)) && ((c) <= 0x0BEF)) ||                            \
     ((0x0C66 <= (c)) && ((c) <= 0x0C6F)) ||                            \
     ((0x0CE6 <= (c)) && ((c) <= 0x0CEF)) ||                            \
     ((0x0D66 <= (c)) && ((c) <= 0x0D6F)) ||                            \
     ((0x0E50 <= (c)) && ((c) <= 0x0E59)) ||                            \
     ((0x0ED0 <= (c)) && ((c) <= 0x0ED9)) ||                            \
     ((0x0F20 <= (c)) && ((c) <= 0x0F29)))
1.1 veillard 397:
1.22 daniel 398: /*
399: * [87] CombiningChar ::= ... long list see REC ...
400: */
1.30 daniel 401: #define IS_COMBINING(c) \
402: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
403: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
404: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
405: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
406: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
407: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
408: ((c) == 0x05BF) || \
409: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
410: ((c) == 0x05C4) || \
411: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
412: ((c) == 0x0670) || \
413: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
414: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
415: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
416: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
417: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
418: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
419: ((c) == 0x093C) || \
420: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
421: ((c) == 0x094D) || \
422: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
423: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
424: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
425: ((c) == 0x09BC) || \
426: ((c) == 0x09BE) || \
427: ((c) == 0x09BF) || \
428: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
429: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
430: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
431: ((c) == 0x09D7) || \
432: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
433: ((c) == 0x0A02) || \
434: ((c) == 0x0A3C) || \
435: ((c) == 0x0A3E) || \
436: ((c) == 0x0A3F) || \
437: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
438: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
439: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
440: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
441: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
442: ((c) == 0x0ABC) || \
443: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
444: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
445: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
446: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
447: ((c) == 0x0B3C) || \
448: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
449: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
450: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
451: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
452: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
453: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
454: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
455: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
456: ((c) == 0x0BD7) || \
457: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
458: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
459: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
460: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
461: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
462: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
463: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
464: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
465: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
466: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
467: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
468: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
469: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
470: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
471: ((c) == 0x0D57) || \
472: ((c) == 0x0E31) || \
473: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
474: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
475: ((c) == 0x0EB1) || \
476: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
477: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
478: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
479: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
480: ((c) == 0x0F35) || \
481: ((c) == 0x0F37) || \
482: ((c) == 0x0F39) || \
483: ((c) == 0x0F3E) || \
484: ((c) == 0x0F3F) || \
485: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
486: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
487: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
488: ((c) == 0x0F97) || \
489: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
490: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
491: ((c) == 0x0FB9) || \
492: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
493: ((c) == 0x20E1) || \
494: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
495: ((c) == 0x3099) || \
496: ((c) == 0x309A))
1.3 veillard 497:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Each alternative of the production is tested in turn; the ranges are
 * inclusive.
 */
#define IS_EXTENDER(c)                                                  \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||               \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||              \
     ((c) == 0xec6) || ((c) == 0x3005) ||                               \
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||                            \
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||                            \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 510:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the range
 * #xF900-#xFA2D that used to be tested here is not part of it.
 */
#define IS_IDEOGRAPHIC(c)                                               \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||                            \
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||                            \
      ((c) == 0x3007))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
524:
525: #else
1.30 daniel 526: /************************************************************************
527: * *
528: * 8bits / ASCII version of the macros. *
529: * *
530: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * 8-bit build: tab, LF, CR and everything from #x20 upwards.
 */
#define IS_CHAR(c)                                                      \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))

/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8-bit build: ASCII letters plus the accented letters of the upper
 * half (#xC0-#xFF without #xD7 and #xF7).  The former test
 * "(c) >= 0xaa && (c) <= 0x5b" could never be true and has been dropped.
 * NOTE(review): 0xba is accepted here although the UNICODE version of
 * this macro does not list #x00BA - confirm which one is intended.
 */
#define IS_BASECHAR(c)                                                  \
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||                                \
     (((c) >= 0x61) && ((c) <= 0x7a)) ||                                \
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||                                \
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||                                \
     (((c) >= 0xf8) && ((c) <= 0xff)) ||                                \
      ((c) == 0xba))

/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8-bit build: ASCII digits only.
 */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8-bit build: there is no ideographic character, so Letter is BaseChar.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))


/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8-bit build: never true.
 */
#define IS_COMBINING(c) 0

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8-bit build: only #xB7 fits.
 */
#define IS_EXTENDER(c) ((c) == 0xb7)
574:
1.21 daniel 575: #endif /* !UNICODE */
1.1 veillard 576:
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * The UNICODE section above already provides an identical IS_BLANK;
 * the guard keeps a single definition per build.
 */
#ifndef IS_BLANK
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) ||  \
                     ((c) == 0x0D))
#endif
584:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Tested in this order: ASCII letters and digits, the three blank
 * characters of the production, then the punctuation set.
 */
#define IS_PUBIDCHAR(c)                                                 \
    (((('a' <= (c)) && ((c) <= 'z'))) ||                                \
     ((('A' <= (c)) && ((c) <= 'Z'))) ||                                \
     ((('0' <= (c)) && ((c) <= '9'))) ||                                \
     ((c) == ' ') || ((c) == '\r') || ((c) == '\n') ||                  \
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||   \
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||    \
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||    \
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||    \
     ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 598:
/*
 * SKIP_EOL(p): advance the pointer p past one end-of-line sequence:
 * CR, CR LF, LF or LF CR.  p is left untouched when it does not point
 * at CR (0x0D) or LF (0x0A).  p is evaluated several times.
 */
#define SKIP_EOL(p)                                                     \
    if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; }              \
    else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; }
602:
/*
 * MOVETO_ENDTAG(p): advance p until it points at '>' or at a character
 * rejected by IS_CHAR (which includes the terminating 0).
 */
#define MOVETO_ENDTAG(p)                                                \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++

/*
 * MOVETO_STARTTAG(p): advance p until it points at '<' or at a character
 * rejected by IS_CHAR (which includes the terminating 0).
 */
#define MOVETO_STARTTAG(p)                                              \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
1.1 veillard 608:
1.28 daniel 609: /************************************************************************
610: * *
611: * Commodity functions to handle CHARs *
612: * *
613: ************************************************************************/
614:
1.3 veillard 615: /*
1.45 ! daniel 616: * xmlStrndup : a strndup for array of CHAR's
1.1 veillard 617: */
618:
1.6 httpng 619: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 620: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
621:
622: if (ret == NULL) {
623: fprintf(stderr, "malloc of %d byte failed\n",
624: (len + 1) * sizeof(CHAR));
625: return(NULL);
626: }
627: memcpy(ret, cur, len * sizeof(CHAR));
628: ret[len] = 0;
629: return(ret);
630: }
631:
632: /*
633: * xmlStrdup : a strdup for CHAR's
634: */
635:
1.6 httpng 636: CHAR *xmlStrdup(const CHAR *cur) {
637: const CHAR *p = cur;
1.1 veillard 638:
639: while (IS_CHAR(*p)) p++;
640: return(xmlStrndup(cur, p - cur));
641: }
642:
643: /*
1.45 ! daniel 644: * xmlCharStrndup : a strndup for char's to CHAR's
! 645: */
! 646:
! 647: CHAR *xmlCharStrndup(const char *cur, int len) {
! 648: int i;
! 649: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
! 650:
! 651: if (ret == NULL) {
! 652: fprintf(stderr, "malloc of %d byte failed\n",
! 653: (len + 1) * sizeof(CHAR));
! 654: return(NULL);
! 655: }
! 656: for (i = 0;i < len;i++)
! 657: ret[i] = (CHAR) cur[i];
! 658: ret[len] = 0;
! 659: return(ret);
! 660: }
! 661:
! 662: /*
! 663: * xmlCharStrdup : a strdup for char's to CHAR's
! 664: */
! 665:
! 666: CHAR *xmlCharStrdup(const char *cur) {
! 667: const char *p = cur;
! 668:
! 669: while (*p != '\0') p++;
! 670: return(xmlCharStrndup(cur, p - cur));
! 671: }
! 672:
! 673: /*
1.14 veillard 674: * xmlStrcmp : a strcmp for CHAR's
675: */
676:
677: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
678: register int tmp;
679:
680: do {
681: tmp = *str1++ - *str2++;
682: if (tmp != 0) return(tmp);
683: } while ((*str1 != 0) && (*str2 != 0));
684: return (*str1 - *str2);
685: }
686:
687: /*
688: * xmlStrncmp : a strncmp for CHAR's
689: */
690:
691: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
692: register int tmp;
693:
694: if (len <= 0) return(0);
695: do {
696: tmp = *str1++ - *str2++;
697: if (tmp != 0) return(tmp);
698: len--;
699: if (len <= 0) return(0);
700: } while ((*str1 != 0) && (*str2 != 0));
701: return (*str1 - *str2);
702: }
703:
704: /*
705: * xmlStrchr : a strchr for CHAR's
706: */
707:
708: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
709: while (*str != 0) {
710: if (*str == val) return((CHAR *) str);
711: str++;
712: }
713: return(NULL);
714: }
1.28 daniel 715:
1.45 ! daniel 716: /*
! 717: * xmlStrlen : lenght of a CHAR's string
! 718: */
! 719:
! 720: int xmlStrlen(const CHAR *str) {
! 721: int len = 0;
! 722:
! 723: if (str == NULL) return(0);
! 724: while (*str != 0) {
! 725: str++;
! 726: len++;
! 727: }
! 728: return(len);
! 729: }
! 730:
! 731: /*
! 732: * xmlStrncat : a strncat for array of CHAR's
! 733: */
! 734:
! 735: CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len) {
! 736: int size;
! 737: CHAR *ret;
! 738:
! 739: if ((add == NULL) || (len == 0))
! 740: return(cur);
! 741: if (cur == NULL)
! 742: return(xmlStrndup(add, len));
! 743:
! 744: size = xmlStrlen(cur);
! 745: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
! 746: if (ret == NULL) {
! 747: fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
! 748: (size + len + 1) * sizeof(CHAR));
! 749: return(cur);
! 750: }
! 751: memcpy(&ret[size], add, len * sizeof(CHAR));
! 752: ret[size + len] = 0;
! 753: return(ret);
! 754: }
! 755:
! 756: /*
! 757: * xmlStrcat : a strcat for CHAR's
! 758: */
! 759:
! 760: CHAR *xmlStrcat(CHAR *cur, const CHAR *add) {
! 761: const CHAR *p = add;
! 762:
! 763: if (add == NULL) return(cur);
! 764: if (cur == NULL)
! 765: return(xmlStrdup(add));
! 766:
! 767: while (IS_CHAR(*p)) p++;
! 768: return(xmlStrncat(cur, add, p - add));
! 769: }
! 770:
! 771: /************************************************************************
! 772: * *
! 773: * Commodity functions, cleanup needed ? *
! 774: * *
! 775: ************************************************************************/
! 776:
! 777: /*
! 778: * Is this a sequence of blank chars that one can ignore ?
! 779: */
! 780:
! 781: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
! 782: int i;
! 783: xmlNodePtr lastChild;
! 784:
! 785: for (i = 0;i < len;i++)
! 786: if (!(IS_BLANK(str[i]))) return(0);
! 787:
! 788: if (CUR != '<') return(0);
! 789: lastChild = xmlGetLastChild(ctxt->node);
! 790: if (lastChild == NULL) {
! 791: if (ctxt->node->content != NULL) return(0);
! 792: } else if (xmlNodeIsText(lastChild))
! 793: return(0);
! 794: return(1);
! 795: }
! 796:
! 797: /*
! 798: * Handling of defined entities, when should we define a new input
! 799: * stream ? When do we just handle that as a set of chars ?
! 800: */
! 801:
! 802: void xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
! 803: int len;
! 804:
! 805: if (entity->content == NULL) {
! 806: xmlParserError(ctxt, "xmlHandleEntity %s: content == NULL\n",
! 807: entity->name);
! 808: return;
! 809: }
! 810: len = xmlStrlen(entity->content);
! 811: if (len <= 2) goto handle_as_char;
! 812:
! 813: /*
! 814: * Redefine its content as an input stream.
! 815: */
! 816: xmlNewEntityInputStream(ctxt, entity);
! 817: return;
! 818:
! 819: handle_as_char:
! 820: /*
! 821: * Just handle the content as a set of chars.
! 822: */
! 823: if (ctxt->sax != NULL)
! 824: ctxt->sax->characters(ctxt, entity->content, 0, len);
! 825:
! 826: }
! 827:
/*
 * Forward definition for recursive behaviour.
 */
! 831: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
! 832:
1.28 daniel 833: /************************************************************************
834: * *
835: * Extra stuff for namespace support *
836: * Relates to http://www.w3.org/TR/WD-xml-names *
837: * *
838: ************************************************************************/
839:
840: /*
841: * xmlNamespaceParseNCName : parse an XML namespace name.
842: *
843: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
844: *
845: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
846: * CombiningChar | Extender
847: */
848:
849: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
850: const CHAR *q;
851: CHAR *ret = NULL;
852:
1.40 daniel 853: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
854: q = NEXT;
1.28 daniel 855:
1.40 daniel 856: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
857: (CUR == '.') || (CUR == '-') ||
858: (CUR == '_') ||
859: (IS_COMBINING(CUR)) ||
860: (IS_EXTENDER(CUR)))
861: NEXT;
1.28 daniel 862:
1.40 daniel 863: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 864:
865: return(ret);
866: }
867:
868: /*
869: * xmlNamespaceParseQName : parse an XML qualified name
870: *
871: * [NS 5] QName ::= (Prefix ':')? LocalPart
872: *
873: * [NS 6] Prefix ::= NCName
874: *
875: * [NS 7] LocalPart ::= NCName
876: */
877:
878: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
879: CHAR *ret = NULL;
880:
881: *prefix = NULL;
882: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 883: if (CUR == ':') {
1.28 daniel 884: *prefix = ret;
1.40 daniel 885: NEXT;
1.28 daniel 886: ret = xmlNamespaceParseNCName(ctxt);
887: }
888:
889: return(ret);
890: }
891:
892: /*
893: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
894: *
895: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
896: *
897: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
898: */
899:
1.39 daniel 900: CHAR *xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 901: CHAR *name = NULL;
902:
1.40 daniel 903: if ((CUR == 'x') && (NXT(1) == 'm') &&
904: (NXT(2) == 'l') && (NXT(3) == 'n') &&
905: (NXT(4) == 's')) {
906: SKIP(5);
907: if (CUR == ':') {
908: NEXT;
1.28 daniel 909: name = xmlNamespaceParseNCName(ctxt);
910: }
911: }
1.39 daniel 912: return(name);
1.28 daniel 913: }
914:
1.45 ! daniel 915: /*
! 916: * [OLD] Parse and return a string between quotes or doublequotes
! 917: */
! 918: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
! 919: CHAR *ret = NULL;
! 920: const CHAR *q;
! 921:
! 922: if (CUR == '"') {
! 923: NEXT;
! 924: q = CUR_PTR;
! 925: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
! 926: if (CUR != '"')
! 927: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
! 928: else {
! 929: ret = xmlStrndup(q, CUR_PTR - q);
! 930: NEXT;
! 931: }
! 932: } else if (CUR == '\''){
! 933: NEXT;
! 934: q = CUR_PTR;
! 935: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
! 936: if (CUR != '\'')
! 937: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
! 938: else {
! 939: ret = xmlStrndup(q, CUR_PTR - q);
! 940: NEXT;
! 941: }
! 942: }
! 943: return(ret);
! 944: }
! 945:
! 946: /*
! 947: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
! 948: *
! 949: * This is what the older xml-name Working Draft specified, a bunch of
! 950: * other stuff may still rely on it, so support is still here as
! 951: * if ot was declared on the root of the Tree:-(
! 952: */
! 953:
! 954: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
! 955: CHAR *href = NULL;
! 956: CHAR *prefix = NULL;
! 957: int garbage = 0;
! 958:
! 959: /*
! 960: * We just skipped "namespace" or "xml:namespace"
! 961: */
! 962: SKIP_BLANKS;
! 963:
! 964: while (IS_CHAR(CUR) && (CUR != '>')) {
! 965: /*
! 966: * We can have "ns" or "prefix" attributes
! 967: * Old encoding as 'href' or 'AS' attributes is still supported
! 968: */
! 969: if ((CUR == 'n') && (NXT(1) == 's')) {
! 970: garbage = 0;
! 971: SKIP(2);
! 972: SKIP_BLANKS;
! 973:
! 974: if (CUR != '=') continue;
! 975: NEXT;
! 976: SKIP_BLANKS;
! 977:
! 978: href = xmlParseQuotedString(ctxt);
! 979: SKIP_BLANKS;
! 980: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
! 981: (NXT(2) == 'e') && (NXT(3) == 'f')) {
! 982: garbage = 0;
! 983: SKIP(4);
! 984: SKIP_BLANKS;
! 985:
! 986: if (CUR != '=') continue;
! 987: NEXT;
! 988: SKIP_BLANKS;
! 989:
! 990: href = xmlParseQuotedString(ctxt);
! 991: SKIP_BLANKS;
! 992: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
! 993: (NXT(2) == 'e') && (NXT(3) == 'f') &&
! 994: (NXT(4) == 'i') && (NXT(5) == 'x')) {
! 995: garbage = 0;
! 996: SKIP(6);
! 997: SKIP_BLANKS;
! 998:
! 999: if (CUR != '=') continue;
! 1000: NEXT;
! 1001: SKIP_BLANKS;
! 1002:
! 1003: prefix = xmlParseQuotedString(ctxt);
! 1004: SKIP_BLANKS;
! 1005: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
! 1006: garbage = 0;
! 1007: SKIP(2);
! 1008: SKIP_BLANKS;
! 1009:
! 1010: if (CUR != '=') continue;
! 1011: NEXT;
! 1012: SKIP_BLANKS;
! 1013:
! 1014: prefix = xmlParseQuotedString(ctxt);
! 1015: SKIP_BLANKS;
! 1016: } else if ((CUR == '?') && (NXT(1) == '>')) {
! 1017: garbage = 0;
! 1018: CUR_PTR ++;
! 1019: } else {
! 1020: /*
! 1021: * Found garbage when parsing the namespace
! 1022: */
! 1023: if (!garbage)
! 1024: xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
! 1025: NEXT;
! 1026: }
! 1027: }
! 1028:
! 1029: MOVETO_ENDTAG(CUR_PTR);
! 1030: NEXT;
! 1031:
! 1032: /*
! 1033: * Register the DTD.
! 1034: */
! 1035: if (href != NULL)
! 1036: xmlNewGlobalNs(ctxt->doc, href, prefix);
! 1037:
! 1038: if (prefix != NULL) free(prefix);
! 1039: if (href != NULL) free(href);
! 1040: }
! 1041:
1.28 daniel 1042: /************************************************************************
1043: * *
1044: * The parser itself *
1045: * Relates to http://www.w3.org/TR/REC-xml *
1046: * *
1047: ************************************************************************/
1.14 veillard 1048:
1049: /*
1.1 veillard 1050: * xmlParseName : parse an XML name.
1.22 daniel 1051: *
1052: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1053: * CombiningChar | Extender
1054: *
1055: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1056: *
1057: * [6] Names ::= Name (S Name)*
1.1 veillard 1058: */
1059:
1.16 daniel 1060: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1061: const CHAR *q;
1062: CHAR *ret = NULL;
1.1 veillard 1063:
1.40 daniel 1064: if (!IS_LETTER(CUR) && (CUR != '_') &&
1065: (CUR != ':')) return(NULL);
1066: q = NEXT;
1067:
1068: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1069: (CUR == '.') || (CUR == '-') ||
1070: (CUR == '_') || (CUR == ':') ||
1071: (IS_COMBINING(CUR)) ||
1072: (IS_EXTENDER(CUR)))
1073: NEXT;
1.22 daniel 1074:
1.40 daniel 1075: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1076:
1077: return(ret);
1078: }
1079:
1080: /*
1081: * xmlParseNmtoken : parse an XML Nmtoken.
1082: *
1083: * [7] Nmtoken ::= (NameChar)+
1084: *
1085: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1086: */
1087:
1088: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1089: const CHAR *q;
1090: CHAR *ret = NULL;
1091:
1.40 daniel 1092: q = NEXT;
1.22 daniel 1093:
1.40 daniel 1094: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1095: (CUR == '.') || (CUR == '-') ||
1096: (CUR == '_') || (CUR == ':') ||
1097: (IS_COMBINING(CUR)) ||
1098: (IS_EXTENDER(CUR)))
1099: NEXT;
1.3 veillard 1100:
1.40 daniel 1101: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1102:
1.3 veillard 1103: return(ret);
1.1 veillard 1104: }
1105:
1106: /*
1.24 daniel 1107: * xmlParseEntityValue : parse a value for ENTITY decl.
1108: *
1109: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1110: * "'" ([^%&'] | PEReference | Reference)* "'"
1111: */
1112:
1113: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.45 ! daniel 1114: CHAR *ret = NULL;
1.24 daniel 1115: const CHAR *q;
1.39 daniel 1116: int needSubst = 0;
1117: int needParam = 0;
1.24 daniel 1118:
1.40 daniel 1119: if (CUR == '"') {
1120: NEXT;
1.24 daniel 1121:
1.40 daniel 1122: q = CUR_PTR;
1123: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1124: if (CUR == '%') {
1.39 daniel 1125: needParam = 1; /* TODO !!! */
1.40 daniel 1126: NEXT;
1127: } else if (CUR == '&') {
1.39 daniel 1128: needSubst = 1;
1.40 daniel 1129: NEXT;
1.24 daniel 1130: } else
1.40 daniel 1131: NEXT;
1.24 daniel 1132: }
1.40 daniel 1133: if (!IS_CHAR(CUR)) {
1.31 daniel 1134: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 1135: } else {
1.39 daniel 1136: if (needSubst) {
1.45 ! daniel 1137: /* TODO !!!!!!!!!!!!!!! */
1.39 daniel 1138: } else
1.40 daniel 1139: ret = xmlStrndup(q, CUR_PTR - q);
1140: NEXT;
1.24 daniel 1141: }
1.40 daniel 1142: } else if (CUR == '\'') {
1143: NEXT;
1144: q = CUR_PTR;
1145: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1146: if (CUR == '%') {
1.39 daniel 1147: needParam = 1; /* TODO !!! */
1.40 daniel 1148: NEXT;
1149: } else if (CUR == '&') {
1.39 daniel 1150: needSubst = 1;
1.40 daniel 1151: NEXT;
1.24 daniel 1152: } else
1.40 daniel 1153: NEXT;
1.24 daniel 1154: }
1.40 daniel 1155: if (!IS_CHAR(CUR)) {
1.31 daniel 1156: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 1157: } else {
1.39 daniel 1158: if (needSubst) {
1.45 ! daniel 1159: /* TODO !!!!!!!!!!!!!!! */
1.39 daniel 1160: } else
1.40 daniel 1161: ret = xmlStrndup(q, CUR_PTR - q);
1162: NEXT;
1.24 daniel 1163: }
1164: } else {
1.31 daniel 1165: xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 1166: }
1167:
1168: return(ret);
1169: }
1170:
1171: /*
1.29 daniel 1172: * xmlParseAttValue : parse a value for an attribute
1173: *
1174: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1175: * "'" ([^<&'] | Reference)* "'"
1176: */
1177:
1178: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.45 ! daniel 1179: CHAR *ret = NULL;
1.29 daniel 1180: const CHAR *q;
1.36 daniel 1181: int needSubst = 0;
1.29 daniel 1182:
1.40 daniel 1183: if (CUR == '"') {
1184: NEXT;
1.29 daniel 1185:
1.40 daniel 1186: q = CUR_PTR;
1187: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1188: if (CUR == '&') {
1.36 daniel 1189: needSubst = 1;
1.40 daniel 1190: NEXT;
1.29 daniel 1191: } else
1.40 daniel 1192: NEXT;
1.29 daniel 1193: }
1.40 daniel 1194: if (!IS_CHAR(CUR)) {
1.31 daniel 1195: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 1196: } else {
1.39 daniel 1197: if (needSubst) {
1.45 ! daniel 1198: /* TODO !!!!!!!!!!!!!!! */
1.39 daniel 1199: } else
1.40 daniel 1200: ret = xmlStrndup(q, CUR_PTR - q);
1201: NEXT;
1.29 daniel 1202: }
1.40 daniel 1203: } else if (CUR == '\'') {
1204: NEXT;
1205: q = CUR_PTR;
1206: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1207: if (CUR == '&') {
1.36 daniel 1208: needSubst = 1;
1.40 daniel 1209: NEXT;
1.29 daniel 1210: } else
1.40 daniel 1211: NEXT;
1.29 daniel 1212: }
1.40 daniel 1213: if (!IS_CHAR(CUR)) {
1.31 daniel 1214: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 1215: } else {
1.39 daniel 1216: if (needSubst) {
1.45 ! daniel 1217: /* TODO !!!!!!!!!!!!!!! */
1.39 daniel 1218: } else
1.40 daniel 1219: ret = xmlStrndup(q, CUR_PTR - q);
1220: NEXT;
1.29 daniel 1221: }
1222: } else {
1.31 daniel 1223: xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 1224: }
1225:
1226: return(ret);
1227: }
1228:
1229: /*
1.21 daniel 1230: * xmlParseSystemLiteral : parse an XML Literal
1231: *
1.22 daniel 1232: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 1233: */
1234:
1235: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1236: const CHAR *q;
1237: CHAR *ret = NULL;
1238:
1.40 daniel 1239: if (CUR == '"') {
1240: NEXT;
1241: q = CUR_PTR;
1242: while ((IS_CHAR(CUR)) && (CUR != '"'))
1243: NEXT;
1244: if (!IS_CHAR(CUR)) {
1.31 daniel 1245: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 1246: } else {
1.40 daniel 1247: ret = xmlStrndup(q, CUR_PTR - q);
1248: NEXT;
1.21 daniel 1249: }
1.40 daniel 1250: } else if (CUR == '\'') {
1251: NEXT;
1252: q = CUR_PTR;
1253: while ((IS_CHAR(CUR)) && (CUR != '\''))
1254: NEXT;
1255: if (!IS_CHAR(CUR)) {
1.31 daniel 1256: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 1257: } else {
1.40 daniel 1258: ret = xmlStrndup(q, CUR_PTR - q);
1259: NEXT;
1.21 daniel 1260: }
1261: } else {
1.31 daniel 1262: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1263: }
1264:
1265: return(ret);
1266: }
1267:
1268: /*
1.27 daniel 1269: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 1270: *
1.22 daniel 1271: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 1272: */
1273:
1274: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1275: const CHAR *q;
1276: CHAR *ret = NULL;
1277: /*
1278: * Name ::= (Letter | '_') (NameChar)*
1279: */
1.40 daniel 1280: if (CUR == '"') {
1281: NEXT;
1282: q = CUR_PTR;
1283: while (IS_PUBIDCHAR(CUR)) NEXT;
1284: if (CUR != '"') {
1.31 daniel 1285: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1286: } else {
1.40 daniel 1287: ret = xmlStrndup(q, CUR_PTR - q);
1288: NEXT;
1.21 daniel 1289: }
1.40 daniel 1290: } else if (CUR == '\'') {
1291: NEXT;
1292: q = CUR_PTR;
1293: while ((IS_LETTER(CUR)) && (CUR != '\''))
1294: NEXT;
1295: if (!IS_LETTER(CUR)) {
1.31 daniel 1296: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1297: } else {
1.40 daniel 1298: ret = xmlStrndup(q, CUR_PTR - q);
1299: NEXT;
1.21 daniel 1300: }
1301: } else {
1.31 daniel 1302: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1303: }
1304:
1305: return(ret);
1306: }
1307:
1308: /*
1.27 daniel 1309: * xmlParseCharData: parse a CharData section.
1310: * if we are within a CDATA section ']]>' marks an end of section.
1311: *
1312: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1313: */
1314:
1.45 ! daniel 1315: void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1316: const CHAR *q;
1317:
1.40 daniel 1318: q = CUR_PTR;
1319: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1320: (CUR != '&')) {
1321: NEXT;
1322: if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
1323: (NXT(2) == '>')) break;
1.27 daniel 1324: }
1.45 ! daniel 1325: if (q == CUR_PTR) return;
! 1326:
! 1327: /*
! 1328: * Ok the segment [q CUR_PTR] is to be consumed as chars.
! 1329: */
! 1330: if (ctxt->sax != NULL) {
! 1331: if (areBlanks(ctxt, q, CUR_PTR - q))
! 1332: ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
! 1333: else
! 1334: ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
! 1335: }
1.27 daniel 1336: }
1337:
1338: /*
1.22 daniel 1339: * xmlParseExternalID: Parse an External ID
1340: *
1341: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1342: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1343: */
1344:
1.39 daniel 1345: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1346: CHAR *URI = NULL;
1.22 daniel 1347:
1.40 daniel 1348: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1349: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1350: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1351: SKIP(6);
1.42 daniel 1352: SKIP_BLANKS;
1.39 daniel 1353: URI = xmlParseSystemLiteral(ctxt);
1354: if (URI == NULL)
1.31 daniel 1355: xmlParserError(ctxt,
1.39 daniel 1356: "xmlParseExternalID: SYSTEM, no URI\n");
1.40 daniel 1357: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1358: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1359: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1360: SKIP(6);
1.42 daniel 1361: SKIP_BLANKS;
1.39 daniel 1362: *publicID = xmlParsePubidLiteral(ctxt);
1363: if (*publicID == NULL)
1.31 daniel 1364: xmlParserError(ctxt,
1.39 daniel 1365: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.42 daniel 1366: SKIP_BLANKS;
1.39 daniel 1367: URI = xmlParseSystemLiteral(ctxt);
1368: if (URI == NULL)
1.31 daniel 1369: xmlParserError(ctxt,
1.39 daniel 1370: "xmlParseExternalID: PUBLIC, no URI\n");
1.22 daniel 1371: }
1.39 daniel 1372: return(URI);
1.22 daniel 1373: }
1374:
1375: /*
1.3 veillard 1376: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1377: * This may or may not create a node (depending on the context)
1.38 daniel 1378: * The spec says that "For compatibility, the string "--" (double-hyphen)
1379: * must not occur within comments. "
1.22 daniel 1380: *
1381: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1382: */
1.31 daniel 1383: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1384: xmlNodePtr ret = NULL;
1.17 daniel 1385: const CHAR *q, *start;
1386: const CHAR *r;
1.39 daniel 1387: CHAR *val;
1.3 veillard 1388:
1389: /*
1.22 daniel 1390: * Check that there is a comment right here.
1.3 veillard 1391: */
1.40 daniel 1392: if ((CUR != '<') || (NXT(1) != '!') ||
1393: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1394:
1.40 daniel 1395: SKIP(4);
1396: start = q = CUR_PTR;
1397: NEXT;
1398: r = CUR_PTR;
1399: NEXT;
1400: while (IS_CHAR(CUR) &&
1401: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1402: (*r != '-') || (*q != '-'))) {
1.38 daniel 1403: if ((*r == '-') && (*q == '-'))
1404: xmlParserError(ctxt,
1405: "Comment must not contain '--' (double-hyphen)`\n");
1.40 daniel 1406: NEXT;r++;q++;
1.3 veillard 1407: }
1.40 daniel 1408: if (!IS_CHAR(CUR)) {
1.31 daniel 1409: xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.3 veillard 1410: } else {
1.40 daniel 1411: NEXT;
1.31 daniel 1412: if (create) {
1.39 daniel 1413: val = xmlStrndup(start, q - start);
1414: ret = xmlNewComment(val);
1415: free(val);
1.31 daniel 1416: }
1.3 veillard 1417: }
1.39 daniel 1418: return(ret);
1.3 veillard 1419: }
1420:
1421: /*
1.22 daniel 1422: * xmlParsePITarget: parse the name of a PI
1423: *
1424: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1425: */
1426:
1427: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1428: CHAR *name;
1429:
1430: name = xmlParseName(ctxt);
1431: if ((name != NULL) && (name[3] == 0) &&
1432: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1433: ((name[1] == 'm') || (name[1] == 'M')) &&
1434: ((name[2] == 'l') || (name[2] == 'L'))) {
1435: xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1436: return(NULL);
1437: }
1438: return(name);
1439: }
1440:
1441: /*
1.3 veillard 1442: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 1443: *
1444: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 1445: */
1446:
1.16 daniel 1447: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1448: CHAR *target;
1449:
1.40 daniel 1450: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1451: /*
1452: * this is a Processing Instruction.
1453: */
1.40 daniel 1454: SKIP(2);
1.3 veillard 1455:
1456: /*
1.22 daniel 1457: * Parse the target name and check for special support like
1458: * namespace.
1459: *
1460: * TODO : PI handling should be dynamically redefinable using an
1461: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1462: */
1.22 daniel 1463: target = xmlParsePITarget(ctxt);
1464: if (target != NULL) {
1465: /*
1.44 daniel 1466: * Support for the old Processing Instruction related to namespace.
1.22 daniel 1467: */
1468: if ((target[0] == 'n') && (target[1] == 'a') &&
1469: (target[2] == 'm') && (target[3] == 'e') &&
1470: (target[4] == 's') && (target[5] == 'p') &&
1471: (target[6] == 'a') && (target[7] == 'c') &&
1472: (target[8] == 'e')) {
1473: xmlParseNamespace(ctxt);
1474: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1475: (target[2] == 'l') && (target[3] == ':') &&
1476: (target[4] == 'n') && (target[5] == 'a') &&
1477: (target[6] == 'm') && (target[7] == 'e') &&
1478: (target[8] == 's') && (target[9] == 'p') &&
1479: (target[10] == 'a') && (target[11] == 'c') &&
1480: (target[12] == 'e')) {
1481: xmlParseNamespace(ctxt);
1482: } else {
1.44 daniel 1483: const CHAR *q = CUR_PTR;
1484:
1.40 daniel 1485: while (IS_CHAR(CUR) &&
1486: ((CUR != '?') || (NXT(1) != '>')))
1487: NEXT;
1488: if (!IS_CHAR(CUR)) {
1.31 daniel 1489: xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
1490: target);
1.44 daniel 1491: } else {
1492: CHAR *data;
1493:
1494: data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1.40 daniel 1495: SKIP(2);
1.44 daniel 1496:
1497: /*
1498: * SAX: PI detected.
1499: */
1500: if (ctxt->sax)
1501: ctxt->sax->processingInstruction(ctxt, target, data);
1502: /*
1503: * Unknown PI, ignore it !
1504: */
1505: else
1506: xmlParserWarning(ctxt,
1507: "xmlParsePI : skipping unknown PI %s\n",
1508: target);
1509: free(data);
1510: }
1.22 daniel 1511: }
1.39 daniel 1512: free(target);
1.3 veillard 1513: } else {
1.31 daniel 1514: xmlParserError(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1515: /********* Should we try to complete parsing the PI ???
1.40 daniel 1516: while (IS_CHAR(CUR) &&
1517: (CUR != '?') && (CUR != '>'))
1518: NEXT;
1519: if (!IS_CHAR(CUR)) {
1.22 daniel 1520: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1521: target);
1522: }
1523: ********************************************************/
1524: }
1525: }
1526: }
1527:
1528: /*
1529: * xmlParseNotationDecl: parse a notation declaration
1530: *
1531: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1532: *
1533: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1534: *
1535: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1536: * 'PUBLIC' S PubidLiteral S SystemLiteral
1537: *
1538: * Hence there is actually 3 choices:
1539: * 'PUBLIC' S PubidLiteral
1540: * 'PUBLIC' S PubidLiteral S SystemLiteral
1541: * and 'SYSTEM' S SystemLiteral
1542: */
1543:
1544: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1545: CHAR *name;
1546:
1.40 daniel 1547: if ((CUR == '<') && (NXT(1) == '!') &&
1548: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1549: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1550: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1551: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1552: (IS_BLANK(NXT(10)))) {
1553: SKIP(10);
1.42 daniel 1554: SKIP_BLANKS;
1.22 daniel 1555:
1556: name = xmlParseName(ctxt);
1557: if (name == NULL) {
1.31 daniel 1558: xmlParserError(ctxt,
1559: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1560: return;
1561: }
1.42 daniel 1562: SKIP_BLANKS;
1.22 daniel 1563: /*
1.31 daniel 1564: * TODO !!!
1.22 daniel 1565: */
1.40 daniel 1566: while ((IS_CHAR(CUR)) && (CUR != '>'))
1567: NEXT;
1.22 daniel 1568: free(name);
1569: }
1570: }
1571:
1572: /*
1573: * xmlParseEntityDecl: parse <!ENTITY declarations
1574: *
1575: * [70] EntityDecl ::= GEDecl | PEDecl
1576: *
1577: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1578: *
1579: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1580: *
1581: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1582: *
1583: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1584: *
1585: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1586: */
1587:
1588: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 1589: CHAR *name = NULL;
1.24 daniel 1590: CHAR *value = NULL;
1.39 daniel 1591: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 1592: CHAR *ndata = NULL;
1.39 daniel 1593: int isParameter = 0;
1.22 daniel 1594:
1.40 daniel 1595: if ((CUR == '<') && (NXT(1) == '!') &&
1596: (NXT(2) == 'E') && (NXT(3) == 'N') &&
1597: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1598: (NXT(6) == 'T') && (NXT(7) == 'Y') &&
1599: (IS_BLANK(NXT(8)))) {
1600: SKIP(8);
1.42 daniel 1601: SKIP_BLANKS;
1.40 daniel 1602:
1603: if (CUR == '%') {
1604: NEXT;
1.42 daniel 1605: SKIP_BLANKS;
1.39 daniel 1606: isParameter = 1;
1.22 daniel 1607: }
1608:
1609: name = xmlParseName(ctxt);
1.24 daniel 1610: if (name == NULL) {
1.31 daniel 1611: xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1612: return;
1613: }
1.42 daniel 1614: SKIP_BLANKS;
1.24 daniel 1615:
1.22 daniel 1616: /*
1.24 daniel 1617: * TODO handle the various case of definitions...
1.22 daniel 1618: */
1.39 daniel 1619: if (isParameter) {
1.40 daniel 1620: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 1621: value = xmlParseEntityValue(ctxt);
1.39 daniel 1622: if (value) {
1623: xmlAddDocEntity(ctxt->doc, name,
1624: XML_INTERNAL_PARAMETER_ENTITY,
1625: NULL, NULL, value);
1626: }
1.24 daniel 1627: else {
1.39 daniel 1628: URI = xmlParseExternalID(ctxt, &literal);
1629: if (URI) {
1630: xmlAddDocEntity(ctxt->doc, name,
1631: XML_EXTERNAL_PARAMETER_ENTITY,
1632: literal, URI, NULL);
1633: }
1.24 daniel 1634: }
1635: } else {
1.40 daniel 1636: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 1637: value = xmlParseEntityValue(ctxt);
1.39 daniel 1638: xmlAddDocEntity(ctxt->doc, name,
1639: XML_INTERNAL_GENERAL_ENTITY,
1640: NULL, NULL, value);
1641: } else {
1642: URI = xmlParseExternalID(ctxt, &literal);
1.42 daniel 1643: SKIP_BLANKS;
1.40 daniel 1644: if ((CUR == 'N') && (NXT(1) == 'D') &&
1645: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1646: (NXT(4) == 'A')) {
1647: SKIP(5);
1.42 daniel 1648: SKIP_BLANKS;
1.24 daniel 1649: ndata = xmlParseName(ctxt);
1.39 daniel 1650: xmlAddDocEntity(ctxt->doc, name,
1651: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1652: literal, URI, ndata);
1653: } else {
1654: xmlAddDocEntity(ctxt->doc, name,
1655: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1656: literal, URI, NULL);
1.24 daniel 1657: }
1658: }
1659: }
1.42 daniel 1660: SKIP_BLANKS;
1.40 daniel 1661: if (CUR != '>') {
1.31 daniel 1662: xmlParserError(ctxt,
1663: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1664: } else
1.40 daniel 1665: NEXT;
1.39 daniel 1666: if (name != NULL) free(name);
1667: if (value != NULL) free(value);
1668: if (URI != NULL) free(URI);
1669: if (literal != NULL) free(literal);
1670: if (ndata != NULL) free(ndata);
1.22 daniel 1671: }
1672: }
1673:
1674: /*
1675: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1676: *
1677: * [57] EnumeratedType ::= NotationType | Enumeration
1678: *
1679: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1680: *
1681: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1682: */
1683:
1684: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1685: /*
1686: * TODO !!!
1687: */
1.40 daniel 1688: while ((IS_CHAR(CUR)) && (CUR != '>'))
1689: NEXT;
1.22 daniel 1690: }
1691:
1692: /*
1693: * xmlParseAttributeType: parse the Attribute list def for an element
1694: *
1695: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1696: *
1697: * [55] StringType ::= 'CDATA'
1698: *
1699: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1700: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1701: */
1702: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.45 ! daniel 1703: /* TODO !!! */
1.40 daniel 1704: if ((CUR == 'C') && (NXT(1) == 'D') &&
1705: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1706: (NXT(4) == 'A')) {
1707: SKIP(5);
1708: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
1709: SKIP(2);
1710: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1711: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1712: (NXT(4) == 'F')) {
1713: SKIP(5);
1714: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1715: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1716: (NXT(4) == 'F') && (NXT(5) == 'S')) {
1717: SKIP(6);
1718: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1719: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1720: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
1721: SKIP(6);
1722: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1723: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1724: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1725: (NXT(6) == 'E') && (NXT(7) == 'S')) {
1726: SKIP(8);
1727: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1728: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1729: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1730: (NXT(6) == 'N')) {
1731: SKIP(7);
1732: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1733: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1734: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1735: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.22 daniel 1736: } else {
1737: xmlParseEnumeratedType(ctxt, name);
1738: }
1739: }
1740:
1741: /*
1742: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1743: *
1744: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1745: *
1746: * [53] AttDef ::= S Name S AttType S DefaultDecl
1747: */
1748: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1749: CHAR *name;
1750:
1.45 ! daniel 1751: /* TODO !!! */
1.40 daniel 1752: if ((CUR == '<') && (NXT(1) == '!') &&
1753: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1754: (NXT(4) == 'T') && (NXT(5) == 'L') &&
1755: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1756: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1757: SKIP(9);
1.42 daniel 1758: SKIP_BLANKS;
1.22 daniel 1759: name = xmlParseName(ctxt);
1760: if (name == NULL) {
1.31 daniel 1761: xmlParserError(ctxt,
1762: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1763: return;
1764: }
1.42 daniel 1765: SKIP_BLANKS;
1.40 daniel 1766: while (CUR != '>') {
1767: const CHAR *check = CUR_PTR;
1.22 daniel 1768:
1769: xmlParseAttributeType(ctxt, name);
1.42 daniel 1770: SKIP_BLANKS;
1.40 daniel 1771: if (check == CUR_PTR) {
1.31 daniel 1772: xmlParserError(ctxt,
1773: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 1774: break;
1775: }
1776: }
1.40 daniel 1777: if (CUR == '>')
1778: NEXT;
1.22 daniel 1779:
1780: free(name);
1781: }
1782: }
1783:
1784: /*
1785: * xmlParseElementContentDecl: parse the declaration for an Element content
1786: * either Mixed or Children, the cases EMPTY and ANY being handled
1787: * int xmlParseElementDecl.
1788: *
1789: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1790: *
1791: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1792: *
1793: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1794: *
1795: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1796: *
1797: * or
1798: *
1799: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1800: * '(' S? '#PCDATA' S? ')'
1801: */
1802:
1803: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1804: /*
1805: * TODO This has to be parsed correctly, currently we just skip until
1806: * we reach the first '>'.
1.31 daniel 1807: * !!!
1.22 daniel 1808: */
1.40 daniel 1809: while ((IS_CHAR(CUR)) && (CUR != '>'))
1810: NEXT;
1.22 daniel 1811: }
1812:
1813: /*
1814: * xmlParseElementDecl: parse an Element declaration.
1815: *
1816: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1817: *
1818: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1819: *
1820: * TODO There is a check [ VC: Unique Element Type Declaration ]
1821: */
1822: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1823: CHAR *name;
1824:
1.40 daniel 1825: if ((CUR == '<') && (NXT(1) == '!') &&
1826: (NXT(2) == 'E') && (NXT(3) == 'L') &&
1827: (NXT(4) == 'E') && (NXT(5) == 'M') &&
1828: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1829: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1830: SKIP(9);
1.42 daniel 1831: SKIP_BLANKS;
1.22 daniel 1832: name = xmlParseName(ctxt);
1833: if (name == NULL) {
1.31 daniel 1834: xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 1835: return;
1836: }
1.42 daniel 1837: SKIP_BLANKS;
1.40 daniel 1838: if ((CUR == 'E') && (NXT(1) == 'M') &&
1839: (NXT(2) == 'P') && (NXT(3) == 'T') &&
1840: (NXT(4) == 'Y')) {
1841: SKIP(5);
1.22 daniel 1842: /*
1843: * Element must always be empty.
1844: */
1.40 daniel 1845: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
1846: (NXT(2) == 'Y')) {
1847: SKIP(3);
1.22 daniel 1848: /*
1849: * Element is a generic container.
1850: */
1851: } else {
1852: xmlParseElementContentDecl(ctxt, name);
1853: }
1.42 daniel 1854: SKIP_BLANKS;
1.40 daniel 1855: if (CUR != '>') {
1.31 daniel 1856: xmlParserError(ctxt,
1857: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 1858: } else
1.40 daniel 1859: NEXT;
1.22 daniel 1860: }
1861: }
1862:
1863: /*
1864: * xmlParseMarkupDecl: parse Markup declarations
1865: *
1866: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1867: * NotationDecl | PI | Comment
1868: *
1869: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1870: */
1871: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1872: xmlParseElementDecl(ctxt);
1873: xmlParseAttributeListDecl(ctxt);
1874: xmlParseEntityDecl(ctxt);
1875: xmlParseNotationDecl(ctxt);
1876: xmlParsePI(ctxt);
1.31 daniel 1877: xmlParseComment(ctxt, 0);
1.22 daniel 1878: }
1879:
1880: /*
1.24 daniel 1881: * xmlParseCharRef: parse Reference declarations
1882: *
1883: * [66] CharRef ::= '&#' [0-9]+ ';' |
1884: * '&#x' [0-9a-fA-F]+ ';'
1885: */
1.45 ! daniel 1886: void xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 1887: int val = 0;
1.44 daniel 1888: CHAR buf[2];
1.24 daniel 1889:
1.40 daniel 1890: if ((CUR == '&') && (NXT(1) == '#') &&
1891: (NXT(2) == 'x')) {
1892: SKIP(3);
1893: while (CUR != ';') {
1894: if ((CUR >= '0') && (CUR <= '9'))
1895: val = val * 16 + (CUR - '0');
1896: else if ((CUR >= 'a') && (CUR <= 'f'))
1897: val = val * 16 + (CUR - 'a') + 10;
1898: else if ((CUR >= 'A') && (CUR <= 'F'))
1899: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 1900: else {
1.31 daniel 1901: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1902: val = 0;
1.24 daniel 1903: break;
1904: }
1905: }
1.40 daniel 1906: if (CUR != ';')
1907: NEXT;
1908: } else if ((CUR == '&') && (NXT(1) == '#')) {
1909: SKIP(2);
1910: while (CUR != ';') {
1911: if ((CUR >= '0') && (CUR <= '9'))
1912: val = val * 16 + (CUR - '0');
1.24 daniel 1913: else {
1.31 daniel 1914: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 1915: val = 0;
1.24 daniel 1916: break;
1917: }
1918: }
1.40 daniel 1919: if (CUR != ';')
1920: NEXT;
1.24 daniel 1921: } else {
1.31 daniel 1922: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 1923: }
1.29 daniel 1924: /*
1925: * Check the value IS_CHAR ...
1926: */
1.44 daniel 1927: if (IS_CHAR(val)) {
1928: buf[0] = (CHAR) val;
1929: buf[1] = 0;
1.45 ! daniel 1930: if (ctxt->sax != NULL)
! 1931: ctxt->sax->characters(ctxt, buf, 0, 1);
1.44 daniel 1932: } else {
1.39 daniel 1933: xmlParserError(ctxt, "xmlParseCharRef: invalid value");
1.29 daniel 1934: }
1.24 daniel 1935: }
1936:
1937: /*
1938: * xmlParseEntityRef: parse ENTITY references declarations
1939: *
1940: * [68] EntityRef ::= '&' Name ';'
1941: */
1.45 ! daniel 1942: void xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.24 daniel 1943: CHAR *name;
1.44 daniel 1944: xmlEntityPtr entity;
1.24 daniel 1945:
1.40 daniel 1946: if (CUR == '&') {
1947: NEXT;
1.24 daniel 1948: name = xmlParseName(ctxt);
1949: if (name == NULL) {
1.31 daniel 1950: xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 1951: } else {
1.40 daniel 1952: if (CUR == ';') {
1953: NEXT;
1.45 ! daniel 1954: entity = xmlGetDocEntity(ctxt->doc, name);
! 1955: if (entity == NULL) {
! 1956: xmlParserWarning(ctxt,
! 1957: "xmlParseEntityRef: &%s; not found\n", name);
! 1958: }
1.24 daniel 1959: /*
1.45 ! daniel 1960: * If we can get the content, push the entity content
! 1961: * as the next input stream.
1.24 daniel 1962: */
1.45 ! daniel 1963: else {
! 1964: switch (entity->type) {
! 1965: case XML_INTERNAL_PARAMETER_ENTITY:
! 1966: case XML_EXTERNAL_PARAMETER_ENTITY:
! 1967: xmlParserError(ctxt,
! 1968: "internal: xmlGetDtdEntity returned a general entity\n");
! 1969: break;
! 1970: case XML_INTERNAL_GENERAL_ENTITY:
! 1971: xmlHandleEntity(ctxt, entity);
! 1972: break;
! 1973: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
! 1974: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
! 1975: xmlParserWarning(ctxt,
! 1976: "xmlParseEntityRef: external entity &%s; not supported\n",
! 1977: name);
! 1978: break;
! 1979: default:
! 1980: xmlParserError(ctxt,
! 1981: "internal: xmlParseEntityRef: unknown entity type %d\n",
! 1982: entity->type);
! 1983: }
1.44 daniel 1984: }
1.24 daniel 1985: } else {
1.31 daniel 1986: xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.24 daniel 1987: }
1.45 ! daniel 1988: free(name);
1.24 daniel 1989: }
1990: }
1991: }
1992:
1993: /*
1994: * xmlParseReference: parse Reference declarations
1995: *
1996: * [67] Reference ::= EntityRef | CharRef
1997: */
1.45 ! daniel 1998: void xmlParseReference(xmlParserCtxtPtr ctxt) {
1.44 daniel 1999: if ((CUR == '&') && (NXT(1) == '#')) {
1.45 ! daniel 2000: xmlParseCharRef(ctxt);
1.44 daniel 2001: } else if (CUR == '&') {
1.45 ! daniel 2002: xmlParseEntityRef(ctxt);
1.24 daniel 2003: }
1.45 ! daniel 2004: return;
1.24 daniel 2005: }
2006:
2007: /*
1.22 daniel 2008: * xmlParsePEReference: parse PEReference declarations
2009: *
2010: * [69] PEReference ::= '%' Name ';'
2011: */
1.45 ! daniel 2012: void xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 2013: CHAR *name;
1.45 ! daniel 2014: xmlEntityPtr entity;
1.22 daniel 2015:
1.40 daniel 2016: if (CUR == '%') {
2017: NEXT;
1.22 daniel 2018: name = xmlParseName(ctxt);
2019: if (name == NULL) {
1.31 daniel 2020: xmlParserError(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 2021: } else {
1.40 daniel 2022: if (CUR == ';') {
2023: NEXT;
1.45 ! daniel 2024: entity = xmlGetDtdEntity(ctxt->doc, name);
! 2025: if (entity == NULL) {
! 2026: xmlParserWarning(ctxt,
! 2027: "xmlParsePEReference: %%%s; not found\n");
! 2028: }
1.22 daniel 2029: /*
1.45 ! daniel 2030: * If we can get the content, push the entity content
! 2031: * as the next input stream.
1.22 daniel 2032: */
1.45 ! daniel 2033: else {
! 2034: switch (entity->type) {
! 2035: case XML_INTERNAL_PARAMETER_ENTITY:
! 2036: xmlNewEntityInputStream(ctxt, entity);
! 2037: break;
! 2038: case XML_EXTERNAL_PARAMETER_ENTITY:
! 2039: xmlParserWarning(ctxt,
! 2040: "xmlParsePEReference: external entity %%%s; not supported\n");
! 2041: break;
! 2042: case XML_INTERNAL_GENERAL_ENTITY:
! 2043: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
! 2044: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
! 2045: xmlParserError(ctxt,
! 2046: "internal: xmlGetDtdEntity returned a general entity\n");
! 2047: break;
! 2048: default:
! 2049: xmlParserError(ctxt,
! 2050: "internal: xmlParsePEReference: unknown entity type %d\n",
! 2051: entity->type);
! 2052: }
! 2053: }
1.22 daniel 2054: } else {
1.31 daniel 2055: xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
1.22 daniel 2056: }
1.45 ! daniel 2057: free(name);
1.3 veillard 2058: }
2059: }
2060: }
2061:
2062: /*
1.21 daniel 2063: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
2064: *
1.22 daniel 2065: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
2066: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 2067: */
2068:
2069: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 2070: xmlDtdPtr dtd;
1.21 daniel 2071: CHAR *name;
2072: CHAR *ExternalID = NULL;
1.39 daniel 2073: CHAR *URI = NULL;
1.21 daniel 2074:
2075: /*
2076: * We know that '<!DOCTYPE' has been detected.
2077: */
1.40 daniel 2078: SKIP(9);
1.21 daniel 2079:
1.42 daniel 2080: SKIP_BLANKS;
1.21 daniel 2081:
2082: /*
2083: * Parse the DOCTYPE name.
2084: */
2085: name = xmlParseName(ctxt);
2086: if (name == NULL) {
1.31 daniel 2087: xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 2088: }
2089:
1.42 daniel 2090: SKIP_BLANKS;
1.21 daniel 2091:
2092: /*
1.22 daniel 2093: * Check for SystemID and ExternalID
2094: */
1.39 daniel 2095: URI = xmlParseExternalID(ctxt, &ExternalID);
1.42 daniel 2096: SKIP_BLANKS;
1.36 daniel 2097:
1.39 daniel 2098: dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
1.22 daniel 2099:
2100: /*
2101: * Is there any DTD definition ?
2102: */
1.40 daniel 2103: if (CUR == '[') {
2104: NEXT;
1.22 daniel 2105: /*
2106: * Parse the succession of Markup declarations and
2107: * PEReferences.
2108: * Subsequence (markupdecl | PEReference | S)*
2109: */
1.40 daniel 2110: while (CUR != ']') {
2111: const CHAR *check = CUR_PTR;
1.22 daniel 2112:
1.42 daniel 2113: SKIP_BLANKS;
1.22 daniel 2114: xmlParseMarkupDecl(ctxt);
2115: xmlParsePEReference(ctxt);
2116:
1.40 daniel 2117: if (CUR_PTR == check) {
1.31 daniel 2118: xmlParserError(ctxt,
2119: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 2120: break;
2121: }
2122: }
1.40 daniel 2123: if (CUR == ']') NEXT;
1.22 daniel 2124: }
2125:
2126: /*
2127: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 2128: */
1.40 daniel 2129: if (CUR != '>') {
1.31 daniel 2130: xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 2131: /* We shouldn't try to resynchronize ... */
1.21 daniel 2132: }
1.40 daniel 2133: NEXT;
1.22 daniel 2134:
2135: /*
2136: * Cleanup, since we don't use all those identifiers
2137: * TODO : the DOCTYPE if available should be stored !
2138: */
1.39 daniel 2139: if (URI != NULL) free(URI);
1.22 daniel 2140: if (ExternalID != NULL) free(ExternalID);
2141: if (name != NULL) free(name);
1.21 daniel 2142: }
2143:
2144: /*
1.3 veillard 2145: * xmlParseAttribute: parse a start of tag.
2146: *
1.22 daniel 2147: * [41] Attribute ::= Name Eq AttValue
2148: *
2149: * [25] Eq ::= S? '=' S?
2150: *
1.29 daniel 2151: * With namespace:
2152: *
2153: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 2154: *
2155: * Also the case QName == xmlns:??? is handled independently as a namespace
2156: * definition.
1.3 veillard 2157: */
2158:
1.16 daniel 2159: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 2160: CHAR *name, *value = NULL;
1.29 daniel 2161: CHAR *ns;
1.3 veillard 2162:
1.29 daniel 2163: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 2164: if (name == NULL) {
1.31 daniel 2165: xmlParserError(ctxt, "error parsing attribute name\n");
1.29 daniel 2166: return;
1.3 veillard 2167: }
2168:
2169: /*
1.29 daniel 2170: * read the value
1.3 veillard 2171: */
1.42 daniel 2172: SKIP_BLANKS;
1.40 daniel 2173: if (CUR == '=') {
2174: NEXT;
1.42 daniel 2175: SKIP_BLANKS;
1.29 daniel 2176: value = xmlParseAttValue(ctxt);
2177: } else {
1.31 daniel 2178: xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
2179: name);
1.3 veillard 2180: }
2181:
2182: /*
1.43 daniel 2183: * Check whether it's a namespace definition
2184: */
2185: if ((ns == NULL) &&
2186: (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
2187: (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
2188: /* a default namespace definition */
2189: xmlNewNs(node, value, NULL);
2190: if (name != NULL)
2191: free(name);
2192: if (value != NULL)
2193: free(value);
2194: return;
2195: }
2196: if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
2197: (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
2198: /* a standard namespace definition */
2199: xmlNewNs(node, value, name);
2200: if (name != NULL)
2201: free(name);
2202: if (value != NULL)
2203: free(value);
2204: return;
2205: }
2206:
2207: /*
1.3 veillard 2208: * Add the attribute to the node.
2209: */
1.17 daniel 2210: if (name != NULL) {
1.3 veillard 2211: xmlNewProp(node, name, value);
1.17 daniel 2212: free(name);
2213: }
1.29 daniel 2214: if (value != NULL)
1.17 daniel 2215: free(value);
1.3 veillard 2216: }
2217:
2218: /*
1.29 daniel 2219: * xmlParseStartTag: parse a start of tag either for rule element or
2220: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 2221: *
2222: * [40] STag ::= '<' Name (S Attribute)* S? '>'
2223: *
1.29 daniel 2224: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
2225: *
2226: * With namespace:
2227: *
2228: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
2229: *
2230: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.2 veillard 2231: */
2232:
1.16 daniel 2233: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 2234: CHAR *namespace, *name;
2235: xmlNsPtr ns = NULL;
1.2 veillard 2236: xmlNodePtr ret = NULL;
2237:
1.40 daniel 2238: if (CUR != '<') return(NULL);
2239: NEXT;
1.3 veillard 2240:
1.34 daniel 2241: name = xmlNamespaceParseQName(ctxt, &namespace);
1.3 veillard 2242:
1.43 daniel 2243: /*
2244: * Note : the namespace resolution is deferred until the end of the
2245: * attributes parsing, since local namespace can be defined as
2246: * an attribute at this level.
2247: */
1.34 daniel 2248: ret = xmlNewNode(ns, name, NULL);
1.2 veillard 2249:
1.3 veillard 2250: /*
2251: * Now parse the attributes, it ends up with the ending
2252: *
2253: * (S Attribute)* S?
2254: */
1.42 daniel 2255: SKIP_BLANKS;
1.40 daniel 2256: while ((IS_CHAR(CUR)) &&
2257: (CUR != '>') &&
2258: ((CUR != '/') || (NXT(1) != '>'))) {
2259: const CHAR *q = CUR_PTR;
1.29 daniel 2260:
2261: xmlParseAttribute(ctxt, ret);
1.42 daniel 2262: SKIP_BLANKS;
1.29 daniel 2263:
1.40 daniel 2264: if (q == CUR_PTR) {
1.31 daniel 2265: xmlParserError(ctxt,
2266: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 2267: break;
1.3 veillard 2268: }
2269: }
2270:
1.43 daniel 2271: /*
2272: * Search the namespace
2273: */
2274: ns = xmlSearchNs(ctxt->doc, ret, namespace);
2275: if (ns == NULL) /* ret still doesn't have a parent yet ! */
2276: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2277: xmlSetNs(ret, ns);
2278: if (namespace != NULL)
2279: free(namespace);
2280:
1.44 daniel 2281: /*
1.45 ! daniel 2282: * We are parsing a new node.
! 2283: */
! 2284: nodePush(ctxt, ret);
! 2285:
! 2286: /*
1.44 daniel 2287: * SAX: Start of Element !
2288: */
2289: if (ctxt->sax != NULL)
2290: ctxt->sax->startElement(ctxt, name);
2291:
1.3 veillard 2292: return(ret);
2293: }
2294:
2295: /*
1.27 daniel 2296: * xmlParseEndTag: parse an end of tag
2297: *
2298: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 2299: *
2300: * With namespace
2301: *
2302: * [9] ETag ::= '</' QName S? '>'
1.7 veillard 2303: */
2304:
1.34 daniel 2305: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
2306: CHAR *namespace, *name;
2307: xmlNsPtr ns = NULL;
1.7 veillard 2308:
1.34 daniel 2309: *nsPtr = NULL;
1.7 veillard 2310: *tagPtr = NULL;
2311:
1.40 daniel 2312: if ((CUR != '<') || (NXT(1) != '/')) {
1.31 daniel 2313: xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 2314: return;
2315: }
1.40 daniel 2316: SKIP(2);
1.7 veillard 2317:
1.34 daniel 2318: name = xmlNamespaceParseQName(ctxt, &namespace);
1.43 daniel 2319:
2320: /*
2321: * Search the namespace
2322: */
2323: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2324: if (namespace != NULL)
1.34 daniel 2325: free(namespace);
1.7 veillard 2326:
1.34 daniel 2327: *nsPtr = ns;
1.7 veillard 2328: *tagPtr = name;
2329:
2330: /*
2331: * We should definitely be at the ending "S? '>'" part
2332: */
1.42 daniel 2333: SKIP_BLANKS;
1.40 daniel 2334: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.31 daniel 2335: xmlParserError(ctxt, "End tag : expected '>'\n");
1.7 veillard 2336: } else
1.40 daniel 2337: NEXT;
1.7 veillard 2338:
2339: return;
2340: }
2341:
2342: /*
1.3 veillard 2343: * xmlParseCDSect: escaped pure raw content.
1.29 daniel 2344: *
2345: * [18] CDSect ::= CDStart CData CDEnd
2346: *
2347: * [19] CDStart ::= '<![CDATA['
2348: *
2349: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2350: *
2351: * [21] CDEnd ::= ']]>'
1.3 veillard 2352: */
1.45 ! daniel 2353: void xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2354: const CHAR *r, *s, *base;
1.3 veillard 2355:
1.40 daniel 2356: if ((CUR == '<') && (NXT(1) == '!') &&
2357: (NXT(2) == '[') && (NXT(3) == 'C') &&
2358: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2359: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2360: (NXT(8) == '[')) {
2361: SKIP(9);
1.29 daniel 2362: } else
1.45 ! daniel 2363: return;
1.40 daniel 2364: base = CUR_PTR;
2365: if (!IS_CHAR(CUR)) {
1.31 daniel 2366: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 ! daniel 2367: return;
1.3 veillard 2368: }
1.40 daniel 2369: r = NEXT;
2370: if (!IS_CHAR(CUR)) {
1.31 daniel 2371: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 ! daniel 2372: return;
1.3 veillard 2373: }
1.40 daniel 2374: s = NEXT;
2375: while (IS_CHAR(CUR) &&
2376: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
2377: r++;s++;NEXT;
1.3 veillard 2378: }
1.40 daniel 2379: if (!IS_CHAR(CUR)) {
1.31 daniel 2380: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 ! daniel 2381: return;
1.3 veillard 2382: }
1.16 daniel 2383:
1.45 ! daniel 2384: /*
! 2385: * Ok the segment [base CUR_PTR] is to be consumed as chars.
! 2386: */
! 2387: if (ctxt->sax != NULL) {
! 2388: if (areBlanks(ctxt, base, CUR_PTR - base))
! 2389: ctxt->sax->ignorableWhitespace(ctxt, base, 0, CUR_PTR - base);
! 2390: else
! 2391: ctxt->sax->characters(ctxt, base, 0, CUR_PTR - base);
! 2392: }
1.2 veillard 2393: }
2394:
2395: /*
2396: * xmlParseContent: a content is
2397: * (element | PCData | Reference | CDSect | PI | Comment)
2398: *
1.27 daniel 2399: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2400: */
2401:
1.45 ! daniel 2402: void xmlParseContent(xmlParserCtxtPtr ctxt) {
1.2 veillard 2403: xmlNodePtr ret = NULL;
2404:
1.40 daniel 2405: while ((CUR != '<') || (NXT(1) != '/')) {
2406: const CHAR *test = CUR_PTR;
1.27 daniel 2407: ret = NULL;
2408:
2409: /*
2410: * First case : a Processing Instruction.
2411: */
1.40 daniel 2412: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 2413: xmlParsePI(ctxt);
2414: }
2415: /*
2416: * Second case : a CDSection
2417: */
1.40 daniel 2418: else if ((CUR == '<') && (NXT(1) == '!') &&
2419: (NXT(2) == '[') && (NXT(3) == 'C') &&
2420: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2421: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2422: (NXT(8) == '[')) {
1.45 ! daniel 2423: xmlParseCDSect(ctxt);
1.27 daniel 2424: }
2425: /*
2426: * Third case : a comment
2427: */
1.40 daniel 2428: else if ((CUR == '<') && (NXT(1) == '!') &&
2429: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 2430: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2431: }
2432: /*
2433: * Fourth case : a sub-element.
2434: */
1.40 daniel 2435: else if (CUR == '<') {
1.45 ! daniel 2436: ret = xmlParseElement(ctxt);
! 2437: }
! 2438: /*
! 2439: * Fifth case : a reference.
! 2440: */
! 2441: else if (CUR == '&') {
! 2442: xmlParseReference(ctxt);
1.27 daniel 2443: }
2444: /*
2445: * Last case, text. Note that References are handled directly.
2446: */
2447: else {
1.45 ! daniel 2448: xmlParseCharData(ctxt, 0);
1.3 veillard 2449: }
1.14 veillard 2450:
2451: /*
1.45 ! daniel 2452: * Pop-up of finished entities.
1.14 veillard 2453: */
1.45 ! daniel 2454: while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
! 2455:
1.40 daniel 2456: if (test == CUR_PTR) {
1.31 daniel 2457: xmlParserError(ctxt, "detected an error in element content\n");
1.29 daniel 2458: break;
2459: }
1.3 veillard 2460: }
1.2 veillard 2461: }
2462:
2463: /*
2464: * xmlParseElement: parse an XML element
1.26 daniel 2465: *
2466: * [39] element ::= EmptyElemTag | STag content ETag
2467: *
2468: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 2469: */
1.26 daniel 2470:
1.2 veillard 2471:
1.45 ! daniel 2472: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2473: xmlNodePtr ret;
1.40 daniel 2474: const CHAR *openTag = CUR_PTR;
1.32 daniel 2475: xmlParserNodeInfo node_info;
1.27 daniel 2476: CHAR *endTag;
1.34 daniel 2477: xmlNsPtr endNs;
1.2 veillard 2478:
1.32 daniel 2479: /* Capture start position */
1.40 daniel 2480: node_info.begin_pos = CUR_PTR - ctxt->input->base;
2481: node_info.begin_line = ctxt->input->line;
1.32 daniel 2482:
1.16 daniel 2483: ret = xmlParseStartTag(ctxt);
1.3 veillard 2484: if (ret == NULL) {
2485: return(NULL);
2486: }
1.2 veillard 2487:
2488: /*
2489: * Check for an Empty Element.
2490: */
1.40 daniel 2491: if ((CUR == '/') && (NXT(1) == '>')) {
2492: SKIP(2);
1.45 ! daniel 2493: if (ctxt->sax != NULL)
! 2494: ctxt->sax->endElement(ctxt, ret->name);
! 2495:
! 2496: /*
! 2497: * end of parsing of this node.
! 2498: */
! 2499: nodePop(ctxt);
! 2500:
1.2 veillard 2501: return(ret);
2502: }
1.40 daniel 2503: if (CUR == '>') NEXT;
1.2 veillard 2504: else {
1.31 daniel 2505: xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
1.45 ! daniel 2506:
! 2507: /*
! 2508: * end of parsing of this node.
! 2509: */
! 2510: nodePop(ctxt);
! 2511:
1.16 daniel 2512: return(NULL);
1.2 veillard 2513: }
2514:
2515: /*
2516: * Parse the content of the element:
2517: */
1.45 ! daniel 2518: xmlParseContent(ctxt);
1.40 daniel 2519: if (!IS_CHAR(CUR)) {
1.31 daniel 2520: xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
2521: openTag);
1.45 ! daniel 2522:
! 2523: /*
! 2524: * end of parsing of this node.
! 2525: */
! 2526: nodePop(ctxt);
! 2527:
1.16 daniel 2528: return(NULL);
1.2 veillard 2529: }
2530:
2531: /*
1.27 daniel 2532: * parse the end of tag: '</' should be here.
1.2 veillard 2533: */
1.34 daniel 2534: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 2535:
1.27 daniel 2536: /*
2537: * Check that the Name in the ETag is the same as in the STag.
2538: */
1.34 daniel 2539: if (endNs != ret->ns) {
1.31 daniel 2540: xmlParserError(ctxt,
1.43 daniel 2541: "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
1.31 daniel 2542: openTag, endTag);
1.27 daniel 2543: }
1.32 daniel 2544: if (endTag == NULL ) {
2545: xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
1.45 ! daniel 2546: } else if (xmlStrcmp(ret->name, endTag)) {
1.31 daniel 2547: xmlParserError(ctxt,
2548: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2549: openTag, endTag);
1.27 daniel 2550: }
1.44 daniel 2551: /*
2552: * SAX: End of Tag
2553: */
2554: else if (ctxt->sax != NULL)
2555: ctxt->sax->endElement(ctxt, endTag);
1.7 veillard 2556:
1.44 daniel 2557: if (endTag != NULL)
2558: free(endTag);
1.2 veillard 2559:
1.32 daniel 2560: /* Capture end position and add node */
2561: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 2562: node_info.end_pos = CUR_PTR - ctxt->input->base;
2563: node_info.end_line = ctxt->input->line;
1.32 daniel 2564: node_info.node = ret;
2565: xmlParserAddNodeInfo(ctxt, &node_info);
2566: }
1.43 daniel 2567:
2568: /*
2569: * end of parsing of this node.
2570: */
2571: nodePop(ctxt);
2572:
1.2 veillard 2573: return(ret);
2574: }
2575:
2576: /*
1.29 daniel 2577: * xmlParseVersionNum: parse the XML version value.
2578: *
2579: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2580: */
2581: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 2582: const CHAR *q = CUR_PTR;
1.29 daniel 2583: CHAR *ret;
2584:
1.40 daniel 2585: while (IS_CHAR(CUR) &&
2586: (((CUR >= 'a') && (CUR <= 'z')) ||
2587: ((CUR >= 'A') && (CUR <= 'Z')) ||
2588: ((CUR >= '0') && (CUR <= '9')) ||
2589: (CUR == '_') || (CUR == '.') ||
2590: (CUR == ':') || (CUR == '-'))) NEXT;
2591: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2592: return(ret);
2593: }
2594:
2595: /*
2596: * xmlParseVersionInfo: parse the XML version.
2597: *
2598: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2599: *
2600: * [25] Eq ::= S? '=' S?
2601: */
2602:
2603: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2604: CHAR *version = NULL;
2605: const CHAR *q;
2606:
1.40 daniel 2607: if ((CUR == 'v') && (NXT(1) == 'e') &&
2608: (NXT(2) == 'r') && (NXT(3) == 's') &&
2609: (NXT(4) == 'i') && (NXT(5) == 'o') &&
2610: (NXT(6) == 'n')) {
2611: SKIP(7);
1.42 daniel 2612: SKIP_BLANKS;
1.40 daniel 2613: if (CUR != '=') {
1.31 daniel 2614: xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
2615: return(NULL);
2616: }
1.40 daniel 2617: NEXT;
1.42 daniel 2618: SKIP_BLANKS;
1.40 daniel 2619: if (CUR == '"') {
2620: NEXT;
2621: q = CUR_PTR;
1.29 daniel 2622: version = xmlParseVersionNum(ctxt);
1.40 daniel 2623: if (CUR != '"')
1.31 daniel 2624: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2625: else
1.40 daniel 2626: NEXT;
2627: } else if (CUR == '\''){
2628: NEXT;
2629: q = CUR_PTR;
1.29 daniel 2630: version = xmlParseVersionNum(ctxt);
1.40 daniel 2631: if (CUR != '\'')
1.31 daniel 2632: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2633: else
1.40 daniel 2634: NEXT;
1.31 daniel 2635: } else {
2636: xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 2637: }
2638: }
2639: return(version);
2640: }
2641:
2642: /*
2643: * xmlParseEncName: parse the XML encoding name
2644: *
2645: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2646: */
2647: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 2648: const CHAR *q = CUR_PTR;
1.29 daniel 2649: CHAR *ret = NULL;
2650:
1.40 daniel 2651: if (((CUR >= 'a') && (CUR <= 'z')) ||
2652: ((CUR >= 'A') && (CUR <= 'Z'))) {
2653: NEXT;
2654: while (IS_CHAR(CUR) &&
2655: (((CUR >= 'a') && (CUR <= 'z')) ||
2656: ((CUR >= 'A') && (CUR <= 'Z')) ||
2657: ((CUR >= '0') && (CUR <= '9')) ||
2658: (CUR == '-'))) NEXT;
2659: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2660: } else {
1.31 daniel 2661: xmlParserError(ctxt, "Invalid XML encoding name\n");
1.29 daniel 2662: }
2663: return(ret);
2664: }
2665:
2666: /*
2667: * xmlParseEncodingDecl: parse the XML encoding declaration
2668: *
2669: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2670: */
2671:
2672: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2673: CHAR *encoding = NULL;
2674: const CHAR *q;
2675:
1.42 daniel 2676: SKIP_BLANKS;
1.40 daniel 2677: if ((CUR == 'e') && (NXT(1) == 'n') &&
2678: (NXT(2) == 'c') && (NXT(3) == 'o') &&
2679: (NXT(4) == 'd') && (NXT(5) == 'i') &&
2680: (NXT(6) == 'n') && (NXT(7) == 'g')) {
2681: SKIP(8);
1.42 daniel 2682: SKIP_BLANKS;
1.40 daniel 2683: if (CUR != '=') {
1.31 daniel 2684: xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
2685: return(NULL);
2686: }
1.40 daniel 2687: NEXT;
1.42 daniel 2688: SKIP_BLANKS;
1.40 daniel 2689: if (CUR == '"') {
2690: NEXT;
2691: q = CUR_PTR;
1.29 daniel 2692: encoding = xmlParseEncName(ctxt);
1.40 daniel 2693: if (CUR != '"')
1.31 daniel 2694: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2695: else
1.40 daniel 2696: NEXT;
2697: } else if (CUR == '\''){
2698: NEXT;
2699: q = CUR_PTR;
1.29 daniel 2700: encoding = xmlParseEncName(ctxt);
1.40 daniel 2701: if (CUR != '\'')
1.31 daniel 2702: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2703: else
1.40 daniel 2704: NEXT;
2705: } else if (CUR == '"'){
1.31 daniel 2706: xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 2707: }
2708: }
2709: return(encoding);
2710: }
2711:
2712: /*
2713: * xmlParseSDDecl: parse the XML standalone declaration
2714: *
2715: * [32] SDDecl ::= S 'standalone' Eq
2716: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2717: */
2718:
2719: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2720: int standalone = -1;
2721:
1.42 daniel 2722: SKIP_BLANKS;
1.40 daniel 2723: if ((CUR == 's') && (NXT(1) == 't') &&
2724: (NXT(2) == 'a') && (NXT(3) == 'n') &&
2725: (NXT(4) == 'd') && (NXT(5) == 'a') &&
2726: (NXT(6) == 'l') && (NXT(7) == 'o') &&
2727: (NXT(8) == 'n') && (NXT(9) == 'e')) {
2728: SKIP(10);
2729: if (CUR != '=') {
1.32 daniel 2730: xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
2731: return(standalone);
2732: }
1.40 daniel 2733: NEXT;
1.42 daniel 2734: SKIP_BLANKS;
1.40 daniel 2735: if (CUR == '\''){
2736: NEXT;
2737: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 2738: standalone = 0;
1.40 daniel 2739: SKIP(2);
2740: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2741: (NXT(2) == 's')) {
1.29 daniel 2742: standalone = 1;
1.40 daniel 2743: SKIP(3);
1.29 daniel 2744: } else {
1.31 daniel 2745: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2746: }
1.40 daniel 2747: if (CUR != '\'')
1.31 daniel 2748: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2749: else
1.40 daniel 2750: NEXT;
2751: } else if (CUR == '"'){
2752: NEXT;
2753: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 2754: standalone = 0;
1.40 daniel 2755: SKIP(2);
2756: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
2757: (NXT(2) == 's')) {
1.29 daniel 2758: standalone = 1;
1.40 daniel 2759: SKIP(3);
1.29 daniel 2760: } else {
1.31 daniel 2761: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 2762: }
1.40 daniel 2763: if (CUR != '"')
1.31 daniel 2764: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 2765: else
1.40 daniel 2766: NEXT;
1.37 daniel 2767: } else {
2768: xmlParserError(ctxt, "Standalone value not found\n");
2769: }
1.29 daniel 2770: }
2771: return(standalone);
2772: }
2773:
2774: /*
1.1 veillard 2775: * xmlParseXMLDecl: parse an XML declaration header
1.29 daniel 2776: *
2777: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 2778: */
2779:
1.16 daniel 2780: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 2781: CHAR *version;
2782:
2783: /*
1.19 daniel 2784: * We know that '<?xml' is here.
1.1 veillard 2785: */
1.40 daniel 2786: SKIP(5);
1.1 veillard 2787:
1.42 daniel 2788: SKIP_BLANKS;
1.1 veillard 2789:
2790: /*
1.29 daniel 2791: * We should have the VersionInfo here.
1.1 veillard 2792: */
1.29 daniel 2793: version = xmlParseVersionInfo(ctxt);
2794: if (version == NULL)
1.45 ! daniel 2795: version = xmlCharStrdup(XML_DEFAULT_VERSION);
! 2796: ctxt->doc = xmlNewDoc(version);
! 2797: free(version);
1.29 daniel 2798:
2799: /*
2800: * We may have the encoding declaration
2801: */
1.32 daniel 2802: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 2803:
2804: /*
1.29 daniel 2805: * We may have the standalone status.
1.1 veillard 2806: */
1.32 daniel 2807: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 2808:
1.42 daniel 2809: SKIP_BLANKS;
1.40 daniel 2810: if ((CUR == '?') && (NXT(1) == '>')) {
2811: SKIP(2);
2812: } else if (CUR == '>') {
1.31 daniel 2813: /* Deprecated old WD ... */
2814: xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
1.40 daniel 2815: NEXT;
1.29 daniel 2816: } else {
1.31 daniel 2817: xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
1.40 daniel 2818: MOVETO_ENDTAG(CUR_PTR);
2819: NEXT;
1.29 daniel 2820: }
1.1 veillard 2821: }
2822:
2823: /*
1.22 daniel 2824: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 2825: * Misc*
2826: *
1.22 daniel 2827: * [27] Misc ::= Comment | PI | S
1.1 veillard 2828: */
2829:
1.16 daniel 2830: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 2831: while (((CUR == '<') && (NXT(1) == '?')) ||
2832: ((CUR == '<') && (NXT(1) == '!') &&
2833: (NXT(2) == '-') && (NXT(3) == '-')) ||
2834: IS_BLANK(CUR)) {
2835: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 2836: xmlParsePI(ctxt);
1.40 daniel 2837: } else if (IS_BLANK(CUR)) {
2838: NEXT;
1.1 veillard 2839: } else
1.31 daniel 2840: xmlParseComment(ctxt, 0);
1.1 veillard 2841: }
2842: }
2843:
2844: /*
1.16 daniel 2845: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 2846: *
1.22 daniel 2847: * [1] document ::= prolog element Misc*
1.29 daniel 2848: *
2849: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 2850: */
2851:
1.16 daniel 2852: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 ! daniel 2853: xmlDefaultSAXHandlerInit();
! 2854:
1.14 veillard 2855: /*
1.44 daniel 2856: * SAX: beginning of the document processing.
2857: */
2858: if (ctxt->sax)
2859: ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
2860: if (ctxt->sax)
2861: ctxt->sax->startDocument(ctxt);
2862:
2863: /*
1.14 veillard 2864: * We should check for encoding here and plug-in some
2865: * conversion code TODO !!!!
2866: */
1.1 veillard 2867:
2868: /*
2869: * Wipe out everything which is before the first '<'
2870: */
1.42 daniel 2871: SKIP_BLANKS;
1.1 veillard 2872:
2873: /*
2874: * Check for the XMLDecl in the Prolog.
2875: */
1.40 daniel 2876: if ((CUR == '<') && (NXT(1) == '?') &&
2877: (NXT(2) == 'x') && (NXT(3) == 'm') &&
2878: (NXT(4) == 'l')) {
1.19 daniel 2879: xmlParseXMLDecl(ctxt);
2880: /* SKIP_EOL(cur); */
1.42 daniel 2881: SKIP_BLANKS;
1.40 daniel 2882: } else if ((CUR == '<') && (NXT(1) == '?') &&
2883: (NXT(2) == 'X') && (NXT(3) == 'M') &&
2884: (NXT(4) == 'L')) {
1.19 daniel 2885: /*
2886: * The first drafts were using <?XML and the final W3C REC
2887: * now use <?xml ...
2888: */
1.16 daniel 2889: xmlParseXMLDecl(ctxt);
1.1 veillard 2890: /* SKIP_EOL(cur); */
1.42 daniel 2891: SKIP_BLANKS;
1.1 veillard 2892: } else {
1.45 ! daniel 2893: CHAR *version;
! 2894:
! 2895: version = xmlCharStrdup(XML_DEFAULT_VERSION);
! 2896: ctxt->doc = xmlNewDoc(version);
! 2897: free(version);
1.1 veillard 2898: }
2899:
2900: /*
2901: * The Misc part of the Prolog
2902: */
1.16 daniel 2903: xmlParseMisc(ctxt);
1.1 veillard 2904:
2905: /*
1.29 daniel 2906: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 2907: * (doctypedecl Misc*)?
2908: */
1.40 daniel 2909: if ((CUR == '<') && (NXT(1) == '!') &&
2910: (NXT(2) == 'D') && (NXT(3) == 'O') &&
2911: (NXT(4) == 'C') && (NXT(5) == 'T') &&
2912: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
2913: (NXT(8) == 'E')) {
1.22 daniel 2914: xmlParseDocTypeDecl(ctxt);
2915: xmlParseMisc(ctxt);
1.21 daniel 2916: }
2917:
2918: /*
2919: * Time to start parsing the tree itself
1.1 veillard 2920: */
1.45 ! daniel 2921: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 2922:
2923: /*
2924: * The Misc part at the end
2925: */
2926: xmlParseMisc(ctxt);
1.16 daniel 2927:
1.44 daniel 2928: /*
2929: * SAX: end of the document processing.
2930: */
2931: if (ctxt->sax)
2932: ctxt->sax->endDocument(ctxt);
1.16 daniel 2933: return(0);
2934: }
2935:
2936: /*
2937: * xmlParseDoc : parse an XML in-memory document and build a tree.
2938: */
2939:
2940: xmlDocPtr xmlParseDoc(CHAR *cur) {
2941: xmlDocPtr ret;
2942: xmlParserCtxtPtr ctxt;
1.40 daniel 2943: xmlParserInputPtr input;
1.16 daniel 2944:
2945: if (cur == NULL) return(NULL);
1.1 veillard 2946:
1.16 daniel 2947: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2948: if (ctxt == NULL) {
2949: perror("malloc");
2950: return(NULL);
2951: }
1.40 daniel 2952: xmlInitParserCtxt(ctxt);
2953: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
2954: if (input == NULL) {
2955: perror("malloc");
2956: free(ctxt);
2957: return(NULL);
2958: }
2959:
2960: input->filename = NULL;
2961: input->line = 1;
2962: input->col = 1;
2963: input->base = cur;
2964: input->cur = cur;
2965:
2966: inputPush(ctxt, input);
1.16 daniel 2967:
2968:
2969: xmlParseDocument(ctxt);
2970: ret = ctxt->doc;
2971: free(ctxt);
2972:
1.1 veillard 2973: return(ret);
2974: }
2975:
1.9 httpng 2976: /*
2977: * xmlParseFile : parse an XML file and build a tree.
2978: */
2979:
2980: xmlDocPtr xmlParseFile(const char *filename) {
2981: xmlDocPtr ret;
1.20 daniel 2982: #ifdef HAVE_ZLIB_H
2983: gzFile input;
2984: #else
1.9 httpng 2985: int input;
1.20 daniel 2986: #endif
1.9 httpng 2987: int res;
2988: struct stat buf;
2989: char *buffer;
1.16 daniel 2990: xmlParserCtxtPtr ctxt;
1.40 daniel 2991: xmlParserInputPtr inputStream;
1.9 httpng 2992:
1.11 veillard 2993: res = stat(filename, &buf);
1.9 httpng 2994: if (res < 0) return(NULL);
2995:
1.20 daniel 2996: #ifdef HAVE_ZLIB_H
2997: retry_bigger:
2998: buffer = malloc((buf.st_size * 20) + 100);
2999: #else
1.9 httpng 3000: buffer = malloc(buf.st_size + 100);
1.20 daniel 3001: #endif
1.9 httpng 3002: if (buffer == NULL) {
3003: perror("malloc");
3004: return(NULL);
3005: }
3006:
3007: memset(buffer, 0, sizeof(buffer));
1.20 daniel 3008: #ifdef HAVE_ZLIB_H
3009: input = gzopen (filename, "r");
3010: if (input == NULL) {
3011: fprintf (stderr, "Cannot read file %s :\n", filename);
3012: perror ("gzopen failed");
3013: return(NULL);
3014: }
3015: #else
1.9 httpng 3016: input = open (filename, O_RDONLY);
3017: if (input < 0) {
3018: fprintf (stderr, "Cannot read file %s :\n", filename);
3019: perror ("open failed");
3020: return(NULL);
3021: }
1.20 daniel 3022: #endif
3023: #ifdef HAVE_ZLIB_H
3024: res = gzread(input, buffer, 20 * buf.st_size);
3025: #else
1.9 httpng 3026: res = read(input, buffer, buf.st_size);
1.20 daniel 3027: #endif
1.9 httpng 3028: if (res < 0) {
3029: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 3030: #ifdef HAVE_ZLIB_H
3031: perror ("gzread failed");
3032: #else
1.9 httpng 3033: perror ("read failed");
1.20 daniel 3034: #endif
1.9 httpng 3035: return(NULL);
3036: }
1.20 daniel 3037: #ifdef HAVE_ZLIB_H
3038: gzclose(input);
3039: if (res >= 20 * buf.st_size) {
3040: free(buffer);
3041: buf.st_size *= 2;
3042: goto retry_bigger;
3043: }
3044: buf.st_size = res;
3045: #else
1.9 httpng 3046: close(input);
1.20 daniel 3047: #endif
3048:
1.40 daniel 3049: buffer[buf.st_size] = '\0';
1.9 httpng 3050:
1.16 daniel 3051: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3052: if (ctxt == NULL) {
3053: perror("malloc");
3054: return(NULL);
3055: }
1.40 daniel 3056: xmlInitParserCtxt(ctxt);
3057: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3058: if (inputStream == NULL) {
3059: perror("malloc");
3060: free(ctxt);
3061: return(NULL);
3062: }
3063:
3064: inputStream->filename = strdup(filename);
3065: inputStream->line = 1;
3066: inputStream->col = 1;
1.45 ! daniel 3067:
! 3068: /*
! 3069: * TODO : plug some encoding conversion routines here. !!!
! 3070: */
1.40 daniel 3071: inputStream->base = buffer;
3072: inputStream->cur = buffer;
1.16 daniel 3073:
1.40 daniel 3074: inputPush(ctxt, inputStream);
1.16 daniel 3075:
3076: xmlParseDocument(ctxt);
1.40 daniel 3077:
1.16 daniel 3078: ret = ctxt->doc;
1.9 httpng 3079: free(buffer);
1.20 daniel 3080: free(ctxt);
3081:
3082: return(ret);
3083: }
3084:
1.32 daniel 3085:
1.20 daniel 3086: /*
1.32 daniel 3087: * xmlParseMemory : parse an XML memory block and build a tree.
1.20 daniel 3088: */
3089: xmlDocPtr xmlParseMemory(char *buffer, int size) {
3090: xmlDocPtr ret;
3091: xmlParserCtxtPtr ctxt;
1.40 daniel 3092: xmlParserInputPtr input;
3093:
3094: buffer[size - 1] = '\0';
3095:
1.20 daniel 3096: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3097: if (ctxt == NULL) {
3098: perror("malloc");
3099: return(NULL);
3100: }
1.40 daniel 3101: xmlInitParserCtxt(ctxt);
3102: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3103: if (input == NULL) {
3104: perror("malloc");
3105: free(ctxt);
3106: return(NULL);
3107: }
1.20 daniel 3108:
1.40 daniel 3109: input->filename = NULL;
3110: input->line = 1;
3111: input->col = 1;
1.45 ! daniel 3112:
! 3113: /*
! 3114: * TODO : plug some encoding conversion routines here. !!!
! 3115: */
1.40 daniel 3116: input->base = buffer;
3117: input->cur = buffer;
1.20 daniel 3118:
1.40 daniel 3119: inputPush(ctxt, input);
1.20 daniel 3120:
3121: xmlParseDocument(ctxt);
1.40 daniel 3122:
1.20 daniel 3123: ret = ctxt->doc;
1.16 daniel 3124: free(ctxt);
3125:
1.9 httpng 3126: return(ret);
1.17 daniel 3127: }
3128:
3129:
3130: /* Initialize parser context */
3131: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
3132: {
1.40 daniel 3133: /* Allocate the Input stack */
3134: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
3135: ctxt->inputNr = 0;
3136: ctxt->inputMax = 5;
3137: ctxt->input = NULL;
3138:
1.43 daniel 3139: /* Allocate the Node stack */
3140: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
3141: ctxt->nodeNr = 0;
3142: ctxt->nodeMax = 10;
3143: ctxt->node = NULL;
3144:
1.45 ! daniel 3145: ctxt->sax = &xmlDefaultSAXHandler;
1.32 daniel 3146: ctxt->doc = NULL;
3147: ctxt->record_info = 0;
3148: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 3149: }
3150:
3151:
1.19 daniel 3152: /*
3153: * Clear (release owned resources) and reinitialize context
3154: */
1.32 daniel 3155: void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 3156: {
1.32 daniel 3157: xmlClearNodeInfoSeq(&ctxt->node_seq);
3158: xmlInitParserCtxt(ctxt);
1.17 daniel 3159: }
3160:
3161:
1.19 daniel 3162: /*
3163: * Setup the parser context to parse a new buffer; Clears any prior
3164: * contents from the parser context. The buffer parameter must not be
3165: * NULL, but the filename parameter can be
3166: */
1.17 daniel 3167: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
3168: const char* filename)
3169: {
1.40 daniel 3170: xmlParserInputPtr input;
3171:
3172: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3173: if (input == NULL) {
3174: perror("malloc");
3175: free(ctxt);
3176: exit(1);
3177: }
3178:
1.17 daniel 3179: xmlClearParserCtxt(ctxt);
1.40 daniel 3180: if (input->filename != NULL)
3181: input->filename = strdup(filename);
3182: else
3183: input->filename = NULL;
3184: input->line = 1;
3185: input->col = 1;
3186: input->base = buffer;
3187: input->cur = buffer;
3188:
3189: inputPush(ctxt, input);
1.17 daniel 3190: }
3191:
1.32 daniel 3192:
3193: /*
3194: * xmlParserFindNodeInfo : Find the parser node info struct for a given node
3195: */
3196: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
3197: const xmlNode* node)
3198: {
3199: unsigned long pos;
3200:
3201: /* Find position where node should be at */
3202: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3203: if ( ctx->node_seq.buffer[pos].node == node )
3204: return &ctx->node_seq.buffer[pos];
3205: else
3206: return NULL;
3207: }
3208:
3209:
3210: /*
3211: * xmlInitNodeInfoSeq -- Initialize (set to initial state) node info sequence
3212: */
3213: void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3214: {
3215: seq->length = 0;
3216: seq->maximum = 0;
3217: seq->buffer = NULL;
3218: }
3219:
3220: /*
3221: * xmlClearNodeInfoSeq -- Clear (release memory and reinitialize) node
3222: * info sequence
3223: */
3224: void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3225: {
3226: if ( seq->buffer != NULL )
3227: free(seq->buffer);
3228: xmlInitNodeInfoSeq(seq);
3229: }
3230:
3231:
3232: /*
3233: * xmlParserFindNodeInfoIndex : Find the index that the info record for
3234: * the given node is or should be at in a sorted sequence
3235: */
3236: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
3237: const xmlNode* node)
3238: {
3239: unsigned long upper, lower, middle;
3240: int found = 0;
3241:
3242: /* Do a binary search for the key */
3243: lower = 1;
3244: upper = seq->length;
3245: middle = 0;
3246: while ( lower <= upper && !found) {
3247: middle = lower + (upper - lower) / 2;
3248: if ( node == seq->buffer[middle - 1].node )
3249: found = 1;
3250: else if ( node < seq->buffer[middle - 1].node )
3251: upper = middle - 1;
3252: else
3253: lower = middle + 1;
3254: }
3255:
3256: /* Return position */
3257: if ( middle == 0 || seq->buffer[middle - 1].node < node )
3258: return middle;
3259: else
3260: return middle - 1;
3261: }
3262:
3263:
3264: /*
3265: * xmlParserAddNodeInfo : Insert node info record into sorted sequence
3266: */
3267: void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
3268: const xmlParserNodeInfo* info)
3269: {
3270: unsigned long pos;
3271: static unsigned int block_size = 5;
3272:
3273: /* Find pos and check to see if node is already in the sequence */
3274: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
3275: if ( pos < ctx->node_seq.length
3276: && ctx->node_seq.buffer[pos].node == info->node ) {
3277: ctx->node_seq.buffer[pos] = *info;
3278: }
3279:
3280: /* Otherwise, we need to add new node to buffer */
3281: else {
3282: /* Expand buffer by 5 if needed */
3283: if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
3284: xmlParserNodeInfo* tmp_buffer;
3285: unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
3286: *(ctx->node_seq.maximum + block_size));
3287:
3288: if ( ctx->node_seq.buffer == NULL )
3289: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
3290: else
3291: tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
3292:
3293: if ( tmp_buffer == NULL ) {
3294: xmlParserError(ctx, "Out of memory");
3295: return;
3296: }
3297: ctx->node_seq.buffer = tmp_buffer;
3298: ctx->node_seq.maximum += block_size;
3299: }
3300:
3301: /* If position is not at end, move elements out of the way */
3302: if ( pos != ctx->node_seq.length ) {
3303: unsigned long i;
3304:
3305: for ( i = ctx->node_seq.length; i > pos; i-- )
3306: ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
3307: }
3308:
3309: /* Copy element and increase length */
3310: ctx->node_seq.buffer[pos] = *info;
3311: ctx->node_seq.length++;
3312: }
3313: }
Webmaster