Annotation of XML/parser.c, revision 1.53
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.52 daniel 6: * $Id: parser.c,v 1.51 1998/10/26 06:40:46 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
1.45 daniel 34: /************************************************************************
35: * *
36: * Parser stacks related functions and macros *
37: * *
38: ************************************************************************/
1.1 veillard 39: /*
1.40 daniel 40: * Generic function for accessing stacks in the Parser Context
1.1 veillard 41: */
42:
1.31 daniel 43: #define PUSH_AND_POP(type, name) \
1.40 daniel 44: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 45: if (ctxt->name##Nr >= ctxt->name##Max) { \
46: ctxt->name##Max *= 2; \
1.40 daniel 47: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
48: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
49: if (ctxt->name##Tab == NULL) { \
1.31 daniel 50: fprintf(stderr, "realloc failed !\n"); \
51: exit(1); \
52: } \
53: } \
1.40 daniel 54: ctxt->name##Tab[ctxt->name##Nr] = value; \
55: ctxt->name = value; \
56: return(ctxt->name##Nr++); \
1.31 daniel 57: } \
1.40 daniel 58: type name##Pop(xmlParserCtxtPtr ctxt) { \
59: if (ctxt->name##Nr <= 0) return(0); \
60: ctxt->name##Nr--; \
1.50 daniel 61: if (ctxt->name##Nr > 0) \
62: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
63: else \
64: ctxt->name = NULL; \
1.40 daniel 65: return(ctxt->name); \
1.31 daniel 66: } \
67:
1.40 daniel 68: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 69: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 70:
/*
 * Input access macros. All of them expect a variable named ctxt (the
 * parser context) to be in scope.
 *
 *   CUR          the CHAR at the current input position
 *   NEXT         move one CHAR forward, keeping ctxt->input->line and
 *                ctxt->input->col up to date; when the current CHAR is 0
 *                the input is popped with xmlPopInput() instead.
 *                Evaluates to a pointer into the input: the position
 *                before the move, or the current position after a pop.
 *   CUR_PTR      the current input pointer (usable as an lvalue)
 *   NXT(val)     the CHAR val positions after the current one
 *   SKIP(val)    move val CHARs forward without touching line/col
 *   SKIP_BLANKS  move past any run of blank CHARs (a bare while statement)
 *
 * Earlier definitions, kept for reference:
 *
 * #define CUR (*(ctxt->input->cur) ? *(ctxt->input->cur) : xmlPopInput(ctxt))
 * #define NEXT (((*(ctxt->input->cur) == '\n') ?
 *     (ctxt->input->line++, ctxt->input->col = 1) :
 *     (ctxt->input->col++)), ctxt->input->cur++)
 */

#define CUR (*ctxt->input->cur)
#define NEXT ((*ctxt->input->cur) ?                                     \
                (((*(ctxt->input->cur) == '\n') ?                       \
                    (ctxt->input->line++, ctxt->input->col = 1) :       \
                    (ctxt->input->col++)), ctxt->input->cur++) :        \
                (xmlPopInput(ctxt), ctxt->input->cur))

/* Expansions are parenthesized so they bind as one operand in any context. */
#define CUR_PTR (ctxt->input->cur)

#define NXT(val) (ctxt->input->cur[(val)])

#define SKIP(val) (ctxt->input->cur += (val))
#define SKIP_BLANKS                                                     \
    while (IS_BLANK(*(ctxt->input->cur))) NEXT

92:
1.40 daniel 93:
1.50 daniel 94: /**
95: * xmlPopInput:
96: * @ctxt: an XML parser context
97: *
1.40 daniel 98: * xmlPopInput: the current input pointed by ctxt->input came to an end
99: * pop it and return the next char.
1.45 daniel 100: *
101: * TODO A deallocation of the popped Input structure is needed
1.50 daniel 102: * return values: the current CHAR in the parser context
1.40 daniel 103: */
104: CHAR xmlPopInput(xmlParserCtxtPtr ctxt) {
105: if (ctxt->inputNr == 1) return(0); /* End of main Input */
106: inputPop(ctxt);
107: return(CUR);
108: }
109:
1.50 daniel 110: /**
111: * xmlPushInput:
112: * @ctxt: an XML parser context
113: * @input: an XML parser input fragment (entity, XML fragment ...).
114: *
1.40 daniel 115: * xmlPushInput: switch to a new input stream which is stacked on top
116: * of the previous one(s).
117: */
118: void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
119: if (input == NULL) return;
120: inputPush(ctxt, input);
121: }
122:
1.50 daniel 123: /**
124: * xmlNewEntityInputStream:
125: * @ctxt: an XML parser context
126: * @entity: an Entity pointer
127: *
1.45 daniel 128: * Create a new input stream based on a memory buffer.
1.50 daniel 129: * return vakues: the new input stream
1.45 daniel 130: */
1.50 daniel 131: xmlParserInputPtr
132: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 133: xmlParserInputPtr input;
134:
135: if (entity == NULL) {
136: xmlParserError(ctxt,
137: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 138: return(NULL);
1.45 daniel 139: }
140: if (entity->content == NULL) {
141: xmlParserError(ctxt,
142: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 143: return(NULL);
1.45 daniel 144: }
145: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
146: if (input == NULL) {
147: xmlParserError(ctxt, "malloc: couldn't allocate a new input stream\n");
1.50 daniel 148: return(NULL);
1.45 daniel 149: }
150: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
151: input->base = entity->content;
152: input->cur = entity->content;
153: input->line = 1;
154: input->col = 1;
1.50 daniel 155: return(input);
1.45 daniel 156: }
157:
158: /*
1.40 daniel 159: * A few macros needed to help building the parser.
160: */
161:
1.1 veillard 162: #ifdef UNICODE
1.30 daniel 163: /************************************************************************
164: * *
165: * UNICODE version of the macros. *
166: * *
167: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * As written the test accepts tab, LF, CR and every value from 0x20 up
 * except 0xFFFE and 0xFFFF; surrogates are not filtered out here.
 */
#define IS_CHAR(c)                                                      \
    (((c) >= 0x20) ?                                                    \
        (((c) != 0xFFFE) && ((c) != 0xFFFF)) :                          \
        (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d)))

176:
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK(c) tests a single value against the four white space characters
 * of the production.
 */
#define IS_BLANK(c)                                                     \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0D) || ((c) == 0x0A))
1.1 veillard 182:
1.22 daniel 183: /*
1.30 daniel 184: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 185: *
 * IS_BASECHAR(c) is true when c lies in one of the ranges, or equals one
 * of the single values, tested below. c is expanded once per comparison,
 * so it must be an expression without side effects.
 *
 * The comparisons can be produced from the text of the production with the
 * two vi substitutions below (ranges first, then single values):
 *
1.30 daniel 186: * VI is your friend !
187: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
188: * and
189: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 190: */
1.1 veillard 191: #define IS_BASECHAR(c) \
1.30 daniel 192: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
193: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
194: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
195: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
196: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
197: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
198: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
199: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
200: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
201: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
202: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
203: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
204: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
205: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
206: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
207: ((c) == 0x0386) || \
208: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
209: ((c) == 0x038C) || \
210: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
211: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
212: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
213: ((c) == 0x03DA) || \
214: ((c) == 0x03DC) || \
215: ((c) == 0x03DE) || \
216: ((c) == 0x03E0) || \
217: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
218: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
219: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
220: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
221: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
222: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
223: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
224: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
225: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
226: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
227: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
228: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
229: ((c) == 0x0559) || \
230: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
231: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
232: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
233: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
234: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
235: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
236: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
237: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
238: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
239: ((c) == 0x06D5) || \
240: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
241: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
242: ((c) == 0x093D) || \
243: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
244: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
245: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
246: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
247: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
248: ((c) == 0x09B2) || \
249: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
250: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
251: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
252: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
253: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
254: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
255: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
256: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
257: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
258: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
259: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
260: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
261: ((c) == 0x0A5E) || \
262: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
263: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
264: ((c) == 0x0A8D) || \
265: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
266: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
267: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
268: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
269: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
270: ((c) == 0x0ABD) || \
271: ((c) == 0x0AE0) || \
272: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
273: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
274: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
275: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
276: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
277: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
278: ((c) == 0x0B3D) || \
279: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
280: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
281: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
282: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
283: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
284: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
285: ((c) == 0x0B9C) || \
286: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
287: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
288: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
289: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
290: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
291: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
292: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
293: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
294: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
295: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
296: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
297: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
298: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
299: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
300: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
301: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
302: ((c) == 0x0CDE) || \
303: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
304: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
305: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
306: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
307: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
308: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
309: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
310: ((c) == 0x0E30) || \
311: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
312: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
313: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
314: ((c) == 0x0E84) || \
315: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
316: ((c) == 0x0E8A) || \
317: ((c) == 0x0E8D) || \
318: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
319: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
320: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
321: ((c) == 0x0EA5) || \
322: ((c) == 0x0EA7) || \
323: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
324: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
325: ((c) == 0x0EB0) || \
326: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
327: ((c) == 0x0EBD) || \
328: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
329: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
330: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
331: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
332: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
333: ((c) == 0x1100) || \
334: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
335: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
336: ((c) == 0x1109) || \
337: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
338: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
339: ((c) == 0x113C) || \
340: ((c) == 0x113E) || \
341: ((c) == 0x1140) || \
342: ((c) == 0x114C) || \
343: ((c) == 0x114E) || \
344: ((c) == 0x1150) || \
345: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
346: ((c) == 0x1159) || \
347: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
348: ((c) == 0x1163) || \
349: ((c) == 0x1165) || \
350: ((c) == 0x1167) || \
351: ((c) == 0x1169) || \
352: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
353: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
354: ((c) == 0x1175) || \
355: ((c) == 0x119E) || \
356: ((c) == 0x11A8) || \
357: ((c) == 0x11AB) || \
358: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
359: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
360: ((c) == 0x11BA) || \
361: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
362: ((c) == 0x11EB) || \
363: ((c) == 0x11F0) || \
364: ((c) == 0x11F9) || \
365: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
366: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
367: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
368: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
369: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
370: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
371: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
372: ((c) == 0x1F59) || \
373: ((c) == 0x1F5B) || \
374: ((c) == 0x1F5D) || \
375: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
376: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
377: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
378: ((c) == 0x1FBE) || \
379: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
380: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
381: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
382: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
383: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
384: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
385: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
386: ((c) == 0x2126) || \
387: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
388: ((c) == 0x212E) || \
389: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
390: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
391: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
392: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
393: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 394:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * IS_DIGIT(c) is true when c belongs to one of the fifteen digit ranges
 * listed below, one range per line.
 */
#define IS_DIGIT(c)                                                     \
    (((0x0030 <= (c)) && ((c) <= 0x0039)) ||                            \
     ((0x0660 <= (c)) && ((c) <= 0x0669)) ||                            \
     ((0x06F0 <= (c)) && ((c) <= 0x06F9)) ||                            \
     ((0x0966 <= (c)) && ((c) <= 0x096F)) ||                            \
     ((0x09E6 <= (c)) && ((c) <= 0x09EF)) ||                            \
     ((0x0A66 <= (c)) && ((c) <= 0x0A6F)) ||                            \
     ((0x0AE6 <= (c)) && ((c) <= 0x0AEF)) ||                            \
     ((0x0B66 <= (c)) && ((c) <= 0x0B6F)) ||                            \
     ((0x0BE7 <= (c)) && ((c) <= 0x0BEF)) ||                            \
     ((0x0C66 <= (c)) && ((c) <= 0x0C6F)) ||                            \
     ((0x0CE6 <= (c)) && ((c) <= 0x0CEF)) ||                            \
     ((0x0D66 <= (c)) && ((c) <= 0x0D6F)) ||                            \
     ((0x0E50 <= (c)) && ((c) <= 0x0E59)) ||                            \
     ((0x0ED0 <= (c)) && ((c) <= 0x0ED9)) ||                            \
     ((0x0F20 <= (c)) && ((c) <= 0x0F29)))
1.1 veillard 414:
1.22 daniel 415: /*
416: * [87] CombiningChar ::= ... long list see REC ...
 *
 * IS_COMBINING(c) is true when c lies in one of the ranges, or equals one
 * of the single values, tested below. c is expanded once per comparison,
 * so it must be an expression without side effects.
417: */
1.30 daniel 418: #define IS_COMBINING(c) \
419: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
420: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
421: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
422: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
423: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
424: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
425: ((c) == 0x05BF) || \
426: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
427: ((c) == 0x05C4) || \
428: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
429: ((c) == 0x0670) || \
430: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
431: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
432: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
433: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
434: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
435: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
436: ((c) == 0x093C) || \
437: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
438: ((c) == 0x094D) || \
439: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
440: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
441: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
442: ((c) == 0x09BC) || \
443: ((c) == 0x09BE) || \
444: ((c) == 0x09BF) || \
445: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
446: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
447: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
448: ((c) == 0x09D7) || \
449: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
450: ((c) == 0x0A02) || \
451: ((c) == 0x0A3C) || \
452: ((c) == 0x0A3E) || \
453: ((c) == 0x0A3F) || \
454: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
455: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
456: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
457: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
458: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
459: ((c) == 0x0ABC) || \
460: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
461: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
462: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
463: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
464: ((c) == 0x0B3C) || \
465: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
466: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
467: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
468: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
469: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
470: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
471: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
472: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
473: ((c) == 0x0BD7) || \
474: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
475: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
476: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
477: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
478: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
479: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
480: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
481: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
482: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
483: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
484: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
485: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
486: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
487: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
488: ((c) == 0x0D57) || \
489: ((c) == 0x0E31) || \
490: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
491: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
492: ((c) == 0x0EB1) || \
493: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
494: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
495: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
496: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
497: ((c) == 0x0F35) || \
498: ((c) == 0x0F37) || \
499: ((c) == 0x0F39) || \
500: ((c) == 0x0F3E) || \
501: ((c) == 0x0F3F) || \
502: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
503: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
504: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
505: ((c) == 0x0F97) || \
506: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
507: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
508: ((c) == 0x0FB9) || \
509: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
510: ((c) == 0x20E1) || \
511: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
512: ((c) == 0x3099) || \
513: ((c) == 0x309A))
1.3 veillard 514:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * IS_EXTENDER(c) is true when c is one of the values of the production.
 * The second range starts at 0x309D as the production requires, so 0x309B
 * and 0x309C are not accepted.
 */
#define IS_EXTENDER(c)                                                  \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||               \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||              \
     ((c) == 0xec6) || ((c) == 0x3005) ||                               \
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||                            \
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||                            \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 527:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE(review): the macro also accepts 0xF900-0xFA2D, which is not part
 * of the production quoted above - confirm whether that is intended.
 */
#define IS_IDEOGRAPHIC(c)                                               \
    (((c) == 0x3007) ||                                                 \
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||                            \
     (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||                            \
     (((c) >= 0xf900) && ((c) <= 0xfa2d)))

536:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * True when either IS_BASECHAR(c) or IS_IDEOGRAPHIC(c) is true.
 */
#define IS_LETTER(c)                                                    \
    ((IS_BASECHAR(c)) || (IS_IDEOGRAPHIC(c)))
541:
542: #else
1.30 daniel 543: /************************************************************************
544: * *
545: * 8bits / ASCII version of the macros. *
546: * *
547: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * 8 bit version: tab, LF, CR and every value from 0x20 up are accepted.
 */
#define IS_CHAR(c)                                                      \
    (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
1.1 veillard 556:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8 bit version: A-Z, a-z, 0xC0-0xD6, 0xD8-0xF6, 0xF8-0xFF and 0xBA.
 * The former test "(c) >= 0xaa && (c) <= 0x5b" described an empty range
 * and could never match, so it has been dropped; the set of accepted
 * values is unchanged.
 *
 * NOTE(review): 0xBA is accepted although the BaseChar ranges used by the
 * UNICODE version of this macro do not contain it - confirm against the
 * REC.
 */
#define IS_BASECHAR(c)                                                  \
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||                                \
     (((c) >= 0x61) && ((c) <= 0x7a)) ||                                \
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||                                \
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||                                \
     (((c) >= 0xf8) && ((c) <= 0xff)) ||                                \
     ((c) == 0xba))

568:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8 bit version: only the range 0x30-0x39 is accepted.
 */
#define IS_DIGIT(c) (!(((c) < 0x30) || ((c) > 0x39)))
573:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8 bit version: a letter is simply whatever IS_BASECHAR() accepts.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))
578:
1.22 daniel 579:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8 bit version: always false, the argument is not evaluated.
 */
#define IS_COMBINING(c) (0)
584:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8 bit version: only 0xB7 is tested.
 */
#define IS_EXTENDER(c) (0xb7 == (c))

591:
1.21 daniel 592: #endif /* !UNICODE */
1.1 veillard 593:
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK(c) tests a single value against the four characters above.
 */
#define IS_BLANK(c)                                                     \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0D) || ((c) == 0x0A))
601:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Letters and digits are tested first, then the three white space values,
 * then the punctuation in the order of the production.
 */
#define IS_PUBIDCHAR(c)                                                 \
    ((((c) >= 'a') && ((c) <= 'z')) ||                                  \
     (((c) >= 'A') && ((c) <= 'Z')) ||                                  \
     (((c) >= '0') && ((c) <= '9')) ||                                  \
     ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||                 \
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||   \
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||    \
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||    \
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||    \
     ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 615:
616: #define SKIP_EOL(p) \
617: if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \
618: if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
619:
/*
 * MOVETO_ENDTAG(p): advance the pointer p until it points at a '>' or at a
 * value rejected by IS_CHAR() (the terminating 0 in particular).
 */
#define MOVETO_ENDTAG(p)                                                \
    while ((*(p) != '>') && IS_CHAR(*(p))) (p)++
1.1 veillard 622:
/*
 * MOVETO_STARTTAG(p): advance the pointer p until it points at a '<' or at
 * a value rejected by IS_CHAR() (the terminating 0 in particular).
 */
#define MOVETO_STARTTAG(p)                                              \
    while ((*(p) != '<') && IS_CHAR(*(p))) (p)++
1.1 veillard 625:
1.28 daniel 626: /************************************************************************
627: * *
628: * Commodity functions to handle CHARs *
629: * *
630: ************************************************************************/
631:
1.50 daniel 632: /**
633: * xmlStrndup:
634: * @cur: the input CHAR *
635: * @len: the len of @cur
636: *
637: * a strndup for array of CHAR's
638: * return values: a new CHAR * or NULL
1.1 veillard 639: */
640:
1.6 httpng 641: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 642: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
643:
644: if (ret == NULL) {
645: fprintf(stderr, "malloc of %d byte failed\n",
646: (len + 1) * sizeof(CHAR));
647: return(NULL);
648: }
649: memcpy(ret, cur, len * sizeof(CHAR));
650: ret[len] = 0;
651: return(ret);
652: }
653:
1.50 daniel 654: /**
655: * xmlStrdup:
656: * @cur: the input CHAR *
657: *
658: * a strdup for array of CHAR's
659: * return values: a new CHAR * or NULL
1.1 veillard 660: */
661:
1.6 httpng 662: CHAR *xmlStrdup(const CHAR *cur) {
663: const CHAR *p = cur;
1.1 veillard 664:
665: while (IS_CHAR(*p)) p++;
666: return(xmlStrndup(cur, p - cur));
667: }
668:
1.50 daniel 669: /**
670: * xmlCharStrndup:
671: * @cur: the input char *
672: * @len: the len of @cur
673: *
674: * a strndup for char's to CHAR's
675: * return values: a new CHAR * or NULL
1.45 daniel 676: */
677:
678: CHAR *xmlCharStrndup(const char *cur, int len) {
679: int i;
680: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
681:
682: if (ret == NULL) {
683: fprintf(stderr, "malloc of %d byte failed\n",
684: (len + 1) * sizeof(CHAR));
685: return(NULL);
686: }
687: for (i = 0;i < len;i++)
688: ret[i] = (CHAR) cur[i];
689: ret[len] = 0;
690: return(ret);
691: }
692:
1.50 daniel 693: /**
694: * xmlCharStrdup:
695: * @cur: the input char *
696: * @len: the len of @cur
697: *
698: * a strdup for char's to CHAR's
699: * return values: a new CHAR * or NULL
1.45 daniel 700: */
701:
702: CHAR *xmlCharStrdup(const char *cur) {
703: const char *p = cur;
704:
705: while (*p != '\0') p++;
706: return(xmlCharStrndup(cur, p - cur));
707: }
708:
1.50 daniel 709: /**
710: * xmlStrcmp:
711: * @str1: the first CHAR *
712: * @str2: the second CHAR *
713: *
714: * a strcmp for CHAR's
715: * return values: the integer result of the comparison
1.14 veillard 716: */
717:
718: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
719: register int tmp;
720:
721: do {
722: tmp = *str1++ - *str2++;
723: if (tmp != 0) return(tmp);
724: } while ((*str1 != 0) && (*str2 != 0));
725: return (*str1 - *str2);
726: }
727:
1.50 daniel 728: /**
729: * xmlStrncmp:
730: * @str1: the first CHAR *
731: * @str2: the second CHAR *
732: * @len: the max comparison length
733: *
734: * a strncmp for CHAR's
735: * return values: the integer result of the comparison
1.14 veillard 736: */
737:
738: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
739: register int tmp;
740:
741: if (len <= 0) return(0);
742: do {
743: tmp = *str1++ - *str2++;
744: if (tmp != 0) return(tmp);
745: len--;
746: if (len <= 0) return(0);
747: } while ((*str1 != 0) && (*str2 != 0));
748: return (*str1 - *str2);
749: }
750:
1.50 daniel 751: /**
752: * xmlStrchr:
753: * @str: the CHAR * array
754: * @val: the CHAR to search
755: *
756: * a strchr for CHAR's
757: * return values: the CHAR * for the first occurence or NULL.
1.14 veillard 758: */
759:
760: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
761: while (*str != 0) {
762: if (*str == val) return((CHAR *) str);
763: str++;
764: }
765: return(NULL);
766: }
1.28 daniel 767:
1.50 daniel 768: /**
769: * xmlStrlen:
770: * @str: the CHAR * array
771: *
772: * lenght of a CHAR's string
773: * return values: the number of CHAR contained in the ARRAY.
1.45 daniel 774: */
775:
776: int xmlStrlen(const CHAR *str) {
777: int len = 0;
778:
779: if (str == NULL) return(0);
780: while (*str != 0) {
781: str++;
782: len++;
783: }
784: return(len);
785: }
786:
1.50 daniel 787: /**
788: * xmlStrncat:
789: * @first: the original CHAR * array
790: * @add: the CHAR * array added
791: * @len: the length of @add
792: *
793: * a strncat for array of CHAR's
794: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 795: */
796:
797: CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len) {
798: int size;
799: CHAR *ret;
800:
801: if ((add == NULL) || (len == 0))
802: return(cur);
803: if (cur == NULL)
804: return(xmlStrndup(add, len));
805:
806: size = xmlStrlen(cur);
807: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
808: if (ret == NULL) {
809: fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
810: (size + len + 1) * sizeof(CHAR));
811: return(cur);
812: }
813: memcpy(&ret[size], add, len * sizeof(CHAR));
814: ret[size + len] = 0;
815: return(ret);
816: }
817:
1.50 daniel 818: /**
819: * xmlStrcat:
820: * @first: the original CHAR * array
821: * @add: the CHAR * array added
822: *
823: * a strcat for array of CHAR's
824: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 825: */
826:
827: CHAR *xmlStrcat(CHAR *cur, const CHAR *add) {
828: const CHAR *p = add;
829:
830: if (add == NULL) return(cur);
831: if (cur == NULL)
832: return(xmlStrdup(add));
833:
834: while (IS_CHAR(*p)) p++;
835: return(xmlStrncat(cur, add, p - add));
836: }
837:
838: /************************************************************************
839: * *
840: * Commodity functions, cleanup needed ? *
841: * *
842: ************************************************************************/
843:
1.50 daniel 844: /**
845: * areBlanks:
846: * @ctxt: an XML parser context
847: * @str: a CHAR *
848: * @len: the size of @str
849: *
1.45 daniel 850: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 851: *
852: * TODO: to be corrected accodingly to DTD information if available
853: * return values: 1 if ignorable 0 otherwise.
1.45 daniel 854: */
855:
856: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
857: int i;
858: xmlNodePtr lastChild;
859:
860: for (i = 0;i < len;i++)
861: if (!(IS_BLANK(str[i]))) return(0);
862:
863: if (CUR != '<') return(0);
864: lastChild = xmlGetLastChild(ctxt->node);
865: if (lastChild == NULL) {
866: if (ctxt->node->content != NULL) return(0);
867: } else if (xmlNodeIsText(lastChild))
868: return(0);
869: return(1);
870: }
871:
1.50 daniel 872: /**
873: * xmlHandleEntity:
874: * @ctxt: an XML parser context
875: * @entity: an XML entity pointer.
876: *
877: * Default handling of defined entities, when should we define a new input
1.45 daniel 878: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 879: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 880: */
881:
882: void xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
883: int len;
1.50 daniel 884: xmlParserInputPtr input;
1.45 daniel 885:
886: if (entity->content == NULL) {
887: xmlParserError(ctxt, "xmlHandleEntity %s: content == NULL\n",
888: entity->name);
889: return;
890: }
891: len = xmlStrlen(entity->content);
892: if (len <= 2) goto handle_as_char;
893:
894: /*
895: * Redefine its content as an input stream.
896: */
1.50 daniel 897: input = xmlNewEntityInputStream(ctxt, entity);
898: xmlPushInput(ctxt, input);
1.45 daniel 899: return;
900:
901: handle_as_char:
902: /*
903: * Just handle the content as a set of chars.
904: */
905: if (ctxt->sax != NULL)
906: ctxt->sax->characters(ctxt, entity->content, 0, len);
907:
908: }
909:
910: /*
911: * Forward declarations needed for recursive behaviour.
912: */
913: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.50 daniel 914: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
915: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 916:
1.28 daniel 917: /************************************************************************
918: * *
919: * Extra stuff for namespace support *
920: * Relates to http://www.w3.org/TR/WD-xml-names *
921: * *
922: ************************************************************************/
923:
1.50 daniel 924: /**
925: * xmlNamespaceParseNCName:
926: * @ctxt: an XML parser context
927: *
928: * parse an XML namespace name.
1.28 daniel 929: *
930: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
931: *
932: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
933: * CombiningChar | Extender
1.50 daniel 934: * return values: the namespace name or NULL
1.28 daniel 935: */
936:
937: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
938: const CHAR *q;
939: CHAR *ret = NULL;
940:
1.40 daniel 941: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
942: q = NEXT;
1.28 daniel 943:
1.40 daniel 944: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
945: (CUR == '.') || (CUR == '-') ||
946: (CUR == '_') ||
947: (IS_COMBINING(CUR)) ||
948: (IS_EXTENDER(CUR)))
949: NEXT;
1.28 daniel 950:
1.40 daniel 951: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 952:
953: return(ret);
954: }
955:
1.50 daniel 956: /**
957: * xmlNamespaceParseQName:
958: * @ctxt: an XML parser context
959: * @prefix: a CHAR **
960: *
961: * parse an XML qualified name
1.28 daniel 962: *
963: * [NS 5] QName ::= (Prefix ':')? LocalPart
964: *
965: * [NS 6] Prefix ::= NCName
966: *
967: * [NS 7] LocalPart ::= NCName
1.50 daniel 968: * return values: the function returns the local part, and prefix is updated
969: * to get the Prefix if any.
1.28 daniel 970: */
971:
972: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
973: CHAR *ret = NULL;
974:
975: *prefix = NULL;
976: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 977: if (CUR == ':') {
1.28 daniel 978: *prefix = ret;
1.40 daniel 979: NEXT;
1.28 daniel 980: ret = xmlNamespaceParseNCName(ctxt);
981: }
982:
983: return(ret);
984: }
985:
1.50 daniel 986: /**
987: * xmlNamespaceParseNSDef:
988: * @ctxt: an XML parser context
989: *
990: * parse a namespace prefix declaration
1.28 daniel 991: *
992: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
993: *
994: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.50 daniel 995: * return values: the namespace name
1.28 daniel 996: */
997:
1.39 daniel 998: CHAR *xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 999: CHAR *name = NULL;
1000:
1.40 daniel 1001: if ((CUR == 'x') && (NXT(1) == 'm') &&
1002: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1003: (NXT(4) == 's')) {
1004: SKIP(5);
1005: if (CUR == ':') {
1006: NEXT;
1.28 daniel 1007: name = xmlNamespaceParseNCName(ctxt);
1008: }
1009: }
1.39 daniel 1010: return(name);
1.28 daniel 1011: }
1012:
1.50 daniel 1013: /**
1014: * xmlParseQuotedString:
1015: * @ctxt: an XML parser context
1016: *
1.45 daniel 1017: * [OLD] Parse and return a string between quotes or doublequotes
1.50 daniel 1018: * return values: the string parser or NULL.
1.45 daniel 1019: */
1020: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1021: CHAR *ret = NULL;
1022: const CHAR *q;
1023:
1024: if (CUR == '"') {
1025: NEXT;
1026: q = CUR_PTR;
1027: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1028: if (CUR != '"')
1029: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1030: else {
1031: ret = xmlStrndup(q, CUR_PTR - q);
1032: NEXT;
1033: }
1034: } else if (CUR == '\''){
1035: NEXT;
1036: q = CUR_PTR;
1037: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1038: if (CUR != '\'')
1039: xmlParserError(ctxt, "String not closed\"%.50s\n", q);
1040: else {
1041: ret = xmlStrndup(q, CUR_PTR - q);
1042: NEXT;
1043: }
1044: }
1045: return(ret);
1046: }
1047:
1.50 daniel 1048: /**
1049: * xmlParseNamespace:
1050: * @ctxt: an XML parser context
1051: *
1.45 daniel 1052: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1053: *
1054: * This is what the older xml-name Working Draft specified, a bunch of
1055: * other stuff may still rely on it, so support is still here as
1056: * if ot was declared on the root of the Tree:-(
1057: */
1058:
1059: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1060: CHAR *href = NULL;
1061: CHAR *prefix = NULL;
1062: int garbage = 0;
1063:
1064: /*
1065: * We just skipped "namespace" or "xml:namespace"
1066: */
1067: SKIP_BLANKS;
1068:
1069: while (IS_CHAR(CUR) && (CUR != '>')) {
1070: /*
1071: * We can have "ns" or "prefix" attributes
1072: * Old encoding as 'href' or 'AS' attributes is still supported
1073: */
1074: if ((CUR == 'n') && (NXT(1) == 's')) {
1075: garbage = 0;
1076: SKIP(2);
1077: SKIP_BLANKS;
1078:
1079: if (CUR != '=') continue;
1080: NEXT;
1081: SKIP_BLANKS;
1082:
1083: href = xmlParseQuotedString(ctxt);
1084: SKIP_BLANKS;
1085: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1086: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1087: garbage = 0;
1088: SKIP(4);
1089: SKIP_BLANKS;
1090:
1091: if (CUR != '=') continue;
1092: NEXT;
1093: SKIP_BLANKS;
1094:
1095: href = xmlParseQuotedString(ctxt);
1096: SKIP_BLANKS;
1097: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1098: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1099: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1100: garbage = 0;
1101: SKIP(6);
1102: SKIP_BLANKS;
1103:
1104: if (CUR != '=') continue;
1105: NEXT;
1106: SKIP_BLANKS;
1107:
1108: prefix = xmlParseQuotedString(ctxt);
1109: SKIP_BLANKS;
1110: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1111: garbage = 0;
1112: SKIP(2);
1113: SKIP_BLANKS;
1114:
1115: if (CUR != '=') continue;
1116: NEXT;
1117: SKIP_BLANKS;
1118:
1119: prefix = xmlParseQuotedString(ctxt);
1120: SKIP_BLANKS;
1121: } else if ((CUR == '?') && (NXT(1) == '>')) {
1122: garbage = 0;
1123: CUR_PTR ++;
1124: } else {
1125: /*
1126: * Found garbage when parsing the namespace
1127: */
1128: if (!garbage)
1129: xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
1130: NEXT;
1131: }
1132: }
1133:
1134: MOVETO_ENDTAG(CUR_PTR);
1135: NEXT;
1136:
1137: /*
1138: * Register the DTD.
1139: */
1140: if (href != NULL)
1141: xmlNewGlobalNs(ctxt->doc, href, prefix);
1142:
1143: if (prefix != NULL) free(prefix);
1144: if (href != NULL) free(href);
1145: }
1146:
1.28 daniel 1147: /************************************************************************
1148: * *
1149: * The parser itself *
1150: * Relates to http://www.w3.org/TR/REC-xml *
1151: * *
1152: ************************************************************************/
1.14 veillard 1153:
1.50 daniel 1154: /**
1155: * xmlParseName:
1156: * @ctxt: an XML parser context
1157: *
1158: * parse an XML name.
1.22 daniel 1159: *
1160: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1161: * CombiningChar | Extender
1162: *
1163: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1164: *
1165: * [6] Names ::= Name (S Name)*
1.50 daniel 1166: * return values: the Name parsed or NULL
1.1 veillard 1167: */
1168:
1.16 daniel 1169: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1170: const CHAR *q;
1171: CHAR *ret = NULL;
1.1 veillard 1172:
1.40 daniel 1173: if (!IS_LETTER(CUR) && (CUR != '_') &&
1174: (CUR != ':')) return(NULL);
1175: q = NEXT;
1176:
1177: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1178: (CUR == '.') || (CUR == '-') ||
1179: (CUR == '_') || (CUR == ':') ||
1180: (IS_COMBINING(CUR)) ||
1181: (IS_EXTENDER(CUR)))
1182: NEXT;
1.22 daniel 1183:
1.40 daniel 1184: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1185:
1186: return(ret);
1187: }
1188:
1.50 daniel 1189: /**
1190: * xmlParseNmtoken:
1191: * @ctxt: an XML parser context
1192: *
1193: * parse an XML Nmtoken.
1.22 daniel 1194: *
1195: * [7] Nmtoken ::= (NameChar)+
1196: *
1197: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.50 daniel 1198: * return values: the Nmtoken parsed or NULL
1.22 daniel 1199: */
1200:
1201: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1202: const CHAR *q;
1203: CHAR *ret = NULL;
1204:
1.40 daniel 1205: q = NEXT;
1.22 daniel 1206:
1.40 daniel 1207: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1208: (CUR == '.') || (CUR == '-') ||
1209: (CUR == '_') || (CUR == ':') ||
1210: (IS_COMBINING(CUR)) ||
1211: (IS_EXTENDER(CUR)))
1212: NEXT;
1.3 veillard 1213:
1.40 daniel 1214: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1215:
1.3 veillard 1216: return(ret);
1.1 veillard 1217: }
1218:
1.50 daniel 1219: /**
1220: * xmlParseEntityValue:
1221: * @ctxt: an XML parser context
1222: *
1223: * parse a value for ENTITY decl.
1.24 daniel 1224: *
1225: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1226: * "'" ([^%&'] | PEReference | Reference)* "'"
1.50 daniel 1227: * return values: the EntityValue parsed or NULL
1.24 daniel 1228: */
1229:
1230: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1231: CHAR *ret = NULL, *cur;
1.24 daniel 1232: const CHAR *q;
1233:
1.40 daniel 1234: if (CUR == '"') {
1235: NEXT;
1.24 daniel 1236:
1.40 daniel 1237: q = CUR_PTR;
1238: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1239: if (CUR == '%') {
1.46 daniel 1240: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1241: cur = xmlParsePEReference(ctxt);
1.46 daniel 1242: ret = xmlStrcat(ret, cur);
1243: q = CUR_PTR;
1.40 daniel 1244: } else if (CUR == '&') {
1.46 daniel 1245: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1246: cur = xmlParseReference(ctxt);
1247: if (cur != NULL) {
1248: CHAR buf[2];
1249: buf[0] = '&';
1250: buf[1] = 0;
1251: ret = xmlStrncat(ret, buf, 1);
1252: ret = xmlStrcat(ret, cur);
1253: buf[0] = ';';
1254: buf[1] = 0;
1255: ret = xmlStrncat(ret, buf, 1);
1256: }
1.46 daniel 1257: q = CUR_PTR;
1.24 daniel 1258: } else
1.40 daniel 1259: NEXT;
1.24 daniel 1260: }
1.40 daniel 1261: if (!IS_CHAR(CUR)) {
1.31 daniel 1262: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 1263: } else {
1.46 daniel 1264: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1265: NEXT;
1.24 daniel 1266: }
1.40 daniel 1267: } else if (CUR == '\'') {
1268: NEXT;
1269: q = CUR_PTR;
1270: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1271: if (CUR == '%') {
1.46 daniel 1272: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1273: cur = xmlParsePEReference(ctxt);
1.46 daniel 1274: ret = xmlStrcat(ret, cur);
1275: q = CUR_PTR;
1.40 daniel 1276: } else if (CUR == '&') {
1.46 daniel 1277: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1278: cur = xmlParseReference(ctxt);
1279: if (cur != NULL) {
1280: CHAR buf[2];
1281: buf[0] = '&';
1282: buf[1] = 0;
1283: ret = xmlStrncat(ret, buf, 1);
1284: ret = xmlStrcat(ret, cur);
1285: buf[0] = ';';
1286: buf[1] = 0;
1287: ret = xmlStrncat(ret, buf, 1);
1288: }
1.46 daniel 1289: q = CUR_PTR;
1.24 daniel 1290: } else
1.40 daniel 1291: NEXT;
1.24 daniel 1292: }
1.40 daniel 1293: if (!IS_CHAR(CUR)) {
1.31 daniel 1294: xmlParserError(ctxt, "Unfinished EntityValue\n");
1.24 daniel 1295: } else {
1.46 daniel 1296: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1297: NEXT;
1.24 daniel 1298: }
1299: } else {
1.31 daniel 1300: xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 1301: }
1302:
1303: return(ret);
1304: }
1305:
1.50 daniel 1306: /**
1307: * xmlParseAttValue:
1308: * @ctxt: an XML parser context
1309: *
1310: * parse a value for an attribute
1.29 daniel 1311: *
1312: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1313: * "'" ([^<&'] | Reference)* "'"
1.50 daniel 1314: * return values: the AttValue parsed or NULL.
1.29 daniel 1315: */
1316:
1317: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1318: CHAR *ret = NULL, *cur;
1.29 daniel 1319: const CHAR *q;
1320:
1.40 daniel 1321: if (CUR == '"') {
1322: NEXT;
1.29 daniel 1323:
1.40 daniel 1324: q = CUR_PTR;
1325: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1326: if (CUR == '&') {
1.46 daniel 1327: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1328: cur = xmlParseReference(ctxt);
1329: if (cur != NULL) {
1330: /*
1331: * Special case for '&', we don't want to
1332: * resolve it here since it will break later
1333: * when searching entities in the string.
1334: */
1335: if ((cur[0] == '&') && (cur[1] == 0)) {
1336: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1337: ret = xmlStrncat(ret, buf, 5);
1338: } else
1339: ret = xmlStrcat(ret, cur);
1340: free(cur);
1341: }
1.46 daniel 1342: q = CUR_PTR;
1.29 daniel 1343: } else
1.40 daniel 1344: NEXT;
1.50 daniel 1345: /*
1346: * Pop out finished entity references.
1347: */
1348: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1349: if (CUR_PTR != q)
1350: ret = xmlStrncat(ret, q, CUR_PTR - q);
1351: xmlPopInput(ctxt);
1352: q = CUR_PTR;
1353: }
1.29 daniel 1354: }
1.40 daniel 1355: if (!IS_CHAR(CUR)) {
1.31 daniel 1356: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 1357: } else {
1.46 daniel 1358: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1359: NEXT;
1.29 daniel 1360: }
1.40 daniel 1361: } else if (CUR == '\'') {
1362: NEXT;
1363: q = CUR_PTR;
1364: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1365: if (CUR == '&') {
1.46 daniel 1366: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1367: cur = xmlParseReference(ctxt);
1368: if (cur != NULL) {
1369: /*
1370: * Special case for '&', we don't want to
1371: * resolve it here since it will break later
1372: * when searching entities in the string.
1373: */
1374: if ((cur[0] == '&') && (cur[1] == 0)) {
1375: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1376: ret = xmlStrncat(ret, buf, 5);
1377: } else
1378: ret = xmlStrcat(ret, cur);
1379: free(cur);
1380: }
1.46 daniel 1381: q = CUR_PTR;
1.29 daniel 1382: } else
1.40 daniel 1383: NEXT;
1.50 daniel 1384: /*
1385: * Pop out finished entity references.
1386: */
1387: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1388: if (CUR_PTR != q)
1389: ret = xmlStrncat(ret, q, CUR_PTR - q);
1390: xmlPopInput(ctxt);
1391: q = CUR_PTR;
1392: }
1.29 daniel 1393: }
1.40 daniel 1394: if (!IS_CHAR(CUR)) {
1.31 daniel 1395: xmlParserError(ctxt, "Unfinished AttValue\n");
1.29 daniel 1396: } else {
1.46 daniel 1397: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1398: NEXT;
1.29 daniel 1399: }
1400: } else {
1.31 daniel 1401: xmlParserError(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 1402: }
1403:
1404: return(ret);
1405: }
1406:
1.50 daniel 1407: /**
1408: * xmlParseSystemLiteral:
1409: * @ctxt: an XML parser context
1410: *
1411: * parse an XML Literal
1.21 daniel 1412: *
1.22 daniel 1413: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.50 daniel 1414: * return values: the SystemLiteral parsed or NULL
1.21 daniel 1415: */
1416:
1417: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1418: const CHAR *q;
1419: CHAR *ret = NULL;
1420:
1.40 daniel 1421: if (CUR == '"') {
1422: NEXT;
1423: q = CUR_PTR;
1424: while ((IS_CHAR(CUR)) && (CUR != '"'))
1425: NEXT;
1426: if (!IS_CHAR(CUR)) {
1.31 daniel 1427: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 1428: } else {
1.40 daniel 1429: ret = xmlStrndup(q, CUR_PTR - q);
1430: NEXT;
1.21 daniel 1431: }
1.40 daniel 1432: } else if (CUR == '\'') {
1433: NEXT;
1434: q = CUR_PTR;
1435: while ((IS_CHAR(CUR)) && (CUR != '\''))
1436: NEXT;
1437: if (!IS_CHAR(CUR)) {
1.31 daniel 1438: xmlParserError(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 1439: } else {
1.40 daniel 1440: ret = xmlStrndup(q, CUR_PTR - q);
1441: NEXT;
1.21 daniel 1442: }
1443: } else {
1.31 daniel 1444: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1445: }
1446:
1447: return(ret);
1448: }
1449:
1.50 daniel 1450: /**
1451: * xmlParsePubidLiteral:
1452: * @ctxt: an XML parser context
1.21 daniel 1453: *
1.50 daniel 1454: * parse an XML public literal
1455: * return values: the PubidLiteral parsed or NULL.
1.21 daniel 1456: */
1457:
1458: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1459: const CHAR *q;
1460: CHAR *ret = NULL;
1461: /*
1462: * Name ::= (Letter | '_') (NameChar)*
1463: */
1.40 daniel 1464: if (CUR == '"') {
1465: NEXT;
1466: q = CUR_PTR;
1467: while (IS_PUBIDCHAR(CUR)) NEXT;
1468: if (CUR != '"') {
1.31 daniel 1469: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1470: } else {
1.40 daniel 1471: ret = xmlStrndup(q, CUR_PTR - q);
1472: NEXT;
1.21 daniel 1473: }
1.40 daniel 1474: } else if (CUR == '\'') {
1475: NEXT;
1476: q = CUR_PTR;
1477: while ((IS_LETTER(CUR)) && (CUR != '\''))
1478: NEXT;
1479: if (!IS_LETTER(CUR)) {
1.31 daniel 1480: xmlParserError(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1481: } else {
1.40 daniel 1482: ret = xmlStrndup(q, CUR_PTR - q);
1483: NEXT;
1.21 daniel 1484: }
1485: } else {
1.31 daniel 1486: xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1487: }
1488:
1489: return(ret);
1490: }
1491:
1.50 daniel 1492: /**
1493: * xmlParseCharData:
1494: * @ctxt: an XML parser context
1495: * @cdata: int indicating whether we are within a CDATA section
1496: *
1497: * parse a CharData section.
1498: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 1499: *
1500: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1.50 daniel 1501: * return values:
1.27 daniel 1502: */
1503:
1.45 daniel 1504: void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1505: const CHAR *q;
1506:
1.40 daniel 1507: q = CUR_PTR;
1508: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1509: (CUR != '&')) {
1510: NEXT;
1511: if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
1512: (NXT(2) == '>')) break;
1.27 daniel 1513: }
1.45 daniel 1514: if (q == CUR_PTR) return;
1515:
1516: /*
1517: * Ok the segment [q CUR_PTR] is to be consumed as chars.
1518: */
1519: if (ctxt->sax != NULL) {
1520: if (areBlanks(ctxt, q, CUR_PTR - q))
1521: ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1522: else
1523: ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1524: }
1.27 daniel 1525: }
1526:
1.50 daniel 1527: /**
1528: * xmlParseExternalID:
1529: * @ctxt: an XML parser context
1530: * @publicID: a CHAR** receiving PubidLiteral
1531: *
1532: * Parse an External ID
1.22 daniel 1533: *
1534: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1535: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.50 daniel 1536: * return values: the function returns SystemLiteral and in the second
1537: * case publicID receives PubidLiteral
1.22 daniel 1538: */
1539:
1.39 daniel 1540: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1541: CHAR *URI = NULL;
1.22 daniel 1542:
1.40 daniel 1543: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1544: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1545: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1546: SKIP(6);
1.42 daniel 1547: SKIP_BLANKS;
1.39 daniel 1548: URI = xmlParseSystemLiteral(ctxt);
1549: if (URI == NULL)
1.31 daniel 1550: xmlParserError(ctxt,
1.39 daniel 1551: "xmlParseExternalID: SYSTEM, no URI\n");
1.40 daniel 1552: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1553: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1554: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1555: SKIP(6);
1.42 daniel 1556: SKIP_BLANKS;
1.39 daniel 1557: *publicID = xmlParsePubidLiteral(ctxt);
1558: if (*publicID == NULL)
1.31 daniel 1559: xmlParserError(ctxt,
1.39 daniel 1560: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.42 daniel 1561: SKIP_BLANKS;
1.39 daniel 1562: URI = xmlParseSystemLiteral(ctxt);
1563: if (URI == NULL)
1.31 daniel 1564: xmlParserError(ctxt,
1.39 daniel 1565: "xmlParseExternalID: PUBLIC, no URI\n");
1.22 daniel 1566: }
1.39 daniel 1567: return(URI);
1.22 daniel 1568: }
1569:
1.50 daniel 1570: /**
1571: * xmlParseComment:
1572: * @create: should we create a node
1573: *
1.3 veillard 1574: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1575: * This may or may not create a node (depending on the context)
1.38 daniel 1576: * The spec says that "For compatibility, the string "--" (double-hyphen)
1577: * must not occur within comments. "
1.22 daniel 1578: *
1579: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.50 daniel 1580: *
1581: * TODO: this should call a SAX function which will handle (or not) the
1582: * creation of the comment !
1583: * return values:
1.3 veillard 1584: */
1.31 daniel 1585: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1586: xmlNodePtr ret = NULL;
1.17 daniel 1587: const CHAR *q, *start;
1588: const CHAR *r;
1.39 daniel 1589: CHAR *val;
1.3 veillard 1590:
1591: /*
1.22 daniel 1592: * Check that there is a comment right here.
1.3 veillard 1593: */
1.40 daniel 1594: if ((CUR != '<') || (NXT(1) != '!') ||
1595: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1596:
1.40 daniel 1597: SKIP(4);
1598: start = q = CUR_PTR;
1599: NEXT;
1600: r = CUR_PTR;
1601: NEXT;
1602: while (IS_CHAR(CUR) &&
1603: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1604: (*r != '-') || (*q != '-'))) {
1.38 daniel 1605: if ((*r == '-') && (*q == '-'))
1606: xmlParserError(ctxt,
1607: "Comment must not contain '--' (double-hyphen)`\n");
1.40 daniel 1608: NEXT;r++;q++;
1.3 veillard 1609: }
1.40 daniel 1610: if (!IS_CHAR(CUR)) {
1.31 daniel 1611: xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.3 veillard 1612: } else {
1.40 daniel 1613: NEXT;
1.31 daniel 1614: if (create) {
1.39 daniel 1615: val = xmlStrndup(start, q - start);
1.50 daniel 1616: ret = xmlNewDocComment(ctxt->doc, val);
1.39 daniel 1617: free(val);
1.31 daniel 1618: }
1.3 veillard 1619: }
1.39 daniel 1620: return(ret);
1.3 veillard 1621: }
1622:
1.50 daniel 1623: /**
1624: * xmlParsePITarget:
1625: * @ctxt: an XML parser context
1626: *
1627: * parse the name of a PI
1.22 daniel 1628: *
1629: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.50 daniel 1630: * return values: the PITarget name or NULL
1.22 daniel 1631: */
1632:
1633: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1634: CHAR *name;
1635:
1636: name = xmlParseName(ctxt);
1637: if ((name != NULL) && (name[3] == 0) &&
1638: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1639: ((name[1] == 'm') || (name[1] == 'M')) &&
1640: ((name[2] == 'l') || (name[2] == 'L'))) {
1641: xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1642: return(NULL);
1643: }
1644: return(name);
1645: }
1646:
1.50 daniel 1647: /**
1648: * xmlParsePI:
1649: * @ctxt: an XML parser context
1650: *
1651: * parse an XML Processing Instruction.
1.22 daniel 1652: *
1653: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.50 daniel 1654: * return values: the PI name or NULL
1.3 veillard 1655: */
1656:
1.16 daniel 1657: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1658: CHAR *target;
1659:
1.40 daniel 1660: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1661: /*
1662: * this is a Processing Instruction.
1663: */
1.40 daniel 1664: SKIP(2);
1.3 veillard 1665:
1666: /*
1.22 daniel 1667: * Parse the target name and check for special support like
1668: * namespace.
1669: *
1670: * TODO : PI handling should be dynamically redefinable using an
1671: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1672: */
1.22 daniel 1673: target = xmlParsePITarget(ctxt);
1674: if (target != NULL) {
1675: /*
1.44 daniel 1676: * Support for the old Processing Instruction related to namespace.
1.22 daniel 1677: */
1678: if ((target[0] == 'n') && (target[1] == 'a') &&
1679: (target[2] == 'm') && (target[3] == 'e') &&
1680: (target[4] == 's') && (target[5] == 'p') &&
1681: (target[6] == 'a') && (target[7] == 'c') &&
1682: (target[8] == 'e')) {
1683: xmlParseNamespace(ctxt);
1684: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1685: (target[2] == 'l') && (target[3] == ':') &&
1686: (target[4] == 'n') && (target[5] == 'a') &&
1687: (target[6] == 'm') && (target[7] == 'e') &&
1688: (target[8] == 's') && (target[9] == 'p') &&
1689: (target[10] == 'a') && (target[11] == 'c') &&
1690: (target[12] == 'e')) {
1691: xmlParseNamespace(ctxt);
1692: } else {
1.44 daniel 1693: const CHAR *q = CUR_PTR;
1694:
1.40 daniel 1695: while (IS_CHAR(CUR) &&
1696: ((CUR != '?') || (NXT(1) != '>')))
1697: NEXT;
1698: if (!IS_CHAR(CUR)) {
1.31 daniel 1699: xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
1700: target);
1.44 daniel 1701: } else {
1702: CHAR *data;
1703:
1704: data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1.40 daniel 1705: SKIP(2);
1.44 daniel 1706:
1707: /*
1708: * SAX: PI detected.
1709: */
1710: if (ctxt->sax)
1711: ctxt->sax->processingInstruction(ctxt, target, data);
1712: /*
1713: * Unknown PI, ignore it !
1714: */
1715: else
1716: xmlParserWarning(ctxt,
1717: "xmlParsePI : skipping unknown PI %s\n",
1718: target);
1719: free(data);
1720: }
1.22 daniel 1721: }
1.39 daniel 1722: free(target);
1.3 veillard 1723: } else {
1.31 daniel 1724: xmlParserError(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1725: /********* Should we try to complete parsing the PI ???
1.40 daniel 1726: while (IS_CHAR(CUR) &&
1727: (CUR != '?') && (CUR != '>'))
1728: NEXT;
1729: if (!IS_CHAR(CUR)) {
1.22 daniel 1730: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1731: target);
1732: }
1733: ********************************************************/
1734: }
1735: }
1736: }
1737:
1.50 daniel 1738: /**
1739: * xmlParseNotationDecl:
1740: * @ctxt: an XML parser context
1741: *
1742: * parse a notation declaration
1.22 daniel 1743: *
1744: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1745: *
1746: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1747: *
1748: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1749: * 'PUBLIC' S PubidLiteral S SystemLiteral
1750: *
1751: * Hence there is actually 3 choices:
1752: * 'PUBLIC' S PubidLiteral
1753: * 'PUBLIC' S PubidLiteral S SystemLiteral
1754: * and 'SYSTEM' S SystemLiteral
1.50 daniel 1755: *
1756: * TODO: no handling of the values parsed !
1.22 daniel 1757: */
1758:
1759: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1760: CHAR *name;
1761:
1.40 daniel 1762: if ((CUR == '<') && (NXT(1) == '!') &&
1763: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1764: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1765: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1766: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1767: (IS_BLANK(NXT(10)))) {
1768: SKIP(10);
1.42 daniel 1769: SKIP_BLANKS;
1.22 daniel 1770:
1771: name = xmlParseName(ctxt);
1772: if (name == NULL) {
1.31 daniel 1773: xmlParserError(ctxt,
1774: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1775: return;
1776: }
1.42 daniel 1777: SKIP_BLANKS;
1.22 daniel 1778: /*
1.31 daniel 1779: * TODO !!!
1.22 daniel 1780: */
1.40 daniel 1781: while ((IS_CHAR(CUR)) && (CUR != '>'))
1782: NEXT;
1.22 daniel 1783: free(name);
1784: }
1785: }
1786:
1.50 daniel 1787: /**
1788: * xmlParseEntityDecl:
1789: * @ctxt: an XML parser context
1790: *
1791: * parse <!ENTITY declarations
1.22 daniel 1792: *
1793: * [70] EntityDecl ::= GEDecl | PEDecl
1794: *
1795: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1796: *
1797: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1798: *
1799: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1800: *
1801: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1802: *
1803: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1804: */
1805:
1806: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 1807: CHAR *name = NULL;
1.24 daniel 1808: CHAR *value = NULL;
1.39 daniel 1809: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 1810: CHAR *ndata = NULL;
1.39 daniel 1811: int isParameter = 0;
1.22 daniel 1812:
1.40 daniel 1813: if ((CUR == '<') && (NXT(1) == '!') &&
1814: (NXT(2) == 'E') && (NXT(3) == 'N') &&
1815: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1816: (NXT(6) == 'T') && (NXT(7) == 'Y') &&
1817: (IS_BLANK(NXT(8)))) {
1818: SKIP(8);
1.42 daniel 1819: SKIP_BLANKS;
1.40 daniel 1820:
1821: if (CUR == '%') {
1822: NEXT;
1.42 daniel 1823: SKIP_BLANKS;
1.39 daniel 1824: isParameter = 1;
1.22 daniel 1825: }
1826:
1827: name = xmlParseName(ctxt);
1.24 daniel 1828: if (name == NULL) {
1.31 daniel 1829: xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1830: return;
1831: }
1.42 daniel 1832: SKIP_BLANKS;
1.24 daniel 1833:
1.22 daniel 1834: /*
1.24 daniel 1835: * TODO handle the various case of definitions...
1.22 daniel 1836: */
1.39 daniel 1837: if (isParameter) {
1.40 daniel 1838: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 1839: value = xmlParseEntityValue(ctxt);
1.39 daniel 1840: if (value) {
1841: xmlAddDocEntity(ctxt->doc, name,
1842: XML_INTERNAL_PARAMETER_ENTITY,
1843: NULL, NULL, value);
1844: }
1.24 daniel 1845: else {
1.39 daniel 1846: URI = xmlParseExternalID(ctxt, &literal);
1847: if (URI) {
1848: xmlAddDocEntity(ctxt->doc, name,
1849: XML_EXTERNAL_PARAMETER_ENTITY,
1850: literal, URI, NULL);
1851: }
1.24 daniel 1852: }
1853: } else {
1.40 daniel 1854: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 1855: value = xmlParseEntityValue(ctxt);
1.39 daniel 1856: xmlAddDocEntity(ctxt->doc, name,
1857: XML_INTERNAL_GENERAL_ENTITY,
1858: NULL, NULL, value);
1859: } else {
1860: URI = xmlParseExternalID(ctxt, &literal);
1.42 daniel 1861: SKIP_BLANKS;
1.40 daniel 1862: if ((CUR == 'N') && (NXT(1) == 'D') &&
1863: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1864: (NXT(4) == 'A')) {
1865: SKIP(5);
1.42 daniel 1866: SKIP_BLANKS;
1.24 daniel 1867: ndata = xmlParseName(ctxt);
1.39 daniel 1868: xmlAddDocEntity(ctxt->doc, name,
1869: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1870: literal, URI, ndata);
1871: } else {
1872: xmlAddDocEntity(ctxt->doc, name,
1873: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1874: literal, URI, NULL);
1.24 daniel 1875: }
1876: }
1877: }
1.42 daniel 1878: SKIP_BLANKS;
1.40 daniel 1879: if (CUR != '>') {
1.31 daniel 1880: xmlParserError(ctxt,
1881: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1882: } else
1.40 daniel 1883: NEXT;
1.39 daniel 1884: if (name != NULL) free(name);
1885: if (value != NULL) free(value);
1886: if (URI != NULL) free(URI);
1887: if (literal != NULL) free(literal);
1888: if (ndata != NULL) free(ndata);
1.22 daniel 1889: }
1890: }
1891:
1.50 daniel 1892: /**
1893: * xmlParseEnumeratedType:
1894: * @ctxt: an XML parser context
1895: * @name: ???
1896: * @:
1897: *
1898: * parse and Enumerated attribute type.
1.22 daniel 1899: *
1900: * [57] EnumeratedType ::= NotationType | Enumeration
1901: *
1902: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1903: *
1904: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1.50 daniel 1905: *
1906: * TODO: not implemented !!!
1.22 daniel 1907: */
1908:
1909: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1910: /*
1911: * TODO !!!
1912: */
1.40 daniel 1913: while ((IS_CHAR(CUR)) && (CUR != '>'))
1914: NEXT;
1.22 daniel 1915: }
1916:
1.50 daniel 1917: /**
1918: * xmlParseAttributeType:
1919: * @ctxt: an XML parser context
1920: * @name: ???
1921: *
1922: * : parse the Attribute list def for an element
1.22 daniel 1923: *
1924: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1925: *
1926: * [55] StringType ::= 'CDATA'
1927: *
1928: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1929: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 1930: *
1931: * TODO: not implemented !!!
1.22 daniel 1932: */
1933: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.45 daniel 1934: /* TODO !!! */
1.40 daniel 1935: if ((CUR == 'C') && (NXT(1) == 'D') &&
1936: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1937: (NXT(4) == 'A')) {
1938: SKIP(5);
1939: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
1940: SKIP(2);
1941: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1942: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1943: (NXT(4) == 'F')) {
1944: SKIP(5);
1945: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
1946: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1947: (NXT(4) == 'F') && (NXT(5) == 'S')) {
1948: SKIP(6);
1949: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1950: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1951: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
1952: SKIP(6);
1953: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
1954: (NXT(2) == 'T') && (NXT(3) == 'I') &&
1955: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1956: (NXT(6) == 'E') && (NXT(7) == 'S')) {
1957: SKIP(8);
1958: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1959: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1960: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1961: (NXT(6) == 'N')) {
1962: SKIP(7);
1963: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
1964: (NXT(2) == 'T') && (NXT(3) == 'O') &&
1965: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1966: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.22 daniel 1967: } else {
1968: xmlParseEnumeratedType(ctxt, name);
1969: }
1970: }
1971:
1.50 daniel 1972: /**
1973: * xmlParseAttributeListDecl:
1974: * @ctxt: an XML parser context
1975: *
1976: * : parse the Attribute list def for an element
1.22 daniel 1977: *
1978: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1979: *
1980: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 1981: *
1982: * TODO: not implemented !!!
1.22 daniel 1983: */
1984: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1985: CHAR *name;
1986:
1.45 daniel 1987: /* TODO !!! */
1.40 daniel 1988: if ((CUR == '<') && (NXT(1) == '!') &&
1989: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1990: (NXT(4) == 'T') && (NXT(5) == 'L') &&
1991: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1992: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
1993: SKIP(9);
1.42 daniel 1994: SKIP_BLANKS;
1.22 daniel 1995: name = xmlParseName(ctxt);
1996: if (name == NULL) {
1.31 daniel 1997: xmlParserError(ctxt,
1998: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1999: return;
2000: }
1.42 daniel 2001: SKIP_BLANKS;
1.40 daniel 2002: while (CUR != '>') {
2003: const CHAR *check = CUR_PTR;
1.22 daniel 2004:
2005: xmlParseAttributeType(ctxt, name);
1.42 daniel 2006: SKIP_BLANKS;
1.40 daniel 2007: if (check == CUR_PTR) {
1.31 daniel 2008: xmlParserError(ctxt,
2009: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 2010: break;
2011: }
2012: }
1.40 daniel 2013: if (CUR == '>')
2014: NEXT;
1.22 daniel 2015:
2016: free(name);
2017: }
2018: }
2019:
1.50 daniel 2020: /**
2021: * xmlParseElementContentDecl:
2022: * @ctxt: an XML parser context
2023: * @name: ???
2024: *
2025: * parse the declaration for an Element content
2026: * either Mixed or Children, the cases EMPTY and ANY being handled
1.22 daniel 2027: *
2028: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2029: *
2030: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2031: *
2032: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2033: *
2034: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2035: *
2036: * or
2037: *
2038: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2039: * '(' S? '#PCDATA' S? ')'
1.50 daniel 2040: *
2041: * TODO: not implemented !!!
1.22 daniel 2042: */
2043:
2044: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
2045: /*
2046: * TODO This has to be parsed correctly, currently we just skip until
2047: * we reach the first '>'.
1.31 daniel 2048: * !!!
1.22 daniel 2049: */
1.40 daniel 2050: while ((IS_CHAR(CUR)) && (CUR != '>'))
2051: NEXT;
1.22 daniel 2052: }
2053:
1.50 daniel 2054: /**
2055: * xmlParseElementDecl:
2056: * @ctxt: an XML parser context
2057: *
2058: * parse an Element declaration.
1.22 daniel 2059: *
2060: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2061: *
2062: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
2063: *
2064: * TODO There is a check [ VC: Unique Element Type Declaration ]
2065: */
2066: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
2067: CHAR *name;
2068:
1.40 daniel 2069: if ((CUR == '<') && (NXT(1) == '!') &&
2070: (NXT(2) == 'E') && (NXT(3) == 'L') &&
2071: (NXT(4) == 'E') && (NXT(5) == 'M') &&
2072: (NXT(6) == 'E') && (NXT(7) == 'N') &&
2073: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
2074: SKIP(9);
1.42 daniel 2075: SKIP_BLANKS;
1.22 daniel 2076: name = xmlParseName(ctxt);
2077: if (name == NULL) {
1.31 daniel 2078: xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 2079: return;
2080: }
1.42 daniel 2081: SKIP_BLANKS;
1.40 daniel 2082: if ((CUR == 'E') && (NXT(1) == 'M') &&
2083: (NXT(2) == 'P') && (NXT(3) == 'T') &&
2084: (NXT(4) == 'Y')) {
2085: SKIP(5);
1.22 daniel 2086: /*
2087: * Element must always be empty.
2088: */
1.40 daniel 2089: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2090: (NXT(2) == 'Y')) {
2091: SKIP(3);
1.22 daniel 2092: /*
2093: * Element is a generic container.
2094: */
2095: } else {
2096: xmlParseElementContentDecl(ctxt, name);
2097: }
1.42 daniel 2098: SKIP_BLANKS;
1.40 daniel 2099: if (CUR != '>') {
1.31 daniel 2100: xmlParserError(ctxt,
2101: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 2102: } else
1.40 daniel 2103: NEXT;
1.22 daniel 2104: }
2105: }
2106:
1.50 daniel 2107: /**
2108: * xmlParseMarkupDecl:
2109: * @ctxt: an XML parser context
2110: *
2111: * parse Markup declarations
1.22 daniel 2112: *
2113: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2114: * NotationDecl | PI | Comment
2115: *
2116: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2117: */
2118: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
2119: xmlParseElementDecl(ctxt);
2120: xmlParseAttributeListDecl(ctxt);
2121: xmlParseEntityDecl(ctxt);
2122: xmlParseNotationDecl(ctxt);
2123: xmlParsePI(ctxt);
1.31 daniel 2124: xmlParseComment(ctxt, 0);
1.22 daniel 2125: }
2126:
1.50 daniel 2127: /**
2128: * xmlParseCharRef:
2129: * @ctxt: an XML parser context
2130: *
2131: * parse Reference declarations
1.24 daniel 2132: *
2133: * [66] CharRef ::= '&#' [0-9]+ ';' |
2134: * '&#x' [0-9a-fA-F]+ ';'
1.50 daniel 2135: * return values: the value parsed
1.24 daniel 2136: */
1.50 daniel 2137: CHAR *xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 2138: int val = 0;
1.44 daniel 2139: CHAR buf[2];
1.24 daniel 2140:
1.40 daniel 2141: if ((CUR == '&') && (NXT(1) == '#') &&
2142: (NXT(2) == 'x')) {
2143: SKIP(3);
2144: while (CUR != ';') {
2145: if ((CUR >= '0') && (CUR <= '9'))
2146: val = val * 16 + (CUR - '0');
2147: else if ((CUR >= 'a') && (CUR <= 'f'))
2148: val = val * 16 + (CUR - 'a') + 10;
2149: else if ((CUR >= 'A') && (CUR <= 'F'))
2150: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 2151: else {
1.31 daniel 2152: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 2153: val = 0;
1.24 daniel 2154: break;
2155: }
1.47 daniel 2156: NEXT;
1.24 daniel 2157: }
1.50 daniel 2158: if (CUR != ';')
1.40 daniel 2159: NEXT;
2160: } else if ((CUR == '&') && (NXT(1) == '#')) {
2161: SKIP(2);
2162: while (CUR != ';') {
2163: if ((CUR >= '0') && (CUR <= '9'))
2164: val = val * 16 + (CUR - '0');
1.24 daniel 2165: else {
1.31 daniel 2166: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 2167: val = 0;
1.24 daniel 2168: break;
2169: }
1.47 daniel 2170: NEXT;
1.24 daniel 2171: }
1.50 daniel 2172: if (CUR != ';')
1.40 daniel 2173: NEXT;
1.24 daniel 2174: } else {
1.31 daniel 2175: xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 2176: }
1.29 daniel 2177: /*
2178: * Check the value IS_CHAR ...
2179: */
1.44 daniel 2180: if (IS_CHAR(val)) {
2181: buf[0] = (CHAR) val;
2182: buf[1] = 0;
1.50 daniel 2183: return(xmlStrndup(buf, 1));
1.44 daniel 2184: } else {
1.39 daniel 2185: xmlParserError(ctxt, "xmlParseCharRef: invalid value");
1.29 daniel 2186: }
1.46 daniel 2187: return(NULL);
1.24 daniel 2188: }
2189:
1.50 daniel 2190: /**
2191: * xmlParseEntityRef:
2192: * @ctxt: an XML parser context
2193: *
2194: * parse ENTITY references declarations
1.24 daniel 2195: *
2196: * [68] EntityRef ::= '&' Name ';'
1.52 daniel 2197: * return values: the entity ref string or NULL if directly as input stream.
1.24 daniel 2198: */
1.50 daniel 2199: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.46 daniel 2200: CHAR *ret = NULL;
1.50 daniel 2201: const CHAR *q;
1.24 daniel 2202: CHAR *name;
1.50 daniel 2203: xmlParserInputPtr input = NULL;
1.24 daniel 2204:
1.50 daniel 2205: q = CUR_PTR;
1.40 daniel 2206: if (CUR == '&') {
2207: NEXT;
1.24 daniel 2208: name = xmlParseName(ctxt);
2209: if (name == NULL) {
1.31 daniel 2210: xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 2211: } else {
1.40 daniel 2212: if (CUR == ';') {
2213: NEXT;
1.24 daniel 2214: /*
1.52 daniel 2215: * We parsed the entity reference correctly, call SAX
2216: * interface for the proper behaviour:
2217: * - get a new input stream
2218: * - or keep the reference inline
1.24 daniel 2219: */
1.52 daniel 2220: if (ctxt->sax)
2221: input = ctxt->sax->resolveEntity(ctxt, NULL, name);
2222: if (input != NULL)
2223: xmlPushInput(ctxt, input);
2224: else {
2225: ret = xmlStrndup(q, CUR_PTR - q);
2226: }
1.24 daniel 2227: } else {
1.46 daniel 2228: char cst[2] = { '&', 0 };
2229:
1.31 daniel 2230: xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.46 daniel 2231: ret = xmlStrndup(cst, 1);
2232: ret = xmlStrcat(ret, name);
1.24 daniel 2233: }
1.45 daniel 2234: free(name);
1.24 daniel 2235: }
2236: }
1.46 daniel 2237: return(ret);
1.24 daniel 2238: }
2239:
1.50 daniel 2240: /**
2241: * xmlParseReference:
2242: * @ctxt: an XML parser context
2243: *
2244: * parse Reference declarations
1.24 daniel 2245: *
2246: * [67] Reference ::= EntityRef | CharRef
1.52 daniel 2247: * return values: the entity string or NULL if handled directly by pushing
2248: * the entity value as the input.
1.24 daniel 2249: */
1.50 daniel 2250: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
1.44 daniel 2251: if ((CUR == '&') && (NXT(1) == '#')) {
1.50 daniel 2252: return(xmlParseCharRef(ctxt));
1.44 daniel 2253: } else if (CUR == '&') {
1.50 daniel 2254: return(xmlParseEntityRef(ctxt));
1.24 daniel 2255: }
1.46 daniel 2256: return(NULL);
1.24 daniel 2257: }
2258:
1.50 daniel 2259: /**
2260: * xmlParsePEReference:
2261: * @ctxt: an XML parser context
2262: *
2263: * parse PEReference declarations
1.22 daniel 2264: *
2265: * [69] PEReference ::= '%' Name ';'
1.50 daniel 2266: * return values: the entity content or NULL if handled directly.
1.22 daniel 2267: */
1.50 daniel 2268: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.46 daniel 2269: CHAR *ret = NULL;
1.22 daniel 2270: CHAR *name;
1.45 daniel 2271: xmlEntityPtr entity;
1.50 daniel 2272: xmlParserInputPtr input;
1.22 daniel 2273:
1.40 daniel 2274: if (CUR == '%') {
2275: NEXT;
1.22 daniel 2276: name = xmlParseName(ctxt);
2277: if (name == NULL) {
1.31 daniel 2278: xmlParserError(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 2279: } else {
1.40 daniel 2280: if (CUR == ';') {
2281: NEXT;
1.45 daniel 2282: entity = xmlGetDtdEntity(ctxt->doc, name);
2283: if (entity == NULL) {
2284: xmlParserWarning(ctxt,
2285: "xmlParsePEReference: %%%s; not found\n");
1.50 daniel 2286: } else {
2287: input = xmlNewEntityInputStream(ctxt, entity);
2288: xmlPushInput(ctxt, input);
1.45 daniel 2289: }
1.22 daniel 2290: } else {
1.50 daniel 2291: char cst[2] = { '%', 0 };
1.46 daniel 2292:
1.31 daniel 2293: xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
1.46 daniel 2294: ret = xmlStrndup(cst, 1);
2295: ret = xmlStrcat(ret, name);
1.22 daniel 2296: }
1.45 daniel 2297: free(name);
1.3 veillard 2298: }
2299: }
1.46 daniel 2300: return(ret);
1.3 veillard 2301: }
2302:
1.50 daniel 2303: /**
2304: * xmlParseDocTypeDecl :
2305: * @ctxt: an XML parser context
2306: *
2307: * parse a DOCTYPE declaration
1.21 daniel 2308: *
1.22 daniel 2309: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
2310: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 2311: */
2312:
2313: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 2314: xmlDtdPtr dtd;
1.21 daniel 2315: CHAR *name;
2316: CHAR *ExternalID = NULL;
1.39 daniel 2317: CHAR *URI = NULL;
1.21 daniel 2318:
2319: /*
2320: * We know that '<!DOCTYPE' has been detected.
2321: */
1.40 daniel 2322: SKIP(9);
1.21 daniel 2323:
1.42 daniel 2324: SKIP_BLANKS;
1.21 daniel 2325:
2326: /*
2327: * Parse the DOCTYPE name.
2328: */
2329: name = xmlParseName(ctxt);
2330: if (name == NULL) {
1.31 daniel 2331: xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 2332: }
2333:
1.42 daniel 2334: SKIP_BLANKS;
1.21 daniel 2335:
2336: /*
1.22 daniel 2337: * Check for SystemID and ExternalID
2338: */
1.39 daniel 2339: URI = xmlParseExternalID(ctxt, &ExternalID);
1.42 daniel 2340: SKIP_BLANKS;
1.36 daniel 2341:
1.39 daniel 2342: dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
1.22 daniel 2343:
2344: /*
2345: * Is there any DTD definition ?
2346: */
1.40 daniel 2347: if (CUR == '[') {
2348: NEXT;
1.22 daniel 2349: /*
2350: * Parse the succession of Markup declarations and
2351: * PEReferences.
2352: * Subsequence (markupdecl | PEReference | S)*
2353: */
1.40 daniel 2354: while (CUR != ']') {
2355: const CHAR *check = CUR_PTR;
1.22 daniel 2356:
1.42 daniel 2357: SKIP_BLANKS;
1.22 daniel 2358: xmlParseMarkupDecl(ctxt);
1.50 daniel 2359: xmlParsePEReference(ctxt);
1.22 daniel 2360:
1.40 daniel 2361: if (CUR_PTR == check) {
1.31 daniel 2362: xmlParserError(ctxt,
2363: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 2364: break;
2365: }
2366: }
1.40 daniel 2367: if (CUR == ']') NEXT;
1.22 daniel 2368: }
2369:
2370: /*
2371: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 2372: */
1.40 daniel 2373: if (CUR != '>') {
1.31 daniel 2374: xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 2375: /* We shouldn't try to resynchronize ... */
1.21 daniel 2376: }
1.40 daniel 2377: NEXT;
1.22 daniel 2378:
2379: /*
2380: * Cleanup, since we don't use all those identifiers
2381: * TODO : the DOCTYPE if available should be stored !
2382: */
1.39 daniel 2383: if (URI != NULL) free(URI);
1.22 daniel 2384: if (ExternalID != NULL) free(ExternalID);
2385: if (name != NULL) free(name);
1.21 daniel 2386: }
2387:
1.50 daniel 2388: /**
2389: * xmlParseAttribute:
2390: * @ctxt: an XML parser context
2391: * @node: the node carrying the attribute
2392: *
2393: * parse an attribute
1.3 veillard 2394: *
1.22 daniel 2395: * [41] Attribute ::= Name Eq AttValue
2396: *
2397: * [25] Eq ::= S? '=' S?
2398: *
1.29 daniel 2399: * With namespace:
2400: *
2401: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 2402: *
2403: * Also the case QName == xmlns:??? is handled independently as a namespace
2404: * definition.
1.3 veillard 2405: */
2406:
1.52 daniel 2407: xmlAttrPtr xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
2408: CHAR *name;
1.29 daniel 2409: CHAR *ns;
1.52 daniel 2410: CHAR *value = NULL;
2411: xmlAttrPtr ret;
1.3 veillard 2412:
1.29 daniel 2413: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 2414: if (name == NULL) {
1.31 daniel 2415: xmlParserError(ctxt, "error parsing attribute name\n");
1.52 daniel 2416: return(NULL);
1.3 veillard 2417: }
2418:
2419: /*
1.29 daniel 2420: * read the value
1.3 veillard 2421: */
1.42 daniel 2422: SKIP_BLANKS;
1.40 daniel 2423: if (CUR == '=') {
2424: NEXT;
1.42 daniel 2425: SKIP_BLANKS;
1.29 daniel 2426: value = xmlParseAttValue(ctxt);
2427: } else {
1.31 daniel 2428: xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
2429: name);
1.3 veillard 2430: }
2431:
2432: /*
1.43 daniel 2433: * Check whether it's a namespace definition
2434: */
2435: if ((ns == NULL) &&
2436: (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
2437: (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
2438: /* a default namespace definition */
2439: xmlNewNs(node, value, NULL);
2440: if (name != NULL)
2441: free(name);
2442: if (value != NULL)
2443: free(value);
1.52 daniel 2444: return(NULL);
1.43 daniel 2445: }
2446: if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
2447: (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
2448: /* a standard namespace definition */
2449: xmlNewNs(node, value, name);
1.50 daniel 2450: free(ns);
1.43 daniel 2451: if (name != NULL)
2452: free(name);
2453: if (value != NULL)
2454: free(value);
1.52 daniel 2455: return(NULL);
1.43 daniel 2456: }
2457:
1.52 daniel 2458: ret = xmlNewProp(ctxt->node, name, NULL);
2459: if (ret != NULL)
2460: ret->val = xmlStringGetNodeList(ctxt->doc, value);
1.53 ! daniel 2461:
! 2462: if (ns != NULL)
! 2463: free(ns);
! 2464: if (value != NULL)
! 2465: free(value);
! 2466: free(name);
1.52 daniel 2467: return(ret);
1.3 veillard 2468: }
2469:
1.50 daniel 2470: /**
2471: * xmlParseStartTag:
2472: * @ctxt: an XML parser context
2473: *
2474: * parse a start of tag either for rule element or
2475: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 2476: *
2477: * [40] STag ::= '<' Name (S Attribute)* S? '>'
2478: *
1.29 daniel 2479: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
2480: *
2481: * With namespace:
2482: *
2483: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
2484: *
2485: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.50 daniel 2486: *
2487: * return values: the XML new node or NULL.
1.2 veillard 2488: */
2489:
1.16 daniel 2490: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 2491: CHAR *namespace, *name;
2492: xmlNsPtr ns = NULL;
1.2 veillard 2493: xmlNodePtr ret = NULL;
1.50 daniel 2494: xmlNodePtr parent = ctxt->node;
1.2 veillard 2495:
1.40 daniel 2496: if (CUR != '<') return(NULL);
2497: NEXT;
1.3 veillard 2498:
1.34 daniel 2499: name = xmlNamespaceParseQName(ctxt, &namespace);
1.50 daniel 2500: if (name == NULL) return(NULL);
1.3 veillard 2501:
1.43 daniel 2502: /*
2503: * Note : the namespace resolution is deferred until the end of the
2504: * attributes parsing, since local namespace can be defined as
2505: * an attribute at this level.
2506: */
1.50 daniel 2507: ret = xmlNewDocNode(ctxt->doc, ns, name, NULL);
2508: if (ret == NULL) {
2509: if (namespace != NULL)
2510: free(namespace);
2511: free(name);
2512: return(NULL);
2513: }
2514:
2515: /*
2516: * We are parsing a new node.
2517: */
2518: nodePush(ctxt, ret);
1.2 veillard 2519:
1.3 veillard 2520: /*
2521: * Now parse the attributes, it ends up with the ending
2522: *
2523: * (S Attribute)* S?
2524: */
1.42 daniel 2525: SKIP_BLANKS;
1.40 daniel 2526: while ((IS_CHAR(CUR)) &&
2527: (CUR != '>') &&
2528: ((CUR != '/') || (NXT(1) != '>'))) {
2529: const CHAR *q = CUR_PTR;
1.29 daniel 2530:
2531: xmlParseAttribute(ctxt, ret);
1.42 daniel 2532: SKIP_BLANKS;
1.29 daniel 2533:
1.40 daniel 2534: if (q == CUR_PTR) {
1.31 daniel 2535: xmlParserError(ctxt,
2536: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 2537: break;
1.3 veillard 2538: }
2539: }
2540:
1.43 daniel 2541: /*
2542: * Search the namespace
2543: */
2544: ns = xmlSearchNs(ctxt->doc, ret, namespace);
2545: if (ns == NULL) /* ret still doesn't have a parent yet ! */
1.50 daniel 2546: ns = xmlSearchNs(ctxt->doc, parent, namespace);
1.43 daniel 2547: xmlSetNs(ret, ns);
2548: if (namespace != NULL)
2549: free(namespace);
2550:
1.44 daniel 2551: /*
2552: * SAX: Start of Element !
2553: */
2554: if (ctxt->sax != NULL)
2555: ctxt->sax->startElement(ctxt, name);
1.52 daniel 2556: free(name);
2557:
2558: /*
2559: * Link the child element
2560: */
2561: if (ctxt->nodeNr < 2) return(ret);
2562: parent = ctxt->nodeTab[ctxt->nodeNr - 2];
2563: if (parent != NULL)
2564: xmlAddChild(parent, ctxt->node);
1.44 daniel 2565:
1.3 veillard 2566: return(ret);
2567: }
2568:
1.50 daniel 2569: /**
2570: * xmlParseEndTag:
2571: * @ctxt: an XML parser context
2572: * @nsPtr: the current node namespace definition
2573: * @tagPtr: CHAR** receive the tag value
2574: *
2575: * parse an end of tag
1.27 daniel 2576: *
2577: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 2578: *
2579: * With namespace
2580: *
2581: * [9] ETag ::= '</' QName S? '>'
1.50 daniel 2582: *
2583: * return values: tagPtr receive the tag name just read
1.7 veillard 2584: */
2585:
1.34 daniel 2586: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
2587: CHAR *namespace, *name;
2588: xmlNsPtr ns = NULL;
1.7 veillard 2589:
1.34 daniel 2590: *nsPtr = NULL;
1.7 veillard 2591: *tagPtr = NULL;
2592:
1.40 daniel 2593: if ((CUR != '<') || (NXT(1) != '/')) {
1.31 daniel 2594: xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 2595: return;
2596: }
1.40 daniel 2597: SKIP(2);
1.7 veillard 2598:
1.34 daniel 2599: name = xmlNamespaceParseQName(ctxt, &namespace);
1.43 daniel 2600:
2601: /*
2602: * Search the namespace
2603: */
2604: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2605: if (namespace != NULL)
1.34 daniel 2606: free(namespace);
1.7 veillard 2607:
1.34 daniel 2608: *nsPtr = ns;
1.7 veillard 2609: *tagPtr = name;
2610:
2611: /*
2612: * We should definitely be at the ending "S? '>'" part
2613: */
1.42 daniel 2614: SKIP_BLANKS;
1.40 daniel 2615: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.31 daniel 2616: xmlParserError(ctxt, "End tag : expected '>'\n");
1.7 veillard 2617: } else
1.40 daniel 2618: NEXT;
1.7 veillard 2619:
2620: return;
2621: }
2622:
1.50 daniel 2623: /**
2624: * xmlParseCDSect:
2625: * @ctxt: an XML parser context
2626: *
2627: * Parse escaped pure raw content.
1.29 daniel 2628: *
2629: * [18] CDSect ::= CDStart CData CDEnd
2630: *
2631: * [19] CDStart ::= '<![CDATA['
2632: *
2633: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2634: *
2635: * [21] CDEnd ::= ']]>'
1.3 veillard 2636: */
1.45 daniel 2637: void xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2638: const CHAR *r, *s, *base;
1.3 veillard 2639:
1.40 daniel 2640: if ((CUR == '<') && (NXT(1) == '!') &&
2641: (NXT(2) == '[') && (NXT(3) == 'C') &&
2642: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2643: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2644: (NXT(8) == '[')) {
2645: SKIP(9);
1.29 daniel 2646: } else
1.45 daniel 2647: return;
1.40 daniel 2648: base = CUR_PTR;
2649: if (!IS_CHAR(CUR)) {
1.31 daniel 2650: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2651: return;
1.3 veillard 2652: }
1.40 daniel 2653: r = NEXT;
2654: if (!IS_CHAR(CUR)) {
1.31 daniel 2655: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2656: return;
1.3 veillard 2657: }
1.40 daniel 2658: s = NEXT;
2659: while (IS_CHAR(CUR) &&
2660: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
2661: r++;s++;NEXT;
1.3 veillard 2662: }
1.40 daniel 2663: if (!IS_CHAR(CUR)) {
1.31 daniel 2664: xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2665: return;
1.3 veillard 2666: }
1.16 daniel 2667:
1.45 daniel 2668: /*
2669: * Ok the segment [base CUR_PTR] is to be consumed as chars.
2670: */
2671: if (ctxt->sax != NULL) {
2672: if (areBlanks(ctxt, base, CUR_PTR - base))
2673: ctxt->sax->ignorableWhitespace(ctxt, base, 0, CUR_PTR - base);
2674: else
2675: ctxt->sax->characters(ctxt, base, 0, CUR_PTR - base);
2676: }
1.2 veillard 2677: }
2678:
1.50 daniel 2679: /**
2680: * xmlParseContent:
2681: * @ctxt: an XML parser context
2682: *
2683: * Parse a content:
1.2 veillard 2684: *
1.27 daniel 2685: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2686: */
2687:
1.45 daniel 2688: void xmlParseContent(xmlParserCtxtPtr ctxt) {
1.2 veillard 2689: xmlNodePtr ret = NULL;
2690:
1.40 daniel 2691: while ((CUR != '<') || (NXT(1) != '/')) {
2692: const CHAR *test = CUR_PTR;
1.27 daniel 2693: ret = NULL;
2694:
2695: /*
2696: * First case : a Processing Instruction.
2697: */
1.40 daniel 2698: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 2699: xmlParsePI(ctxt);
2700: }
2701: /*
2702: * Second case : a CDSection
2703: */
1.40 daniel 2704: else if ((CUR == '<') && (NXT(1) == '!') &&
2705: (NXT(2) == '[') && (NXT(3) == 'C') &&
2706: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2707: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2708: (NXT(8) == '[')) {
1.45 daniel 2709: xmlParseCDSect(ctxt);
1.27 daniel 2710: }
2711: /*
2712: * Third case : a comment
2713: */
1.40 daniel 2714: else if ((CUR == '<') && (NXT(1) == '!') &&
2715: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 2716: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2717: }
2718: /*
2719: * Fourth case : a sub-element.
2720: */
1.40 daniel 2721: else if (CUR == '<') {
1.45 daniel 2722: ret = xmlParseElement(ctxt);
2723: }
2724: /*
1.50 daniel 2725: * Fifth case : a reference. If if has not been resolved,
2726: * parsing returns it's Name, create the node
1.45 daniel 2727: */
2728: else if (CUR == '&') {
1.50 daniel 2729: CHAR *val = xmlParseReference(ctxt);
2730: if (val != NULL) {
2731: if (val[0] != '&') {
2732: /*
2733: * inline predefined entity.
2734: */
2735: if (ctxt->sax != NULL)
2736: ctxt->sax->characters(ctxt, val, 0, xmlStrlen(val));
2737: } else {
2738: /*
2739: * user defined entity, create a node.
2740: */
2741: ret = xmlNewReference(ctxt->doc, val);
2742: xmlAddChild(ctxt->node, ret);
2743: }
2744: free(val);
2745: }
1.27 daniel 2746: }
2747: /*
2748: * Last case, text. Note that References are handled directly.
2749: */
2750: else {
1.45 daniel 2751: xmlParseCharData(ctxt, 0);
1.3 veillard 2752: }
1.14 veillard 2753:
2754: /*
1.45 daniel 2755: * Pop-up of finished entities.
1.14 veillard 2756: */
1.45 daniel 2757: while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
2758:
1.40 daniel 2759: if (test == CUR_PTR) {
1.31 daniel 2760: xmlParserError(ctxt, "detected an error in element content\n");
1.29 daniel 2761: break;
2762: }
1.3 veillard 2763: }
1.2 veillard 2764: }
2765:
1.50 daniel 2766: /**
2767: * xmlParseElement:
2768: * @ctxt: an XML parser context
2769: *
2770: * parse an XML element, this is highly recursive
1.26 daniel 2771: *
2772: * [39] element ::= EmptyElemTag | STag content ETag
2773: *
2774: * [41] Attribute ::= Name Eq AttValue
1.50 daniel 2775: * return values: the XML new node or NULL
1.2 veillard 2776: */
1.26 daniel 2777:
1.2 veillard 2778:
1.45 daniel 2779: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2780: xmlNodePtr ret;
1.40 daniel 2781: const CHAR *openTag = CUR_PTR;
1.32 daniel 2782: xmlParserNodeInfo node_info;
1.27 daniel 2783: CHAR *endTag;
1.34 daniel 2784: xmlNsPtr endNs;
1.2 veillard 2785:
1.32 daniel 2786: /* Capture start position */
1.40 daniel 2787: node_info.begin_pos = CUR_PTR - ctxt->input->base;
2788: node_info.begin_line = ctxt->input->line;
1.32 daniel 2789:
1.16 daniel 2790: ret = xmlParseStartTag(ctxt);
1.3 veillard 2791: if (ret == NULL) {
2792: return(NULL);
2793: }
1.2 veillard 2794:
2795: /*
2796: * Check for an Empty Element.
2797: */
1.40 daniel 2798: if ((CUR == '/') && (NXT(1) == '>')) {
2799: SKIP(2);
1.45 daniel 2800: if (ctxt->sax != NULL)
2801: ctxt->sax->endElement(ctxt, ret->name);
2802:
2803: /*
2804: * end of parsing of this node.
2805: */
2806: nodePop(ctxt);
2807:
1.2 veillard 2808: return(ret);
2809: }
1.40 daniel 2810: if (CUR == '>') NEXT;
1.2 veillard 2811: else {
1.31 daniel 2812: xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
1.45 daniel 2813:
2814: /*
2815: * end of parsing of this node.
2816: */
2817: nodePop(ctxt);
2818:
1.16 daniel 2819: return(NULL);
1.2 veillard 2820: }
2821:
2822: /*
2823: * Parse the content of the element:
2824: */
1.45 daniel 2825: xmlParseContent(ctxt);
1.40 daniel 2826: if (!IS_CHAR(CUR)) {
1.31 daniel 2827: xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
2828: openTag);
1.45 daniel 2829:
2830: /*
2831: * end of parsing of this node.
2832: */
2833: nodePop(ctxt);
2834:
1.16 daniel 2835: return(NULL);
1.2 veillard 2836: }
2837:
2838: /*
1.27 daniel 2839: * parse the end of tag: '</' should be here.
1.2 veillard 2840: */
1.34 daniel 2841: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 2842:
1.27 daniel 2843: /*
2844: * Check that the Name in the ETag is the same as in the STag.
2845: */
1.34 daniel 2846: if (endNs != ret->ns) {
1.31 daniel 2847: xmlParserError(ctxt,
1.43 daniel 2848: "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
1.31 daniel 2849: openTag, endTag);
1.27 daniel 2850: }
1.32 daniel 2851: if (endTag == NULL ) {
2852: xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
1.45 daniel 2853: } else if (xmlStrcmp(ret->name, endTag)) {
1.31 daniel 2854: xmlParserError(ctxt,
2855: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2856: openTag, endTag);
1.27 daniel 2857: }
1.44 daniel 2858: /*
2859: * SAX: End of Tag
2860: */
2861: else if (ctxt->sax != NULL)
2862: ctxt->sax->endElement(ctxt, endTag);
1.7 veillard 2863:
1.44 daniel 2864: if (endTag != NULL)
2865: free(endTag);
1.2 veillard 2866:
1.32 daniel 2867: /* Capture end position and add node */
2868: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 2869: node_info.end_pos = CUR_PTR - ctxt->input->base;
2870: node_info.end_line = ctxt->input->line;
1.32 daniel 2871: node_info.node = ret;
2872: xmlParserAddNodeInfo(ctxt, &node_info);
2873: }
1.43 daniel 2874:
2875: /*
2876: * end of parsing of this node.
2877: */
2878: nodePop(ctxt);
2879:
1.2 veillard 2880: return(ret);
2881: }
2882:
1.50 daniel 2883: /**
2884: * xmlParseVersionNum:
2885: * @ctxt: an XML parser context
2886: *
2887: * parse the XML version value.
1.29 daniel 2888: *
2889: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.50 daniel 2890: * return values: the string giving the XML version number, or NULL
1.29 daniel 2891: */
2892: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 2893: const CHAR *q = CUR_PTR;
1.29 daniel 2894: CHAR *ret;
2895:
1.40 daniel 2896: while (IS_CHAR(CUR) &&
2897: (((CUR >= 'a') && (CUR <= 'z')) ||
2898: ((CUR >= 'A') && (CUR <= 'Z')) ||
2899: ((CUR >= '0') && (CUR <= '9')) ||
2900: (CUR == '_') || (CUR == '.') ||
2901: (CUR == ':') || (CUR == '-'))) NEXT;
2902: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2903: return(ret);
2904: }
2905:
1.50 daniel 2906: /**
2907: * xmlParseVersionInfo:
2908: * @ctxt: an XML parser context
2909: *
2910: * parse the XML version.
1.29 daniel 2911: *
2912: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2913: *
2914: * [25] Eq ::= S? '=' S?
1.50 daniel 2915: *
2916: * return values: the version string, e.g. "1.0"
1.29 daniel 2917: */
2918:
2919: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2920: CHAR *version = NULL;
2921: const CHAR *q;
2922:
1.40 daniel 2923: if ((CUR == 'v') && (NXT(1) == 'e') &&
2924: (NXT(2) == 'r') && (NXT(3) == 's') &&
2925: (NXT(4) == 'i') && (NXT(5) == 'o') &&
2926: (NXT(6) == 'n')) {
2927: SKIP(7);
1.42 daniel 2928: SKIP_BLANKS;
1.40 daniel 2929: if (CUR != '=') {
1.31 daniel 2930: xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
2931: return(NULL);
2932: }
1.40 daniel 2933: NEXT;
1.42 daniel 2934: SKIP_BLANKS;
1.40 daniel 2935: if (CUR == '"') {
2936: NEXT;
2937: q = CUR_PTR;
1.29 daniel 2938: version = xmlParseVersionNum(ctxt);
1.40 daniel 2939: if (CUR != '"')
1.31 daniel 2940: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2941: else
1.40 daniel 2942: NEXT;
2943: } else if (CUR == '\''){
2944: NEXT;
2945: q = CUR_PTR;
1.29 daniel 2946: version = xmlParseVersionNum(ctxt);
1.40 daniel 2947: if (CUR != '\'')
1.31 daniel 2948: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 2949: else
1.40 daniel 2950: NEXT;
1.31 daniel 2951: } else {
2952: xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 2953: }
2954: }
2955: return(version);
2956: }
2957:
1.50 daniel 2958: /**
2959: * xmlParseEncName:
2960: * @ctxt: an XML parser context
2961: *
2962: * parse the XML encoding name
1.29 daniel 2963: *
2964: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 2965: *
2966: * return values: the encoding name value or NULL
1.29 daniel 2967: */
2968: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 2969: const CHAR *q = CUR_PTR;
1.29 daniel 2970: CHAR *ret = NULL;
2971:
1.40 daniel 2972: if (((CUR >= 'a') && (CUR <= 'z')) ||
2973: ((CUR >= 'A') && (CUR <= 'Z'))) {
2974: NEXT;
2975: while (IS_CHAR(CUR) &&
2976: (((CUR >= 'a') && (CUR <= 'z')) ||
2977: ((CUR >= 'A') && (CUR <= 'Z')) ||
2978: ((CUR >= '0') && (CUR <= '9')) ||
2979: (CUR == '-'))) NEXT;
2980: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 2981: } else {
1.31 daniel 2982: xmlParserError(ctxt, "Invalid XML encoding name\n");
1.29 daniel 2983: }
2984: return(ret);
2985: }
2986:
1.50 daniel 2987: /**
2988: * xmlParseEncodingDecl:
2989: * @ctxt: an XML parser context
2990: *
2991: * parse the XML encoding declaration
1.29 daniel 2992: *
2993: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 2994: *
2995: * TODO: this should setup the conversion filters.
2996: *
2997: * return values: the encoding value or NULL
1.29 daniel 2998: */
2999:
3000: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
3001: CHAR *encoding = NULL;
3002: const CHAR *q;
3003:
1.42 daniel 3004: SKIP_BLANKS;
1.40 daniel 3005: if ((CUR == 'e') && (NXT(1) == 'n') &&
3006: (NXT(2) == 'c') && (NXT(3) == 'o') &&
3007: (NXT(4) == 'd') && (NXT(5) == 'i') &&
3008: (NXT(6) == 'n') && (NXT(7) == 'g')) {
3009: SKIP(8);
1.42 daniel 3010: SKIP_BLANKS;
1.40 daniel 3011: if (CUR != '=') {
1.31 daniel 3012: xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
3013: return(NULL);
3014: }
1.40 daniel 3015: NEXT;
1.42 daniel 3016: SKIP_BLANKS;
1.40 daniel 3017: if (CUR == '"') {
3018: NEXT;
3019: q = CUR_PTR;
1.29 daniel 3020: encoding = xmlParseEncName(ctxt);
1.40 daniel 3021: if (CUR != '"')
1.31 daniel 3022: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 3023: else
1.40 daniel 3024: NEXT;
3025: } else if (CUR == '\''){
3026: NEXT;
3027: q = CUR_PTR;
1.29 daniel 3028: encoding = xmlParseEncName(ctxt);
1.40 daniel 3029: if (CUR != '\'')
1.31 daniel 3030: xmlParserError(ctxt, "String not closed\n%.50s\n", q);
1.29 daniel 3031: else
1.40 daniel 3032: NEXT;
3033: } else if (CUR == '"'){
1.31 daniel 3034: xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 3035: }
3036: }
3037: return(encoding);
3038: }
3039:
1.50 daniel 3040: /**
3041: * xmlParseSDDecl:
3042: * @ctxt: an XML parser context
3043: *
3044: * parse the XML standalone declaration
1.29 daniel 3045: *
3046: * [32] SDDecl ::= S 'standalone' Eq
3047: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.50 daniel 3048: * return values: 1 if standalone, 0 otherwise
1.29 daniel 3049: */
3050:
3051: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
3052: int standalone = -1;
3053:
1.42 daniel 3054: SKIP_BLANKS;
1.40 daniel 3055: if ((CUR == 's') && (NXT(1) == 't') &&
3056: (NXT(2) == 'a') && (NXT(3) == 'n') &&
3057: (NXT(4) == 'd') && (NXT(5) == 'a') &&
3058: (NXT(6) == 'l') && (NXT(7) == 'o') &&
3059: (NXT(8) == 'n') && (NXT(9) == 'e')) {
3060: SKIP(10);
3061: if (CUR != '=') {
1.32 daniel 3062: xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
3063: return(standalone);
3064: }
1.40 daniel 3065: NEXT;
1.42 daniel 3066: SKIP_BLANKS;
1.40 daniel 3067: if (CUR == '\''){
3068: NEXT;
3069: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3070: standalone = 0;
1.40 daniel 3071: SKIP(2);
3072: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3073: (NXT(2) == 's')) {
1.29 daniel 3074: standalone = 1;
1.40 daniel 3075: SKIP(3);
1.29 daniel 3076: } else {
1.31 daniel 3077: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 3078: }
1.40 daniel 3079: if (CUR != '\'')
1.31 daniel 3080: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 3081: else
1.40 daniel 3082: NEXT;
3083: } else if (CUR == '"'){
3084: NEXT;
3085: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3086: standalone = 0;
1.40 daniel 3087: SKIP(2);
3088: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3089: (NXT(2) == 's')) {
1.29 daniel 3090: standalone = 1;
1.40 daniel 3091: SKIP(3);
1.29 daniel 3092: } else {
1.31 daniel 3093: xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 3094: }
1.40 daniel 3095: if (CUR != '"')
1.31 daniel 3096: xmlParserError(ctxt, "String not closed\n");
1.29 daniel 3097: else
1.40 daniel 3098: NEXT;
1.37 daniel 3099: } else {
3100: xmlParserError(ctxt, "Standalone value not found\n");
3101: }
1.29 daniel 3102: }
3103: return(standalone);
3104: }
3105:
1.50 daniel 3106: /**
3107: * xmlParseXMLDecl:
3108: * @ctxt: an XML parser context
3109: *
3110: * parse an XML declaration header
1.29 daniel 3111: *
3112: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 3113: */
3114:
1.16 daniel 3115: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 3116: CHAR *version;
3117:
3118: /*
1.19 daniel 3119: * We know that '<?xml' is here.
1.1 veillard 3120: */
1.40 daniel 3121: SKIP(5);
1.1 veillard 3122:
1.42 daniel 3123: SKIP_BLANKS;
1.1 veillard 3124:
3125: /*
1.29 daniel 3126: * We should have the VersionInfo here.
1.1 veillard 3127: */
1.29 daniel 3128: version = xmlParseVersionInfo(ctxt);
3129: if (version == NULL)
1.45 daniel 3130: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3131: ctxt->doc = xmlNewDoc(version);
3132: free(version);
1.29 daniel 3133:
3134: /*
3135: * We may have the encoding declaration
3136: */
1.32 daniel 3137: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 3138:
3139: /*
1.29 daniel 3140: * We may have the standalone status.
1.1 veillard 3141: */
1.32 daniel 3142: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 3143:
1.42 daniel 3144: SKIP_BLANKS;
1.40 daniel 3145: if ((CUR == '?') && (NXT(1) == '>')) {
3146: SKIP(2);
3147: } else if (CUR == '>') {
1.31 daniel 3148: /* Deprecated old WD ... */
3149: xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
1.40 daniel 3150: NEXT;
1.29 daniel 3151: } else {
1.31 daniel 3152: xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
1.40 daniel 3153: MOVETO_ENDTAG(CUR_PTR);
3154: NEXT;
1.29 daniel 3155: }
1.1 veillard 3156: }
3157:
1.50 daniel 3158: /**
3159: * xmlParseMisc:
3160: * @ctxt: an XML parser context
3161: *
3162: * parse an XML Misc* optionnal field.
1.21 daniel 3163: *
1.22 daniel 3164: * [27] Misc ::= Comment | PI | S
1.1 veillard 3165: */
3166:
1.16 daniel 3167: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 3168: while (((CUR == '<') && (NXT(1) == '?')) ||
3169: ((CUR == '<') && (NXT(1) == '!') &&
3170: (NXT(2) == '-') && (NXT(3) == '-')) ||
3171: IS_BLANK(CUR)) {
3172: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 3173: xmlParsePI(ctxt);
1.40 daniel 3174: } else if (IS_BLANK(CUR)) {
3175: NEXT;
1.1 veillard 3176: } else
1.31 daniel 3177: xmlParseComment(ctxt, 0);
1.1 veillard 3178: }
3179: }
3180:
1.50 daniel 3181: /**
3182: * xmlParseDocument :
3183: * @ctxt: an XML parser context
3184: *
3185: * parse an XML document (and build a tree if using the standard SAX
3186: * interface).
1.21 daniel 3187: *
1.22 daniel 3188: * [1] document ::= prolog element Misc*
1.29 daniel 3189: *
3190: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 3191: *
3192: * return values: 0, -1 in case of error. the parser context is augmented
3193: * as a result of the parsing.
1.1 veillard 3194: */
3195:
1.16 daniel 3196: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 3197: xmlDefaultSAXHandlerInit();
3198:
1.14 veillard 3199: /*
1.44 daniel 3200: * SAX: beginning of the document processing.
3201: */
3202: if (ctxt->sax)
3203: ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
3204: if (ctxt->sax)
3205: ctxt->sax->startDocument(ctxt);
3206:
3207: /*
1.14 veillard 3208: * We should check for encoding here and plug-in some
3209: * conversion code TODO !!!!
3210: */
1.1 veillard 3211:
3212: /*
3213: * Wipe out everything which is before the first '<'
3214: */
1.42 daniel 3215: SKIP_BLANKS;
1.1 veillard 3216:
3217: /*
3218: * Check for the XMLDecl in the Prolog.
3219: */
1.40 daniel 3220: if ((CUR == '<') && (NXT(1) == '?') &&
3221: (NXT(2) == 'x') && (NXT(3) == 'm') &&
3222: (NXT(4) == 'l')) {
1.19 daniel 3223: xmlParseXMLDecl(ctxt);
3224: /* SKIP_EOL(cur); */
1.42 daniel 3225: SKIP_BLANKS;
1.40 daniel 3226: } else if ((CUR == '<') && (NXT(1) == '?') &&
3227: (NXT(2) == 'X') && (NXT(3) == 'M') &&
3228: (NXT(4) == 'L')) {
1.19 daniel 3229: /*
3230: * The first drafts were using <?XML and the final W3C REC
3231: * now use <?xml ...
3232: */
1.16 daniel 3233: xmlParseXMLDecl(ctxt);
1.1 veillard 3234: /* SKIP_EOL(cur); */
1.42 daniel 3235: SKIP_BLANKS;
1.1 veillard 3236: } else {
1.45 daniel 3237: CHAR *version;
3238:
3239: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3240: ctxt->doc = xmlNewDoc(version);
3241: free(version);
1.1 veillard 3242: }
3243:
3244: /*
3245: * The Misc part of the Prolog
3246: */
1.16 daniel 3247: xmlParseMisc(ctxt);
1.1 veillard 3248:
3249: /*
1.29 daniel 3250: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 3251: * (doctypedecl Misc*)?
3252: */
1.40 daniel 3253: if ((CUR == '<') && (NXT(1) == '!') &&
3254: (NXT(2) == 'D') && (NXT(3) == 'O') &&
3255: (NXT(4) == 'C') && (NXT(5) == 'T') &&
3256: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
3257: (NXT(8) == 'E')) {
1.22 daniel 3258: xmlParseDocTypeDecl(ctxt);
3259: xmlParseMisc(ctxt);
1.21 daniel 3260: }
3261:
3262: /*
3263: * Time to start parsing the tree itself
1.1 veillard 3264: */
1.45 daniel 3265: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 3266:
3267: /*
3268: * The Misc part at the end
3269: */
3270: xmlParseMisc(ctxt);
1.16 daniel 3271:
1.44 daniel 3272: /*
3273: * SAX: end of the document processing.
3274: */
3275: if (ctxt->sax)
3276: ctxt->sax->endDocument(ctxt);
1.16 daniel 3277: return(0);
3278: }
3279:
1.50 daniel 3280: /**
3281: * xmlParseDoc :
3282: * @cur: a pointer to an array of CHAR
3283: *
3284: * parse an XML in-memory document and build a tree.
3285: *
3286: * return values: the resulting document tree
1.16 daniel 3287: */
3288:
3289: xmlDocPtr xmlParseDoc(CHAR *cur) {
3290: xmlDocPtr ret;
3291: xmlParserCtxtPtr ctxt;
1.40 daniel 3292: xmlParserInputPtr input;
1.16 daniel 3293:
3294: if (cur == NULL) return(NULL);
1.1 veillard 3295:
1.16 daniel 3296: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3297: if (ctxt == NULL) {
3298: perror("malloc");
3299: return(NULL);
3300: }
1.40 daniel 3301: xmlInitParserCtxt(ctxt);
3302: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3303: if (input == NULL) {
3304: perror("malloc");
3305: free(ctxt);
3306: return(NULL);
3307: }
3308:
3309: input->filename = NULL;
3310: input->line = 1;
3311: input->col = 1;
3312: input->base = cur;
3313: input->cur = cur;
3314:
3315: inputPush(ctxt, input);
1.16 daniel 3316:
3317:
3318: xmlParseDocument(ctxt);
3319: ret = ctxt->doc;
1.50 daniel 3320: free(ctxt->nodeTab);
3321: free(ctxt->inputTab);
3322: if (input->filename != NULL)
1.51 daniel 3323: free((char *)input->filename);
1.50 daniel 3324: free(input);
1.16 daniel 3325: free(ctxt);
3326:
1.1 veillard 3327: return(ret);
3328: }
3329:
1.50 daniel 3330: /**
3331: * xmlParseFile :
3332: * @filename: the filename
3333: *
3334: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
3335: * compressed document is provided by default if found at compile-time.
3336: *
3337: * return values: the resulting document tree
1.9 httpng 3338: */
3339:
3340: xmlDocPtr xmlParseFile(const char *filename) {
3341: xmlDocPtr ret;
1.20 daniel 3342: #ifdef HAVE_ZLIB_H
3343: gzFile input;
3344: #else
1.9 httpng 3345: int input;
1.20 daniel 3346: #endif
1.9 httpng 3347: int res;
3348: struct stat buf;
3349: char *buffer;
1.16 daniel 3350: xmlParserCtxtPtr ctxt;
1.40 daniel 3351: xmlParserInputPtr inputStream;
1.9 httpng 3352:
1.11 veillard 3353: res = stat(filename, &buf);
1.9 httpng 3354: if (res < 0) return(NULL);
3355:
1.20 daniel 3356: #ifdef HAVE_ZLIB_H
3357: retry_bigger:
3358: buffer = malloc((buf.st_size * 20) + 100);
3359: #else
1.9 httpng 3360: buffer = malloc(buf.st_size + 100);
1.20 daniel 3361: #endif
1.9 httpng 3362: if (buffer == NULL) {
3363: perror("malloc");
3364: return(NULL);
3365: }
3366:
3367: memset(buffer, 0, sizeof(buffer));
1.20 daniel 3368: #ifdef HAVE_ZLIB_H
3369: input = gzopen (filename, "r");
3370: if (input == NULL) {
3371: fprintf (stderr, "Cannot read file %s :\n", filename);
3372: perror ("gzopen failed");
3373: return(NULL);
3374: }
3375: #else
1.9 httpng 3376: input = open (filename, O_RDONLY);
3377: if (input < 0) {
3378: fprintf (stderr, "Cannot read file %s :\n", filename);
3379: perror ("open failed");
3380: return(NULL);
3381: }
1.20 daniel 3382: #endif
3383: #ifdef HAVE_ZLIB_H
3384: res = gzread(input, buffer, 20 * buf.st_size);
3385: #else
1.9 httpng 3386: res = read(input, buffer, buf.st_size);
1.20 daniel 3387: #endif
1.9 httpng 3388: if (res < 0) {
3389: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 3390: #ifdef HAVE_ZLIB_H
3391: perror ("gzread failed");
3392: #else
1.9 httpng 3393: perror ("read failed");
1.20 daniel 3394: #endif
1.9 httpng 3395: return(NULL);
3396: }
1.20 daniel 3397: #ifdef HAVE_ZLIB_H
3398: gzclose(input);
1.50 daniel 3399: if (res >= 20 * buf.st_size + 20) {
1.20 daniel 3400: free(buffer);
3401: buf.st_size *= 2;
3402: goto retry_bigger;
3403: }
3404: buf.st_size = res;
3405: #else
1.9 httpng 3406: close(input);
1.20 daniel 3407: #endif
3408:
1.40 daniel 3409: buffer[buf.st_size] = '\0';
1.9 httpng 3410:
1.16 daniel 3411: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3412: if (ctxt == NULL) {
3413: perror("malloc");
3414: return(NULL);
3415: }
1.40 daniel 3416: xmlInitParserCtxt(ctxt);
3417: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3418: if (inputStream == NULL) {
3419: perror("malloc");
3420: free(ctxt);
3421: return(NULL);
3422: }
3423:
3424: inputStream->filename = strdup(filename);
3425: inputStream->line = 1;
3426: inputStream->col = 1;
1.45 daniel 3427:
3428: /*
3429: * TODO : plug some encoding conversion routines here. !!!
3430: */
1.40 daniel 3431: inputStream->base = buffer;
3432: inputStream->cur = buffer;
1.16 daniel 3433:
1.40 daniel 3434: inputPush(ctxt, inputStream);
1.16 daniel 3435:
3436: xmlParseDocument(ctxt);
1.40 daniel 3437:
1.16 daniel 3438: ret = ctxt->doc;
1.9 httpng 3439: free(buffer);
1.50 daniel 3440: free(ctxt->nodeTab);
3441: free(ctxt->inputTab);
3442: if (inputStream->filename != NULL)
1.51 daniel 3443: free((char *)inputStream->filename);
1.50 daniel 3444: free(inputStream);
1.20 daniel 3445: free(ctxt);
3446:
3447: return(ret);
3448: }
3449:
1.32 daniel 3450:
1.50 daniel 3451: /**
3452: * xmlParseMemory :
3453: * @cur: an pointer to a char array
3454: * @size: the siwe of the array
3455: *
3456: * parse an XML in-memory block and build a tree.
3457: *
3458: * TODO : plug some encoding conversion routines here. !!!
3459: *
3460: * return values: the resulting document tree
1.20 daniel 3461: */
1.50 daniel 3462:
1.20 daniel 3463: xmlDocPtr xmlParseMemory(char *buffer, int size) {
3464: xmlDocPtr ret;
3465: xmlParserCtxtPtr ctxt;
1.40 daniel 3466: xmlParserInputPtr input;
3467:
3468: buffer[size - 1] = '\0';
3469:
1.20 daniel 3470: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3471: if (ctxt == NULL) {
3472: perror("malloc");
3473: return(NULL);
3474: }
1.40 daniel 3475: xmlInitParserCtxt(ctxt);
3476: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3477: if (input == NULL) {
3478: perror("malloc");
1.50 daniel 3479: free(ctxt->nodeTab);
3480: free(ctxt->inputTab);
1.40 daniel 3481: free(ctxt);
3482: return(NULL);
3483: }
1.20 daniel 3484:
1.40 daniel 3485: input->filename = NULL;
3486: input->line = 1;
3487: input->col = 1;
1.45 daniel 3488:
3489: /*
3490: * TODO : plug some encoding conversion routines here. !!!
3491: */
1.40 daniel 3492: input->base = buffer;
3493: input->cur = buffer;
1.20 daniel 3494:
1.40 daniel 3495: inputPush(ctxt, input);
1.20 daniel 3496:
3497: xmlParseDocument(ctxt);
1.40 daniel 3498:
1.20 daniel 3499: ret = ctxt->doc;
1.50 daniel 3500: free(ctxt->nodeTab);
3501: free(ctxt->inputTab);
3502: if (input->filename != NULL)
1.51 daniel 3503: free((char *)input->filename);
1.50 daniel 3504: free(input);
1.16 daniel 3505: free(ctxt);
3506:
1.9 httpng 3507: return(ret);
1.17 daniel 3508: }
3509:
3510:
1.50 daniel 3511: /**
3512: * xmlInitParserCtxt:
3513: * @ctxt: an XML parser context
3514: *
3515: * Initialize a parser context
3516: */
3517:
1.17 daniel 3518: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
3519: {
1.40 daniel 3520: /* Allocate the Input stack */
3521: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
3522: ctxt->inputNr = 0;
3523: ctxt->inputMax = 5;
3524: ctxt->input = NULL;
3525:
1.43 daniel 3526: /* Allocate the Node stack */
3527: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
3528: ctxt->nodeNr = 0;
3529: ctxt->nodeMax = 10;
3530: ctxt->node = NULL;
3531:
1.45 daniel 3532: ctxt->sax = &xmlDefaultSAXHandler;
1.32 daniel 3533: ctxt->doc = NULL;
3534: ctxt->record_info = 0;
3535: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 3536: }
3537:
1.50 daniel 3538: /**
3539: * xmlClearParserCtxt:
3540: * @ctxt: an XML parser context
3541: *
3542: * Clear (release owned resources) and reinitialize a parser context
3543: */
1.17 daniel 3544:
1.32 daniel 3545: void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 3546: {
1.32 daniel 3547: xmlClearNodeInfoSeq(&ctxt->node_seq);
3548: xmlInitParserCtxt(ctxt);
1.17 daniel 3549: }
3550:
3551:
1.50 daniel 3552: /**
3553: * xmlSetupParserForBuffer:
3554: * @ctxt: an XML parser context
3555: * @buffer: a CHAR * buffer
3556: * @filename: a file name
3557: *
1.19 daniel 3558: * Setup the parser context to parse a new buffer; Clears any prior
3559: * contents from the parser context. The buffer parameter must not be
3560: * NULL, but the filename parameter can be
3561: */
1.50 daniel 3562:
1.17 daniel 3563: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
3564: const char* filename)
3565: {
1.40 daniel 3566: xmlParserInputPtr input;
3567:
3568: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3569: if (input == NULL) {
3570: perror("malloc");
3571: free(ctxt);
3572: exit(1);
3573: }
3574:
1.17 daniel 3575: xmlClearParserCtxt(ctxt);
1.40 daniel 3576: if (input->filename != NULL)
3577: input->filename = strdup(filename);
3578: else
3579: input->filename = NULL;
3580: input->line = 1;
3581: input->col = 1;
3582: input->base = buffer;
3583: input->cur = buffer;
3584:
3585: inputPush(ctxt, input);
1.17 daniel 3586: }
3587:
1.32 daniel 3588:
1.50 daniel 3589: /**
3590: * xmlParserFindNodeInfo:
3591: * @ctxt: an XML parser context
3592: * @node: an XML node within the tree
3593: *
3594: * Find the parser node info struct for a given node
3595: *
3596: * return values: an xmlParserNodeInfo block pointer or NULL
1.32 daniel 3597: */
3598: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
3599: const xmlNode* node)
3600: {
3601: unsigned long pos;
3602:
3603: /* Find position where node should be at */
3604: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3605: if ( ctx->node_seq.buffer[pos].node == node )
3606: return &ctx->node_seq.buffer[pos];
3607: else
3608: return NULL;
3609: }
3610:
3611:
1.50 daniel 3612: /**
3613: * xmlInitNodeInfoSeq :
3614: * @seq: a node info sequence pointer
3615: *
3616: * -- Initialize (set to initial state) node info sequence
1.32 daniel 3617: */
3618: void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3619: {
3620: seq->length = 0;
3621: seq->maximum = 0;
3622: seq->buffer = NULL;
3623: }
3624:
1.50 daniel 3625: /**
3626: * xmlClearNodeInfoSeq :
3627: * @seq: a node info sequence pointer
3628: *
3629: * -- Clear (release memory and reinitialize) node
1.32 daniel 3630: * info sequence
3631: */
3632: void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
3633: {
3634: if ( seq->buffer != NULL )
3635: free(seq->buffer);
3636: xmlInitNodeInfoSeq(seq);
3637: }
3638:
3639:
1.50 daniel 3640: /**
3641: * xmlParserFindNodeInfoIndex:
3642: * @seq: a node info sequence pointer
3643: * @node: an XML node pointer
3644: *
3645: *
1.32 daniel 3646: * xmlParserFindNodeInfoIndex : Find the index that the info record for
3647: * the given node is or should be at in a sorted sequence
1.50 daniel 3648: * return values: a long indicating the position of the record
1.32 daniel 3649: */
3650: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
3651: const xmlNode* node)
3652: {
3653: unsigned long upper, lower, middle;
3654: int found = 0;
3655:
3656: /* Do a binary search for the key */
3657: lower = 1;
3658: upper = seq->length;
3659: middle = 0;
3660: while ( lower <= upper && !found) {
3661: middle = lower + (upper - lower) / 2;
3662: if ( node == seq->buffer[middle - 1].node )
3663: found = 1;
3664: else if ( node < seq->buffer[middle - 1].node )
3665: upper = middle - 1;
3666: else
3667: lower = middle + 1;
3668: }
3669:
3670: /* Return position */
3671: if ( middle == 0 || seq->buffer[middle - 1].node < node )
3672: return middle;
3673: else
3674: return middle - 1;
3675: }
3676:
3677:
1.50 daniel 3678: /**
3679: * xmlParserAddNodeInfo:
3680: * @ctxt: an XML parser context
3681: * @seq: a node info sequence pointer
3682: *
3683: * Insert node info record into the sorted sequence
1.32 daniel 3684: */
3685: void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
3686: const xmlParserNodeInfo* info)
3687: {
3688: unsigned long pos;
3689: static unsigned int block_size = 5;
3690:
3691: /* Find pos and check to see if node is already in the sequence */
3692: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
3693: if ( pos < ctx->node_seq.length
3694: && ctx->node_seq.buffer[pos].node == info->node ) {
3695: ctx->node_seq.buffer[pos] = *info;
3696: }
3697:
3698: /* Otherwise, we need to add new node to buffer */
3699: else {
3700: /* Expand buffer by 5 if needed */
3701: if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
3702: xmlParserNodeInfo* tmp_buffer;
3703: unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
3704: *(ctx->node_seq.maximum + block_size));
3705:
3706: if ( ctx->node_seq.buffer == NULL )
3707: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
3708: else
3709: tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
3710:
3711: if ( tmp_buffer == NULL ) {
3712: xmlParserError(ctx, "Out of memory");
3713: return;
3714: }
3715: ctx->node_seq.buffer = tmp_buffer;
3716: ctx->node_seq.maximum += block_size;
3717: }
3718:
3719: /* If position is not at end, move elements out of the way */
3720: if ( pos != ctx->node_seq.length ) {
3721: unsigned long i;
3722:
3723: for ( i = ctx->node_seq.length; i > pos; i-- )
3724: ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
3725: }
3726:
3727: /* Copy element and increase length */
3728: ctx->node_seq.buffer[pos] = *info;
3729: ctx->node_seq.length++;
3730: }
3731: }
Webmaster