Annotation of XML/parser.c, revision 1.57
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.57 ! daniel 6: * $Id: parser.c,v 1.56 1998/11/13 01:19:39 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
1.45 daniel 34: /************************************************************************
35: * *
36: * Parser stacks related functions and macros *
37: * *
38: ************************************************************************/
1.1 veillard 39: /*
1.40 daniel 40: * Generic function for accessing stacks in the Parser Context
1.1 veillard 41: */
42:
1.31 daniel 43: #define PUSH_AND_POP(type, name) \
1.40 daniel 44: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 45: if (ctxt->name##Nr >= ctxt->name##Max) { \
46: ctxt->name##Max *= 2; \
1.40 daniel 47: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
48: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
49: if (ctxt->name##Tab == NULL) { \
1.31 daniel 50: fprintf(stderr, "realloc failed !\n"); \
51: exit(1); \
52: } \
53: } \
1.40 daniel 54: ctxt->name##Tab[ctxt->name##Nr] = value; \
55: ctxt->name = value; \
56: return(ctxt->name##Nr++); \
1.31 daniel 57: } \
1.40 daniel 58: type name##Pop(xmlParserCtxtPtr ctxt) { \
59: if (ctxt->name##Nr <= 0) return(0); \
60: ctxt->name##Nr--; \
1.50 daniel 61: if (ctxt->name##Nr > 0) \
62: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
63: else \
64: ctxt->name = NULL; \
1.40 daniel 65: return(ctxt->name); \
1.31 daniel 66: } \
67:
1.40 daniel 68: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 69: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 70:
1.55 daniel 71: /*
72: * Macros for accessing the content. Those should be used only by the parser,
73: * and not exported.
74: *
75: * Dirty macros, i.e. one needs to make assumptions about the context to use them
76: *
77: * CUR_PTR return the current pointer to the CHAR to be parsed.
78: * CUR returns the current CHAR value, i.e. a 8 bit value if compiled
79: * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
80: * in UNICODE mode. This should be used internally by the parser
81: * only to compare to ASCII values otherwise it would break when
82: * running with UTF-8 encoding.
83: * NXT(n) returns the n'th next CHAR. Same as CUR, it should be used only
84: * to compare against ASCII-based substrings.
85: * SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
86: * strings within the parser.
87: *
88: * Clean macros, not dependent of an ASCII context.
89: *
90: * CURRENT Returns the current char value, with the full decoding of
91: * UTF-8 if we are using this mode. It returns an int.
92: * NEXT Skip to the next character, this does the proper decoding
93: * in UTF-8 mode. It also pops exhausted inputs (e.g. entities) on the fly.
94: * It returns the pointer to the current CHAR.
95: */
1.45 daniel 96:
97: #define CUR (*ctxt->input->cur)
1.55 daniel 98: #define SKIP(val) ctxt->input->cur += (val)
99: #define NXT(val) ctxt->input->cur[(val)]
100: #define CUR_PTR ctxt->input->cur
101:
102: #define SKIP_BLANKS \
103: while (IS_BLANK(*(ctxt->input->cur))) NEXT
104:
105: #ifndef USE_UTF_8
106: #define CURRENT (*ctxt->input->cur)
1.45 daniel 107: #define NEXT ((*ctxt->input->cur) ? \
108: (((*(ctxt->input->cur) == '\n') ? \
109: (ctxt->input->line++, ctxt->input->col = 1) : \
110: (ctxt->input->col++)), ctxt->input->cur++) : \
111: (xmlPopInput(ctxt), ctxt->input->cur))
1.55 daniel 112: #else
113: #endif
1.42 daniel 114:
1.40 daniel 115:
1.50 daniel 116: /**
117: * xmlPopInput:
118: * @ctxt: an XML parser context
119: *
1.40 daniel 120: * xmlPopInput: the current input pointed by ctxt->input came to an end
121: * pop it and return the next char.
1.45 daniel 122: *
123: * TODO A deallocation of the popped Input structure is needed
1.50 daniel 124: * return values: the current CHAR in the parser context
1.40 daniel 125: */
1.55 daniel 126: CHAR
127: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 128: if (ctxt->inputNr == 1) return(0); /* End of main Input */
129: inputPop(ctxt);
130: return(CUR);
131: }
132:
1.50 daniel 133: /**
134: * xmlPushInput:
135: * @ctxt: an XML parser context
136: * @input: an XML parser input fragment (entity, XML fragment ...).
137: *
1.40 daniel 138: * xmlPushInput: switch to a new input stream which is stacked on top
139: * of the previous one(s).
140: */
1.55 daniel 141: void
142: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 143: if (input == NULL) return;
144: inputPush(ctxt, input);
145: }
146:
1.50 daniel 147: /**
148: * xmlNewEntityInputStream:
149: * @ctxt: an XML parser context
150: * @entity: an Entity pointer
151: *
1.45 daniel 152: * Create a new input stream based on a memory buffer.
1.50 daniel 153: * return vakues: the new input stream
1.45 daniel 154: */
1.50 daniel 155: xmlParserInputPtr
156: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 157: xmlParserInputPtr input;
158:
159: if (entity == NULL) {
1.55 daniel 160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
161: ctxt->sax->error(ctxt,
1.45 daniel 162: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 163: return(NULL);
1.45 daniel 164: }
165: if (entity->content == NULL) {
1.55 daniel 166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
167: ctxt->sax->error(ctxt,
1.45 daniel 168: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 169: return(NULL);
1.45 daniel 170: }
171: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
172: if (input == NULL) {
1.55 daniel 173: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
174: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
1.50 daniel 175: return(NULL);
1.45 daniel 176: }
177: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
178: input->base = entity->content;
179: input->cur = entity->content;
180: input->line = 1;
181: input->col = 1;
1.50 daniel 182: return(input);
1.45 daniel 183: }
184:
185: /*
1.40 daniel 186: * A few macros needed to help building the parser.
187: */
188:
1.1 veillard 189: #ifdef UNICODE
1.30 daniel 190: /************************************************************************
191: * *
192: * UNICODE version of the macros. *
193: * *
194: ************************************************************************/
1.1 veillard 195: /*
1.22 daniel 196: * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
197: * | [#x10000-#x10FFFF]
198: * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
1.1 veillard 199: */
200: #define IS_CHAR(c) \
201: (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
202: (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
203:
1.22 daniel 204: /*
205: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
206: */
1.42 daniel 207: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
208: ((c) == 0x0D))
1.1 veillard 209:
1.22 daniel 210: /*
1.30 daniel 211: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 212: *
1.30 daniel 213: * VI is your friend !
214: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
215: * and
216: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 217: */
1.1 veillard 218: #define IS_BASECHAR(c) \
1.30 daniel 219: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
220: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
221: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
222: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
223: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
224: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
225: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
226: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
227: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
228: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
229: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
230: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
231: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
232: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
233: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
234: ((c) == 0x0386) || \
235: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
236: ((c) == 0x038C) || \
237: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
238: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
239: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
240: ((c) == 0x03DA) || \
241: ((c) == 0x03DC) || \
242: ((c) == 0x03DE) || \
243: ((c) == 0x03E0) || \
244: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
245: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
246: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
247: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
248: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
249: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
250: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
251: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
252: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
253: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
254: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
255: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
256: ((c) == 0x0559) || \
257: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
258: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
259: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
260: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
261: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
262: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
263: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
264: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
265: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
266: ((c) == 0x06D5) || \
267: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
268: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
269: ((c) == 0x093D) || \
270: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
271: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
272: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
273: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
274: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
275: ((c) == 0x09B2) || \
276: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
277: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
278: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
279: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
280: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
281: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
282: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
283: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
284: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
285: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
286: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
287: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
288: ((c) == 0x0A5E) || \
289: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
290: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
291: ((c) == 0x0A8D) || \
292: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
293: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
294: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
295: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
296: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
297: ((c) == 0x0ABD) || \
298: ((c) == 0x0AE0) || \
299: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
300: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
301: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
302: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
303: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
304: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
305: ((c) == 0x0B3D) || \
306: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
307: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
308: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
309: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
310: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
311: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
312: ((c) == 0x0B9C) || \
313: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
314: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
315: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
316: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
317: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
318: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
319: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
320: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
321: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
322: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
323: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
324: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
325: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
326: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
327: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
328: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
329: ((c) == 0x0CDE) || \
330: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
331: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
332: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
333: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
334: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
335: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
336: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
337: ((c) == 0x0E30) || \
338: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
339: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
340: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
341: ((c) == 0x0E84) || \
342: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
343: ((c) == 0x0E8A) || \
344: ((c) == 0x0E8D) || \
345: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
346: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
347: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
348: ((c) == 0x0EA5) || \
349: ((c) == 0x0EA7) || \
350: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
351: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
352: ((c) == 0x0EB0) || \
353: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
354: ((c) == 0x0EBD) || \
355: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
356: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
357: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
358: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
359: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
360: ((c) == 0x1100) || \
361: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
362: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
363: ((c) == 0x1109) || \
364: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
365: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
366: ((c) == 0x113C) || \
367: ((c) == 0x113E) || \
368: ((c) == 0x1140) || \
369: ((c) == 0x114C) || \
370: ((c) == 0x114E) || \
371: ((c) == 0x1150) || \
372: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
373: ((c) == 0x1159) || \
374: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
375: ((c) == 0x1163) || \
376: ((c) == 0x1165) || \
377: ((c) == 0x1167) || \
378: ((c) == 0x1169) || \
379: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
380: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
381: ((c) == 0x1175) || \
382: ((c) == 0x119E) || \
383: ((c) == 0x11A8) || \
384: ((c) == 0x11AB) || \
385: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
386: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
387: ((c) == 0x11BA) || \
388: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
389: ((c) == 0x11EB) || \
390: ((c) == 0x11F0) || \
391: ((c) == 0x11F9) || \
392: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
393: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
394: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
395: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
396: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
397: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
398: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
399: ((c) == 0x1F59) || \
400: ((c) == 0x1F5B) || \
401: ((c) == 0x1F5D) || \
402: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
403: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
404: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
405: ((c) == 0x1FBE) || \
406: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
407: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
408: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
409: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
410: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
411: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
412: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
413: ((c) == 0x2126) || \
414: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
415: ((c) == 0x212E) || \
416: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
417: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
418: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
419: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
420: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 421:
1.22 daniel 422: /*
423: * [88] Digit ::= ... long list see REC ...
424: */
1.30 daniel 425: #define IS_DIGIT(c) \
426: ((((c) >= 0x0030) && ((c) <= 0x0039)) || \
427: (((c) >= 0x0660) && ((c) <= 0x0669)) || \
428: (((c) >= 0x06F0) && ((c) <= 0x06F9)) || \
429: (((c) >= 0x0966) && ((c) <= 0x096F)) || \
430: (((c) >= 0x09E6) && ((c) <= 0x09EF)) || \
431: (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || \
432: (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || \
433: (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || \
434: (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || \
435: (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || \
436: (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || \
437: (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || \
438: (((c) >= 0x0E50) && ((c) <= 0x0E59)) || \
439: (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || \
440: (((c) >= 0x0F20) && ((c) <= 0x0F29)))
1.1 veillard 441:
1.22 daniel 442: /*
443: * [87] CombiningChar ::= ... long list see REC ...
444: */
1.30 daniel 445: #define IS_COMBINING(c) \
446: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
447: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
448: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
449: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
450: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
451: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
452: ((c) == 0x05BF) || \
453: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
454: ((c) == 0x05C4) || \
455: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
456: ((c) == 0x0670) || \
457: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
458: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
459: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
460: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
461: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
462: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
463: ((c) == 0x093C) || \
464: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
465: ((c) == 0x094D) || \
466: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
467: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
468: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
469: ((c) == 0x09BC) || \
470: ((c) == 0x09BE) || \
471: ((c) == 0x09BF) || \
472: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
473: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
474: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
475: ((c) == 0x09D7) || \
476: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
477: ((c) == 0x0A02) || \
478: ((c) == 0x0A3C) || \
479: ((c) == 0x0A3E) || \
480: ((c) == 0x0A3F) || \
481: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
482: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
483: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
484: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
485: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
486: ((c) == 0x0ABC) || \
487: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
488: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
489: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
490: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
491: ((c) == 0x0B3C) || \
492: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
493: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
494: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
495: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
496: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
497: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
498: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
499: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
500: ((c) == 0x0BD7) || \
501: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
502: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
503: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
504: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
505: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
506: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
507: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
508: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
509: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
510: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
511: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
512: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
513: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
514: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
515: ((c) == 0x0D57) || \
516: ((c) == 0x0E31) || \
517: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
518: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
519: ((c) == 0x0EB1) || \
520: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
521: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
522: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
523: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
524: ((c) == 0x0F35) || \
525: ((c) == 0x0F37) || \
526: ((c) == 0x0F39) || \
527: ((c) == 0x0F3E) || \
528: ((c) == 0x0F3F) || \
529: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
530: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
531: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
532: ((c) == 0x0F97) || \
533: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
534: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
535: ((c) == 0x0FB9) || \
536: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
537: ((c) == 0x20E1) || \
538: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
539: ((c) == 0x3099) || \
540: ((c) == 0x309A))
1.3 veillard 541:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 */
#define IS_EXTENDER(c)                                                  \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||               \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||              \
     ((c) == 0xec6) || ((c) == 0x3005) ||                               \
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||                            \
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||                            \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 554:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the ranges listed by the production are accepted; the block
 * 0xF900-0xFA2D is not part of it and is therefore not matched.
 */
#define IS_IDEOGRAPHIC(c)                                               \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||                            \
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||                            \
      ((c) == 0x3007))
563:
1.22 daniel 564: /*
565: * [84] Letter ::= BaseChar | Ideographic
566: */
1.1 veillard 567: #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
568:
569: #else
1.55 daniel 570: #ifndef USE_UTF_8
1.30 daniel 571: /************************************************************************
572: * *
1.55 daniel 573: * 8bits / ISO-Latin version of the macros. *
1.30 daniel 574: * *
575: ************************************************************************/
1.1 veillard 576: /*
1.22 daniel 577: * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
578: * | [#x10000-#x10FFFF]
579: * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
1.1 veillard 580: */
581: #define IS_CHAR(c) \
1.21 daniel 582: (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20) ||\
583: ((c) == 0xa))
1.1 veillard 584:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8 bit version: only the part of the production below 0x100 is relevant.
 * The former test "(c) >= 0xaa && (c) <= 0x5b" described an empty range
 * and could never match, it has been dropped (no change of behaviour).
 * NOTE(review): 0xba is accepted here although production [85] does not
 * seem to list it -- confirm against the REC before changing.
 */
#define IS_BASECHAR(c)                                                  \
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||                                \
     (((c) >= 0x61) && ((c) <= 0x7a)) ||                                \
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||                                \
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||                                \
     (((c) >= 0xf8) && ((c) <= 0xff)) ||                                \
      ((c) == 0xba))
596:
1.22 daniel 597: /*
598: * [88] Digit ::= ... long list see REC ...
599: */
1.1 veillard 600: #define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
601:
1.22 daniel 602: /*
603: * [84] Letter ::= BaseChar | Ideographic
604: */
1.1 veillard 605: #define IS_LETTER(c) IS_BASECHAR(c)
606:
1.22 daniel 607:
608: /*
609: * [87] CombiningChar ::= ... long list see REC ...
610: */
1.1 veillard 611: #define IS_COMBINING(c) 0
612:
1.22 daniel 613: /*
614: * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
615: * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
616: * [#x309D-#x309E] | [#x30FC-#x30FE]
617: */
1.3 veillard 618: #define IS_EXTENDER(c) ((c) == 0xb7)
619:
1.55 daniel 620: #else /* USE_UTF_8 */
621: /************************************************************************
622: * *
623: * 8bits / UTF-8 version of the macros. *
624: * *
625: ************************************************************************/
626:
627: TODO !!!
628: #endif /* USE_UTF_8 */
1.21 daniel 629: #endif /* !UNICODE */
1.1 veillard 630:
1.22 daniel 631: /*
632: * Blank chars.
633: *
634: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
635: */
636: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
637: ((c) == 0x0D))
638:
639: /*
640: * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
641: */
1.21 daniel 642: #define IS_PUBIDCHAR(c) \
643: (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || \
644: (((c) >= 'a') && ((c) <= 'z')) || \
645: (((c) >= 'A') && ((c) <= 'Z')) || \
646: (((c) >= '0') && ((c) <= '9')) || \
647: ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') || \
648: ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') || \
649: ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') || \
650: ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') || \
651: ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 652:
/*
 * SKIP_EOL(p): advance the pointer p over exactly one end of line
 * sequence if there is one at *p: CR, LF, CR LF or LF CR.
 * CR is 0x0D and LF is 0x0A. p is evaluated several times, it must
 * be a plain lvalue.
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) {                                             \
            (p)++;                                                      \
            if (*(p) == 0x0A) (p)++;                                    \
        } else if (*(p) == 0x0A) {                                      \
            (p)++;                                                      \
            if (*(p) == 0x0D) (p)++;                                    \
        }                                                               \
    } while (0)
656:
/*
 * MOVETO_ENDTAG(p) / MOVETO_STARTTAG(p): advance the pointer p until it
 * points to a '>' (resp. '<') or to something which is not an IS_CHAR.
 * The argument is parenthesized everywhere so that any pointer lvalue
 * expression can be given.
 */
#define MOVETO_ENDTAG(p)                                                \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++

#define MOVETO_STARTTAG(p)                                              \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
1.1 veillard 662:
1.28 daniel 663: /************************************************************************
664: * *
665: * Commodity functions to handle CHARs *
666: * *
667: ************************************************************************/
668:
1.50 daniel 669: /**
670: * xmlStrndup:
671: * @cur: the input CHAR *
672: * @len: the len of @cur
673: *
674: * a strndup for array of CHAR's
675: * return values: a new CHAR * or NULL
1.1 veillard 676: */
677:
1.55 daniel 678: CHAR *
679: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 680: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
681:
682: if (ret == NULL) {
683: fprintf(stderr, "malloc of %d byte failed\n",
684: (len + 1) * sizeof(CHAR));
685: return(NULL);
686: }
687: memcpy(ret, cur, len * sizeof(CHAR));
688: ret[len] = 0;
689: return(ret);
690: }
691:
1.50 daniel 692: /**
693: * xmlStrdup:
694: * @cur: the input CHAR *
695: *
696: * a strdup for array of CHAR's
697: * return values: a new CHAR * or NULL
1.1 veillard 698: */
699:
1.55 daniel 700: CHAR *
701: xmlStrdup(const CHAR *cur) {
1.6 httpng 702: const CHAR *p = cur;
1.1 veillard 703:
704: while (IS_CHAR(*p)) p++;
705: return(xmlStrndup(cur, p - cur));
706: }
707:
1.50 daniel 708: /**
709: * xmlCharStrndup:
710: * @cur: the input char *
711: * @len: the len of @cur
712: *
713: * a strndup for char's to CHAR's
714: * return values: a new CHAR * or NULL
1.45 daniel 715: */
716:
1.55 daniel 717: CHAR *
718: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 719: int i;
720: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
721:
722: if (ret == NULL) {
723: fprintf(stderr, "malloc of %d byte failed\n",
724: (len + 1) * sizeof(CHAR));
725: return(NULL);
726: }
727: for (i = 0;i < len;i++)
728: ret[i] = (CHAR) cur[i];
729: ret[len] = 0;
730: return(ret);
731: }
732:
1.50 daniel 733: /**
734: * xmlCharStrdup:
735: * @cur: the input char *
736: * @len: the len of @cur
737: *
738: * a strdup for char's to CHAR's
739: * return values: a new CHAR * or NULL
1.45 daniel 740: */
741:
1.55 daniel 742: CHAR *
743: xmlCharStrdup(const char *cur) {
1.45 daniel 744: const char *p = cur;
745:
746: while (*p != '\0') p++;
747: return(xmlCharStrndup(cur, p - cur));
748: }
749:
1.50 daniel 750: /**
751: * xmlStrcmp:
752: * @str1: the first CHAR *
753: * @str2: the second CHAR *
754: *
755: * a strcmp for CHAR's
756: * return values: the integer result of the comparison
1.14 veillard 757: */
758:
1.55 daniel 759: int
760: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 761: register int tmp;
762:
763: do {
764: tmp = *str1++ - *str2++;
765: if (tmp != 0) return(tmp);
766: } while ((*str1 != 0) && (*str2 != 0));
767: return (*str1 - *str2);
768: }
769:
1.50 daniel 770: /**
771: * xmlStrncmp:
772: * @str1: the first CHAR *
773: * @str2: the second CHAR *
774: * @len: the max comparison length
775: *
776: * a strncmp for CHAR's
777: * return values: the integer result of the comparison
1.14 veillard 778: */
779:
1.55 daniel 780: int
781: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 782: register int tmp;
783:
784: if (len <= 0) return(0);
785: do {
786: tmp = *str1++ - *str2++;
787: if (tmp != 0) return(tmp);
788: len--;
789: if (len <= 0) return(0);
790: } while ((*str1 != 0) && (*str2 != 0));
791: return (*str1 - *str2);
792: }
793:
1.50 daniel 794: /**
795: * xmlStrchr:
796: * @str: the CHAR * array
797: * @val: the CHAR to search
798: *
799: * a strchr for CHAR's
800: * return values: the CHAR * for the first occurence or NULL.
1.14 veillard 801: */
802:
1.55 daniel 803: CHAR *
804: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 805: while (*str != 0) {
806: if (*str == val) return((CHAR *) str);
807: str++;
808: }
809: return(NULL);
810: }
1.28 daniel 811:
1.50 daniel 812: /**
813: * xmlStrlen:
814: * @str: the CHAR * array
815: *
816: * lenght of a CHAR's string
817: * return values: the number of CHAR contained in the ARRAY.
1.45 daniel 818: */
819:
1.55 daniel 820: int
821: xmlStrlen(const CHAR *str) {
1.45 daniel 822: int len = 0;
823:
824: if (str == NULL) return(0);
825: while (*str != 0) {
826: str++;
827: len++;
828: }
829: return(len);
830: }
831:
1.50 daniel 832: /**
833: * xmlStrncat:
834: * @first: the original CHAR * array
835: * @add: the CHAR * array added
836: * @len: the length of @add
837: *
838: * a strncat for array of CHAR's
839: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 840: */
841:
1.55 daniel 842: CHAR *
843: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 844: int size;
845: CHAR *ret;
846:
847: if ((add == NULL) || (len == 0))
848: return(cur);
849: if (cur == NULL)
850: return(xmlStrndup(add, len));
851:
852: size = xmlStrlen(cur);
853: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
854: if (ret == NULL) {
855: fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
856: (size + len + 1) * sizeof(CHAR));
857: return(cur);
858: }
859: memcpy(&ret[size], add, len * sizeof(CHAR));
860: ret[size + len] = 0;
861: return(ret);
862: }
863:
1.50 daniel 864: /**
865: * xmlStrcat:
866: * @first: the original CHAR * array
867: * @add: the CHAR * array added
868: *
869: * a strcat for array of CHAR's
870: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 871: */
872:
1.55 daniel 873: CHAR *
874: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 875: const CHAR *p = add;
876:
877: if (add == NULL) return(cur);
878: if (cur == NULL)
879: return(xmlStrdup(add));
880:
881: while (IS_CHAR(*p)) p++;
882: return(xmlStrncat(cur, add, p - add));
883: }
884:
885: /************************************************************************
886: * *
887: * Commodity functions, cleanup needed ? *
888: * *
889: ************************************************************************/
890:
1.50 daniel 891: /**
892: * areBlanks:
893: * @ctxt: an XML parser context
894: * @str: a CHAR *
895: * @len: the size of @str
896: *
1.45 daniel 897: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 898: *
899: * TODO: to be corrected accodingly to DTD information if available
900: * return values: 1 if ignorable 0 otherwise.
1.45 daniel 901: */
902:
903: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
904: int i;
905: xmlNodePtr lastChild;
906:
907: for (i = 0;i < len;i++)
908: if (!(IS_BLANK(str[i]))) return(0);
909:
910: if (CUR != '<') return(0);
911: lastChild = xmlGetLastChild(ctxt->node);
912: if (lastChild == NULL) {
913: if (ctxt->node->content != NULL) return(0);
914: } else if (xmlNodeIsText(lastChild))
915: return(0);
916: return(1);
917: }
918:
1.50 daniel 919: /**
920: * xmlHandleEntity:
921: * @ctxt: an XML parser context
922: * @entity: an XML entity pointer.
923: *
924: * Default handling of defined entities, when should we define a new input
1.45 daniel 925: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 926: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 927: */
928:
1.55 daniel 929: void
930: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 931: int len;
1.50 daniel 932: xmlParserInputPtr input;
1.45 daniel 933:
934: if (entity->content == NULL) {
1.55 daniel 935: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
936: ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 937: entity->name);
938: return;
939: }
940: len = xmlStrlen(entity->content);
941: if (len <= 2) goto handle_as_char;
942:
943: /*
944: * Redefine its content as an input stream.
945: */
1.50 daniel 946: input = xmlNewEntityInputStream(ctxt, entity);
947: xmlPushInput(ctxt, input);
1.45 daniel 948: return;
949:
950: handle_as_char:
951: /*
952: * Just handle the content as a set of chars.
953: */
954: if (ctxt->sax != NULL)
955: ctxt->sax->characters(ctxt, entity->content, 0, len);
956:
957: }
958:
959: /*
960: * Forward definition for recusive behaviour.
961: */
962: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.50 daniel 963: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
964: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 965:
1.28 daniel 966: /************************************************************************
967: * *
968: * Extra stuff for namespace support *
969: * Relates to http://www.w3.org/TR/WD-xml-names *
970: * *
971: ************************************************************************/
972:
1.50 daniel 973: /**
974: * xmlNamespaceParseNCName:
975: * @ctxt: an XML parser context
976: *
977: * parse an XML namespace name.
1.28 daniel 978: *
979: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
980: *
981: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
982: * CombiningChar | Extender
1.50 daniel 983: * return values: the namespace name or NULL
1.28 daniel 984: */
985:
1.55 daniel 986: CHAR *
987: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.28 daniel 988: const CHAR *q;
989: CHAR *ret = NULL;
990:
1.40 daniel 991: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
992: q = NEXT;
1.28 daniel 993:
1.40 daniel 994: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
995: (CUR == '.') || (CUR == '-') ||
996: (CUR == '_') ||
997: (IS_COMBINING(CUR)) ||
998: (IS_EXTENDER(CUR)))
999: NEXT;
1.28 daniel 1000:
1.40 daniel 1001: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 1002:
1003: return(ret);
1004: }
1005:
1.50 daniel 1006: /**
1007: * xmlNamespaceParseQName:
1008: * @ctxt: an XML parser context
1009: * @prefix: a CHAR **
1010: *
1011: * parse an XML qualified name
1.28 daniel 1012: *
1013: * [NS 5] QName ::= (Prefix ':')? LocalPart
1014: *
1015: * [NS 6] Prefix ::= NCName
1016: *
1017: * [NS 7] LocalPart ::= NCName
1.50 daniel 1018: * return values: the function returns the local part, and prefix is updated
1019: * to get the Prefix if any.
1.28 daniel 1020: */
1021:
1.55 daniel 1022: CHAR *
1023: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1024: CHAR *ret = NULL;
1025:
1026: *prefix = NULL;
1027: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1028: if (CUR == ':') {
1.28 daniel 1029: *prefix = ret;
1.40 daniel 1030: NEXT;
1.28 daniel 1031: ret = xmlNamespaceParseNCName(ctxt);
1032: }
1033:
1034: return(ret);
1035: }
1036:
1.50 daniel 1037: /**
1038: * xmlNamespaceParseNSDef:
1039: * @ctxt: an XML parser context
1040: *
1041: * parse a namespace prefix declaration
1.28 daniel 1042: *
1043: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1044: *
1045: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.50 daniel 1046: * return values: the namespace name
1.28 daniel 1047: */
1048:
1.55 daniel 1049: CHAR *
1050: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1051: CHAR *name = NULL;
1052:
1.40 daniel 1053: if ((CUR == 'x') && (NXT(1) == 'm') &&
1054: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1055: (NXT(4) == 's')) {
1056: SKIP(5);
1057: if (CUR == ':') {
1058: NEXT;
1.28 daniel 1059: name = xmlNamespaceParseNCName(ctxt);
1060: }
1061: }
1.39 daniel 1062: return(name);
1.28 daniel 1063: }
1064:
1.50 daniel 1065: /**
1066: * xmlParseQuotedString:
1067: * @ctxt: an XML parser context
1068: *
1.45 daniel 1069: * [OLD] Parse and return a string between quotes or doublequotes
1.50 daniel 1070: * return values: the string parser or NULL.
1.45 daniel 1071: */
1.55 daniel 1072: CHAR *
1073: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1074: CHAR *ret = NULL;
1075: const CHAR *q;
1076:
1077: if (CUR == '"') {
1078: NEXT;
1079: q = CUR_PTR;
1080: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1081: if (CUR != '"') {
1082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1083: ctxt->sax->error(ctxt, "String not closed\"%.50s\n", q);
1084: } else {
1.45 daniel 1085: ret = xmlStrndup(q, CUR_PTR - q);
1086: NEXT;
1087: }
1088: } else if (CUR == '\''){
1089: NEXT;
1090: q = CUR_PTR;
1091: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1092: if (CUR != '\'') {
1093: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1094: ctxt->sax->error(ctxt, "String not closed\"%.50s\n", q);
1095: } else {
1.45 daniel 1096: ret = xmlStrndup(q, CUR_PTR - q);
1097: NEXT;
1098: }
1099: }
1100: return(ret);
1101: }
1102:
1.50 daniel 1103: /**
1104: * xmlParseNamespace:
1105: * @ctxt: an XML parser context
1106: *
1.45 daniel 1107: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1108: *
1109: * This is what the older xml-name Working Draft specified, a bunch of
1110: * other stuff may still rely on it, so support is still here as
1111: * if ot was declared on the root of the Tree:-(
1112: */
1113:
1.55 daniel 1114: void
1115: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1116: CHAR *href = NULL;
1117: CHAR *prefix = NULL;
1118: int garbage = 0;
1119:
1120: /*
1121: * We just skipped "namespace" or "xml:namespace"
1122: */
1123: SKIP_BLANKS;
1124:
1125: while (IS_CHAR(CUR) && (CUR != '>')) {
1126: /*
1127: * We can have "ns" or "prefix" attributes
1128: * Old encoding as 'href' or 'AS' attributes is still supported
1129: */
1130: if ((CUR == 'n') && (NXT(1) == 's')) {
1131: garbage = 0;
1132: SKIP(2);
1133: SKIP_BLANKS;
1134:
1135: if (CUR != '=') continue;
1136: NEXT;
1137: SKIP_BLANKS;
1138:
1139: href = xmlParseQuotedString(ctxt);
1140: SKIP_BLANKS;
1141: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1142: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1143: garbage = 0;
1144: SKIP(4);
1145: SKIP_BLANKS;
1146:
1147: if (CUR != '=') continue;
1148: NEXT;
1149: SKIP_BLANKS;
1150:
1151: href = xmlParseQuotedString(ctxt);
1152: SKIP_BLANKS;
1153: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1154: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1155: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1156: garbage = 0;
1157: SKIP(6);
1158: SKIP_BLANKS;
1159:
1160: if (CUR != '=') continue;
1161: NEXT;
1162: SKIP_BLANKS;
1163:
1164: prefix = xmlParseQuotedString(ctxt);
1165: SKIP_BLANKS;
1166: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1167: garbage = 0;
1168: SKIP(2);
1169: SKIP_BLANKS;
1170:
1171: if (CUR != '=') continue;
1172: NEXT;
1173: SKIP_BLANKS;
1174:
1175: prefix = xmlParseQuotedString(ctxt);
1176: SKIP_BLANKS;
1177: } else if ((CUR == '?') && (NXT(1) == '>')) {
1178: garbage = 0;
1179: CUR_PTR ++;
1180: } else {
1181: /*
1182: * Found garbage when parsing the namespace
1183: */
1184: if (!garbage)
1.55 daniel 1185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1186: ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n");
1.45 daniel 1187: NEXT;
1188: }
1189: }
1190:
1191: MOVETO_ENDTAG(CUR_PTR);
1192: NEXT;
1193:
1194: /*
1195: * Register the DTD.
1196: */
1197: if (href != NULL)
1198: xmlNewGlobalNs(ctxt->doc, href, prefix);
1199:
1200: if (prefix != NULL) free(prefix);
1201: if (href != NULL) free(href);
1202: }
1203:
1.28 daniel 1204: /************************************************************************
1205: * *
1206: * The parser itself *
1207: * Relates to http://www.w3.org/TR/REC-xml *
1208: * *
1209: ************************************************************************/
1.14 veillard 1210:
1.50 daniel 1211: /**
1212: * xmlParseName:
1213: * @ctxt: an XML parser context
1214: *
1215: * parse an XML name.
1.22 daniel 1216: *
1217: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1218: * CombiningChar | Extender
1219: *
1220: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1221: *
1222: * [6] Names ::= Name (S Name)*
1.50 daniel 1223: * return values: the Name parsed or NULL
1.1 veillard 1224: */
1225:
1.55 daniel 1226: CHAR *
1227: xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1228: const CHAR *q;
1229: CHAR *ret = NULL;
1.1 veillard 1230:
1.40 daniel 1231: if (!IS_LETTER(CUR) && (CUR != '_') &&
1232: (CUR != ':')) return(NULL);
1233: q = NEXT;
1234:
1235: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1236: (CUR == '.') || (CUR == '-') ||
1237: (CUR == '_') || (CUR == ':') ||
1238: (IS_COMBINING(CUR)) ||
1239: (IS_EXTENDER(CUR)))
1240: NEXT;
1.22 daniel 1241:
1.40 daniel 1242: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1243:
1244: return(ret);
1245: }
1246:
1.50 daniel 1247: /**
1248: * xmlParseNmtoken:
1249: * @ctxt: an XML parser context
1250: *
1251: * parse an XML Nmtoken.
1.22 daniel 1252: *
1253: * [7] Nmtoken ::= (NameChar)+
1254: *
1255: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.50 daniel 1256: * return values: the Nmtoken parsed or NULL
1.22 daniel 1257: */
1258:
1.55 daniel 1259: CHAR *
1260: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.22 daniel 1261: const CHAR *q;
1262: CHAR *ret = NULL;
1263:
1.40 daniel 1264: q = NEXT;
1.22 daniel 1265:
1.40 daniel 1266: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1267: (CUR == '.') || (CUR == '-') ||
1268: (CUR == '_') || (CUR == ':') ||
1269: (IS_COMBINING(CUR)) ||
1270: (IS_EXTENDER(CUR)))
1271: NEXT;
1.3 veillard 1272:
1.40 daniel 1273: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1274:
1.3 veillard 1275: return(ret);
1.1 veillard 1276: }
1277:
1.50 daniel 1278: /**
1279: * xmlParseEntityValue:
1280: * @ctxt: an XML parser context
1281: *
1282: * parse a value for ENTITY decl.
1.24 daniel 1283: *
1284: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1285: * "'" ([^%&'] | PEReference | Reference)* "'"
1.50 daniel 1286: * return values: the EntityValue parsed or NULL
1.24 daniel 1287: */
1288:
1.55 daniel 1289: CHAR *
1290: xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1291: CHAR *ret = NULL, *cur;
1.24 daniel 1292: const CHAR *q;
1293:
1.40 daniel 1294: if (CUR == '"') {
1295: NEXT;
1.24 daniel 1296:
1.40 daniel 1297: q = CUR_PTR;
1298: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1299: if (CUR == '%') {
1.46 daniel 1300: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1301: cur = xmlParsePEReference(ctxt);
1.46 daniel 1302: ret = xmlStrcat(ret, cur);
1303: q = CUR_PTR;
1.40 daniel 1304: } else if (CUR == '&') {
1.46 daniel 1305: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1306: cur = xmlParseReference(ctxt);
1307: if (cur != NULL) {
1308: CHAR buf[2];
1309: buf[0] = '&';
1310: buf[1] = 0;
1311: ret = xmlStrncat(ret, buf, 1);
1312: ret = xmlStrcat(ret, cur);
1313: buf[0] = ';';
1314: buf[1] = 0;
1315: ret = xmlStrncat(ret, buf, 1);
1316: }
1.46 daniel 1317: q = CUR_PTR;
1.24 daniel 1318: } else
1.40 daniel 1319: NEXT;
1.24 daniel 1320: }
1.40 daniel 1321: if (!IS_CHAR(CUR)) {
1.55 daniel 1322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1323: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.24 daniel 1324: } else {
1.46 daniel 1325: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1326: NEXT;
1.24 daniel 1327: }
1.40 daniel 1328: } else if (CUR == '\'') {
1329: NEXT;
1330: q = CUR_PTR;
1331: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1332: if (CUR == '%') {
1.46 daniel 1333: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1334: cur = xmlParsePEReference(ctxt);
1.46 daniel 1335: ret = xmlStrcat(ret, cur);
1336: q = CUR_PTR;
1.40 daniel 1337: } else if (CUR == '&') {
1.46 daniel 1338: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1339: cur = xmlParseReference(ctxt);
1340: if (cur != NULL) {
1341: CHAR buf[2];
1342: buf[0] = '&';
1343: buf[1] = 0;
1344: ret = xmlStrncat(ret, buf, 1);
1345: ret = xmlStrcat(ret, cur);
1346: buf[0] = ';';
1347: buf[1] = 0;
1348: ret = xmlStrncat(ret, buf, 1);
1349: }
1.46 daniel 1350: q = CUR_PTR;
1.24 daniel 1351: } else
1.40 daniel 1352: NEXT;
1.24 daniel 1353: }
1.40 daniel 1354: if (!IS_CHAR(CUR)) {
1.55 daniel 1355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1356: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.24 daniel 1357: } else {
1.46 daniel 1358: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1359: NEXT;
1.24 daniel 1360: }
1361: } else {
1.55 daniel 1362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363: ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.24 daniel 1364: }
1365:
1366: return(ret);
1367: }
1368:
1.50 daniel 1369: /**
1370: * xmlParseAttValue:
1371: * @ctxt: an XML parser context
1372: *
1373: * parse a value for an attribute
1.29 daniel 1374: *
1375: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1376: * "'" ([^<&'] | Reference)* "'"
1.50 daniel 1377: * return values: the AttValue parsed or NULL.
1.29 daniel 1378: */
1379:
1.55 daniel 1380: CHAR *
1381: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1382: CHAR *ret = NULL, *cur;
1.29 daniel 1383: const CHAR *q;
1384:
1.40 daniel 1385: if (CUR == '"') {
1386: NEXT;
1.29 daniel 1387:
1.40 daniel 1388: q = CUR_PTR;
1389: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1390: if (CUR == '&') {
1.46 daniel 1391: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1392: cur = xmlParseReference(ctxt);
1393: if (cur != NULL) {
1394: /*
1395: * Special case for '&', we don't want to
1396: * resolve it here since it will break later
1397: * when searching entities in the string.
1398: */
1399: if ((cur[0] == '&') && (cur[1] == 0)) {
1400: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1401: ret = xmlStrncat(ret, buf, 5);
1402: } else
1403: ret = xmlStrcat(ret, cur);
1404: free(cur);
1405: }
1.46 daniel 1406: q = CUR_PTR;
1.29 daniel 1407: } else
1.40 daniel 1408: NEXT;
1.50 daniel 1409: /*
1410: * Pop out finished entity references.
1411: */
1412: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1413: if (CUR_PTR != q)
1414: ret = xmlStrncat(ret, q, CUR_PTR - q);
1415: xmlPopInput(ctxt);
1416: q = CUR_PTR;
1417: }
1.29 daniel 1418: }
1.40 daniel 1419: if (!IS_CHAR(CUR)) {
1.55 daniel 1420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1421: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.29 daniel 1422: } else {
1.46 daniel 1423: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1424: NEXT;
1.29 daniel 1425: }
1.40 daniel 1426: } else if (CUR == '\'') {
1427: NEXT;
1428: q = CUR_PTR;
1429: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1430: if (CUR == '&') {
1.46 daniel 1431: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1432: cur = xmlParseReference(ctxt);
1433: if (cur != NULL) {
1434: /*
1435: * Special case for '&', we don't want to
1436: * resolve it here since it will break later
1437: * when searching entities in the string.
1438: */
1439: if ((cur[0] == '&') && (cur[1] == 0)) {
1440: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1441: ret = xmlStrncat(ret, buf, 5);
1442: } else
1443: ret = xmlStrcat(ret, cur);
1444: free(cur);
1445: }
1.46 daniel 1446: q = CUR_PTR;
1.29 daniel 1447: } else
1.40 daniel 1448: NEXT;
1.50 daniel 1449: /*
1450: * Pop out finished entity references.
1451: */
1452: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1453: if (CUR_PTR != q)
1454: ret = xmlStrncat(ret, q, CUR_PTR - q);
1455: xmlPopInput(ctxt);
1456: q = CUR_PTR;
1457: }
1.29 daniel 1458: }
1.40 daniel 1459: if (!IS_CHAR(CUR)) {
1.55 daniel 1460: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1461: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.29 daniel 1462: } else {
1.46 daniel 1463: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1464: NEXT;
1.29 daniel 1465: }
1466: } else {
1.55 daniel 1467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1468: ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n");
1.29 daniel 1469: }
1470:
1471: return(ret);
1472: }
1473:
1.50 daniel 1474: /**
1475: * xmlParseSystemLiteral:
1476: * @ctxt: an XML parser context
1477: *
1478: * parse an XML Literal
1.21 daniel 1479: *
1.22 daniel 1480: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.50 daniel 1481: * return values: the SystemLiteral parsed or NULL
1.21 daniel 1482: */
1483:
1.55 daniel 1484: CHAR *
1485: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1486: const CHAR *q;
1487: CHAR *ret = NULL;
1488:
1.40 daniel 1489: if (CUR == '"') {
1490: NEXT;
1491: q = CUR_PTR;
1492: while ((IS_CHAR(CUR)) && (CUR != '"'))
1493: NEXT;
1494: if (!IS_CHAR(CUR)) {
1.55 daniel 1495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1496: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 1497: } else {
1.40 daniel 1498: ret = xmlStrndup(q, CUR_PTR - q);
1499: NEXT;
1.21 daniel 1500: }
1.40 daniel 1501: } else if (CUR == '\'') {
1502: NEXT;
1503: q = CUR_PTR;
1504: while ((IS_CHAR(CUR)) && (CUR != '\''))
1505: NEXT;
1506: if (!IS_CHAR(CUR)) {
1.55 daniel 1507: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.21 daniel 1509: } else {
1.40 daniel 1510: ret = xmlStrndup(q, CUR_PTR - q);
1511: NEXT;
1.21 daniel 1512: }
1513: } else {
1.55 daniel 1514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1515: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1516: }
1517:
1518: return(ret);
1519: }
1520:
1.50 daniel 1521: /**
1522: * xmlParsePubidLiteral:
1523: * @ctxt: an XML parser context
1.21 daniel 1524: *
1.50 daniel 1525: * parse an XML public literal
1526: * return values: the PubidLiteral parsed or NULL.
1.21 daniel 1527: */
1528:
1.55 daniel 1529: CHAR *
1530: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1531: const CHAR *q;
1532: CHAR *ret = NULL;
1533: /*
1534: * Name ::= (Letter | '_') (NameChar)*
1535: */
1.40 daniel 1536: if (CUR == '"') {
1537: NEXT;
1538: q = CUR_PTR;
1539: while (IS_PUBIDCHAR(CUR)) NEXT;
1540: if (CUR != '"') {
1.55 daniel 1541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1542: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1543: } else {
1.40 daniel 1544: ret = xmlStrndup(q, CUR_PTR - q);
1545: NEXT;
1.21 daniel 1546: }
1.40 daniel 1547: } else if (CUR == '\'') {
1548: NEXT;
1549: q = CUR_PTR;
1550: while ((IS_LETTER(CUR)) && (CUR != '\''))
1551: NEXT;
1552: if (!IS_LETTER(CUR)) {
1.55 daniel 1553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1554: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.21 daniel 1555: } else {
1.40 daniel 1556: ret = xmlStrndup(q, CUR_PTR - q);
1557: NEXT;
1.21 daniel 1558: }
1559: } else {
1.55 daniel 1560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.21 daniel 1562: }
1563:
1564: return(ret);
1565: }
1566:
1.50 daniel 1567: /**
1568: * xmlParseCharData:
1569: * @ctxt: an XML parser context
1570: * @cdata: int indicating whether we are within a CDATA section
1571: *
1572: * parse a CharData section.
1573: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 1574: *
1575: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1.50 daniel 1576: * return values:
1.27 daniel 1577: */
1578:
1.55 daniel 1579: void
1580: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1581: const CHAR *q;
1582:
1.40 daniel 1583: q = CUR_PTR;
1584: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1585: (CUR != '&')) {
1586: NEXT;
1587: if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
1588: (NXT(2) == '>')) break;
1.27 daniel 1589: }
1.45 daniel 1590: if (q == CUR_PTR) return;
1591:
1592: /*
1593: * Ok the segment [q CUR_PTR] is to be consumed as chars.
1594: */
1595: if (ctxt->sax != NULL) {
1596: if (areBlanks(ctxt, q, CUR_PTR - q))
1597: ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1598: else
1599: ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1600: }
1.27 daniel 1601: }
1602:
1.50 daniel 1603: /**
1604: * xmlParseExternalID:
1605: * @ctxt: an XML parser context
1606: * @publicID: a CHAR** receiving PubidLiteral
1607: *
1608: * Parse an External ID
1.22 daniel 1609: *
1610: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1611: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.50 daniel 1612: * return values: the function returns SystemLiteral and in the second
1613: * case publicID receives PubidLiteral
1.22 daniel 1614: */
1615:
1.55 daniel 1616: CHAR *
1617: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1.39 daniel 1618: CHAR *URI = NULL;
1.22 daniel 1619:
1.40 daniel 1620: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1621: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1622: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1623: SKIP(6);
1.42 daniel 1624: SKIP_BLANKS;
1.39 daniel 1625: URI = xmlParseSystemLiteral(ctxt);
1626: if (URI == NULL)
1.55 daniel 1627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1628: ctxt->sax->error(ctxt,
1.39 daniel 1629: "xmlParseExternalID: SYSTEM, no URI\n");
1.40 daniel 1630: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1631: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1632: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1633: SKIP(6);
1.42 daniel 1634: SKIP_BLANKS;
1.39 daniel 1635: *publicID = xmlParsePubidLiteral(ctxt);
1636: if (*publicID == NULL)
1.55 daniel 1637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1638: ctxt->sax->error(ctxt,
1.39 daniel 1639: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.42 daniel 1640: SKIP_BLANKS;
1.39 daniel 1641: URI = xmlParseSystemLiteral(ctxt);
1642: if (URI == NULL)
1.55 daniel 1643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644: ctxt->sax->error(ctxt,
1.39 daniel 1645: "xmlParseExternalID: PUBLIC, no URI\n");
1.22 daniel 1646: }
1.39 daniel 1647: return(URI);
1.22 daniel 1648: }
1649:
1.50 daniel 1650: /**
1651: * xmlParseComment:
1652: * @create: should we create a node
1653: *
1.3 veillard 1654: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1655: * This may or may not create a node (depending on the context)
1.38 daniel 1656: * The spec says that "For compatibility, the string "--" (double-hyphen)
1657: * must not occur within comments. "
1.22 daniel 1658: *
1659: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.50 daniel 1660: *
1661: * TODO: this should call a SAX function which will handle (or not) the
1662: * creation of the comment !
1663: * return values:
1.3 veillard 1664: */
1.31 daniel 1665: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1666: xmlNodePtr ret = NULL;
1.17 daniel 1667: const CHAR *q, *start;
1668: const CHAR *r;
1.39 daniel 1669: CHAR *val;
1.3 veillard 1670:
1671: /*
1.22 daniel 1672: * Check that there is a comment right here.
1.3 veillard 1673: */
1.40 daniel 1674: if ((CUR != '<') || (NXT(1) != '!') ||
1675: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1676:
1.40 daniel 1677: SKIP(4);
1678: start = q = CUR_PTR;
1679: NEXT;
1680: r = CUR_PTR;
1681: NEXT;
1682: while (IS_CHAR(CUR) &&
1683: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1684: (*r != '-') || (*q != '-'))) {
1.38 daniel 1685: if ((*r == '-') && (*q == '-'))
1.55 daniel 1686: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1687: ctxt->sax->error(ctxt,
1.38 daniel 1688: "Comment must not contain '--' (double-hyphen)`\n");
1.40 daniel 1689: NEXT;r++;q++;
1.3 veillard 1690: }
1.40 daniel 1691: if (!IS_CHAR(CUR)) {
1.55 daniel 1692: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1693: ctxt->sax->error(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.3 veillard 1694: } else {
1.40 daniel 1695: NEXT;
1.31 daniel 1696: if (create) {
1.39 daniel 1697: val = xmlStrndup(start, q - start);
1.50 daniel 1698: ret = xmlNewDocComment(ctxt->doc, val);
1.39 daniel 1699: free(val);
1.31 daniel 1700: }
1.3 veillard 1701: }
1.39 daniel 1702: return(ret);
1.3 veillard 1703: }
1704:
1.50 daniel 1705: /**
1706: * xmlParsePITarget:
1707: * @ctxt: an XML parser context
1708: *
1709: * parse the name of a PI
1.22 daniel 1710: *
1711: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.50 daniel 1712: * return values: the PITarget name or NULL
1.22 daniel 1713: */
1714:
1.55 daniel 1715: CHAR *
1716: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 1717: CHAR *name;
1718:
1719: name = xmlParseName(ctxt);
1720: if ((name != NULL) && (name[3] == 0) &&
1721: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1722: ((name[1] == 'm') || (name[1] == 'M')) &&
1723: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 1724: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1725: ctxt->sax->error(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1726: return(NULL);
1727: }
1728: return(name);
1729: }
1730:
1.50 daniel 1731: /**
1732: * xmlParsePI:
1733: * @ctxt: an XML parser context
1734: *
1735: * parse an XML Processing Instruction.
1.22 daniel 1736: *
1737: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.50 daniel 1738: * return values: the PI name or NULL
1.3 veillard 1739: */
1740:
1.55 daniel 1741: void
1742: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1743: CHAR *target;
1744:
1.40 daniel 1745: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1746: /*
1747: * this is a Processing Instruction.
1748: */
1.40 daniel 1749: SKIP(2);
1.3 veillard 1750:
1751: /*
1.22 daniel 1752: * Parse the target name and check for special support like
1753: * namespace.
1754: *
1755: * TODO : PI handling should be dynamically redefinable using an
1756: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1757: */
1.22 daniel 1758: target = xmlParsePITarget(ctxt);
1759: if (target != NULL) {
1760: /*
1.44 daniel 1761: * Support for the old Processing Instruction related to namespace.
1.22 daniel 1762: */
1763: if ((target[0] == 'n') && (target[1] == 'a') &&
1764: (target[2] == 'm') && (target[3] == 'e') &&
1765: (target[4] == 's') && (target[5] == 'p') &&
1766: (target[6] == 'a') && (target[7] == 'c') &&
1767: (target[8] == 'e')) {
1768: xmlParseNamespace(ctxt);
1769: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1770: (target[2] == 'l') && (target[3] == ':') &&
1771: (target[4] == 'n') && (target[5] == 'a') &&
1772: (target[6] == 'm') && (target[7] == 'e') &&
1773: (target[8] == 's') && (target[9] == 'p') &&
1774: (target[10] == 'a') && (target[11] == 'c') &&
1775: (target[12] == 'e')) {
1776: xmlParseNamespace(ctxt);
1777: } else {
1.44 daniel 1778: const CHAR *q = CUR_PTR;
1779:
1.40 daniel 1780: while (IS_CHAR(CUR) &&
1781: ((CUR != '?') || (NXT(1) != '>')))
1782: NEXT;
1783: if (!IS_CHAR(CUR)) {
1.55 daniel 1784: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1785: ctxt->sax->error(ctxt, "xmlParsePI: PI %s never end ...\n",
1.31 daniel 1786: target);
1.44 daniel 1787: } else {
1788: CHAR *data;
1789:
1790: data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1.40 daniel 1791: SKIP(2);
1.44 daniel 1792:
1793: /*
1794: * SAX: PI detected.
1795: */
1796: if (ctxt->sax)
1797: ctxt->sax->processingInstruction(ctxt, target, data);
1798: /*
1799: * Unknown PI, ignore it !
1800: */
1801: else
1802: xmlParserWarning(ctxt,
1803: "xmlParsePI : skipping unknown PI %s\n",
1804: target);
1805: free(data);
1806: }
1.22 daniel 1807: }
1.39 daniel 1808: free(target);
1.3 veillard 1809: } else {
1.55 daniel 1810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1811: ctxt->sax->error(ctxt, "xmlParsePI : no target name\n");
1.22 daniel 1812: /********* Should we try to complete parsing the PI ???
1.40 daniel 1813: while (IS_CHAR(CUR) &&
1814: (CUR != '?') && (CUR != '>'))
1815: NEXT;
1816: if (!IS_CHAR(CUR)) {
1.22 daniel 1817: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1818: target);
1819: }
1820: ********************************************************/
1821: }
1822: }
1823: }
1824:
1.50 daniel 1825: /**
1826: * xmlParseNotationDecl:
1827: * @ctxt: an XML parser context
1828: *
1829: * parse a notation declaration
1.22 daniel 1830: *
1831: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1832: *
1833: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1834: *
1835: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1836: * 'PUBLIC' S PubidLiteral S SystemLiteral
1837: *
1838: * Hence there is actually 3 choices:
1839: * 'PUBLIC' S PubidLiteral
1840: * 'PUBLIC' S PubidLiteral S SystemLiteral
1841: * and 'SYSTEM' S SystemLiteral
1.50 daniel 1842: *
1843: * TODO: no handling of the values parsed !
1.22 daniel 1844: */
1845:
1.55 daniel 1846: void
1847: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 1848: CHAR *name;
1849:
1.40 daniel 1850: if ((CUR == '<') && (NXT(1) == '!') &&
1851: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1852: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1853: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1854: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1855: (IS_BLANK(NXT(10)))) {
1856: SKIP(10);
1.42 daniel 1857: SKIP_BLANKS;
1.22 daniel 1858:
1859: name = xmlParseName(ctxt);
1860: if (name == NULL) {
1.55 daniel 1861: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1862: ctxt->sax->error(ctxt,
1.31 daniel 1863: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 1864: return;
1865: }
1.42 daniel 1866: SKIP_BLANKS;
1.22 daniel 1867: /*
1.31 daniel 1868: * TODO !!!
1.22 daniel 1869: */
1.40 daniel 1870: while ((IS_CHAR(CUR)) && (CUR != '>'))
1871: NEXT;
1.22 daniel 1872: free(name);
1873: }
1874: }
1875:
1.50 daniel 1876: /**
1877: * xmlParseEntityDecl:
1878: * @ctxt: an XML parser context
1879: *
1880: * parse <!ENTITY declarations
1.22 daniel 1881: *
1882: * [70] EntityDecl ::= GEDecl | PEDecl
1883: *
1884: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1885: *
1886: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1887: *
1888: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1889: *
1890: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1891: *
1892: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1893: */
1894:
1.55 daniel 1895: void
1896: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 1897: CHAR *name = NULL;
1.24 daniel 1898: CHAR *value = NULL;
1.39 daniel 1899: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 1900: CHAR *ndata = NULL;
1.39 daniel 1901: int isParameter = 0;
1.22 daniel 1902:
1.40 daniel 1903: if ((CUR == '<') && (NXT(1) == '!') &&
1904: (NXT(2) == 'E') && (NXT(3) == 'N') &&
1905: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1906: (NXT(6) == 'T') && (NXT(7) == 'Y') &&
1907: (IS_BLANK(NXT(8)))) {
1908: SKIP(8);
1.42 daniel 1909: SKIP_BLANKS;
1.40 daniel 1910:
1911: if (CUR == '%') {
1912: NEXT;
1.42 daniel 1913: SKIP_BLANKS;
1.39 daniel 1914: isParameter = 1;
1.22 daniel 1915: }
1916:
1917: name = xmlParseName(ctxt);
1.24 daniel 1918: if (name == NULL) {
1.55 daniel 1919: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1920: ctxt->sax->error(ctxt, "xmlParseEntityDecl: no name\n");
1.24 daniel 1921: return;
1922: }
1.42 daniel 1923: SKIP_BLANKS;
1.24 daniel 1924:
1.22 daniel 1925: /*
1.24 daniel 1926: * TODO handle the various case of definitions...
1.22 daniel 1927: */
1.39 daniel 1928: if (isParameter) {
1.40 daniel 1929: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 1930: value = xmlParseEntityValue(ctxt);
1.39 daniel 1931: if (value) {
1932: xmlAddDocEntity(ctxt->doc, name,
1933: XML_INTERNAL_PARAMETER_ENTITY,
1934: NULL, NULL, value);
1935: }
1.24 daniel 1936: else {
1.39 daniel 1937: URI = xmlParseExternalID(ctxt, &literal);
1938: if (URI) {
1939: xmlAddDocEntity(ctxt->doc, name,
1940: XML_EXTERNAL_PARAMETER_ENTITY,
1941: literal, URI, NULL);
1942: }
1.24 daniel 1943: }
1944: } else {
1.40 daniel 1945: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 1946: value = xmlParseEntityValue(ctxt);
1.39 daniel 1947: xmlAddDocEntity(ctxt->doc, name,
1948: XML_INTERNAL_GENERAL_ENTITY,
1949: NULL, NULL, value);
1950: } else {
1951: URI = xmlParseExternalID(ctxt, &literal);
1.42 daniel 1952: SKIP_BLANKS;
1.40 daniel 1953: if ((CUR == 'N') && (NXT(1) == 'D') &&
1954: (NXT(2) == 'A') && (NXT(3) == 'T') &&
1955: (NXT(4) == 'A')) {
1956: SKIP(5);
1.42 daniel 1957: SKIP_BLANKS;
1.24 daniel 1958: ndata = xmlParseName(ctxt);
1.39 daniel 1959: xmlAddDocEntity(ctxt->doc, name,
1960: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
1961: literal, URI, ndata);
1962: } else {
1963: xmlAddDocEntity(ctxt->doc, name,
1964: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
1965: literal, URI, NULL);
1.24 daniel 1966: }
1967: }
1968: }
1.42 daniel 1969: SKIP_BLANKS;
1.40 daniel 1970: if (CUR != '>') {
1.55 daniel 1971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1972: ctxt->sax->error(ctxt,
1.31 daniel 1973: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.24 daniel 1974: } else
1.40 daniel 1975: NEXT;
1.39 daniel 1976: if (name != NULL) free(name);
1977: if (value != NULL) free(value);
1978: if (URI != NULL) free(URI);
1979: if (literal != NULL) free(literal);
1980: if (ndata != NULL) free(ndata);
1.22 daniel 1981: }
1982: }
1983:
1.50 daniel 1984: /**
1985: * xmlParseEnumeratedType:
1986: * @ctxt: an XML parser context
1987: * @name: ???
1988: * @:
1989: *
1990: * parse and Enumerated attribute type.
1.22 daniel 1991: *
1992: * [57] EnumeratedType ::= NotationType | Enumeration
1993: *
1994: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1995: *
1996: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1.50 daniel 1997: *
1998: * TODO: not implemented !!!
1.22 daniel 1999: */
2000:
1.55 daniel 2001: void
2002: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.22 daniel 2003: /*
2004: * TODO !!!
2005: */
1.40 daniel 2006: while ((IS_CHAR(CUR)) && (CUR != '>'))
2007: NEXT;
1.22 daniel 2008: }
2009:
1.50 daniel 2010: /**
2011: * xmlParseAttributeType:
2012: * @ctxt: an XML parser context
2013: * @name: ???
2014: *
2015: * : parse the Attribute list def for an element
1.22 daniel 2016: *
2017: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2018: *
2019: * [55] StringType ::= 'CDATA'
2020: *
2021: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2022: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 2023: *
2024: * TODO: not implemented !!!
1.22 daniel 2025: */
1.55 daniel 2026: void
2027: xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.45 daniel 2028: /* TODO !!! */
1.40 daniel 2029: if ((CUR == 'C') && (NXT(1) == 'D') &&
2030: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2031: (NXT(4) == 'A')) {
2032: SKIP(5);
2033: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2034: SKIP(2);
2035: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2036: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2037: (NXT(4) == 'F')) {
2038: SKIP(5);
2039: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2040: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2041: (NXT(4) == 'F') && (NXT(5) == 'S')) {
2042: SKIP(6);
2043: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2044: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2045: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2046: SKIP(6);
2047: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2048: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2049: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2050: (NXT(6) == 'E') && (NXT(7) == 'S')) {
2051: SKIP(8);
2052: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2053: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2054: (NXT(4) == 'K') && (NXT(5) == 'E') &&
2055: (NXT(6) == 'N')) {
2056: SKIP(7);
2057: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2058: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2059: (NXT(4) == 'K') && (NXT(5) == 'E') &&
2060: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.22 daniel 2061: } else {
2062: xmlParseEnumeratedType(ctxt, name);
2063: }
2064: }
2065:
1.50 daniel 2066: /**
2067: * xmlParseAttributeListDecl:
2068: * @ctxt: an XML parser context
2069: *
2070: * : parse the Attribute list def for an element
1.22 daniel 2071: *
2072: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2073: *
2074: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 2075: *
2076: * TODO: not implemented !!!
1.22 daniel 2077: */
1.55 daniel 2078: void
2079: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2080: CHAR *name;
2081:
1.45 daniel 2082: /* TODO !!! */
1.40 daniel 2083: if ((CUR == '<') && (NXT(1) == '!') &&
2084: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2085: (NXT(4) == 'T') && (NXT(5) == 'L') &&
2086: (NXT(6) == 'I') && (NXT(7) == 'S') &&
2087: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
2088: SKIP(9);
1.42 daniel 2089: SKIP_BLANKS;
1.22 daniel 2090: name = xmlParseName(ctxt);
2091: if (name == NULL) {
1.55 daniel 2092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2093: ctxt->sax->error(ctxt,
1.31 daniel 2094: "xmlParseAttributeListDecl: no name for Element\n");
1.22 daniel 2095: return;
2096: }
1.42 daniel 2097: SKIP_BLANKS;
1.40 daniel 2098: while (CUR != '>') {
2099: const CHAR *check = CUR_PTR;
1.22 daniel 2100:
2101: xmlParseAttributeType(ctxt, name);
1.42 daniel 2102: SKIP_BLANKS;
1.40 daniel 2103: if (check == CUR_PTR) {
1.55 daniel 2104: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2105: ctxt->sax->error(ctxt,
1.31 daniel 2106: "xmlParseAttributeListDecl: detected error\n");
1.22 daniel 2107: break;
2108: }
2109: }
1.40 daniel 2110: if (CUR == '>')
2111: NEXT;
1.22 daniel 2112:
2113: free(name);
2114: }
2115: }
2116:
1.50 daniel 2117: /**
2118: * xmlParseElementContentDecl:
2119: * @ctxt: an XML parser context
2120: * @name: ???
2121: *
2122: * parse the declaration for an Element content
2123: * either Mixed or Children, the cases EMPTY and ANY being handled
1.22 daniel 2124: *
2125: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2126: *
2127: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2128: *
2129: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2130: *
2131: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2132: *
2133: * or
2134: *
2135: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2136: * '(' S? '#PCDATA' S? ')'
1.50 daniel 2137: *
2138: * TODO: not implemented !!!
1.22 daniel 2139: */
2140:
1.55 daniel 2141: void
2142: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1.22 daniel 2143: /*
2144: * TODO This has to be parsed correctly, currently we just skip until
2145: * we reach the first '>'.
1.31 daniel 2146: * !!!
1.22 daniel 2147: */
1.40 daniel 2148: while ((IS_CHAR(CUR)) && (CUR != '>'))
2149: NEXT;
1.22 daniel 2150: }
2151:
1.50 daniel 2152: /**
2153: * xmlParseElementDecl:
2154: * @ctxt: an XML parser context
2155: *
2156: * parse an Element declaration.
1.22 daniel 2157: *
2158: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2159: *
2160: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
2161: *
2162: * TODO There is a check [ VC: Unique Element Type Declaration ]
2163: */
1.55 daniel 2164: void
2165: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2166: CHAR *name;
2167:
1.40 daniel 2168: if ((CUR == '<') && (NXT(1) == '!') &&
2169: (NXT(2) == 'E') && (NXT(3) == 'L') &&
2170: (NXT(4) == 'E') && (NXT(5) == 'M') &&
2171: (NXT(6) == 'E') && (NXT(7) == 'N') &&
2172: (NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
2173: SKIP(9);
1.42 daniel 2174: SKIP_BLANKS;
1.22 daniel 2175: name = xmlParseName(ctxt);
2176: if (name == NULL) {
1.55 daniel 2177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2178: ctxt->sax->error(ctxt, "xmlParseElementDecl: no name for Element\n");
1.22 daniel 2179: return;
2180: }
1.42 daniel 2181: SKIP_BLANKS;
1.40 daniel 2182: if ((CUR == 'E') && (NXT(1) == 'M') &&
2183: (NXT(2) == 'P') && (NXT(3) == 'T') &&
2184: (NXT(4) == 'Y')) {
2185: SKIP(5);
1.22 daniel 2186: /*
2187: * Element must always be empty.
2188: */
1.40 daniel 2189: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2190: (NXT(2) == 'Y')) {
2191: SKIP(3);
1.22 daniel 2192: /*
2193: * Element is a generic container.
2194: */
2195: } else {
2196: xmlParseElementContentDecl(ctxt, name);
2197: }
1.42 daniel 2198: SKIP_BLANKS;
1.40 daniel 2199: if (CUR != '>') {
1.55 daniel 2200: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2201: ctxt->sax->error(ctxt,
1.31 daniel 2202: "xmlParseElementDecl: expected '>' at the end\n");
1.22 daniel 2203: } else
1.40 daniel 2204: NEXT;
1.22 daniel 2205: }
2206: }
2207:
1.50 daniel 2208: /**
2209: * xmlParseMarkupDecl:
2210: * @ctxt: an XML parser context
2211: *
2212: * parse Markup declarations
1.22 daniel 2213: *
2214: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2215: * NotationDecl | PI | Comment
2216: *
2217: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2218: */
1.55 daniel 2219: void
2220: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2221: xmlParseElementDecl(ctxt);
2222: xmlParseAttributeListDecl(ctxt);
2223: xmlParseEntityDecl(ctxt);
2224: xmlParseNotationDecl(ctxt);
2225: xmlParsePI(ctxt);
1.31 daniel 2226: xmlParseComment(ctxt, 0);
1.22 daniel 2227: }
2228:
1.50 daniel 2229: /**
2230: * xmlParseCharRef:
2231: * @ctxt: an XML parser context
2232: *
2233: * parse Reference declarations
1.24 daniel 2234: *
2235: * [66] CharRef ::= '&#' [0-9]+ ';' |
2236: * '&#x' [0-9a-fA-F]+ ';'
1.50 daniel 2237: * return values: the value parsed
1.24 daniel 2238: */
1.55 daniel 2239: CHAR *
2240: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 2241: int val = 0;
1.44 daniel 2242: CHAR buf[2];
1.24 daniel 2243:
1.40 daniel 2244: if ((CUR == '&') && (NXT(1) == '#') &&
2245: (NXT(2) == 'x')) {
2246: SKIP(3);
2247: while (CUR != ';') {
2248: if ((CUR >= '0') && (CUR <= '9'))
2249: val = val * 16 + (CUR - '0');
2250: else if ((CUR >= 'a') && (CUR <= 'f'))
2251: val = val * 16 + (CUR - 'a') + 10;
2252: else if ((CUR >= 'A') && (CUR <= 'F'))
2253: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 2254: else {
1.55 daniel 2255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2256: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 2257: val = 0;
1.24 daniel 2258: break;
2259: }
1.47 daniel 2260: NEXT;
1.24 daniel 2261: }
1.55 daniel 2262: if (CUR == ';')
1.40 daniel 2263: NEXT;
2264: } else if ((CUR == '&') && (NXT(1) == '#')) {
2265: SKIP(2);
2266: while (CUR != ';') {
2267: if ((CUR >= '0') && (CUR <= '9'))
1.55 daniel 2268: val = val * 10 + (CUR - '0');
1.24 daniel 2269: else {
1.55 daniel 2270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2271: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
1.29 daniel 2272: val = 0;
1.24 daniel 2273: break;
2274: }
1.47 daniel 2275: NEXT;
1.24 daniel 2276: }
1.55 daniel 2277: if (CUR == ';')
1.40 daniel 2278: NEXT;
1.24 daniel 2279: } else {
1.55 daniel 2280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2281: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
1.24 daniel 2282: }
1.29 daniel 2283: /*
2284: * Check the value IS_CHAR ...
2285: */
1.44 daniel 2286: if (IS_CHAR(val)) {
2287: buf[0] = (CHAR) val;
2288: buf[1] = 0;
1.50 daniel 2289: return(xmlStrndup(buf, 1));
1.44 daniel 2290: } else {
1.55 daniel 2291: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2292: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value");
1.29 daniel 2293: }
1.46 daniel 2294: return(NULL);
1.24 daniel 2295: }
2296:
1.50 daniel 2297: /**
2298: * xmlParseEntityRef:
2299: * @ctxt: an XML parser context
2300: *
2301: * parse ENTITY references declarations
1.24 daniel 2302: *
2303: * [68] EntityRef ::= '&' Name ';'
1.52 daniel 2304: * return values: the entity ref string or NULL if directly as input stream.
1.24 daniel 2305: */
1.55 daniel 2306: CHAR *
2307: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.46 daniel 2308: CHAR *ret = NULL;
1.50 daniel 2309: const CHAR *q;
1.24 daniel 2310: CHAR *name;
1.50 daniel 2311: xmlParserInputPtr input = NULL;
1.24 daniel 2312:
1.50 daniel 2313: q = CUR_PTR;
1.40 daniel 2314: if (CUR == '&') {
2315: NEXT;
1.24 daniel 2316: name = xmlParseName(ctxt);
2317: if (name == NULL) {
1.55 daniel 2318: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2319: ctxt->sax->error(ctxt, "xmlParseEntityRef: no name\n");
1.24 daniel 2320: } else {
1.40 daniel 2321: if (CUR == ';') {
2322: NEXT;
1.24 daniel 2323: /*
1.52 daniel 2324: * We parsed the entity reference correctly, call SAX
2325: * interface for the proper behaviour:
2326: * - get a new input stream
2327: * - or keep the reference inline
1.24 daniel 2328: */
1.52 daniel 2329: if (ctxt->sax)
2330: input = ctxt->sax->resolveEntity(ctxt, NULL, name);
2331: if (input != NULL)
2332: xmlPushInput(ctxt, input);
2333: else {
2334: ret = xmlStrndup(q, CUR_PTR - q);
2335: }
1.24 daniel 2336: } else {
1.46 daniel 2337: char cst[2] = { '&', 0 };
2338:
1.55 daniel 2339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2340: ctxt->sax->error(ctxt, "xmlParseEntityRef: expecting ';'\n");
1.46 daniel 2341: ret = xmlStrndup(cst, 1);
2342: ret = xmlStrcat(ret, name);
1.24 daniel 2343: }
1.45 daniel 2344: free(name);
1.24 daniel 2345: }
2346: }
1.46 daniel 2347: return(ret);
1.24 daniel 2348: }
2349:
1.50 daniel 2350: /**
2351: * xmlParseReference:
2352: * @ctxt: an XML parser context
2353: *
2354: * parse Reference declarations
1.24 daniel 2355: *
2356: * [67] Reference ::= EntityRef | CharRef
1.52 daniel 2357: * return values: the entity string or NULL if handled directly by pushing
2358: * the entity value as the input.
1.24 daniel 2359: */
1.55 daniel 2360: CHAR *
2361: xmlParseReference(xmlParserCtxtPtr ctxt) {
1.44 daniel 2362: if ((CUR == '&') && (NXT(1) == '#')) {
1.50 daniel 2363: return(xmlParseCharRef(ctxt));
1.44 daniel 2364: } else if (CUR == '&') {
1.50 daniel 2365: return(xmlParseEntityRef(ctxt));
1.24 daniel 2366: }
1.46 daniel 2367: return(NULL);
1.24 daniel 2368: }
2369:
1.50 daniel 2370: /**
2371: * xmlParsePEReference:
2372: * @ctxt: an XML parser context
2373: *
2374: * parse PEReference declarations
1.22 daniel 2375: *
2376: * [69] PEReference ::= '%' Name ';'
1.50 daniel 2377: * return values: the entity content or NULL if handled directly.
1.22 daniel 2378: */
1.55 daniel 2379: CHAR *
2380: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.46 daniel 2381: CHAR *ret = NULL;
1.22 daniel 2382: CHAR *name;
1.45 daniel 2383: xmlEntityPtr entity;
1.50 daniel 2384: xmlParserInputPtr input;
1.22 daniel 2385:
1.40 daniel 2386: if (CUR == '%') {
2387: NEXT;
1.22 daniel 2388: name = xmlParseName(ctxt);
2389: if (name == NULL) {
1.55 daniel 2390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2391: ctxt->sax->error(ctxt, "xmlParsePEReference: no name\n");
1.22 daniel 2392: } else {
1.40 daniel 2393: if (CUR == ';') {
2394: NEXT;
1.45 daniel 2395: entity = xmlGetDtdEntity(ctxt->doc, name);
2396: if (entity == NULL) {
1.55 daniel 2397: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
2398: ctxt->sax->warning(ctxt,
1.45 daniel 2399: "xmlParsePEReference: %%%s; not found\n");
1.50 daniel 2400: } else {
2401: input = xmlNewEntityInputStream(ctxt, entity);
2402: xmlPushInput(ctxt, input);
1.45 daniel 2403: }
1.22 daniel 2404: } else {
1.50 daniel 2405: char cst[2] = { '%', 0 };
1.46 daniel 2406:
1.55 daniel 2407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2408: ctxt->sax->error(ctxt, "xmlParsePEReference: expecting ';'\n");
1.46 daniel 2409: ret = xmlStrndup(cst, 1);
2410: ret = xmlStrcat(ret, name);
1.22 daniel 2411: }
1.45 daniel 2412: free(name);
1.3 veillard 2413: }
2414: }
1.46 daniel 2415: return(ret);
1.3 veillard 2416: }
2417:
1.50 daniel 2418: /**
2419: * xmlParseDocTypeDecl :
2420: * @ctxt: an XML parser context
2421: *
2422: * parse a DOCTYPE declaration
1.21 daniel 2423: *
1.22 daniel 2424: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
2425: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 2426: */
2427:
1.55 daniel 2428: void
2429: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 2430: xmlDtdPtr dtd;
1.21 daniel 2431: CHAR *name;
2432: CHAR *ExternalID = NULL;
1.39 daniel 2433: CHAR *URI = NULL;
1.21 daniel 2434:
2435: /*
2436: * We know that '<!DOCTYPE' has been detected.
2437: */
1.40 daniel 2438: SKIP(9);
1.21 daniel 2439:
1.42 daniel 2440: SKIP_BLANKS;
1.21 daniel 2441:
2442: /*
2443: * Parse the DOCTYPE name.
2444: */
2445: name = xmlParseName(ctxt);
2446: if (name == NULL) {
1.55 daniel 2447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448: ctxt->sax->error(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.21 daniel 2449: }
2450:
1.42 daniel 2451: SKIP_BLANKS;
1.21 daniel 2452:
2453: /*
1.22 daniel 2454: * Check for SystemID and ExternalID
2455: */
1.39 daniel 2456: URI = xmlParseExternalID(ctxt, &ExternalID);
1.42 daniel 2457: SKIP_BLANKS;
1.36 daniel 2458:
1.39 daniel 2459: dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
1.22 daniel 2460:
2461: /*
2462: * Is there any DTD definition ?
2463: */
1.40 daniel 2464: if (CUR == '[') {
2465: NEXT;
1.22 daniel 2466: /*
2467: * Parse the succession of Markup declarations and
2468: * PEReferences.
2469: * Subsequence (markupdecl | PEReference | S)*
2470: */
1.40 daniel 2471: while (CUR != ']') {
2472: const CHAR *check = CUR_PTR;
1.22 daniel 2473:
1.42 daniel 2474: SKIP_BLANKS;
1.22 daniel 2475: xmlParseMarkupDecl(ctxt);
1.50 daniel 2476: xmlParsePEReference(ctxt);
1.22 daniel 2477:
1.40 daniel 2478: if (CUR_PTR == check) {
1.55 daniel 2479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2480: ctxt->sax->error(ctxt,
1.31 daniel 2481: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.22 daniel 2482: break;
2483: }
2484: }
1.40 daniel 2485: if (CUR == ']') NEXT;
1.22 daniel 2486: }
2487:
2488: /*
2489: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 2490: */
1.40 daniel 2491: if (CUR != '>') {
1.55 daniel 2492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2493: ctxt->sax->error(ctxt, "DOCTYPE unproperly terminated\n");
1.22 daniel 2494: /* We shouldn't try to resynchronize ... */
1.21 daniel 2495: }
1.40 daniel 2496: NEXT;
1.22 daniel 2497:
2498: /*
2499: * Cleanup, since we don't use all those identifiers
2500: * TODO : the DOCTYPE if available should be stored !
2501: */
1.39 daniel 2502: if (URI != NULL) free(URI);
1.22 daniel 2503: if (ExternalID != NULL) free(ExternalID);
2504: if (name != NULL) free(name);
1.21 daniel 2505: }
2506:
1.50 daniel 2507: /**
2508: * xmlParseAttribute:
2509: * @ctxt: an XML parser context
2510: * @node: the node carrying the attribute
2511: *
2512: * parse an attribute
1.3 veillard 2513: *
1.22 daniel 2514: * [41] Attribute ::= Name Eq AttValue
2515: *
2516: * [25] Eq ::= S? '=' S?
2517: *
1.29 daniel 2518: * With namespace:
2519: *
2520: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 2521: *
2522: * Also the case QName == xmlns:??? is handled independently as a namespace
2523: * definition.
1.3 veillard 2524: */
2525:
1.52 daniel 2526: xmlAttrPtr xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
2527: CHAR *name;
1.29 daniel 2528: CHAR *ns;
1.52 daniel 2529: CHAR *value = NULL;
2530: xmlAttrPtr ret;
1.3 veillard 2531:
1.29 daniel 2532: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 2533: if (name == NULL) {
1.55 daniel 2534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2535: ctxt->sax->error(ctxt, "error parsing attribute name\n");
1.52 daniel 2536: return(NULL);
1.3 veillard 2537: }
2538:
2539: /*
1.29 daniel 2540: * read the value
1.3 veillard 2541: */
1.42 daniel 2542: SKIP_BLANKS;
1.40 daniel 2543: if (CUR == '=') {
2544: NEXT;
1.42 daniel 2545: SKIP_BLANKS;
1.29 daniel 2546: value = xmlParseAttValue(ctxt);
2547: } else {
1.55 daniel 2548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2549: ctxt->sax->error(ctxt, "Specification mandate value for attribute %s\n",
1.31 daniel 2550: name);
1.3 veillard 2551: }
2552:
2553: /*
1.43 daniel 2554: * Check whether it's a namespace definition
2555: */
2556: if ((ns == NULL) &&
2557: (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
2558: (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
2559: /* a default namespace definition */
2560: xmlNewNs(node, value, NULL);
2561: if (name != NULL)
2562: free(name);
2563: if (value != NULL)
2564: free(value);
1.52 daniel 2565: return(NULL);
1.43 daniel 2566: }
2567: if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
2568: (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
2569: /* a standard namespace definition */
2570: xmlNewNs(node, value, name);
1.50 daniel 2571: free(ns);
1.43 daniel 2572: if (name != NULL)
2573: free(name);
2574: if (value != NULL)
2575: free(value);
1.52 daniel 2576: return(NULL);
1.43 daniel 2577: }
2578:
1.52 daniel 2579: ret = xmlNewProp(ctxt->node, name, NULL);
2580: if (ret != NULL)
2581: ret->val = xmlStringGetNodeList(ctxt->doc, value);
1.53 daniel 2582:
2583: if (ns != NULL)
2584: free(ns);
2585: if (value != NULL)
2586: free(value);
2587: free(name);
1.52 daniel 2588: return(ret);
1.3 veillard 2589: }
2590:
1.50 daniel 2591: /**
2592: * xmlParseStartTag:
2593: * @ctxt: an XML parser context
2594: *
2595: * parse a start of tag either for rule element or
2596: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 2597: *
2598: * [40] STag ::= '<' Name (S Attribute)* S? '>'
2599: *
1.29 daniel 2600: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
2601: *
2602: * With namespace:
2603: *
2604: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
2605: *
2606: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.50 daniel 2607: *
2608: * return values: the XML new node or NULL.
1.2 veillard 2609: */
2610:
1.16 daniel 2611: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 2612: CHAR *namespace, *name;
2613: xmlNsPtr ns = NULL;
1.2 veillard 2614: xmlNodePtr ret = NULL;
1.50 daniel 2615: xmlNodePtr parent = ctxt->node;
1.2 veillard 2616:
1.40 daniel 2617: if (CUR != '<') return(NULL);
2618: NEXT;
1.3 veillard 2619:
1.34 daniel 2620: name = xmlNamespaceParseQName(ctxt, &namespace);
1.50 daniel 2621: if (name == NULL) return(NULL);
1.3 veillard 2622:
1.43 daniel 2623: /*
2624: * Note : the namespace resolution is deferred until the end of the
2625: * attributes parsing, since local namespace can be defined as
2626: * an attribute at this level.
2627: */
1.50 daniel 2628: ret = xmlNewDocNode(ctxt->doc, ns, name, NULL);
2629: if (ret == NULL) {
2630: if (namespace != NULL)
2631: free(namespace);
2632: free(name);
2633: return(NULL);
2634: }
2635:
2636: /*
2637: * We are parsing a new node.
2638: */
2639: nodePush(ctxt, ret);
1.2 veillard 2640:
1.3 veillard 2641: /*
2642: * Now parse the attributes, it ends up with the ending
2643: *
2644: * (S Attribute)* S?
2645: */
1.42 daniel 2646: SKIP_BLANKS;
1.40 daniel 2647: while ((IS_CHAR(CUR)) &&
2648: (CUR != '>') &&
2649: ((CUR != '/') || (NXT(1) != '>'))) {
2650: const CHAR *q = CUR_PTR;
1.29 daniel 2651:
2652: xmlParseAttribute(ctxt, ret);
1.42 daniel 2653: SKIP_BLANKS;
1.29 daniel 2654:
1.40 daniel 2655: if (q == CUR_PTR) {
1.55 daniel 2656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2657: ctxt->sax->error(ctxt,
1.31 daniel 2658: "xmlParseStartTag: problem parsing attributes\n");
1.29 daniel 2659: break;
1.3 veillard 2660: }
2661: }
2662:
1.43 daniel 2663: /*
2664: * Search the namespace
2665: */
2666: ns = xmlSearchNs(ctxt->doc, ret, namespace);
2667: if (ns == NULL) /* ret still doesn't have a parent yet ! */
1.50 daniel 2668: ns = xmlSearchNs(ctxt->doc, parent, namespace);
1.43 daniel 2669: xmlSetNs(ret, ns);
2670: if (namespace != NULL)
2671: free(namespace);
2672:
1.44 daniel 2673: /*
2674: * SAX: Start of Element !
2675: */
2676: if (ctxt->sax != NULL)
2677: ctxt->sax->startElement(ctxt, name);
1.52 daniel 2678: free(name);
2679:
2680: /*
2681: * Link the child element
2682: */
2683: if (ctxt->nodeNr < 2) return(ret);
2684: parent = ctxt->nodeTab[ctxt->nodeNr - 2];
2685: if (parent != NULL)
2686: xmlAddChild(parent, ctxt->node);
1.44 daniel 2687:
1.3 veillard 2688: return(ret);
2689: }
2690:
1.50 daniel 2691: /**
2692: * xmlParseEndTag:
2693: * @ctxt: an XML parser context
2694: * @nsPtr: the current node namespace definition
2695: * @tagPtr: CHAR** receive the tag value
2696: *
2697: * parse an end of tag
1.27 daniel 2698: *
2699: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 2700: *
2701: * With namespace
2702: *
2703: * [9] ETag ::= '</' QName S? '>'
1.50 daniel 2704: *
2705: * return values: tagPtr receive the tag name just read
1.7 veillard 2706: */
2707:
1.55 daniel 2708: void
2709: xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
1.34 daniel 2710: CHAR *namespace, *name;
2711: xmlNsPtr ns = NULL;
1.7 veillard 2712:
1.34 daniel 2713: *nsPtr = NULL;
1.7 veillard 2714: *tagPtr = NULL;
2715:
1.40 daniel 2716: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 2717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2718: ctxt->sax->error(ctxt, "xmlParseEndTag: '</' not found\n");
1.27 daniel 2719: return;
2720: }
1.40 daniel 2721: SKIP(2);
1.7 veillard 2722:
1.34 daniel 2723: name = xmlNamespaceParseQName(ctxt, &namespace);
1.43 daniel 2724:
2725: /*
2726: * Search the namespace
2727: */
2728: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
2729: if (namespace != NULL)
1.34 daniel 2730: free(namespace);
1.7 veillard 2731:
1.34 daniel 2732: *nsPtr = ns;
1.7 veillard 2733: *tagPtr = name;
2734:
2735: /*
2736: * We should definitely be at the ending "S? '>'" part
2737: */
1.42 daniel 2738: SKIP_BLANKS;
1.40 daniel 2739: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 2740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2741: ctxt->sax->error(ctxt, "End tag : expected '>'\n");
1.7 veillard 2742: } else
1.40 daniel 2743: NEXT;
1.7 veillard 2744:
2745: return;
2746: }
2747:
1.50 daniel 2748: /**
2749: * xmlParseCDSect:
2750: * @ctxt: an XML parser context
2751: *
2752: * Parse escaped pure raw content.
1.29 daniel 2753: *
2754: * [18] CDSect ::= CDStart CData CDEnd
2755: *
2756: * [19] CDStart ::= '<![CDATA['
2757: *
2758: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2759: *
2760: * [21] CDEnd ::= ']]>'
1.3 veillard 2761: */
1.55 daniel 2762: void
2763: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2764: const CHAR *r, *s, *base;
1.3 veillard 2765:
1.40 daniel 2766: if ((CUR == '<') && (NXT(1) == '!') &&
2767: (NXT(2) == '[') && (NXT(3) == 'C') &&
2768: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2769: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2770: (NXT(8) == '[')) {
2771: SKIP(9);
1.29 daniel 2772: } else
1.45 daniel 2773: return;
1.40 daniel 2774: base = CUR_PTR;
2775: if (!IS_CHAR(CUR)) {
1.55 daniel 2776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2777: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2778: return;
1.3 veillard 2779: }
1.40 daniel 2780: r = NEXT;
2781: if (!IS_CHAR(CUR)) {
1.55 daniel 2782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2783: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2784: return;
1.3 veillard 2785: }
1.40 daniel 2786: s = NEXT;
2787: while (IS_CHAR(CUR) &&
2788: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
2789: r++;s++;NEXT;
1.3 veillard 2790: }
1.40 daniel 2791: if (!IS_CHAR(CUR)) {
1.55 daniel 2792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2793: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.45 daniel 2794: return;
1.3 veillard 2795: }
1.16 daniel 2796:
1.45 daniel 2797: /*
2798: * Ok the segment [base CUR_PTR] is to be consumed as chars.
2799: */
2800: if (ctxt->sax != NULL) {
2801: if (areBlanks(ctxt, base, CUR_PTR - base))
2802: ctxt->sax->ignorableWhitespace(ctxt, base, 0, CUR_PTR - base);
2803: else
2804: ctxt->sax->characters(ctxt, base, 0, CUR_PTR - base);
2805: }
1.2 veillard 2806: }
2807:
1.50 daniel 2808: /**
2809: * xmlParseContent:
2810: * @ctxt: an XML parser context
2811: *
2812: * Parse a content:
1.2 veillard 2813: *
1.27 daniel 2814: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2815: */
2816:
1.55 daniel 2817: void
2818: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.2 veillard 2819: xmlNodePtr ret = NULL;
2820:
1.40 daniel 2821: while ((CUR != '<') || (NXT(1) != '/')) {
2822: const CHAR *test = CUR_PTR;
1.27 daniel 2823: ret = NULL;
2824:
2825: /*
2826: * First case : a Processing Instruction.
2827: */
1.40 daniel 2828: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 2829: xmlParsePI(ctxt);
2830: }
2831: /*
2832: * Second case : a CDSection
2833: */
1.40 daniel 2834: else if ((CUR == '<') && (NXT(1) == '!') &&
2835: (NXT(2) == '[') && (NXT(3) == 'C') &&
2836: (NXT(4) == 'D') && (NXT(5) == 'A') &&
2837: (NXT(6) == 'T') && (NXT(7) == 'A') &&
2838: (NXT(8) == '[')) {
1.45 daniel 2839: xmlParseCDSect(ctxt);
1.27 daniel 2840: }
2841: /*
2842: * Third case : a comment
2843: */
1.40 daniel 2844: else if ((CUR == '<') && (NXT(1) == '!') &&
2845: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 2846: ret = xmlParseComment(ctxt, 1);
1.27 daniel 2847: }
2848: /*
2849: * Fourth case : a sub-element.
2850: */
1.40 daniel 2851: else if (CUR == '<') {
1.45 daniel 2852: ret = xmlParseElement(ctxt);
2853: }
2854: /*
1.50 daniel 2855: * Fifth case : a reference. If if has not been resolved,
2856: * parsing returns it's Name, create the node
1.45 daniel 2857: */
2858: else if (CUR == '&') {
1.50 daniel 2859: CHAR *val = xmlParseReference(ctxt);
2860: if (val != NULL) {
2861: if (val[0] != '&') {
2862: /*
2863: * inline predefined entity.
2864: */
2865: if (ctxt->sax != NULL)
2866: ctxt->sax->characters(ctxt, val, 0, xmlStrlen(val));
2867: } else {
2868: /*
2869: * user defined entity, create a node.
2870: */
2871: ret = xmlNewReference(ctxt->doc, val);
2872: xmlAddChild(ctxt->node, ret);
2873: }
2874: free(val);
2875: }
1.27 daniel 2876: }
2877: /*
2878: * Last case, text. Note that References are handled directly.
2879: */
2880: else {
1.45 daniel 2881: xmlParseCharData(ctxt, 0);
1.3 veillard 2882: }
1.14 veillard 2883:
2884: /*
1.45 daniel 2885: * Pop-up of finished entities.
1.14 veillard 2886: */
1.45 daniel 2887: while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
2888:
1.40 daniel 2889: if (test == CUR_PTR) {
1.55 daniel 2890: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2891: ctxt->sax->error(ctxt, "detected an error in element content\n");
1.29 daniel 2892: break;
2893: }
1.3 veillard 2894: }
1.2 veillard 2895: }
2896:
1.50 daniel 2897: /**
2898: * xmlParseElement:
2899: * @ctxt: an XML parser context
2900: *
2901: * parse an XML element, this is highly recursive
1.26 daniel 2902: *
2903: * [39] element ::= EmptyElemTag | STag content ETag
2904: *
2905: * [41] Attribute ::= Name Eq AttValue
1.50 daniel 2906: * return values: the XML new node or NULL
1.2 veillard 2907: */
1.26 daniel 2908:
1.2 veillard 2909:
1.45 daniel 2910: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2911: xmlNodePtr ret;
1.40 daniel 2912: const CHAR *openTag = CUR_PTR;
1.32 daniel 2913: xmlParserNodeInfo node_info;
1.27 daniel 2914: CHAR *endTag;
1.34 daniel 2915: xmlNsPtr endNs;
1.2 veillard 2916:
1.32 daniel 2917: /* Capture start position */
1.40 daniel 2918: node_info.begin_pos = CUR_PTR - ctxt->input->base;
2919: node_info.begin_line = ctxt->input->line;
1.32 daniel 2920:
1.16 daniel 2921: ret = xmlParseStartTag(ctxt);
1.3 veillard 2922: if (ret == NULL) {
2923: return(NULL);
2924: }
1.2 veillard 2925:
2926: /*
2927: * Check for an Empty Element.
2928: */
1.40 daniel 2929: if ((CUR == '/') && (NXT(1) == '>')) {
2930: SKIP(2);
1.45 daniel 2931: if (ctxt->sax != NULL)
2932: ctxt->sax->endElement(ctxt, ret->name);
2933:
2934: /*
2935: * end of parsing of this node.
2936: */
2937: nodePop(ctxt);
2938:
1.2 veillard 2939: return(ret);
2940: }
1.40 daniel 2941: if (CUR == '>') NEXT;
1.2 veillard 2942: else {
1.55 daniel 2943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 ! daniel 2944: ctxt->sax->error(ctxt, "Couldn't find end of Start Tag\n%.30s\n",
! 2945: openTag);
1.45 daniel 2946:
2947: /*
2948: * end of parsing of this node.
2949: */
2950: nodePop(ctxt);
2951:
1.16 daniel 2952: return(NULL);
1.2 veillard 2953: }
2954:
2955: /*
2956: * Parse the content of the element:
2957: */
1.45 daniel 2958: xmlParseContent(ctxt);
1.40 daniel 2959: if (!IS_CHAR(CUR)) {
1.55 daniel 2960: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 ! daniel 2961: ctxt->sax->error(ctxt,
! 2962: "Premature end of data in tag %.30s\n", openTag);
1.45 daniel 2963:
2964: /*
2965: * end of parsing of this node.
2966: */
2967: nodePop(ctxt);
2968:
1.16 daniel 2969: return(NULL);
1.2 veillard 2970: }
2971:
2972: /*
1.27 daniel 2973: * parse the end of tag: '</' should be here.
1.2 veillard 2974: */
1.34 daniel 2975: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 2976:
1.27 daniel 2977: /*
2978: * Check that the Name in the ETag is the same as in the STag.
2979: */
1.34 daniel 2980: if (endNs != ret->ns) {
1.55 daniel 2981: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2982: ctxt->sax->error(ctxt,
1.43 daniel 2983: "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
1.31 daniel 2984: openTag, endTag);
1.27 daniel 2985: }
1.32 daniel 2986: if (endTag == NULL ) {
1.55 daniel 2987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2988: ctxt->sax->error(ctxt, "The End tag has no name\n%.30s\n", openTag);
1.45 daniel 2989: } else if (xmlStrcmp(ret->name, endTag)) {
1.55 daniel 2990: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2991: ctxt->sax->error(ctxt,
1.31 daniel 2992: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
2993: openTag, endTag);
1.27 daniel 2994: }
1.44 daniel 2995: /*
2996: * SAX: End of Tag
2997: */
2998: else if (ctxt->sax != NULL)
2999: ctxt->sax->endElement(ctxt, endTag);
1.7 veillard 3000:
1.44 daniel 3001: if (endTag != NULL)
3002: free(endTag);
1.2 veillard 3003:
1.32 daniel 3004: /* Capture end position and add node */
3005: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 3006: node_info.end_pos = CUR_PTR - ctxt->input->base;
3007: node_info.end_line = ctxt->input->line;
1.32 daniel 3008: node_info.node = ret;
3009: xmlParserAddNodeInfo(ctxt, &node_info);
3010: }
1.43 daniel 3011:
3012: /*
3013: * end of parsing of this node.
3014: */
3015: nodePop(ctxt);
3016:
1.2 veillard 3017: return(ret);
3018: }
3019:
1.50 daniel 3020: /**
3021: * xmlParseVersionNum:
3022: * @ctxt: an XML parser context
3023: *
3024: * parse the XML version value.
1.29 daniel 3025: *
3026: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.50 daniel 3027: * return values: the string giving the XML version number, or NULL
1.29 daniel 3028: */
1.55 daniel 3029: CHAR *
3030: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 3031: const CHAR *q = CUR_PTR;
1.29 daniel 3032: CHAR *ret;
3033:
1.40 daniel 3034: while (IS_CHAR(CUR) &&
3035: (((CUR >= 'a') && (CUR <= 'z')) ||
3036: ((CUR >= 'A') && (CUR <= 'Z')) ||
3037: ((CUR >= '0') && (CUR <= '9')) ||
3038: (CUR == '_') || (CUR == '.') ||
3039: (CUR == ':') || (CUR == '-'))) NEXT;
3040: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3041: return(ret);
3042: }
3043:
1.50 daniel 3044: /**
3045: * xmlParseVersionInfo:
3046: * @ctxt: an XML parser context
3047: *
3048: * parse the XML version.
1.29 daniel 3049: *
3050: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3051: *
3052: * [25] Eq ::= S? '=' S?
1.50 daniel 3053: *
3054: * return values: the version string, e.g. "1.0"
1.29 daniel 3055: */
3056:
1.55 daniel 3057: CHAR *
3058: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 3059: CHAR *version = NULL;
3060: const CHAR *q;
3061:
1.40 daniel 3062: if ((CUR == 'v') && (NXT(1) == 'e') &&
3063: (NXT(2) == 'r') && (NXT(3) == 's') &&
3064: (NXT(4) == 'i') && (NXT(5) == 'o') &&
3065: (NXT(6) == 'n')) {
3066: SKIP(7);
1.42 daniel 3067: SKIP_BLANKS;
1.40 daniel 3068: if (CUR != '=') {
1.55 daniel 3069: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3070: ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected '='\n");
1.31 daniel 3071: return(NULL);
3072: }
1.40 daniel 3073: NEXT;
1.42 daniel 3074: SKIP_BLANKS;
1.40 daniel 3075: if (CUR == '"') {
3076: NEXT;
3077: q = CUR_PTR;
1.29 daniel 3078: version = xmlParseVersionNum(ctxt);
1.55 daniel 3079: if (CUR != '"') {
3080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3081: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
3082: } else
1.40 daniel 3083: NEXT;
3084: } else if (CUR == '\''){
3085: NEXT;
3086: q = CUR_PTR;
1.29 daniel 3087: version = xmlParseVersionNum(ctxt);
1.55 daniel 3088: if (CUR != '\'') {
3089: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3090: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
3091: } else
1.40 daniel 3092: NEXT;
1.31 daniel 3093: } else {
1.55 daniel 3094: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3095: ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
1.29 daniel 3096: }
3097: }
3098: return(version);
3099: }
3100:
1.50 daniel 3101: /**
3102: * xmlParseEncName:
3103: * @ctxt: an XML parser context
3104: *
3105: * parse the XML encoding name
1.29 daniel 3106: *
3107: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 3108: *
3109: * return values: the encoding name value or NULL
1.29 daniel 3110: */
1.55 daniel 3111: CHAR *
3112: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 3113: const CHAR *q = CUR_PTR;
1.29 daniel 3114: CHAR *ret = NULL;
3115:
1.40 daniel 3116: if (((CUR >= 'a') && (CUR <= 'z')) ||
3117: ((CUR >= 'A') && (CUR <= 'Z'))) {
3118: NEXT;
3119: while (IS_CHAR(CUR) &&
3120: (((CUR >= 'a') && (CUR <= 'z')) ||
3121: ((CUR >= 'A') && (CUR <= 'Z')) ||
3122: ((CUR >= '0') && (CUR <= '9')) ||
3123: (CUR == '-'))) NEXT;
3124: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3125: } else {
1.55 daniel 3126: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3127: ctxt->sax->error(ctxt, "Invalid XML encoding name\n");
1.29 daniel 3128: }
3129: return(ret);
3130: }
3131:
1.50 daniel 3132: /**
3133: * xmlParseEncodingDecl:
3134: * @ctxt: an XML parser context
3135: *
3136: * parse the XML encoding declaration
1.29 daniel 3137: *
3138: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 3139: *
3140: * TODO: this should setup the conversion filters.
3141: *
3142: * return values: the encoding value or NULL
1.29 daniel 3143: */
3144:
1.55 daniel 3145: CHAR *
3146: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 3147: CHAR *encoding = NULL;
3148: const CHAR *q;
3149:
1.42 daniel 3150: SKIP_BLANKS;
1.40 daniel 3151: if ((CUR == 'e') && (NXT(1) == 'n') &&
3152: (NXT(2) == 'c') && (NXT(3) == 'o') &&
3153: (NXT(4) == 'd') && (NXT(5) == 'i') &&
3154: (NXT(6) == 'n') && (NXT(7) == 'g')) {
3155: SKIP(8);
1.42 daniel 3156: SKIP_BLANKS;
1.40 daniel 3157: if (CUR != '=') {
1.55 daniel 3158: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3159: ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected '='\n");
1.31 daniel 3160: return(NULL);
3161: }
1.40 daniel 3162: NEXT;
1.42 daniel 3163: SKIP_BLANKS;
1.40 daniel 3164: if (CUR == '"') {
3165: NEXT;
3166: q = CUR_PTR;
1.29 daniel 3167: encoding = xmlParseEncName(ctxt);
1.55 daniel 3168: if (CUR != '"') {
3169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3170: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
3171: } else
1.40 daniel 3172: NEXT;
3173: } else if (CUR == '\''){
3174: NEXT;
3175: q = CUR_PTR;
1.29 daniel 3176: encoding = xmlParseEncName(ctxt);
1.55 daniel 3177: if (CUR != '\'') {
3178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3179: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
3180: } else
1.40 daniel 3181: NEXT;
3182: } else if (CUR == '"'){
1.55 daniel 3183: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3184: ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
1.29 daniel 3185: }
3186: }
3187: return(encoding);
3188: }
3189:
1.50 daniel 3190: /**
3191: * xmlParseSDDecl:
3192: * @ctxt: an XML parser context
3193: *
3194: * parse the XML standalone declaration
1.29 daniel 3195: *
3196: * [32] SDDecl ::= S 'standalone' Eq
3197: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.50 daniel 3198: * return values: 1 if standalone, 0 otherwise
1.29 daniel 3199: */
3200:
1.55 daniel 3201: int
3202: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 3203: int standalone = -1;
3204:
1.42 daniel 3205: SKIP_BLANKS;
1.40 daniel 3206: if ((CUR == 's') && (NXT(1) == 't') &&
3207: (NXT(2) == 'a') && (NXT(3) == 'n') &&
3208: (NXT(4) == 'd') && (NXT(5) == 'a') &&
3209: (NXT(6) == 'l') && (NXT(7) == 'o') &&
3210: (NXT(8) == 'n') && (NXT(9) == 'e')) {
3211: SKIP(10);
3212: if (CUR != '=') {
1.55 daniel 3213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3214: ctxt->sax->error(ctxt, "XML standalone declaration : expected '='\n");
1.32 daniel 3215: return(standalone);
3216: }
1.40 daniel 3217: NEXT;
1.42 daniel 3218: SKIP_BLANKS;
1.40 daniel 3219: if (CUR == '\''){
3220: NEXT;
3221: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3222: standalone = 0;
1.40 daniel 3223: SKIP(2);
3224: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3225: (NXT(2) == 's')) {
1.29 daniel 3226: standalone = 1;
1.40 daniel 3227: SKIP(3);
1.29 daniel 3228: } else {
1.55 daniel 3229: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3230: ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 3231: }
1.55 daniel 3232: if (CUR != '\'') {
3233: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3234: ctxt->sax->error(ctxt, "String not closed\n");
3235: } else
1.40 daniel 3236: NEXT;
3237: } else if (CUR == '"'){
3238: NEXT;
3239: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3240: standalone = 0;
1.40 daniel 3241: SKIP(2);
3242: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3243: (NXT(2) == 's')) {
1.29 daniel 3244: standalone = 1;
1.40 daniel 3245: SKIP(3);
1.29 daniel 3246: } else {
1.55 daniel 3247: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3248: ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.29 daniel 3249: }
1.55 daniel 3250: if (CUR != '"') {
3251: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3252: ctxt->sax->error(ctxt, "String not closed\n");
3253: } else
1.40 daniel 3254: NEXT;
1.37 daniel 3255: } else {
1.55 daniel 3256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3257: ctxt->sax->error(ctxt, "Standalone value not found\n");
1.37 daniel 3258: }
1.29 daniel 3259: }
3260: return(standalone);
3261: }
3262:
1.50 daniel 3263: /**
3264: * xmlParseXMLDecl:
3265: * @ctxt: an XML parser context
3266: *
3267: * parse an XML declaration header
1.29 daniel 3268: *
3269: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 3270: */
3271:
1.55 daniel 3272: void
3273: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 3274: CHAR *version;
3275:
3276: /*
1.19 daniel 3277: * We know that '<?xml' is here.
1.1 veillard 3278: */
1.40 daniel 3279: SKIP(5);
1.1 veillard 3280:
1.42 daniel 3281: SKIP_BLANKS;
1.1 veillard 3282:
3283: /*
1.29 daniel 3284: * We should have the VersionInfo here.
1.1 veillard 3285: */
1.29 daniel 3286: version = xmlParseVersionInfo(ctxt);
3287: if (version == NULL)
1.45 daniel 3288: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3289: ctxt->doc = xmlNewDoc(version);
3290: free(version);
1.29 daniel 3291:
3292: /*
3293: * We may have the encoding declaration
3294: */
1.32 daniel 3295: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 3296:
3297: /*
1.29 daniel 3298: * We may have the standalone status.
1.1 veillard 3299: */
1.32 daniel 3300: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 3301:
1.42 daniel 3302: SKIP_BLANKS;
1.40 daniel 3303: if ((CUR == '?') && (NXT(1) == '>')) {
3304: SKIP(2);
3305: } else if (CUR == '>') {
1.31 daniel 3306: /* Deprecated old WD ... */
1.55 daniel 3307: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3308: ctxt->sax->error(ctxt, "XML declaration must end-up with '?>'\n");
1.40 daniel 3309: NEXT;
1.29 daniel 3310: } else {
1.55 daniel 3311: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3312: ctxt->sax->error(ctxt, "parsing XML declaration: '?>' expected\n");
1.40 daniel 3313: MOVETO_ENDTAG(CUR_PTR);
3314: NEXT;
1.29 daniel 3315: }
1.1 veillard 3316: }
3317:
1.50 daniel 3318: /**
3319: * xmlParseMisc:
3320: * @ctxt: an XML parser context
3321: *
3322: * parse an XML Misc* optionnal field.
1.21 daniel 3323: *
1.22 daniel 3324: * [27] Misc ::= Comment | PI | S
1.1 veillard 3325: */
3326:
1.55 daniel 3327: void
3328: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 3329: while (((CUR == '<') && (NXT(1) == '?')) ||
3330: ((CUR == '<') && (NXT(1) == '!') &&
3331: (NXT(2) == '-') && (NXT(3) == '-')) ||
3332: IS_BLANK(CUR)) {
3333: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 3334: xmlParsePI(ctxt);
1.40 daniel 3335: } else if (IS_BLANK(CUR)) {
3336: NEXT;
1.1 veillard 3337: } else
1.31 daniel 3338: xmlParseComment(ctxt, 0);
1.1 veillard 3339: }
3340: }
3341:
1.50 daniel 3342: /**
3343: * xmlParseDocument :
3344: * @ctxt: an XML parser context
3345: *
3346: * parse an XML document (and build a tree if using the standard SAX
3347: * interface).
1.21 daniel 3348: *
1.22 daniel 3349: * [1] document ::= prolog element Misc*
1.29 daniel 3350: *
3351: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 3352: *
3353: * return values: 0, -1 in case of error. the parser context is augmented
3354: * as a result of the parsing.
1.1 veillard 3355: */
3356:
1.55 daniel 3357: int
3358: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 3359: xmlDefaultSAXHandlerInit();
3360:
1.14 veillard 3361: /*
1.44 daniel 3362: * SAX: beginning of the document processing.
3363: */
3364: if (ctxt->sax)
3365: ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
3366: if (ctxt->sax)
3367: ctxt->sax->startDocument(ctxt);
3368:
3369: /*
1.14 veillard 3370: * We should check for encoding here and plug-in some
3371: * conversion code TODO !!!!
3372: */
1.1 veillard 3373:
3374: /*
3375: * Wipe out everything which is before the first '<'
3376: */
1.42 daniel 3377: SKIP_BLANKS;
1.1 veillard 3378:
3379: /*
3380: * Check for the XMLDecl in the Prolog.
3381: */
1.40 daniel 3382: if ((CUR == '<') && (NXT(1) == '?') &&
3383: (NXT(2) == 'x') && (NXT(3) == 'm') &&
3384: (NXT(4) == 'l')) {
1.19 daniel 3385: xmlParseXMLDecl(ctxt);
3386: /* SKIP_EOL(cur); */
1.42 daniel 3387: SKIP_BLANKS;
1.40 daniel 3388: } else if ((CUR == '<') && (NXT(1) == '?') &&
3389: (NXT(2) == 'X') && (NXT(3) == 'M') &&
3390: (NXT(4) == 'L')) {
1.19 daniel 3391: /*
3392: * The first drafts were using <?XML and the final W3C REC
3393: * now use <?xml ...
3394: */
1.16 daniel 3395: xmlParseXMLDecl(ctxt);
1.1 veillard 3396: /* SKIP_EOL(cur); */
1.42 daniel 3397: SKIP_BLANKS;
1.1 veillard 3398: } else {
1.45 daniel 3399: CHAR *version;
3400:
3401: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3402: ctxt->doc = xmlNewDoc(version);
3403: free(version);
1.1 veillard 3404: }
3405:
3406: /*
3407: * The Misc part of the Prolog
3408: */
1.16 daniel 3409: xmlParseMisc(ctxt);
1.1 veillard 3410:
3411: /*
1.29 daniel 3412: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 3413: * (doctypedecl Misc*)?
3414: */
1.40 daniel 3415: if ((CUR == '<') && (NXT(1) == '!') &&
3416: (NXT(2) == 'D') && (NXT(3) == 'O') &&
3417: (NXT(4) == 'C') && (NXT(5) == 'T') &&
3418: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
3419: (NXT(8) == 'E')) {
1.22 daniel 3420: xmlParseDocTypeDecl(ctxt);
3421: xmlParseMisc(ctxt);
1.21 daniel 3422: }
3423:
3424: /*
3425: * Time to start parsing the tree itself
1.1 veillard 3426: */
1.45 daniel 3427: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 3428:
3429: /*
3430: * The Misc part at the end
3431: */
3432: xmlParseMisc(ctxt);
1.16 daniel 3433:
1.44 daniel 3434: /*
3435: * SAX: end of the document processing.
3436: */
3437: if (ctxt->sax)
3438: ctxt->sax->endDocument(ctxt);
1.16 daniel 3439: return(0);
3440: }
3441:
1.50 daniel 3442: /**
1.55 daniel 3443: * xmlSAXParseDoc :
3444: * @sax: the SAX handler block
1.50 daniel 3445: * @cur: a pointer to an array of CHAR
3446: *
3447: * parse an XML in-memory document and build a tree.
1.55 daniel 3448: * It use the given SAX function block to handle the parsing callback.
3449: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 3450: *
3451: * return values: the resulting document tree
1.16 daniel 3452: */
3453:
1.55 daniel 3454: xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur) {
1.16 daniel 3455: xmlDocPtr ret;
3456: xmlParserCtxtPtr ctxt;
1.40 daniel 3457: xmlParserInputPtr input;
1.16 daniel 3458:
3459: if (cur == NULL) return(NULL);
1.1 veillard 3460:
1.16 daniel 3461: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3462: if (ctxt == NULL) {
3463: perror("malloc");
3464: return(NULL);
3465: }
1.40 daniel 3466: xmlInitParserCtxt(ctxt);
1.56 daniel 3467: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 3468: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3469: if (input == NULL) {
3470: perror("malloc");
3471: free(ctxt);
3472: return(NULL);
3473: }
3474:
3475: input->filename = NULL;
3476: input->line = 1;
3477: input->col = 1;
3478: input->base = cur;
3479: input->cur = cur;
3480:
3481: inputPush(ctxt, input);
1.16 daniel 3482:
3483:
3484: xmlParseDocument(ctxt);
3485: ret = ctxt->doc;
1.50 daniel 3486: free(ctxt->nodeTab);
3487: free(ctxt->inputTab);
3488: if (input->filename != NULL)
1.51 daniel 3489: free((char *)input->filename);
1.50 daniel 3490: free(input);
1.16 daniel 3491: free(ctxt);
3492:
1.1 veillard 3493: return(ret);
3494: }
3495:
1.50 daniel 3496: /**
1.55 daniel 3497: * xmlParseDoc :
3498: * @cur: a pointer to an array of CHAR
3499: *
3500: * parse an XML in-memory document and build a tree.
3501: *
3502: * return values: the resulting document tree
3503: */
3504:
3505: xmlDocPtr xmlParseDoc(CHAR *cur) {
3506: return(xmlSAXParseDoc(NULL, cur));
3507: }
3508:
3509: /**
3510: * xmlSAXParseFile :
3511: * @sax: the SAX handler block
1.50 daniel 3512: * @filename: the filename
3513: *
3514: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
3515: * compressed document is provided by default if found at compile-time.
1.55 daniel 3516: * It use the given SAX function block to handle the parsing callback.
3517: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 3518: *
3519: * return values: the resulting document tree
1.9 httpng 3520: */
3521:
1.55 daniel 3522: xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename) {
1.9 httpng 3523: xmlDocPtr ret;
1.20 daniel 3524: #ifdef HAVE_ZLIB_H
3525: gzFile input;
3526: #else
1.9 httpng 3527: int input;
1.20 daniel 3528: #endif
1.9 httpng 3529: int res;
1.55 daniel 3530: int len;
1.9 httpng 3531: struct stat buf;
3532: char *buffer;
1.16 daniel 3533: xmlParserCtxtPtr ctxt;
1.40 daniel 3534: xmlParserInputPtr inputStream;
1.9 httpng 3535:
1.11 veillard 3536: res = stat(filename, &buf);
1.9 httpng 3537: if (res < 0) return(NULL);
3538:
1.20 daniel 3539: #ifdef HAVE_ZLIB_H
1.55 daniel 3540: len = (buf.st_size * 8) + 1000;
1.20 daniel 3541: retry_bigger:
1.55 daniel 3542: buffer = malloc(len);
1.20 daniel 3543: #else
1.55 daniel 3544: len = buf.st_size + 100;
3545: buffer = malloc(len);
1.20 daniel 3546: #endif
1.9 httpng 3547: if (buffer == NULL) {
3548: perror("malloc");
3549: return(NULL);
3550: }
3551:
1.55 daniel 3552: memset(buffer, 0, len);
1.20 daniel 3553: #ifdef HAVE_ZLIB_H
3554: input = gzopen (filename, "r");
3555: if (input == NULL) {
3556: fprintf (stderr, "Cannot read file %s :\n", filename);
3557: perror ("gzopen failed");
3558: return(NULL);
3559: }
3560: #else
1.9 httpng 3561: input = open (filename, O_RDONLY);
3562: if (input < 0) {
3563: fprintf (stderr, "Cannot read file %s :\n", filename);
3564: perror ("open failed");
3565: return(NULL);
3566: }
1.20 daniel 3567: #endif
3568: #ifdef HAVE_ZLIB_H
1.55 daniel 3569: res = gzread(input, buffer, len);
1.20 daniel 3570: #else
1.9 httpng 3571: res = read(input, buffer, buf.st_size);
1.20 daniel 3572: #endif
1.9 httpng 3573: if (res < 0) {
3574: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 3575: #ifdef HAVE_ZLIB_H
3576: perror ("gzread failed");
3577: #else
1.9 httpng 3578: perror ("read failed");
1.20 daniel 3579: #endif
1.9 httpng 3580: return(NULL);
3581: }
1.20 daniel 3582: #ifdef HAVE_ZLIB_H
3583: gzclose(input);
1.55 daniel 3584: if (res >= len) {
1.20 daniel 3585: free(buffer);
1.55 daniel 3586: len *= 2;
1.20 daniel 3587: goto retry_bigger;
3588: }
3589: buf.st_size = res;
3590: #else
1.9 httpng 3591: close(input);
1.20 daniel 3592: #endif
3593:
1.40 daniel 3594: buffer[buf.st_size] = '\0';
1.9 httpng 3595:
1.16 daniel 3596: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3597: if (ctxt == NULL) {
3598: perror("malloc");
3599: return(NULL);
3600: }
1.40 daniel 3601: xmlInitParserCtxt(ctxt);
1.56 daniel 3602: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 3603: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3604: if (inputStream == NULL) {
3605: perror("malloc");
3606: free(ctxt);
3607: return(NULL);
3608: }
3609:
3610: inputStream->filename = strdup(filename);
3611: inputStream->line = 1;
3612: inputStream->col = 1;
1.45 daniel 3613:
3614: /*
3615: * TODO : plug some encoding conversion routines here. !!!
3616: */
1.40 daniel 3617: inputStream->base = buffer;
3618: inputStream->cur = buffer;
1.16 daniel 3619:
1.40 daniel 3620: inputPush(ctxt, inputStream);
1.16 daniel 3621:
3622: xmlParseDocument(ctxt);
1.40 daniel 3623:
1.16 daniel 3624: ret = ctxt->doc;
1.9 httpng 3625: free(buffer);
1.50 daniel 3626: free(ctxt->nodeTab);
3627: free(ctxt->inputTab);
3628: if (inputStream->filename != NULL)
1.51 daniel 3629: free((char *)inputStream->filename);
1.50 daniel 3630: free(inputStream);
1.20 daniel 3631: free(ctxt);
3632:
3633: return(ret);
3634: }
3635:
1.55 daniel 3636: /**
3637: * xmlParseFile :
3638: * @filename: the filename
3639: *
3640: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
3641: * compressed document is provided by default if found at compile-time.
3642: *
3643: * return values: the resulting document tree
3644: */
3645:
3646: xmlDocPtr xmlParseFile(const char *filename) {
3647: return(xmlSAXParseFile(NULL, filename));
3648: }
1.32 daniel 3649:
1.50 daniel 3650: /**
1.55 daniel 3651: * xmlSAXParseMemory :
3652: * @sax: the SAX handler block
1.50 daniel 3653: * @cur: an pointer to a char array
3654: * @size: the siwe of the array
3655: *
1.55 daniel 3656: * parse an XML in-memory block and use the given SAX function block
3657: * to handle the parsing callback. If sax is NULL, fallback to the default
3658: * DOM tree building routines.
1.50 daniel 3659: *
3660: * TODO : plug some encoding conversion routines here. !!!
3661: *
3662: * return values: the resulting document tree
1.20 daniel 3663: */
1.50 daniel 3664:
1.55 daniel 3665: xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size) {
1.20 daniel 3666: xmlDocPtr ret;
3667: xmlParserCtxtPtr ctxt;
1.40 daniel 3668: xmlParserInputPtr input;
3669:
3670: buffer[size - 1] = '\0';
3671:
1.20 daniel 3672: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3673: if (ctxt == NULL) {
3674: perror("malloc");
3675: return(NULL);
3676: }
1.40 daniel 3677: xmlInitParserCtxt(ctxt);
1.56 daniel 3678: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 3679: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3680: if (input == NULL) {
3681: perror("malloc");
1.50 daniel 3682: free(ctxt->nodeTab);
3683: free(ctxt->inputTab);
1.40 daniel 3684: free(ctxt);
3685: return(NULL);
3686: }
1.20 daniel 3687:
1.40 daniel 3688: input->filename = NULL;
3689: input->line = 1;
3690: input->col = 1;
1.45 daniel 3691:
3692: /*
3693: * TODO : plug some encoding conversion routines here. !!!
3694: */
1.40 daniel 3695: input->base = buffer;
3696: input->cur = buffer;
1.20 daniel 3697:
1.40 daniel 3698: inputPush(ctxt, input);
1.20 daniel 3699:
3700: xmlParseDocument(ctxt);
1.40 daniel 3701:
1.20 daniel 3702: ret = ctxt->doc;
1.50 daniel 3703: free(ctxt->nodeTab);
3704: free(ctxt->inputTab);
3705: if (input->filename != NULL)
1.51 daniel 3706: free((char *)input->filename);
1.50 daniel 3707: free(input);
1.16 daniel 3708: free(ctxt);
3709:
1.9 httpng 3710: return(ret);
1.17 daniel 3711: }
3712:
1.55 daniel 3713: /**
3714: * xmlParseMemory :
3715: * @cur: an pointer to a char array
3716: * @size: the size of the array
3717: *
3718: * parse an XML in-memory block and build a tree.
3719: *
3720: * return values: the resulting document tree
3721: */
3722:
3723: xmlDocPtr xmlParseMemory(char *buffer, int size) {
3724: return(xmlSAXParseMemory(NULL, buffer, size));
3725: }
1.17 daniel 3726:
1.50 daniel 3727: /**
3728: * xmlInitParserCtxt:
3729: * @ctxt: an XML parser context
3730: *
3731: * Initialize a parser context
3732: */
3733:
1.55 daniel 3734: void
3735: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 3736: {
1.40 daniel 3737: /* Allocate the Input stack */
3738: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
3739: ctxt->inputNr = 0;
3740: ctxt->inputMax = 5;
3741: ctxt->input = NULL;
3742:
1.43 daniel 3743: /* Allocate the Node stack */
3744: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
3745: ctxt->nodeNr = 0;
3746: ctxt->nodeMax = 10;
3747: ctxt->node = NULL;
3748:
1.45 daniel 3749: ctxt->sax = &xmlDefaultSAXHandler;
1.32 daniel 3750: ctxt->doc = NULL;
3751: ctxt->record_info = 0;
3752: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 3753: }
3754:
1.50 daniel 3755: /**
3756: * xmlClearParserCtxt:
3757: * @ctxt: an XML parser context
3758: *
3759: * Clear (release owned resources) and reinitialize a parser context
3760: */
1.17 daniel 3761:
1.55 daniel 3762: void
3763: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 3764: {
1.32 daniel 3765: xmlClearNodeInfoSeq(&ctxt->node_seq);
3766: xmlInitParserCtxt(ctxt);
1.17 daniel 3767: }
3768:
3769:
1.50 daniel 3770: /**
3771: * xmlSetupParserForBuffer:
3772: * @ctxt: an XML parser context
3773: * @buffer: a CHAR * buffer
3774: * @filename: a file name
3775: *
1.19 daniel 3776: * Setup the parser context to parse a new buffer; Clears any prior
3777: * contents from the parser context. The buffer parameter must not be
3778: * NULL, but the filename parameter can be
3779: */
1.50 daniel 3780:
1.55 daniel 3781: void
3782: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 3783: const char* filename)
3784: {
1.40 daniel 3785: xmlParserInputPtr input;
3786:
3787: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3788: if (input == NULL) {
3789: perror("malloc");
3790: free(ctxt);
3791: exit(1);
3792: }
3793:
1.17 daniel 3794: xmlClearParserCtxt(ctxt);
1.40 daniel 3795: if (input->filename != NULL)
3796: input->filename = strdup(filename);
3797: else
3798: input->filename = NULL;
3799: input->line = 1;
3800: input->col = 1;
3801: input->base = buffer;
3802: input->cur = buffer;
3803:
3804: inputPush(ctxt, input);
1.17 daniel 3805: }
3806:
1.32 daniel 3807:
1.50 daniel 3808: /**
3809: * xmlParserFindNodeInfo:
3810: * @ctxt: an XML parser context
3811: * @node: an XML node within the tree
3812: *
3813: * Find the parser node info struct for a given node
3814: *
3815: * return values: an xmlParserNodeInfo block pointer or NULL
1.32 daniel 3816: */
3817: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
3818: const xmlNode* node)
3819: {
3820: unsigned long pos;
3821:
3822: /* Find position where node should be at */
3823: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
3824: if ( ctx->node_seq.buffer[pos].node == node )
3825: return &ctx->node_seq.buffer[pos];
3826: else
3827: return NULL;
3828: }
3829:
3830:
1.50 daniel 3831: /**
3832: * xmlInitNodeInfoSeq :
3833: * @seq: a node info sequence pointer
3834: *
3835: * -- Initialize (set to initial state) node info sequence
1.32 daniel 3836: */
1.55 daniel 3837: void
3838: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 3839: {
3840: seq->length = 0;
3841: seq->maximum = 0;
3842: seq->buffer = NULL;
3843: }
3844:
1.50 daniel 3845: /**
3846: * xmlClearNodeInfoSeq :
3847: * @seq: a node info sequence pointer
3848: *
3849: * -- Clear (release memory and reinitialize) node
1.32 daniel 3850: * info sequence
3851: */
1.55 daniel 3852: void
3853: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 3854: {
3855: if ( seq->buffer != NULL )
3856: free(seq->buffer);
3857: xmlInitNodeInfoSeq(seq);
3858: }
3859:
3860:
1.50 daniel 3861: /**
3862: * xmlParserFindNodeInfoIndex:
3863: * @seq: a node info sequence pointer
3864: * @node: an XML node pointer
3865: *
3866: *
1.32 daniel 3867: * xmlParserFindNodeInfoIndex : Find the index that the info record for
3868: * the given node is or should be at in a sorted sequence
1.50 daniel 3869: * return values: a long indicating the position of the record
1.32 daniel 3870: */
3871: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
3872: const xmlNode* node)
3873: {
3874: unsigned long upper, lower, middle;
3875: int found = 0;
3876:
3877: /* Do a binary search for the key */
3878: lower = 1;
3879: upper = seq->length;
3880: middle = 0;
3881: while ( lower <= upper && !found) {
3882: middle = lower + (upper - lower) / 2;
3883: if ( node == seq->buffer[middle - 1].node )
3884: found = 1;
3885: else if ( node < seq->buffer[middle - 1].node )
3886: upper = middle - 1;
3887: else
3888: lower = middle + 1;
3889: }
3890:
3891: /* Return position */
3892: if ( middle == 0 || seq->buffer[middle - 1].node < node )
3893: return middle;
3894: else
3895: return middle - 1;
3896: }
3897:
3898:
1.50 daniel 3899: /**
3900: * xmlParserAddNodeInfo:
3901: * @ctxt: an XML parser context
3902: * @seq: a node info sequence pointer
3903: *
3904: * Insert node info record into the sorted sequence
1.32 daniel 3905: */
1.55 daniel 3906: void
3907: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.32 daniel 3908: const xmlParserNodeInfo* info)
3909: {
3910: unsigned long pos;
3911: static unsigned int block_size = 5;
3912:
3913: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 3914: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
3915: if ( pos < ctxt->node_seq.length
3916: && ctxt->node_seq.buffer[pos].node == info->node ) {
3917: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 3918: }
3919:
3920: /* Otherwise, we need to add new node to buffer */
3921: else {
3922: /* Expand buffer by 5 if needed */
1.55 daniel 3923: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 3924: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 3925: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
3926: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 3927:
1.55 daniel 3928: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 3929: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
3930: else
1.55 daniel 3931: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 3932:
3933: if ( tmp_buffer == NULL ) {
1.55 daniel 3934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3935: ctxt->sax->error(ctxt, "Out of memory");
1.32 daniel 3936: return;
3937: }
1.55 daniel 3938: ctxt->node_seq.buffer = tmp_buffer;
3939: ctxt->node_seq.maximum += block_size;
1.32 daniel 3940: }
3941:
3942: /* If position is not at end, move elements out of the way */
1.55 daniel 3943: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 3944: unsigned long i;
3945:
1.55 daniel 3946: for ( i = ctxt->node_seq.length; i > pos; i-- )
3947: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 3948: }
3949:
3950: /* Copy element and increase length */
1.55 daniel 3951: ctxt->node_seq.buffer[pos] = *info;
3952: ctxt->node_seq.length++;
1.32 daniel 3953: }
3954: }
Webmaster