Annotation of XML/parser.c, revision 1.59
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.59 ! daniel 6: * $Id: parser.c,v 1.13 1998/11/16 01:04:26 veillard Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
1.45 daniel 34: /************************************************************************
35: * *
36: * Parser stacks related functions and macros *
37: * *
38: ************************************************************************/
1.1 veillard 39: /*
1.40 daniel 40: * Generic function for accessing stacks in the Parser Context
1.1 veillard 41: */
42:
1.31 daniel 43: #define PUSH_AND_POP(type, name) \
1.40 daniel 44: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 45: if (ctxt->name##Nr >= ctxt->name##Max) { \
46: ctxt->name##Max *= 2; \
1.40 daniel 47: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
48: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
49: if (ctxt->name##Tab == NULL) { \
1.31 daniel 50: fprintf(stderr, "realloc failed !\n"); \
51: exit(1); \
52: } \
53: } \
1.40 daniel 54: ctxt->name##Tab[ctxt->name##Nr] = value; \
55: ctxt->name = value; \
56: return(ctxt->name##Nr++); \
1.31 daniel 57: } \
1.40 daniel 58: type name##Pop(xmlParserCtxtPtr ctxt) { \
59: if (ctxt->name##Nr <= 0) return(0); \
60: ctxt->name##Nr--; \
1.50 daniel 61: if (ctxt->name##Nr > 0) \
62: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
63: else \
64: ctxt->name = NULL; \
1.40 daniel 65: return(ctxt->name); \
1.31 daniel 66: } \
67:
1.40 daniel 68: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 69: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 70:
1.55 daniel 71: /*
72: * Macros for accessing the content. Those should be used only by the parser,
73: * and not exported.
74: *
75: * Dirty macros, i.e. one needs to make assumptions about the context to use them
76: *
77: * CUR_PTR return the current pointer to the CHAR to be parsed.
78: * CUR returns the current CHAR value, i.e. a 8 bit value if compiled
79: * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
80: * in UNICODE mode. This should be used internally by the parser
81: * only to compare to ASCII values otherwise it would break when
82: * running with UTF-8 encoding.
83: * NXT(n) returns the n'th next CHAR. Like CUR, it should be used only
84: * to compare against ASCII-based substrings.
85: * SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
86: * strings within the parser.
87: *
88: * Clean macros, not dependent of an ASCII context.
89: *
90: * CURRENT Returns the current char value, with the full decoding of
91: * UTF-8 if we are using this mode. It returns an int.
92: * NEXT Skip to the next character, this does the proper decoding
93: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
94: * It returns the pointer to the current CHAR.
95: */
1.45 daniel 96:
/*
 * All of these expect a variable named "ctxt" (the parser context) to be in
 * scope. Every expansion is fully parenthesized so that it can be combined
 * with other operators, e.g. *SKIP(1).
 */
#define CUR (*ctxt->input->cur)
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

/* Advance over any run of blank characters, one NEXT at a time. */
#define SKIP_BLANKS 							\
    while (IS_BLANK(*(ctxt->input->cur))) NEXT

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
/*
 * NEXT: on a non-zero current CHAR, update the line/column counters and step
 * to the following CHAR (the value of the expression is the pointer before
 * the step). On a zero CHAR, call xmlPopInput() and yield the current pointer
 * of whatever input is then active.
 */
#define NEXT ((*ctxt->input->cur) ?					\
		(((*(ctxt->input->cur) == '\n') ?			\
		    (ctxt->input->line++, ctxt->input->col = 1) :	\
		    (ctxt->input->col++)), ctxt->input->cur++) :	\
		(xmlPopInput(ctxt), ctxt->input->cur))
#else
/* No UTF-8 versions of CURRENT and NEXT are provided here. */
#endif
1.42 daniel 114:
1.40 daniel 115:
1.50 daniel 116: /**
117: * xmlPopInput:
118: * @ctxt: an XML parser context
119: *
1.40 daniel 120: * xmlPopInput: the current input pointed by ctxt->input came to an end
121: * pop it and return the next char.
1.45 daniel 122: *
123: * TODO A deallocation of the popped Input structure is needed
1.50 daniel 124: * return values: the current CHAR in the parser context
1.40 daniel 125: */
1.55 daniel 126: CHAR
127: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 128: if (ctxt->inputNr == 1) return(0); /* End of main Input */
129: inputPop(ctxt);
130: return(CUR);
131: }
132:
1.50 daniel 133: /**
134: * xmlPushInput:
135: * @ctxt: an XML parser context
136: * @input: an XML parser input fragment (entity, XML fragment ...).
137: *
1.40 daniel 138: * xmlPushInput: switch to a new input stream which is stacked on top
139: * of the previous one(s).
140: */
1.55 daniel 141: void
142: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 143: if (input == NULL) return;
144: inputPush(ctxt, input);
145: }
146:
1.50 daniel 147: /**
148: * xmlNewEntityInputStream:
149: * @ctxt: an XML parser context
150: * @entity: an Entity pointer
151: *
1.45 daniel 152: * Create a new input stream based on a memory buffer.
1.50 daniel 153: * return vakues: the new input stream
1.45 daniel 154: */
1.50 daniel 155: xmlParserInputPtr
156: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 157: xmlParserInputPtr input;
158:
159: if (entity == NULL) {
1.55 daniel 160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
161: ctxt->sax->error(ctxt,
1.45 daniel 162: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 163: return(NULL);
1.45 daniel 164: }
165: if (entity->content == NULL) {
1.55 daniel 166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
167: ctxt->sax->error(ctxt,
1.45 daniel 168: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 169: return(NULL);
1.45 daniel 170: }
171: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
172: if (input == NULL) {
1.55 daniel 173: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
174: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
1.50 daniel 175: return(NULL);
1.45 daniel 176: }
177: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
178: input->base = entity->content;
179: input->cur = entity->content;
180: input->line = 1;
181: input->col = 1;
1.50 daniel 182: return(input);
1.45 daniel 183: }
184:
1.59 ! daniel 185: /**
! 186: * xmlNewStringInputStream:
! 187: * @ctxt: an XML parser context
! 188: * @entity: an Entity pointer
! 189: *
! 190: * Create a new input stream based on a memory buffer.
! 191: * return vakues: the new input stream
! 192: */
! 193: xmlParserInputPtr
! 194: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *string) {
! 195: xmlParserInputPtr input;
! 196:
! 197: if (string == NULL) {
! 198: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 199: ctxt->sax->error(ctxt,
! 200: "internal: xmlNewStringInputStream string = NULL\n");
! 201: return(NULL);
! 202: }
! 203: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
! 204: if (input == NULL) {
! 205: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 206: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
! 207: return(NULL);
! 208: }
! 209: input->filename = NULL;
! 210: input->base = string;
! 211: input->cur = string;
! 212: input->line = 1;
! 213: input->col = 1;
! 214: return(input);
! 215: }
! 216:
1.45 daniel 217: /*
1.40 daniel 218: * A few macros needed to help building the parser.
219: */
220:
1.1 veillard 221: #ifdef UNICODE
1.30 daniel 222: /************************************************************************
223: * *
224: * UNICODE version of the macros. *
225: * *
226: ************************************************************************/
1.1 veillard 227: /*
1.22 daniel 228: * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
229: * | [#x10000-#x10FFFF]
230: * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
1.1 veillard 231: */
/* One alternative per range of production [2]; surrogates, FFFE and FFFF
 * fall between the ranges and are therefore rejected. */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||			\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
1.1 veillard 237:
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK() does not depend on the character width, so it is not repeated
 * here: the single definition that follows the UNICODE / 8-bit sections
 * serves every build.
 */
1.1 veillard 243:
1.22 daniel 244: /*
1.30 daniel 245: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 246: *
1.30 daniel 247: * VI is your friend !
248: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
249: * and
250: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 251: */
1.1 veillard 252: #define IS_BASECHAR(c) \
1.30 daniel 253: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
254: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
255: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
256: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
257: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
258: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
259: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
260: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
261: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
262: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
263: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
264: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
265: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
266: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
267: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
268: ((c) == 0x0386) || \
269: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
270: ((c) == 0x038C) || \
271: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
272: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
273: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
274: ((c) == 0x03DA) || \
275: ((c) == 0x03DC) || \
276: ((c) == 0x03DE) || \
277: ((c) == 0x03E0) || \
278: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
279: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
280: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
281: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
282: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
283: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
284: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
285: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
286: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
287: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
288: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
289: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
290: ((c) == 0x0559) || \
291: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
292: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
293: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
294: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
295: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
296: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
297: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
298: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
299: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
300: ((c) == 0x06D5) || \
301: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
302: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
303: ((c) == 0x093D) || \
304: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
305: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
306: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
307: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
308: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
309: ((c) == 0x09B2) || \
310: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
311: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
312: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
313: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
314: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
315: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
316: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
317: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
318: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
319: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
320: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
321: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
322: ((c) == 0x0A5E) || \
323: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
324: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
325: ((c) == 0x0A8D) || \
326: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
327: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
328: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
329: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
330: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
331: ((c) == 0x0ABD) || \
332: ((c) == 0x0AE0) || \
333: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
334: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
335: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
336: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
337: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
338: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
339: ((c) == 0x0B3D) || \
340: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
341: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
342: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
343: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
344: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
345: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
346: ((c) == 0x0B9C) || \
347: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
348: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
349: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
350: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
351: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
352: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
353: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
354: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
355: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
356: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
357: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
358: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
359: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
360: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
361: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
362: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
363: ((c) == 0x0CDE) || \
364: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
365: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
366: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
367: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
368: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
369: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
370: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
371: ((c) == 0x0E30) || \
372: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
373: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
374: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
375: ((c) == 0x0E84) || \
376: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
377: ((c) == 0x0E8A) || \
378: ((c) == 0x0E8D) || \
379: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
380: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
381: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
382: ((c) == 0x0EA5) || \
383: ((c) == 0x0EA7) || \
384: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
385: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
386: ((c) == 0x0EB0) || \
387: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
388: ((c) == 0x0EBD) || \
389: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
390: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
391: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
392: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
393: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
394: ((c) == 0x1100) || \
395: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
396: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
397: ((c) == 0x1109) || \
398: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
399: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
400: ((c) == 0x113C) || \
401: ((c) == 0x113E) || \
402: ((c) == 0x1140) || \
403: ((c) == 0x114C) || \
404: ((c) == 0x114E) || \
405: ((c) == 0x1150) || \
406: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
407: ((c) == 0x1159) || \
408: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
409: ((c) == 0x1163) || \
410: ((c) == 0x1165) || \
411: ((c) == 0x1167) || \
412: ((c) == 0x1169) || \
413: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
414: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
415: ((c) == 0x1175) || \
416: ((c) == 0x119E) || \
417: ((c) == 0x11A8) || \
418: ((c) == 0x11AB) || \
419: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
420: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
421: ((c) == 0x11BA) || \
422: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
423: ((c) == 0x11EB) || \
424: ((c) == 0x11F0) || \
425: ((c) == 0x11F9) || \
426: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
427: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
428: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
429: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
430: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
431: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
432: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
433: ((c) == 0x1F59) || \
434: ((c) == 0x1F5B) || \
435: ((c) == 0x1F5D) || \
436: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
437: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
438: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
439: ((c) == 0x1FBE) || \
440: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
441: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
442: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
443: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
444: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
445: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
446: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
447: ((c) == 0x2126) || \
448: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
449: ((c) == 0x212E) || \
450: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
451: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
452: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
453: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
454: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 455:
1.22 daniel 456: /*
457: * [88] Digit ::= ... long list see REC ...
458: */
1.30 daniel 459: #define IS_DIGIT(c) \
460: ((((c) >= 0x0030) && ((c) <= 0x0039)) || \
461: (((c) >= 0x0660) && ((c) <= 0x0669)) || \
462: (((c) >= 0x06F0) && ((c) <= 0x06F9)) || \
463: (((c) >= 0x0966) && ((c) <= 0x096F)) || \
464: (((c) >= 0x09E6) && ((c) <= 0x09EF)) || \
465: (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || \
466: (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || \
467: (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || \
468: (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || \
469: (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || \
470: (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || \
471: (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || \
472: (((c) >= 0x0E50) && ((c) <= 0x0E59)) || \
473: (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || \
474: (((c) >= 0x0F20) && ((c) <= 0x0F29)))
1.1 veillard 475:
1.22 daniel 476: /*
477: * [87] CombiningChar ::= ... long list see REC ...
478: */
1.30 daniel 479: #define IS_COMBINING(c) \
480: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
481: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
482: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
483: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
484: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
485: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
486: ((c) == 0x05BF) || \
487: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
488: ((c) == 0x05C4) || \
489: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
490: ((c) == 0x0670) || \
491: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
492: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
493: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
494: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
495: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
496: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
497: ((c) == 0x093C) || \
498: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
499: ((c) == 0x094D) || \
500: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
501: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
502: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
503: ((c) == 0x09BC) || \
504: ((c) == 0x09BE) || \
505: ((c) == 0x09BF) || \
506: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
507: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
508: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
509: ((c) == 0x09D7) || \
510: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
511: ((c) == 0x0A02) || \
512: ((c) == 0x0A3C) || \
513: ((c) == 0x0A3E) || \
514: ((c) == 0x0A3F) || \
515: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
516: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
517: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
518: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
519: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
520: ((c) == 0x0ABC) || \
521: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
522: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
523: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
524: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
525: ((c) == 0x0B3C) || \
526: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
527: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
528: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
529: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
530: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
531: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
532: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
533: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
534: ((c) == 0x0BD7) || \
535: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
536: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
537: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
538: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
539: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
540: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
541: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
542: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
543: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
544: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
545: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
546: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
547: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
548: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
549: ((c) == 0x0D57) || \
550: ((c) == 0x0E31) || \
551: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
552: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
553: ((c) == 0x0EB1) || \
554: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
555: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
556: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
557: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
558: ((c) == 0x0F35) || \
559: ((c) == 0x0F37) || \
560: ((c) == 0x0F39) || \
561: ((c) == 0x0F3E) || \
562: ((c) == 0x0F3F) || \
563: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
564: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
565: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
566: ((c) == 0x0F97) || \
567: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
568: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
569: ((c) == 0x0FB9) || \
570: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
571: ((c) == 0x20E1) || \
572: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
573: ((c) == 0x3099) || \
574: ((c) == 0x309A))
1.3 veillard 575:
1.22 daniel 576: /*
577: * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
578: * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
579: * [#x309D-#x309E] | [#x30FC-#x30FE]
580: */
/* Every alternative of production [89], joined by ||. */
#define IS_EXTENDER(c)							\
    (((c) == 0x00B7) || ((c) == 0x02D0) || ((c) == 0x02D1) ||		\
     ((c) == 0x0387) || ((c) == 0x0640) || ((c) == 0x0E46) ||		\
     ((c) == 0x0EC6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309D) && ((c) <= 0x309E)) ||				\
     (((c) >= 0x30FC) && ((c) <= 0x30FE)))
1.3 veillard 588:
1.22 daniel 589: /*
590: * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
591: */
/* Exactly the three alternatives of production [86]. */
#define IS_IDEOGRAPHIC(c)						\
    ((((c) >= 0x4E00) && ((c) <= 0x9FA5)) ||				\
     ((c) == 0x3007) ||							\
     (((c) >= 0x3021) && ((c) <= 0x3029)))
597:
1.22 daniel 598: /*
599: * [84] Letter ::= BaseChar | Ideographic
600: */
1.1 veillard 601: #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
602:
603: #else
1.55 daniel 604: #ifndef USE_UTF_8
1.30 daniel 605: /************************************************************************
606: * *
1.55 daniel 607: * 8bits / ISO-Latin version of the macros. *
1.30 daniel 608: * *
609: ************************************************************************/
1.1 veillard 610: /*
1.22 daniel 611: * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
612: * | [#x10000-#x10FFFF]
613: * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
1.1 veillard 614: */
/* One alternative per range of production [2]; surrogates, FFFE and FFFF
 * fall between the ranges and are therefore rejected. */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||			\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
1.1 veillard 619:
1.22 daniel 620: /*
621: * [85] BaseChar ::= ... long list see REC ...
622: */
/* The part of production [85] that fits in 8 bits:
 * [#x41-#x5A] | [#x61-#x7A] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#xFF] */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x41) && ((c) <= 0x5A)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7A)) ||				\
     (((c) >= 0xC0) && ((c) <= 0xD6)) ||				\
     (((c) >= 0xD8) && ((c) <= 0xF6)) ||				\
     (((c) >= 0xF8) && ((c) <= 0xFF)))
631:
1.22 daniel 632: /*
633: * [88] Digit ::= ... long list see REC ...
634: */
1.1 veillard 635: #define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
636:
1.22 daniel 637: /*
638: * [84] Letter ::= BaseChar | Ideographic
639: */
1.1 veillard 640: #define IS_LETTER(c) IS_BASECHAR(c)
641:
1.22 daniel 642:
643: /*
644: * [87] CombiningChar ::= ... long list see REC ...
645: */
1.1 veillard 646: #define IS_COMBINING(c) 0
647:
1.22 daniel 648: /*
649: * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
650: * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
651: * [#x309D-#x309E] | [#x30FC-#x30FE]
652: */
1.3 veillard 653: #define IS_EXTENDER(c) ((c) == 0xb7)
654:
1.55 daniel 655: #else /* USE_UTF_8 */
656: /************************************************************************
657: * *
658: * 8bits / UTF-8 version of the macros. *
659: * *
660: ************************************************************************/
661:
662: TODO !!!
663: #endif /* USE_UTF_8 */
1.21 daniel 664: #endif /* !UNICODE */
1.1 veillard 665:
1.22 daniel 666: /*
667: * Blank chars.
668: *
669: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
670: */
671: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
672: ((c) == 0x0D))
673:
674: /*
675: * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
676: */
1.21 daniel 677: #define IS_PUBIDCHAR(c) \
678: (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || \
679: (((c) >= 'a') && ((c) <= 'z')) || \
680: (((c) >= 'A') && ((c) <= 'Z')) || \
681: (((c) >= '0') && ((c) <= '9')) || \
682: ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') || \
683: ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') || \
684: ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') || \
685: ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') || \
686: ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 687:
/*
 * SKIP_EOL(p): advance p over ONE end of line. An end of line is a carriage
 * return (0x0D) optionally followed by a line feed (0x0A), or a line feed
 * optionally followed by a carriage return. p is left untouched when it does
 * not point at an end of line.
 */
#define SKIP_EOL(p) 							\
    if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; }		\
    else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; }
691:
/* Advance p until it points at '>' or at something that is not an XML Char
 * (the terminating 0 included). */
#define MOVETO_ENDTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++

/* Advance p until it points at '<' or at something that is not an XML Char
 * (the terminating 0 included). */
#define MOVETO_STARTTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
1.1 veillard 697:
1.28 daniel 698: /************************************************************************
699: * *
700: * Commodity functions to handle CHARs *
701: * *
702: ************************************************************************/
703:
1.50 daniel 704: /**
705: * xmlStrndup:
706: * @cur: the input CHAR *
707: * @len: the len of @cur
708: *
709: * a strndup for array of CHAR's
710: * return values: a new CHAR * or NULL
1.1 veillard 711: */
712:
1.55 daniel 713: CHAR *
714: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 715: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
716:
717: if (ret == NULL) {
718: fprintf(stderr, "malloc of %d byte failed\n",
719: (len + 1) * sizeof(CHAR));
720: return(NULL);
721: }
722: memcpy(ret, cur, len * sizeof(CHAR));
723: ret[len] = 0;
724: return(ret);
725: }
726:
1.50 daniel 727: /**
728: * xmlStrdup:
729: * @cur: the input CHAR *
730: *
731: * a strdup for array of CHAR's
732: * return values: a new CHAR * or NULL
1.1 veillard 733: */
734:
1.55 daniel 735: CHAR *
736: xmlStrdup(const CHAR *cur) {
1.6 httpng 737: const CHAR *p = cur;
1.1 veillard 738:
739: while (IS_CHAR(*p)) p++;
740: return(xmlStrndup(cur, p - cur));
741: }
742:
1.50 daniel 743: /**
744: * xmlCharStrndup:
745: * @cur: the input char *
746: * @len: the len of @cur
747: *
748: * a strndup for char's to CHAR's
749: * return values: a new CHAR * or NULL
1.45 daniel 750: */
751:
1.55 daniel 752: CHAR *
753: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 754: int i;
755: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
756:
757: if (ret == NULL) {
758: fprintf(stderr, "malloc of %d byte failed\n",
759: (len + 1) * sizeof(CHAR));
760: return(NULL);
761: }
762: for (i = 0;i < len;i++)
763: ret[i] = (CHAR) cur[i];
764: ret[len] = 0;
765: return(ret);
766: }
767:
1.50 daniel 768: /**
769: * xmlCharStrdup:
770: * @cur: the input char *
771: * @len: the len of @cur
772: *
773: * a strdup for char's to CHAR's
774: * return values: a new CHAR * or NULL
1.45 daniel 775: */
776:
1.55 daniel 777: CHAR *
778: xmlCharStrdup(const char *cur) {
1.45 daniel 779: const char *p = cur;
780:
781: while (*p != '\0') p++;
782: return(xmlCharStrndup(cur, p - cur));
783: }
784:
1.50 daniel 785: /**
786: * xmlStrcmp:
787: * @str1: the first CHAR *
788: * @str2: the second CHAR *
789: *
790: * a strcmp for CHAR's
791: * return values: the integer result of the comparison
1.14 veillard 792: */
793:
1.55 daniel 794: int
795: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 796: register int tmp;
797:
798: do {
799: tmp = *str1++ - *str2++;
800: if (tmp != 0) return(tmp);
801: } while ((*str1 != 0) && (*str2 != 0));
802: return (*str1 - *str2);
803: }
804:
1.50 daniel 805: /**
806: * xmlStrncmp:
807: * @str1: the first CHAR *
808: * @str2: the second CHAR *
809: * @len: the max comparison length
810: *
811: * a strncmp for CHAR's
812: * return values: the integer result of the comparison
1.14 veillard 813: */
814:
1.55 daniel 815: int
816: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 817: register int tmp;
818:
819: if (len <= 0) return(0);
820: do {
821: tmp = *str1++ - *str2++;
822: if (tmp != 0) return(tmp);
823: len--;
824: if (len <= 0) return(0);
825: } while ((*str1 != 0) && (*str2 != 0));
826: return (*str1 - *str2);
827: }
828:
1.50 daniel 829: /**
830: * xmlStrchr:
831: * @str: the CHAR * array
832: * @val: the CHAR to search
833: *
834: * a strchr for CHAR's
835: * return values: the CHAR * for the first occurence or NULL.
1.14 veillard 836: */
837:
1.55 daniel 838: CHAR *
839: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 840: while (*str != 0) {
841: if (*str == val) return((CHAR *) str);
842: str++;
843: }
844: return(NULL);
845: }
1.28 daniel 846:
1.50 daniel 847: /**
848: * xmlStrlen:
849: * @str: the CHAR * array
850: *
851: * lenght of a CHAR's string
852: * return values: the number of CHAR contained in the ARRAY.
1.45 daniel 853: */
854:
1.55 daniel 855: int
856: xmlStrlen(const CHAR *str) {
1.45 daniel 857: int len = 0;
858:
859: if (str == NULL) return(0);
860: while (*str != 0) {
861: str++;
862: len++;
863: }
864: return(len);
865: }
866:
1.50 daniel 867: /**
868: * xmlStrncat:
869: * @first: the original CHAR * array
870: * @add: the CHAR * array added
871: * @len: the length of @add
872: *
873: * a strncat for array of CHAR's
874: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 875: */
876:
1.55 daniel 877: CHAR *
878: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 879: int size;
880: CHAR *ret;
881:
882: if ((add == NULL) || (len == 0))
883: return(cur);
884: if (cur == NULL)
885: return(xmlStrndup(add, len));
886:
887: size = xmlStrlen(cur);
888: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
889: if (ret == NULL) {
890: fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
891: (size + len + 1) * sizeof(CHAR));
892: return(cur);
893: }
894: memcpy(&ret[size], add, len * sizeof(CHAR));
895: ret[size + len] = 0;
896: return(ret);
897: }
898:
1.50 daniel 899: /**
900: * xmlStrcat:
901: * @first: the original CHAR * array
902: * @add: the CHAR * array added
903: *
904: * a strcat for array of CHAR's
905: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 906: */
907:
1.55 daniel 908: CHAR *
909: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 910: const CHAR *p = add;
911:
912: if (add == NULL) return(cur);
913: if (cur == NULL)
914: return(xmlStrdup(add));
915:
916: while (IS_CHAR(*p)) p++;
917: return(xmlStrncat(cur, add, p - add));
918: }
919:
920: /************************************************************************
921: * *
922: * Commodity functions, cleanup needed ? *
923: * *
924: ************************************************************************/
925:
1.50 daniel 926: /**
927: * areBlanks:
928: * @ctxt: an XML parser context
929: * @str: a CHAR *
930: * @len: the size of @str
931: *
1.45 daniel 932: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 933: *
934: * TODO: to be corrected accodingly to DTD information if available
935: * return values: 1 if ignorable 0 otherwise.
1.45 daniel 936: */
937:
938: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
939: int i;
940: xmlNodePtr lastChild;
941:
942: for (i = 0;i < len;i++)
943: if (!(IS_BLANK(str[i]))) return(0);
944:
945: if (CUR != '<') return(0);
946: lastChild = xmlGetLastChild(ctxt->node);
947: if (lastChild == NULL) {
948: if (ctxt->node->content != NULL) return(0);
949: } else if (xmlNodeIsText(lastChild))
950: return(0);
951: return(1);
952: }
953:
1.50 daniel 954: /**
955: * xmlHandleEntity:
956: * @ctxt: an XML parser context
957: * @entity: an XML entity pointer.
958: *
959: * Default handling of defined entities, when should we define a new input
1.45 daniel 960: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 961: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 962: */
963:
1.55 daniel 964: void
965: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 966: int len;
1.50 daniel 967: xmlParserInputPtr input;
1.45 daniel 968:
969: if (entity->content == NULL) {
1.55 daniel 970: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
971: ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 972: entity->name);
1.59 ! daniel 973: ctxt->wellFormed = 0;
1.45 daniel 974: return;
975: }
976: len = xmlStrlen(entity->content);
977: if (len <= 2) goto handle_as_char;
978:
979: /*
980: * Redefine its content as an input stream.
981: */
1.50 daniel 982: input = xmlNewEntityInputStream(ctxt, entity);
983: xmlPushInput(ctxt, input);
1.45 daniel 984: return;
985:
986: handle_as_char:
987: /*
988: * Just handle the content as a set of chars.
989: */
990: if (ctxt->sax != NULL)
991: ctxt->sax->characters(ctxt, entity->content, 0, len);
992:
993: }
994:
/*
 * Forward declarations needed for recursive behaviour.
 */
998: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.50 daniel 999: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
1000: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1001:
1.28 daniel 1002: /************************************************************************
1003: * *
1004: * Extra stuff for namespace support *
1005: * Relates to http://www.w3.org/TR/WD-xml-names *
1006: * *
1007: ************************************************************************/
1008:
1.50 daniel 1009: /**
1010: * xmlNamespaceParseNCName:
1011: * @ctxt: an XML parser context
1012: *
1013: * parse an XML namespace name.
1.28 daniel 1014: *
1015: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1016: *
1017: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1018: * CombiningChar | Extender
1.50 daniel 1019: * return values: the namespace name or NULL
1.28 daniel 1020: */
1021:
1.55 daniel 1022: CHAR *
1023: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.28 daniel 1024: const CHAR *q;
1025: CHAR *ret = NULL;
1026:
1.40 daniel 1027: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1028: q = NEXT;
1.28 daniel 1029:
1.40 daniel 1030: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1031: (CUR == '.') || (CUR == '-') ||
1032: (CUR == '_') ||
1033: (IS_COMBINING(CUR)) ||
1034: (IS_EXTENDER(CUR)))
1035: NEXT;
1.28 daniel 1036:
1.40 daniel 1037: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 1038:
1039: return(ret);
1040: }
1041:
1.50 daniel 1042: /**
1043: * xmlNamespaceParseQName:
1044: * @ctxt: an XML parser context
1045: * @prefix: a CHAR **
1046: *
1047: * parse an XML qualified name
1.28 daniel 1048: *
1049: * [NS 5] QName ::= (Prefix ':')? LocalPart
1050: *
1051: * [NS 6] Prefix ::= NCName
1052: *
1053: * [NS 7] LocalPart ::= NCName
1.50 daniel 1054: * return values: the function returns the local part, and prefix is updated
1055: * to get the Prefix if any.
1.28 daniel 1056: */
1057:
1.55 daniel 1058: CHAR *
1059: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1060: CHAR *ret = NULL;
1061:
1062: *prefix = NULL;
1063: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1064: if (CUR == ':') {
1.28 daniel 1065: *prefix = ret;
1.40 daniel 1066: NEXT;
1.28 daniel 1067: ret = xmlNamespaceParseNCName(ctxt);
1068: }
1069:
1070: return(ret);
1071: }
1072:
1.50 daniel 1073: /**
1074: * xmlNamespaceParseNSDef:
1075: * @ctxt: an XML parser context
1076: *
1077: * parse a namespace prefix declaration
1.28 daniel 1078: *
1079: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1080: *
1081: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.50 daniel 1082: * return values: the namespace name
1.28 daniel 1083: */
1084:
1.55 daniel 1085: CHAR *
1086: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1087: CHAR *name = NULL;
1088:
1.40 daniel 1089: if ((CUR == 'x') && (NXT(1) == 'm') &&
1090: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1091: (NXT(4) == 's')) {
1092: SKIP(5);
1093: if (CUR == ':') {
1094: NEXT;
1.28 daniel 1095: name = xmlNamespaceParseNCName(ctxt);
1096: }
1097: }
1.39 daniel 1098: return(name);
1.28 daniel 1099: }
1100:
1.50 daniel 1101: /**
1102: * xmlParseQuotedString:
1103: * @ctxt: an XML parser context
1104: *
1.45 daniel 1105: * [OLD] Parse and return a string between quotes or doublequotes
1.50 daniel 1106: * return values: the string parser or NULL.
1.45 daniel 1107: */
1.55 daniel 1108: CHAR *
1109: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1110: CHAR *ret = NULL;
1111: const CHAR *q;
1112:
1113: if (CUR == '"') {
1114: NEXT;
1115: q = CUR_PTR;
1116: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1117: if (CUR != '"') {
1118: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 1119: ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
1.59 ! daniel 1120: ctxt->wellFormed = 0;
1.55 daniel 1121: } else {
1.45 daniel 1122: ret = xmlStrndup(q, CUR_PTR - q);
1123: NEXT;
1124: }
1125: } else if (CUR == '\''){
1126: NEXT;
1127: q = CUR_PTR;
1128: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1129: if (CUR != '\'') {
1130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 1131: ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
1.59 ! daniel 1132: ctxt->wellFormed = 0;
1.55 daniel 1133: } else {
1.45 daniel 1134: ret = xmlStrndup(q, CUR_PTR - q);
1135: NEXT;
1136: }
1137: }
1138: return(ret);
1139: }
1140:
1.50 daniel 1141: /**
1142: * xmlParseNamespace:
1143: * @ctxt: an XML parser context
1144: *
1.45 daniel 1145: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1146: *
1147: * This is what the older xml-name Working Draft specified, a bunch of
1148: * other stuff may still rely on it, so support is still here as
1149: * if ot was declared on the root of the Tree:-(
1150: */
1151:
1.55 daniel 1152: void
1153: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1154: CHAR *href = NULL;
1155: CHAR *prefix = NULL;
1156: int garbage = 0;
1157:
1158: /*
1159: * We just skipped "namespace" or "xml:namespace"
1160: */
1161: SKIP_BLANKS;
1162:
1163: while (IS_CHAR(CUR) && (CUR != '>')) {
1164: /*
1165: * We can have "ns" or "prefix" attributes
1166: * Old encoding as 'href' or 'AS' attributes is still supported
1167: */
1168: if ((CUR == 'n') && (NXT(1) == 's')) {
1169: garbage = 0;
1170: SKIP(2);
1171: SKIP_BLANKS;
1172:
1173: if (CUR != '=') continue;
1174: NEXT;
1175: SKIP_BLANKS;
1176:
1177: href = xmlParseQuotedString(ctxt);
1178: SKIP_BLANKS;
1179: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1180: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1181: garbage = 0;
1182: SKIP(4);
1183: SKIP_BLANKS;
1184:
1185: if (CUR != '=') continue;
1186: NEXT;
1187: SKIP_BLANKS;
1188:
1189: href = xmlParseQuotedString(ctxt);
1190: SKIP_BLANKS;
1191: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1192: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1193: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1194: garbage = 0;
1195: SKIP(6);
1196: SKIP_BLANKS;
1197:
1198: if (CUR != '=') continue;
1199: NEXT;
1200: SKIP_BLANKS;
1201:
1202: prefix = xmlParseQuotedString(ctxt);
1203: SKIP_BLANKS;
1204: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1205: garbage = 0;
1206: SKIP(2);
1207: SKIP_BLANKS;
1208:
1209: if (CUR != '=') continue;
1210: NEXT;
1211: SKIP_BLANKS;
1212:
1213: prefix = xmlParseQuotedString(ctxt);
1214: SKIP_BLANKS;
1215: } else if ((CUR == '?') && (NXT(1) == '>')) {
1216: garbage = 0;
1217: CUR_PTR ++;
1218: } else {
1219: /*
1220: * Found garbage when parsing the namespace
1221: */
1222: if (!garbage)
1.55 daniel 1223: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1224: ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n");
1.59 ! daniel 1225: ctxt->wellFormed = 0;
1.45 daniel 1226: NEXT;
1227: }
1228: }
1229:
1230: MOVETO_ENDTAG(CUR_PTR);
1231: NEXT;
1232:
1233: /*
1234: * Register the DTD.
1235: */
1236: if (href != NULL)
1237: xmlNewGlobalNs(ctxt->doc, href, prefix);
1238:
1239: if (prefix != NULL) free(prefix);
1240: if (href != NULL) free(href);
1241: }
1242:
1.28 daniel 1243: /************************************************************************
1244: * *
1245: * The parser itself *
1246: * Relates to http://www.w3.org/TR/REC-xml *
1247: * *
1248: ************************************************************************/
1.14 veillard 1249:
1.50 daniel 1250: /**
1251: * xmlParseName:
1252: * @ctxt: an XML parser context
1253: *
1254: * parse an XML name.
1.22 daniel 1255: *
1256: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1257: * CombiningChar | Extender
1258: *
1259: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1260: *
1261: * [6] Names ::= Name (S Name)*
1.50 daniel 1262: * return values: the Name parsed or NULL
1.1 veillard 1263: */
1264:
1.55 daniel 1265: CHAR *
1266: xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1267: const CHAR *q;
1268: CHAR *ret = NULL;
1.1 veillard 1269:
1.40 daniel 1270: if (!IS_LETTER(CUR) && (CUR != '_') &&
1271: (CUR != ':')) return(NULL);
1272: q = NEXT;
1273:
1274: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1275: (CUR == '.') || (CUR == '-') ||
1276: (CUR == '_') || (CUR == ':') ||
1277: (IS_COMBINING(CUR)) ||
1278: (IS_EXTENDER(CUR)))
1279: NEXT;
1.22 daniel 1280:
1.40 daniel 1281: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1282:
1283: return(ret);
1284: }
1285:
1.50 daniel 1286: /**
1287: * xmlParseNmtoken:
1288: * @ctxt: an XML parser context
1289: *
1290: * parse an XML Nmtoken.
1.22 daniel 1291: *
1292: * [7] Nmtoken ::= (NameChar)+
1293: *
1294: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.50 daniel 1295: * return values: the Nmtoken parsed or NULL
1.22 daniel 1296: */
1297:
1.55 daniel 1298: CHAR *
1299: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.22 daniel 1300: const CHAR *q;
1301: CHAR *ret = NULL;
1302:
1.40 daniel 1303: q = NEXT;
1.22 daniel 1304:
1.40 daniel 1305: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1306: (CUR == '.') || (CUR == '-') ||
1307: (CUR == '_') || (CUR == ':') ||
1308: (IS_COMBINING(CUR)) ||
1309: (IS_EXTENDER(CUR)))
1310: NEXT;
1.3 veillard 1311:
1.40 daniel 1312: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1313:
1.3 veillard 1314: return(ret);
1.1 veillard 1315: }
1316:
1.50 daniel 1317: /**
1318: * xmlParseEntityValue:
1319: * @ctxt: an XML parser context
1320: *
1321: * parse a value for ENTITY decl.
1.24 daniel 1322: *
1323: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1324: * "'" ([^%&'] | PEReference | Reference)* "'"
1.50 daniel 1325: * return values: the EntityValue parsed or NULL
1.24 daniel 1326: */
1327:
1.55 daniel 1328: CHAR *
1329: xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1330: CHAR *ret = NULL, *cur;
1.24 daniel 1331: const CHAR *q;
1332:
1.40 daniel 1333: if (CUR == '"') {
1334: NEXT;
1.24 daniel 1335:
1.40 daniel 1336: q = CUR_PTR;
1337: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1338: if (CUR == '%') {
1.46 daniel 1339: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1340: cur = xmlParsePEReference(ctxt);
1.46 daniel 1341: ret = xmlStrcat(ret, cur);
1342: q = CUR_PTR;
1.40 daniel 1343: } else if (CUR == '&') {
1.46 daniel 1344: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1345: cur = xmlParseReference(ctxt);
1346: if (cur != NULL) {
1347: CHAR buf[2];
1348: buf[0] = '&';
1349: buf[1] = 0;
1350: ret = xmlStrncat(ret, buf, 1);
1351: ret = xmlStrcat(ret, cur);
1352: buf[0] = ';';
1353: buf[1] = 0;
1354: ret = xmlStrncat(ret, buf, 1);
1355: }
1.46 daniel 1356: q = CUR_PTR;
1.24 daniel 1357: } else
1.40 daniel 1358: NEXT;
1.24 daniel 1359: }
1.40 daniel 1360: if (!IS_CHAR(CUR)) {
1.55 daniel 1361: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1362: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.59 ! daniel 1363: ctxt->wellFormed = 0;
1.24 daniel 1364: } else {
1.46 daniel 1365: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1366: NEXT;
1.24 daniel 1367: }
1.40 daniel 1368: } else if (CUR == '\'') {
1369: NEXT;
1370: q = CUR_PTR;
1371: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1372: if (CUR == '%') {
1.46 daniel 1373: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1374: cur = xmlParsePEReference(ctxt);
1.46 daniel 1375: ret = xmlStrcat(ret, cur);
1376: q = CUR_PTR;
1.40 daniel 1377: } else if (CUR == '&') {
1.46 daniel 1378: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1379: cur = xmlParseReference(ctxt);
1380: if (cur != NULL) {
1381: CHAR buf[2];
1382: buf[0] = '&';
1383: buf[1] = 0;
1384: ret = xmlStrncat(ret, buf, 1);
1385: ret = xmlStrcat(ret, cur);
1386: buf[0] = ';';
1387: buf[1] = 0;
1388: ret = xmlStrncat(ret, buf, 1);
1389: }
1.46 daniel 1390: q = CUR_PTR;
1.24 daniel 1391: } else
1.40 daniel 1392: NEXT;
1.24 daniel 1393: }
1.40 daniel 1394: if (!IS_CHAR(CUR)) {
1.55 daniel 1395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1396: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.59 ! daniel 1397: ctxt->wellFormed = 0;
1.24 daniel 1398: } else {
1.46 daniel 1399: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1400: NEXT;
1.24 daniel 1401: }
1402: } else {
1.55 daniel 1403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1404: ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.59 ! daniel 1405: ctxt->wellFormed = 0;
1.24 daniel 1406: }
1407:
1408: return(ret);
1409: }
1410:
1.50 daniel 1411: /**
1412: * xmlParseAttValue:
1413: * @ctxt: an XML parser context
1414: *
1415: * parse a value for an attribute
1.29 daniel 1416: *
1417: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1418: * "'" ([^<&'] | Reference)* "'"
1.50 daniel 1419: * return values: the AttValue parsed or NULL.
1.29 daniel 1420: */
1421:
1.55 daniel 1422: CHAR *
1423: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1424: CHAR *ret = NULL, *cur;
1.29 daniel 1425: const CHAR *q;
1426:
1.40 daniel 1427: if (CUR == '"') {
1428: NEXT;
1.29 daniel 1429:
1.40 daniel 1430: q = CUR_PTR;
1431: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1.59 ! daniel 1432: if (CUR == '<') {
! 1433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1434: ctxt->sax->error(ctxt,
! 1435: "Unescaped '<' not allowed in attributes values\n");
! 1436: ctxt->wellFormed = 0;
! 1437: }
1.40 daniel 1438: if (CUR == '&') {
1.46 daniel 1439: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1440: cur = xmlParseReference(ctxt);
1441: if (cur != NULL) {
1442: /*
1443: * Special case for '&', we don't want to
1444: * resolve it here since it will break later
1445: * when searching entities in the string.
1446: */
1447: if ((cur[0] == '&') && (cur[1] == 0)) {
1448: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1449: ret = xmlStrncat(ret, buf, 5);
1450: } else
1451: ret = xmlStrcat(ret, cur);
1452: free(cur);
1453: }
1.46 daniel 1454: q = CUR_PTR;
1.29 daniel 1455: } else
1.40 daniel 1456: NEXT;
1.50 daniel 1457: /*
1458: * Pop out finished entity references.
1459: */
1460: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1461: if (CUR_PTR != q)
1462: ret = xmlStrncat(ret, q, CUR_PTR - q);
1463: xmlPopInput(ctxt);
1464: q = CUR_PTR;
1465: }
1.29 daniel 1466: }
1.40 daniel 1467: if (!IS_CHAR(CUR)) {
1.55 daniel 1468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1469: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.59 ! daniel 1470: ctxt->wellFormed = 0;
1.29 daniel 1471: } else {
1.46 daniel 1472: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1473: NEXT;
1.29 daniel 1474: }
1.40 daniel 1475: } else if (CUR == '\'') {
1476: NEXT;
1477: q = CUR_PTR;
1478: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1.59 ! daniel 1479: if (CUR == '<') {
! 1480: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1481: ctxt->sax->error(ctxt,
! 1482: "Unescaped '<' not allowed in attributes values\n");
! 1483: ctxt->wellFormed = 0;
! 1484: }
1.40 daniel 1485: if (CUR == '&') {
1.46 daniel 1486: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1487: cur = xmlParseReference(ctxt);
1488: if (cur != NULL) {
1489: /*
1490: * Special case for '&', we don't want to
1491: * resolve it here since it will break later
1492: * when searching entities in the string.
1493: */
1494: if ((cur[0] == '&') && (cur[1] == 0)) {
1495: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1496: ret = xmlStrncat(ret, buf, 5);
1497: } else
1498: ret = xmlStrcat(ret, cur);
1499: free(cur);
1500: }
1.46 daniel 1501: q = CUR_PTR;
1.29 daniel 1502: } else
1.40 daniel 1503: NEXT;
1.50 daniel 1504: /*
1505: * Pop out finished entity references.
1506: */
1507: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1508: if (CUR_PTR != q)
1509: ret = xmlStrncat(ret, q, CUR_PTR - q);
1510: xmlPopInput(ctxt);
1511: q = CUR_PTR;
1512: }
1.29 daniel 1513: }
1.40 daniel 1514: if (!IS_CHAR(CUR)) {
1.55 daniel 1515: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1516: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.59 ! daniel 1517: ctxt->wellFormed = 0;
1.29 daniel 1518: } else {
1.46 daniel 1519: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1520: NEXT;
1.29 daniel 1521: }
1522: } else {
1.55 daniel 1523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1524: ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n");
1.59 ! daniel 1525: ctxt->wellFormed = 0;
1.29 daniel 1526: }
1527:
1528: return(ret);
1529: }
1530:
1.50 daniel 1531: /**
1532: * xmlParseSystemLiteral:
1533: * @ctxt: an XML parser context
1534: *
1535: * parse an XML Literal
1.21 daniel 1536: *
1.22 daniel 1537: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.50 daniel 1538: * return values: the SystemLiteral parsed or NULL
1.21 daniel 1539: */
1540:
1.55 daniel 1541: CHAR *
1542: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1543: const CHAR *q;
1544: CHAR *ret = NULL;
1545:
1.40 daniel 1546: if (CUR == '"') {
1547: NEXT;
1548: q = CUR_PTR;
1549: while ((IS_CHAR(CUR)) && (CUR != '"'))
1550: NEXT;
1551: if (!IS_CHAR(CUR)) {
1.55 daniel 1552: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1553: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.59 ! daniel 1554: ctxt->wellFormed = 0;
1.21 daniel 1555: } else {
1.40 daniel 1556: ret = xmlStrndup(q, CUR_PTR - q);
1557: NEXT;
1.21 daniel 1558: }
1.40 daniel 1559: } else if (CUR == '\'') {
1560: NEXT;
1561: q = CUR_PTR;
1562: while ((IS_CHAR(CUR)) && (CUR != '\''))
1563: NEXT;
1564: if (!IS_CHAR(CUR)) {
1.55 daniel 1565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1566: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.59 ! daniel 1567: ctxt->wellFormed = 0;
1.21 daniel 1568: } else {
1.40 daniel 1569: ret = xmlStrndup(q, CUR_PTR - q);
1570: NEXT;
1.21 daniel 1571: }
1572: } else {
1.55 daniel 1573: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1574: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.59 ! daniel 1575: ctxt->wellFormed = 0;
1.21 daniel 1576: }
1577:
1578: return(ret);
1579: }
1580:
1.50 daniel 1581: /**
1582: * xmlParsePubidLiteral:
1583: * @ctxt: an XML parser context
1.21 daniel 1584: *
1.50 daniel 1585: * parse an XML public literal
1586: * return values: the PubidLiteral parsed or NULL.
1.21 daniel 1587: */
1588:
1.55 daniel 1589: CHAR *
1590: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1591: const CHAR *q;
1592: CHAR *ret = NULL;
1593: /*
1594: * Name ::= (Letter | '_') (NameChar)*
1595: */
1.40 daniel 1596: if (CUR == '"') {
1597: NEXT;
1598: q = CUR_PTR;
1599: while (IS_PUBIDCHAR(CUR)) NEXT;
1600: if (CUR != '"') {
1.55 daniel 1601: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1602: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.59 ! daniel 1603: ctxt->wellFormed = 0;
1.21 daniel 1604: } else {
1.40 daniel 1605: ret = xmlStrndup(q, CUR_PTR - q);
1606: NEXT;
1.21 daniel 1607: }
1.40 daniel 1608: } else if (CUR == '\'') {
1609: NEXT;
1610: q = CUR_PTR;
1611: while ((IS_LETTER(CUR)) && (CUR != '\''))
1612: NEXT;
1613: if (!IS_LETTER(CUR)) {
1.55 daniel 1614: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1615: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.59 ! daniel 1616: ctxt->wellFormed = 0;
1.21 daniel 1617: } else {
1.40 daniel 1618: ret = xmlStrndup(q, CUR_PTR - q);
1619: NEXT;
1.21 daniel 1620: }
1621: } else {
1.55 daniel 1622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1623: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.59 ! daniel 1624: ctxt->wellFormed = 0;
1.21 daniel 1625: }
1626:
1627: return(ret);
1628: }
1629:
1.50 daniel 1630: /**
1631: * xmlParseCharData:
1632: * @ctxt: an XML parser context
1633: * @cdata: int indicating whether we are within a CDATA section
1634: *
1635: * parse a CharData section.
1636: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 1637: *
1638: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1.50 daniel 1639: * return values:
1.27 daniel 1640: */
1641:
1.55 daniel 1642: void
1643: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1644: const CHAR *q;
1645:
1.40 daniel 1646: q = CUR_PTR;
1647: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1648: (CUR != '&')) {
1.59 ! daniel 1649: if ((CUR == ']') && (NXT(1) == ']') &&
! 1650: (NXT(2) == '>')) {
! 1651: if (cdata) break;
! 1652: else {
! 1653: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1654: ctxt->sax->error(ctxt,
! 1655: "Sequence ']]>' not allowed in content\n");
! 1656: ctxt->wellFormed = 0;
! 1657: }
! 1658: }
1.40 daniel 1659: NEXT;
1.27 daniel 1660: }
1.45 daniel 1661: if (q == CUR_PTR) return;
1662:
1663: /*
1664: * Ok the segment [q CUR_PTR] is to be consumed as chars.
1665: */
1666: if (ctxt->sax != NULL) {
1667: if (areBlanks(ctxt, q, CUR_PTR - q))
1668: ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1669: else
1670: ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1671: }
1.27 daniel 1672: }
1673:
1.50 daniel 1674: /**
1675: * xmlParseExternalID:
1676: * @ctxt: an XML parser context
1677: * @publicID: a CHAR** receiving PubidLiteral
1678: *
1679: * Parse an External ID
1.22 daniel 1680: *
1681: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1682: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.50 daniel 1683: * return values: the function returns SystemLiteral and in the second
1684: * case publicID receives PubidLiteral
1.22 daniel 1685: */
1686:
1.55 daniel 1687: CHAR *
1688: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1.39 daniel 1689: CHAR *URI = NULL;
1.22 daniel 1690:
1.40 daniel 1691: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1692: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1693: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1694: SKIP(6);
1.59 ! daniel 1695: if (!IS_BLANK(CUR)) {
! 1696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1697: ctxt->sax->error(ctxt,
! 1698: "Space required after 'SYSTEM'\n");
! 1699: ctxt->wellFormed = 0;
! 1700: }
1.42 daniel 1701: SKIP_BLANKS;
1.39 daniel 1702: URI = xmlParseSystemLiteral(ctxt);
1.59 ! daniel 1703: if (URI == NULL) {
1.55 daniel 1704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1705: ctxt->sax->error(ctxt,
1.39 daniel 1706: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 ! daniel 1707: ctxt->wellFormed = 0;
! 1708: }
1.40 daniel 1709: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1710: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1711: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1712: SKIP(6);
1.59 ! daniel 1713: if (!IS_BLANK(CUR)) {
! 1714: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1715: ctxt->sax->error(ctxt,
! 1716: "Space required after 'PUBLIC'\n");
! 1717: ctxt->wellFormed = 0;
! 1718: }
1.42 daniel 1719: SKIP_BLANKS;
1.39 daniel 1720: *publicID = xmlParsePubidLiteral(ctxt);
1.59 ! daniel 1721: if (*publicID == NULL) {
1.55 daniel 1722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723: ctxt->sax->error(ctxt,
1.39 daniel 1724: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 ! daniel 1725: ctxt->wellFormed = 0;
! 1726: }
! 1727: if (!IS_BLANK(CUR)) {
! 1728: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 1729: ctxt->sax->error(ctxt,
! 1730: "Space required after the Public Identifier\n");
! 1731: ctxt->wellFormed = 0;
! 1732: }
1.42 daniel 1733: SKIP_BLANKS;
1.39 daniel 1734: URI = xmlParseSystemLiteral(ctxt);
1.59 ! daniel 1735: if (URI == NULL) {
1.55 daniel 1736: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1737: ctxt->sax->error(ctxt,
1.39 daniel 1738: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 ! daniel 1739: ctxt->wellFormed = 0;
! 1740: }
1.22 daniel 1741: }
1.39 daniel 1742: return(URI);
1.22 daniel 1743: }
1744:
1.50 daniel 1745: /**
1746: * xmlParseComment:
1747: * @create: should we create a node
1748: *
1.3 veillard 1749: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1750: * This may or may not create a node (depending on the context)
1.38 daniel 1751: * The spec says that "For compatibility, the string "--" (double-hyphen)
1752: * must not occur within comments. "
1.22 daniel 1753: *
1754: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.50 daniel 1755: *
1756: * TODO: this should call a SAX function which will handle (or not) the
1757: * creation of the comment !
1758: * return values:
1.3 veillard 1759: */
1.31 daniel 1760: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1761: xmlNodePtr ret = NULL;
1.17 daniel 1762: const CHAR *q, *start;
1763: const CHAR *r;
1.39 daniel 1764: CHAR *val;
1.3 veillard 1765:
1766: /*
1.22 daniel 1767: * Check that there is a comment right here.
1.3 veillard 1768: */
1.40 daniel 1769: if ((CUR != '<') || (NXT(1) != '!') ||
1770: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1771:
1.40 daniel 1772: SKIP(4);
1773: start = q = CUR_PTR;
1774: NEXT;
1775: r = CUR_PTR;
1776: NEXT;
1777: while (IS_CHAR(CUR) &&
1778: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1779: (*r != '-') || (*q != '-'))) {
1.59 ! daniel 1780: if ((*r == '-') && (*q == '-')) {
1.55 daniel 1781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1782: ctxt->sax->error(ctxt,
1.38 daniel 1783: "Comment must not contain '--' (double-hyphen)`\n");
1.59 ! daniel 1784: ctxt->wellFormed = 0;
! 1785: }
1.40 daniel 1786: NEXT;r++;q++;
1.3 veillard 1787: }
1.40 daniel 1788: if (!IS_CHAR(CUR)) {
1.55 daniel 1789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1790: ctxt->sax->error(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.59 ! daniel 1791: ctxt->wellFormed = 0;
1.3 veillard 1792: } else {
1.40 daniel 1793: NEXT;
1.31 daniel 1794: if (create) {
1.39 daniel 1795: val = xmlStrndup(start, q - start);
1.50 daniel 1796: ret = xmlNewDocComment(ctxt->doc, val);
1.39 daniel 1797: free(val);
1.31 daniel 1798: }
1.3 veillard 1799: }
1.39 daniel 1800: return(ret);
1.3 veillard 1801: }
1802:
1.50 daniel 1803: /**
1804: * xmlParsePITarget:
1805: * @ctxt: an XML parser context
1806: *
1807: * parse the name of a PI
1.22 daniel 1808: *
1809: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.50 daniel 1810: * return values: the PITarget name or NULL
1.22 daniel 1811: */
1812:
1.55 daniel 1813: CHAR *
1814: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 1815: CHAR *name;
1816:
1817: name = xmlParseName(ctxt);
1818: if ((name != NULL) && (name[3] == 0) &&
1819: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1820: ((name[1] == 'm') || (name[1] == 'M')) &&
1821: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 1822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1823: ctxt->sax->error(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1824: return(NULL);
1825: }
1826: return(name);
1827: }
1828:
1.50 daniel 1829: /**
1830: * xmlParsePI:
1831: * @ctxt: an XML parser context
1832: *
1833: * parse an XML Processing Instruction.
1.22 daniel 1834: *
1835: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.50 daniel 1836: * return values: the PI name or NULL
1.3 veillard 1837: */
1838:
1.55 daniel 1839: void
1840: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1841: CHAR *target;
1842:
1.40 daniel 1843: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1844: /*
1845: * this is a Processing Instruction.
1846: */
1.40 daniel 1847: SKIP(2);
1.3 veillard 1848:
1849: /*
1.22 daniel 1850: * Parse the target name and check for special support like
1851: * namespace.
1852: *
1853: * TODO : PI handling should be dynamically redefinable using an
1854: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1855: */
1.22 daniel 1856: target = xmlParsePITarget(ctxt);
1857: if (target != NULL) {
1858: /*
1.44 daniel 1859: * Support for the old Processing Instruction related to namespace.
1.22 daniel 1860: */
1861: if ((target[0] == 'n') && (target[1] == 'a') &&
1862: (target[2] == 'm') && (target[3] == 'e') &&
1863: (target[4] == 's') && (target[5] == 'p') &&
1864: (target[6] == 'a') && (target[7] == 'c') &&
1865: (target[8] == 'e')) {
1866: xmlParseNamespace(ctxt);
1867: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1868: (target[2] == 'l') && (target[3] == ':') &&
1869: (target[4] == 'n') && (target[5] == 'a') &&
1870: (target[6] == 'm') && (target[7] == 'e') &&
1871: (target[8] == 's') && (target[9] == 'p') &&
1872: (target[10] == 'a') && (target[11] == 'c') &&
1873: (target[12] == 'e')) {
1874: xmlParseNamespace(ctxt);
1875: } else {
1.44 daniel 1876: const CHAR *q = CUR_PTR;
1877:
1.40 daniel 1878: while (IS_CHAR(CUR) &&
1879: ((CUR != '?') || (NXT(1) != '>')))
1880: NEXT;
1881: if (!IS_CHAR(CUR)) {
1.55 daniel 1882: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 1883: ctxt->sax->error(ctxt,
! 1884: "xmlParsePI: PI %s never end ...\n", target);
! 1885: ctxt->wellFormed = 0;
1.44 daniel 1886: } else {
1887: CHAR *data;
1888:
1889: data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1.40 daniel 1890: SKIP(2);
1.44 daniel 1891:
1892: /*
1893: * SAX: PI detected.
1894: */
1895: if (ctxt->sax)
1896: ctxt->sax->processingInstruction(ctxt, target, data);
1897: /*
1898: * Unknown PI, ignore it !
1899: */
1900: else
1901: xmlParserWarning(ctxt,
1902: "xmlParsePI : skipping unknown PI %s\n",
1903: target);
1904: free(data);
1905: }
1.22 daniel 1906: }
1.39 daniel 1907: free(target);
1.3 veillard 1908: } else {
1.55 daniel 1909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1910: ctxt->sax->error(ctxt, "xmlParsePI : no target name\n");
1.59 ! daniel 1911: ctxt->wellFormed = 0;
! 1912:
1.22 daniel 1913: /********* Should we try to complete parsing the PI ???
1.40 daniel 1914: while (IS_CHAR(CUR) &&
1915: (CUR != '?') && (CUR != '>'))
1916: NEXT;
1917: if (!IS_CHAR(CUR)) {
1.22 daniel 1918: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1919: target);
1920: }
1921: ********************************************************/
1922: }
1923: }
1924: }
1925:
1.50 daniel 1926: /**
1927: * xmlParseNotationDecl:
1928: * @ctxt: an XML parser context
1929: *
1930: * parse a notation declaration
1.22 daniel 1931: *
1932: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1933: *
1934: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1935: *
1936: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1937: * 'PUBLIC' S PubidLiteral S SystemLiteral
1938: *
1939: * Hence there is actually 3 choices:
1940: * 'PUBLIC' S PubidLiteral
1941: * 'PUBLIC' S PubidLiteral S SystemLiteral
1942: * and 'SYSTEM' S SystemLiteral
1.50 daniel 1943: *
1944: * TODO: no handling of the values parsed !
1.22 daniel 1945: */
1946:
1.55 daniel 1947: void
1948: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 1949: CHAR *name;
1950:
1.40 daniel 1951: if ((CUR == '<') && (NXT(1) == '!') &&
1952: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1953: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1954: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1955: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1956: (IS_BLANK(NXT(10)))) {
1957: SKIP(10);
1.42 daniel 1958: SKIP_BLANKS;
1.22 daniel 1959:
1960: name = xmlParseName(ctxt);
1961: if (name == NULL) {
1.55 daniel 1962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1963: ctxt->sax->error(ctxt,
1.31 daniel 1964: "xmlParseAttributeListDecl: no name for Element\n");
1.59 ! daniel 1965: ctxt->wellFormed = 0;
1.22 daniel 1966: return;
1967: }
1.42 daniel 1968: SKIP_BLANKS;
1.22 daniel 1969: /*
1.31 daniel 1970: * TODO !!!
1.22 daniel 1971: */
1.40 daniel 1972: while ((IS_CHAR(CUR)) && (CUR != '>'))
1973: NEXT;
1.22 daniel 1974: free(name);
1975: }
1976: }
1977:
1.50 daniel 1978: /**
1979: * xmlParseEntityDecl:
1980: * @ctxt: an XML parser context
1981: *
1982: * parse <!ENTITY declarations
1.22 daniel 1983: *
1984: * [70] EntityDecl ::= GEDecl | PEDecl
1985: *
1986: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1987: *
1988: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1989: *
1990: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1991: *
1992: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1993: *
1994: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1995: */
1996:
1.55 daniel 1997: void
1998: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 1999: CHAR *name = NULL;
1.24 daniel 2000: CHAR *value = NULL;
1.39 daniel 2001: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2002: CHAR *ndata = NULL;
1.39 daniel 2003: int isParameter = 0;
1.22 daniel 2004:
1.40 daniel 2005: if ((CUR == '<') && (NXT(1) == '!') &&
2006: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2007: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 ! daniel 2008: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.40 daniel 2009: SKIP(8);
1.59 ! daniel 2010: if (!IS_BLANK(CUR)) {
! 2011: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2012: ctxt->sax->error(ctxt, "Space required after '<!ENTITY'\n");
! 2013: ctxt->wellFormed = 0;
! 2014: }
! 2015: SKIP_BLANKS;
1.40 daniel 2016:
2017: if (CUR == '%') {
2018: NEXT;
1.59 ! daniel 2019: if (!IS_BLANK(CUR)) {
! 2020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2021: ctxt->sax->error(ctxt, "Space required after '%'\n");
! 2022: ctxt->wellFormed = 0;
! 2023: }
1.42 daniel 2024: SKIP_BLANKS;
1.39 daniel 2025: isParameter = 1;
1.22 daniel 2026: }
2027:
2028: name = xmlParseName(ctxt);
1.24 daniel 2029: if (name == NULL) {
1.55 daniel 2030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2031: ctxt->sax->error(ctxt, "xmlParseEntityDecl: no name\n");
1.59 ! daniel 2032: ctxt->wellFormed = 0;
1.24 daniel 2033: return;
2034: }
1.59 ! daniel 2035: if (!IS_BLANK(CUR)) {
! 2036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2037: ctxt->sax->error(ctxt,
! 2038: "Space required after the entity name\n");
! 2039: ctxt->wellFormed = 0;
! 2040: }
1.42 daniel 2041: SKIP_BLANKS;
1.24 daniel 2042:
1.22 daniel 2043: /*
1.24 daniel 2044: * TODO handle the various case of definitions...
1.22 daniel 2045: */
1.39 daniel 2046: if (isParameter) {
1.40 daniel 2047: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 2048: value = xmlParseEntityValue(ctxt);
1.39 daniel 2049: if (value) {
2050: xmlAddDocEntity(ctxt->doc, name,
2051: XML_INTERNAL_PARAMETER_ENTITY,
2052: NULL, NULL, value);
2053: }
1.24 daniel 2054: else {
1.39 daniel 2055: URI = xmlParseExternalID(ctxt, &literal);
2056: if (URI) {
2057: xmlAddDocEntity(ctxt->doc, name,
2058: XML_EXTERNAL_PARAMETER_ENTITY,
2059: literal, URI, NULL);
2060: }
1.24 daniel 2061: }
2062: } else {
1.40 daniel 2063: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 2064: value = xmlParseEntityValue(ctxt);
1.39 daniel 2065: xmlAddDocEntity(ctxt->doc, name,
2066: XML_INTERNAL_GENERAL_ENTITY,
2067: NULL, NULL, value);
2068: } else {
2069: URI = xmlParseExternalID(ctxt, &literal);
1.59 ! daniel 2070: if ((CUR != '>') && (!IS_BLANK(CUR))) {
! 2071: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2072: ctxt->sax->error(ctxt,
! 2073: "Space required before 'NDATA'\n");
! 2074: ctxt->wellFormed = 0;
! 2075: }
1.42 daniel 2076: SKIP_BLANKS;
1.40 daniel 2077: if ((CUR == 'N') && (NXT(1) == 'D') &&
2078: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2079: (NXT(4) == 'A')) {
2080: SKIP(5);
1.59 ! daniel 2081: if (!IS_BLANK(CUR)) {
! 2082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2083: ctxt->sax->error(ctxt,
! 2084: "Space required after 'NDATA'\n");
! 2085: ctxt->wellFormed = 0;
! 2086: }
1.42 daniel 2087: SKIP_BLANKS;
1.24 daniel 2088: ndata = xmlParseName(ctxt);
1.39 daniel 2089: xmlAddDocEntity(ctxt->doc, name,
2090: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
2091: literal, URI, ndata);
2092: } else {
2093: xmlAddDocEntity(ctxt->doc, name,
2094: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
2095: literal, URI, NULL);
1.24 daniel 2096: }
2097: }
2098: }
1.42 daniel 2099: SKIP_BLANKS;
1.40 daniel 2100: if (CUR != '>') {
1.55 daniel 2101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2102: ctxt->sax->error(ctxt,
1.31 daniel 2103: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 ! daniel 2104: ctxt->wellFormed = 0;
1.24 daniel 2105: } else
1.40 daniel 2106: NEXT;
1.39 daniel 2107: if (name != NULL) free(name);
2108: if (value != NULL) free(value);
2109: if (URI != NULL) free(URI);
2110: if (literal != NULL) free(literal);
2111: if (ndata != NULL) free(ndata);
1.22 daniel 2112: }
2113: }
2114:
1.50 daniel 2115: /**
1.59 ! daniel 2116: * xmlParseDefaultDecl:
! 2117: * @ctxt: an XML parser context
! 2118: * @value: Receive a possible fixed default value for the attribute
! 2119: *
! 2120: * Parse an attribute default declaration
! 2121: *
! 2122: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
! 2123: *
! 2124: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
! 2125: * or XML_ATTRIBUTE_FIXED.
! 2126: */
! 2127:
! 2128: int
! 2129: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
! 2130: int val;
! 2131: CHAR *ret;
! 2132:
! 2133: *value = NULL;
! 2134: if ((CUR == '#') && (NXT(1) == 'R') &&
! 2135: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
! 2136: (NXT(4) == 'U') && (NXT(5) == 'I') &&
! 2137: (NXT(6) == 'R') && (NXT(7) == 'E') &&
! 2138: (NXT(8) == 'D')) {
! 2139: SKIP(9);
! 2140: return(XML_ATTRIBUTE_REQUIRED);
! 2141: }
! 2142: if ((CUR == '#') && (NXT(1) == 'I') &&
! 2143: (NXT(2) == 'M') && (NXT(3) == 'P') &&
! 2144: (NXT(4) == 'L') && (NXT(5) == 'I') &&
! 2145: (NXT(6) == 'E') && (NXT(7) == 'D')) {
! 2146: SKIP(8);
! 2147: return(XML_ATTRIBUTE_IMPLIED);
! 2148: }
! 2149: val = XML_ATTRIBUTE_NONE;
! 2150: if ((CUR == '#') && (NXT(1) == 'F') &&
! 2151: (NXT(2) == 'I') && (NXT(3) == 'X') &&
! 2152: (NXT(4) == 'E') && (NXT(5) == 'D')) {
! 2153: SKIP(6);
! 2154: val = XML_ATTRIBUTE_FIXED;
! 2155: if (!IS_BLANK(CUR)) {
! 2156: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2157: ctxt->sax->error(ctxt, "Space required after '#FIXED'\n");
! 2158: ctxt->wellFormed = 0;
! 2159: }
! 2160: SKIP_BLANKS;
! 2161: }
! 2162: ret = xmlParseAttValue(ctxt);
! 2163: if (ret == NULL) {
! 2164: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2165: ctxt->sax->error(ctxt,
! 2166: "Attribute default value declaration error\n");
! 2167: ctxt->wellFormed = 0;
! 2168: } else
! 2169: *value = ret;
! 2170: return(val);
! 2171: }
! 2172:
! 2173: /**
1.50 daniel 2174: * xmlParseEnumeratedType:
2175: * @ctxt: an XML parser context
2176: * @name: ???
2177: * @:
2178: *
2179: * parse and Enumerated attribute type.
1.22 daniel 2180: *
2181: * [57] EnumeratedType ::= NotationType | Enumeration
2182: *
2183: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2184: *
2185: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1.50 daniel 2186: *
2187: * TODO: not implemented !!!
1.22 daniel 2188: */
2189:
1.55 daniel 2190: void
2191: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.22 daniel 2192: /*
2193: * TODO !!!
2194: */
1.59 ! daniel 2195: fprintf(stderr, "Production [57] EnumeratedType not yet supported\n");
1.40 daniel 2196: while ((IS_CHAR(CUR)) && (CUR != '>'))
2197: NEXT;
1.22 daniel 2198: }
2199:
1.50 daniel 2200: /**
2201: * xmlParseAttributeType:
2202: * @ctxt: an XML parser context
2203: * @name: ???
2204: *
1.59 ! daniel 2205: * parse the Attribute list def for an element
1.22 daniel 2206: *
2207: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2208: *
2209: * [55] StringType ::= 'CDATA'
2210: *
2211: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2212: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 2213: *
1.59 ! daniel 2214: * Returns: the attribute type
1.22 daniel 2215: */
1.59 ! daniel 2216: int
1.55 daniel 2217: xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.45 daniel 2218: /* TODO !!! */
1.40 daniel 2219: if ((CUR == 'C') && (NXT(1) == 'D') &&
2220: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2221: (NXT(4) == 'A')) {
2222: SKIP(5);
1.59 ! daniel 2223: return(XML_ATTRIBUTE_STRING);
1.40 daniel 2224: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2225: SKIP(2);
1.59 ! daniel 2226: return(XML_ATTRIBUTE_ID);
1.40 daniel 2227: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2228: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2229: (NXT(4) == 'F')) {
2230: SKIP(5);
1.59 ! daniel 2231: return(XML_ATTRIBUTE_IDREF);
1.40 daniel 2232: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2233: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2234: (NXT(4) == 'F') && (NXT(5) == 'S')) {
2235: SKIP(6);
1.59 ! daniel 2236: return(XML_ATTRIBUTE_IDREFS);
1.40 daniel 2237: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2238: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2239: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2240: SKIP(6);
1.59 ! daniel 2241: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 2242: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2243: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2244: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2245: (NXT(6) == 'E') && (NXT(7) == 'S')) {
2246: SKIP(8);
1.59 ! daniel 2247: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 2248: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2249: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2250: (NXT(4) == 'K') && (NXT(5) == 'E') &&
2251: (NXT(6) == 'N')) {
2252: SKIP(7);
1.59 ! daniel 2253: return(XML_ATTRIBUTE_NMTOKEN);
1.40 daniel 2254: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2255: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2256: (NXT(4) == 'K') && (NXT(5) == 'E') &&
2257: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.59 ! daniel 2258: return(XML_ATTRIBUTE_NMTOKENS);
1.22 daniel 2259: }
1.59 ! daniel 2260: xmlParseEnumeratedType(ctxt, name);
! 2261: return(XML_ATTRIBUTE_ENUMERATED);
1.22 daniel 2262: }
2263:
1.50 daniel 2264: /**
2265: * xmlParseAttributeListDecl:
2266: * @ctxt: an XML parser context
2267: *
2268: * : parse the Attribute list def for an element
1.22 daniel 2269: *
2270: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2271: *
2272: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 2273: *
2274: * TODO: not implemented !!!
1.22 daniel 2275: */
1.55 daniel 2276: void
2277: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 ! daniel 2278: CHAR *elemName;
! 2279: CHAR *attrName;
1.22 daniel 2280:
1.45 daniel 2281: /* TODO !!! */
1.40 daniel 2282: if ((CUR == '<') && (NXT(1) == '!') &&
2283: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2284: (NXT(4) == 'T') && (NXT(5) == 'L') &&
2285: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 ! daniel 2286: (NXT(8) == 'T')) {
1.40 daniel 2287: SKIP(9);
1.59 ! daniel 2288: if (!IS_BLANK(CUR)) {
! 2289: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2290: ctxt->sax->error(ctxt, "Space required after '<!ATTLIST'\n");
! 2291: ctxt->wellFormed = 0;
! 2292: }
1.42 daniel 2293: SKIP_BLANKS;
1.59 ! daniel 2294: elemName = xmlParseName(ctxt);
! 2295: if (elemName == NULL) {
1.55 daniel 2296: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 2297: ctxt->sax->error(ctxt, "ATTLIST: no name for Element\n");
! 2298: ctxt->wellFormed = 0;
1.22 daniel 2299: return;
2300: }
1.42 daniel 2301: SKIP_BLANKS;
1.40 daniel 2302: while (CUR != '>') {
2303: const CHAR *check = CUR_PTR;
1.59 ! daniel 2304: int type;
! 2305: int def;
! 2306: CHAR *defaultValue = NULL;
! 2307:
! 2308: attrName = xmlParseName(ctxt);
! 2309: if (attrName == NULL) {
! 2310: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2311: ctxt->sax->error(ctxt, "ATTLIST: no name for Attribute\n");
! 2312: ctxt->wellFormed = 0;
! 2313: break;
! 2314: }
! 2315: if (!IS_BLANK(CUR)) {
! 2316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2317: ctxt->sax->error(ctxt,
! 2318: "Space required after the attribute name\n");
! 2319: ctxt->wellFormed = 0;
! 2320: break;
! 2321: }
! 2322: SKIP_BLANKS;
! 2323:
! 2324: type = xmlParseAttributeType(ctxt, attrName);
! 2325: if (type <= 0) break;
1.22 daniel 2326:
1.59 ! daniel 2327: if (!IS_BLANK(CUR)) {
! 2328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2329: ctxt->sax->error(ctxt,
! 2330: "Space required after the attribute type\n");
! 2331: ctxt->wellFormed = 0;
! 2332: break;
! 2333: }
1.42 daniel 2334: SKIP_BLANKS;
1.59 ! daniel 2335:
! 2336: def = xmlParseDefaultDecl(ctxt, &defaultValue);
! 2337: if (def <= 0) break;
! 2338:
! 2339: if (CUR != '>') {
! 2340: if (!IS_BLANK(CUR)) {
! 2341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2342: ctxt->sax->error(ctxt,
! 2343: "Space required after the attribute default value\n");
! 2344: ctxt->wellFormed = 0;
! 2345: break;
! 2346: }
! 2347: SKIP_BLANKS;
! 2348: }
1.40 daniel 2349: if (check == CUR_PTR) {
1.55 daniel 2350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2351: ctxt->sax->error(ctxt,
1.59 ! daniel 2352: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 2353: break;
2354: }
1.59 ! daniel 2355: if (attrName != NULL)
! 2356: free(attrName);
! 2357: if (defaultValue != NULL)
! 2358: free(defaultValue);
1.22 daniel 2359: }
1.40 daniel 2360: if (CUR == '>')
2361: NEXT;
1.22 daniel 2362:
1.59 ! daniel 2363: free(elemName);
1.22 daniel 2364: }
2365: }
2366:
1.50 daniel 2367: /**
2368: * xmlParseElementContentDecl:
2369: * @ctxt: an XML parser context
2370: * @name: ???
2371: *
2372: * parse the declaration for an Element content
2373: * either Mixed or Children, the cases EMPTY and ANY being handled
1.22 daniel 2374: *
2375: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2376: *
2377: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2378: *
2379: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2380: *
2381: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2382: *
2383: * or
2384: *
2385: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2386: * '(' S? '#PCDATA' S? ')'
1.50 daniel 2387: *
2388: * TODO: not implemented !!!
1.22 daniel 2389: */
2390:
1.55 daniel 2391: void
2392: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1.22 daniel 2393: /*
2394: * TODO This has to be parsed correctly, currently we just skip until
2395: * we reach the first '>'.
1.31 daniel 2396: * !!!
1.22 daniel 2397: */
1.40 daniel 2398: while ((IS_CHAR(CUR)) && (CUR != '>'))
2399: NEXT;
1.22 daniel 2400: }
2401:
1.50 daniel 2402: /**
2403: * xmlParseElementDecl:
2404: * @ctxt: an XML parser context
2405: *
2406: * parse an Element declaration.
1.22 daniel 2407: *
2408: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2409: *
2410: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
2411: *
2412: * TODO There is a check [ VC: Unique Element Type Declaration ]
2413: */
1.59 ! daniel 2414: int
1.55 daniel 2415: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2416: CHAR *name;
1.59 ! daniel 2417: int ret = -1;
1.22 daniel 2418:
1.40 daniel 2419: if ((CUR == '<') && (NXT(1) == '!') &&
2420: (NXT(2) == 'E') && (NXT(3) == 'L') &&
2421: (NXT(4) == 'E') && (NXT(5) == 'M') &&
2422: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 ! daniel 2423: (NXT(8) == 'T')) {
1.40 daniel 2424: SKIP(9);
1.59 ! daniel 2425: if (!IS_BLANK(CUR)) {
! 2426: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2427: ctxt->sax->error(ctxt,
! 2428: "Space required after 'ELEMENT'\n");
! 2429: ctxt->wellFormed = 0;
! 2430: }
1.42 daniel 2431: SKIP_BLANKS;
1.22 daniel 2432: name = xmlParseName(ctxt);
2433: if (name == NULL) {
1.55 daniel 2434: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 2435: ctxt->sax->error(ctxt,
! 2436: "xmlParseElementDecl: no name for Element\n");
! 2437: ctxt->wellFormed = 0;
! 2438: return(-1);
! 2439: }
! 2440: if (!IS_BLANK(CUR)) {
! 2441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2442: ctxt->sax->error(ctxt,
! 2443: "Space required after the element name\n");
! 2444: ctxt->wellFormed = 0;
1.22 daniel 2445: }
1.42 daniel 2446: SKIP_BLANKS;
1.40 daniel 2447: if ((CUR == 'E') && (NXT(1) == 'M') &&
2448: (NXT(2) == 'P') && (NXT(3) == 'T') &&
2449: (NXT(4) == 'Y')) {
2450: SKIP(5);
1.22 daniel 2451: /*
2452: * Element must always be empty.
2453: */
1.59 ! daniel 2454: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 2455: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2456: (NXT(2) == 'Y')) {
2457: SKIP(3);
1.22 daniel 2458: /*
2459: * Element is a generic container.
2460: */
1.59 ! daniel 2461: ret = XML_ELEMENT_TYPE_ANY;
1.22 daniel 2462: } else {
2463: xmlParseElementContentDecl(ctxt, name);
1.59 ! daniel 2464: ret = XML_ELEMENT_TYPE_ANY;
1.22 daniel 2465: }
1.42 daniel 2466: SKIP_BLANKS;
1.40 daniel 2467: if (CUR != '>') {
1.55 daniel 2468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2469: ctxt->sax->error(ctxt,
1.31 daniel 2470: "xmlParseElementDecl: expected '>' at the end\n");
1.59 ! daniel 2471: ctxt->wellFormed = 0;
1.22 daniel 2472: } else
1.40 daniel 2473: NEXT;
1.22 daniel 2474: }
1.59 ! daniel 2475: return(ret);
1.22 daniel 2476: }
2477:
1.50 daniel 2478: /**
2479: * xmlParseMarkupDecl:
2480: * @ctxt: an XML parser context
2481: *
2482: * parse Markup declarations
1.22 daniel 2483: *
2484: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2485: * NotationDecl | PI | Comment
2486: *
2487: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2488: */
1.55 daniel 2489: void
2490: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2491: xmlParseElementDecl(ctxt);
2492: xmlParseAttributeListDecl(ctxt);
2493: xmlParseEntityDecl(ctxt);
2494: xmlParseNotationDecl(ctxt);
2495: xmlParsePI(ctxt);
1.31 daniel 2496: xmlParseComment(ctxt, 0);
1.22 daniel 2497: }
2498:
1.50 daniel 2499: /**
2500: * xmlParseCharRef:
2501: * @ctxt: an XML parser context
2502: *
2503: * parse Reference declarations
1.24 daniel 2504: *
2505: * [66] CharRef ::= '&#' [0-9]+ ';' |
2506: * '&#x' [0-9a-fA-F]+ ';'
1.50 daniel 2507: * return values: the value parsed
1.24 daniel 2508: */
1.55 daniel 2509: CHAR *
2510: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 2511: int val = 0;
1.44 daniel 2512: CHAR buf[2];
1.24 daniel 2513:
1.40 daniel 2514: if ((CUR == '&') && (NXT(1) == '#') &&
2515: (NXT(2) == 'x')) {
2516: SKIP(3);
2517: while (CUR != ';') {
2518: if ((CUR >= '0') && (CUR <= '9'))
2519: val = val * 16 + (CUR - '0');
2520: else if ((CUR >= 'a') && (CUR <= 'f'))
2521: val = val * 16 + (CUR - 'a') + 10;
2522: else if ((CUR >= 'A') && (CUR <= 'F'))
2523: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 2524: else {
1.55 daniel 2525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2526: ctxt->sax->error(ctxt,
1.59 ! daniel 2527: "xmlParseCharRef: invalid hexadecimal value\n");
! 2528: ctxt->wellFormed = 0;
1.29 daniel 2529: val = 0;
1.24 daniel 2530: break;
2531: }
1.47 daniel 2532: NEXT;
1.24 daniel 2533: }
1.55 daniel 2534: if (CUR == ';')
1.40 daniel 2535: NEXT;
2536: } else if ((CUR == '&') && (NXT(1) == '#')) {
2537: SKIP(2);
2538: while (CUR != ';') {
2539: if ((CUR >= '0') && (CUR <= '9'))
1.55 daniel 2540: val = val * 10 + (CUR - '0');
1.24 daniel 2541: else {
1.55 daniel 2542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2543: ctxt->sax->error(ctxt,
2544: "xmlParseCharRef: invalid decimal value\n");
1.59 ! daniel 2545: ctxt->wellFormed = 0;
1.29 daniel 2546: val = 0;
1.24 daniel 2547: break;
2548: }
1.47 daniel 2549: NEXT;
1.24 daniel 2550: }
1.55 daniel 2551: if (CUR == ';')
1.40 daniel 2552: NEXT;
1.24 daniel 2553: } else {
1.55 daniel 2554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2555: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
1.59 ! daniel 2556: ctxt->wellFormed = 0;
1.24 daniel 2557: }
1.29 daniel 2558: /*
2559: * Check the value IS_CHAR ...
2560: */
1.44 daniel 2561: if (IS_CHAR(val)) {
2562: buf[0] = (CHAR) val;
2563: buf[1] = 0;
1.50 daniel 2564: return(xmlStrndup(buf, 1));
1.44 daniel 2565: } else {
1.55 daniel 2566: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2567: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid CHAR value %d\n",
2568: val);
1.59 ! daniel 2569: ctxt->wellFormed = 0;
1.29 daniel 2570: }
1.46 daniel 2571: return(NULL);
1.24 daniel 2572: }
2573:
1.50 daniel 2574: /**
2575: * xmlParseEntityRef:
2576: * @ctxt: an XML parser context
2577: *
2578: * parse ENTITY references declarations
1.24 daniel 2579: *
2580: * [68] EntityRef ::= '&' Name ';'
1.52 daniel 2581: * return values: the entity ref string or NULL if directly as input stream.
1.24 daniel 2582: */
1.55 daniel 2583: CHAR *
2584: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.46 daniel 2585: CHAR *ret = NULL;
1.50 daniel 2586: const CHAR *q;
1.24 daniel 2587: CHAR *name;
1.59 ! daniel 2588: xmlEntityPtr ent;
1.50 daniel 2589: xmlParserInputPtr input = NULL;
1.24 daniel 2590:
1.50 daniel 2591: q = CUR_PTR;
1.40 daniel 2592: if (CUR == '&') {
2593: NEXT;
1.24 daniel 2594: name = xmlParseName(ctxt);
2595: if (name == NULL) {
1.55 daniel 2596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2597: ctxt->sax->error(ctxt, "xmlParseEntityRef: no name\n");
1.59 ! daniel 2598: ctxt->wellFormed = 0;
1.24 daniel 2599: } else {
1.40 daniel 2600: if (CUR == ';') {
2601: NEXT;
1.24 daniel 2602: /*
1.59 ! daniel 2603: * Well Formedness Constraint if:
! 2604: * - standalone
! 2605: * or
! 2606: * - no external subset and no external parameter entities
! 2607: * referenced
! 2608: * then
! 2609: * the entity referenced must have been declared
! 2610: *
! 2611: * TODO: to be double checked !!!
! 2612: */
! 2613: ent = xmlGetDocEntity(ctxt->doc, name);
! 2614: if ((ctxt->doc->standalone) ||
! 2615: ((ctxt->doc->intSubset == NULL) &&
! 2616: (ctxt->doc->extSubset == NULL))) {
! 2617: if (ent == NULL) {
! 2618: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2619: ctxt->sax->error(ctxt,
! 2620: "Entity '%s' not defined\n", name);
! 2621: ctxt->wellFormed = 0;
! 2622: }
! 2623: }
! 2624:
! 2625: /*
! 2626: * Well Formedness Constraint :
! 2627: * The referenced entity must be a parsed entity.
! 2628: */
! 2629: if (ent != NULL) {
! 2630: switch (ent->type) {
! 2631: case XML_INTERNAL_PARAMETER_ENTITY:
! 2632: case XML_EXTERNAL_PARAMETER_ENTITY:
! 2633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2634: ctxt->sax->error(ctxt,
! 2635: "Attempt to reference the parameter entity '%s'\n", name);
! 2636: ctxt->wellFormed = 0;
! 2637: break;
! 2638:
! 2639: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
! 2640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2641: ctxt->sax->error(ctxt,
! 2642: "Attempt to reference unparsed entity '%s'\n", name);
! 2643: ctxt->wellFormed = 0;
! 2644: break;
! 2645: }
! 2646: }
! 2647:
! 2648: /*
! 2649: * Well Formedness Constraint :
! 2650: * The referenced entity must not lead to recursion !
! 2651: */
! 2652:
! 2653: /*
1.52 daniel 2654: * We parsed the entity reference correctly, call SAX
2655: * interface for the proper behaviour:
2656: * - get a new input stream
2657: * - or keep the reference inline
1.24 daniel 2658: */
1.52 daniel 2659: if (ctxt->sax)
2660: input = ctxt->sax->resolveEntity(ctxt, NULL, name);
2661: if (input != NULL)
2662: xmlPushInput(ctxt, input);
2663: else {
2664: ret = xmlStrndup(q, CUR_PTR - q);
2665: }
1.24 daniel 2666: } else {
1.46 daniel 2667: char cst[2] = { '&', 0 };
2668:
1.55 daniel 2669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 2670: ctxt->sax->error(ctxt,
! 2671: "xmlParseEntityRef: expecting ';'\n");
! 2672: ctxt->wellFormed = 0;
1.46 daniel 2673: ret = xmlStrndup(cst, 1);
2674: ret = xmlStrcat(ret, name);
1.24 daniel 2675: }
1.45 daniel 2676: free(name);
1.24 daniel 2677: }
2678: }
1.46 daniel 2679: return(ret);
1.24 daniel 2680: }
2681:
1.50 daniel 2682: /**
2683: * xmlParseReference:
2684: * @ctxt: an XML parser context
2685: *
2686: * parse Reference declarations
1.24 daniel 2687: *
2688: * [67] Reference ::= EntityRef | CharRef
1.52 daniel 2689: * return values: the entity string or NULL if handled directly by pushing
2690: * the entity value as the input.
1.24 daniel 2691: */
1.55 daniel 2692: CHAR *
2693: xmlParseReference(xmlParserCtxtPtr ctxt) {
1.44 daniel 2694: if ((CUR == '&') && (NXT(1) == '#')) {
1.59 ! daniel 2695: CHAR *val = xmlParseCharRef(ctxt);
! 2696: xmlParserInputPtr in;
! 2697:
! 2698: if (val != NULL) {
! 2699: in = xmlNewStringInputStream(ctxt, val);
! 2700: xmlPushInput(ctxt, in);
! 2701: }
! 2702: return(NULL);
1.44 daniel 2703: } else if (CUR == '&') {
1.50 daniel 2704: return(xmlParseEntityRef(ctxt));
1.24 daniel 2705: }
1.46 daniel 2706: return(NULL);
1.24 daniel 2707: }
2708:
1.50 daniel 2709: /**
2710: * xmlParsePEReference:
2711: * @ctxt: an XML parser context
2712: *
2713: * parse PEReference declarations
1.22 daniel 2714: *
2715: * [69] PEReference ::= '%' Name ';'
1.50 daniel 2716: * return values: the entity content or NULL if handled directly.
1.22 daniel 2717: */
1.55 daniel 2718: CHAR *
2719: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.46 daniel 2720: CHAR *ret = NULL;
1.22 daniel 2721: CHAR *name;
1.45 daniel 2722: xmlEntityPtr entity;
1.50 daniel 2723: xmlParserInputPtr input;
1.22 daniel 2724:
1.40 daniel 2725: if (CUR == '%') {
2726: NEXT;
1.22 daniel 2727: name = xmlParseName(ctxt);
2728: if (name == NULL) {
1.55 daniel 2729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2730: ctxt->sax->error(ctxt, "xmlParsePEReference: no name\n");
1.59 ! daniel 2731: ctxt->wellFormed = 0;
1.22 daniel 2732: } else {
1.40 daniel 2733: if (CUR == ';') {
2734: NEXT;
1.45 daniel 2735: entity = xmlGetDtdEntity(ctxt->doc, name);
2736: if (entity == NULL) {
1.55 daniel 2737: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
2738: ctxt->sax->warning(ctxt,
1.59 ! daniel 2739: "xmlParsePEReference: %%%s; not found\n", name);
1.50 daniel 2740: } else {
2741: input = xmlNewEntityInputStream(ctxt, entity);
2742: xmlPushInput(ctxt, input);
1.45 daniel 2743: }
1.22 daniel 2744: } else {
1.50 daniel 2745: char cst[2] = { '%', 0 };
1.46 daniel 2746:
1.55 daniel 2747: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 2748: ctxt->sax->error(ctxt,
! 2749: "xmlParsePEReference: expecting ';'\n");
! 2750: ctxt->wellFormed = 0;
1.46 daniel 2751: ret = xmlStrndup(cst, 1);
2752: ret = xmlStrcat(ret, name);
1.22 daniel 2753: }
1.45 daniel 2754: free(name);
1.3 veillard 2755: }
2756: }
1.46 daniel 2757: return(ret);
1.3 veillard 2758: }
2759:
1.50 daniel 2760: /**
2761: * xmlParseDocTypeDecl :
2762: * @ctxt: an XML parser context
2763: *
2764: * parse a DOCTYPE declaration
1.21 daniel 2765: *
1.22 daniel 2766: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
2767: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 2768: */
2769:
1.55 daniel 2770: void
2771: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 2772: xmlDtdPtr dtd;
1.21 daniel 2773: CHAR *name;
2774: CHAR *ExternalID = NULL;
1.39 daniel 2775: CHAR *URI = NULL;
1.21 daniel 2776:
2777: /*
2778: * We know that '<!DOCTYPE' has been detected.
2779: */
1.40 daniel 2780: SKIP(9);
1.21 daniel 2781:
1.42 daniel 2782: SKIP_BLANKS;
1.21 daniel 2783:
2784: /*
2785: * Parse the DOCTYPE name.
2786: */
2787: name = xmlParseName(ctxt);
2788: if (name == NULL) {
1.55 daniel 2789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2790: ctxt->sax->error(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 ! daniel 2791: ctxt->wellFormed = 0;
1.21 daniel 2792: }
2793:
1.42 daniel 2794: SKIP_BLANKS;
1.21 daniel 2795:
2796: /*
1.22 daniel 2797: * Check for SystemID and ExternalID
2798: */
1.39 daniel 2799: URI = xmlParseExternalID(ctxt, &ExternalID);
1.42 daniel 2800: SKIP_BLANKS;
1.36 daniel 2801:
1.59 ! daniel 2802: dtd = xmlCreateIntSubset(ctxt->doc, name, ExternalID, URI);
1.22 daniel 2803:
2804: /*
2805: * Is there any DTD definition ?
2806: */
1.40 daniel 2807: if (CUR == '[') {
2808: NEXT;
1.22 daniel 2809: /*
2810: * Parse the succession of Markup declarations and
2811: * PEReferences.
2812: * Subsequence (markupdecl | PEReference | S)*
2813: */
1.40 daniel 2814: while (CUR != ']') {
2815: const CHAR *check = CUR_PTR;
1.22 daniel 2816:
1.42 daniel 2817: SKIP_BLANKS;
1.22 daniel 2818: xmlParseMarkupDecl(ctxt);
1.50 daniel 2819: xmlParsePEReference(ctxt);
1.22 daniel 2820:
1.40 daniel 2821: if (CUR_PTR == check) {
1.55 daniel 2822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2823: ctxt->sax->error(ctxt,
1.31 daniel 2824: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 ! daniel 2825: ctxt->wellFormed = 0;
1.22 daniel 2826: break;
2827: }
2828: }
1.40 daniel 2829: if (CUR == ']') NEXT;
1.22 daniel 2830: }
2831:
2832: /*
2833: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 2834: */
1.40 daniel 2835: if (CUR != '>') {
1.55 daniel 2836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2837: ctxt->sax->error(ctxt, "DOCTYPE unproperly terminated\n");
1.59 ! daniel 2838: ctxt->wellFormed = 0;
1.22 daniel 2839: /* We shouldn't try to resynchronize ... */
1.21 daniel 2840: }
1.40 daniel 2841: NEXT;
1.22 daniel 2842:
2843: /*
2844: * Cleanup, since we don't use all those identifiers
2845: * TODO : the DOCTYPE if available should be stored !
2846: */
1.39 daniel 2847: if (URI != NULL) free(URI);
1.22 daniel 2848: if (ExternalID != NULL) free(ExternalID);
2849: if (name != NULL) free(name);
1.21 daniel 2850: }
2851:
1.50 daniel 2852: /**
2853: * xmlParseAttribute:
2854: * @ctxt: an XML parser context
2855: * @node: the node carrying the attribute
2856: *
2857: * parse an attribute
1.3 veillard 2858: *
1.22 daniel 2859: * [41] Attribute ::= Name Eq AttValue
2860: *
2861: * [25] Eq ::= S? '=' S?
2862: *
1.29 daniel 2863: * With namespace:
2864: *
2865: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 2866: *
2867: * Also the case QName == xmlns:??? is handled independently as a namespace
2868: * definition.
1.3 veillard 2869: */
2870:
1.52 daniel 2871: xmlAttrPtr xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.59 ! daniel 2872: CHAR *name, *val;
1.29 daniel 2873: CHAR *ns;
1.52 daniel 2874: CHAR *value = NULL;
2875: xmlAttrPtr ret;
1.3 veillard 2876:
1.29 daniel 2877: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 2878: if (name == NULL) {
1.55 daniel 2879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2880: ctxt->sax->error(ctxt, "error parsing attribute name\n");
1.59 ! daniel 2881: ctxt->wellFormed = 0;
1.52 daniel 2882: return(NULL);
1.3 veillard 2883: }
2884:
2885: /*
1.29 daniel 2886: * read the value
1.3 veillard 2887: */
1.42 daniel 2888: SKIP_BLANKS;
1.40 daniel 2889: if (CUR == '=') {
2890: NEXT;
1.42 daniel 2891: SKIP_BLANKS;
1.29 daniel 2892: value = xmlParseAttValue(ctxt);
2893: } else {
1.55 daniel 2894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 2895: ctxt->sax->error(ctxt,
! 2896: "Specification mandate value for attribute %s\n", name);
! 2897: ctxt->wellFormed = 0;
1.3 veillard 2898: }
2899:
2900: /*
1.43 daniel 2901: * Check whether it's a namespace definition
2902: */
2903: if ((ns == NULL) &&
2904: (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
2905: (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
2906: /* a default namespace definition */
2907: xmlNewNs(node, value, NULL);
2908: if (name != NULL)
2909: free(name);
2910: if (value != NULL)
2911: free(value);
1.52 daniel 2912: return(NULL);
1.43 daniel 2913: }
2914: if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
2915: (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
2916: /* a standard namespace definition */
2917: xmlNewNs(node, value, name);
1.50 daniel 2918: free(ns);
1.43 daniel 2919: if (name != NULL)
2920: free(name);
2921: if (value != NULL)
2922: free(value);
1.52 daniel 2923: return(NULL);
1.43 daniel 2924: }
2925:
1.59 ! daniel 2926: /*
! 2927: * Well formedness requires at most one declaration of an attribute
! 2928: */
! 2929: if ((val = xmlGetProp(ctxt->node, name)) != NULL) {
! 2930: free(val);
! 2931: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2932: ctxt->sax->error(ctxt, "Attribute %s redefined\n", name);
! 2933: ctxt->wellFormed = 0;
! 2934: ret = NULL;
! 2935: } else {
! 2936: ret = xmlNewProp(ctxt->node, name, NULL);
! 2937: if (ret != NULL)
! 2938: ret->val = xmlStringGetNodeList(ctxt->doc, value);
! 2939: }
1.53 daniel 2940:
2941: if (ns != NULL)
2942: free(ns);
2943: if (value != NULL)
2944: free(value);
2945: free(name);
1.52 daniel 2946: return(ret);
1.3 veillard 2947: }
2948:
1.50 daniel 2949: /**
2950: * xmlParseStartTag:
2951: * @ctxt: an XML parser context
2952: *
2953: * parse a start of tag either for rule element or
2954: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 2955: *
2956: * [40] STag ::= '<' Name (S Attribute)* S? '>'
2957: *
1.29 daniel 2958: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
2959: *
2960: * With namespace:
2961: *
2962: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
2963: *
2964: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.50 daniel 2965: *
2966: * return values: the XML new node or NULL.
1.2 veillard 2967: */
2968:
1.16 daniel 2969: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 2970: CHAR *namespace, *name;
2971: xmlNsPtr ns = NULL;
1.2 veillard 2972: xmlNodePtr ret = NULL;
1.50 daniel 2973: xmlNodePtr parent = ctxt->node;
1.2 veillard 2974:
1.40 daniel 2975: if (CUR != '<') return(NULL);
2976: NEXT;
1.3 veillard 2977:
1.34 daniel 2978: name = xmlNamespaceParseQName(ctxt, &namespace);
1.59 ! daniel 2979: if (name == NULL) {
! 2980: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2981: ctxt->sax->error(ctxt,
! 2982: "xmlParseStartTag: invalid element name\n");
! 2983: ctxt->wellFormed = 0;
! 2984: return(NULL);
! 2985: }
1.3 veillard 2986:
1.43 daniel 2987: /*
2988: * Note : the namespace resolution is deferred until the end of the
2989: * attributes parsing, since local namespace can be defined as
2990: * an attribute at this level.
2991: */
1.50 daniel 2992: ret = xmlNewDocNode(ctxt->doc, ns, name, NULL);
2993: if (ret == NULL) {
2994: if (namespace != NULL)
2995: free(namespace);
2996: free(name);
2997: return(NULL);
2998: }
2999:
3000: /*
3001: * We are parsing a new node.
3002: */
3003: nodePush(ctxt, ret);
1.2 veillard 3004:
1.3 veillard 3005: /*
3006: * Now parse the attributes, it ends up with the ending
3007: *
3008: * (S Attribute)* S?
3009: */
1.42 daniel 3010: SKIP_BLANKS;
1.40 daniel 3011: while ((IS_CHAR(CUR)) &&
3012: (CUR != '>') &&
3013: ((CUR != '/') || (NXT(1) != '>'))) {
3014: const CHAR *q = CUR_PTR;
1.29 daniel 3015:
3016: xmlParseAttribute(ctxt, ret);
1.42 daniel 3017: SKIP_BLANKS;
1.29 daniel 3018:
1.40 daniel 3019: if (q == CUR_PTR) {
1.55 daniel 3020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3021: ctxt->sax->error(ctxt,
1.31 daniel 3022: "xmlParseStartTag: problem parsing attributes\n");
1.59 ! daniel 3023: ctxt->wellFormed = 0;
1.29 daniel 3024: break;
1.3 veillard 3025: }
3026: }
3027:
1.43 daniel 3028: /*
3029: * Search the namespace
3030: */
3031: ns = xmlSearchNs(ctxt->doc, ret, namespace);
3032: if (ns == NULL) /* ret still doesn't have a parent yet ! */
1.50 daniel 3033: ns = xmlSearchNs(ctxt->doc, parent, namespace);
1.43 daniel 3034: xmlSetNs(ret, ns);
3035: if (namespace != NULL)
3036: free(namespace);
3037:
1.44 daniel 3038: /*
3039: * SAX: Start of Element !
3040: */
3041: if (ctxt->sax != NULL)
3042: ctxt->sax->startElement(ctxt, name);
1.52 daniel 3043: free(name);
3044:
3045: /*
3046: * Link the child element
3047: */
3048: if (ctxt->nodeNr < 2) return(ret);
3049: parent = ctxt->nodeTab[ctxt->nodeNr - 2];
3050: if (parent != NULL)
3051: xmlAddChild(parent, ctxt->node);
1.44 daniel 3052:
1.3 veillard 3053: return(ret);
3054: }
3055:
1.50 daniel 3056: /**
3057: * xmlParseEndTag:
3058: * @ctxt: an XML parser context
3059: * @nsPtr: the current node namespace definition
3060: * @tagPtr: CHAR** receive the tag value
3061: *
3062: * parse an end of tag
1.27 daniel 3063: *
3064: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 3065: *
3066: * With namespace
3067: *
3068: * [9] ETag ::= '</' QName S? '>'
1.50 daniel 3069: *
3070: * return values: tagPtr receive the tag name just read
1.7 veillard 3071: */
3072:
1.55 daniel 3073: void
3074: xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
1.34 daniel 3075: CHAR *namespace, *name;
3076: xmlNsPtr ns = NULL;
1.7 veillard 3077:
1.34 daniel 3078: *nsPtr = NULL;
1.7 veillard 3079: *tagPtr = NULL;
3080:
1.40 daniel 3081: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 3082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3083: ctxt->sax->error(ctxt, "xmlParseEndTag: '</' not found\n");
1.59 ! daniel 3084: ctxt->wellFormed = 0;
1.27 daniel 3085: return;
3086: }
1.40 daniel 3087: SKIP(2);
1.7 veillard 3088:
1.34 daniel 3089: name = xmlNamespaceParseQName(ctxt, &namespace);
1.43 daniel 3090:
3091: /*
3092: * Search the namespace
3093: */
3094: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
3095: if (namespace != NULL)
1.34 daniel 3096: free(namespace);
1.7 veillard 3097:
1.34 daniel 3098: *nsPtr = ns;
1.7 veillard 3099: *tagPtr = name;
3100:
3101: /*
3102: * We should definitely be at the ending "S? '>'" part
3103: */
1.42 daniel 3104: SKIP_BLANKS;
1.40 daniel 3105: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 3106: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3107: ctxt->sax->error(ctxt, "End tag : expected '>'\n");
1.59 ! daniel 3108: ctxt->wellFormed = 0;
1.7 veillard 3109: } else
1.40 daniel 3110: NEXT;
1.7 veillard 3111:
3112: return;
3113: }
3114:
1.50 daniel 3115: /**
3116: * xmlParseCDSect:
3117: * @ctxt: an XML parser context
3118: *
3119: * Parse escaped pure raw content.
1.29 daniel 3120: *
3121: * [18] CDSect ::= CDStart CData CDEnd
3122: *
3123: * [19] CDStart ::= '<![CDATA['
3124: *
3125: * [20] Data ::= (Char* - (Char* ']]>' Char*))
3126: *
3127: * [21] CDEnd ::= ']]>'
1.3 veillard 3128: */
1.55 daniel 3129: void
3130: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 3131: const CHAR *r, *s, *base;
1.3 veillard 3132:
1.40 daniel 3133: if ((CUR == '<') && (NXT(1) == '!') &&
3134: (NXT(2) == '[') && (NXT(3) == 'C') &&
3135: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3136: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3137: (NXT(8) == '[')) {
3138: SKIP(9);
1.29 daniel 3139: } else
1.45 daniel 3140: return;
1.40 daniel 3141: base = CUR_PTR;
3142: if (!IS_CHAR(CUR)) {
1.55 daniel 3143: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3144: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 ! daniel 3145: ctxt->wellFormed = 0;
1.45 daniel 3146: return;
1.3 veillard 3147: }
1.40 daniel 3148: r = NEXT;
3149: if (!IS_CHAR(CUR)) {
1.55 daniel 3150: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3151: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 ! daniel 3152: ctxt->wellFormed = 0;
1.45 daniel 3153: return;
1.3 veillard 3154: }
1.40 daniel 3155: s = NEXT;
3156: while (IS_CHAR(CUR) &&
3157: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
3158: r++;s++;NEXT;
1.3 veillard 3159: }
1.40 daniel 3160: if (!IS_CHAR(CUR)) {
1.55 daniel 3161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3162: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 ! daniel 3163: ctxt->wellFormed = 0;
1.45 daniel 3164: return;
1.3 veillard 3165: }
1.16 daniel 3166:
1.45 daniel 3167: /*
3168: * Ok the segment [base CUR_PTR] is to be consumed as chars.
3169: */
3170: if (ctxt->sax != NULL) {
3171: if (areBlanks(ctxt, base, CUR_PTR - base))
1.59 ! daniel 3172: ctxt->sax->ignorableWhitespace(ctxt, base, 0, (CUR_PTR - base) - 2);
1.45 daniel 3173: else
1.59 ! daniel 3174: ctxt->sax->characters(ctxt, base, 0, (CUR_PTR - base) - 2);
1.45 daniel 3175: }
1.2 veillard 3176: }
3177:
1.50 daniel 3178: /**
3179: * xmlParseContent:
3180: * @ctxt: an XML parser context
3181: *
3182: * Parse a content:
1.2 veillard 3183: *
1.27 daniel 3184: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 3185: */
3186:
1.55 daniel 3187: void
3188: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.2 veillard 3189: xmlNodePtr ret = NULL;
3190:
1.40 daniel 3191: while ((CUR != '<') || (NXT(1) != '/')) {
3192: const CHAR *test = CUR_PTR;
1.27 daniel 3193: ret = NULL;
3194:
3195: /*
3196: * First case : a Processing Instruction.
3197: */
1.40 daniel 3198: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 3199: xmlParsePI(ctxt);
3200: }
3201: /*
3202: * Second case : a CDSection
3203: */
1.40 daniel 3204: else if ((CUR == '<') && (NXT(1) == '!') &&
3205: (NXT(2) == '[') && (NXT(3) == 'C') &&
3206: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3207: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3208: (NXT(8) == '[')) {
1.45 daniel 3209: xmlParseCDSect(ctxt);
1.27 daniel 3210: }
3211: /*
3212: * Third case : a comment
3213: */
1.40 daniel 3214: else if ((CUR == '<') && (NXT(1) == '!') &&
3215: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 3216: ret = xmlParseComment(ctxt, 1);
1.27 daniel 3217: }
3218: /*
3219: * Fourth case : a sub-element.
3220: */
1.40 daniel 3221: else if (CUR == '<') {
1.45 daniel 3222: ret = xmlParseElement(ctxt);
3223: }
3224: /*
1.50 daniel 3225: * Fifth case : a reference. If if has not been resolved,
3226: * parsing returns it's Name, create the node
1.45 daniel 3227: */
3228: else if (CUR == '&') {
1.50 daniel 3229: CHAR *val = xmlParseReference(ctxt);
3230: if (val != NULL) {
3231: if (val[0] != '&') {
3232: /*
3233: * inline predefined entity.
3234: */
3235: if (ctxt->sax != NULL)
3236: ctxt->sax->characters(ctxt, val, 0, xmlStrlen(val));
3237: } else {
3238: /*
3239: * user defined entity, create a node.
3240: */
3241: ret = xmlNewReference(ctxt->doc, val);
3242: xmlAddChild(ctxt->node, ret);
3243: }
3244: free(val);
3245: }
1.27 daniel 3246: }
3247: /*
3248: * Last case, text. Note that References are handled directly.
3249: */
3250: else {
1.45 daniel 3251: xmlParseCharData(ctxt, 0);
1.3 veillard 3252: }
1.14 veillard 3253:
3254: /*
1.45 daniel 3255: * Pop-up of finished entities.
1.14 veillard 3256: */
1.45 daniel 3257: while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
3258:
1.40 daniel 3259: if (test == CUR_PTR) {
1.55 daniel 3260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 3261: ctxt->sax->error(ctxt,
! 3262: "detected an error in element content\n");
! 3263: ctxt->wellFormed = 0;
1.29 daniel 3264: break;
3265: }
1.3 veillard 3266: }
1.2 veillard 3267: }
3268:
1.50 daniel 3269: /**
3270: * xmlParseElement:
3271: * @ctxt: an XML parser context
3272: *
3273: * parse an XML element, this is highly recursive
1.26 daniel 3274: *
3275: * [39] element ::= EmptyElemTag | STag content ETag
3276: *
3277: * [41] Attribute ::= Name Eq AttValue
1.50 daniel 3278: * return values: the XML new node or NULL
1.2 veillard 3279: */
1.26 daniel 3280:
1.2 veillard 3281:
1.45 daniel 3282: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 3283: xmlNodePtr ret;
1.40 daniel 3284: const CHAR *openTag = CUR_PTR;
1.32 daniel 3285: xmlParserNodeInfo node_info;
1.27 daniel 3286: CHAR *endTag;
1.34 daniel 3287: xmlNsPtr endNs;
1.2 veillard 3288:
1.32 daniel 3289: /* Capture start position */
1.40 daniel 3290: node_info.begin_pos = CUR_PTR - ctxt->input->base;
3291: node_info.begin_line = ctxt->input->line;
1.32 daniel 3292:
1.16 daniel 3293: ret = xmlParseStartTag(ctxt);
1.3 veillard 3294: if (ret == NULL) {
3295: return(NULL);
3296: }
1.2 veillard 3297:
3298: /*
3299: * Check for an Empty Element.
3300: */
1.40 daniel 3301: if ((CUR == '/') && (NXT(1) == '>')) {
3302: SKIP(2);
1.45 daniel 3303: if (ctxt->sax != NULL)
3304: ctxt->sax->endElement(ctxt, ret->name);
3305:
3306: /*
3307: * end of parsing of this node.
3308: */
3309: nodePop(ctxt);
3310:
1.2 veillard 3311: return(ret);
3312: }
1.40 daniel 3313: if (CUR == '>') NEXT;
1.2 veillard 3314: else {
1.55 daniel 3315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 daniel 3316: ctxt->sax->error(ctxt, "Couldn't find end of Start Tag\n%.30s\n",
3317: openTag);
1.59 ! daniel 3318: ctxt->wellFormed = 0;
1.45 daniel 3319:
3320: /*
3321: * end of parsing of this node.
3322: */
3323: nodePop(ctxt);
3324:
1.16 daniel 3325: return(NULL);
1.2 veillard 3326: }
3327:
3328: /*
3329: * Parse the content of the element:
3330: */
1.45 daniel 3331: xmlParseContent(ctxt);
1.40 daniel 3332: if (!IS_CHAR(CUR)) {
1.55 daniel 3333: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 daniel 3334: ctxt->sax->error(ctxt,
3335: "Premature end of data in tag %.30s\n", openTag);
1.59 ! daniel 3336: ctxt->wellFormed = 0;
1.45 daniel 3337:
3338: /*
3339: * end of parsing of this node.
3340: */
3341: nodePop(ctxt);
3342:
1.16 daniel 3343: return(NULL);
1.2 veillard 3344: }
3345:
3346: /*
1.27 daniel 3347: * parse the end of tag: '</' should be here.
1.2 veillard 3348: */
1.34 daniel 3349: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 3350:
1.27 daniel 3351: /*
3352: * Check that the Name in the ETag is the same as in the STag.
3353: */
1.34 daniel 3354: if (endNs != ret->ns) {
1.55 daniel 3355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3356: ctxt->sax->error(ctxt,
1.43 daniel 3357: "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
1.31 daniel 3358: openTag, endTag);
1.59 ! daniel 3359: ctxt->wellFormed = 0;
1.27 daniel 3360: }
1.32 daniel 3361: if (endTag == NULL ) {
1.55 daniel 3362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3363: ctxt->sax->error(ctxt, "The End tag has no name\n%.30s\n", openTag);
1.59 ! daniel 3364: ctxt->wellFormed = 0;
1.45 daniel 3365: } else if (xmlStrcmp(ret->name, endTag)) {
1.55 daniel 3366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3367: ctxt->sax->error(ctxt,
1.31 daniel 3368: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
3369: openTag, endTag);
1.59 ! daniel 3370: ctxt->wellFormed = 0;
1.27 daniel 3371: }
1.44 daniel 3372: /*
3373: * SAX: End of Tag
3374: */
3375: else if (ctxt->sax != NULL)
3376: ctxt->sax->endElement(ctxt, endTag);
1.7 veillard 3377:
1.44 daniel 3378: if (endTag != NULL)
3379: free(endTag);
1.2 veillard 3380:
1.32 daniel 3381: /* Capture end position and add node */
3382: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 3383: node_info.end_pos = CUR_PTR - ctxt->input->base;
3384: node_info.end_line = ctxt->input->line;
1.32 daniel 3385: node_info.node = ret;
3386: xmlParserAddNodeInfo(ctxt, &node_info);
3387: }
1.43 daniel 3388:
3389: /*
3390: * end of parsing of this node.
3391: */
3392: nodePop(ctxt);
3393:
1.2 veillard 3394: return(ret);
3395: }
3396:
1.50 daniel 3397: /**
3398: * xmlParseVersionNum:
3399: * @ctxt: an XML parser context
3400: *
3401: * parse the XML version value.
1.29 daniel 3402: *
3403: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.50 daniel 3404: * return values: the string giving the XML version number, or NULL
1.29 daniel 3405: */
1.55 daniel 3406: CHAR *
3407: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 3408: const CHAR *q = CUR_PTR;
1.29 daniel 3409: CHAR *ret;
3410:
1.40 daniel 3411: while (IS_CHAR(CUR) &&
3412: (((CUR >= 'a') && (CUR <= 'z')) ||
3413: ((CUR >= 'A') && (CUR <= 'Z')) ||
3414: ((CUR >= '0') && (CUR <= '9')) ||
3415: (CUR == '_') || (CUR == '.') ||
3416: (CUR == ':') || (CUR == '-'))) NEXT;
3417: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3418: return(ret);
3419: }
3420:
1.50 daniel 3421: /**
3422: * xmlParseVersionInfo:
3423: * @ctxt: an XML parser context
3424: *
3425: * parse the XML version.
1.29 daniel 3426: *
3427: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3428: *
3429: * [25] Eq ::= S? '=' S?
1.50 daniel 3430: *
3431: * return values: the version string, e.g. "1.0"
1.29 daniel 3432: */
3433:
1.55 daniel 3434: CHAR *
3435: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 3436: CHAR *version = NULL;
3437: const CHAR *q;
3438:
1.40 daniel 3439: if ((CUR == 'v') && (NXT(1) == 'e') &&
3440: (NXT(2) == 'r') && (NXT(3) == 's') &&
3441: (NXT(4) == 'i') && (NXT(5) == 'o') &&
3442: (NXT(6) == 'n')) {
3443: SKIP(7);
1.42 daniel 3444: SKIP_BLANKS;
1.40 daniel 3445: if (CUR != '=') {
1.55 daniel 3446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3447: ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected '='\n");
1.59 ! daniel 3448: ctxt->wellFormed = 0;
1.31 daniel 3449: return(NULL);
3450: }
1.40 daniel 3451: NEXT;
1.42 daniel 3452: SKIP_BLANKS;
1.40 daniel 3453: if (CUR == '"') {
3454: NEXT;
3455: q = CUR_PTR;
1.29 daniel 3456: version = xmlParseVersionNum(ctxt);
1.55 daniel 3457: if (CUR != '"') {
3458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 ! daniel 3460: ctxt->wellFormed = 0;
1.55 daniel 3461: } else
1.40 daniel 3462: NEXT;
3463: } else if (CUR == '\''){
3464: NEXT;
3465: q = CUR_PTR;
1.29 daniel 3466: version = xmlParseVersionNum(ctxt);
1.55 daniel 3467: if (CUR != '\'') {
3468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3469: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 ! daniel 3470: ctxt->wellFormed = 0;
1.55 daniel 3471: } else
1.40 daniel 3472: NEXT;
1.31 daniel 3473: } else {
1.55 daniel 3474: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 3475: ctxt->sax->error(ctxt,
! 3476: "xmlParseVersionInfo : expected ' or \"\n");
! 3477: ctxt->wellFormed = 0;
1.29 daniel 3478: }
3479: }
3480: return(version);
3481: }
3482:
1.50 daniel 3483: /**
3484: * xmlParseEncName:
3485: * @ctxt: an XML parser context
3486: *
3487: * parse the XML encoding name
1.29 daniel 3488: *
3489: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 3490: *
3491: * return values: the encoding name value or NULL
1.29 daniel 3492: */
1.55 daniel 3493: CHAR *
3494: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 3495: const CHAR *q = CUR_PTR;
1.29 daniel 3496: CHAR *ret = NULL;
3497:
1.40 daniel 3498: if (((CUR >= 'a') && (CUR <= 'z')) ||
3499: ((CUR >= 'A') && (CUR <= 'Z'))) {
3500: NEXT;
3501: while (IS_CHAR(CUR) &&
3502: (((CUR >= 'a') && (CUR <= 'z')) ||
3503: ((CUR >= 'A') && (CUR <= 'Z')) ||
3504: ((CUR >= '0') && (CUR <= '9')) ||
3505: (CUR == '-'))) NEXT;
3506: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3507: } else {
1.55 daniel 3508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3509: ctxt->sax->error(ctxt, "Invalid XML encoding name\n");
1.59 ! daniel 3510: ctxt->wellFormed = 0;
1.29 daniel 3511: }
3512: return(ret);
3513: }
3514:
1.50 daniel 3515: /**
3516: * xmlParseEncodingDecl:
3517: * @ctxt: an XML parser context
3518: *
3519: * parse the XML encoding declaration
1.29 daniel 3520: *
3521: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 3522: *
3523: * TODO: this should setup the conversion filters.
3524: *
3525: * return values: the encoding value or NULL
1.29 daniel 3526: */
3527:
1.55 daniel 3528: CHAR *
3529: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 3530: CHAR *encoding = NULL;
3531: const CHAR *q;
3532:
1.42 daniel 3533: SKIP_BLANKS;
1.40 daniel 3534: if ((CUR == 'e') && (NXT(1) == 'n') &&
3535: (NXT(2) == 'c') && (NXT(3) == 'o') &&
3536: (NXT(4) == 'd') && (NXT(5) == 'i') &&
3537: (NXT(6) == 'n') && (NXT(7) == 'g')) {
3538: SKIP(8);
1.42 daniel 3539: SKIP_BLANKS;
1.40 daniel 3540: if (CUR != '=') {
1.55 daniel 3541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3542: ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected '='\n");
1.59 ! daniel 3543: ctxt->wellFormed = 0;
1.31 daniel 3544: return(NULL);
3545: }
1.40 daniel 3546: NEXT;
1.42 daniel 3547: SKIP_BLANKS;
1.40 daniel 3548: if (CUR == '"') {
3549: NEXT;
3550: q = CUR_PTR;
1.29 daniel 3551: encoding = xmlParseEncName(ctxt);
1.55 daniel 3552: if (CUR != '"') {
3553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3554: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 ! daniel 3555: ctxt->wellFormed = 0;
1.55 daniel 3556: } else
1.40 daniel 3557: NEXT;
3558: } else if (CUR == '\''){
3559: NEXT;
3560: q = CUR_PTR;
1.29 daniel 3561: encoding = xmlParseEncName(ctxt);
1.55 daniel 3562: if (CUR != '\'') {
3563: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3564: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 ! daniel 3565: ctxt->wellFormed = 0;
1.55 daniel 3566: } else
1.40 daniel 3567: NEXT;
3568: } else if (CUR == '"'){
1.55 daniel 3569: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 3570: ctxt->sax->error(ctxt,
! 3571: "xmlParseEncodingDecl : expected ' or \"\n");
! 3572: ctxt->wellFormed = 0;
1.29 daniel 3573: }
3574: }
3575: return(encoding);
3576: }
3577:
1.50 daniel 3578: /**
3579: * xmlParseSDDecl:
3580: * @ctxt: an XML parser context
3581: *
3582: * parse the XML standalone declaration
1.29 daniel 3583: *
3584: * [32] SDDecl ::= S 'standalone' Eq
3585: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.50 daniel 3586: * return values: 1 if standalone, 0 otherwise
1.29 daniel 3587: */
3588:
1.55 daniel 3589: int
3590: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 3591: int standalone = -1;
3592:
1.42 daniel 3593: SKIP_BLANKS;
1.40 daniel 3594: if ((CUR == 's') && (NXT(1) == 't') &&
3595: (NXT(2) == 'a') && (NXT(3) == 'n') &&
3596: (NXT(4) == 'd') && (NXT(5) == 'a') &&
3597: (NXT(6) == 'l') && (NXT(7) == 'o') &&
3598: (NXT(8) == 'n') && (NXT(9) == 'e')) {
3599: SKIP(10);
3600: if (CUR != '=') {
1.55 daniel 3601: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 3602: ctxt->sax->error(ctxt,
! 3603: "XML standalone declaration : expected '='\n");
! 3604: ctxt->wellFormed = 0;
1.32 daniel 3605: return(standalone);
3606: }
1.40 daniel 3607: NEXT;
1.42 daniel 3608: SKIP_BLANKS;
1.40 daniel 3609: if (CUR == '\''){
3610: NEXT;
3611: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3612: standalone = 0;
1.40 daniel 3613: SKIP(2);
3614: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3615: (NXT(2) == 's')) {
1.29 daniel 3616: standalone = 1;
1.40 daniel 3617: SKIP(3);
1.29 daniel 3618: } else {
1.55 daniel 3619: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3620: ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.59 ! daniel 3621: ctxt->wellFormed = 0;
1.29 daniel 3622: }
1.55 daniel 3623: if (CUR != '\'') {
3624: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3625: ctxt->sax->error(ctxt, "String not closed\n");
1.59 ! daniel 3626: ctxt->wellFormed = 0;
1.55 daniel 3627: } else
1.40 daniel 3628: NEXT;
3629: } else if (CUR == '"'){
3630: NEXT;
3631: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3632: standalone = 0;
1.40 daniel 3633: SKIP(2);
3634: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3635: (NXT(2) == 's')) {
1.29 daniel 3636: standalone = 1;
1.40 daniel 3637: SKIP(3);
1.29 daniel 3638: } else {
1.55 daniel 3639: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 ! daniel 3640: ctxt->sax->error(ctxt,
! 3641: "standalone accepts only 'yes' or 'no'\n");
! 3642: ctxt->wellFormed = 0;
1.29 daniel 3643: }
1.55 daniel 3644: if (CUR != '"') {
3645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3646: ctxt->sax->error(ctxt, "String not closed\n");
1.59 ! daniel 3647: ctxt->wellFormed = 0;
1.55 daniel 3648: } else
1.40 daniel 3649: NEXT;
1.37 daniel 3650: } else {
1.55 daniel 3651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3652: ctxt->sax->error(ctxt, "Standalone value not found\n");
1.59 ! daniel 3653: ctxt->wellFormed = 0;
1.37 daniel 3654: }
1.29 daniel 3655: }
3656: return(standalone);
3657: }
3658:
1.50 daniel 3659: /**
3660: * xmlParseXMLDecl:
3661: * @ctxt: an XML parser context
3662: *
3663: * parse an XML declaration header
1.29 daniel 3664: *
3665: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 3666: */
3667:
1.55 daniel 3668: void
3669: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 3670: CHAR *version;
3671:
3672: /*
1.19 daniel 3673: * We know that '<?xml' is here.
1.1 veillard 3674: */
1.40 daniel 3675: SKIP(5);
1.1 veillard 3676:
1.59 ! daniel 3677: if (!IS_BLANK(CUR)) {
! 3678: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3679: ctxt->sax->error(ctxt, "Blank needed after '<?xml'\n");
! 3680: ctxt->wellFormed = 0;
! 3681: }
1.42 daniel 3682: SKIP_BLANKS;
1.1 veillard 3683:
3684: /*
1.29 daniel 3685: * We should have the VersionInfo here.
1.1 veillard 3686: */
1.29 daniel 3687: version = xmlParseVersionInfo(ctxt);
3688: if (version == NULL)
1.45 daniel 3689: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3690: ctxt->doc = xmlNewDoc(version);
3691: free(version);
1.29 daniel 3692:
3693: /*
3694: * We may have the encoding declaration
3695: */
1.59 ! daniel 3696: if (!IS_BLANK(CUR)) {
! 3697: if ((CUR == '?') && (NXT(1) == '>')) {
! 3698: SKIP(2);
! 3699: return;
! 3700: }
! 3701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3702: ctxt->sax->error(ctxt, "Blank needed here\n");
! 3703: ctxt->wellFormed = 0;
! 3704: }
1.32 daniel 3705: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 3706:
3707: /*
1.29 daniel 3708: * We may have the standalone status.
1.1 veillard 3709: */
1.59 ! daniel 3710: if ((ctxt->doc->encoding != NULL) && (!IS_BLANK(CUR))) {
! 3711: if ((CUR == '?') && (NXT(1) == '>')) {
! 3712: SKIP(2);
! 3713: return;
! 3714: }
! 3715: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3716: ctxt->sax->error(ctxt, "Blank needed here\n");
! 3717: ctxt->wellFormed = 0;
! 3718: }
! 3719: SKIP_BLANKS;
1.32 daniel 3720: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 3721:
1.42 daniel 3722: SKIP_BLANKS;
1.40 daniel 3723: if ((CUR == '?') && (NXT(1) == '>')) {
3724: SKIP(2);
3725: } else if (CUR == '>') {
1.31 daniel 3726: /* Deprecated old WD ... */
1.55 daniel 3727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728: ctxt->sax->error(ctxt, "XML declaration must end-up with '?>'\n");
1.59 ! daniel 3729: ctxt->wellFormed = 0;
1.40 daniel 3730: NEXT;
1.29 daniel 3731: } else {
1.55 daniel 3732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3733: ctxt->sax->error(ctxt, "parsing XML declaration: '?>' expected\n");
1.59 ! daniel 3734: ctxt->wellFormed = 0;
1.40 daniel 3735: MOVETO_ENDTAG(CUR_PTR);
3736: NEXT;
1.29 daniel 3737: }
1.1 veillard 3738: }
3739:
1.50 daniel 3740: /**
3741: * xmlParseMisc:
3742: * @ctxt: an XML parser context
3743: *
3744: * parse an XML Misc* optionnal field.
1.21 daniel 3745: *
1.22 daniel 3746: * [27] Misc ::= Comment | PI | S
1.1 veillard 3747: */
3748:
1.55 daniel 3749: void
3750: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 3751: while (((CUR == '<') && (NXT(1) == '?')) ||
3752: ((CUR == '<') && (NXT(1) == '!') &&
3753: (NXT(2) == '-') && (NXT(3) == '-')) ||
3754: IS_BLANK(CUR)) {
3755: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 3756: xmlParsePI(ctxt);
1.40 daniel 3757: } else if (IS_BLANK(CUR)) {
3758: NEXT;
1.1 veillard 3759: } else
1.31 daniel 3760: xmlParseComment(ctxt, 0);
1.1 veillard 3761: }
3762: }
3763:
1.50 daniel 3764: /**
3765: * xmlParseDocument :
3766: * @ctxt: an XML parser context
3767: *
3768: * parse an XML document (and build a tree if using the standard SAX
3769: * interface).
1.21 daniel 3770: *
1.22 daniel 3771: * [1] document ::= prolog element Misc*
1.29 daniel 3772: *
3773: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 3774: *
3775: * return values: 0, -1 in case of error. the parser context is augmented
3776: * as a result of the parsing.
1.1 veillard 3777: */
3778:
1.55 daniel 3779: int
3780: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 3781: xmlDefaultSAXHandlerInit();
3782:
1.14 veillard 3783: /*
1.44 daniel 3784: * SAX: beginning of the document processing.
3785: */
3786: if (ctxt->sax)
3787: ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
3788: if (ctxt->sax)
3789: ctxt->sax->startDocument(ctxt);
3790:
3791: /*
1.14 veillard 3792: * We should check for encoding here and plug-in some
3793: * conversion code TODO !!!!
3794: */
1.1 veillard 3795:
3796: /*
3797: * Wipe out everything which is before the first '<'
3798: */
1.59 ! daniel 3799: if (IS_BLANK(CUR)) {
! 3800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3801: ctxt->sax->error(ctxt,
! 3802: "Extra spaces at the beginning of the document are not allowed\n");
! 3803: ctxt->wellFormed = 0;
! 3804: SKIP_BLANKS;
! 3805: }
! 3806:
! 3807: if (CUR == 0) {
! 3808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3809: ctxt->sax->error(ctxt, "Document is empty\n");
! 3810: ctxt->wellFormed = 0;
! 3811: }
1.1 veillard 3812:
3813: /*
3814: * Check for the XMLDecl in the Prolog.
3815: */
1.40 daniel 3816: if ((CUR == '<') && (NXT(1) == '?') &&
3817: (NXT(2) == 'x') && (NXT(3) == 'm') &&
3818: (NXT(4) == 'l')) {
1.19 daniel 3819: xmlParseXMLDecl(ctxt);
3820: /* SKIP_EOL(cur); */
1.42 daniel 3821: SKIP_BLANKS;
1.40 daniel 3822: } else if ((CUR == '<') && (NXT(1) == '?') &&
3823: (NXT(2) == 'X') && (NXT(3) == 'M') &&
3824: (NXT(4) == 'L')) {
1.19 daniel 3825: /*
3826: * The first drafts were using <?XML and the final W3C REC
3827: * now use <?xml ...
3828: */
1.16 daniel 3829: xmlParseXMLDecl(ctxt);
1.1 veillard 3830: /* SKIP_EOL(cur); */
1.42 daniel 3831: SKIP_BLANKS;
1.1 veillard 3832: } else {
1.45 daniel 3833: CHAR *version;
3834:
3835: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3836: ctxt->doc = xmlNewDoc(version);
3837: free(version);
1.1 veillard 3838: }
3839:
3840: /*
3841: * The Misc part of the Prolog
3842: */
1.16 daniel 3843: xmlParseMisc(ctxt);
1.1 veillard 3844:
3845: /*
1.29 daniel 3846: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 3847: * (doctypedecl Misc*)?
3848: */
1.40 daniel 3849: if ((CUR == '<') && (NXT(1) == '!') &&
3850: (NXT(2) == 'D') && (NXT(3) == 'O') &&
3851: (NXT(4) == 'C') && (NXT(5) == 'T') &&
3852: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
3853: (NXT(8) == 'E')) {
1.22 daniel 3854: xmlParseDocTypeDecl(ctxt);
3855: xmlParseMisc(ctxt);
1.21 daniel 3856: }
3857:
3858: /*
3859: * Time to start parsing the tree itself
1.1 veillard 3860: */
1.45 daniel 3861: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 3862:
3863: /*
3864: * The Misc part at the end
3865: */
3866: xmlParseMisc(ctxt);
1.16 daniel 3867:
1.59 ! daniel 3868: if (CUR != 0) {
! 3869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3870: ctxt->sax->error(ctxt,
! 3871: "Extra content at the end of the document\n");
! 3872: ctxt->wellFormed = 0;
! 3873: }
! 3874:
1.44 daniel 3875: /*
3876: * SAX: end of the document processing.
3877: */
3878: if (ctxt->sax)
3879: ctxt->sax->endDocument(ctxt);
1.59 ! daniel 3880: if (! ctxt->wellFormed) return(-1);
1.16 daniel 3881: return(0);
3882: }
3883:
1.50 daniel 3884: /**
1.55 daniel 3885: * xmlSAXParseDoc :
3886: * @sax: the SAX handler block
1.50 daniel 3887: * @cur: a pointer to an array of CHAR
1.59 ! daniel 3888: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
! 3889: * documents
1.50 daniel 3890: *
3891: * parse an XML in-memory document and build a tree.
1.55 daniel 3892: * It use the given SAX function block to handle the parsing callback.
3893: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 3894: *
3895: * return values: the resulting document tree
1.16 daniel 3896: */
3897:
1.59 ! daniel 3898: xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
1.16 daniel 3899: xmlDocPtr ret;
3900: xmlParserCtxtPtr ctxt;
1.40 daniel 3901: xmlParserInputPtr input;
1.16 daniel 3902:
3903: if (cur == NULL) return(NULL);
1.1 veillard 3904:
1.16 daniel 3905: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
3906: if (ctxt == NULL) {
3907: perror("malloc");
3908: return(NULL);
3909: }
1.40 daniel 3910: xmlInitParserCtxt(ctxt);
1.56 daniel 3911: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 3912: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
3913: if (input == NULL) {
3914: perror("malloc");
3915: free(ctxt);
3916: return(NULL);
3917: }
3918:
3919: input->filename = NULL;
3920: input->line = 1;
3921: input->col = 1;
3922: input->base = cur;
3923: input->cur = cur;
3924:
3925: inputPush(ctxt, input);
1.16 daniel 3926:
3927:
3928: xmlParseDocument(ctxt);
1.59 ! daniel 3929: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
! 3930: else {
! 3931: ret = NULL;
! 3932: xmlFreeDoc(ctxt->doc);
! 3933: ctxt->doc = NULL;
! 3934: }
1.50 daniel 3935: free(ctxt->nodeTab);
3936: free(ctxt->inputTab);
3937: if (input->filename != NULL)
1.51 daniel 3938: free((char *)input->filename);
1.50 daniel 3939: free(input);
1.16 daniel 3940: free(ctxt);
3941:
1.1 veillard 3942: return(ret);
3943: }
3944:
1.50 daniel 3945: /**
1.55 daniel 3946: * xmlParseDoc :
3947: * @cur: a pointer to an array of CHAR
3948: *
3949: * parse an XML in-memory document and build a tree.
3950: *
3951: * return values: the resulting document tree
3952: */
3953:
3954: xmlDocPtr xmlParseDoc(CHAR *cur) {
1.59 ! daniel 3955: return(xmlSAXParseDoc(NULL, cur, 0));
! 3956: }
! 3957:
! 3958: /**
! 3959: * xmlRecoverDoc :
! 3960: * @cur: a pointer to an array of CHAR
! 3961: *
! 3962: * parse an XML in-memory document and build a tree.
! 3963: * In the case the document is not Well Formed, a tree is built anyway
! 3964: *
! 3965: * return values: the resulting document tree
! 3966: */
! 3967:
! 3968: xmlDocPtr xmlRecoverDoc(CHAR *cur) {
! 3969: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 3970: }
3971:
3972: /**
3973: * xmlSAXParseFile :
3974: * @sax: the SAX handler block
1.50 daniel 3975: * @filename: the filename
1.59 ! daniel 3976: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
! 3977: * documents
1.50 daniel 3978: *
3979: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
3980: * compressed document is provided by default if found at compile-time.
1.55 daniel 3981: * It use the given SAX function block to handle the parsing callback.
3982: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 3983: *
3984: * return values: the resulting document tree
1.9 httpng 3985: */
3986:
1.59 ! daniel 3987: xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
! 3988: int recovery) {
1.9 httpng 3989: xmlDocPtr ret;
1.20 daniel 3990: #ifdef HAVE_ZLIB_H
3991: gzFile input;
3992: #else
1.9 httpng 3993: int input;
1.20 daniel 3994: #endif
1.9 httpng 3995: int res;
1.55 daniel 3996: int len;
1.9 httpng 3997: struct stat buf;
3998: char *buffer;
1.16 daniel 3999: xmlParserCtxtPtr ctxt;
1.40 daniel 4000: xmlParserInputPtr inputStream;
1.9 httpng 4001:
1.11 veillard 4002: res = stat(filename, &buf);
1.9 httpng 4003: if (res < 0) return(NULL);
4004:
1.20 daniel 4005: #ifdef HAVE_ZLIB_H
1.55 daniel 4006: len = (buf.st_size * 8) + 1000;
1.20 daniel 4007: retry_bigger:
1.55 daniel 4008: buffer = malloc(len);
1.20 daniel 4009: #else
1.55 daniel 4010: len = buf.st_size + 100;
4011: buffer = malloc(len);
1.20 daniel 4012: #endif
1.9 httpng 4013: if (buffer == NULL) {
4014: perror("malloc");
4015: return(NULL);
4016: }
4017:
1.55 daniel 4018: memset(buffer, 0, len);
1.20 daniel 4019: #ifdef HAVE_ZLIB_H
4020: input = gzopen (filename, "r");
4021: if (input == NULL) {
4022: fprintf (stderr, "Cannot read file %s :\n", filename);
4023: perror ("gzopen failed");
4024: return(NULL);
4025: }
4026: #else
1.9 httpng 4027: input = open (filename, O_RDONLY);
4028: if (input < 0) {
4029: fprintf (stderr, "Cannot read file %s :\n", filename);
4030: perror ("open failed");
4031: return(NULL);
4032: }
1.20 daniel 4033: #endif
4034: #ifdef HAVE_ZLIB_H
1.55 daniel 4035: res = gzread(input, buffer, len);
1.20 daniel 4036: #else
1.9 httpng 4037: res = read(input, buffer, buf.st_size);
1.20 daniel 4038: #endif
1.9 httpng 4039: if (res < 0) {
4040: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 4041: #ifdef HAVE_ZLIB_H
4042: perror ("gzread failed");
4043: #else
1.9 httpng 4044: perror ("read failed");
1.20 daniel 4045: #endif
1.9 httpng 4046: return(NULL);
4047: }
1.20 daniel 4048: #ifdef HAVE_ZLIB_H
4049: gzclose(input);
1.55 daniel 4050: if (res >= len) {
1.20 daniel 4051: free(buffer);
1.55 daniel 4052: len *= 2;
1.20 daniel 4053: goto retry_bigger;
4054: }
4055: buf.st_size = res;
4056: #else
1.9 httpng 4057: close(input);
1.20 daniel 4058: #endif
4059:
1.40 daniel 4060: buffer[buf.st_size] = '\0';
1.9 httpng 4061:
1.16 daniel 4062: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4063: if (ctxt == NULL) {
4064: perror("malloc");
4065: return(NULL);
4066: }
1.40 daniel 4067: xmlInitParserCtxt(ctxt);
1.56 daniel 4068: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 4069: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4070: if (inputStream == NULL) {
4071: perror("malloc");
4072: free(ctxt);
4073: return(NULL);
4074: }
4075:
4076: inputStream->filename = strdup(filename);
4077: inputStream->line = 1;
4078: inputStream->col = 1;
1.45 daniel 4079:
4080: /*
4081: * TODO : plug some encoding conversion routines here. !!!
4082: */
1.40 daniel 4083: inputStream->base = buffer;
4084: inputStream->cur = buffer;
1.16 daniel 4085:
1.40 daniel 4086: inputPush(ctxt, inputStream);
1.16 daniel 4087:
4088: xmlParseDocument(ctxt);
1.40 daniel 4089:
1.59 ! daniel 4090: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
! 4091: else {
! 4092: ret = NULL;
! 4093: xmlFreeDoc(ctxt->doc);
! 4094: ctxt->doc = NULL;
! 4095: }
1.9 httpng 4096: free(buffer);
1.50 daniel 4097: free(ctxt->nodeTab);
4098: free(ctxt->inputTab);
4099: if (inputStream->filename != NULL)
1.51 daniel 4100: free((char *)inputStream->filename);
1.50 daniel 4101: free(inputStream);
1.20 daniel 4102: free(ctxt);
4103:
4104: return(ret);
4105: }
4106:
1.55 daniel 4107: /**
4108: * xmlParseFile :
4109: * @filename: the filename
4110: *
4111: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4112: * compressed document is provided by default if found at compile-time.
4113: *
4114: * return values: the resulting document tree
4115: */
4116:
4117: xmlDocPtr xmlParseFile(const char *filename) {
1.59 ! daniel 4118: return(xmlSAXParseFile(NULL, filename, 0));
! 4119: }
! 4120:
! 4121: /**
! 4122: * xmlRecoverFile :
! 4123: * @filename: the filename
! 4124: *
! 4125: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
! 4126: * compressed document is provided by default if found at compile-time.
! 4127: * In the case the document is not Well Formed, a tree is built anyway
! 4128: *
! 4129: * return values: the resulting document tree
! 4130: */
! 4131:
! 4132: xmlDocPtr xmlRecoverFile(const char *filename) {
! 4133: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 4134: }
1.32 daniel 4135:
1.50 daniel 4136: /**
1.55 daniel 4137: * xmlSAXParseMemory :
4138: * @sax: the SAX handler block
1.50 daniel 4139: * @cur: an pointer to a char array
4140: * @size: the siwe of the array
1.59 ! daniel 4141: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
! 4142: * documents
1.50 daniel 4143: *
1.55 daniel 4144: * parse an XML in-memory block and use the given SAX function block
4145: * to handle the parsing callback. If sax is NULL, fallback to the default
4146: * DOM tree building routines.
1.50 daniel 4147: *
4148: * TODO : plug some encoding conversion routines here. !!!
4149: *
4150: * return values: the resulting document tree
1.20 daniel 4151: */
1.50 daniel 4152:
1.59 ! daniel 4153: xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size,
! 4154: int recovery) {
1.20 daniel 4155: xmlDocPtr ret;
4156: xmlParserCtxtPtr ctxt;
1.40 daniel 4157: xmlParserInputPtr input;
4158:
4159: buffer[size - 1] = '\0';
4160:
1.20 daniel 4161: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4162: if (ctxt == NULL) {
4163: perror("malloc");
4164: return(NULL);
4165: }
1.40 daniel 4166: xmlInitParserCtxt(ctxt);
1.56 daniel 4167: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 4168: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4169: if (input == NULL) {
4170: perror("malloc");
1.50 daniel 4171: free(ctxt->nodeTab);
4172: free(ctxt->inputTab);
1.40 daniel 4173: free(ctxt);
4174: return(NULL);
4175: }
1.20 daniel 4176:
1.40 daniel 4177: input->filename = NULL;
4178: input->line = 1;
4179: input->col = 1;
1.45 daniel 4180:
4181: /*
4182: * TODO : plug some encoding conversion routines here. !!!
4183: */
1.40 daniel 4184: input->base = buffer;
4185: input->cur = buffer;
1.20 daniel 4186:
1.40 daniel 4187: inputPush(ctxt, input);
1.20 daniel 4188:
4189: xmlParseDocument(ctxt);
1.40 daniel 4190:
1.59 ! daniel 4191: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
! 4192: else {
! 4193: ret = NULL;
! 4194: xmlFreeDoc(ctxt->doc);
! 4195: ctxt->doc = NULL;
! 4196: }
1.50 daniel 4197: free(ctxt->nodeTab);
4198: free(ctxt->inputTab);
4199: if (input->filename != NULL)
1.51 daniel 4200: free((char *)input->filename);
1.50 daniel 4201: free(input);
1.16 daniel 4202: free(ctxt);
4203:
1.9 httpng 4204: return(ret);
1.17 daniel 4205: }
4206:
1.55 daniel 4207: /**
4208: * xmlParseMemory :
4209: * @cur: an pointer to a char array
4210: * @size: the size of the array
4211: *
4212: * parse an XML in-memory block and build a tree.
4213: *
4214: * return values: the resulting document tree
4215: */
4216:
4217: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 ! daniel 4218: return(xmlSAXParseMemory(NULL, buffer, size, 0));
! 4219: }
! 4220:
! 4221: /**
! 4222: * xmlRecoverMemory :
! 4223: * @cur: an pointer to a char array
! 4224: * @size: the size of the array
! 4225: *
! 4226: * parse an XML in-memory block and build a tree.
! 4227: * In the case the document is not Well Formed, a tree is built anyway
! 4228: *
! 4229: * return values: the resulting document tree
! 4230: */
! 4231:
! 4232: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
! 4233: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.55 daniel 4234: }
1.17 daniel 4235:
1.50 daniel 4236: /**
4237: * xmlInitParserCtxt:
4238: * @ctxt: an XML parser context
4239: *
4240: * Initialize a parser context
4241: */
4242:
1.55 daniel 4243: void
4244: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 4245: {
1.40 daniel 4246: /* Allocate the Input stack */
4247: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
4248: ctxt->inputNr = 0;
4249: ctxt->inputMax = 5;
4250: ctxt->input = NULL;
4251:
1.43 daniel 4252: /* Allocate the Node stack */
4253: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
4254: ctxt->nodeNr = 0;
4255: ctxt->nodeMax = 10;
4256: ctxt->node = NULL;
4257:
1.45 daniel 4258: ctxt->sax = &xmlDefaultSAXHandler;
1.32 daniel 4259: ctxt->doc = NULL;
1.59 ! daniel 4260: ctxt->wellFormed = 1;
1.32 daniel 4261: ctxt->record_info = 0;
4262: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 4263: }
4264:
1.50 daniel 4265: /**
4266: * xmlClearParserCtxt:
4267: * @ctxt: an XML parser context
4268: *
4269: * Clear (release owned resources) and reinitialize a parser context
4270: */
1.17 daniel 4271:
1.55 daniel 4272: void
4273: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 4274: {
1.32 daniel 4275: xmlClearNodeInfoSeq(&ctxt->node_seq);
4276: xmlInitParserCtxt(ctxt);
1.17 daniel 4277: }
4278:
4279:
1.50 daniel 4280: /**
4281: * xmlSetupParserForBuffer:
4282: * @ctxt: an XML parser context
4283: * @buffer: a CHAR * buffer
4284: * @filename: a file name
4285: *
1.19 daniel 4286: * Setup the parser context to parse a new buffer; Clears any prior
4287: * contents from the parser context. The buffer parameter must not be
4288: * NULL, but the filename parameter can be
4289: */
1.50 daniel 4290:
1.55 daniel 4291: void
4292: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 4293: const char* filename)
4294: {
1.40 daniel 4295: xmlParserInputPtr input;
4296:
4297: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4298: if (input == NULL) {
4299: perror("malloc");
4300: free(ctxt);
4301: exit(1);
4302: }
4303:
1.17 daniel 4304: xmlClearParserCtxt(ctxt);
1.40 daniel 4305: if (input->filename != NULL)
4306: input->filename = strdup(filename);
4307: else
4308: input->filename = NULL;
4309: input->line = 1;
4310: input->col = 1;
4311: input->base = buffer;
4312: input->cur = buffer;
4313:
4314: inputPush(ctxt, input);
1.17 daniel 4315: }
4316:
1.32 daniel 4317:
1.50 daniel 4318: /**
4319: * xmlParserFindNodeInfo:
4320: * @ctxt: an XML parser context
4321: * @node: an XML node within the tree
4322: *
4323: * Find the parser node info struct for a given node
4324: *
4325: * return values: an xmlParserNodeInfo block pointer or NULL
1.32 daniel 4326: */
4327: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
4328: const xmlNode* node)
4329: {
4330: unsigned long pos;
4331:
4332: /* Find position where node should be at */
4333: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
4334: if ( ctx->node_seq.buffer[pos].node == node )
4335: return &ctx->node_seq.buffer[pos];
4336: else
4337: return NULL;
4338: }
4339:
4340:
1.50 daniel 4341: /**
4342: * xmlInitNodeInfoSeq :
4343: * @seq: a node info sequence pointer
4344: *
4345: * -- Initialize (set to initial state) node info sequence
1.32 daniel 4346: */
1.55 daniel 4347: void
4348: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 4349: {
4350: seq->length = 0;
4351: seq->maximum = 0;
4352: seq->buffer = NULL;
4353: }
4354:
1.50 daniel 4355: /**
4356: * xmlClearNodeInfoSeq :
4357: * @seq: a node info sequence pointer
4358: *
4359: * -- Clear (release memory and reinitialize) node
1.32 daniel 4360: * info sequence
4361: */
1.55 daniel 4362: void
4363: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 4364: {
4365: if ( seq->buffer != NULL )
4366: free(seq->buffer);
4367: xmlInitNodeInfoSeq(seq);
4368: }
4369:
4370:
1.50 daniel 4371: /**
4372: * xmlParserFindNodeInfoIndex:
4373: * @seq: a node info sequence pointer
4374: * @node: an XML node pointer
4375: *
4376: *
1.32 daniel 4377: * xmlParserFindNodeInfoIndex : Find the index that the info record for
4378: * the given node is or should be at in a sorted sequence
1.50 daniel 4379: * return values: a long indicating the position of the record
1.32 daniel 4380: */
4381: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
4382: const xmlNode* node)
4383: {
4384: unsigned long upper, lower, middle;
4385: int found = 0;
4386:
4387: /* Do a binary search for the key */
4388: lower = 1;
4389: upper = seq->length;
4390: middle = 0;
4391: while ( lower <= upper && !found) {
4392: middle = lower + (upper - lower) / 2;
4393: if ( node == seq->buffer[middle - 1].node )
4394: found = 1;
4395: else if ( node < seq->buffer[middle - 1].node )
4396: upper = middle - 1;
4397: else
4398: lower = middle + 1;
4399: }
4400:
4401: /* Return position */
4402: if ( middle == 0 || seq->buffer[middle - 1].node < node )
4403: return middle;
4404: else
4405: return middle - 1;
4406: }
4407:
4408:
1.50 daniel 4409: /**
4410: * xmlParserAddNodeInfo:
4411: * @ctxt: an XML parser context
4412: * @seq: a node info sequence pointer
4413: *
4414: * Insert node info record into the sorted sequence
1.32 daniel 4415: */
1.55 daniel 4416: void
4417: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.32 daniel 4418: const xmlParserNodeInfo* info)
4419: {
4420: unsigned long pos;
4421: static unsigned int block_size = 5;
4422:
4423: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 4424: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
4425: if ( pos < ctxt->node_seq.length
4426: && ctxt->node_seq.buffer[pos].node == info->node ) {
4427: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 4428: }
4429:
4430: /* Otherwise, we need to add new node to buffer */
4431: else {
4432: /* Expand buffer by 5 if needed */
1.55 daniel 4433: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 4434: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 4435: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
4436: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 4437:
1.55 daniel 4438: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 4439: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
4440: else
1.55 daniel 4441: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 4442:
4443: if ( tmp_buffer == NULL ) {
1.55 daniel 4444: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 4445: ctxt->sax->error(ctxt, "Out of memory\n");
1.32 daniel 4446: return;
4447: }
1.55 daniel 4448: ctxt->node_seq.buffer = tmp_buffer;
4449: ctxt->node_seq.maximum += block_size;
1.32 daniel 4450: }
4451:
4452: /* If position is not at end, move elements out of the way */
1.55 daniel 4453: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 4454: unsigned long i;
4455:
1.55 daniel 4456: for ( i = ctxt->node_seq.length; i > pos; i-- )
4457: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 4458: }
4459:
4460: /* Copy element and increase length */
1.55 daniel 4461: ctxt->node_seq.buffer[pos] = *info;
4462: ctxt->node_seq.length++;
1.32 daniel 4463: }
4464: }
Webmaster