Annotation of XML/parser.c, revision 1.28
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.28 ! daniel 6: * $Id: parser.c,v 1.27 1998/08/03 20:06:17 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
34: /*
35: * A few macros needed to help building the parser.
36: */
37:
38: #ifdef UNICODE
/*
 * UNICODE version of the macros. Incomplete for now (TODO).
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *              | [#x10000-#x10FFFF]
 *     any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The three ranges are spelled out so that the surrogate block
 * (#xD800-#xDFFF), #xFFFE, #xFFFF and anything above #x10FFFF are rejected.
 */
#define IS_CHAR(c)                                                      \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||                 \
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||                              \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||                            \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
49:
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Advance p over white space. Only the four code points of production [3]
 * are skipped; #x3000 (ideographic space) is not part of S.
 * The expansion already ends with ';' and evaluates p several times.
 */
#define SKIP_BLANKS(p)                                                  \
    while ((*(p) == 0x20) || (*(p) == 0x09) || (*(p) == 0x0a) ||        \
           (*(p) == 0x0d)) (p)++;
1.1 veillard 56:
/*
 * Incomplete, only the Latin-1 part of the production is covered (TODO).
 *
 * [85] BaseChar ::= ... long list see REC ...
 *
 * The former "(c >= 0xaa) && (c <= 0x5b)" term could never be true and has
 * been removed; #xBA is not listed by production [85] of the REC either.
 */
#define IS_BASECHAR(c)                                                  \
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||                                \
     (((c) >= 0x61) && ((c) <= 0x7a)) ||                                \
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||                                \
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||                                \
     (((c) >= 0xf8) && ((c) <= 0xff)))
70:
/*
 * Incomplete, only the ASCII digits #x30-#x39 are recognized (TODO).
 *
 * [88] Digit ::= ... long list see REC ...
 */
#define IS_DIGIT(c) (((c) <= 0x39) && ((c) >= 0x30))
77:
/*
 * Not implemented yet: no value is ever reported as a combining
 * character and the argument is not evaluated (TODO).
 *
 * [87] CombiningChar ::= ... long list see REC ...
 */
#define IS_COMBINING(c) (0)
84:
1.22 daniel 85: /*
86: * Was in old WD ... removed from REC
87: *
1.3 veillard 88: #define IS_IGNORABLE(c) \
89: ((((c) >= 0x200c) && ((c) <= 0x200f)) || \
90: (((c) >= 0x202a) && ((c) <= 0x202e)) || \
91: (((c) >= 0x206a) && ((c) <= 0x206f)) || \
92: ((c) == 0xfeff))
1.22 daniel 93: */
1.3 veillard 94:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Every alternative of the production is listed in the same order below.
 */
#define IS_EXTENDER(c)                                                  \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||               \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||              \
     ((c) == 0xec6) || ((c) == 0x3005) ||                               \
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||                            \
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||                            \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 107:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted; the
 * #xF900-#xFA2D range previously tested here is not part of it.
 */
#define IS_IDEOGRAPHIC(c)                                               \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||                            \
     ((c) == 0x3007) ||                                                 \
     (((c) >= 0x3021) && ((c) <= 0x3029)))
116:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Simple union of the two classes defined by IS_BASECHAR and
 * IS_IDEOGRAPHIC; the argument is evaluated several times.
 */
#define IS_LETTER(c) ((IS_BASECHAR(c)) || (IS_IDEOGRAPHIC(c)))
121:
122: #else
/*
 * 8bits / ASCII version of the macros.
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *              | [#x10000-#x10FFFF]
 *     any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * In this build the test reduces to: tab, line feed, carriage return,
 * or any value greater than or equal to #x20.
 */
#define IS_CHAR(c)                                                      \
    (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
1.1 veillard 133:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Latin-1 part of the production only. The former
 * "(c >= 0xaa) && (c <= 0x5b)" term could never be true and has been
 * removed; #xBA is not listed by production [85] of the REC either.
 */
#define IS_BASECHAR(c)                                                  \
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||                                \
     (((c) >= 0x61) && ((c) <= 0x7a)) ||                                \
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||                                \
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||                                \
     (((c) >= 0xf8) && ((c) <= 0xff)))
145:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8 bit build: only the ASCII digits #x30-#x39 are recognized.
 */
#define IS_DIGIT(c) (((c) <= 0x39) && ((c) >= 0x30))
150:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8 bit build: a letter is simply whatever IS_BASECHAR accepts.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))
155:
1.22 daniel 156:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8 bit build: always false, the argument is not evaluated.
 */
#define IS_COMBINING(c) (0)
161:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8 bit build: only #xB7 is tested.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
168:
1.21 daniel 169: #endif /* !UNICODE */
1.1 veillard 170:
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * True for space, tab, carriage return and line feed.
 */
#define IS_BLANK(c)                                                     \
    (((c) == 0x0a) || ((c) == 0x0d) || ((c) == 0x09) || ((c) == 0x20))
178:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Alphanumerics are tested first, then the three white space values,
 * then the punctuation set of the production.
 */
#define IS_PUBIDCHAR(c)                                                 \
    ((((c) >= '0') && ((c) <= '9')) ||                                  \
     (((c) >= 'A') && ((c) <= 'Z')) ||                                  \
     (((c) >= 'a') && ((c) <= 'z')) ||                                  \
     ((c) == 0x0A) || ((c) == 0x0D) || ((c) == 0x20) ||                 \
     ((c) == '!') || ((c) == '#') || ((c) == '$') || ((c) == '%') ||    \
     ((c) == '\'') || ((c) == '(') || ((c) == ')') || ((c) == '*') ||   \
     ((c) == '+') || ((c) == ',') || ((c) == '-') || ((c) == '.') ||    \
     ((c) == '/') || ((c) == ':') || ((c) == ';') || ((c) == '=') ||    \
     ((c) == '?') || ((c) == '@') || ((c) == '_'))
1.1 veillard 192:
/*
 * Skip one end of line sequence at p: CR, LF, CR LF or LF CR.
 *
 * CR is #xD (decimal 13) and LF is #xA (decimal 10); the values are
 * written in hexadecimal here. The two cases are chained with "else" so
 * that a single line ending is consumed per use.
 * The expansion is an if/else statement and evaluates p several times:
 * use it as a statement of its own, with an lvalue argument.
 */
#define SKIP_EOL(p)                                                     \
    if (*(p) == 0x0d) { (p)++; if (*(p) == 0x0a) (p)++; }               \
    else if (*(p) == 0x0a) { (p)++; if (*(p) == 0x0d) (p)++; }
196:
/*
 * Advance p over any run of IS_BLANK characters.
 *
 * The UNICODE branch above defines its own SKIP_BLANKS; it is dropped
 * first so that this definition is not an incompatible redefinition.
 * The expansion already ends with ';' and evaluates p several times.
 */
#undef SKIP_BLANKS
#define SKIP_BLANKS(p)                                                  \
    while (IS_BLANK(*(p))) (p)++;
199:
/*
 * Advance p until it points at '>' or at a value rejected by IS_CHAR.
 * The expansion already ends with ';' and evaluates p several times.
 */
#define MOVETO_ENDTAG(p)                                                \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++;
202:
/*
 * Advance p until it points at '<' or at a value rejected by IS_CHAR.
 * The expansion already ends with ';' and evaluates p several times.
 */
#define MOVETO_STARTTAG(p)                                              \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++;
205:
206: /*
1.3 veillard 207: * Forward declaration needed for recursive behaviour.
208: */
1.16 daniel 209: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.3 veillard 210:
211: /*
212: * xmlHandleData : this routine represent's the specific application
213: * behaviour when reading a piece of text.
214: *
215: * For example in WebDav, any piece made only of blanks is eliminated
216: */
217:
218: CHAR *xmlHandleData(CHAR *in) {
219: CHAR *cur;
220:
221: if (in == NULL) return(NULL);
222: cur = in;
223: while (IS_CHAR(*cur)) {
224: if (!IS_BLANK(*cur)) goto not_blank;
225: cur++;
226: }
227: free(in);
228: return(NULL);
229:
230: not_blank:
231: return(in);
232: }
233:
1.28 ! daniel 234: /************************************************************************
! 235: * *
! 236: * Commodity functions to handle CHARs *
! 237: * *
! 238: ************************************************************************/
! 239:
1.3 veillard 240: /*
1.1 veillard 241: * xmlStrndup : a strdup for array of CHAR's
242: */
243:
1.6 httpng 244: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 245: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
246:
247: if (ret == NULL) {
248: fprintf(stderr, "malloc of %d byte failed\n",
249: (len + 1) * sizeof(CHAR));
250: return(NULL);
251: }
252: memcpy(ret, cur, len * sizeof(CHAR));
253: ret[len] = 0;
254: return(ret);
255: }
256:
257: /*
258: * xmlStrdup : a strdup for CHAR's
259: */
260:
1.6 httpng 261: CHAR *xmlStrdup(const CHAR *cur) {
262: const CHAR *p = cur;
1.1 veillard 263:
264: while (IS_CHAR(*p)) p++;
265: return(xmlStrndup(cur, p - cur));
266: }
267:
268: /*
1.14 veillard 269: * xmlStrcmp : a strcmp for CHAR's
270: */
271:
272: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
273: register int tmp;
274:
275: do {
276: tmp = *str1++ - *str2++;
277: if (tmp != 0) return(tmp);
278: } while ((*str1 != 0) && (*str2 != 0));
279: return (*str1 - *str2);
280: }
281:
282: /*
283: * xmlStrncmp : a strncmp for CHAR's
284: */
285:
286: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
287: register int tmp;
288:
289: if (len <= 0) return(0);
290: do {
291: tmp = *str1++ - *str2++;
292: if (tmp != 0) return(tmp);
293: len--;
294: if (len <= 0) return(0);
295: } while ((*str1 != 0) && (*str2 != 0));
296: return (*str1 - *str2);
297: }
298:
299: /*
300: * xmlStrchr : a strchr for CHAR's
301: */
302:
303: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
304: while (*str != 0) {
305: if (*str == val) return((CHAR *) str);
306: str++;
307: }
308: return(NULL);
309: }
1.28 ! daniel 310:
! 311: /************************************************************************
! 312: * *
! 313: * Extra stuff for namespace support *
! 314: * Relates to http://www.w3.org/TR/WD-xml-names *
! 315: * *
! 316: ************************************************************************/
! 317:
! 318: /*
! 319: * xmlNamespaceParseNCName : parse an XML namespace name.
! 320: *
! 321: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
! 322: *
! 323: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
! 324: * CombiningChar | Extender
! 325: */
! 326:
! 327: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
! 328: const CHAR *q;
! 329: CHAR *ret = NULL;
! 330:
! 331: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
! 332: q = ctxt->cur++;
! 333:
! 334: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
! 335: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
! 336: (ctxt->cur[0] == '_') ||
! 337: (IS_COMBINING(ctxt->cur[0])) ||
! 338: (IS_EXTENDER(ctxt->cur[0])))
! 339: ctxt->cur++;
! 340:
! 341: ret = xmlStrndup(q, ctxt->cur - q);
! 342:
! 343: return(ret);
! 344: }
! 345:
! 346: /*
! 347: * xmlNamespaceParseQName : parse an XML qualified name
! 348: *
! 349: * [NS 5] QName ::= (Prefix ':')? LocalPart
! 350: *
! 351: * [NS 6] Prefix ::= NCName
! 352: *
! 353: * [NS 7] LocalPart ::= NCName
! 354: */
! 355:
! 356: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
! 357: CHAR *ret = NULL;
! 358:
! 359: *prefix = NULL;
! 360: ret = xmlNamespaceParseNCName(ctxt);
! 361: if (ctxt->cur[0] == ':') {
! 362: *prefix = ret;
! 363: ctxt->cur++;
! 364: ret = xmlNamespaceParseNCName(ctxt);
! 365: }
! 366:
! 367: return(ret);
! 368: }
! 369:
! 370: /*
! 371: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
! 372: *
! 373: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
! 374: *
! 375: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
! 376: */
! 377:
! 378: void xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
! 379: CHAR *name = NULL;
! 380:
! 381: if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') &&
! 382: (ctxt->cur[2] == 'l') && (ctxt->cur[3] == 'n') &&
! 383: (ctxt->cur[4] == 's')) {
! 384: ctxt->cur += 5;
! 385: if (ctxt->cur[0] == ':') {
! 386: ctxt->cur++;
! 387: name = xmlNamespaceParseNCName(ctxt);
! 388: }
! 389: }
! 390: }
! 391:
! 392: /************************************************************************
! 393: * *
! 394: * The parser itself *
! 395: * Relates to http://www.w3.org/TR/REC-xml *
! 396: * *
! 397: ************************************************************************/
1.14 veillard 398:
399: /*
1.1 veillard 400: * xmlParseName : parse an XML name.
1.22 daniel 401: *
402: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
403: * CombiningChar | Extender
404: *
405: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
406: *
407: * [6] Names ::= Name (S Name)*
1.1 veillard 408: */
409:
1.16 daniel 410: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 411: const CHAR *q;
412: CHAR *ret = NULL;
1.1 veillard 413:
1.22 daniel 414: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_') &&
415: (ctxt->cur[0] != ':')) return(NULL);
416: q = ctxt->cur++;
417:
418: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
419: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
420: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
421: (IS_COMBINING(ctxt->cur[0])) ||
422: (IS_EXTENDER(ctxt->cur[0])))
423: ctxt->cur++;
424:
425: ret = xmlStrndup(q, ctxt->cur - q);
426:
427: return(ret);
428: }
429:
430: /*
431: * xmlParseNmtoken : parse an XML Nmtoken.
432: *
433: * [7] Nmtoken ::= (NameChar)+
434: *
435: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
436: */
437:
438: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
439: const CHAR *q;
440: CHAR *ret = NULL;
441:
1.16 daniel 442: q = ctxt->cur++;
1.22 daniel 443:
1.16 daniel 444: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1.22 daniel 445: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
446: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
447: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 448: (IS_EXTENDER(ctxt->cur[0])))
449: ctxt->cur++;
1.3 veillard 450:
1.16 daniel 451: ret = xmlStrndup(q, ctxt->cur - q);
1.1 veillard 452:
1.3 veillard 453: return(ret);
1.1 veillard 454: }
455:
456: /*
1.24 daniel 457: * xmlParseEntityValue : parse a value for ENTITY decl.
458: *
459: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
460: * "'" ([^%&'] | PEReference | Reference)* "'"
461: */
462:
463: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
464: CHAR *ret = NULL;
465: const CHAR *q;
466: int needSubst;
467:
468: if (ctxt->cur[0] == '"') {
469: ctxt->cur++;
470:
471: q = ctxt->cur;
472: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
473: if (ctxt->cur[0] == '%') {
474: needSubst = 1; /* TODO !!! */
475: ctxt->cur++;
476: } else if (ctxt->cur[0] == '%') {
477: needSubst = 1; /* TODO !!! */
478: ctxt->cur++;
479: } else
480: ctxt->cur++;
481: }
482: if (!IS_CHAR(ctxt->cur[0])) {
483: fprintf(stderr, "Unfinished EntityValue %30s\n", q);
484: } else {
485: ret = xmlStrndup(q, ctxt->cur - q);
486: ctxt->cur++;
487: }
488: } else if (ctxt->cur[0] == '\'') {
489: ctxt->cur++;
490: q = ctxt->cur;
491: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
492: if (ctxt->cur[0] == '%') {
493: needSubst = 1; /* TODO !!! */
494: ctxt->cur++;
495: } else if (ctxt->cur[0] == '%') {
496: needSubst = 1; /* TODO !!! */
497: ctxt->cur++;
498: } else
499: ctxt->cur++;
500: }
501: if (!IS_CHAR(ctxt->cur[0])) {
502: fprintf(stderr, "Unfinished EntityValue %30s\n", q);
503: } else {
504: ret = xmlStrndup(q, ctxt->cur - q);
505: ctxt->cur++;
506: }
507: } else {
508: fprintf(stderr, "xmlParseEntityValue \" or ' expected: %30s\n",
509: ctxt->cur);
510: }
511:
512: return(ret);
513: }
514:
515: /*
1.21 daniel 516: * xmlParseSystemLiteral : parse an XML Literal
517: *
1.22 daniel 518: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 519: */
520:
521: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
522: const CHAR *q;
523: CHAR *ret = NULL;
524:
525: if (ctxt->cur[0] == '"') {
526: ctxt->cur++;
527: q = ctxt->cur;
1.22 daniel 528: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"'))
1.21 daniel 529: ctxt->cur++;
1.22 daniel 530: if (!IS_CHAR(ctxt->cur[0])) {
1.21 daniel 531: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
532: } else {
533: ret = xmlStrndup(q, ctxt->cur - q);
534: ctxt->cur++;
535: }
536: } else if (ctxt->cur[0] == '\'') {
537: ctxt->cur++;
538: q = ctxt->cur;
1.22 daniel 539: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
1.21 daniel 540: ctxt->cur++;
1.22 daniel 541: if (!IS_CHAR(ctxt->cur[0])) {
1.21 daniel 542: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
543: } else {
544: ret = xmlStrndup(q, ctxt->cur - q);
545: ctxt->cur++;
546: }
547: } else {
548: fprintf(stderr, "SystemLiteral \" or ' expected: %30s\n", ctxt->cur);
549: }
550:
551: return(ret);
552: }
553:
554: /*
1.27 daniel 555: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 556: *
1.22 daniel 557: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 558: */
559:
560: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
561: const CHAR *q;
562: CHAR *ret = NULL;
563: /*
564: * Name ::= (Letter | '_') (NameChar)*
565: */
566: if (ctxt->cur[0] == '"') {
567: ctxt->cur++;
568: q = ctxt->cur;
569: while (IS_PUBIDCHAR(ctxt->cur[0])) ctxt->cur++;
570: if (ctxt->cur[0] != '"') {
571: fprintf(stderr, "Unfinished PubidLiteral %30s\n", q);
572: } else {
573: ret = xmlStrndup(q, ctxt->cur - q);
574: ctxt->cur++;
575: }
576: } else if (ctxt->cur[0] == '\'') {
577: ctxt->cur++;
578: q = ctxt->cur;
579: while ((IS_LETTER(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
580: ctxt->cur++;
581: if (!IS_LETTER(ctxt->cur[0])) {
582: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
583: } else {
584: ret = xmlStrndup(q, ctxt->cur - q);
585: ctxt->cur++;
586: }
587: } else {
588: fprintf(stderr, "SystemLiteral \" or ' expected: %30s\n", ctxt->cur);
589: }
590:
591: return(ret);
592: }
593:
594: /*
1.27 daniel 595: * xmlParseCharData: parse a CharData section.
596: * if we are within a CDATA section ']]>' marks an end of section.
597: *
598: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
599: */
600:
601: CHAR *xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
602: const CHAR *q;
603: CHAR *ret = NULL;
604:
605: q = ctxt->cur;
606: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '<') &&
607: (ctxt->cur[0] != '&')) {
608: ctxt->cur++;
609: if ((cdata) && (ctxt->cur[0] == ']') && (ctxt->cur[1] == ']') &&
610: (ctxt->cur[2] == '>')) break;
611: }
612: if (q == ctxt->cur) return(NULL);
613: ret = xmlStrndup(q, ctxt->cur - q);
614: return(ret);
615: }
616:
617: /*
1.22 daniel 618: * xmlParseExternalID: Parse an External ID
619: *
620: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
621: * | 'PUBLIC' S PubidLiteral S SystemLiteral
622: */
623:
624: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **PubidLiteral) {
625: CHAR *ExternalID = NULL;
626:
627: *PubidLiteral = NULL;
628: if ((ctxt->cur[0] == 'S') && (ctxt->cur[1] == 'Y') &&
629: (ctxt->cur[2] == 'S') && (ctxt->cur[3] == 'T') &&
630: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M')) {
631: ctxt->cur += 6;
632: SKIP_BLANKS(ctxt->cur);
633: ExternalID = xmlParseSystemLiteral(ctxt);
634: if (ExternalID == NULL)
635: fprintf(stderr, "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
636: } else if ((ctxt->cur[0] == 'P') && (ctxt->cur[1] == 'U') &&
637: (ctxt->cur[2] == 'B') && (ctxt->cur[3] == 'L') &&
638: (ctxt->cur[4] == 'I') && (ctxt->cur[5] == 'C')) {
639: ctxt->cur += 6;
640: SKIP_BLANKS(ctxt->cur);
641: *PubidLiteral = xmlParsePubidLiteral(ctxt);
642: if (*PubidLiteral == NULL)
643: fprintf(stderr, "xmlParseExternalID: PUBLIC, no PubidLiteral\n");
644: SKIP_BLANKS(ctxt->cur);
645: ExternalID = xmlParseSystemLiteral(ctxt);
646: if (ExternalID == NULL)
647: fprintf(stderr, "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
648: }
649: return(ExternalID);
650: }
651:
652: /*
1.1 veillard 653: * Parse and return a string between quotes or doublequotes
654: */
1.16 daniel 655: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.1 veillard 656: CHAR *ret = NULL;
1.17 daniel 657: const CHAR *q;
1.1 veillard 658:
1.16 daniel 659: if (ctxt->cur[0] == '"') {
660: ctxt->cur++;
661: q = ctxt->cur;
662: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++;
663: if (ctxt->cur[0] != '"')
1.7 veillard 664: fprintf(stderr, "String not closed \"%.50s\n", q);
1.1 veillard 665: else {
1.16 daniel 666: ret = xmlStrndup(q, ctxt->cur - q);
667: ctxt->cur++;
1.1 veillard 668: }
1.16 daniel 669: } else if (ctxt->cur[0] == '\''){
670: ctxt->cur++;
671: q = ctxt->cur;
672: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++;
673: if (ctxt->cur[0] != '\'')
1.7 veillard 674: fprintf(stderr, "String not closed '%.50s\n", q);
1.1 veillard 675: else {
1.16 daniel 676: ret = xmlStrndup(q, ctxt->cur - q);
677: ctxt->cur++;
1.1 veillard 678: }
679: }
680: return(ret);
681: }
682:
683: /*
1.3 veillard 684: * Skip an XML (SGML) comment <!-- .... -->
1.16 daniel 685: *
686: * TODO !!!! Save the comment in the tree !!!
1.22 daniel 687: *
688: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 689: */
1.16 daniel 690: void xmlParserSkipComment(xmlParserCtxtPtr ctxt) {
1.17 daniel 691: const CHAR *q, *start;
692: const CHAR *r;
1.3 veillard 693:
694: /*
1.22 daniel 695: * Check that there is a comment right here.
1.3 veillard 696: */
1.16 daniel 697: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
698: (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return;
1.3 veillard 699:
1.16 daniel 700: ctxt->cur += 4;
701: start = q = ctxt->cur;
702: ctxt->cur++;
703: r = ctxt->cur;
704: ctxt->cur++;
705: while (IS_CHAR(ctxt->cur[0]) &&
706: ((ctxt->cur[0] == ':') || (ctxt->cur[0] != '>') ||
707: (*r != '-') || (*q != '-'))) {
708: ctxt->cur++;r++;q++;
1.3 veillard 709: }
1.16 daniel 710: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 711: fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
1.16 daniel 712: ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
1.3 veillard 713: } else {
1.16 daniel 714: ctxt->cur++;
1.3 veillard 715: }
716: }
717:
718: /*
1.13 veillard 719: * xmlParseNamespace: parse specific '<?namespace ...' constructs.
1.22 daniel 720: *
721: * TODO !!! Check the upcoming REC ...
1.1 veillard 722: */
723:
1.16 daniel 724: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.1 veillard 725: CHAR *href = NULL;
726: CHAR *AS = NULL;
1.3 veillard 727: int garbage = 0;
1.1 veillard 728:
729: /*
1.18 daniel 730: * We just skipped "namespace" or "xml:namespace"
1.1 veillard 731: */
1.16 daniel 732: SKIP_BLANKS(ctxt->cur);
1.1 veillard 733:
1.16 daniel 734: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
1.1 veillard 735: /*
1.18 daniel 736: * We can have "ns" or "prefix" attributes
737: * Old encoding as 'href' or 'AS' attributes is still supported
1.1 veillard 738: */
1.18 daniel 739: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
740: garbage = 0;
741: ctxt->cur += 2;
742: SKIP_BLANKS(ctxt->cur);
743:
744: if (ctxt->cur[0] != '=') continue;
745: ctxt->cur++;
746: SKIP_BLANKS(ctxt->cur);
747:
748: href = xmlParseQuotedString(ctxt);
749: SKIP_BLANKS(ctxt->cur);
750: } else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
1.16 daniel 751: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
1.3 veillard 752: garbage = 0;
1.16 daniel 753: ctxt->cur += 4;
754: SKIP_BLANKS(ctxt->cur);
1.1 veillard 755:
1.16 daniel 756: if (ctxt->cur[0] != '=') continue;
757: ctxt->cur++;
758: SKIP_BLANKS(ctxt->cur);
759:
760: href = xmlParseQuotedString(ctxt);
761: SKIP_BLANKS(ctxt->cur);
1.18 daniel 762: } else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
763: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
764: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
765: garbage = 0;
766: ctxt->cur += 6;
767: SKIP_BLANKS(ctxt->cur);
768:
769: if (ctxt->cur[0] != '=') continue;
770: ctxt->cur++;
771: SKIP_BLANKS(ctxt->cur);
772:
773: AS = xmlParseQuotedString(ctxt);
774: SKIP_BLANKS(ctxt->cur);
1.16 daniel 775: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
1.3 veillard 776: garbage = 0;
1.16 daniel 777: ctxt->cur += 2;
778: SKIP_BLANKS(ctxt->cur);
1.1 veillard 779:
1.16 daniel 780: if (ctxt->cur[0] != '=') continue;
781: ctxt->cur++;
782: SKIP_BLANKS(ctxt->cur);
783:
784: AS = xmlParseQuotedString(ctxt);
785: SKIP_BLANKS(ctxt->cur);
786: } else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
1.3 veillard 787: garbage = 0;
1.16 daniel 788: ctxt->cur ++;
1.1 veillard 789: } else {
1.3 veillard 790: /*
791: * Found garbage when parsing the namespace
792: */
793: if (!garbage) fprintf(stderr,
1.13 veillard 794: "\nxmlParseNamespace found garbage: ");
1.16 daniel 795: fprintf(stderr, "%c", ctxt->cur[0]);
796: ctxt->cur++;
1.1 veillard 797: }
798: }
799:
1.16 daniel 800: MOVETO_ENDTAG(ctxt->cur);
801: ctxt->cur++;
1.1 veillard 802:
803: /*
804: * Register the DTD.
805: */
806: if (href != NULL)
1.16 daniel 807: xmlNewDtd(ctxt->doc, href, AS);
1.1 veillard 808:
1.8 veillard 809: if (AS != NULL) free(AS);
810: if (href != NULL) free(href);
1.1 veillard 811: }
812:
813: /*
1.22 daniel 814: * xmlParsePITarget: parse the name of a PI
815: *
816: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
817: */
818:
819: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
820: CHAR *name;
821:
822: name = xmlParseName(ctxt);
823: if ((name != NULL) && (name[3] == 0) &&
824: ((name[0] == 'x') || (name[0] == 'X')) &&
825: ((name[0] == 'm') || (name[0] == 'M')) &&
826: ((name[0] == 'l') || (name[0] == 'L'))) {
827: fprintf(stderr, "xmlParsePItarget: invalid name 'xml'\n");
828: return(NULL);
829: }
830: return(name);
831: }
832:
833: /*
1.3 veillard 834: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 835: *
836: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 837: */
838:
1.16 daniel 839: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 840: CHAR *target;
841:
1.16 daniel 842: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1.3 veillard 843: /*
844: * this is a Processing Instruction.
845: */
1.16 daniel 846: ctxt->cur += 2;
1.3 veillard 847:
848: /*
1.22 daniel 849: * Parse the target name and check for special support like
850: * namespace.
851: *
852: * TODO : PI handling should be dynamically redefinable using an
853: * API. Only namespace should be in the code IMHO ...
1.3 veillard 854: */
1.22 daniel 855: target = xmlParsePITarget(ctxt);
856: if (target != NULL) {
857: /*
858: * Support for the Processing Instruction related to namespace.
859: */
860: if ((target[0] == 'n') && (target[1] == 'a') &&
861: (target[2] == 'm') && (target[3] == 'e') &&
862: (target[4] == 's') && (target[5] == 'p') &&
863: (target[6] == 'a') && (target[7] == 'c') &&
864: (target[8] == 'e')) {
865: xmlParseNamespace(ctxt);
866: } else if ((target[0] == 'x') && (target[1] == 'm') &&
867: (target[2] == 'l') && (target[3] == ':') &&
868: (target[4] == 'n') && (target[5] == 'a') &&
869: (target[6] == 'm') && (target[7] == 'e') &&
870: (target[8] == 's') && (target[9] == 'p') &&
871: (target[10] == 'a') && (target[11] == 'c') &&
872: (target[12] == 'e')) {
873: xmlParseNamespace(ctxt);
874: } else {
875: /* Unknown PI, ignore it ! */
876: fprintf(stderr, "xmlParsePI : skipping unknown PI %s\n",
877: target);
878: while (IS_CHAR(ctxt->cur[0]) &&
1.24 daniel 879: ((ctxt->cur[0] != '?') || (ctxt->cur[1] != '>')))
1.22 daniel 880: ctxt->cur++;
881: if (!IS_CHAR(ctxt->cur[0])) {
882: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
883: target);
1.24 daniel 884: } else
885: ctxt->cur += 2;
1.22 daniel 886: }
1.3 veillard 887: } else {
1.22 daniel 888: fprintf(stderr, "xmlParsePI : no target name...\n");
889: /********* Should we try to complete parsing the PI ???
890: while (IS_CHAR(ctxt->cur[0]) &&
891: (ctxt->cur[0] != '?') && (ctxt->cur[0] != '>'))
892: ctxt->cur++;
893: if (!IS_CHAR(ctxt->cur[0])) {
894: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
895: target);
896: }
897: ********************************************************/
898: }
899: }
900: }
901:
902: /*
903: * xmlParseNotationDecl: parse a notation declaration
904: *
905: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
906: *
907: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
908: *
909: * NOTE: Actually [75] and [83] interract badly since [75] can generate
910: * 'PUBLIC' S PubidLiteral S SystemLiteral
911: *
912: * Hence there is actually 3 choices:
913: * 'PUBLIC' S PubidLiteral
914: * 'PUBLIC' S PubidLiteral S SystemLiteral
915: * and 'SYSTEM' S SystemLiteral
916: */
917:
918: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
919: CHAR *name;
920:
921: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
922: (ctxt->cur[2] == 'N') && (ctxt->cur[3] == 'O') &&
923: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'A') &&
924: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'I') &&
925: (ctxt->cur[8] == 'O') && (ctxt->cur[9] == 'N') &&
926: (IS_BLANK(ctxt->cur[10]))) {
927: ctxt->cur += 10;
928: SKIP_BLANKS(ctxt->cur);
929:
930: name = xmlParseName(ctxt);
931: if (name == NULL) {
932: fprintf(stderr,
933: "xmlParseAttributeListDecl: no name for Element %30s\n",
934: ctxt->cur - 10);
935: return;
936: }
937: SKIP_BLANKS(ctxt->cur);
938: /*
939: * TODO !!!!!!
940: */
941: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
942: ctxt->cur++;
943: free(name);
944: }
945: }
946:
947: /*
948: * xmlParseEntityDecl: parse <!ENTITY declarations
949: *
950: * [70] EntityDecl ::= GEDecl | PEDecl
951: *
952: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
953: *
954: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
955: *
956: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
957: *
958: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 959: *
960: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 961: */
962:
963: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
964: CHAR *name;
1.24 daniel 965: CHAR *value = NULL;
966: CHAR *id = NULL, *literal = NULL;
967: CHAR *ndata = NULL;
1.22 daniel 968: int typePEDef = 0;
969:
970: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
971: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'N') &&
972: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
973: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'Y') &&
974: (IS_BLANK(ctxt->cur[8]))) {
975: ctxt->cur += 8;
976: SKIP_BLANKS(ctxt->cur);
977:
978: if (ctxt->cur[0] == '%') {
1.16 daniel 979: ctxt->cur++;
1.22 daniel 980: SKIP_BLANKS(ctxt->cur);
981: typePEDef = 1;
982: }
983:
984: name = xmlParseName(ctxt);
1.24 daniel 985: if (name == NULL) {
986: fprintf(stderr, "xmlParseEntityDecl: no name %30s\n",
987: ctxt->cur - 10);
988: return;
989: }
990: SKIP_BLANKS(ctxt->cur);
991:
1.22 daniel 992: /*
1.24 daniel 993: * TODO handle the various case of definitions...
1.22 daniel 994: */
1.24 daniel 995: if (typePEDef) {
996: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
997: value = xmlParseEntityValue(ctxt);
998: else {
999: id = xmlParseExternalID(ctxt, &literal);
1000: }
1001: } else {
1002: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1003: value = xmlParseEntityValue(ctxt);
1004: else {
1005: id = xmlParseExternalID(ctxt, &literal);
1006: SKIP_BLANKS(ctxt->cur);
1007: if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'D') &&
1008: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1009: (ctxt->cur[4] == 'A')) {
1010: ndata = xmlParseName(ctxt);
1011: }
1012: }
1013: }
1014: SKIP_BLANKS(ctxt->cur);
1015: if (ctxt->cur[0] != '>') {
1016: fprintf(stderr,
1017: "xmlParseEntityDecl: entity %s not terminated %30s\n",
1.25 daniel 1018: name, ctxt->cur - 10);
1.24 daniel 1019: } else
1.22 daniel 1020: ctxt->cur++;
1021: }
1022: }
1023:
1024: /*
1025: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1026: *
1027: * [57] EnumeratedType ::= NotationType | Enumeration
1028: *
1029: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1030: *
1031: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1032: */
1033:
1034: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1035: /*
1036: * TODO !!!
1037: */
1038: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1039: ctxt->cur++;
1040: }
1041:
1042: /*
1043: * xmlParseAttributeType: parse the Attribute list def for an element
1044: *
1045: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1046: *
1047: * [55] StringType ::= 'CDATA'
1048: *
1049: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1050: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1051: */
1052: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1053: if ((ctxt->cur[0] == 'C') && (ctxt->cur[1] == 'D') &&
1054: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1055: (ctxt->cur[4] == 'A')) {
1056: ctxt->cur += 5;
1057: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D')) {
1058: ctxt->cur += 2;
1059: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1060: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1061: (ctxt->cur[4] == 'F')) {
1062: ctxt->cur += 5;
1063: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1064: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1065: (ctxt->cur[4] == 'F') && (ctxt->cur[5] == 'S')) {
1066: ctxt->cur += 6;
1067: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1068: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1069: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'Y')) {
1070: ctxt->cur += 6;
1071: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1072: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1073: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1074: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'S')) {
1075: ctxt->cur += 8;
1076: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1077: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1078: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1079: (ctxt->cur[6] == 'N')) {
1080: ctxt->cur += 7;
1081: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1082: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1083: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1084: (ctxt->cur[6] == 'N') && (ctxt->cur[7] == 'S')) {
1085: } else {
1086: xmlParseEnumeratedType(ctxt, name);
1087: }
1088: }
1089:
1090: /*
1091: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1092: *
1093: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1094: *
1095: * [53] AttDef ::= S Name S AttType S DefaultDecl
1096: */
1097: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1098: CHAR *name;
1099:
1100: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1101: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1102: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'L') &&
1103: (ctxt->cur[6] == 'I') && (ctxt->cur[7] == 'S') &&
1104: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1105: ctxt->cur += 9;
1106: SKIP_BLANKS(ctxt->cur);
1107: name = xmlParseName(ctxt);
1108: if (name == NULL) {
1109: fprintf(stderr,
1110: "xmlParseAttributeListDecl: no name for Element %30s\n",
1111: ctxt->cur - 10);
1112: return;
1113: }
1114: SKIP_BLANKS(ctxt->cur);
1115: while (ctxt->cur[0] != '>') {
1116: const CHAR *check = ctxt->cur;
1117:
1118: xmlParseAttributeType(ctxt, name);
1119: SKIP_BLANKS(ctxt->cur);
1120: if (check == ctxt->cur) {
1121: fprintf(stderr,
1122: "xmlParseAttributeListDecl: detected error %30s\n",
1123: check - 10);
1124: break;
1125: }
1126: }
1127: if (ctxt->cur[0] == '>')
1128: ctxt->cur++;
1129:
1130: free(name);
1131: }
1132: }
1133:
1134: /*
1135: * xmlParseElementContentDecl: parse the declaration for an Element content
1136: * either Mixed or Children, the cases EMPTY and ANY being handled
1137: * int xmlParseElementDecl.
1138: *
1139: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1140: *
1141: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1142: *
1143: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1144: *
1145: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1146: *
1147: * or
1148: *
1149: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1150: * '(' S? '#PCDATA' S? ')'
1151: */
1152:
1153: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1154: /*
1155: * TODO This has to be parsed correctly, currently we just skip until
1156: * we reach the first '>'.
1157: */
1158: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1159: ctxt->cur++;
1160: }
1161:
1162: /*
1163: * xmlParseElementDecl: parse an Element declaration.
1164: *
1165: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1166: *
1167: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1168: *
1169: * TODO There is a check [ VC: Unique Element Type Declaration ]
1170: */
1171: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1172: CHAR *name;
1173:
1174: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1175: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'L') &&
1176: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M') &&
1177: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'N') &&
1178: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1179: ctxt->cur += 9;
1180: SKIP_BLANKS(ctxt->cur);
1181: name = xmlParseName(ctxt);
1182: if (name == NULL) {
1183: fprintf(stderr, "xmlParseElementDecl: no name for Element %30s\n",
1184: ctxt->cur - 10);
1185: return;
1186: }
1187: SKIP_BLANKS(ctxt->cur);
1188: if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'M') &&
1189: (ctxt->cur[2] == 'P') && (ctxt->cur[3] == 'T') &&
1190: (ctxt->cur[4] == 'Y')) {
1191: ctxt->cur += 5;
1192: /*
1193: * Element must always be empty.
1194: */
1195: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'N') &&
1196: (ctxt->cur[2] == 'Y')) {
1197: ctxt->cur += 3;
1198: /*
1199: * Element is a generic container.
1200: */
1201: } else {
1202: xmlParseElementContentDecl(ctxt, name);
1203: }
1204: SKIP_BLANKS(ctxt->cur);
1205: if (ctxt->cur[0] != '>') {
1206: fprintf(stderr,
1207: "xmlParseElementDecl: expected '>' at the end %30s\n",
1208: ctxt->cur - 10);
1209: } else
1210: ctxt->cur++;
1211: }
1212: }
1213:
1214: /*
1215: * xmlParseMarkupDecl: parse Markup declarations
1216: *
1217: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1218: * NotationDecl | PI | Comment
1219: *
1220: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1221: */
1222: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1223: xmlParseElementDecl(ctxt);
1224: xmlParseAttributeListDecl(ctxt);
1225: xmlParseEntityDecl(ctxt);
1226: xmlParseNotationDecl(ctxt);
1227: xmlParsePI(ctxt);
1228: xmlParserSkipComment(ctxt);
1229: }
1230:
1231: /*
1.24 daniel 1232: * xmlParseCharRef: parse Reference declarations
1233: *
1234: * [66] CharRef ::= '&#' [0-9]+ ';' |
1235: * '&#x' [0-9a-fA-F]+ ';'
1236: */
1237: CHAR xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1238: CHAR ret = 0;
1239:
1240: if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#') &&
1241: (ctxt->cur[2] == 'x')) {
1242: ctxt->cur += 3;
1243: while (ctxt->cur[0] != ';') {
1244: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1245: ret = ret * 16 + (ctxt->cur[0] - '0');
1246: else if ((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'f'))
1247: ret = ret * 16 + (ctxt->cur[0] - 'a') + 10;
1248: else if ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'F'))
1249: ret = ret * 16 + (ctxt->cur[0] - 'A') + 10;
1250: else {
1251: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1252: ctxt->cur - 10);
1253: ret = 0;
1254: break;
1255: }
1256: }
1257: if (ctxt->cur[0] != ';')
1258: ctxt->cur++;
1259: /*
1260: * TODO: Check the value IS_CHAR ...
1261: */
1262: } else if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#')) {
1263: ctxt->cur += 2;
1264: while (ctxt->cur[0] != ';') {
1265: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1266: ret = ret * 16 + (ctxt->cur[0] - '0');
1267: else {
1268: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1269: ctxt->cur - 10);
1270: ret = 0;
1271: break;
1272: }
1273: }
1274: if (ctxt->cur[0] != ';')
1275: ctxt->cur++;
1276: /*
1277: * TODO: Check the value IS_CHAR ...
1278: */
1279: } else {
1280: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1281: ctxt->cur);
1282: }
1283: return(ret);
1284: }
1285:
1286: /*
1287: * xmlParseEntityRef: parse ENTITY references declarations
1288: *
1289: * [68] EntityRef ::= '&' Name ';'
1290: */
1291: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1292: CHAR *name;
1293:
1294: if (ctxt->cur[0] == '&') {
1295: ctxt->cur++;
1296: name = xmlParseName(ctxt);
1297: if (name == NULL) {
1.25 daniel 1298: fprintf(stderr, "xmlParseEntityRef: no name %30s\n",
1.24 daniel 1299: ctxt->cur - 10);
1300: } else {
1301: if (ctxt->cur[0] == ';') {
1302: ctxt->cur++;
1303: /*
1304: * TODO there is a VC check here !!!
1305: * [ VC: Entity Declared ]
1306: */
1307: free(name);
1308: } else {
1.25 daniel 1309: fprintf(stderr, "xmlParseEntityRef: expecting ';' %30s\n",
1.24 daniel 1310: ctxt->cur - 10);
1311: }
1312: }
1313: }
1.25 daniel 1314: return(NULL); /* TODO !!!! */
1.24 daniel 1315: }
1316:
1317: /*
1318: * xmlParseReference: parse Reference declarations
1319: *
1320: * [67] Reference ::= EntityRef | CharRef
1321: */
1322: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
1323: CHAR *name;
1324:
1325: if (ctxt->cur[0] == '&') {
1326: return(xmlParseEntityRef(ctxt));
1327: } else {
1328: ctxt->cur++;
1329: name = xmlParseName(ctxt);
1330: if (name == NULL) {
1.25 daniel 1331: fprintf(stderr, "xmlParseReference: no name %30s\n",
1.24 daniel 1332: ctxt->cur - 10);
1333: } else {
1334: if (ctxt->cur[0] == ';') {
1335: ctxt->cur++;
1336: /*
1337: * TODO there is a VC check here !!!
1338: * [ VC: Entity Declared ]
1339: */
1340: free(name);
1341: } else {
1.25 daniel 1342: fprintf(stderr, "xmlParseReference: expecting ';' %30s\n",
1.24 daniel 1343: ctxt->cur - 10);
1344: }
1345: }
1346: }
1.25 daniel 1347: return(NULL); /* TODO !!!! */
1.24 daniel 1348: }
1349:
1350: /*
1.22 daniel 1351: * xmlParsePEReference: parse PEReference declarations
1352: *
1353: * [69] PEReference ::= '%' Name ';'
1354: */
1.24 daniel 1355: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 1356: CHAR *name;
1357:
1358: if (ctxt->cur[0] == '%') {
1359: ctxt->cur++;
1360: name = xmlParseName(ctxt);
1361: if (name == NULL) {
1362: fprintf(stderr, "xmlParsePEReference: no name %30s\n",
1363: ctxt->cur - 10);
1364: } else {
1365: if (ctxt->cur[0] == ';') {
1366: ctxt->cur++;
1367: /*
1368: * TODO there is a VC check here !!!
1369: * [ VC: Entity Declared ]
1370: */
1371: free(name);
1372: } else {
1373: fprintf(stderr, "xmlParsePEReference: expecting ';' %30s\n",
1374: ctxt->cur - 10);
1375: }
1.3 veillard 1376: }
1377: }
1.25 daniel 1378: return(NULL); /* TODO !!!! */
1.3 veillard 1379: }
1380:
1381: /*
1.21 daniel 1382: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
1383: *
1.22 daniel 1384: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1385: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 1386: */
1387:
1388: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1389: CHAR *name;
1390: CHAR *ExternalID = NULL;
1.22 daniel 1391: CHAR *SystemID = NULL;
1.21 daniel 1392:
1393: /*
1394: * We know that '<!DOCTYPE' has been detected.
1395: */
1396: ctxt->cur += 9;
1397:
1398: SKIP_BLANKS(ctxt->cur);
1399:
1400: /*
1401: * Parse the DOCTYPE name.
1402: */
1403: name = xmlParseName(ctxt);
1404: if (name == NULL) {
1405: fprintf(stderr, "xmlParseDocTypeDecl : no DOCTYPE name ! : %30s\n",
1406: ctxt->cur - 10);
1407: }
1408:
1409: SKIP_BLANKS(ctxt->cur);
1410:
1411: /*
1.22 daniel 1412: * Check for SystemID and ExternalID
1413: */
1414: SystemID = xmlParseExternalID(ctxt, &ExternalID);
1415: SKIP_BLANKS(ctxt->cur);
1416:
1417: /*
1418: * Is there any DTD definition ?
1419: */
1420: if (ctxt->cur[0] == '[') {
1421: ctxt->cur++;
1422: /*
1423: * Parse the succession of Markup declarations and
1424: * PEReferences.
1425: * Subsequence (markupdecl | PEReference | S)*
1426: */
1427: while (ctxt->cur[0] != ']') {
1428: const CHAR *check = ctxt->cur;
1429:
1430: SKIP_BLANKS(ctxt->cur);
1431: xmlParseMarkupDecl(ctxt);
1432: xmlParsePEReference(ctxt);
1433:
1434: if (ctxt->cur == check) {
1435: fprintf(stderr,
1436: "xmlParseDocTypeDecl: error detected in Markup declaration\n\t%50s\n",
1437: check - 10);
1438: break;
1439: }
1440: }
1441: if (ctxt->cur[0] == ']') ctxt->cur++;
1442: }
1443:
1444: /*
1445: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 1446: */
1.22 daniel 1447: if (ctxt->cur[0] != '>') {
1448: fprintf(stderr, "DOCTYPE unproperly terminated %30s\n",
1449: ctxt->cur - 10);
1450: /* We shouldn't try to resynchronize ... */
1.21 daniel 1451: }
1.22 daniel 1452: ctxt->cur++;
1453:
1454: /*
1455: * Cleanup, since we don't use all those identifiers
1456: * TODO : the DOCTYPE if available should be stored !
1457: */
1458: if (SystemID != NULL) free(SystemID);
1459: if (ExternalID != NULL) free(ExternalID);
1460: if (name != NULL) free(name);
1.21 daniel 1461: }
1462:
1463: /*
1.3 veillard 1464: * xmlParseAttribute: parse a start of tag.
1465: *
1.22 daniel 1466: * [41] Attribute ::= Name Eq AttValue
1467: *
1468: * [25] Eq ::= S? '=' S?
1469: *
1470: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1471: * "'" ([^<&'] | Reference)* "'"
1.3 veillard 1472: */
1473:
1.16 daniel 1474: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1475: CHAR *name, *value = NULL;
1.3 veillard 1476:
1.22 daniel 1477: name = xmlParseName(ctxt);
1478: if (name == NULL) {
1479: fprintf(stderr,
1480: "xmlParseAttribute: error parsing attribute name %30s\n",
1.23 daniel 1481: ctxt->cur - 10);
1.3 veillard 1482: }
1.22 daniel 1483: /*
1484: * TODO: Check for Namespace ...
1485: */
1.3 veillard 1486:
1487: /*
1488: * We should have the equal, we are laxist here and allow attributes
1.22 daniel 1489: * without values ?!?.
1490: */
1491: /*
1492: * !!!!! TODO !!!!!! Rewrite this is absolutely not clean !!!!
1.3 veillard 1493: */
1.16 daniel 1494: SKIP_BLANKS(ctxt->cur);
1495: if (ctxt->cur[0] == '=') {
1496: ctxt->cur++;
1497: SKIP_BLANKS(ctxt->cur);
1498: if ((ctxt->cur[0] != '\'') && (ctxt->cur[0] != '"')) {
1.7 veillard 1499: fprintf(stderr, "Quotes were expected for attribute value %.20s\n",
1.23 daniel 1500: ctxt->cur - 10);
1.3 veillard 1501: } else
1.16 daniel 1502: value = xmlParseQuotedString(ctxt);
1.3 veillard 1503: }
1504:
1505: /*
1506: * Add the attribute to the node.
1507: */
1.17 daniel 1508: if (name != NULL) {
1.3 veillard 1509: xmlNewProp(node, name, value);
1.17 daniel 1510: free(name);
1511: }
1512: if ( value != NULL )
1513: free(value);
1.3 veillard 1514: }
1515:
1516: /*
1.2 veillard 1517: * xmlParseStartTag: parse a start of tag.
1.27 daniel 1518: *
1519: * [40] STag ::= '<' Name (S Attribute)* S? '>'
1520: *
1521: * !!!!!!!!!!!!!!!!!!!!
1.2 veillard 1522: */
1523:
1.16 daniel 1524: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.17 daniel 1525: const CHAR *q;
1526: CHAR *ns, *name;
1.3 veillard 1527: xmlDtdPtr dtd = NULL;
1.2 veillard 1528: xmlNodePtr ret = NULL;
1529:
1530: /*
1.3 veillard 1531: * Theorically one should just parse a Name, but with the addition
1532: * of the namespace needed for WebDav, it's a bit more complicated
1533: * since the element name may be prefixed by a namespace prefix.
1534: *
1535: * QName ::= (NSPart ':')? LocalPart
1536: * NSPart ::= Name
1537: * LocalPart ::= Name
1538: * STag ::= '<' QName (S Attribute)* S? '>'
1539: *
1540: * instead of :
1541: *
1542: * STag ::= '<' QName (S Attribute)* S? '>'
1.2 veillard 1543: */
1.16 daniel 1544: if (ctxt->cur[0] != '<') return(NULL);
1545: ctxt->cur++;
1.3 veillard 1546:
1.16 daniel 1547: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
1548: q = ctxt->cur++;
1549: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1550: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
1551: (ctxt->cur[0] == '_') ||
1.22 daniel 1552: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 1553: (IS_EXTENDER(ctxt->cur[0])))
1554: ctxt->cur++;
1.3 veillard 1555:
1.16 daniel 1556: if (ctxt->cur[0] == ':') {
1557: ns = xmlStrndup(q, ctxt->cur - q);
1.3 veillard 1558:
1.16 daniel 1559: ctxt->cur++; /* skip the column */
1560: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
1.7 veillard 1561: fprintf(stderr,
1562: "Start tag : no element name after namespace identifier %.20s\n",
1.3 veillard 1563: q);
1564: free(ns);
1565: return(NULL);
1566: }
1.16 daniel 1567: q = ctxt->cur++;
1568: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1569: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
1570: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
1.22 daniel 1571: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 1572: (IS_EXTENDER(ctxt->cur[0])))
1573: ctxt->cur++;
1574: name = xmlStrndup(q, ctxt->cur - q);
1.3 veillard 1575:
1576: /*
1577: * Search the DTD associated to ns.
1578: */
1.16 daniel 1579: dtd = xmlSearchDtd(ctxt->doc, ns);
1.3 veillard 1580: if (dtd == NULL)
1.7 veillard 1581: fprintf(stderr, "Start tag : Couldn't find namespace %s\n", ns);
1.3 veillard 1582: free(ns);
1583: } else
1.16 daniel 1584: name = xmlStrndup(q, ctxt->cur - q);
1.3 veillard 1585:
1586: ret = xmlNewNode(dtd, name, NULL);
1.2 veillard 1587:
1.3 veillard 1588: /*
1589: * Now parse the attributes, it ends up with the ending
1590: *
1591: * (S Attribute)* S?
1592: */
1.16 daniel 1593: SKIP_BLANKS(ctxt->cur);
1594: while ((IS_CHAR(ctxt->cur[0])) &&
1595: (ctxt->cur[0] != '>') &&
1596: ((ctxt->cur[0] != '/') || (ctxt->cur[1] != '>'))) {
1597: if (IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_'))
1598: xmlParseAttribute(ctxt, ret);
1.3 veillard 1599: else {
1.14 veillard 1600: /* We should warn TODO !!! */
1.16 daniel 1601: ctxt->cur++;
1.3 veillard 1602: }
1.16 daniel 1603: SKIP_BLANKS(ctxt->cur);
1.3 veillard 1604: }
1605:
1606: return(ret);
1607: }
1608:
1609: /*
1.27 daniel 1610: * xmlParseEndTag: parse an end of tag
1611: *
1612: * [42] ETag ::= '</' Name S? '>'
1.7 veillard 1613: */
1614:
1.16 daniel 1615: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
1.17 daniel 1616: const CHAR *q;
1617: CHAR *ns, *name;
1.7 veillard 1618: xmlDtdPtr dtd = NULL;
1619:
1620: *dtdPtr = NULL;
1621: *tagPtr = NULL;
1622:
1.27 daniel 1623: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1624: fprintf(stderr, "xmlParseEndTag: '</' not found %30s\n", ctxt->cur -10);
1625: return;
1626: }
1627: ctxt->cur += 2;
1.7 veillard 1628: /*
1629: * Theorically one should just parse a Name, but with the addition
1630: * of the namespace needed for WebDav, it's a bit more complicated
1631: * since the element name may be prefixed by a namespace prefix.
1632: *
1633: * QName ::= (NSPart ':')? LocalPart
1634: * NSPart ::= Name
1635: * LocalPart ::= Name
1636: * ETag ::= '</' QName S? '>'
1637: *
1638: * instead of :
1639: *
1640: * ETag ::= '</' Name S? '>'
1.27 daniel 1641: *
1642: * !!!!!!!! TODO cleanup that mess !!!
1.7 veillard 1643: */
1.16 daniel 1644: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return;
1645: q = ctxt->cur++;
1646: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1647: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
1648: (ctxt->cur[0] == '_') ||
1.22 daniel 1649: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 1650: (IS_EXTENDER(ctxt->cur[0])))
1651: ctxt->cur++;
1.7 veillard 1652:
1.16 daniel 1653: if (ctxt->cur[0] == ':') {
1654: ns = xmlStrndup(q, ctxt->cur - q);
1.7 veillard 1655:
1.16 daniel 1656: ctxt->cur++; /* skip the column */
1657: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
1.7 veillard 1658: fprintf(stderr,
1659: "End tag : no element name after namespace identifier %.20s\n",
1660: q);
1661: free(ns);
1662: return;
1663: }
1.16 daniel 1664: q = ctxt->cur++;
1665: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1666: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
1667: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
1.22 daniel 1668: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 1669: (IS_EXTENDER(ctxt->cur[0])))
1670: ctxt->cur++;
1671: name = xmlStrndup(q, ctxt->cur - q);
1.7 veillard 1672:
1673: /*
1674: * Search the DTD associated to ns.
1675: */
1.16 daniel 1676: dtd = xmlSearchDtd(ctxt->doc, ns);
1.7 veillard 1677: if (dtd == NULL)
1678: fprintf(stderr, "End tag : Couldn't find namespace %s\n", ns);
1679: free(ns);
1680: } else
1.16 daniel 1681: name = xmlStrndup(q, ctxt->cur - q);
1.7 veillard 1682:
1683: *dtdPtr = dtd;
1684: *tagPtr = name;
1685:
1686: /*
1687: * We should definitely be at the ending "S? '>'" part
1688: */
1.16 daniel 1689: SKIP_BLANKS(ctxt->cur);
1690: if ((!IS_CHAR(ctxt->cur[0])) || (ctxt->cur[0] != '>')) {
1691: fprintf(stderr, "End tag : expected '>', got %.20s\n", ctxt->cur);
1.7 veillard 1692: /*
1693: * Note : skipping to the next '>' is probably otherkill,
1694: * especially in case the '>' is hust missing.
1695: *
1696: * Otherwise add:
1.16 daniel 1697: * MOVETO_ENDTAG(ctxt->cur);
1.7 veillard 1698: */
1699: } else
1.16 daniel 1700: ctxt->cur++;
1.7 veillard 1701:
1702: return;
1703: }
1704:
1705: /*
1.3 veillard 1706: * xmlParseCDSect: escaped pure raw content.
1707: */
1.16 daniel 1708: CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 1709: const CHAR *r, *s, *base;
1710: CHAR *ret;
1.3 veillard 1711:
1.16 daniel 1712: base = ctxt->cur;
1713: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 1714: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 1715: return(NULL);
1716: }
1.16 daniel 1717: r = ctxt->cur++;
1718: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 1719: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 1720: return(NULL);
1721: }
1.16 daniel 1722: s = ctxt->cur++;
1723: while (IS_CHAR(ctxt->cur[0]) &&
1724: ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) {
1725: r++;s++;ctxt->cur++;
1.3 veillard 1726: }
1.16 daniel 1727: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 1728: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 1729: return(NULL);
1730: }
1.16 daniel 1731: ret = xmlStrndup(base, ctxt->cur-base);
1732:
1.2 veillard 1733: return(ret);
1734: }
1735:
1736: /*
1737: * xmlParseContent: a content is
1738: * (element | PCData | Reference | CDSect | PI | Comment)
1739: *
1.27 daniel 1740: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 1741: */
1742:
1.27 daniel 1743: void xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1744: const CHAR *q;
1745: CHAR *data = NULL;
1.2 veillard 1746: xmlNodePtr ret = NULL;
1747:
1.27 daniel 1748: while ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1749: ret = NULL;
1750: data = NULL;
1751:
1752: /*
1753: * First case : a Processing Instruction.
1754: */
1755: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1756: xmlParsePI(ctxt);
1757: }
1758: /*
1759: * Second case : a CDSection
1760: */
1761: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1762: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
1763: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
1764: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
1765: (ctxt->cur[8] == '[')) {
1766: ctxt->cur += 9;
1767: data = xmlParseCDSect(ctxt);
1768: }
1769: /*
1770: * Third case : a comment
1771: */
1772: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1773: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
1774: xmlParserSkipComment(ctxt);
1775: }
1776: /*
1777: * Fourth case : a sub-element.
1778: */
1779: else if (ctxt->cur[0] == '<') {
1780: ret = xmlParseElement(ctxt);
1781: }
1782: /*
1783: * Last case, text. Note that References are handled directly.
1784: */
1785: else {
1786: q = ctxt->cur;
1787: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
1788:
1789: if (!IS_CHAR(ctxt->cur[0])) {
1790: fprintf(stderr, "Truncated content : %.50s\n", q);
1791: return;
1792: }
1.3 veillard 1793:
1.27 daniel 1794: /*
1795: * Do the Entities decoding...
1796: */
1797: data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
1.3 veillard 1798: }
1.14 veillard 1799:
1800: /*
1.27 daniel 1801: * Handle the data if any. If there is no child
1802: * add it as content, otherwise create a new node of type text.
1.14 veillard 1803: */
1.27 daniel 1804: if (data != NULL)
1805: data = xmlHandleData(data);
1806: if (data != NULL) {
1807: if (node->childs == NULL)
1808: xmlNodeSetContent(node, data);
1809: else
1810: ret = xmlNewText(data);
1811: free(data);
1812: }
1813: if (ret != NULL)
1814: xmlAddChild(node, ret);
1.3 veillard 1815: }
1.2 veillard 1816: }
1817:
1818: /*
1819: * xmlParseElement: parse an XML element
1.26 daniel 1820: *
1821: * [39] element ::= EmptyElemTag | STag content ETag
1822: *
1823: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 1824: */
1.26 daniel 1825:
1.2 veillard 1826:
1.16 daniel 1827: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 1828: xmlNodePtr ret;
1.17 daniel 1829: const CHAR *openTag = ctxt->cur;
1.27 daniel 1830: CHAR *endTag;
1831: xmlDtdPtr endDtd;
1.2 veillard 1832:
1.16 daniel 1833: ret = xmlParseStartTag(ctxt);
1.3 veillard 1834: if (ret == NULL) {
1835: return(NULL);
1836: }
1.2 veillard 1837:
1838: /*
1839: * Check for an Empty Element.
1840: */
1.16 daniel 1841: if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
1842: ctxt->cur += 2;
1.2 veillard 1843: return(ret);
1844: }
1.16 daniel 1845: if (ctxt->cur[0] == '>') ctxt->cur++;
1.2 veillard 1846: else {
1.16 daniel 1847: fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", openTag);
1848: return(NULL);
1.2 veillard 1849: }
1850:
1851: /*
1852: * Parse the content of the element:
1853: */
1.27 daniel 1854: xmlParseContent(ctxt, ret);
1.16 daniel 1855: if (!IS_CHAR(ctxt->cur[0])) {
1856: fprintf(stderr, "Premature end of data in tag %.30s\n", openTag);
1857: return(NULL);
1.2 veillard 1858: }
1859:
1860: /*
1.27 daniel 1861: * parse the end of tag: '</' should be here.
1.2 veillard 1862: */
1.27 daniel 1863: xmlParseEndTag(ctxt, &endDtd, &endTag);
1.7 veillard 1864:
1.27 daniel 1865: /*
1866: * Check that the Name in the ETag is the same as in the STag.
1867: */
1868: if (endDtd != ret->dtd) {
1869: fprintf(stderr, "Start and End tags don't use the same DTD:\n");
1870: fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, endTag);
1871: }
1872: if (strcmp(ret->name, endTag)) {
1873: fprintf(stderr, "Start and End tags don't use the same name:\n");
1874: fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, endTag);
1875: }
1.7 veillard 1876:
1.27 daniel 1877: if ( endTag != NULL )
1878: free(endTag);
1.2 veillard 1879:
1880: return(ret);
1881: }
1882:
1883: /*
1.1 veillard 1884: * xmlParseXMLDecl: parse an XML declaration header
1885: */
1886:
1.16 daniel 1887: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 1888: CHAR *version;
1889:
1890: /*
1.19 daniel 1891: * We know that '<?xml' is here.
1.1 veillard 1892: */
1.16 daniel 1893: ctxt->cur += 5;
1.1 veillard 1894:
1895: /*
1896: * Parse the version info
1897: */
1.16 daniel 1898: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1899:
1900: /*
1901: * We should have 'version=' here !
1902: */
1.16 daniel 1903: if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
1904: (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
1905: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
1906: (ctxt->cur[6] == 'n') && (ctxt->cur[7] == '=')) {
1907: ctxt->cur += 8;
1908: version = xmlParseQuotedString(ctxt);
1.1 veillard 1909: if (version == NULL)
1.16 daniel 1910: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 1911: else {
1.16 daniel 1912: ctxt->doc = xmlNewDoc(version);
1.8 veillard 1913: free(version);
1.1 veillard 1914: }
1915: } else {
1.16 daniel 1916: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 1917: }
1918:
1919: /*
1.14 veillard 1920: * We should check for Required Markup Declaration TODO !!!!
1.1 veillard 1921: */
1.16 daniel 1922: MOVETO_ENDTAG(ctxt->cur);
1923: ctxt->cur++;
1.1 veillard 1924:
1925: }
1926:
1927: /*
1.22 daniel 1928: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 1929: * Misc*
1930: *
1.22 daniel 1931: * [27] Misc ::= Comment | PI | S
1.1 veillard 1932: */
1933:
1.16 daniel 1934: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
1935: while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) ||
1936: ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1.21 daniel 1937: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) ||
1.16 daniel 1938: IS_BLANK(ctxt->cur[0])) {
1939: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1940: xmlParsePI(ctxt);
1941: } else if (IS_BLANK(ctxt->cur[0])) {
1942: ctxt->cur++;
1.1 veillard 1943: } else
1.16 daniel 1944: xmlParserSkipComment(ctxt);
1.1 veillard 1945: }
1946: }
1947:
1948: /*
1.16 daniel 1949: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 1950: *
1.22 daniel 1951: * [1] document ::= prolog element Misc*
1.21 daniel 1952: * prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 1953: */
1954:
1.16 daniel 1955: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.14 veillard 1956: /*
1957: * We should check for encoding here and plug-in some
1958: * conversion code TODO !!!!
1959: */
1.1 veillard 1960:
1961: /*
1962: * Wipe out everything which is before the first '<'
1963: */
1.16 daniel 1964: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1965:
1966: /*
1967: * Check for the XMLDecl in the Prolog.
1968: */
1.16 daniel 1969: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.19 daniel 1970: (ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
1971: (ctxt->cur[4] == 'l')) {
1972: xmlParseXMLDecl(ctxt);
1973: /* SKIP_EOL(cur); */
1974: SKIP_BLANKS(ctxt->cur);
1975: } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.16 daniel 1976: (ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
1977: (ctxt->cur[4] == 'L')) {
1.19 daniel 1978: /*
1979: * The first drafts were using <?XML and the final W3C REC
1980: * now use <?xml ...
1981: */
1.16 daniel 1982: xmlParseXMLDecl(ctxt);
1.1 veillard 1983: /* SKIP_EOL(cur); */
1.16 daniel 1984: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1985: } else {
1.16 daniel 1986: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 1987: }
1988:
1989: /*
1990: * The Misc part of the Prolog
1.21 daniel 1991: * Misc*
1992: * Misc ::= Comment | PI | S
1.1 veillard 1993: */
1.16 daniel 1994: xmlParseMisc(ctxt);
1.1 veillard 1995:
1996: /*
1.21 daniel 1997: * Then possibly doc type decalration(s) and more Misc
1998: * (doctypedecl Misc*)?
1999: */
1.22 daniel 2000: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2001: (ctxt->cur[2] == 'D') && (ctxt->cur[3] == 'O') &&
2002: (ctxt->cur[4] == 'C') && (ctxt->cur[5] == 'T') &&
2003: (ctxt->cur[6] == 'Y') && (ctxt->cur[7] == 'P') &&
2004: (ctxt->cur[8] == 'E')) {
2005: xmlParseDocTypeDecl(ctxt);
2006: xmlParseMisc(ctxt);
1.21 daniel 2007: }
2008:
2009: /*
2010: * Time to start parsing the tree itself
1.1 veillard 2011: */
1.16 daniel 2012: ctxt->doc->root = xmlParseElement(ctxt);
2013:
2014: return(0);
2015: }
2016:
2017: /*
2018: * xmlParseDoc : parse an XML in-memory document and build a tree.
2019: */
2020:
2021: xmlDocPtr xmlParseDoc(CHAR *cur) {
2022: xmlDocPtr ret;
2023: xmlParserCtxtPtr ctxt;
2024:
2025: if (cur == NULL) return(NULL);
1.1 veillard 2026:
1.16 daniel 2027: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2028: if (ctxt == NULL) {
2029: perror("malloc");
2030: return(NULL);
2031: }
2032:
1.19 daniel 2033: xmlInitParserCtxt(ctxt);
1.16 daniel 2034: ctxt->base = cur;
2035: ctxt->cur = cur;
2036:
2037: xmlParseDocument(ctxt);
2038: ret = ctxt->doc;
1.20 daniel 2039: free(ctxt->nodes);
1.16 daniel 2040: free(ctxt);
2041:
1.1 veillard 2042: return(ret);
2043: }
2044:
1.9 httpng 2045: /*
2046: * xmlParseFile : parse an XML file and build a tree.
2047: */
2048:
2049: xmlDocPtr xmlParseFile(const char *filename) {
2050: xmlDocPtr ret;
1.20 daniel 2051: #ifdef HAVE_ZLIB_H
2052: gzFile input;
2053: #else
1.9 httpng 2054: int input;
1.20 daniel 2055: #endif
1.9 httpng 2056: int res;
2057: struct stat buf;
2058: char *buffer;
1.16 daniel 2059: xmlParserCtxtPtr ctxt;
1.9 httpng 2060:
1.11 veillard 2061: res = stat(filename, &buf);
1.9 httpng 2062: if (res < 0) return(NULL);
2063:
1.20 daniel 2064: #ifdef HAVE_ZLIB_H
2065: retry_bigger:
2066: buffer = malloc((buf.st_size * 20) + 100);
2067: #else
1.9 httpng 2068: buffer = malloc(buf.st_size + 100);
1.20 daniel 2069: #endif
1.9 httpng 2070: if (buffer == NULL) {
2071: perror("malloc");
2072: return(NULL);
2073: }
2074:
2075: memset(buffer, 0, sizeof(buffer));
1.20 daniel 2076: #ifdef HAVE_ZLIB_H
2077: input = gzopen (filename, "r");
2078: if (input == NULL) {
2079: fprintf (stderr, "Cannot read file %s :\n", filename);
2080: perror ("gzopen failed");
2081: return(NULL);
2082: }
2083: #else
1.9 httpng 2084: input = open (filename, O_RDONLY);
2085: if (input < 0) {
2086: fprintf (stderr, "Cannot read file %s :\n", filename);
2087: perror ("open failed");
2088: return(NULL);
2089: }
1.20 daniel 2090: #endif
2091: #ifdef HAVE_ZLIB_H
2092: res = gzread(input, buffer, 20 * buf.st_size);
2093: #else
1.9 httpng 2094: res = read(input, buffer, buf.st_size);
1.20 daniel 2095: #endif
1.9 httpng 2096: if (res < 0) {
2097: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 2098: #ifdef HAVE_ZLIB_H
2099: perror ("gzread failed");
2100: #else
1.9 httpng 2101: perror ("read failed");
1.20 daniel 2102: #endif
1.9 httpng 2103: return(NULL);
2104: }
1.20 daniel 2105: #ifdef HAVE_ZLIB_H
2106: gzclose(input);
2107: if (res >= 20 * buf.st_size) {
2108: free(buffer);
2109: buf.st_size *= 2;
2110: goto retry_bigger;
2111: }
2112: buf.st_size = res;
2113: #else
1.9 httpng 2114: close(input);
1.20 daniel 2115: #endif
2116:
1.9 httpng 2117:
1.16 daniel 2118: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2119: if (ctxt == NULL) {
2120: perror("malloc");
2121: return(NULL);
2122: }
1.9 httpng 2123: buffer[buf.st_size] = '\0';
1.16 daniel 2124:
1.19 daniel 2125: xmlInitParserCtxt(ctxt);
1.17 daniel 2126: ctxt->filename = filename;
1.16 daniel 2127: ctxt->base = buffer;
2128: ctxt->cur = buffer;
2129:
2130: xmlParseDocument(ctxt);
2131: ret = ctxt->doc;
1.9 httpng 2132: free(buffer);
1.20 daniel 2133: free(ctxt->nodes);
2134: free(ctxt);
2135:
2136: return(ret);
2137: }
2138:
2139: /*
2140: * xmlParseFile : parse an XML memory block and build a tree.
2141: */
2142:
2143: xmlDocPtr xmlParseMemory(char *buffer, int size) {
2144: xmlDocPtr ret;
2145: xmlParserCtxtPtr ctxt;
2146:
2147: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2148: if (ctxt == NULL) {
2149: perror("malloc");
2150: return(NULL);
2151: }
2152:
2153: buffer[size - 1] = '\0';
2154:
2155: xmlInitParserCtxt(ctxt);
2156: ctxt->base = buffer;
2157: ctxt->cur = buffer;
2158:
2159: xmlParseDocument(ctxt);
2160: ret = ctxt->doc;
2161: free(ctxt->nodes);
1.16 daniel 2162: free(ctxt);
2163:
1.9 httpng 2164: return(ret);
1.17 daniel 2165: }
2166:
2167:
2168:
2169:
2170: /* Initialize parser context */
2171: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2172: {
1.19 daniel 2173: int i;
2174:
2175: ctxt->filename = NULL;
2176: ctxt->base = NULL;
2177: ctxt->cur = NULL;
2178: ctxt->line = 1;
2179: ctxt->col = 1;
2180: ctxt->doc = NULL;
2181: ctxt->depth = 0;
2182: ctxt->max_depth = 10;
2183: ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
2184: if (ctxt->nodes == NULL) {
2185: fprintf(stderr, "malloc of %d byte failed\n",
2186: ctxt->max_depth * sizeof(xmlNodePtr));
2187: ctxt->max_depth = 0;
2188: } else {
2189: for (i = 0;i < ctxt->max_depth;i++)
2190: ctxt->nodes[i] = NULL;
2191: }
1.17 daniel 2192: }
2193:
2194:
1.19 daniel 2195: /*
2196: * Clear (release owned resources) and reinitialize context
2197: */
1.17 daniel 2198: void xmlClearParserCtxt(xmlParserCtxtPtr ctx)
2199: {
1.19 daniel 2200: xmlInitParserCtxt(ctx);
1.17 daniel 2201: }
2202:
2203:
1.19 daniel 2204: /*
2205: * Setup the parser context to parse a new buffer; Clears any prior
2206: * contents from the parser context. The buffer parameter must not be
2207: * NULL, but the filename parameter can be
2208: */
1.17 daniel 2209: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
2210: const char* filename)
2211: {
2212: xmlClearParserCtxt(ctxt);
2213: ctxt->base = buffer;
2214: ctxt->cur = buffer;
2215: ctxt->filename = filename;
2216: }
2217:
2218:
2219:
2220: void xmlReportError(xmlParserCtxtPtr ctx, const CHAR* msg)
2221: {
2222: fputs(msg, stderr);
1.9 httpng 2223: }
Webmaster