Annotation of XML/parser.c, revision 1.24
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.24 ! daniel 6: * $Id: parser.c,v 1.23 1998/07/30 18:10:05 daniel Exp $
1.1 veillard 7: */
8:
1.9 httpng 9: #include <config.h>
1.1 veillard 10: #include <stdio.h>
11: #include <ctype.h>
1.14 veillard 12: #include <string.h> /* for memset() only */
1.1 veillard 13: #include <malloc.h>
1.9 httpng 14: #include <sys/stat.h>
15: #ifdef HAVE_FCNTL_H
16: #include <fcntl.h>
17: #endif
1.10 httpng 18: #ifdef HAVE_UNISTD_H
19: #include <unistd.h>
20: #endif
1.20 daniel 21: #ifdef HAVE_ZLIB_H
22: #include <zlib.h>
23: #endif
1.1 veillard 24:
1.14 veillard 25: #include "tree.h"
1.1 veillard 26: #include "parser.h"
1.14 veillard 27: #include "entities.h"
1.1 veillard 28:
29: /*
30: * A few macros needed to help building the parser.
31: */
32:
33: #ifdef UNICODE
/*
 * UNICODE version of the macros. Incomplete now TODO !!!!
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * NOTE: this approximation does not exclude the surrogate blocks.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))

/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * No UNICODE specific SKIP_BLANKS is defined here: the file defines
 * SKIP_BLANKS once, after the UNICODE / 8bits split, on top of IS_BLANK.
 * A second, different definition at this point only triggered a macro
 * redefinition diagnostic and was overridden by the later one.
 */

/*
 * I'm too lazy to complete this one TODO !!!!
 *
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Only the ASCII and Latin-1 part is covered. The former test
 * ((c) >= 0xaa) && ((c) <= 0x5b) could never be true and was dropped.
 * NOTE(review): 0xba is accepted here; check it against production [85].
 */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7a)) ||				\
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||				\
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||				\
     (((c) >= 0xf8) && ((c) <= 0xff)) ||				\
      ((c) == 0xba))

/*
 * I'm too lazy to complete this one TODO !!!!
 *
 * [88] Digit ::= ... long list see REC ...
 *
 * Only the ASCII digits '0'..'9' are recognized.
 */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

/*
 * I'm too lazy to complete this one TODO !!!!
 *
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Placeholder: no character is reported as a combining char.
 */
#define IS_COMBINING(c) 0

/*
 * Was in old WD ... removed from REC
 *
#define IS_IGNORABLE(c)							\
    ((((c) >= 0x200c) && ((c) <= 0x200f)) ||				\
     (((c) >= 0x202a) && ((c) <= 0x202e)) ||				\
     (((c) >= 0x206a) && ((c) <= 0x206f)) ||				\
      ((c) == 0xfeff))
 */

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 */
#define IS_EXTENDER(c)							\
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||		\
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||		\
     ((c) == 0xec6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||				\
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))

/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE(review): the range 0xf900-0xfa2d is accepted in addition to what
 * the production above lists.
 */
#define IS_IDEOGRAPHIC(c)						\
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||				\
     (((c) >= 0xf900) && ((c) <= 0xfa2d)) ||				\
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||				\
      ((c) == 0x3007))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
116:
117: #else
/*
 * 8bits / ASCII version of the macros.
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Restricted to 8 bits values: tab, line feed, carriage return and
 * anything from 0x20 upward.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))

/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Only the ASCII and Latin-1 part is covered. The former test
 * ((c) >= 0xaa) && ((c) <= 0x5b) could never be true and was dropped.
 * NOTE(review): 0xba is accepted here; check it against production [85].
 */
#define IS_BASECHAR(c)							\
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||				\
     (((c) >= 0x61) && ((c) <= 0x7a)) ||				\
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||				\
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||				\
     (((c) >= 0xf8) && ((c) <= 0xff)) ||				\
      ((c) == 0xba))

/*
 * [88] Digit ::= ... long list see REC ...
 *
 * Only the ASCII digits '0'..'9' are recognized.
 */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * There is no ideographic character in the 8 bits version.
 */
#define IS_LETTER(c) IS_BASECHAR(c)


/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Placeholder: no character is reported as a combining char.
 */
#define IS_COMBINING(c) 0

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 0xb7 is the only value of the production fitting in 8 bits.
 */
#define IS_EXTENDER(c) ((c) == 0xb7)
163:
1.21 daniel 164: #endif /* !UNICODE */
1.1 veillard 165:
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 */
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) ||	\
                     ((c) == 0x0D))

/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 */
#define IS_PUBIDCHAR(c)							\
    (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||		\
     (((c) >= 'a') && ((c) <= 'z')) ||					\
     (((c) >= 'A') && ((c) <= 'Z')) ||					\
     (((c) >= '0') && ((c) <= '9')) ||					\
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||	\
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||	\
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||	\
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||	\
     ((c) == '$') || ((c) == '_') || ((c) == '%'))

/*
 * SKIP_EOL: move p past one end of line, written either as CR, LF,
 * CR LF or LF CR. p is left untouched when it is not on an end of line.
 *
 * The tests used to be made against 0x13 and 0x10, i.e. the decimal
 * values of CR (13) and LF (10) written as hexadecimal, so no real end
 * of line was ever matched.
 */
#define SKIP_EOL(p)							\
    if (*(p) == 0x0d) { (p)++ ; if (*(p) == 0x0a) (p)++; }		\
    else if (*(p) == 0x0a) { (p)++ ; if (*(p) == 0x0d) (p)++; }

/*
 * SKIP_BLANKS: move p past any sequence of blank chars, see [3] S.
 */
#define SKIP_BLANKS(p)							\
    while (IS_BLANK(*(p))) (p)++;

/*
 * MOVETO_ENDTAG: move p to the next '>' or to the first value which
 * is not an IS_CHAR (e.g. the end of the buffer).
 */
#define MOVETO_ENDTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++;

/*
 * MOVETO_STARTTAG: move p to the next '<' or to the first value which
 * is not an IS_CHAR (e.g. the end of the buffer).
 */
#define MOVETO_STARTTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++;
200:
201: /*
1.3 veillard 202: * Forward declaration for recursive behaviour.
203: */
1.16 daniel 204: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.3 veillard 205:
206: /*
207: * xmlHandleData : this routine represent's the specific application
208: * behaviour when reading a piece of text.
209: *
210: * For example in WebDav, any piece made only of blanks is eliminated
211: */
212:
213: CHAR *xmlHandleData(CHAR *in) {
214: CHAR *cur;
215:
216: if (in == NULL) return(NULL);
217: cur = in;
218: while (IS_CHAR(*cur)) {
219: if (!IS_BLANK(*cur)) goto not_blank;
220: cur++;
221: }
222: free(in);
223: return(NULL);
224:
225: not_blank:
226: return(in);
227: }
228:
229: /*
1.1 veillard 230: * xmlStrndup : a strdup for array of CHAR's
231: */
232:
1.6 httpng 233: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 234: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
235:
236: if (ret == NULL) {
237: fprintf(stderr, "malloc of %d byte failed\n",
238: (len + 1) * sizeof(CHAR));
239: return(NULL);
240: }
241: memcpy(ret, cur, len * sizeof(CHAR));
242: ret[len] = 0;
243: return(ret);
244: }
245:
246: /*
247: * xmlStrdup : a strdup for CHAR's
248: */
249:
1.6 httpng 250: CHAR *xmlStrdup(const CHAR *cur) {
251: const CHAR *p = cur;
1.1 veillard 252:
253: while (IS_CHAR(*p)) p++;
254: return(xmlStrndup(cur, p - cur));
255: }
256:
257: /*
1.14 veillard 258: * xmlStrcmp : a strcmp for CHAR's
259: */
260:
261: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
262: register int tmp;
263:
264: do {
265: tmp = *str1++ - *str2++;
266: if (tmp != 0) return(tmp);
267: } while ((*str1 != 0) && (*str2 != 0));
268: return (*str1 - *str2);
269: }
270:
271: /*
272: * xmlStrncmp : a strncmp for CHAR's
273: */
274:
275: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
276: register int tmp;
277:
278: if (len <= 0) return(0);
279: do {
280: tmp = *str1++ - *str2++;
281: if (tmp != 0) return(tmp);
282: len--;
283: if (len <= 0) return(0);
284: } while ((*str1 != 0) && (*str2 != 0));
285: return (*str1 - *str2);
286: }
287:
288: /*
289: * xmlStrchr : a strchr for CHAR's
290: */
291:
292: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
293: while (*str != 0) {
294: if (*str == val) return((CHAR *) str);
295: str++;
296: }
297: return(NULL);
298: }
299:
300: /*
1.1 veillard 301: * xmlParseName : parse an XML name.
1.22 daniel 302: *
303: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
304: * CombiningChar | Extender
305: *
306: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
307: *
308: * [6] Names ::= Name (S Name)*
1.1 veillard 309: */
310:
1.16 daniel 311: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 312: const CHAR *q;
313: CHAR *ret = NULL;
1.1 veillard 314:
1.22 daniel 315: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_') &&
316: (ctxt->cur[0] != ':')) return(NULL);
317: q = ctxt->cur++;
318:
319: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
320: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
321: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
322: (IS_COMBINING(ctxt->cur[0])) ||
323: (IS_EXTENDER(ctxt->cur[0])))
324: ctxt->cur++;
325:
326: ret = xmlStrndup(q, ctxt->cur - q);
327:
328: return(ret);
329: }
330:
331: /*
332: * xmlParseNmtoken : parse an XML Nmtoken.
333: *
334: * [7] Nmtoken ::= (NameChar)+
335: *
336: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
337: */
338:
339: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
340: const CHAR *q;
341: CHAR *ret = NULL;
342:
1.16 daniel 343: q = ctxt->cur++;
1.22 daniel 344:
1.16 daniel 345: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1.22 daniel 346: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
347: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
348: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 349: (IS_EXTENDER(ctxt->cur[0])))
350: ctxt->cur++;
1.3 veillard 351:
1.16 daniel 352: ret = xmlStrndup(q, ctxt->cur - q);
1.1 veillard 353:
1.3 veillard 354: return(ret);
1.1 veillard 355: }
356:
357: /*
1.24 ! daniel 358: * xmlParseEntityValue : parse a value for ENTITY decl.
! 359: *
! 360: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
! 361: * "'" ([^%&'] | PEReference | Reference)* "'"
! 362: */
! 363:
! 364: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
! 365: CHAR *ret = NULL;
! 366: const CHAR *q;
! 367: int needSubst;
! 368:
! 369: if (ctxt->cur[0] == '"') {
! 370: ctxt->cur++;
! 371:
! 372: q = ctxt->cur;
! 373: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
! 374: if (ctxt->cur[0] == '%') {
! 375: needSubst = 1; /* TODO !!! */
! 376: ctxt->cur++;
! 377: } else if (ctxt->cur[0] == '%') {
! 378: needSubst = 1; /* TODO !!! */
! 379: ctxt->cur++;
! 380: } else
! 381: ctxt->cur++;
! 382: }
! 383: if (!IS_CHAR(ctxt->cur[0])) {
! 384: fprintf(stderr, "Unfinished EntityValue %30s\n", q);
! 385: } else {
! 386: ret = xmlStrndup(q, ctxt->cur - q);
! 387: ctxt->cur++;
! 388: }
! 389: } else if (ctxt->cur[0] == '\'') {
! 390: ctxt->cur++;
! 391: q = ctxt->cur;
! 392: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
! 393: if (ctxt->cur[0] == '%') {
! 394: needSubst = 1; /* TODO !!! */
! 395: ctxt->cur++;
! 396: } else if (ctxt->cur[0] == '%') {
! 397: needSubst = 1; /* TODO !!! */
! 398: ctxt->cur++;
! 399: } else
! 400: ctxt->cur++;
! 401: }
! 402: if (!IS_CHAR(ctxt->cur[0])) {
! 403: fprintf(stderr, "Unfinished EntityValue %30s\n", q);
! 404: } else {
! 405: ret = xmlStrndup(q, ctxt->cur - q);
! 406: ctxt->cur++;
! 407: }
! 408: } else {
! 409: fprintf(stderr, "xmlParseEntityValue \" or ' expected: %30s\n",
! 410: ctxt->cur);
! 411: }
! 412:
! 413: return(ret);
! 414: }
! 415:
! 416: /*
1.21 daniel 417: * xmlParseSystemLiteral : parse an XML Literal
418: *
1.22 daniel 419: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 420: */
421:
422: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
423: const CHAR *q;
424: CHAR *ret = NULL;
425:
426: if (ctxt->cur[0] == '"') {
427: ctxt->cur++;
428: q = ctxt->cur;
1.22 daniel 429: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"'))
1.21 daniel 430: ctxt->cur++;
1.22 daniel 431: if (!IS_CHAR(ctxt->cur[0])) {
1.21 daniel 432: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
433: } else {
434: ret = xmlStrndup(q, ctxt->cur - q);
435: ctxt->cur++;
436: }
437: } else if (ctxt->cur[0] == '\'') {
438: ctxt->cur++;
439: q = ctxt->cur;
1.22 daniel 440: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
1.21 daniel 441: ctxt->cur++;
1.22 daniel 442: if (!IS_CHAR(ctxt->cur[0])) {
1.21 daniel 443: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
444: } else {
445: ret = xmlStrndup(q, ctxt->cur - q);
446: ctxt->cur++;
447: }
448: } else {
449: fprintf(stderr, "SystemLiteral \" or ' expected: %30s\n", ctxt->cur);
450: }
451:
452: return(ret);
453: }
454:
455: /*
456: * xmlParse PubidLiteral: parse an XML public literal
457: *
1.22 daniel 458: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 459: */
460:
461: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
462: const CHAR *q;
463: CHAR *ret = NULL;
464: /*
465: * Name ::= (Letter | '_') (NameChar)*
466: */
467: if (ctxt->cur[0] == '"') {
468: ctxt->cur++;
469: q = ctxt->cur;
470: while (IS_PUBIDCHAR(ctxt->cur[0])) ctxt->cur++;
471: if (ctxt->cur[0] != '"') {
472: fprintf(stderr, "Unfinished PubidLiteral %30s\n", q);
473: } else {
474: ret = xmlStrndup(q, ctxt->cur - q);
475: ctxt->cur++;
476: }
477: } else if (ctxt->cur[0] == '\'') {
478: ctxt->cur++;
479: q = ctxt->cur;
480: while ((IS_LETTER(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
481: ctxt->cur++;
482: if (!IS_LETTER(ctxt->cur[0])) {
483: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
484: } else {
485: ret = xmlStrndup(q, ctxt->cur - q);
486: ctxt->cur++;
487: }
488: } else {
489: fprintf(stderr, "SystemLiteral \" or ' expected: %30s\n", ctxt->cur);
490: }
491:
492: return(ret);
493: }
494:
495: /*
1.22 daniel 496: * xmlParseExternalID: Parse an External ID
497: *
498: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
499: * | 'PUBLIC' S PubidLiteral S SystemLiteral
500: */
501:
502: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **PubidLiteral) {
503: CHAR *ExternalID = NULL;
504:
505: *PubidLiteral = NULL;
506: if ((ctxt->cur[0] == 'S') && (ctxt->cur[1] == 'Y') &&
507: (ctxt->cur[2] == 'S') && (ctxt->cur[3] == 'T') &&
508: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M')) {
509: ctxt->cur += 6;
510: SKIP_BLANKS(ctxt->cur);
511: ExternalID = xmlParseSystemLiteral(ctxt);
512: if (ExternalID == NULL)
513: fprintf(stderr, "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
514: } else if ((ctxt->cur[0] == 'P') && (ctxt->cur[1] == 'U') &&
515: (ctxt->cur[2] == 'B') && (ctxt->cur[3] == 'L') &&
516: (ctxt->cur[4] == 'I') && (ctxt->cur[5] == 'C')) {
517: ctxt->cur += 6;
518: SKIP_BLANKS(ctxt->cur);
519: *PubidLiteral = xmlParsePubidLiteral(ctxt);
520: if (*PubidLiteral == NULL)
521: fprintf(stderr, "xmlParseExternalID: PUBLIC, no PubidLiteral\n");
522: SKIP_BLANKS(ctxt->cur);
523: ExternalID = xmlParseSystemLiteral(ctxt);
524: if (ExternalID == NULL)
525: fprintf(stderr, "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
526: }
527: return(ExternalID);
528: }
529:
530: /*
1.1 veillard 531: * Parse and return a string between quotes or doublequotes
532: */
1.16 daniel 533: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.1 veillard 534: CHAR *ret = NULL;
1.17 daniel 535: const CHAR *q;
1.1 veillard 536:
1.16 daniel 537: if (ctxt->cur[0] == '"') {
538: ctxt->cur++;
539: q = ctxt->cur;
540: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++;
541: if (ctxt->cur[0] != '"')
1.7 veillard 542: fprintf(stderr, "String not closed \"%.50s\n", q);
1.1 veillard 543: else {
1.16 daniel 544: ret = xmlStrndup(q, ctxt->cur - q);
545: ctxt->cur++;
1.1 veillard 546: }
1.16 daniel 547: } else if (ctxt->cur[0] == '\''){
548: ctxt->cur++;
549: q = ctxt->cur;
550: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++;
551: if (ctxt->cur[0] != '\'')
1.7 veillard 552: fprintf(stderr, "String not closed '%.50s\n", q);
1.1 veillard 553: else {
1.16 daniel 554: ret = xmlStrndup(q, ctxt->cur - q);
555: ctxt->cur++;
1.1 veillard 556: }
557: }
558: return(ret);
559: }
560:
561: /*
1.3 veillard 562: * Skip an XML (SGML) comment <!-- .... -->
1.16 daniel 563: *
564: * TODO !!!! Save the comment in the tree !!!
1.22 daniel 565: *
566: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 567: */
1.16 daniel 568: void xmlParserSkipComment(xmlParserCtxtPtr ctxt) {
1.17 daniel 569: const CHAR *q, *start;
570: const CHAR *r;
1.3 veillard 571:
572: /*
1.22 daniel 573: * Check that there is a comment right here.
1.3 veillard 574: */
1.16 daniel 575: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
576: (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return;
1.3 veillard 577:
1.16 daniel 578: ctxt->cur += 4;
579: start = q = ctxt->cur;
580: ctxt->cur++;
581: r = ctxt->cur;
582: ctxt->cur++;
583: while (IS_CHAR(ctxt->cur[0]) &&
584: ((ctxt->cur[0] == ':') || (ctxt->cur[0] != '>') ||
585: (*r != '-') || (*q != '-'))) {
586: ctxt->cur++;r++;q++;
1.3 veillard 587: }
1.16 daniel 588: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 589: fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
1.16 daniel 590: ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
1.3 veillard 591: } else {
1.16 daniel 592: ctxt->cur++;
1.3 veillard 593: }
594: }
595:
596: /*
1.13 veillard 597: * xmlParseNamespace: parse specific '<?namespace ...' constructs.
1.22 daniel 598: *
599: * TODO !!! Check the upcoming REC ...
1.1 veillard 600: */
601:
1.16 daniel 602: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.1 veillard 603: CHAR *href = NULL;
604: CHAR *AS = NULL;
1.3 veillard 605: int garbage = 0;
1.1 veillard 606:
607: /*
1.18 daniel 608: * We just skipped "namespace" or "xml:namespace"
1.1 veillard 609: */
1.16 daniel 610: SKIP_BLANKS(ctxt->cur);
1.1 veillard 611:
1.16 daniel 612: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
1.1 veillard 613: /*
1.18 daniel 614: * We can have "ns" or "prefix" attributes
615: * Old encoding as 'href' or 'AS' attributes is still supported
1.1 veillard 616: */
1.18 daniel 617: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
618: garbage = 0;
619: ctxt->cur += 2;
620: SKIP_BLANKS(ctxt->cur);
621:
622: if (ctxt->cur[0] != '=') continue;
623: ctxt->cur++;
624: SKIP_BLANKS(ctxt->cur);
625:
626: href = xmlParseQuotedString(ctxt);
627: SKIP_BLANKS(ctxt->cur);
628: } else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
1.16 daniel 629: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
1.3 veillard 630: garbage = 0;
1.16 daniel 631: ctxt->cur += 4;
632: SKIP_BLANKS(ctxt->cur);
1.1 veillard 633:
1.16 daniel 634: if (ctxt->cur[0] != '=') continue;
635: ctxt->cur++;
636: SKIP_BLANKS(ctxt->cur);
637:
638: href = xmlParseQuotedString(ctxt);
639: SKIP_BLANKS(ctxt->cur);
1.18 daniel 640: } else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
641: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
642: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
643: garbage = 0;
644: ctxt->cur += 6;
645: SKIP_BLANKS(ctxt->cur);
646:
647: if (ctxt->cur[0] != '=') continue;
648: ctxt->cur++;
649: SKIP_BLANKS(ctxt->cur);
650:
651: AS = xmlParseQuotedString(ctxt);
652: SKIP_BLANKS(ctxt->cur);
1.16 daniel 653: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
1.3 veillard 654: garbage = 0;
1.16 daniel 655: ctxt->cur += 2;
656: SKIP_BLANKS(ctxt->cur);
1.1 veillard 657:
1.16 daniel 658: if (ctxt->cur[0] != '=') continue;
659: ctxt->cur++;
660: SKIP_BLANKS(ctxt->cur);
661:
662: AS = xmlParseQuotedString(ctxt);
663: SKIP_BLANKS(ctxt->cur);
664: } else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
1.3 veillard 665: garbage = 0;
1.16 daniel 666: ctxt->cur ++;
1.1 veillard 667: } else {
1.3 veillard 668: /*
669: * Found garbage when parsing the namespace
670: */
671: if (!garbage) fprintf(stderr,
1.13 veillard 672: "\nxmlParseNamespace found garbage: ");
1.16 daniel 673: fprintf(stderr, "%c", ctxt->cur[0]);
674: ctxt->cur++;
1.1 veillard 675: }
676: }
677:
1.16 daniel 678: MOVETO_ENDTAG(ctxt->cur);
679: ctxt->cur++;
1.1 veillard 680:
681: /*
682: * Register the DTD.
683: */
684: if (href != NULL)
1.16 daniel 685: xmlNewDtd(ctxt->doc, href, AS);
1.1 veillard 686:
1.8 veillard 687: if (AS != NULL) free(AS);
688: if (href != NULL) free(href);
1.1 veillard 689: }
690:
691: /*
1.22 daniel 692: * xmlParsePITarget: parse the name of a PI
693: *
694: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
695: */
696:
697: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
698: CHAR *name;
699:
700: name = xmlParseName(ctxt);
701: if ((name != NULL) && (name[3] == 0) &&
702: ((name[0] == 'x') || (name[0] == 'X')) &&
703: ((name[0] == 'm') || (name[0] == 'M')) &&
704: ((name[0] == 'l') || (name[0] == 'L'))) {
705: fprintf(stderr, "xmlParsePItarget: invalid name 'xml'\n");
706: return(NULL);
707: }
708: return(name);
709: }
710:
711: /*
1.3 veillard 712: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 713: *
714: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 715: */
716:
1.16 daniel 717: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 718: CHAR *target;
719:
1.16 daniel 720: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1.3 veillard 721: /*
722: * this is a Processing Instruction.
723: */
1.16 daniel 724: ctxt->cur += 2;
1.3 veillard 725:
726: /*
1.22 daniel 727: * Parse the target name and check for special support like
728: * namespace.
729: *
730: * TODO : PI handling should be dynamically redefinable using an
731: * API. Only namespace should be in the code IMHO ...
1.3 veillard 732: */
1.22 daniel 733: target = xmlParsePITarget(ctxt);
734: if (target != NULL) {
735: /*
736: * Support for the Processing Instruction related to namespace.
737: */
738: if ((target[0] == 'n') && (target[1] == 'a') &&
739: (target[2] == 'm') && (target[3] == 'e') &&
740: (target[4] == 's') && (target[5] == 'p') &&
741: (target[6] == 'a') && (target[7] == 'c') &&
742: (target[8] == 'e')) {
743: xmlParseNamespace(ctxt);
744: } else if ((target[0] == 'x') && (target[1] == 'm') &&
745: (target[2] == 'l') && (target[3] == ':') &&
746: (target[4] == 'n') && (target[5] == 'a') &&
747: (target[6] == 'm') && (target[7] == 'e') &&
748: (target[8] == 's') && (target[9] == 'p') &&
749: (target[10] == 'a') && (target[11] == 'c') &&
750: (target[12] == 'e')) {
751: xmlParseNamespace(ctxt);
752: } else {
753: /* Unknown PI, ignore it ! */
754: fprintf(stderr, "xmlParsePI : skipping unknown PI %s\n",
755: target);
756: while (IS_CHAR(ctxt->cur[0]) &&
1.24 ! daniel 757: ((ctxt->cur[0] != '?') || (ctxt->cur[1] != '>')))
1.22 daniel 758: ctxt->cur++;
759: if (!IS_CHAR(ctxt->cur[0])) {
760: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
761: target);
1.24 ! daniel 762: } else
! 763: ctxt->cur += 2;
1.22 daniel 764: }
1.3 veillard 765: } else {
1.22 daniel 766: fprintf(stderr, "xmlParsePI : no target name...\n");
767: /********* Should we try to complete parsing the PI ???
768: while (IS_CHAR(ctxt->cur[0]) &&
769: (ctxt->cur[0] != '?') && (ctxt->cur[0] != '>'))
770: ctxt->cur++;
771: if (!IS_CHAR(ctxt->cur[0])) {
772: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
773: target);
774: }
775: ********************************************************/
776: }
777: }
778: }
779:
780: /*
781: * xmlParseNotationDecl: parse a notation declaration
782: *
783: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
784: *
785: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
786: *
787: * NOTE: Actually [75] and [83] interract badly since [75] can generate
788: * 'PUBLIC' S PubidLiteral S SystemLiteral
789: *
790: * Hence there is actually 3 choices:
791: * 'PUBLIC' S PubidLiteral
792: * 'PUBLIC' S PubidLiteral S SystemLiteral
793: * and 'SYSTEM' S SystemLiteral
794: */
795:
796: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
797: CHAR *name;
798:
799: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
800: (ctxt->cur[2] == 'N') && (ctxt->cur[3] == 'O') &&
801: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'A') &&
802: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'I') &&
803: (ctxt->cur[8] == 'O') && (ctxt->cur[9] == 'N') &&
804: (IS_BLANK(ctxt->cur[10]))) {
805: ctxt->cur += 10;
806: SKIP_BLANKS(ctxt->cur);
807:
808: name = xmlParseName(ctxt);
809: if (name == NULL) {
810: fprintf(stderr,
811: "xmlParseAttributeListDecl: no name for Element %30s\n",
812: ctxt->cur - 10);
813: return;
814: }
815: SKIP_BLANKS(ctxt->cur);
816: /*
817: * TODO !!!!!!
818: */
819: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
820: ctxt->cur++;
821: free(name);
822: }
823: }
824:
825: /*
826: * xmlParseEntityDecl: parse <!ENTITY declarations
827: *
828: * [70] EntityDecl ::= GEDecl | PEDecl
829: *
830: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
831: *
832: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
833: *
834: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
835: *
836: * [74] PEDef ::= EntityValue | ExternalID
1.24 ! daniel 837: *
! 838: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 839: */
840:
841: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
842: CHAR *name;
1.24 ! daniel 843: CHAR *value = NULL;
! 844: CHAR *id = NULL, *literal = NULL;
! 845: CHAR *ndata = NULL;
1.22 daniel 846: int typePEDef = 0;
847:
848: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
849: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'N') &&
850: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
851: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'Y') &&
852: (IS_BLANK(ctxt->cur[8]))) {
853: ctxt->cur += 8;
854: SKIP_BLANKS(ctxt->cur);
855:
856: if (ctxt->cur[0] == '%') {
1.16 daniel 857: ctxt->cur++;
1.22 daniel 858: SKIP_BLANKS(ctxt->cur);
859: typePEDef = 1;
860: }
861:
862: name = xmlParseName(ctxt);
1.24 ! daniel 863: if (name == NULL) {
! 864: fprintf(stderr, "xmlParseEntityDecl: no name %30s\n",
! 865: ctxt->cur - 10);
! 866: return;
! 867: }
! 868: SKIP_BLANKS(ctxt->cur);
! 869:
1.22 daniel 870: /*
1.24 ! daniel 871: * TODO handle the various case of definitions...
1.22 daniel 872: */
1.24 ! daniel 873: if (typePEDef) {
! 874: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
! 875: value = xmlParseEntityValue(ctxt);
! 876: else {
! 877: id = xmlParseExternalID(ctxt, &literal);
! 878: }
! 879: } else {
! 880: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
! 881: value = xmlParseEntityValue(ctxt);
! 882: else {
! 883: id = xmlParseExternalID(ctxt, &literal);
! 884: SKIP_BLANKS(ctxt->cur);
! 885: if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'D') &&
! 886: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
! 887: (ctxt->cur[4] == 'A')) {
! 888: ndata = xmlParseName(ctxt);
! 889: }
! 890: }
! 891: }
! 892: SKIP_BLANKS(ctxt->cur);
! 893: if (ctxt->cur[0] != '>') {
! 894: fprintf(stderr,
! 895: "xmlParseEntityDecl: entity %s not terminated %30s\n",
! 896: ctxt->cur - 10);
! 897: } else
1.22 daniel 898: ctxt->cur++;
899: }
900: }
901:
902: /*
903: * xmlParseEnumeratedType: parse and Enumerated attribute type.
904: *
905: * [57] EnumeratedType ::= NotationType | Enumeration
906: *
907: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
908: *
909: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
910: */
911:
912: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
913: /*
914: * TODO !!!
915: */
916: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
917: ctxt->cur++;
918: }
919:
920: /*
921: * xmlParseAttributeType: parse the Attribute list def for an element
922: *
923: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
924: *
925: * [55] StringType ::= 'CDATA'
926: *
927: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
928: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
929: */
930: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
931: if ((ctxt->cur[0] == 'C') && (ctxt->cur[1] == 'D') &&
932: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
933: (ctxt->cur[4] == 'A')) {
934: ctxt->cur += 5;
935: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D')) {
936: ctxt->cur += 2;
937: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
938: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
939: (ctxt->cur[4] == 'F')) {
940: ctxt->cur += 5;
941: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
942: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
943: (ctxt->cur[4] == 'F') && (ctxt->cur[5] == 'S')) {
944: ctxt->cur += 6;
945: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
946: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
947: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'Y')) {
948: ctxt->cur += 6;
949: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
950: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
951: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
952: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'S')) {
953: ctxt->cur += 8;
954: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
955: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
956: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
957: (ctxt->cur[6] == 'N')) {
958: ctxt->cur += 7;
959: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
960: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
961: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
962: (ctxt->cur[6] == 'N') && (ctxt->cur[7] == 'S')) {
963: } else {
964: xmlParseEnumeratedType(ctxt, name);
965: }
966: }
967:
968: /*
969: * xmlParseAttributeListDecl: parse the Attribute list def for an element
970: *
971: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
972: *
973: * [53] AttDef ::= S Name S AttType S DefaultDecl
974: */
975: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
976: CHAR *name;
977:
978: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
979: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
980: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'L') &&
981: (ctxt->cur[6] == 'I') && (ctxt->cur[7] == 'S') &&
982: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
983: ctxt->cur += 9;
984: SKIP_BLANKS(ctxt->cur);
985: name = xmlParseName(ctxt);
986: if (name == NULL) {
987: fprintf(stderr,
988: "xmlParseAttributeListDecl: no name for Element %30s\n",
989: ctxt->cur - 10);
990: return;
991: }
992: SKIP_BLANKS(ctxt->cur);
993: while (ctxt->cur[0] != '>') {
994: const CHAR *check = ctxt->cur;
995:
996: xmlParseAttributeType(ctxt, name);
997: SKIP_BLANKS(ctxt->cur);
998: if (check == ctxt->cur) {
999: fprintf(stderr,
1000: "xmlParseAttributeListDecl: detected error %30s\n",
1001: check - 10);
1002: break;
1003: }
1004: }
1005: if (ctxt->cur[0] == '>')
1006: ctxt->cur++;
1007:
1008: free(name);
1009: }
1010: }
1011:
1012: /*
1013: * xmlParseElementContentDecl: parse the declaration for an Element content
1014: * either Mixed or Children, the cases EMPTY and ANY being handled
1015: * int xmlParseElementDecl.
1016: *
1017: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1018: *
1019: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1020: *
1021: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1022: *
1023: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1024: *
1025: * or
1026: *
1027: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1028: * '(' S? '#PCDATA' S? ')'
1029: */
1030:
1031: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1032: /*
1033: * TODO This has to be parsed correctly, currently we just skip until
1034: * we reach the first '>'.
1035: */
1036: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1037: ctxt->cur++;
1038: }
1039:
1040: /*
1041: * xmlParseElementDecl: parse an Element declaration.
1042: *
1043: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1044: *
1045: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1046: *
1047: * TODO There is a check [ VC: Unique Element Type Declaration ]
1048: */
1049: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1050: CHAR *name;
1051:
1052: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1053: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'L') &&
1054: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M') &&
1055: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'N') &&
1056: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1057: ctxt->cur += 9;
1058: SKIP_BLANKS(ctxt->cur);
1059: name = xmlParseName(ctxt);
1060: if (name == NULL) {
1061: fprintf(stderr, "xmlParseElementDecl: no name for Element %30s\n",
1062: ctxt->cur - 10);
1063: return;
1064: }
1065: SKIP_BLANKS(ctxt->cur);
1066: if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'M') &&
1067: (ctxt->cur[2] == 'P') && (ctxt->cur[3] == 'T') &&
1068: (ctxt->cur[4] == 'Y')) {
1069: ctxt->cur += 5;
1070: /*
1071: * Element must always be empty.
1072: */
1073: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'N') &&
1074: (ctxt->cur[2] == 'Y')) {
1075: ctxt->cur += 3;
1076: /*
1077: * Element is a generic container.
1078: */
1079: } else {
1080: xmlParseElementContentDecl(ctxt, name);
1081: }
1082: SKIP_BLANKS(ctxt->cur);
1083: if (ctxt->cur[0] != '>') {
1084: fprintf(stderr,
1085: "xmlParseElementDecl: expected '>' at the end %30s\n",
1086: ctxt->cur - 10);
1087: } else
1088: ctxt->cur++;
1089: }
1090: }
1091:
1092: /*
1093: * xmlParseMarkupDecl: parse Markup declarations
1094: *
1095: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1096: * NotationDecl | PI | Comment
1097: *
1098: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1099: */
1100: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1101: xmlParseElementDecl(ctxt);
1102: xmlParseAttributeListDecl(ctxt);
1103: xmlParseEntityDecl(ctxt);
1104: xmlParseNotationDecl(ctxt);
1105: xmlParsePI(ctxt);
1106: xmlParserSkipComment(ctxt);
1107: }
1108:
1109: /*
1.24 ! daniel 1110: * xmlParseCharRef: parse Reference declarations
! 1111: *
! 1112: * [66] CharRef ::= '&#' [0-9]+ ';' |
! 1113: * '&#x' [0-9a-fA-F]+ ';'
! 1114: */
! 1115: CHAR xmlParseCharRef(xmlParserCtxtPtr ctxt) {
! 1116: CHAR ret = 0;
! 1117:
! 1118: if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#') &&
! 1119: (ctxt->cur[2] == 'x')) {
! 1120: ctxt->cur += 3;
! 1121: while (ctxt->cur[0] != ';') {
! 1122: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
! 1123: ret = ret * 16 + (ctxt->cur[0] - '0');
! 1124: else if ((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'f'))
! 1125: ret = ret * 16 + (ctxt->cur[0] - 'a') + 10;
! 1126: else if ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'F'))
! 1127: ret = ret * 16 + (ctxt->cur[0] - 'A') + 10;
! 1128: else {
! 1129: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
! 1130: ctxt->cur - 10);
! 1131: ret = 0;
! 1132: break;
! 1133: }
! 1134: }
! 1135: if (ctxt->cur[0] != ';')
! 1136: ctxt->cur++;
! 1137: /*
! 1138: * TODO: Check the value IS_CHAR ...
! 1139: */
! 1140: } else if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#')) {
! 1141: ctxt->cur += 2;
! 1142: while (ctxt->cur[0] != ';') {
! 1143: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
! 1144: ret = ret * 16 + (ctxt->cur[0] - '0');
! 1145: else {
! 1146: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
! 1147: ctxt->cur - 10);
! 1148: ret = 0;
! 1149: break;
! 1150: }
! 1151: }
! 1152: if (ctxt->cur[0] != ';')
! 1153: ctxt->cur++;
! 1154: /*
! 1155: * TODO: Check the value IS_CHAR ...
! 1156: */
! 1157: } else {
! 1158: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
! 1159: ctxt->cur);
! 1160: }
! 1161: return(ret);
! 1162: }
! 1163:
! 1164: /*
! 1165: * xmlParseEntityRef: parse ENTITY references declarations
! 1166: *
! 1167: * [68] EntityRef ::= '&' Name ';'
! 1168: */
! 1169: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
! 1170: CHAR *name;
! 1171:
! 1172: if (ctxt->cur[0] == '&') {
! 1173: ctxt->cur++;
! 1174: name = xmlParseName(ctxt);
! 1175: if (name == NULL) {
! 1176: fprintf(stderr, "xmlParsePEReference: no name %30s\n",
! 1177: ctxt->cur - 10);
! 1178: } else {
! 1179: if (ctxt->cur[0] == ';') {
! 1180: ctxt->cur++;
! 1181: /*
! 1182: * TODO there is a VC check here !!!
! 1183: * [ VC: Entity Declared ]
! 1184: */
! 1185: free(name);
! 1186: } else {
! 1187: fprintf(stderr, "xmlParsePEReference: expecting ';' %30s\n",
! 1188: ctxt->cur - 10);
! 1189: }
! 1190: }
! 1191: }
! 1192: }
! 1193:
! 1194: /*
! 1195: * xmlParseReference: parse Reference declarations
! 1196: *
! 1197: * [67] Reference ::= EntityRef | CharRef
! 1198: */
! 1199: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
! 1200: CHAR *name;
! 1201:
! 1202: if (ctxt->cur[0] == '&') {
! 1203: return(xmlParseEntityRef(ctxt));
! 1204: } else {
! 1205: ctxt->cur++;
! 1206: name = xmlParseName(ctxt);
! 1207: if (name == NULL) {
! 1208: fprintf(stderr, "xmlParsePEReference: no name %30s\n",
! 1209: ctxt->cur - 10);
! 1210: } else {
! 1211: if (ctxt->cur[0] == ';') {
! 1212: ctxt->cur++;
! 1213: /*
! 1214: * TODO there is a VC check here !!!
! 1215: * [ VC: Entity Declared ]
! 1216: */
! 1217: free(name);
! 1218: } else {
! 1219: fprintf(stderr, "xmlParsePEReference: expecting ';' %30s\n",
! 1220: ctxt->cur - 10);
! 1221: }
! 1222: }
! 1223: }
! 1224: }
! 1225:
! 1226: /*
1.22 daniel 1227: * xmlParsePEReference: parse PEReference declarations
1228: *
1229: * [69] PEReference ::= '%' Name ';'
1230: */
1.24 ! daniel 1231: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 1232: CHAR *name;
1233:
1234: if (ctxt->cur[0] == '%') {
1235: ctxt->cur++;
1236: name = xmlParseName(ctxt);
1237: if (name == NULL) {
1238: fprintf(stderr, "xmlParsePEReference: no name %30s\n",
1239: ctxt->cur - 10);
1240: } else {
1241: if (ctxt->cur[0] == ';') {
1242: ctxt->cur++;
1243: /*
1244: * TODO there is a VC check here !!!
1245: * [ VC: Entity Declared ]
1246: */
1247: free(name);
1248: } else {
1249: fprintf(stderr, "xmlParsePEReference: expecting ';' %30s\n",
1250: ctxt->cur - 10);
1251: }
1.3 veillard 1252: }
1253: }
1254: }
1255:
1256: /*
1.21 daniel 1257: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
1258: *
1.22 daniel 1259: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1260: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 1261: */
1262:
1263: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1264: CHAR *name;
1265: CHAR *ExternalID = NULL;
1.22 daniel 1266: CHAR *SystemID = NULL;
1.21 daniel 1267:
1268: /*
1269: * We know that '<!DOCTYPE' has been detected.
1270: */
1271: ctxt->cur += 9;
1272:
1273: SKIP_BLANKS(ctxt->cur);
1274:
1275: /*
1276: * Parse the DOCTYPE name.
1277: */
1278: name = xmlParseName(ctxt);
1279: if (name == NULL) {
1280: fprintf(stderr, "xmlParseDocTypeDecl : no DOCTYPE name ! : %30s\n",
1281: ctxt->cur - 10);
1282: }
1283:
1284: SKIP_BLANKS(ctxt->cur);
1285:
1286: /*
1.22 daniel 1287: * Check for SystemID and ExternalID
1288: */
1289: SystemID = xmlParseExternalID(ctxt, &ExternalID);
1290: SKIP_BLANKS(ctxt->cur);
1291:
1292: /*
1293: * Is there any DTD definition ?
1294: */
1295: if (ctxt->cur[0] == '[') {
1296: ctxt->cur++;
1297: /*
1298: * Parse the succession of Markup declarations and
1299: * PEReferences.
1300: * Subsequence (markupdecl | PEReference | S)*
1301: */
1302: while (ctxt->cur[0] != ']') {
1303: const CHAR *check = ctxt->cur;
1304:
1305: SKIP_BLANKS(ctxt->cur);
1306: xmlParseMarkupDecl(ctxt);
1307: xmlParsePEReference(ctxt);
1308:
1309: if (ctxt->cur == check) {
1310: fprintf(stderr,
1311: "xmlParseDocTypeDecl: error detected in Markup declaration\n\t%50s\n",
1312: check - 10);
1313: break;
1314: }
1315: }
1316: if (ctxt->cur[0] == ']') ctxt->cur++;
1317: }
1318:
1319: /*
1320: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 1321: */
1.22 daniel 1322: if (ctxt->cur[0] != '>') {
1323: fprintf(stderr, "DOCTYPE unproperly terminated %30s\n",
1324: ctxt->cur - 10);
1325: /* We shouldn't try to resynchronize ... */
1.21 daniel 1326: }
1.22 daniel 1327: ctxt->cur++;
1328:
1329: /*
1330: * Cleanup, since we don't use all those identifiers
1331: * TODO : the DOCTYPE if available should be stored !
1332: */
1333: if (SystemID != NULL) free(SystemID);
1334: if (ExternalID != NULL) free(ExternalID);
1335: if (name != NULL) free(name);
1.21 daniel 1336: }
1337:
1338: /*
1.3 veillard 1339: * xmlParseAttribute: parse a start of tag.
1340: *
1.22 daniel 1341: * [41] Attribute ::= Name Eq AttValue
1342: *
1343: * [25] Eq ::= S? '=' S?
1344: *
1345: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1346: * "'" ([^<&'] | Reference)* "'"
1.3 veillard 1347: */
1348:
1.16 daniel 1349: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1350: const CHAR *q;
1351: CHAR *name, *value = NULL;
1.3 veillard 1352:
1.22 daniel 1353: name = xmlParseName(ctxt);
1354: if (name == NULL) {
1355: fprintf(stderr,
1356: "xmlParseAttribute: error parsing attribute name %30s\n",
1.23 daniel 1357: ctxt->cur - 10);
1.3 veillard 1358: }
1.22 daniel 1359: /*
1360: * TODO: Check for Namespace ...
1361: */
1.3 veillard 1362:
1363: /*
1364: * We should have the equal, we are laxist here and allow attributes
1.22 daniel 1365: * without values ?!?.
1366: */
1367: /*
1368: * !!!!! TODO !!!!!! Rewrite this is absolutely not clean !!!!
1.3 veillard 1369: */
1.16 daniel 1370: SKIP_BLANKS(ctxt->cur);
1371: if (ctxt->cur[0] == '=') {
1372: ctxt->cur++;
1373: SKIP_BLANKS(ctxt->cur);
1374: if ((ctxt->cur[0] != '\'') && (ctxt->cur[0] != '"')) {
1.7 veillard 1375: fprintf(stderr, "Quotes were expected for attribute value %.20s\n",
1.23 daniel 1376: ctxt->cur - 10);
1.3 veillard 1377: } else
1.16 daniel 1378: value = xmlParseQuotedString(ctxt);
1.3 veillard 1379: }
1380:
1381: /*
1382: * Add the attribute to the node.
1383: */
1.17 daniel 1384: if (name != NULL) {
1.3 veillard 1385: xmlNewProp(node, name, value);
1.17 daniel 1386: free(name);
1387: }
1388: if ( value != NULL )
1389: free(value);
1.3 veillard 1390: }
1391:
1392: /*
1.2 veillard 1393: * xmlParseStartTag: parse a start of tag.
1394: */
1395:
1.16 daniel 1396: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.17 daniel 1397: const CHAR *q;
1398: CHAR *ns, *name;
1.3 veillard 1399: xmlDtdPtr dtd = NULL;
1.2 veillard 1400: xmlNodePtr ret = NULL;
1401:
1402: /*
1.3 veillard 1403: * Theorically one should just parse a Name, but with the addition
1404: * of the namespace needed for WebDav, it's a bit more complicated
1405: * since the element name may be prefixed by a namespace prefix.
1406: *
1407: * QName ::= (NSPart ':')? LocalPart
1408: * NSPart ::= Name
1409: * LocalPart ::= Name
1410: * STag ::= '<' QName (S Attribute)* S? '>'
1411: *
1412: * instead of :
1413: *
1414: * STag ::= '<' QName (S Attribute)* S? '>'
1.2 veillard 1415: */
1.16 daniel 1416: if (ctxt->cur[0] != '<') return(NULL);
1417: ctxt->cur++;
1.3 veillard 1418:
1.16 daniel 1419: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
1420: q = ctxt->cur++;
1421: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1422: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
1423: (ctxt->cur[0] == '_') ||
1.22 daniel 1424: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 1425: (IS_EXTENDER(ctxt->cur[0])))
1426: ctxt->cur++;
1.3 veillard 1427:
1.16 daniel 1428: if (ctxt->cur[0] == ':') {
1429: ns = xmlStrndup(q, ctxt->cur - q);
1.3 veillard 1430:
1.16 daniel 1431: ctxt->cur++; /* skip the column */
1432: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
1.7 veillard 1433: fprintf(stderr,
1434: "Start tag : no element name after namespace identifier %.20s\n",
1.3 veillard 1435: q);
1436: free(ns);
1437: return(NULL);
1438: }
1.16 daniel 1439: q = ctxt->cur++;
1440: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1441: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
1442: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
1.22 daniel 1443: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 1444: (IS_EXTENDER(ctxt->cur[0])))
1445: ctxt->cur++;
1446: name = xmlStrndup(q, ctxt->cur - q);
1.3 veillard 1447:
1448: /*
1449: * Search the DTD associated to ns.
1450: */
1.16 daniel 1451: dtd = xmlSearchDtd(ctxt->doc, ns);
1.3 veillard 1452: if (dtd == NULL)
1.7 veillard 1453: fprintf(stderr, "Start tag : Couldn't find namespace %s\n", ns);
1.3 veillard 1454: free(ns);
1455: } else
1.16 daniel 1456: name = xmlStrndup(q, ctxt->cur - q);
1.3 veillard 1457:
1458: ret = xmlNewNode(dtd, name, NULL);
1.2 veillard 1459:
1.3 veillard 1460: /*
1461: * Now parse the attributes, it ends up with the ending
1462: *
1463: * (S Attribute)* S?
1464: */
1.16 daniel 1465: SKIP_BLANKS(ctxt->cur);
1466: while ((IS_CHAR(ctxt->cur[0])) &&
1467: (ctxt->cur[0] != '>') &&
1468: ((ctxt->cur[0] != '/') || (ctxt->cur[1] != '>'))) {
1469: if (IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_'))
1470: xmlParseAttribute(ctxt, ret);
1.3 veillard 1471: else {
1.14 veillard 1472: /* We should warn TODO !!! */
1.16 daniel 1473: ctxt->cur++;
1.3 veillard 1474: }
1.16 daniel 1475: SKIP_BLANKS(ctxt->cur);
1.3 veillard 1476: }
1477:
1478: return(ret);
1479: }
1480:
1481: /*
1.7 veillard 1482: * xmlParseEndTag: parse an end of tag, note that the '</' part has
1483: * already been read.
1484: */
1485:
1.16 daniel 1486: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
1.17 daniel 1487: const CHAR *q;
1488: CHAR *ns, *name;
1.7 veillard 1489: xmlDtdPtr dtd = NULL;
1490:
1491: *dtdPtr = NULL;
1492: *tagPtr = NULL;
1493:
1494: /*
1495: * Theorically one should just parse a Name, but with the addition
1496: * of the namespace needed for WebDav, it's a bit more complicated
1497: * since the element name may be prefixed by a namespace prefix.
1498: *
1499: * QName ::= (NSPart ':')? LocalPart
1500: * NSPart ::= Name
1501: * LocalPart ::= Name
1502: * ETag ::= '</' QName S? '>'
1503: *
1504: * instead of :
1505: *
1506: * ETag ::= '</' Name S? '>'
1507: */
1.16 daniel 1508: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return;
1509: q = ctxt->cur++;
1510: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1511: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
1512: (ctxt->cur[0] == '_') ||
1.22 daniel 1513: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 1514: (IS_EXTENDER(ctxt->cur[0])))
1515: ctxt->cur++;
1.7 veillard 1516:
1.16 daniel 1517: if (ctxt->cur[0] == ':') {
1518: ns = xmlStrndup(q, ctxt->cur - q);
1.7 veillard 1519:
1.16 daniel 1520: ctxt->cur++; /* skip the column */
1521: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
1.7 veillard 1522: fprintf(stderr,
1523: "End tag : no element name after namespace identifier %.20s\n",
1524: q);
1525: free(ns);
1526: return;
1527: }
1.16 daniel 1528: q = ctxt->cur++;
1529: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1530: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
1531: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
1.22 daniel 1532: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 1533: (IS_EXTENDER(ctxt->cur[0])))
1534: ctxt->cur++;
1535: name = xmlStrndup(q, ctxt->cur - q);
1.7 veillard 1536:
1537: /*
1538: * Search the DTD associated to ns.
1539: */
1.16 daniel 1540: dtd = xmlSearchDtd(ctxt->doc, ns);
1.7 veillard 1541: if (dtd == NULL)
1542: fprintf(stderr, "End tag : Couldn't find namespace %s\n", ns);
1543: free(ns);
1544: } else
1.16 daniel 1545: name = xmlStrndup(q, ctxt->cur - q);
1.7 veillard 1546:
1547: *dtdPtr = dtd;
1548: *tagPtr = name;
1549:
1550: /*
1551: * We should definitely be at the ending "S? '>'" part
1552: */
1.16 daniel 1553: SKIP_BLANKS(ctxt->cur);
1554: if ((!IS_CHAR(ctxt->cur[0])) || (ctxt->cur[0] != '>')) {
1555: fprintf(stderr, "End tag : expected '>', got %.20s\n", ctxt->cur);
1.7 veillard 1556: /*
1557: * Note : skipping to the next '>' is probably otherkill,
1558: * especially in case the '>' is hust missing.
1559: *
1560: * Otherwise add:
1.16 daniel 1561: * MOVETO_ENDTAG(ctxt->cur);
1.7 veillard 1562: */
1563: } else
1.16 daniel 1564: ctxt->cur++;
1.7 veillard 1565:
1566: return;
1567: }
1568:
1569: /*
1.3 veillard 1570: * xmlParseCDSect: escaped pure raw content.
1571: */
1.16 daniel 1572: CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 1573: const CHAR *r, *s, *base;
1574: CHAR *ret;
1.3 veillard 1575:
1.16 daniel 1576: base = ctxt->cur;
1577: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 1578: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 1579: return(NULL);
1580: }
1.16 daniel 1581: r = ctxt->cur++;
1582: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 1583: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 1584: return(NULL);
1585: }
1.16 daniel 1586: s = ctxt->cur++;
1587: while (IS_CHAR(ctxt->cur[0]) &&
1588: ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) {
1589: r++;s++;ctxt->cur++;
1.3 veillard 1590: }
1.16 daniel 1591: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 1592: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 1593: return(NULL);
1594: }
1.16 daniel 1595: ret = xmlStrndup(base, ctxt->cur-base);
1596:
1.2 veillard 1597: return(ret);
1598: }
1599:
1600: /*
1601: * xmlParseContent: a content is
1602: * (element | PCData | Reference | CDSect | PI | Comment)
1603: *
1604: * element : starts by '<'
1605: * PCData : any CHAR but '&' or '<'
1606: * Reference : starts by '&'
1607: * CDSect : starts by '<![CDATA['
1608: * PI : starts by '<?'
1609: */
1610:
1.16 daniel 1611: xmlNodePtr xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1612: const CHAR *q;
1613: CHAR *data = NULL;
1.2 veillard 1614: xmlNodePtr ret = NULL;
1615:
1616: /*
1.3 veillard 1617: * First case : a Processing Instruction.
1618: */
1.16 daniel 1619: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1620: xmlParsePI(ctxt);
1.3 veillard 1621: }
1622: /*
1623: * Second case : a CDSection
1.2 veillard 1624: */
1.16 daniel 1625: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1626: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
1627: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
1628: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
1629: (ctxt->cur[8] == '[')) {
1630: ctxt->cur += 9;
1631: data = xmlParseCDSect(ctxt);
1.3 veillard 1632: }
1633: /*
1634: * Third case : a sub-element.
1635: */
1.16 daniel 1636: else if (ctxt->cur[0] == '<') {
1637: ret = xmlParseElement(ctxt);
1.3 veillard 1638: }
1639: /*
1640: * Last case, text. Note that References are handled directly.
1641: */
1642: else {
1.16 daniel 1643: q = ctxt->cur;
1644: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
1.3 veillard 1645:
1.16 daniel 1646: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 1647: fprintf(stderr, "Truncated content : %.50s\n", q);
1.3 veillard 1648: return(NULL);
1649: }
1.14 veillard 1650:
1651: /*
1652: * Do the Entities decoding...
1653: */
1.16 daniel 1654: data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
1.3 veillard 1655: }
1656:
1657: /*
1658: * Handle the data if any. If there is no child
1659: * add it as content, otherwise create a new node of type text.
1660: */
1661: if (data != NULL)
1662: data = xmlHandleData(data);
1663: if (data != NULL) {
1664: if (node->childs == NULL)
1665: xmlNodeSetContent(node, data);
1.17 daniel 1666: else
1.3 veillard 1667: ret = xmlNewText(data);
1.17 daniel 1668: free(data);
1.3 veillard 1669: }
1.2 veillard 1670:
1671: return(ret);
1672: }
1673:
1674: /*
1675: * xmlParseElement: parse an XML element
1676: */
1677:
1.16 daniel 1678: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.2 veillard 1679: xmlNodePtr ret, child;
1.17 daniel 1680: const CHAR *openTag = ctxt->cur;
1681: const CHAR *closeTag = ctxt->cur;
1.2 veillard 1682:
1.16 daniel 1683: ret = xmlParseStartTag(ctxt);
1.3 veillard 1684: if (ret == NULL) {
1685: return(NULL);
1686: }
1.2 veillard 1687:
1688: /*
1689: * Check for an Empty Element.
1690: */
1.16 daniel 1691: if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
1692: ctxt->cur += 2;
1.2 veillard 1693: return(ret);
1694: }
1.16 daniel 1695: if (ctxt->cur[0] == '>') ctxt->cur++;
1.2 veillard 1696: else {
1.16 daniel 1697: fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", openTag);
1698: return(NULL);
1.2 veillard 1699: }
1700:
1701: /*
1702: * Parse the content of the element:
1703: * (element | PCData | Reference | CDSect | PI | Comment) *
1704: *
1705: * element : starts by '<'
1706: * PCData : any CHAR but '&' or '<'
1707: * Reference : starts by '&'
1708: * CDSect : starts by '<![CDATA['
1709: * PI : starts by '<?'
1710: *
1711: * The loop stops upon detection of an end of tag '</'
1712: */
1.16 daniel 1713: while ((IS_CHAR(ctxt->cur[0])) &&
1714: ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/'))) {
1715: child = xmlParseContent(ctxt, ret);
1.2 veillard 1716: if (child != NULL)
1717: xmlAddChild(ret, child);
1718: }
1.16 daniel 1719: if (!IS_CHAR(ctxt->cur[0])) {
1720: fprintf(stderr, "Premature end of data in tag %.30s\n", openTag);
1721: return(NULL);
1.2 veillard 1722: }
1723:
1724: /*
1725: * parse the end of tag : '</' has been detected.
1726: */
1.16 daniel 1727: ctxt->cur += 2;
1728: if (ctxt->cur[0] == '>') ctxt->cur++; /* simplified closing </> */
1.2 veillard 1729: else {
1.7 veillard 1730: CHAR *endTag;
1731: xmlDtdPtr endDtd;
1732:
1.16 daniel 1733: xmlParseEndTag(ctxt, &endDtd, &endTag);
1.7 veillard 1734:
1.2 veillard 1735: /*
1.7 veillard 1736: * Check that the Name in the ETag is the same as in the STag.
1.2 veillard 1737: */
1.7 veillard 1738: if (endDtd != ret->dtd) {
1739: fprintf(stderr, "Start and End tags don't use the same DTD:\n");
1740: fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
1741: }
1742: if (strcmp(ret->name, endTag)) {
1743: fprintf(stderr, "Start and End tags don't use the same name:\n");
1744: fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
1745: }
1.17 daniel 1746:
1747: if ( endTag != NULL )
1748: free(endTag);
1.2 veillard 1749: }
1750:
1751: return(ret);
1752: }
1753:
1754: /*
1.1 veillard 1755: * xmlParseXMLDecl: parse an XML declaration header
1756: */
1757:
1.16 daniel 1758: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 1759: CHAR *version;
1760:
1761: /*
1.19 daniel 1762: * We know that '<?xml' is here.
1.1 veillard 1763: */
1.16 daniel 1764: ctxt->cur += 5;
1.1 veillard 1765:
1766: /*
1767: * Parse the version info
1768: */
1.16 daniel 1769: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1770:
1771: /*
1772: * We should have 'version=' here !
1773: */
1.16 daniel 1774: if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
1775: (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
1776: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
1777: (ctxt->cur[6] == 'n') && (ctxt->cur[7] == '=')) {
1778: ctxt->cur += 8;
1779: version = xmlParseQuotedString(ctxt);
1.1 veillard 1780: if (version == NULL)
1.16 daniel 1781: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 1782: else {
1.16 daniel 1783: ctxt->doc = xmlNewDoc(version);
1.8 veillard 1784: free(version);
1.1 veillard 1785: }
1786: } else {
1.16 daniel 1787: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 1788: }
1789:
1790: /*
1.14 veillard 1791: * We should check for Required Markup Declaration TODO !!!!
1.1 veillard 1792: */
1.16 daniel 1793: MOVETO_ENDTAG(ctxt->cur);
1794: ctxt->cur++;
1.1 veillard 1795:
1796: }
1797:
1798: /*
1.22 daniel 1799: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 1800: * Misc*
1801: *
1.22 daniel 1802: * [27] Misc ::= Comment | PI | S
1.1 veillard 1803: */
1804:
1.16 daniel 1805: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
1806: while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) ||
1807: ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1.21 daniel 1808: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) ||
1.16 daniel 1809: IS_BLANK(ctxt->cur[0])) {
1810: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1811: xmlParsePI(ctxt);
1812: } else if (IS_BLANK(ctxt->cur[0])) {
1813: ctxt->cur++;
1.1 veillard 1814: } else
1.16 daniel 1815: xmlParserSkipComment(ctxt);
1.1 veillard 1816: }
1817: }
1818:
1819: /*
1.16 daniel 1820: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 1821: *
1.22 daniel 1822: * [1] document ::= prolog element Misc*
1.21 daniel 1823: * prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 1824: */
1825:
1.16 daniel 1826: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.14 veillard 1827: /*
1828: * We should check for encoding here and plug-in some
1829: * conversion code TODO !!!!
1830: */
1.1 veillard 1831:
1832: /*
1833: * Wipe out everything which is before the first '<'
1834: */
1.16 daniel 1835: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1836:
1837: /*
1838: * Check for the XMLDecl in the Prolog.
1839: */
1.16 daniel 1840: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.19 daniel 1841: (ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
1842: (ctxt->cur[4] == 'l')) {
1843: xmlParseXMLDecl(ctxt);
1844: /* SKIP_EOL(cur); */
1845: SKIP_BLANKS(ctxt->cur);
1846: } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.16 daniel 1847: (ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
1848: (ctxt->cur[4] == 'L')) {
1.19 daniel 1849: /*
1850: * The first drafts were using <?XML and the final W3C REC
1851: * now use <?xml ...
1852: */
1.16 daniel 1853: xmlParseXMLDecl(ctxt);
1.1 veillard 1854: /* SKIP_EOL(cur); */
1.16 daniel 1855: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1856: } else {
1.16 daniel 1857: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 1858: }
1859:
1860: /*
1861: * The Misc part of the Prolog
1.21 daniel 1862: * Misc*
1863: * Misc ::= Comment | PI | S
1.1 veillard 1864: */
1.16 daniel 1865: xmlParseMisc(ctxt);
1.1 veillard 1866:
1867: /*
1.21 daniel 1868: * Then possibly doc type decalration(s) and more Misc
1869: * (doctypedecl Misc*)?
1870: */
1.22 daniel 1871: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1872: (ctxt->cur[2] == 'D') && (ctxt->cur[3] == 'O') &&
1873: (ctxt->cur[4] == 'C') && (ctxt->cur[5] == 'T') &&
1874: (ctxt->cur[6] == 'Y') && (ctxt->cur[7] == 'P') &&
1875: (ctxt->cur[8] == 'E')) {
1876: xmlParseDocTypeDecl(ctxt);
1877: xmlParseMisc(ctxt);
1.21 daniel 1878: }
1879:
1880: /*
1881: * Time to start parsing the tree itself
1.1 veillard 1882: */
1.16 daniel 1883: ctxt->doc->root = xmlParseElement(ctxt);
1884:
1885: return(0);
1886: }
1887:
1888: /*
1889: * xmlParseDoc : parse an XML in-memory document and build a tree.
1890: */
1891:
1892: xmlDocPtr xmlParseDoc(CHAR *cur) {
1893: xmlDocPtr ret;
1894: xmlParserCtxtPtr ctxt;
1895:
1896: if (cur == NULL) return(NULL);
1.1 veillard 1897:
1.16 daniel 1898: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
1899: if (ctxt == NULL) {
1900: perror("malloc");
1901: return(NULL);
1902: }
1903:
1.19 daniel 1904: xmlInitParserCtxt(ctxt);
1.16 daniel 1905: ctxt->base = cur;
1906: ctxt->cur = cur;
1907:
1908: xmlParseDocument(ctxt);
1909: ret = ctxt->doc;
1.20 daniel 1910: free(ctxt->nodes);
1.16 daniel 1911: free(ctxt);
1912:
1.1 veillard 1913: return(ret);
1914: }
1915:
1.9 httpng 1916: /*
1917: * xmlParseFile : parse an XML file and build a tree.
1918: */
1919:
1920: xmlDocPtr xmlParseFile(const char *filename) {
1921: xmlDocPtr ret;
1.20 daniel 1922: #ifdef HAVE_ZLIB_H
1923: gzFile input;
1924: #else
1.9 httpng 1925: int input;
1.20 daniel 1926: #endif
1.9 httpng 1927: int res;
1928: struct stat buf;
1929: char *buffer;
1.16 daniel 1930: xmlParserCtxtPtr ctxt;
1.9 httpng 1931:
1.11 veillard 1932: res = stat(filename, &buf);
1.9 httpng 1933: if (res < 0) return(NULL);
1934:
1.20 daniel 1935: #ifdef HAVE_ZLIB_H
1936: retry_bigger:
1937: buffer = malloc((buf.st_size * 20) + 100);
1938: #else
1.9 httpng 1939: buffer = malloc(buf.st_size + 100);
1.20 daniel 1940: #endif
1.9 httpng 1941: if (buffer == NULL) {
1942: perror("malloc");
1943: return(NULL);
1944: }
1945:
1946: memset(buffer, 0, sizeof(buffer));
1.20 daniel 1947: #ifdef HAVE_ZLIB_H
1948: input = gzopen (filename, "r");
1949: if (input == NULL) {
1950: fprintf (stderr, "Cannot read file %s :\n", filename);
1951: perror ("gzopen failed");
1952: return(NULL);
1953: }
1954: #else
1.9 httpng 1955: input = open (filename, O_RDONLY);
1956: if (input < 0) {
1957: fprintf (stderr, "Cannot read file %s :\n", filename);
1958: perror ("open failed");
1959: return(NULL);
1960: }
1.20 daniel 1961: #endif
1962: #ifdef HAVE_ZLIB_H
1963: res = gzread(input, buffer, 20 * buf.st_size);
1964: #else
1.9 httpng 1965: res = read(input, buffer, buf.st_size);
1.20 daniel 1966: #endif
1.9 httpng 1967: if (res < 0) {
1968: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 1969: #ifdef HAVE_ZLIB_H
1970: perror ("gzread failed");
1971: #else
1.9 httpng 1972: perror ("read failed");
1.20 daniel 1973: #endif
1.9 httpng 1974: return(NULL);
1975: }
1.20 daniel 1976: #ifdef HAVE_ZLIB_H
1977: gzclose(input);
1978: if (res >= 20 * buf.st_size) {
1979: free(buffer);
1980: buf.st_size *= 2;
1981: goto retry_bigger;
1982: }
1983: buf.st_size = res;
1984: #else
1.9 httpng 1985: close(input);
1.20 daniel 1986: #endif
1987:
1.9 httpng 1988:
1.16 daniel 1989: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
1990: if (ctxt == NULL) {
1991: perror("malloc");
1992: return(NULL);
1993: }
1.9 httpng 1994: buffer[buf.st_size] = '\0';
1.16 daniel 1995:
1.19 daniel 1996: xmlInitParserCtxt(ctxt);
1.17 daniel 1997: ctxt->filename = filename;
1.16 daniel 1998: ctxt->base = buffer;
1999: ctxt->cur = buffer;
2000:
2001: xmlParseDocument(ctxt);
2002: ret = ctxt->doc;
1.9 httpng 2003: free(buffer);
1.20 daniel 2004: free(ctxt->nodes);
2005: free(ctxt);
2006:
2007: return(ret);
2008: }
2009:
2010: /*
2011: * xmlParseFile : parse an XML memory block and build a tree.
2012: */
2013:
2014: xmlDocPtr xmlParseMemory(char *buffer, int size) {
2015: xmlDocPtr ret;
2016: xmlParserCtxtPtr ctxt;
2017:
2018: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2019: if (ctxt == NULL) {
2020: perror("malloc");
2021: return(NULL);
2022: }
2023:
2024: buffer[size - 1] = '\0';
2025:
2026: xmlInitParserCtxt(ctxt);
2027: ctxt->base = buffer;
2028: ctxt->cur = buffer;
2029:
2030: xmlParseDocument(ctxt);
2031: ret = ctxt->doc;
2032: free(ctxt->nodes);
1.16 daniel 2033: free(ctxt);
2034:
1.9 httpng 2035: return(ret);
1.17 daniel 2036: }
2037:
2038:
2039:
2040:
2041: /* Initialize parser context */
2042: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2043: {
1.19 daniel 2044: int i;
2045:
2046: ctxt->filename = NULL;
2047: ctxt->base = NULL;
2048: ctxt->cur = NULL;
2049: ctxt->line = 1;
2050: ctxt->col = 1;
2051: ctxt->doc = NULL;
2052: ctxt->depth = 0;
2053: ctxt->max_depth = 10;
2054: ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
2055: if (ctxt->nodes == NULL) {
2056: fprintf(stderr, "malloc of %d byte failed\n",
2057: ctxt->max_depth * sizeof(xmlNodePtr));
2058: ctxt->max_depth = 0;
2059: } else {
2060: for (i = 0;i < ctxt->max_depth;i++)
2061: ctxt->nodes[i] = NULL;
2062: }
1.17 daniel 2063: }
2064:
2065:
1.19 daniel 2066: /*
2067: * Clear (release owned resources) and reinitialize context
2068: */
1.17 daniel 2069: void xmlClearParserCtxt(xmlParserCtxtPtr ctx)
2070: {
1.19 daniel 2071: xmlInitParserCtxt(ctx);
1.17 daniel 2072: }
2073:
2074:
1.19 daniel 2075: /*
2076: * Setup the parser context to parse a new buffer; Clears any prior
2077: * contents from the parser context. The buffer parameter must not be
2078: * NULL, but the filename parameter can be
2079: */
1.17 daniel 2080: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
2081: const char* filename)
2082: {
2083: xmlClearParserCtxt(ctxt);
2084: ctxt->base = buffer;
2085: ctxt->cur = buffer;
2086: ctxt->filename = filename;
2087: }
2088:
2089:
2090:
2091: void xmlReportError(xmlParserCtxtPtr ctx, const CHAR* msg)
2092: {
2093: fputs(msg, stderr);
1.9 httpng 2094: }
Webmaster