Annotation of XML/parser.c, revision 1.4
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.1 veillard 3: */
4:
5: #include <stdio.h>
6: #include <ctype.h>
7: #include <string.h>
8: #include <malloc.h>
9:
10: #include "parser.h"
11: #include "tree.h"
12:
/*
 * A few character classification macros needed to help building the parser.
 *
 * Each macro takes a character code and yields non-zero when the code
 * belongs to the class. The argument is evaluated several times, so it
 * must not have side effects.
 */

#ifdef UNICODE
/*
 * UNICODE version of the macros. Incomplete now !!!!
 *
 * SKIP_BLANKS is not defined in this branch: the generic definition built
 * on IS_BLANK that follows the #endif serves both configurations, and a
 * second, different definition here would be a conflicting redefinition.
 */
#define IS_CHAR(c) \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))

/* I'm too lazy to complete this one !!!! */
/* 0xaa and 0xba are isolated codes, not the bounds of a range. */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     ((c) == 0xaa) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)) || \
     ((c) == 0xba))

/* I'm too lazy to complete this one !!!! */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

/* I'm too lazy to complete this one !!!! */
#define IS_COMBINING(c) 0

#define IS_IGNORABLE(c) \
    ((((c) >= 0x200c) && ((c) <= 0x200f)) || \
     (((c) >= 0x202a) && ((c) <= 0x202e)) || \
     (((c) >= 0x206a) && ((c) <= 0x206f)) || \
     ((c) == 0xfeff))

#define IS_EXTENDER(c) \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
     ((c) == 0xec6) || ((c) == 0x3005) || \
     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
     (((c) >= 0x309b) && ((c) <= 0x309e)) || \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)) || \
     (((c) >= 0xff70) && ((c) <= 0xff9e)) || \
     ((c) == 0xff9f))

#define IS_IDEOGRAPHIC(c) \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
     (((c) >= 0xf900) && ((c) <= 0xfa2d)) || \
     (((c) >= 0x3021) && ((c) <= 0x3029)) || \
     ((c) == 0x3007))

#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

/* White space: S ::= (#x20 | #x9 | #xD | #xA)+ */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
#else
/*
 * 8bits / ASCII version of the macros.
 */
#define IS_CHAR(c) \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))

/* 0xaa and 0xba are isolated codes, not the bounds of a range. */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     ((c) == 0xaa) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)) || \
     ((c) == 0xba))

#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

#define IS_LETTER(c) IS_BASECHAR(c)

#define IS_COMBINING(c) 0

#define IS_IGNORABLE(c) 0

#define IS_EXTENDER(c) ((c) == 0xb7)

/* White space: S ::= (#x20 | #x9 | #xD | #xA)+ */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
#endif
99:
100:
/*
 * Pointer-advancing helpers.
 *
 * `p` must be a modifiable pointer lvalue; it is evaluated several times.
 * Each macro expands to exactly one statement and must be followed by a
 * semicolon at the point of use.
 */

/* Skip a single end of line: CR, LF, CR LF or LF CR. */
#define SKIP_EOL(p) \
    do { \
        if (*(p) == 0x0d) { (p)++; if (*(p) == 0x0a) (p)++; } \
        else if (*(p) == 0x0a) { (p)++; if (*(p) == 0x0d) (p)++; } \
    } while (0)

/* Advance past any run of blank characters. */
#define SKIP_BLANKS(p) \
    do { while (IS_BLANK(*(p))) (p)++; } while (0)

/* Advance up to the next '>' or to the first non-CHAR code (end of data). */
#define MOVETO_ENDTAG(p) \
    do { while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++; } while (0)

/* Advance up to the next '<' or to the first non-CHAR code (end of data). */
#define MOVETO_STARTTAG(p) \
    do { while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++; } while (0)
113:
114: /*
1.3 veillard 115: * Forward definition for recursive behaviour.
116: */
117: xmlNodePtr xmlParseElement(CHAR **p, xmlDocPtr doc);
118:
119: /*
120: * xmlHandleData : this routine represent's the specific application
121: * behaviour when reading a piece of text.
122: *
123: * For example in WebDav, any piece made only of blanks is eliminated
124: */
125:
126: CHAR *xmlHandleData(CHAR *in) {
127: CHAR *cur;
128:
129: if (in == NULL) return(NULL);
130: cur = in;
131: while (IS_CHAR(*cur)) {
132: if (!IS_BLANK(*cur)) goto not_blank;
133: cur++;
134: }
135: free(in);
136: return(NULL);
137:
138: not_blank:
139: return(in);
140: }
141:
142: /*
1.1 veillard 143: * xmlStrndup : a strdup for array of CHAR's
144: */
145:
146: CHAR *xmlStrndup(CHAR *cur, int len) {
147: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
148:
149: if (ret == NULL) {
150: fprintf(stderr, "malloc of %d byte failed\n",
151: (len + 1) * sizeof(CHAR));
152: return(NULL);
153: }
154: memcpy(ret, cur, len * sizeof(CHAR));
155: ret[len] = 0;
156: return(ret);
157: }
158:
159: /*
160: * xmlStrdup : a strdup for CHAR's
161: */
162:
163: CHAR *xmlStrdup(CHAR *cur) {
164: CHAR *p = cur;
165:
166: while (IS_CHAR(*p)) p++;
167: return(xmlStrndup(cur, p - cur));
168: }
169:
170: /*
171: * xmlParseName : parse an XML name.
172: */
173:
1.3 veillard 174: CHAR *xmlParseName(CHAR **p) {
175: CHAR *cur = *p, *q, *ret = NULL;
1.1 veillard 176:
177: /*
1.3 veillard 178: * Name ::= (Letter | '_') (NameChar)*
1.1 veillard 179: */
1.3 veillard 180: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
181: q = cur++;
182: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
183: (*cur == '.') || (*cur == '-') || (*cur == '_') ||
184: (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
185: (IS_EXTENDER(*cur)))
186: cur++;
187:
188: ret = xmlStrndup(q, cur - q);
1.1 veillard 189:
1.3 veillard 190: *p = cur;
191: return(ret);
1.1 veillard 192: }
193:
194: /*
195: * Parse and return a string between quotes or doublequotes
196: */
197: CHAR *xmlParseQuotedString(CHAR **p) {
198: CHAR *ret = NULL;
199: CHAR *cur = *p, *q;
200:
201: if (*cur == '"') {
202: cur++;
203: q = cur;
204: while (IS_CHAR(*cur) && (*cur != '"')) cur++;
205: if (*cur != '"')
206: fprintf(stderr, "String not closed \"%s\n", q);
207: else {
208: ret = xmlStrndup(q, cur - q);
209: cur++;
210: }
211: } else if (*cur == '\''){
212: cur++;
213: q = cur;
214: while (IS_CHAR(*cur) && (*cur != '\'')) cur++;
215: if (*cur != '\'')
216: fprintf(stderr, "String not closed '%s\n", q);
217: else {
218: ret = xmlStrndup(q, cur - q);
219: cur++;
220: }
221: }
222: *p = cur;
223: return(ret);
224: }
225:
226: /*
1.3 veillard 227: * Skip an XML (SGML) comment <!-- .... -->
228: */
229: void xmlParserSkipComment(CHAR **p) {
230: CHAR *cur = *p, *q, *r, *start;
231:
232: /*
233: * An extra check may avoid errors and isn't that costly !
234: */
235: if ((cur[0] != '<') || (cur[1] != '!') ||
236: (cur[2] != '-') || (cur[3] != '-')) return;
237:
238: cur += 4;
239: start = q = cur;
240: cur++;
241: r = cur;
242: cur++;
243: while (IS_CHAR(*cur) &&
244: ((*cur != '>') || (*r != '-') || (*q != '-'))) {
245: cur++;r++;q++;
246: }
247: if (!IS_CHAR(*cur)) {
248: fprintf(stderr, "Comment not terminated <!--%s\n", start);
249: *p = start;
250: } else {
251: cur++;
252: *p = cur;
253: }
254: }
255:
256: /*
1.1 veillard 257: * xmlParseWebdavNamespace: parse Webdav specific '<?namespace ...' constructs.
258: */
259:
260: void xmlParseWebdavNamespace(CHAR **p, xmlDocPtr doc) {
261: CHAR *cur = *p;
262: CHAR *href = NULL;
263: CHAR *AS = NULL;
1.3 veillard 264: int garbage = 0;
1.1 veillard 265:
266: /*
267: * We know that 'namespace' is here.
268: */
269: cur += 9;
270: SKIP_BLANKS(cur);
271:
272: while (IS_CHAR(*cur) && (*cur != '>')) {
273: /*
274: * We can have 'href' or 'AS' attributes.
275: */
276: if ((cur[0] == 'h') && (cur[1] == 'r') && (cur[2] == 'e') &&
277: (cur[3] == 'f')) {
1.3 veillard 278: garbage = 0;
1.1 veillard 279: cur += 4;
280: SKIP_BLANKS(cur);
281:
282: if (*cur != '=') continue;
283: cur++;
284: SKIP_BLANKS(cur);
285:
286: href = xmlParseQuotedString(&cur);
287: SKIP_BLANKS(cur);
288: } else if ((cur[0] == 'A') && (cur[1] == 'S')) {
1.3 veillard 289: garbage = 0;
1.1 veillard 290: cur += 2;
291: SKIP_BLANKS(cur);
292:
293: if (*cur != '=') continue;
294: cur++;
295: SKIP_BLANKS(cur);
296:
297: AS = xmlParseQuotedString(&cur);
298: SKIP_BLANKS(cur);
299: } else if ((cur[0] == '?') && (cur[1] == '>')) {
1.3 veillard 300: garbage = 0;
1.1 veillard 301: cur ++;
302: } else {
1.3 veillard 303: /*
304: * Found garbage when parsing the namespace
305: */
306: if (!garbage) fprintf(stderr,
1.4 ! veillard 307: "\nxmlParseWebdavNamespace found garbage: ");
1.3 veillard 308: fprintf(stderr, "%c", *cur);
1.1 veillard 309: cur++;
310: }
311: }
1.3 veillard 312: if (!garbage) fprintf(stderr, "\n");
1.1 veillard 313:
314: MOVETO_ENDTAG(cur);
315: cur++;
316:
317: /*
318: * Register the DTD.
319: */
320: if (href != NULL)
321: xmlNewDtd(doc, href, AS);
322:
323: *p = cur;
324: }
325:
326: /*
1.3 veillard 327: * xmlParsePI: parse an XML Processing Instruction.
328: */
329:
330: void xmlParsePI(CHAR **p, xmlDocPtr doc) {
331: CHAR *cur = *p;
332:
333: if ((cur[0] == '<') && (cur[1] == '?')) {
334: /*
335: * this is a Processing Instruction.
336: */
337: cur += 2;
338:
339: /*
340: * Special for WebDav, support for the Processing Instruction
341: * '<?namespace ...' contruct in the header of the XML document.
342: */
343: if ((cur[0] == 'n') && (cur[1] == 'a') &&
344: (cur[2] == 'm') && (cur[3] == 'e') &&
345: (cur[4] == 's') && (cur[5] == 'p') &&
346: (cur[6] == 'a') && (cur[7] == 'c') &&
347: (cur[8] == 'e')) {
348: xmlParseWebdavNamespace(&cur, doc);
349: } else {
350: /* Unknown PI, ignore it ! */
351: MOVETO_ENDTAG(cur);
352: cur++;
353: }
354: }
355: *p = cur;
356: }
357:
358: /*
359: * xmlParseAttribute: parse a start of tag.
360: *
361: * Attribute ::= Name Eq AttValue
362: */
363:
364: void xmlParseAttribute(CHAR **p, xmlNodePtr node) {
365: CHAR *cur = *p, *q, *name, *value = NULL;
366:
367: if (!IS_LETTER(*cur) && (*cur != '_')) {
368: return;
369: }
370: q = cur++;
371: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
372: (*cur == '.') || (*cur == '-') || (*cur == '_') ||
373: (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
374: (IS_EXTENDER(*cur)))
375: cur++;
376: name = xmlStrndup(q, cur - q);
377:
378: /*
379: * We should have the equal, we are laxist here and allow attributes
380: * without values and extra spaces.
381: */
382: SKIP_BLANKS(cur);
383: if (*cur == '=') {
384: cur++;
385: SKIP_BLANKS(cur);
386: if ((*cur != '\'') && (*cur != '"')) {
387: fprintf(stderr, "Quotes were expected for attribute value %s\n",
388: q);
389: } else
390: value = xmlParseQuotedString(&cur);
391: }
392:
393: /*
394: * Add the attribute to the node.
395: */
396: if (name != NULL)
397: xmlNewProp(node, name, value);
398:
399: *p = cur;
400: }
401:
402: /*
1.2 veillard 403: * xmlParseStartTag: parse a start of tag.
404: */
405:
1.3 veillard 406: xmlNodePtr xmlParseStartTag(CHAR **p, xmlDocPtr doc) {
407: CHAR *cur = *p, *q, *ns, *name;
408: xmlDtdPtr dtd = NULL;
1.2 veillard 409: xmlNodePtr ret = NULL;
410:
411: /*
1.3 veillard 412: * Theorically one should just parse a Name, but with the addition
413: * of the namespace needed for WebDav, it's a bit more complicated
414: * since the element name may be prefixed by a namespace prefix.
415: *
416: * QName ::= (NSPart ':')? LocalPart
417: * NSPart ::= Name
418: * LocalPart ::= Name
419: * STag ::= '<' QName (S Attribute)* S? '>'
420: *
421: * instead of :
422: *
423: * STag ::= '<' QName (S Attribute)* S? '>'
1.2 veillard 424: */
1.3 veillard 425: if (*cur != '<') return(NULL);
426: cur++;
427:
428: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
429: q = cur++;
430: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
431: (*cur == '.') || (*cur == '-') || (*cur == '_') ||
432: (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
433: (IS_EXTENDER(*cur)))
434: cur++;
435:
436: if (*cur == ':') {
437: ns = xmlStrndup(q, cur - q);
438:
439: cur++; /* skip the column */
440: if (!IS_LETTER(*cur) && (*cur != '_')) {
441: fprintf(stderr, "No element name after namespace identifier %s\n",
442: q);
443: free(ns);
444: *p = cur;
445: return(NULL);
446: }
447: q = cur++;
448: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
449: (*cur == '.') || (*cur == '-') || (*cur == '_') ||
450: (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
451: (IS_EXTENDER(*cur)))
452: cur++;
453: name = xmlStrndup(q, cur - q);
454:
455: /*
456: * Search the DTD associated to ns.
457: */
458: dtd = xmlSearchDtd(doc, ns);
459: if (dtd == NULL)
460: fprintf(stderr, "Couldn't find namespace %s\n", ns);
461: free(ns);
462: } else
463: name = xmlStrndup(q, cur - q);
464:
465: ret = xmlNewNode(dtd, name, NULL);
1.2 veillard 466:
1.3 veillard 467: /*
468: * Now parse the attributes, it ends up with the ending
469: *
470: * (S Attribute)* S?
471: */
472: SKIP_BLANKS(cur);
473: while ((IS_CHAR(*cur)) &&
474: (*cur != '>') &&
475: ((cur[0] != '/') || (cur[1] != '>'))) {
476: if (IS_LETTER(*cur) || (*cur == '_'))
477: xmlParseAttribute(&cur, ret);
478: else {
479: /* We should warn !!! */
480: cur++;
481: }
482: SKIP_BLANKS(cur);
483: }
484:
485: *p = cur;
486: return(ret);
487: }
488:
489: /*
490: * xmlParseCDSect: escaped pure raw content.
491: */
492: CHAR *xmlParseCDSect(CHAR **p) {
493: CHAR *cur = *p, *r, *s, *base, *ret;
494:
495: base = cur;
496: if (!IS_CHAR(*cur)) {
497: fprintf(stderr, "CData section not finished : %s\n", base);
498: return(NULL);
499: }
500: r = cur++;
501: if (!IS_CHAR(*cur)) {
502: fprintf(stderr, "CData section not finished : %s\n", base);
503: return(NULL);
504: }
505: s = cur++;
506: while (IS_CHAR(*cur) &&
507: ((*r != ']') || (*s != ']') || (*cur != '>'))) {
508: r++;s++;cur++;
509: }
510: if (!IS_CHAR(*cur)) {
511: fprintf(stderr, "CData section not finished : %s\n", base);
512: return(NULL);
513: }
514: ret = xmlStrndup(base, cur-base);
1.2 veillard 515: *p = cur;
516: return(ret);
517: }
518:
519: /*
520: * xmlParseContent: a content is
521: * (element | PCData | Reference | CDSect | PI | Comment)
522: *
523: * element : starts by '<'
524: * PCData : any CHAR but '&' or '<'
525: * Reference : starts by '&'
526: * CDSect : starts by '<![CDATA['
527: * PI : starts by '<?'
528: */
529:
1.3 veillard 530: xmlNodePtr xmlParseContent(CHAR **p, xmlDocPtr doc, xmlNodePtr node) {
531: CHAR *cur = *p, *q, *data = NULL;
1.2 veillard 532: xmlNodePtr ret = NULL;
533:
534: /*
1.3 veillard 535: * First case : a Processing Instruction.
536: */
537: if ((cur[0] == '<') && (cur[1] == '?')) {
538: xmlParsePI(&cur, doc);
539: }
540: /*
541: * Second case : a CDSection
1.2 veillard 542: */
1.3 veillard 543: if ((cur[0] == '<') && (cur[1] == '!') && (cur[2] == '[') &&
544: (cur[3] == 'C') && (cur[4] == 'D') && (cur[5] == 'A') &&
545: (cur[6] == 'T') && (cur[7] == 'A') && (cur[8] == '[')) {
546: cur += 9;
547: data = xmlParseCDSect(&cur);
548: }
549: /*
550: * Third case : a sub-element.
551: */
552: else if (cur[0] == '<') {
553: ret = xmlParseElement(&cur, doc);
554: }
555: /*
556: * Last case, text. Note that References are handled directly.
557: */
558: else {
559: q = cur;
560: while (IS_CHAR(*cur) && (*cur != '<')) cur++;
561:
562: if (!IS_CHAR(*cur)) {
563: fprintf(stderr, "Truncated content : %s\n", q);
1.4 ! veillard 564: *p = cur;
1.3 veillard 565: return(NULL);
566: }
567: data = xmlStrndup(q, cur - q);
568: /* Should apply the &...; reduction !!!! */
569: }
570:
571: /*
572: * Handle the data if any. If there is no child
573: * add it as content, otherwise create a new node of type text.
574: */
575: if (data != NULL)
576: data = xmlHandleData(data);
577: if (data != NULL) {
578: if (node->childs == NULL)
579: xmlNodeSetContent(node, data);
580: else {
581: ret = xmlNewText(data);
582: }
583: }
1.2 veillard 584:
585: *p = cur;
586: return(ret);
587: }
588:
589: /*
590: * xmlParseElement: parse an XML element
591: */
592:
1.3 veillard 593: xmlNodePtr xmlParseElement(CHAR **p, xmlDocPtr doc) {
1.2 veillard 594: CHAR *cur = *p;
595: xmlNodePtr ret, child;
596:
1.3 veillard 597: ret = xmlParseStartTag(&cur, doc);
598: if (ret == NULL) {
599: *p = cur;
600: return(NULL);
601: }
1.2 veillard 602:
603: /*
604: * Check for an Empty Element.
605: */
606: if ((cur[0] == '/') && (cur[1] == '>')) {
607: cur += 2;
608: *p = cur;
609: return(ret);
610: }
611: if (cur[0] == '>') cur++;
612: else {
613: fprintf(stderr, " couldn't find end of Start Tag %s\n", *p);
614: *p = cur;
615: return(ret);
616: }
617:
618: /*
619: * Parse the content of the element:
620: * (element | PCData | Reference | CDSect | PI | Comment) *
621: *
622: * element : starts by '<'
623: * PCData : any CHAR but '&' or '<'
624: * Reference : starts by '&'
625: * CDSect : starts by '<![CDATA['
626: * PI : starts by '<?'
627: *
628: * The loop stops upon detection of an end of tag '</'
629: */
630: while ((IS_CHAR(cur[0])) && ((cur[0] != '<') || (cur[1] != '/'))) {
1.3 veillard 631: child = xmlParseContent(&cur, doc, ret);
1.2 veillard 632: if (child != NULL)
633: xmlAddChild(ret, child);
634: }
635: if (!IS_CHAR(cur[0])) {
636: fprintf(stderr, "Premature end of data in tag %s\n", *p);
637: *p = cur;
638: return(ret);
639: }
640:
641: /*
642: * parse the end of tag : '</' has been detected.
643: */
644: cur += 2;
645: if (*cur == '>') cur++; /* simplified closing </> */
646: else {
647: /*
648: * We should check that the Name in the ETag is the same as in
649: * the STag. !!!!
650: */
651: MOVETO_ENDTAG(cur);
652: cur++;
653: }
654:
655: *p = cur;
656: return(ret);
657: }
658:
659: /*
1.1 veillard 660: * xmlParseXMLDecl: parse an XML declaration header
661: */
662:
663: xmlDocPtr xmlParseXMLDecl(CHAR **p) {
664: CHAR *cur = *p;
665: CHAR *version;
666: xmlDocPtr ret;
667:
668: /*
669: * We know that '<?XML' is here.
670: */
671: cur += 5;
672:
673: /*
674: * Parse the version info
675: */
676: SKIP_BLANKS(cur);
677:
678: /*
679: * We should have 'version=' here !
680: */
681: if ((cur[0] == 'v') && (cur[1] == 'e') && (cur[2] == 'r') &&
682: (cur[3] == 's') && (cur[4] == 'i') && (cur[5] == 'o') &&
683: (cur[6] == 'n') && (cur[7] == '=')) {
684: cur += 8;
685: version = xmlParseQuotedString(&cur);
686: if (version == NULL)
687: ret = xmlNewDoc(XML_DEFAULT_VERSION);
688: else {
689: ret = xmlNewDoc(version);
690: }
691: } else {
692: ret = xmlNewDoc(XML_DEFAULT_VERSION);
693: }
694:
695: /*
696: * We should check for encoding !!!!
697: */
698:
699: /*
700: * We should check for Required Markup Declaration !!!!
701: */
702: MOVETO_ENDTAG(cur);
703: cur++;
704:
705: *p = cur;
706: return(ret);
707: }
708:
709: /*
710: * xmlParseMisc: parse an XML Misc optionnal field.
711: * (Comment | PI | S)*
712: */
713:
1.3 veillard 714: void xmlParseMisc(CHAR **p, xmlDocPtr doc) {
1.1 veillard 715: CHAR *cur = *p;
716:
717: while (((cur[0] == '<') && (cur[1] == '?')) ||
718: ((cur[0] == '<') && (cur[1] == '!') &&
719: (cur[2] == '-') && (cur[2] == '-')) ||
720: IS_BLANK(*cur)) {
721: if ((cur[0] == '<') && (cur[1] == '?')) {
1.3 veillard 722: xmlParsePI(&cur, doc);
1.1 veillard 723: } else if (IS_BLANK(*cur)) {
724: cur++;
725: } else
726: xmlParserSkipComment(&cur);
727: }
728:
729: *p = cur;
730: }
731:
732: /*
733: * xmlParseDoc : parse an XML document and build a tree.
734: */
735:
736: xmlDocPtr xmlParseDoc(CHAR *cur) {
737: xmlDocPtr ret;
738:
739: /*
740: * Wipe out everything which is before the first '<'
741: */
742: SKIP_BLANKS(cur);
743:
744: /*
745: * Check for the XMLDecl in the Prolog.
746: */
747: if ((cur[0] == '<') && (cur[1] == '?') &&
748: (cur[2] == 'X') && (cur[3] == 'M') &&
749: (cur[4] == 'L')) {
750: ret = xmlParseXMLDecl(&cur);
751: /* SKIP_EOL(cur); */
752: SKIP_BLANKS(cur);
753: } else {
754: ret = xmlNewDoc(XML_DEFAULT_VERSION);
755: }
756:
757: /*
758: * The Misc part of the Prolog
759: * (Comment | PI | S) *
760: */
761: xmlParseMisc(&cur, ret);
762:
763: /*
1.2 veillard 764: * Time to start parsing
1.1 veillard 765: */
1.3 veillard 766: ret->root = xmlParseElement(&cur, ret);
1.1 veillard 767:
768: return(ret);
769: }
770:
771: /************************************************************************
772: * *
773: * Debug *
774: * *
775: ************************************************************************/
776:
777: #ifdef DEBUG
1.4 ! veillard 778: #include <sys/types.h>
! 779: #include <sys/stat.h>
! 780: #include <fcntl.h>
! 781: #include <unistd.h>
! 782:
/*
 * MAX_BUF is the number of CHAR's in the static parse buffer below.
 *
 * buffer starts out holding a sample WebDav document, which main()
 * parses when no file name is given on the command line. readFile()
 * clears it and overwrites it with the content of a file.
 */
! 783: #define MAX_BUF 100000
! 784:
! 785: CHAR buffer[MAX_BUF] =
1.1 veillard 786: "\n\
787: <?XML version=\"1.0\">\n\
788: <?namespace href = \"http://www.ietf.org/standards/dav/\" AS = \"D\"?>\n\
789: <?namespace href = \"http://www.w3.com/standards/z39.50/\" AS = \"Z\"?>\n\
790: <D:propertyupdate>\n\
1.3 veillard 791: <D:set a=\"'toto'\" b>\n\
1.1 veillard 792: <D:prop>\n\
793: <Z:authors>\n\
794: <Z:Author>Jim Whitehead</Z:Author>\n\
795: <Z:Author>Roy Fielding</Z:Author>\n\
796: </Z:authors>\n\
797: </D:prop>\n\
798: </D:set>\n\
799: <D:remove>\n\
800: <D:prop><Z:Copyright-Owner/></D:prop>\n\
801: </D:remove>\n\
802: </D:propertyupdate>\n\
803: \n\
804: ";
805:
1.4 ! veillard 806: int readFile(char *filename) {
! 807: int input;
! 808: int res;
! 809:
! 810: memset(buffer, 0, sizeof(buffer));
! 811: input = open (filename, O_RDONLY);
! 812: if (input < 0) {
! 813: fprintf (stderr, "Cannot read file %s :\n", filename);
! 814: perror ("open failed");
! 815: return(-1);
! 816: }
! 817: res = read(input, buffer, sizeof(buffer));
! 818: if (res < 0) {
! 819: fprintf (stderr, "Cannot read file %s :\n", filename);
! 820: perror ("read failed");
! 821: return(-1);
! 822: }
! 823: if (res >= MAX_BUF) {
! 824: fprintf (stderr, "Read only %d byte of %s, increase MAX_BUF\n",
! 825: res, filename);
! 826: return(-1);
! 827: }
! 828: close(input);
! 829: return(res);
! 830: }
! 831:
! 832: void parseAndPrint(CHAR *buf) {
1.1 veillard 833: xmlDocPtr doc;
834:
835: /*
836: * build a fake XML document from a string;
837: */
1.4 ! veillard 838: doc = xmlParseDoc(buf);
1.1 veillard 839:
840: /*
841: * print it.
842: */
843: xmlDocDump(stdout, doc);
844:
845: /*
846: * free it.
847: */
848: xmlFreeDoc(doc);
1.4 ! veillard 849: }
! 850:
! 851: int main(int argc, char **argv) {
! 852: int i;
! 853:
! 854: if (argc > 1) {
! 855: for (i = 1; i < argc ; i++) {
! 856: if (readFile(argv[i]) >= 0) {
! 857: printf("\n\n------- %s -----------\n", argv[i]);
! 858: parseAndPrint(buffer);
! 859: }
! 860: }
! 861: } else
! 862: parseAndPrint(buffer);
! 863:
1.1 veillard 864: return(0);
865: }
866: #endif
Webmaster