Annotation of XML/parser.c, revision 1.30
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.30 ! daniel 6: * $Id: parser.c,v 1.29 1998/08/04 04:59:15 daniel Exp $
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.1 veillard 18: #include <malloc.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.1 veillard 33:
34: /*
35: * A few macros needed to help building the parser.
36: */
37:
38: #ifdef UNICODE
1.30 ! daniel 39: /************************************************************************
! 40: * *
! 41: * UNICODE version of the macros. *
! 42: * *
! 43: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *              | [#x10000-#x10FFFF]
 *
 * IS_CHAR(c) accepts TAB, LF and CR, plus every value from 0x20 upward
 * except 0xFFFE and 0xFFFF. The surrogate blocks named by the production
 * are not filtered out by this test. The argument is expanded several
 * times and must be free of side effects.
 */
#define IS_CHAR(c) \
    (((c) >= 0x20) ? (((c) != 0xFFFE) && ((c) != 0xFFFF)) \
                   : (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d)))
52:
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * SKIP_BLANKS(p) advances p while it points at one of the four values of
 * the production or at 0x3000. The expansion is a complete while statement
 * that already ends with ';', and p is evaluated repeatedly.
 */
#define SKIP_BLANKS(p) \
    while ((*(p) == 0x09) || (*(p) == 0x0a) || (*(p) == 0x0d) || \
           (*(p) == 0x20) || (*(p) == 0x3000)) \
        (p)++;
1.1 veillard 59:
1.22 daniel 60: /*
1.30 ! daniel 61: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 62: *
1.30 ! daniel 63: * VI is your friend !
! 64: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
! 65: * and
! 66: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 67: */
/*
 * IS_BASECHAR(c): true when c equals one of the single code points, or
 * lies inside one of the inclusive ranges, listed below (from
 * 0x0041-0x005A up to 0xAC00-0xD7A3). The argument is expanded once per
 * comparison, so it must not have side effects.
 */
1.1 veillard 68: #define IS_BASECHAR(c) \
1.30 ! daniel 69: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
! 70: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
! 71: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
! 72: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
! 73: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
! 74: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
! 75: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
! 76: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
! 77: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
! 78: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
! 79: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
! 80: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
! 81: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
! 82: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
! 83: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
! 84: ((c) == 0x0386) || \
! 85: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
! 86: ((c) == 0x038C) || \
! 87: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
! 88: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
! 89: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
! 90: ((c) == 0x03DA) || \
! 91: ((c) == 0x03DC) || \
! 92: ((c) == 0x03DE) || \
! 93: ((c) == 0x03E0) || \
! 94: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
! 95: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
! 96: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
! 97: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
! 98: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
! 99: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
! 100: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
! 101: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
! 102: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
! 103: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
! 104: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
! 105: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
! 106: ((c) == 0x0559) || \
! 107: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
! 108: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
! 109: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
! 110: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
! 111: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
! 112: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
! 113: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
! 114: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
! 115: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
! 116: ((c) == 0x06D5) || \
! 117: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
! 118: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
! 119: ((c) == 0x093D) || \
! 120: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
! 121: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
! 122: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
! 123: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
! 124: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
! 125: ((c) == 0x09B2) || \
! 126: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
! 127: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
! 128: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
! 129: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
! 130: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
! 131: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
! 132: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
! 133: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
! 134: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
! 135: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
! 136: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
! 137: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
! 138: ((c) == 0x0A5E) || \
! 139: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
! 140: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
! 141: ((c) == 0x0A8D) || \
! 142: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
! 143: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
! 144: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
! 145: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
! 146: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
! 147: ((c) == 0x0ABD) || \
! 148: ((c) == 0x0AE0) || \
! 149: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
! 150: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
! 151: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
! 152: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
! 153: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
! 154: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
! 155: ((c) == 0x0B3D) || \
! 156: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
! 157: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
! 158: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
! 159: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
! 160: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
! 161: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
! 162: ((c) == 0x0B9C) || \
! 163: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
! 164: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
! 165: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
! 166: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
! 167: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
! 168: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
! 169: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
! 170: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
! 171: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
! 172: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
! 173: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
! 174: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
! 175: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
! 176: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
! 177: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
! 178: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
! 179: ((c) == 0x0CDE) || \
! 180: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
! 181: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
! 182: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
! 183: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
! 184: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
! 185: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
! 186: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
! 187: ((c) == 0x0E30) || \
! 188: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
! 189: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
! 190: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
! 191: ((c) == 0x0E84) || \
! 192: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
! 193: ((c) == 0x0E8A) || \
! 194: ((c) == 0x0E8D) || \
! 195: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
! 196: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
! 197: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
! 198: ((c) == 0x0EA5) || \
! 199: ((c) == 0x0EA7) || \
! 200: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
! 201: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
! 202: ((c) == 0x0EB0) || \
! 203: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
! 204: ((c) == 0x0EBD) || \
! 205: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
! 206: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
! 207: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
! 208: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
! 209: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
! 210: ((c) == 0x1100) || \
! 211: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
! 212: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
! 213: ((c) == 0x1109) || \
! 214: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
! 215: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
! 216: ((c) == 0x113C) || \
! 217: ((c) == 0x113E) || \
! 218: ((c) == 0x1140) || \
! 219: ((c) == 0x114C) || \
! 220: ((c) == 0x114E) || \
! 221: ((c) == 0x1150) || \
! 222: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
! 223: ((c) == 0x1159) || \
! 224: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
! 225: ((c) == 0x1163) || \
! 226: ((c) == 0x1165) || \
! 227: ((c) == 0x1167) || \
! 228: ((c) == 0x1169) || \
! 229: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
! 230: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
! 231: ((c) == 0x1175) || \
! 232: ((c) == 0x119E) || \
! 233: ((c) == 0x11A8) || \
! 234: ((c) == 0x11AB) || \
! 235: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
! 236: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
! 237: ((c) == 0x11BA) || \
! 238: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
! 239: ((c) == 0x11EB) || \
! 240: ((c) == 0x11F0) || \
! 241: ((c) == 0x11F9) || \
! 242: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
! 243: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
! 244: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
! 245: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
! 246: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
! 247: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
! 248: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
! 249: ((c) == 0x1F59) || \
! 250: ((c) == 0x1F5B) || \
! 251: ((c) == 0x1F5D) || \
! 252: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
! 253: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
! 254: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
! 255: ((c) == 0x1FBE) || \
! 256: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
! 257: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
! 258: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
! 259: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
! 260: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
! 261: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
! 262: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
! 263: ((c) == 0x2126) || \
! 264: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
! 265: ((c) == 0x212E) || \
! 266: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
! 267: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
! 268: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
! 269: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
! 270: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 271:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * IS_DIGIT(c) is true when c falls inside one of the inclusive code point
 * ranges below. The argument is expanded for every comparison, so it must
 * be free of side effects.
 */
#define IS_DIGIT(c) \
    (((0x0030 <= (c)) && ((c) <= 0x0039)) || \
     ((0x0660 <= (c)) && ((c) <= 0x0669)) || \
     ((0x06F0 <= (c)) && ((c) <= 0x06F9)) || \
     ((0x0966 <= (c)) && ((c) <= 0x096F)) || \
     ((0x09E6 <= (c)) && ((c) <= 0x09EF)) || \
     ((0x0A66 <= (c)) && ((c) <= 0x0A6F)) || \
     ((0x0AE6 <= (c)) && ((c) <= 0x0AEF)) || \
     ((0x0B66 <= (c)) && ((c) <= 0x0B6F)) || \
     ((0x0BE7 <= (c)) && ((c) <= 0x0BEF)) || \
     ((0x0C66 <= (c)) && ((c) <= 0x0C6F)) || \
     ((0x0CE6 <= (c)) && ((c) <= 0x0CEF)) || \
     ((0x0D66 <= (c)) && ((c) <= 0x0D6F)) || \
     ((0x0E50 <= (c)) && ((c) <= 0x0E59)) || \
     ((0x0ED0 <= (c)) && ((c) <= 0x0ED9)) || \
     ((0x0F20 <= (c)) && ((c) <= 0x0F29)))
1.1 veillard 291:
1.22 daniel 292: /*
293: * [87] CombiningChar ::= ... long list see REC ...
294: */
/*
 * IS_COMBINING(c): true when c equals one of the single code points, or
 * lies inside one of the inclusive ranges, listed below (from
 * 0x0300-0x0345 up to 0x309A). The argument is expanded once per
 * comparison, so it must not have side effects.
 */
1.30 ! daniel 295: #define IS_COMBINING(c) \
! 296: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
! 297: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
! 298: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
! 299: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
! 300: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
! 301: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
! 302: ((c) == 0x05BF) || \
! 303: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
! 304: ((c) == 0x05C4) || \
! 305: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
! 306: ((c) == 0x0670) || \
! 307: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
! 308: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
! 309: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
! 310: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
! 311: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
! 312: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
! 313: ((c) == 0x093C) || \
! 314: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
! 315: ((c) == 0x094D) || \
! 316: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
! 317: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
! 318: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
! 319: ((c) == 0x09BC) || \
! 320: ((c) == 0x09BE) || \
! 321: ((c) == 0x09BF) || \
! 322: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
! 323: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
! 324: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
! 325: ((c) == 0x09D7) || \
! 326: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
! 327: ((c) == 0x0A02) || \
! 328: ((c) == 0x0A3C) || \
! 329: ((c) == 0x0A3E) || \
! 330: ((c) == 0x0A3F) || \
! 331: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
! 332: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
! 333: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
! 334: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
! 335: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
! 336: ((c) == 0x0ABC) || \
! 337: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
! 338: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
! 339: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
! 340: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
! 341: ((c) == 0x0B3C) || \
! 342: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
! 343: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
! 344: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
! 345: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
! 346: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
! 347: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
! 348: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
! 349: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
! 350: ((c) == 0x0BD7) || \
! 351: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
! 352: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
! 353: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
! 354: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
! 355: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
! 356: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
! 357: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
! 358: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
! 359: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
! 360: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
! 361: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
! 362: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
! 363: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
! 364: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
! 365: ((c) == 0x0D57) || \
! 366: ((c) == 0x0E31) || \
! 367: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
! 368: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
! 369: ((c) == 0x0EB1) || \
! 370: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
! 371: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
! 372: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
! 373: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
! 374: ((c) == 0x0F35) || \
! 375: ((c) == 0x0F37) || \
! 376: ((c) == 0x0F39) || \
! 377: ((c) == 0x0F3E) || \
! 378: ((c) == 0x0F3F) || \
! 379: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
! 380: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
! 381: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
! 382: ((c) == 0x0F97) || \
! 383: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
! 384: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
! 385: ((c) == 0x0FB9) || \
! 386: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
! 387: ((c) == 0x20E1) || \
! 388: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
! 389: ((c) == 0x3099) || \
! 390: ((c) == 0x309A))
1.3 veillard 391:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * IS_EXTENDER(c) is true for exactly the code points of the production
 * above. The argument is expanded for every comparison, so it must be
 * free of side effects.
 */
#define IS_EXTENDER(c) \
    (((c) == 0x00b7) || ((c) == 0x02d0) || ((c) == 0x02d1) || \
     ((c) == 0x0387) || ((c) == 0x0640) || ((c) == 0x0e46) || \
     ((c) == 0x0ec6) || ((c) == 0x3005) || \
     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
     (((c) >= 0x309d) && ((c) <= 0x309e)) || \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 404:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * IS_IDEOGRAPHIC(c) is true for exactly the code points of the production
 * above. The compatibility block 0xF900-0xFA2D is deliberately not
 * accepted: it is not part of the production. The argument is expanded
 * for every comparison, so it must be free of side effects.
 */
#define IS_IDEOGRAPHIC(c) \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
     ((c) == 0x3007) || \
     (((c) >= 0x3021) && ((c) <= 0x3029)))
413:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * IS_LETTER(c) combines the two class tests; IS_BASECHAR is tried first.
 */
#define IS_LETTER(c) \
    (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
418:
419: #else
1.30 ! daniel 420: /************************************************************************
! 421: * *
! 422: * 8bits / ASCII version of the macros. *
! 423: * *
! 424: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *              | [#x10000-#x10FFFF]
 *
 * 8-bit version: IS_CHAR(c) accepts TAB, LF, CR and every value from 0x20
 * upward. The earlier form tested 0x0a twice; the duplicate is gone and
 * the result is unchanged. The argument is expanded several times and
 * must be free of side effects.
 */
#define IS_CHAR(c) \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))
1.1 veillard 433:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8-bit version: the part of the BaseChar list that fits below 0x100,
 * i.e. A-Z, a-z, 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF. These are the same
 * five ranges the UNICODE variant of this macro starts with.
 *
 * Two clauses of the earlier form were dropped: a range test
 * "(c) >= 0xaa && (c) <= 0x5b" that could never be true, and a test for
 * 0xba, which the UNICODE variant does not accept.
 *
 * The argument is expanded for every comparison, so it must be free of
 * side effects.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)))
445:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8-bit version: only the range 0x30-0x39 is accepted.
 */
#define IS_DIGIT(c) \
    ((0x30 <= (c)) && ((c) <= 0x39))
450:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * 8-bit version: the macro forwards to IS_BASECHAR only.
 */
#define IS_LETTER(c) (IS_BASECHAR(c))
455:
1.22 daniel 456:
/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * 8-bit version: always false; the argument is not evaluated.
 */
#define IS_COMBINING(c) (0)
461:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8-bit version: 0xB7 is the only member tested.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
468:
1.21 daniel 469: #endif /* !UNICODE */
1.1 veillard 470:
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK(c) is true for TAB, LF, CR and space. The argument is expanded
 * four times and must be free of side effects.
 */
#define IS_BLANK(c) \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) == 0x20))
478:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * IS_PUBIDCHAR(c) is true for the characters of the production above.
 * Note that the single quote is a member and the double quote is not.
 * The argument is expanded for every comparison, so it must be free of
 * side effects.
 */
#define IS_PUBIDCHAR(c) \
    ((((c) >= 'a') && ((c) <= 'z')) || \
     (((c) >= 'A') && ((c) <= 'Z')) || \
     (((c) >= '0') && ((c) <= '9')) || \
     ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || \
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') || \
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') || \
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') || \
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') || \
     ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 492:
/*
 * SKIP_EOL(p): step p over ONE end-of-line sequence, if p points at one.
 * Accepted sequences are CR, LF, CR LF and LF CR.
 *
 * CR is 0x0D and LF is 0x0A; the earlier form compared against 0x13 and
 * 0x10 (the decimal values written as hex) and so never matched a real
 * line end. The two tests are now chained with "else" so that a CR LF
 * pair followed by another LF consumes only the first line end.
 *
 * p is evaluated several times and must be a side-effect-free lvalue.
 */
#define SKIP_EOL(p) \
    if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; } \
    else if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }
496:
/*
 * SKIP_BLANKS(p): advance p past every character for which IS_BLANK()
 * holds. The expansion is a complete while statement that already ends
 * with ';'.
 *
 * The UNICODE section earlier in this file defines a macro of the same
 * name with a different body; redefining it without an #undef is an
 * incompatible macro redefinition, so the earlier one is dropped here
 * and this definition is the one in effect for the rest of the file.
 */
#undef SKIP_BLANKS
#define SKIP_BLANKS(p) \
    while (IS_BLANK(*(p))) (p)++;
499:
/*
 * MOVETO_ENDTAG(p): advance p until it points at '>' or at a value that
 * IS_CHAR() rejects. The macro argument is parenthesised at every use.
 * The expansion is a complete while statement that already ends with ';'.
 */
#define MOVETO_ENDTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++;
502:
/*
 * MOVETO_STARTTAG(p): advance p until it points at '<' or at a value that
 * IS_CHAR() rejects. The macro argument is parenthesised at every use.
 * The expansion is a complete while statement that already ends with ';'.
 */
#define MOVETO_STARTTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++;
505:
506: /*
1.3 veillard 507: * Forward declaration for recursive behaviour.
508: */
1.16 daniel 509: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.3 veillard 510:
511: /*
512: * xmlHandleData : this routine represent's the specific application
513: * behaviour when reading a piece of text.
514: *
515: * For example in WebDav, any piece made only of blanks is eliminated
516: */
517:
518: CHAR *xmlHandleData(CHAR *in) {
519: CHAR *cur;
520:
521: if (in == NULL) return(NULL);
522: cur = in;
523: while (IS_CHAR(*cur)) {
524: if (!IS_BLANK(*cur)) goto not_blank;
525: cur++;
526: }
527: free(in);
528: return(NULL);
529:
530: not_blank:
531: return(in);
532: }
533:
1.28 daniel 534: /************************************************************************
535: * *
536: * Commodity functions to handle CHARs *
537: * *
538: ************************************************************************/
539:
1.3 veillard 540: /*
1.1 veillard 541: * xmlStrndup : a strdup for array of CHAR's
542: */
543:
1.6 httpng 544: CHAR *xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 545: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
546:
547: if (ret == NULL) {
548: fprintf(stderr, "malloc of %d byte failed\n",
549: (len + 1) * sizeof(CHAR));
550: return(NULL);
551: }
552: memcpy(ret, cur, len * sizeof(CHAR));
553: ret[len] = 0;
554: return(ret);
555: }
556:
557: /*
558: * xmlStrdup : a strdup for CHAR's
559: */
560:
1.6 httpng 561: CHAR *xmlStrdup(const CHAR *cur) {
562: const CHAR *p = cur;
1.1 veillard 563:
564: while (IS_CHAR(*p)) p++;
565: return(xmlStrndup(cur, p - cur));
566: }
567:
568: /*
1.14 veillard 569: * xmlStrcmp : a strcmp for CHAR's
570: */
571:
572: int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
573: register int tmp;
574:
575: do {
576: tmp = *str1++ - *str2++;
577: if (tmp != 0) return(tmp);
578: } while ((*str1 != 0) && (*str2 != 0));
579: return (*str1 - *str2);
580: }
581:
582: /*
583: * xmlStrncmp : a strncmp for CHAR's
584: */
585:
586: int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
587: register int tmp;
588:
589: if (len <= 0) return(0);
590: do {
591: tmp = *str1++ - *str2++;
592: if (tmp != 0) return(tmp);
593: len--;
594: if (len <= 0) return(0);
595: } while ((*str1 != 0) && (*str2 != 0));
596: return (*str1 - *str2);
597: }
598:
599: /*
600: * xmlStrchr : a strchr for CHAR's
601: */
602:
603: CHAR *xmlStrchr(const CHAR *str, CHAR val) {
604: while (*str != 0) {
605: if (*str == val) return((CHAR *) str);
606: str++;
607: }
608: return(NULL);
609: }
1.28 daniel 610:
611: /************************************************************************
612: * *
613: * Extra stuff for namespace support *
614: * Relates to http://www.w3.org/TR/WD-xml-names *
615: * *
616: ************************************************************************/
617:
618: /*
619: * xmlNamespaceParseNCName : parse an XML namespace name.
620: *
621: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
622: *
623: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
624: * CombiningChar | Extender
625: */
626:
627: CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
628: const CHAR *q;
629: CHAR *ret = NULL;
630:
631: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
632: q = ctxt->cur++;
633:
634: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
635: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
636: (ctxt->cur[0] == '_') ||
637: (IS_COMBINING(ctxt->cur[0])) ||
638: (IS_EXTENDER(ctxt->cur[0])))
639: ctxt->cur++;
640:
641: ret = xmlStrndup(q, ctxt->cur - q);
642:
643: return(ret);
644: }
645:
646: /*
647: * xmlNamespaceParseQName : parse an XML qualified name
648: *
649: * [NS 5] QName ::= (Prefix ':')? LocalPart
650: *
651: * [NS 6] Prefix ::= NCName
652: *
653: * [NS 7] LocalPart ::= NCName
654: */
655:
656: CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
657: CHAR *ret = NULL;
658:
659: *prefix = NULL;
660: ret = xmlNamespaceParseNCName(ctxt);
661: if (ctxt->cur[0] == ':') {
662: *prefix = ret;
663: ctxt->cur++;
664: ret = xmlNamespaceParseNCName(ctxt);
665: }
666:
667: return(ret);
668: }
669:
670: /*
671: * xmlNamespaceParseNSDef : parse a namespace prefix declaration
672: *
673: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
674: *
675: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
676: */
677:
678: void xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
679: CHAR *name = NULL;
680:
681: if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') &&
682: (ctxt->cur[2] == 'l') && (ctxt->cur[3] == 'n') &&
683: (ctxt->cur[4] == 's')) {
684: ctxt->cur += 5;
685: if (ctxt->cur[0] == ':') {
686: ctxt->cur++;
687: name = xmlNamespaceParseNCName(ctxt);
688: }
689: }
690: }
691:
692: /************************************************************************
693: * *
694: * The parser itself *
695: * Relates to http://www.w3.org/TR/REC-xml *
696: * *
697: ************************************************************************/
1.14 veillard 698:
699: /*
1.1 veillard 700: * xmlParseName : parse an XML name.
1.22 daniel 701: *
702: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
703: * CombiningChar | Extender
704: *
705: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
706: *
707: * [6] Names ::= Name (S Name)*
1.1 veillard 708: */
709:
1.16 daniel 710: CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 711: const CHAR *q;
712: CHAR *ret = NULL;
1.1 veillard 713:
1.22 daniel 714: if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_') &&
715: (ctxt->cur[0] != ':')) return(NULL);
716: q = ctxt->cur++;
717:
718: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
719: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
720: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
721: (IS_COMBINING(ctxt->cur[0])) ||
722: (IS_EXTENDER(ctxt->cur[0])))
723: ctxt->cur++;
724:
725: ret = xmlStrndup(q, ctxt->cur - q);
726:
727: return(ret);
728: }
729:
730: /*
731: * xmlParseNmtoken : parse an XML Nmtoken.
732: *
733: * [7] Nmtoken ::= (NameChar)+
734: *
735: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
736: */
737:
738: CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
739: const CHAR *q;
740: CHAR *ret = NULL;
741:
1.16 daniel 742: q = ctxt->cur++;
1.22 daniel 743:
1.16 daniel 744: while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
1.22 daniel 745: (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
746: (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
747: (IS_COMBINING(ctxt->cur[0])) ||
1.16 daniel 748: (IS_EXTENDER(ctxt->cur[0])))
749: ctxt->cur++;
1.3 veillard 750:
1.16 daniel 751: ret = xmlStrndup(q, ctxt->cur - q);
1.1 veillard 752:
1.3 veillard 753: return(ret);
1.1 veillard 754: }
755:
756: /*
1.24 daniel 757: * xmlParseEntityValue : parse a value for ENTITY decl.
758: *
759: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
760: * "'" ([^%&'] | PEReference | Reference)* "'"
761: */
762:
763: CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
764: CHAR *ret = NULL;
765: const CHAR *q;
766: int needSubst;
767:
768: if (ctxt->cur[0] == '"') {
769: ctxt->cur++;
770:
771: q = ctxt->cur;
772: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
773: if (ctxt->cur[0] == '%') {
774: needSubst = 1; /* TODO !!! */
775: ctxt->cur++;
1.29 daniel 776: } else if (ctxt->cur[0] == '&') {
1.24 daniel 777: needSubst = 1; /* TODO !!! */
778: ctxt->cur++;
779: } else
780: ctxt->cur++;
781: }
782: if (!IS_CHAR(ctxt->cur[0])) {
783: fprintf(stderr, "Unfinished EntityValue %30s\n", q);
784: } else {
785: ret = xmlStrndup(q, ctxt->cur - q);
786: ctxt->cur++;
787: }
788: } else if (ctxt->cur[0] == '\'') {
789: ctxt->cur++;
790: q = ctxt->cur;
791: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
792: if (ctxt->cur[0] == '%') {
793: needSubst = 1; /* TODO !!! */
794: ctxt->cur++;
1.29 daniel 795: } else if (ctxt->cur[0] == '&') {
1.24 daniel 796: needSubst = 1; /* TODO !!! */
797: ctxt->cur++;
798: } else
799: ctxt->cur++;
800: }
801: if (!IS_CHAR(ctxt->cur[0])) {
802: fprintf(stderr, "Unfinished EntityValue %30s\n", q);
803: } else {
804: ret = xmlStrndup(q, ctxt->cur - q);
805: ctxt->cur++;
806: }
807: } else {
808: fprintf(stderr, "xmlParseEntityValue \" or ' expected: %30s\n",
809: ctxt->cur);
810: }
811:
812: return(ret);
813: }
814:
815: /*
1.29 daniel 816: * xmlParseAttValue : parse a value for an attribute
817: *
818: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
819: * "'" ([^<&'] | Reference)* "'"
820: */
821:
822: CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
823: CHAR *ret = NULL;
824: const CHAR *q;
825: int needSubst;
826:
827: if (ctxt->cur[0] == '"') {
828: ctxt->cur++;
829:
830: q = ctxt->cur;
831: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"')) {
832: if (ctxt->cur[0] == '%') {
833: needSubst = 1; /* TODO !!! */
834: ctxt->cur++;
835: } else if (ctxt->cur[0] == '&') {
836: needSubst = 1; /* TODO !!! */
837: ctxt->cur++;
838: } else
839: ctxt->cur++;
840: }
841: if (!IS_CHAR(ctxt->cur[0])) {
842: fprintf(stderr, "Unfinished AttValue %30s\n", q);
843: } else {
844: ret = xmlStrndup(q, ctxt->cur - q);
845: ctxt->cur++;
846: }
847: } else if (ctxt->cur[0] == '\'') {
848: ctxt->cur++;
849: q = ctxt->cur;
850: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\'')) {
851: if (ctxt->cur[0] == '%') {
852: needSubst = 1; /* TODO !!! */
853: ctxt->cur++;
854: } else if (ctxt->cur[0] == '&') {
855: needSubst = 1; /* TODO !!! */
856: ctxt->cur++;
857: } else
858: ctxt->cur++;
859: }
860: if (!IS_CHAR(ctxt->cur[0])) {
861: fprintf(stderr, "Unfinished AttValue %30s\n", q);
862: } else {
863: ret = xmlStrndup(q, ctxt->cur - q);
864: ctxt->cur++;
865: }
866: } else {
867: fprintf(stderr, "AttValue \" or ' expected: %30s\n",
868: ctxt->cur);
869: }
870:
871: return(ret);
872: }
873:
874: /*
1.21 daniel 875: * xmlParseSystemLiteral : parse an XML Literal
876: *
1.22 daniel 877: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.21 daniel 878: */
879:
880: CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
881: const CHAR *q;
882: CHAR *ret = NULL;
883:
884: if (ctxt->cur[0] == '"') {
885: ctxt->cur++;
886: q = ctxt->cur;
1.22 daniel 887: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '"'))
1.21 daniel 888: ctxt->cur++;
1.22 daniel 889: if (!IS_CHAR(ctxt->cur[0])) {
1.21 daniel 890: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
891: } else {
892: ret = xmlStrndup(q, ctxt->cur - q);
893: ctxt->cur++;
894: }
895: } else if (ctxt->cur[0] == '\'') {
896: ctxt->cur++;
897: q = ctxt->cur;
1.22 daniel 898: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
1.21 daniel 899: ctxt->cur++;
1.22 daniel 900: if (!IS_CHAR(ctxt->cur[0])) {
1.21 daniel 901: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
902: } else {
903: ret = xmlStrndup(q, ctxt->cur - q);
904: ctxt->cur++;
905: }
906: } else {
907: fprintf(stderr, "SystemLiteral \" or ' expected: %30s\n", ctxt->cur);
908: }
909:
910: return(ret);
911: }
912:
913: /*
1.27 daniel 914: * xmlParsePubidLiteral: parse an XML public literal
1.21 daniel 915: *
1.22 daniel 916: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1.21 daniel 917: */
918:
919: CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
920: const CHAR *q;
921: CHAR *ret = NULL;
922: /*
923: * Name ::= (Letter | '_') (NameChar)*
924: */
925: if (ctxt->cur[0] == '"') {
926: ctxt->cur++;
927: q = ctxt->cur;
928: while (IS_PUBIDCHAR(ctxt->cur[0])) ctxt->cur++;
929: if (ctxt->cur[0] != '"') {
930: fprintf(stderr, "Unfinished PubidLiteral %30s\n", q);
931: } else {
932: ret = xmlStrndup(q, ctxt->cur - q);
933: ctxt->cur++;
934: }
935: } else if (ctxt->cur[0] == '\'') {
936: ctxt->cur++;
937: q = ctxt->cur;
938: while ((IS_LETTER(ctxt->cur[0])) && (ctxt->cur[0] != '\''))
939: ctxt->cur++;
940: if (!IS_LETTER(ctxt->cur[0])) {
941: fprintf(stderr, "Unfinished SystemLiteral %30s\n", q);
942: } else {
943: ret = xmlStrndup(q, ctxt->cur - q);
944: ctxt->cur++;
945: }
946: } else {
947: fprintf(stderr, "SystemLiteral \" or ' expected: %30s\n", ctxt->cur);
948: }
949:
950: return(ret);
951: }
952:
953: /*
1.27 daniel 954: * xmlParseCharData: parse a CharData section.
955: * if we are within a CDATA section ']]>' marks an end of section.
956: *
957: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
958: */
959:
960: CHAR *xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
961: const CHAR *q;
962: CHAR *ret = NULL;
963:
964: q = ctxt->cur;
965: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '<') &&
966: (ctxt->cur[0] != '&')) {
967: ctxt->cur++;
968: if ((cdata) && (ctxt->cur[0] == ']') && (ctxt->cur[1] == ']') &&
969: (ctxt->cur[2] == '>')) break;
970: }
971: if (q == ctxt->cur) return(NULL);
972: ret = xmlStrndup(q, ctxt->cur - q);
973: return(ret);
974: }
975:
976: /*
1.22 daniel 977: * xmlParseExternalID: Parse an External ID
978: *
979: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
980: * | 'PUBLIC' S PubidLiteral S SystemLiteral
981: */
982:
983: CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **PubidLiteral) {
984: CHAR *ExternalID = NULL;
985:
986: *PubidLiteral = NULL;
987: if ((ctxt->cur[0] == 'S') && (ctxt->cur[1] == 'Y') &&
988: (ctxt->cur[2] == 'S') && (ctxt->cur[3] == 'T') &&
989: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M')) {
990: ctxt->cur += 6;
991: SKIP_BLANKS(ctxt->cur);
992: ExternalID = xmlParseSystemLiteral(ctxt);
993: if (ExternalID == NULL)
994: fprintf(stderr, "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
995: } else if ((ctxt->cur[0] == 'P') && (ctxt->cur[1] == 'U') &&
996: (ctxt->cur[2] == 'B') && (ctxt->cur[3] == 'L') &&
997: (ctxt->cur[4] == 'I') && (ctxt->cur[5] == 'C')) {
998: ctxt->cur += 6;
999: SKIP_BLANKS(ctxt->cur);
1000: *PubidLiteral = xmlParsePubidLiteral(ctxt);
1001: if (*PubidLiteral == NULL)
1002: fprintf(stderr, "xmlParseExternalID: PUBLIC, no PubidLiteral\n");
1003: SKIP_BLANKS(ctxt->cur);
1004: ExternalID = xmlParseSystemLiteral(ctxt);
1005: if (ExternalID == NULL)
1006: fprintf(stderr, "xmlParseExternalID: SYSTEM, no SystemLiteral\n");
1007: }
1008: return(ExternalID);
1009: }
1010:
1011: /*
1.1 veillard 1012: * Parse and return a string between quotes or doublequotes
1013: */
1.16 daniel 1014: CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.1 veillard 1015: CHAR *ret = NULL;
1.17 daniel 1016: const CHAR *q;
1.1 veillard 1017:
1.16 daniel 1018: if (ctxt->cur[0] == '"') {
1019: ctxt->cur++;
1020: q = ctxt->cur;
1021: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++;
1022: if (ctxt->cur[0] != '"')
1.7 veillard 1023: fprintf(stderr, "String not closed \"%.50s\n", q);
1.1 veillard 1024: else {
1.16 daniel 1025: ret = xmlStrndup(q, ctxt->cur - q);
1026: ctxt->cur++;
1.1 veillard 1027: }
1.16 daniel 1028: } else if (ctxt->cur[0] == '\''){
1029: ctxt->cur++;
1030: q = ctxt->cur;
1031: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++;
1032: if (ctxt->cur[0] != '\'')
1.7 veillard 1033: fprintf(stderr, "String not closed '%.50s\n", q);
1.1 veillard 1034: else {
1.16 daniel 1035: ret = xmlStrndup(q, ctxt->cur - q);
1036: ctxt->cur++;
1.1 veillard 1037: }
1038: }
1039: return(ret);
1040: }
1041:
1042: /*
1.3 veillard 1043: * Skip an XML (SGML) comment <!-- .... -->
1.16 daniel 1044: *
1045: * TODO !!!! Save the comment in the tree !!!
1.22 daniel 1046: *
1047: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1048: */
1.16 daniel 1049: void xmlParserSkipComment(xmlParserCtxtPtr ctxt) {
1.17 daniel 1050: const CHAR *q, *start;
1051: const CHAR *r;
1.3 veillard 1052:
1053: /*
1.22 daniel 1054: * Check that there is a comment right here.
1.3 veillard 1055: */
1.16 daniel 1056: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
1057: (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return;
1.3 veillard 1058:
1.16 daniel 1059: ctxt->cur += 4;
1060: start = q = ctxt->cur;
1061: ctxt->cur++;
1062: r = ctxt->cur;
1063: ctxt->cur++;
1064: while (IS_CHAR(ctxt->cur[0]) &&
1065: ((ctxt->cur[0] == ':') || (ctxt->cur[0] != '>') ||
1066: (*r != '-') || (*q != '-'))) {
1067: ctxt->cur++;r++;q++;
1.3 veillard 1068: }
1.16 daniel 1069: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 1070: fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
1.16 daniel 1071: ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
1.3 veillard 1072: } else {
1.16 daniel 1073: ctxt->cur++;
1.3 veillard 1074: }
1075: }
1076:
1077: /*
1.13 veillard 1078: * xmlParseNamespace: parse specific '<?namespace ...' constructs.
1.22 daniel 1079: *
1.29 daniel 1080: * TODO !!!!!!!!!!
1081: *
1082: * This is what the older xml-name Working Draft specified, a bunch of
1083: * other stuff may still rely on it, so support is still here as
1084: * if ot was declared on the root of the Tree:-(
1.1 veillard 1085: */
1086:
1.16 daniel 1087: void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.1 veillard 1088: CHAR *href = NULL;
1089: CHAR *AS = NULL;
1.3 veillard 1090: int garbage = 0;
1.1 veillard 1091:
1092: /*
1.18 daniel 1093: * We just skipped "namespace" or "xml:namespace"
1.1 veillard 1094: */
1.16 daniel 1095: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1096:
1.16 daniel 1097: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
1.1 veillard 1098: /*
1.18 daniel 1099: * We can have "ns" or "prefix" attributes
1100: * Old encoding as 'href' or 'AS' attributes is still supported
1.1 veillard 1101: */
1.18 daniel 1102: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
1103: garbage = 0;
1104: ctxt->cur += 2;
1105: SKIP_BLANKS(ctxt->cur);
1106:
1107: if (ctxt->cur[0] != '=') continue;
1108: ctxt->cur++;
1109: SKIP_BLANKS(ctxt->cur);
1110:
1111: href = xmlParseQuotedString(ctxt);
1112: SKIP_BLANKS(ctxt->cur);
1113: } else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
1.16 daniel 1114: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
1.3 veillard 1115: garbage = 0;
1.16 daniel 1116: ctxt->cur += 4;
1117: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1118:
1.16 daniel 1119: if (ctxt->cur[0] != '=') continue;
1120: ctxt->cur++;
1121: SKIP_BLANKS(ctxt->cur);
1122:
1123: href = xmlParseQuotedString(ctxt);
1124: SKIP_BLANKS(ctxt->cur);
1.18 daniel 1125: } else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
1126: (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
1127: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
1128: garbage = 0;
1129: ctxt->cur += 6;
1130: SKIP_BLANKS(ctxt->cur);
1131:
1132: if (ctxt->cur[0] != '=') continue;
1133: ctxt->cur++;
1134: SKIP_BLANKS(ctxt->cur);
1135:
1136: AS = xmlParseQuotedString(ctxt);
1137: SKIP_BLANKS(ctxt->cur);
1.16 daniel 1138: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
1.3 veillard 1139: garbage = 0;
1.16 daniel 1140: ctxt->cur += 2;
1141: SKIP_BLANKS(ctxt->cur);
1.1 veillard 1142:
1.16 daniel 1143: if (ctxt->cur[0] != '=') continue;
1144: ctxt->cur++;
1145: SKIP_BLANKS(ctxt->cur);
1146:
1147: AS = xmlParseQuotedString(ctxt);
1148: SKIP_BLANKS(ctxt->cur);
1149: } else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
1.3 veillard 1150: garbage = 0;
1.16 daniel 1151: ctxt->cur ++;
1.1 veillard 1152: } else {
1.3 veillard 1153: /*
1154: * Found garbage when parsing the namespace
1155: */
1156: if (!garbage) fprintf(stderr,
1.13 veillard 1157: "\nxmlParseNamespace found garbage: ");
1.16 daniel 1158: fprintf(stderr, "%c", ctxt->cur[0]);
1159: ctxt->cur++;
1.1 veillard 1160: }
1161: }
1162:
1.16 daniel 1163: MOVETO_ENDTAG(ctxt->cur);
1164: ctxt->cur++;
1.1 veillard 1165:
1166: /*
1167: * Register the DTD.
1168: */
1169: if (href != NULL)
1.16 daniel 1170: xmlNewDtd(ctxt->doc, href, AS);
1.1 veillard 1171:
1.8 veillard 1172: if (AS != NULL) free(AS);
1173: if (href != NULL) free(href);
1.1 veillard 1174: }
1175:
1176: /*
1.22 daniel 1177: * xmlParsePITarget: parse the name of a PI
1178: *
1179: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1180: */
1181:
1182: CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1183: CHAR *name;
1184:
1185: name = xmlParseName(ctxt);
1186: if ((name != NULL) && (name[3] == 0) &&
1187: ((name[0] == 'x') || (name[0] == 'X')) &&
1188: ((name[0] == 'm') || (name[0] == 'M')) &&
1189: ((name[0] == 'l') || (name[0] == 'L'))) {
1190: fprintf(stderr, "xmlParsePItarget: invalid name 'xml'\n");
1191: return(NULL);
1192: }
1193: return(name);
1194: }
1195:
1196: /*
1.3 veillard 1197: * xmlParsePI: parse an XML Processing Instruction.
1.22 daniel 1198: *
1199: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.3 veillard 1200: */
1201:
1.16 daniel 1202: void xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1203: CHAR *target;
1204:
1.16 daniel 1205: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
1.3 veillard 1206: /*
1207: * this is a Processing Instruction.
1208: */
1.16 daniel 1209: ctxt->cur += 2;
1.3 veillard 1210:
1211: /*
1.22 daniel 1212: * Parse the target name and check for special support like
1213: * namespace.
1214: *
1215: * TODO : PI handling should be dynamically redefinable using an
1216: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1217: */
1.22 daniel 1218: target = xmlParsePITarget(ctxt);
1219: if (target != NULL) {
1220: /*
1221: * Support for the Processing Instruction related to namespace.
1222: */
1223: if ((target[0] == 'n') && (target[1] == 'a') &&
1224: (target[2] == 'm') && (target[3] == 'e') &&
1225: (target[4] == 's') && (target[5] == 'p') &&
1226: (target[6] == 'a') && (target[7] == 'c') &&
1227: (target[8] == 'e')) {
1228: xmlParseNamespace(ctxt);
1229: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1230: (target[2] == 'l') && (target[3] == ':') &&
1231: (target[4] == 'n') && (target[5] == 'a') &&
1232: (target[6] == 'm') && (target[7] == 'e') &&
1233: (target[8] == 's') && (target[9] == 'p') &&
1234: (target[10] == 'a') && (target[11] == 'c') &&
1235: (target[12] == 'e')) {
1236: xmlParseNamespace(ctxt);
1237: } else {
1238: /* Unknown PI, ignore it ! */
1239: fprintf(stderr, "xmlParsePI : skipping unknown PI %s\n",
1240: target);
1241: while (IS_CHAR(ctxt->cur[0]) &&
1.24 daniel 1242: ((ctxt->cur[0] != '?') || (ctxt->cur[1] != '>')))
1.22 daniel 1243: ctxt->cur++;
1244: if (!IS_CHAR(ctxt->cur[0])) {
1245: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1246: target);
1.24 daniel 1247: } else
1248: ctxt->cur += 2;
1.22 daniel 1249: }
1.3 veillard 1250: } else {
1.22 daniel 1251: fprintf(stderr, "xmlParsePI : no target name...\n");
1252: /********* Should we try to complete parsing the PI ???
1253: while (IS_CHAR(ctxt->cur[0]) &&
1254: (ctxt->cur[0] != '?') && (ctxt->cur[0] != '>'))
1255: ctxt->cur++;
1256: if (!IS_CHAR(ctxt->cur[0])) {
1257: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1258: target);
1259: }
1260: ********************************************************/
1261: }
1262: }
1263: }
1264:
1265: /*
1266: * xmlParseNotationDecl: parse a notation declaration
1267: *
1268: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1269: *
1270: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1271: *
1272: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1273: * 'PUBLIC' S PubidLiteral S SystemLiteral
1274: *
1275: * Hence there is actually 3 choices:
1276: * 'PUBLIC' S PubidLiteral
1277: * 'PUBLIC' S PubidLiteral S SystemLiteral
1278: * and 'SYSTEM' S SystemLiteral
1279: */
1280:
1281: void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1282: CHAR *name;
1283:
1284: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1285: (ctxt->cur[2] == 'N') && (ctxt->cur[3] == 'O') &&
1286: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'A') &&
1287: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'I') &&
1288: (ctxt->cur[8] == 'O') && (ctxt->cur[9] == 'N') &&
1289: (IS_BLANK(ctxt->cur[10]))) {
1290: ctxt->cur += 10;
1291: SKIP_BLANKS(ctxt->cur);
1292:
1293: name = xmlParseName(ctxt);
1294: if (name == NULL) {
1295: fprintf(stderr,
1296: "xmlParseAttributeListDecl: no name for Element %30s\n",
1297: ctxt->cur - 10);
1298: return;
1299: }
1300: SKIP_BLANKS(ctxt->cur);
1301: /*
1302: * TODO !!!!!!
1303: */
1304: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1305: ctxt->cur++;
1306: free(name);
1307: }
1308: }
1309:
1310: /*
1311: * xmlParseEntityDecl: parse <!ENTITY declarations
1312: *
1313: * [70] EntityDecl ::= GEDecl | PEDecl
1314: *
1315: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1316: *
1317: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1318: *
1319: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1320: *
1321: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1322: *
1323: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1324: */
1325:
1326: void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1327: CHAR *name;
1.24 daniel 1328: CHAR *value = NULL;
1329: CHAR *id = NULL, *literal = NULL;
1330: CHAR *ndata = NULL;
1.22 daniel 1331: int typePEDef = 0;
1332:
1333: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1334: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'N') &&
1335: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1336: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'Y') &&
1337: (IS_BLANK(ctxt->cur[8]))) {
1338: ctxt->cur += 8;
1339: SKIP_BLANKS(ctxt->cur);
1340:
1341: if (ctxt->cur[0] == '%') {
1.16 daniel 1342: ctxt->cur++;
1.22 daniel 1343: SKIP_BLANKS(ctxt->cur);
1344: typePEDef = 1;
1345: }
1346:
1347: name = xmlParseName(ctxt);
1.24 daniel 1348: if (name == NULL) {
1349: fprintf(stderr, "xmlParseEntityDecl: no name %30s\n",
1350: ctxt->cur - 10);
1351: return;
1352: }
1353: SKIP_BLANKS(ctxt->cur);
1354:
1.22 daniel 1355: /*
1.24 daniel 1356: * TODO handle the various case of definitions...
1.22 daniel 1357: */
1.24 daniel 1358: if (typePEDef) {
1359: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1360: value = xmlParseEntityValue(ctxt);
1361: else {
1362: id = xmlParseExternalID(ctxt, &literal);
1363: }
1364: } else {
1365: if ((ctxt->cur[0] == '"') || (ctxt->cur[0] == '\''))
1366: value = xmlParseEntityValue(ctxt);
1367: else {
1368: id = xmlParseExternalID(ctxt, &literal);
1369: SKIP_BLANKS(ctxt->cur);
1370: if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'D') &&
1371: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1372: (ctxt->cur[4] == 'A')) {
1373: ndata = xmlParseName(ctxt);
1374: }
1375: }
1376: }
1377: SKIP_BLANKS(ctxt->cur);
1378: if (ctxt->cur[0] != '>') {
1379: fprintf(stderr,
1380: "xmlParseEntityDecl: entity %s not terminated %30s\n",
1.25 daniel 1381: name, ctxt->cur - 10);
1.24 daniel 1382: } else
1.22 daniel 1383: ctxt->cur++;
1384: }
1385: }
1386:
1387: /*
1388: * xmlParseEnumeratedType: parse and Enumerated attribute type.
1389: *
1390: * [57] EnumeratedType ::= NotationType | Enumeration
1391: *
1392: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1393: *
1394: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1395: */
1396:
1397: void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1398: /*
1399: * TODO !!!
1400: */
1401: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1402: ctxt->cur++;
1403: }
1404:
1405: /*
1406: * xmlParseAttributeType: parse the Attribute list def for an element
1407: *
1408: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1409: *
1410: * [55] StringType ::= 'CDATA'
1411: *
1412: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
1413: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1414: */
1415: void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1416: if ((ctxt->cur[0] == 'C') && (ctxt->cur[1] == 'D') &&
1417: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1418: (ctxt->cur[4] == 'A')) {
1419: ctxt->cur += 5;
1420: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D')) {
1421: ctxt->cur += 2;
1422: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1423: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1424: (ctxt->cur[4] == 'F')) {
1425: ctxt->cur += 5;
1426: } else if ((ctxt->cur[0] == 'I') && (ctxt->cur[1] == 'D') &&
1427: (ctxt->cur[2] == 'R') && (ctxt->cur[3] == 'E') &&
1428: (ctxt->cur[4] == 'F') && (ctxt->cur[5] == 'S')) {
1429: ctxt->cur += 6;
1430: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1431: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1432: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'Y')) {
1433: ctxt->cur += 6;
1434: } else if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'N') &&
1435: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'I') &&
1436: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'I') &&
1437: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'S')) {
1438: ctxt->cur += 8;
1439: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1440: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1441: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1442: (ctxt->cur[6] == 'N')) {
1443: ctxt->cur += 7;
1444: } else if ((ctxt->cur[0] == 'N') && (ctxt->cur[1] == 'M') &&
1445: (ctxt->cur[2] == 'T') && (ctxt->cur[3] == 'O') &&
1446: (ctxt->cur[4] == 'K') && (ctxt->cur[5] == 'E') &&
1447: (ctxt->cur[6] == 'N') && (ctxt->cur[7] == 'S')) {
1448: } else {
1449: xmlParseEnumeratedType(ctxt, name);
1450: }
1451: }
1452:
1453: /*
1454: * xmlParseAttributeListDecl: parse the Attribute list def for an element
1455: *
1456: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1457: *
1458: * [53] AttDef ::= S Name S AttType S DefaultDecl
1459: */
1460: void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1461: CHAR *name;
1462:
1463: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1464: (ctxt->cur[2] == 'A') && (ctxt->cur[3] == 'T') &&
1465: (ctxt->cur[4] == 'T') && (ctxt->cur[5] == 'L') &&
1466: (ctxt->cur[6] == 'I') && (ctxt->cur[7] == 'S') &&
1467: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1468: ctxt->cur += 9;
1469: SKIP_BLANKS(ctxt->cur);
1470: name = xmlParseName(ctxt);
1471: if (name == NULL) {
1472: fprintf(stderr,
1473: "xmlParseAttributeListDecl: no name for Element %30s\n",
1474: ctxt->cur - 10);
1475: return;
1476: }
1477: SKIP_BLANKS(ctxt->cur);
1478: while (ctxt->cur[0] != '>') {
1479: const CHAR *check = ctxt->cur;
1480:
1481: xmlParseAttributeType(ctxt, name);
1482: SKIP_BLANKS(ctxt->cur);
1483: if (check == ctxt->cur) {
1484: fprintf(stderr,
1485: "xmlParseAttributeListDecl: detected error %30s\n",
1486: check - 10);
1487: break;
1488: }
1489: }
1490: if (ctxt->cur[0] == '>')
1491: ctxt->cur++;
1492:
1493: free(name);
1494: }
1495: }
1496:
1497: /*
1498: * xmlParseElementContentDecl: parse the declaration for an Element content
1499: * either Mixed or Children, the cases EMPTY and ANY being handled
1500: * int xmlParseElementDecl.
1501: *
1502: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
1503: *
1504: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1505: *
1506: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
1507: *
1508: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
1509: *
1510: * or
1511: *
1512: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
1513: * '(' S? '#PCDATA' S? ')'
1514: */
1515:
1516: void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
1517: /*
1518: * TODO This has to be parsed correctly, currently we just skip until
1519: * we reach the first '>'.
1.29 daniel 1520: * !!!!!!!
1.22 daniel 1521: */
1522: while ((IS_CHAR(ctxt->cur[0])) && (ctxt->cur[0] != '>'))
1523: ctxt->cur++;
1524: }
1525:
1526: /*
1527: * xmlParseElementDecl: parse an Element declaration.
1528: *
1529: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1530: *
1531: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1532: *
1533: * TODO There is a check [ VC: Unique Element Type Declaration ]
1534: */
1535: void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1536: CHAR *name;
1537:
1538: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1539: (ctxt->cur[2] == 'E') && (ctxt->cur[3] == 'L') &&
1540: (ctxt->cur[4] == 'E') && (ctxt->cur[5] == 'M') &&
1541: (ctxt->cur[6] == 'E') && (ctxt->cur[7] == 'N') &&
1542: (ctxt->cur[8] == 'T') && (IS_BLANK(ctxt->cur[9]))) {
1543: ctxt->cur += 9;
1544: SKIP_BLANKS(ctxt->cur);
1545: name = xmlParseName(ctxt);
1546: if (name == NULL) {
1547: fprintf(stderr, "xmlParseElementDecl: no name for Element %30s\n",
1548: ctxt->cur - 10);
1549: return;
1550: }
1551: SKIP_BLANKS(ctxt->cur);
1552: if ((ctxt->cur[0] == 'E') && (ctxt->cur[1] == 'M') &&
1553: (ctxt->cur[2] == 'P') && (ctxt->cur[3] == 'T') &&
1554: (ctxt->cur[4] == 'Y')) {
1555: ctxt->cur += 5;
1556: /*
1557: * Element must always be empty.
1558: */
1559: } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'N') &&
1560: (ctxt->cur[2] == 'Y')) {
1561: ctxt->cur += 3;
1562: /*
1563: * Element is a generic container.
1564: */
1565: } else {
1566: xmlParseElementContentDecl(ctxt, name);
1567: }
1568: SKIP_BLANKS(ctxt->cur);
1569: if (ctxt->cur[0] != '>') {
1570: fprintf(stderr,
1571: "xmlParseElementDecl: expected '>' at the end %30s\n",
1572: ctxt->cur - 10);
1573: } else
1574: ctxt->cur++;
1575: }
1576: }
1577:
1578: /*
1579: * xmlParseMarkupDecl: parse Markup declarations
1580: *
1581: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
1582: * NotationDecl | PI | Comment
1583: *
1584: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
1585: */
1586: void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1587: xmlParseElementDecl(ctxt);
1588: xmlParseAttributeListDecl(ctxt);
1589: xmlParseEntityDecl(ctxt);
1590: xmlParseNotationDecl(ctxt);
1591: xmlParsePI(ctxt);
1592: xmlParserSkipComment(ctxt);
1593: }
1594:
1595: /*
1.24 daniel 1596: * xmlParseCharRef: parse Reference declarations
1597: *
1598: * [66] CharRef ::= '&#' [0-9]+ ';' |
1599: * '&#x' [0-9a-fA-F]+ ';'
1600: */
1601: CHAR xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 1602: int val = 0;
1.24 daniel 1603: CHAR ret = 0;
1604:
1605: if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#') &&
1606: (ctxt->cur[2] == 'x')) {
1607: ctxt->cur += 3;
1608: while (ctxt->cur[0] != ';') {
1609: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1.29 daniel 1610: val = val * 16 + (ctxt->cur[0] - '0');
1.24 daniel 1611: else if ((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'f'))
1.29 daniel 1612: val = val * 16 + (ctxt->cur[0] - 'a') + 10;
1.24 daniel 1613: else if ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'F'))
1.29 daniel 1614: val = val * 16 + (ctxt->cur[0] - 'A') + 10;
1.24 daniel 1615: else {
1616: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1617: ctxt->cur - 10);
1.29 daniel 1618: val = 0;
1.24 daniel 1619: break;
1620: }
1621: }
1622: if (ctxt->cur[0] != ';')
1623: ctxt->cur++;
1624: } else if ((ctxt->cur[0] == '&') && (ctxt->cur[1] == '#')) {
1625: ctxt->cur += 2;
1626: while (ctxt->cur[0] != ';') {
1627: if ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9'))
1.29 daniel 1628: val = val * 16 + (ctxt->cur[0] - '0');
1.24 daniel 1629: else {
1630: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1631: ctxt->cur - 10);
1.29 daniel 1632: val = 0;
1.24 daniel 1633: break;
1634: }
1635: }
1636: if (ctxt->cur[0] != ';')
1637: ctxt->cur++;
1638: } else {
1639: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1640: ctxt->cur);
1641: }
1.29 daniel 1642: /*
1643: * Check the value IS_CHAR ...
1644: */
1645: if (IS_CHAR(val))
1646: ret = (CHAR) val;
1647: else {
1648: fprintf(stderr, "xmlParseCharRef: invalid value %20s\n",
1649: ctxt->cur - 10);
1650:
1651: ret = '?';
1652: }
1.24 daniel 1653: return(ret);
1654: }
1655:
1656: /*
1657: * xmlParseEntityRef: parse ENTITY references declarations
1658: *
1659: * [68] EntityRef ::= '&' Name ';'
1660: */
1661: CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1662: CHAR *name;
1663:
1664: if (ctxt->cur[0] == '&') {
1665: ctxt->cur++;
1666: name = xmlParseName(ctxt);
1667: if (name == NULL) {
1.25 daniel 1668: fprintf(stderr, "xmlParseEntityRef: no name %30s\n",
1.24 daniel 1669: ctxt->cur - 10);
1670: } else {
1671: if (ctxt->cur[0] == ';') {
1672: ctxt->cur++;
1673: /*
1674: * TODO there is a VC check here !!!
1675: * [ VC: Entity Declared ]
1676: */
1677: free(name);
1678: } else {
1.25 daniel 1679: fprintf(stderr, "xmlParseEntityRef: expecting ';' %30s\n",
1.24 daniel 1680: ctxt->cur - 10);
1681: }
1682: }
1683: }
1.25 daniel 1684: return(NULL); /* TODO !!!! */
1.24 daniel 1685: }
1686:
1687: /*
1688: * xmlParseReference: parse Reference declarations
1689: *
1690: * [67] Reference ::= EntityRef | CharRef
1691: */
1692: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt) {
1693: CHAR *name;
1694:
1695: if (ctxt->cur[0] == '&') {
1696: return(xmlParseEntityRef(ctxt));
1697: } else {
1698: ctxt->cur++;
1699: name = xmlParseName(ctxt);
1700: if (name == NULL) {
1.25 daniel 1701: fprintf(stderr, "xmlParseReference: no name %30s\n",
1.24 daniel 1702: ctxt->cur - 10);
1703: } else {
1704: if (ctxt->cur[0] == ';') {
1705: ctxt->cur++;
1706: /*
1707: * TODO there is a VC check here !!!
1708: * [ VC: Entity Declared ]
1709: */
1710: free(name);
1711: } else {
1.25 daniel 1712: fprintf(stderr, "xmlParseReference: expecting ';' %30s\n",
1.24 daniel 1713: ctxt->cur - 10);
1714: }
1715: }
1716: }
1.25 daniel 1717: return(NULL); /* TODO !!!! */
1.24 daniel 1718: }
1719:
1720: /*
1.22 daniel 1721: * xmlParsePEReference: parse PEReference declarations
1722: *
1723: * [69] PEReference ::= '%' Name ';'
1724: */
1.24 daniel 1725: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 1726: CHAR *name;
1727:
1728: if (ctxt->cur[0] == '%') {
1729: ctxt->cur++;
1730: name = xmlParseName(ctxt);
1731: if (name == NULL) {
1732: fprintf(stderr, "xmlParsePEReference: no name %30s\n",
1733: ctxt->cur - 10);
1734: } else {
1735: if (ctxt->cur[0] == ';') {
1736: ctxt->cur++;
1737: /*
1738: * TODO there is a VC check here !!!
1739: * [ VC: Entity Declared ]
1740: */
1741: free(name);
1742: } else {
1743: fprintf(stderr, "xmlParsePEReference: expecting ';' %30s\n",
1744: ctxt->cur - 10);
1745: }
1.3 veillard 1746: }
1747: }
1.25 daniel 1748: return(NULL); /* TODO !!!! */
1.3 veillard 1749: }
1750:
1751: /*
1.21 daniel 1752: * xmlParseDocTypeDecl : parse a DOCTYPE declaration
1753: *
1.22 daniel 1754: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
1755: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 1756: */
1757:
1758: void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1759: CHAR *name;
1760: CHAR *ExternalID = NULL;
1.22 daniel 1761: CHAR *SystemID = NULL;
1.21 daniel 1762:
1763: /*
1764: * We know that '<!DOCTYPE' has been detected.
1765: */
1766: ctxt->cur += 9;
1767:
1768: SKIP_BLANKS(ctxt->cur);
1769:
1770: /*
1771: * Parse the DOCTYPE name.
1772: */
1773: name = xmlParseName(ctxt);
1774: if (name == NULL) {
1775: fprintf(stderr, "xmlParseDocTypeDecl : no DOCTYPE name ! : %30s\n",
1776: ctxt->cur - 10);
1777: }
1778:
1779: SKIP_BLANKS(ctxt->cur);
1780:
1781: /*
1.22 daniel 1782: * Check for SystemID and ExternalID
1783: */
1784: SystemID = xmlParseExternalID(ctxt, &ExternalID);
1785: SKIP_BLANKS(ctxt->cur);
1786:
1787: /*
1788: * Is there any DTD definition ?
1789: */
1790: if (ctxt->cur[0] == '[') {
1791: ctxt->cur++;
1792: /*
1793: * Parse the succession of Markup declarations and
1794: * PEReferences.
1795: * Subsequence (markupdecl | PEReference | S)*
1796: */
1797: while (ctxt->cur[0] != ']') {
1798: const CHAR *check = ctxt->cur;
1799:
1800: SKIP_BLANKS(ctxt->cur);
1801: xmlParseMarkupDecl(ctxt);
1802: xmlParsePEReference(ctxt);
1803:
1804: if (ctxt->cur == check) {
1805: fprintf(stderr,
1806: "xmlParseDocTypeDecl: error detected in Markup declaration\n\t%50s\n",
1807: check - 10);
1808: break;
1809: }
1810: }
1811: if (ctxt->cur[0] == ']') ctxt->cur++;
1812: }
1813:
1814: /*
1815: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 1816: */
1.22 daniel 1817: if (ctxt->cur[0] != '>') {
1818: fprintf(stderr, "DOCTYPE unproperly terminated %30s\n",
1819: ctxt->cur - 10);
1820: /* We shouldn't try to resynchronize ... */
1.21 daniel 1821: }
1.22 daniel 1822: ctxt->cur++;
1823:
1824: /*
1825: * Cleanup, since we don't use all those identifiers
1826: * TODO : the DOCTYPE if available should be stored !
1827: */
1828: if (SystemID != NULL) free(SystemID);
1829: if (ExternalID != NULL) free(ExternalID);
1830: if (name != NULL) free(name);
1.21 daniel 1831: }
1832:
1833: /*
1.3 veillard 1834: * xmlParseAttribute: parse a start of tag.
1835: *
1.22 daniel 1836: * [41] Attribute ::= Name Eq AttValue
1837: *
1838: * [25] Eq ::= S? '=' S?
1839: *
1.29 daniel 1840: * With namespace:
1841: *
1842: * [NS 11] Attribute ::= QName Eq AttValue
1.3 veillard 1843: */
1844:
1.16 daniel 1845: void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 1846: CHAR *name, *value = NULL;
1.29 daniel 1847: CHAR *ns;
1.3 veillard 1848:
1.29 daniel 1849: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 1850: if (name == NULL) {
1851: fprintf(stderr,
1852: "xmlParseAttribute: error parsing attribute name %30s\n",
1.23 daniel 1853: ctxt->cur - 10);
1.29 daniel 1854: return;
1.3 veillard 1855: }
1.22 daniel 1856: /*
1857: * TODO: Check for Namespace ...
1858: */
1.29 daniel 1859: if (ns != NULL) {
1860: fprintf(stderr,
1861: "xmlParseAttribute: don't handle attributes namespace\n");
1862: free(ns);
1863: }
1.3 veillard 1864:
1865: /*
1.29 daniel 1866: * read the value
1.3 veillard 1867: */
1.16 daniel 1868: SKIP_BLANKS(ctxt->cur);
1869: if (ctxt->cur[0] == '=') {
1870: ctxt->cur++;
1871: SKIP_BLANKS(ctxt->cur);
1.29 daniel 1872: value = xmlParseAttValue(ctxt);
1873: } else {
1874: fprintf(stderr, "Specification mandate value for attribute %s : %30s\n",
1875: name, ctxt->cur - 10);
1.3 veillard 1876: }
1877:
1878: /*
1879: * Add the attribute to the node.
1880: */
1.17 daniel 1881: if (name != NULL) {
1.3 veillard 1882: xmlNewProp(node, name, value);
1.17 daniel 1883: free(name);
1884: }
1.29 daniel 1885: if (value != NULL)
1.17 daniel 1886: free(value);
1.3 veillard 1887: }
1888:
1889: /*
1.29 daniel 1890: * xmlParseStartTag: parse a start of tag either for rule element or
1891: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 1892: *
1893: * [40] STag ::= '<' Name (S Attribute)* S? '>'
1894: *
1.29 daniel 1895: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1896: *
1897: * With namespace:
1898: *
1899: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
1900: *
1901: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.2 veillard 1902: */
1903:
1.16 daniel 1904: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.17 daniel 1905: const CHAR *q;
1906: CHAR *ns, *name;
1.3 veillard 1907: xmlDtdPtr dtd = NULL;
1.2 veillard 1908: xmlNodePtr ret = NULL;
1909:
1.16 daniel 1910: if (ctxt->cur[0] != '<') return(NULL);
1911: ctxt->cur++;
1.3 veillard 1912:
1.29 daniel 1913: name = xmlNamespaceParseQName(ctxt, &ns);
1914: if (ns != NULL) {
1.3 veillard 1915: /*
1916: * Search the DTD associated to ns.
1917: */
1.16 daniel 1918: dtd = xmlSearchDtd(ctxt->doc, ns);
1.3 veillard 1919: if (dtd == NULL)
1.7 veillard 1920: fprintf(stderr, "Start tag : Couldn't find namespace %s\n", ns);
1.3 veillard 1921: free(ns);
1.29 daniel 1922: }
1.3 veillard 1923:
1924: ret = xmlNewNode(dtd, name, NULL);
1.2 veillard 1925:
1.3 veillard 1926: /*
1927: * Now parse the attributes, it ends up with the ending
1928: *
1929: * (S Attribute)* S?
1930: */
1.16 daniel 1931: SKIP_BLANKS(ctxt->cur);
1932: while ((IS_CHAR(ctxt->cur[0])) &&
1933: (ctxt->cur[0] != '>') &&
1934: ((ctxt->cur[0] != '/') || (ctxt->cur[1] != '>'))) {
1.29 daniel 1935: const CHAR *q = ctxt->cur;
1936:
1937: xmlParseAttribute(ctxt, ret);
1938: SKIP_BLANKS(ctxt->cur);
1939:
1940: if (q == ctxt->cur) {
1941: fprintf(stderr,
1942: "xmlParseStartTag: problem parsing attributes %30s\n",
1943: ctxt->cur - 10);
1944: break;
1.3 veillard 1945: }
1946: }
1947:
1948: return(ret);
1949: }
1950:
1951: /*
1.27 daniel 1952: * xmlParseEndTag: parse an end of tag
1953: *
1954: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 1955: *
1956: * With namespace
1957: *
1958: * [9] ETag ::= '</' QName S? '>'
1.7 veillard 1959: */
1960:
1.16 daniel 1961: void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
1.17 daniel 1962: const CHAR *q;
1963: CHAR *ns, *name;
1.7 veillard 1964: xmlDtdPtr dtd = NULL;
1965:
1966: *dtdPtr = NULL;
1967: *tagPtr = NULL;
1968:
1.27 daniel 1969: if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1970: fprintf(stderr, "xmlParseEndTag: '</' not found %30s\n", ctxt->cur -10);
1971: return;
1972: }
1973: ctxt->cur += 2;
1.7 veillard 1974:
1.29 daniel 1975: name = xmlNamespaceParseQName(ctxt, &ns);
1976: if (ns != NULL) {
1.7 veillard 1977: /*
1978: * Search the DTD associated to ns.
1979: */
1.16 daniel 1980: dtd = xmlSearchDtd(ctxt->doc, ns);
1.7 veillard 1981: if (dtd == NULL)
1982: fprintf(stderr, "End tag : Couldn't find namespace %s\n", ns);
1983: free(ns);
1.29 daniel 1984: }
1.7 veillard 1985:
1986: *dtdPtr = dtd;
1987: *tagPtr = name;
1988:
1989: /*
1990: * We should definitely be at the ending "S? '>'" part
1991: */
1.16 daniel 1992: SKIP_BLANKS(ctxt->cur);
1993: if ((!IS_CHAR(ctxt->cur[0])) || (ctxt->cur[0] != '>')) {
1994: fprintf(stderr, "End tag : expected '>', got %.20s\n", ctxt->cur);
1.7 veillard 1995: } else
1.16 daniel 1996: ctxt->cur++;
1.7 veillard 1997:
1998: return;
1999: }
2000:
2001: /*
1.3 veillard 2002: * xmlParseCDSect: escaped pure raw content.
1.29 daniel 2003: *
2004: * [18] CDSect ::= CDStart CData CDEnd
2005: *
2006: * [19] CDStart ::= '<![CDATA['
2007: *
2008: * [20] Data ::= (Char* - (Char* ']]>' Char*))
2009: *
2010: * [21] CDEnd ::= ']]>'
1.3 veillard 2011: */
1.16 daniel 2012: CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 2013: const CHAR *r, *s, *base;
2014: CHAR *ret;
1.3 veillard 2015:
1.29 daniel 2016: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2017: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
2018: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
2019: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
2020: (ctxt->cur[8] == '[')) {
2021: ctxt->cur += 9;
2022: } else
2023: return(NULL);
1.16 daniel 2024: base = ctxt->cur;
2025: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 2026: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 2027: return(NULL);
2028: }
1.16 daniel 2029: r = ctxt->cur++;
2030: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 2031: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 2032: return(NULL);
2033: }
1.16 daniel 2034: s = ctxt->cur++;
2035: while (IS_CHAR(ctxt->cur[0]) &&
2036: ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) {
2037: r++;s++;ctxt->cur++;
1.3 veillard 2038: }
1.16 daniel 2039: if (!IS_CHAR(ctxt->cur[0])) {
1.7 veillard 2040: fprintf(stderr, "CData section not finished : %.20s\n", base);
1.3 veillard 2041: return(NULL);
2042: }
1.16 daniel 2043: ret = xmlStrndup(base, ctxt->cur-base);
2044:
1.2 veillard 2045: return(ret);
2046: }
2047:
2048: /*
2049: * xmlParseContent: a content is
2050: * (element | PCData | Reference | CDSect | PI | Comment)
2051: *
1.27 daniel 2052: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 2053: */
2054:
1.27 daniel 2055: void xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.17 daniel 2056: const CHAR *q;
2057: CHAR *data = NULL;
1.2 veillard 2058: xmlNodePtr ret = NULL;
2059:
1.27 daniel 2060: while ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/')) {
1.29 daniel 2061: const CHAR *test;
1.27 daniel 2062: ret = NULL;
2063: data = NULL;
2064:
2065: /*
2066: * First case : a Processing Instruction.
2067: */
2068: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
2069: xmlParsePI(ctxt);
2070: }
2071: /*
2072: * Second case : a CDSection
2073: */
2074: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2075: (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
2076: (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
2077: (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
2078: (ctxt->cur[8] == '[')) {
2079: data = xmlParseCDSect(ctxt);
2080: }
2081: /*
2082: * Third case : a comment
2083: */
2084: else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2085: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
2086: xmlParserSkipComment(ctxt);
2087: }
2088: /*
2089: * Fourth case : a sub-element.
2090: */
2091: else if (ctxt->cur[0] == '<') {
2092: ret = xmlParseElement(ctxt);
2093: }
2094: /*
2095: * Last case, text. Note that References are handled directly.
2096: */
2097: else {
2098: q = ctxt->cur;
2099: while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
2100:
2101: if (!IS_CHAR(ctxt->cur[0])) {
2102: fprintf(stderr, "Truncated content : %.50s\n", q);
2103: return;
2104: }
1.3 veillard 2105:
1.27 daniel 2106: /*
2107: * Do the Entities decoding...
2108: */
2109: data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
1.3 veillard 2110: }
1.14 veillard 2111:
2112: /*
1.27 daniel 2113: * Handle the data if any. If there is no child
2114: * add it as content, otherwise create a new node of type text.
1.14 veillard 2115: */
1.27 daniel 2116: if (data != NULL)
2117: data = xmlHandleData(data);
2118: if (data != NULL) {
2119: if (node->childs == NULL)
2120: xmlNodeSetContent(node, data);
2121: else
2122: ret = xmlNewText(data);
2123: free(data);
2124: }
2125: if (ret != NULL)
2126: xmlAddChild(node, ret);
1.29 daniel 2127: if (test == ctxt->cur) {
2128: fprintf(stderr,
2129: "xmlParseContent: detected an error in element content\n\t%50s\n",
2130: ctxt->cur - 20);
2131: break;
2132: }
1.3 veillard 2133: }
1.2 veillard 2134: }
2135:
2136: /*
2137: * xmlParseElement: parse an XML element
1.26 daniel 2138: *
2139: * [39] element ::= EmptyElemTag | STag content ETag
2140: *
2141: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 2142: */
1.26 daniel 2143:
1.2 veillard 2144:
1.16 daniel 2145: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 2146: xmlNodePtr ret;
1.17 daniel 2147: const CHAR *openTag = ctxt->cur;
1.27 daniel 2148: CHAR *endTag;
2149: xmlDtdPtr endDtd;
1.2 veillard 2150:
1.16 daniel 2151: ret = xmlParseStartTag(ctxt);
1.3 veillard 2152: if (ret == NULL) {
2153: return(NULL);
2154: }
1.2 veillard 2155:
2156: /*
2157: * Check for an Empty Element.
2158: */
1.16 daniel 2159: if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
2160: ctxt->cur += 2;
1.2 veillard 2161: return(ret);
2162: }
1.16 daniel 2163: if (ctxt->cur[0] == '>') ctxt->cur++;
1.2 veillard 2164: else {
1.16 daniel 2165: fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", openTag);
2166: return(NULL);
1.2 veillard 2167: }
2168:
2169: /*
2170: * Parse the content of the element:
2171: */
1.27 daniel 2172: xmlParseContent(ctxt, ret);
1.16 daniel 2173: if (!IS_CHAR(ctxt->cur[0])) {
2174: fprintf(stderr, "Premature end of data in tag %.30s\n", openTag);
2175: return(NULL);
1.2 veillard 2176: }
2177:
2178: /*
1.27 daniel 2179: * parse the end of tag: '</' should be here.
1.2 veillard 2180: */
1.27 daniel 2181: xmlParseEndTag(ctxt, &endDtd, &endTag);
1.7 veillard 2182:
1.27 daniel 2183: /*
2184: * Check that the Name in the ETag is the same as in the STag.
2185: */
2186: if (endDtd != ret->dtd) {
2187: fprintf(stderr, "Start and End tags don't use the same DTD:\n");
2188: fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, endTag);
2189: }
2190: if (strcmp(ret->name, endTag)) {
2191: fprintf(stderr, "Start and End tags don't use the same name:\n");
2192: fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, endTag);
2193: }
1.7 veillard 2194:
1.27 daniel 2195: if ( endTag != NULL )
2196: free(endTag);
1.2 veillard 2197:
2198: return(ret);
2199: }
2200:
2201: /*
1.29 daniel 2202: * xmlParseVersionNum: parse the XML version value.
2203: *
2204: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
2205: */
2206: CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
2207: const CHAR *q = ctxt->cur;
2208: CHAR *ret;
2209:
2210: while (IS_CHAR(ctxt->cur[0]) &&
2211: (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2212: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z')) ||
2213: ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9')) ||
2214: (ctxt->cur[0] == '-'))) ctxt->cur++;
2215: ret = xmlStrndup(q, ctxt->cur - q);
2216: return(ret);
2217: }
2218:
2219: /*
2220: * xmlParseVersionInfo: parse the XML version.
2221: *
2222: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
2223: *
2224: * [25] Eq ::= S? '=' S?
2225: */
2226:
2227: CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
2228: CHAR *version = NULL;
2229: const CHAR *q;
2230:
2231: if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
2232: (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
2233: (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
2234: (ctxt->cur[6] == 'n')) {
2235: ctxt->cur += 7;
2236: SKIP_BLANKS(ctxt->cur);
2237: if (ctxt->cur[0] == '"') {
2238: ctxt->cur++;
2239: } else if (ctxt->cur[0] == '\''){
2240: ctxt->cur++;
2241: q = ctxt->cur;
2242: version = xmlParseVersionNum(ctxt);
2243: if (ctxt->cur[0] != '\'')
2244: fprintf(stderr, "String not closed '%.50s\n", q);
2245: else
2246: ctxt->cur++;
2247: } else if (ctxt->cur[0] == '"'){
2248: ctxt->cur++;
2249: q = ctxt->cur;
2250: version = xmlParseVersionNum(ctxt);
2251: if (ctxt->cur[0] != '"')
2252: fprintf(stderr, "String not closed '%.50s\n", q);
2253: else
2254: ctxt->cur++;
2255: }
2256: }
2257: return(version);
2258: }
2259:
2260: /*
2261: * xmlParseEncName: parse the XML encoding name
2262: *
2263: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
2264: */
2265: CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
2266: const CHAR *q = ctxt->cur;
2267: CHAR *ret = NULL;
2268:
2269: if (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2270: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z'))) {
2271: ctxt->cur++;
2272: while (IS_CHAR(ctxt->cur[0]) &&
2273: (((ctxt->cur[0] >= 'a') && (ctxt->cur[0] <= 'z')) ||
2274: ((ctxt->cur[0] >= 'A') && (ctxt->cur[0] <= 'Z')) ||
2275: ((ctxt->cur[0] >= '0') && (ctxt->cur[0] <= '9')) ||
2276: (ctxt->cur[0] == '-'))) ctxt->cur++;
2277: ret = xmlStrndup(q, ctxt->cur - q);
2278: } else {
2279: fprintf(stderr, "Invalid XML encoding name %20s\n", ctxt->cur);
2280: }
2281: return(ret);
2282: }
2283:
2284: /*
2285: * xmlParseEncodingDecl: parse the XML encoding declaration
2286: *
2287: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
2288: */
2289:
2290: CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
2291: CHAR *encoding = NULL;
2292: const CHAR *q;
2293:
2294: SKIP_BLANKS(ctxt->cur);
2295: if ((ctxt->cur[0] == 'e') && (ctxt->cur[1] == 'n') &&
2296: (ctxt->cur[2] == 'c') && (ctxt->cur[3] == 'o') &&
2297: (ctxt->cur[4] == 'd') && (ctxt->cur[5] == 'i') &&
2298: (ctxt->cur[6] == 'n') && (ctxt->cur[7] == 'g')) {
2299: ctxt->cur += 8;
2300: SKIP_BLANKS(ctxt->cur);
2301: if (ctxt->cur[0] == '"') {
2302: ctxt->cur++;
2303: } else if (ctxt->cur[0] == '\''){
2304: ctxt->cur++;
2305: q = ctxt->cur;
2306: encoding = xmlParseEncName(ctxt);
2307: if (ctxt->cur[0] != '\'')
2308: fprintf(stderr, "String not closed '%.50s\n", q);
2309: else
2310: ctxt->cur++;
2311: } else if (ctxt->cur[0] == '"'){
2312: ctxt->cur++;
2313: q = ctxt->cur;
2314: encoding = xmlParseEncName(ctxt);
2315: if (ctxt->cur[0] != '"')
2316: fprintf(stderr, "String not closed '%.50s\n", q);
2317: else
2318: ctxt->cur++;
2319: }
2320: }
2321: return(encoding);
2322: }
2323:
2324: /*
2325: * xmlParseSDDecl: parse the XML standalone declaration
2326: *
2327: * [32] SDDecl ::= S 'standalone' Eq
2328: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
2329: */
2330:
2331: int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
2332: int standalone = -1;
2333:
2334: SKIP_BLANKS(ctxt->cur);
2335: if ((ctxt->cur[0] == 's') && (ctxt->cur[1] == 't') &&
2336: (ctxt->cur[2] == 'a') && (ctxt->cur[3] == 'n') &&
2337: (ctxt->cur[4] == 'd') && (ctxt->cur[5] == 'a') &&
2338: (ctxt->cur[6] == 'l') && (ctxt->cur[7] == 'o') &&
2339: (ctxt->cur[8] == 'n') && (ctxt->cur[9] == 'e')) {
2340: ctxt->cur += 10;
2341: SKIP_BLANKS(ctxt->cur);
2342: if (ctxt->cur[0] == '"') {
2343: ctxt->cur++;
2344: } else if (ctxt->cur[0] == '\''){
2345: ctxt->cur++;
2346: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'o')) {
2347: standalone = 0;
2348: } else if ((ctxt->cur[0] == 'y') && (ctxt->cur[1] == 'e') &&
2349: (ctxt->cur[2] == 's')) {
2350: standalone = 1;
2351: } else {
2352: fprintf(stderr, "standalone accepts only 'yes' or 'no': %20s\n",
2353: ctxt->cur);
2354: }
2355: if (ctxt->cur[0] != '\'')
2356: fprintf(stderr, "String not closed '%.50s\n", ctxt->cur);
2357: else
2358: ctxt->cur++;
2359: } else if (ctxt->cur[0] == '"'){
2360: ctxt->cur++;
2361: if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'o')) {
2362: standalone = 0;
2363: } else if ((ctxt->cur[0] == 'y') && (ctxt->cur[1] == 'e') &&
2364: (ctxt->cur[2] == 's')) {
2365: standalone = 1;
2366: } else {
2367: fprintf(stderr, "standalone accepts only 'yes' or 'no': %20s\n",
2368: ctxt->cur);
2369: }
2370: if (ctxt->cur[0] != '"')
2371: fprintf(stderr, "String not closed '%.50s\n", ctxt->cur);
2372: else
2373: ctxt->cur++;
2374: }
2375: }
2376: return(standalone);
2377: }
2378:
2379: /*
1.1 veillard 2380: * xmlParseXMLDecl: parse an XML declaration header
1.29 daniel 2381: *
2382: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 2383: */
2384:
1.16 daniel 2385: void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 2386: CHAR *version;
1.29 daniel 2387: CHAR *encoding;
2388: int standalone;
1.1 veillard 2389:
2390: /*
1.19 daniel 2391: * We know that '<?xml' is here.
1.1 veillard 2392: */
1.16 daniel 2393: ctxt->cur += 5;
1.1 veillard 2394:
2395: /*
2396: * Parse the version info
2397: */
1.16 daniel 2398: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2399:
2400: /*
1.29 daniel 2401: * We should have the VersionInfo here.
1.1 veillard 2402: */
1.29 daniel 2403: version = xmlParseVersionInfo(ctxt);
2404: if (version == NULL)
1.16 daniel 2405: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.29 daniel 2406: else {
2407: ctxt->doc = xmlNewDoc(version);
2408: free(version);
2409: }
2410:
2411: /*
2412: * We may have the encoding declaration
2413: */
2414: encoding = xmlParseEncodingDecl(ctxt);
2415: if (encoding != NULL) {
2416: /* TODO !!!!! encoding support ... */
2417: free(encoding);
1.1 veillard 2418: }
2419:
2420: /*
1.29 daniel 2421: * We may have the standalone status.
1.1 veillard 2422: */
1.29 daniel 2423: standalone = xmlParseSDDecl(ctxt);
1.1 veillard 2424:
1.29 daniel 2425: SKIP_BLANKS(ctxt->cur);
2426: if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
2427: ctxt->cur += 2;
2428: } else {
2429: MOVETO_ENDTAG(ctxt->cur);
2430: fprintf(stderr, "parsing XML declaration: '?>' expected: %20s\n",
2431: ctxt->cur - 10);
2432: }
1.1 veillard 2433: }
2434:
2435: /*
1.22 daniel 2436: * xmlParseMisc: parse an XML Misc* optionnal field.
1.21 daniel 2437: * Misc*
2438: *
1.22 daniel 2439: * [27] Misc ::= Comment | PI | S
1.1 veillard 2440: */
2441:
1.16 daniel 2442: void xmlParseMisc(xmlParserCtxtPtr ctxt) {
2443: while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) ||
2444: ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
1.21 daniel 2445: (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) ||
1.16 daniel 2446: IS_BLANK(ctxt->cur[0])) {
2447: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
2448: xmlParsePI(ctxt);
2449: } else if (IS_BLANK(ctxt->cur[0])) {
2450: ctxt->cur++;
1.1 veillard 2451: } else
1.16 daniel 2452: xmlParserSkipComment(ctxt);
1.1 veillard 2453: }
2454: }
2455:
2456: /*
1.16 daniel 2457: * xmlParseDocument : parse an XML document and build a tree.
1.21 daniel 2458: *
1.22 daniel 2459: * [1] document ::= prolog element Misc*
1.29 daniel 2460: *
2461: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.1 veillard 2462: */
2463:
1.16 daniel 2464: int xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.14 veillard 2465: /*
2466: * We should check for encoding here and plug-in some
2467: * conversion code TODO !!!!
2468: */
1.1 veillard 2469:
2470: /*
2471: * Wipe out everything which is before the first '<'
2472: */
1.16 daniel 2473: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2474:
2475: /*
2476: * Check for the XMLDecl in the Prolog.
2477: */
1.16 daniel 2478: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.19 daniel 2479: (ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
2480: (ctxt->cur[4] == 'l')) {
2481: xmlParseXMLDecl(ctxt);
2482: /* SKIP_EOL(cur); */
2483: SKIP_BLANKS(ctxt->cur);
2484: } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
1.16 daniel 2485: (ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
2486: (ctxt->cur[4] == 'L')) {
1.19 daniel 2487: /*
2488: * The first drafts were using <?XML and the final W3C REC
2489: * now use <?xml ...
2490: */
1.16 daniel 2491: xmlParseXMLDecl(ctxt);
1.1 veillard 2492: /* SKIP_EOL(cur); */
1.16 daniel 2493: SKIP_BLANKS(ctxt->cur);
1.1 veillard 2494: } else {
1.16 daniel 2495: ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
1.1 veillard 2496: }
2497:
2498: /*
2499: * The Misc part of the Prolog
1.21 daniel 2500: * Misc*
2501: * Misc ::= Comment | PI | S
1.1 veillard 2502: */
1.16 daniel 2503: xmlParseMisc(ctxt);
1.1 veillard 2504:
2505: /*
1.29 daniel 2506: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 2507: * (doctypedecl Misc*)?
2508: */
1.22 daniel 2509: if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
2510: (ctxt->cur[2] == 'D') && (ctxt->cur[3] == 'O') &&
2511: (ctxt->cur[4] == 'C') && (ctxt->cur[5] == 'T') &&
2512: (ctxt->cur[6] == 'Y') && (ctxt->cur[7] == 'P') &&
2513: (ctxt->cur[8] == 'E')) {
2514: xmlParseDocTypeDecl(ctxt);
2515: xmlParseMisc(ctxt);
1.21 daniel 2516: }
2517:
2518: /*
2519: * Time to start parsing the tree itself
1.1 veillard 2520: */
1.16 daniel 2521: ctxt->doc->root = xmlParseElement(ctxt);
2522:
2523: return(0);
2524: }
2525:
2526: /*
2527: * xmlParseDoc : parse an XML in-memory document and build a tree.
2528: */
2529:
2530: xmlDocPtr xmlParseDoc(CHAR *cur) {
2531: xmlDocPtr ret;
2532: xmlParserCtxtPtr ctxt;
2533:
2534: if (cur == NULL) return(NULL);
1.1 veillard 2535:
1.16 daniel 2536: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2537: if (ctxt == NULL) {
2538: perror("malloc");
2539: return(NULL);
2540: }
2541:
1.19 daniel 2542: xmlInitParserCtxt(ctxt);
1.16 daniel 2543: ctxt->base = cur;
2544: ctxt->cur = cur;
2545:
2546: xmlParseDocument(ctxt);
2547: ret = ctxt->doc;
1.20 daniel 2548: free(ctxt->nodes);
1.16 daniel 2549: free(ctxt);
2550:
1.1 veillard 2551: return(ret);
2552: }
2553:
1.9 httpng 2554: /*
2555: * xmlParseFile : parse an XML file and build a tree.
2556: */
2557:
2558: xmlDocPtr xmlParseFile(const char *filename) {
2559: xmlDocPtr ret;
1.20 daniel 2560: #ifdef HAVE_ZLIB_H
2561: gzFile input;
2562: #else
1.9 httpng 2563: int input;
1.20 daniel 2564: #endif
1.9 httpng 2565: int res;
2566: struct stat buf;
2567: char *buffer;
1.16 daniel 2568: xmlParserCtxtPtr ctxt;
1.9 httpng 2569:
1.11 veillard 2570: res = stat(filename, &buf);
1.9 httpng 2571: if (res < 0) return(NULL);
2572:
1.20 daniel 2573: #ifdef HAVE_ZLIB_H
2574: retry_bigger:
2575: buffer = malloc((buf.st_size * 20) + 100);
2576: #else
1.9 httpng 2577: buffer = malloc(buf.st_size + 100);
1.20 daniel 2578: #endif
1.9 httpng 2579: if (buffer == NULL) {
2580: perror("malloc");
2581: return(NULL);
2582: }
2583:
2584: memset(buffer, 0, sizeof(buffer));
1.20 daniel 2585: #ifdef HAVE_ZLIB_H
2586: input = gzopen (filename, "r");
2587: if (input == NULL) {
2588: fprintf (stderr, "Cannot read file %s :\n", filename);
2589: perror ("gzopen failed");
2590: return(NULL);
2591: }
2592: #else
1.9 httpng 2593: input = open (filename, O_RDONLY);
2594: if (input < 0) {
2595: fprintf (stderr, "Cannot read file %s :\n", filename);
2596: perror ("open failed");
2597: return(NULL);
2598: }
1.20 daniel 2599: #endif
2600: #ifdef HAVE_ZLIB_H
2601: res = gzread(input, buffer, 20 * buf.st_size);
2602: #else
1.9 httpng 2603: res = read(input, buffer, buf.st_size);
1.20 daniel 2604: #endif
1.9 httpng 2605: if (res < 0) {
2606: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 2607: #ifdef HAVE_ZLIB_H
2608: perror ("gzread failed");
2609: #else
1.9 httpng 2610: perror ("read failed");
1.20 daniel 2611: #endif
1.9 httpng 2612: return(NULL);
2613: }
1.20 daniel 2614: #ifdef HAVE_ZLIB_H
2615: gzclose(input);
2616: if (res >= 20 * buf.st_size) {
2617: free(buffer);
2618: buf.st_size *= 2;
2619: goto retry_bigger;
2620: }
2621: buf.st_size = res;
2622: #else
1.9 httpng 2623: close(input);
1.20 daniel 2624: #endif
2625:
1.9 httpng 2626:
1.16 daniel 2627: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2628: if (ctxt == NULL) {
2629: perror("malloc");
2630: return(NULL);
2631: }
1.9 httpng 2632: buffer[buf.st_size] = '\0';
1.16 daniel 2633:
1.19 daniel 2634: xmlInitParserCtxt(ctxt);
1.17 daniel 2635: ctxt->filename = filename;
1.16 daniel 2636: ctxt->base = buffer;
2637: ctxt->cur = buffer;
2638:
2639: xmlParseDocument(ctxt);
2640: ret = ctxt->doc;
1.9 httpng 2641: free(buffer);
1.20 daniel 2642: free(ctxt->nodes);
2643: free(ctxt);
2644:
2645: return(ret);
2646: }
2647:
2648: /*
2649: * xmlParseFile : parse an XML memory block and build a tree.
2650: */
2651:
2652: xmlDocPtr xmlParseMemory(char *buffer, int size) {
2653: xmlDocPtr ret;
2654: xmlParserCtxtPtr ctxt;
2655:
2656: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
2657: if (ctxt == NULL) {
2658: perror("malloc");
2659: return(NULL);
2660: }
2661:
2662: buffer[size - 1] = '\0';
2663:
2664: xmlInitParserCtxt(ctxt);
2665: ctxt->base = buffer;
2666: ctxt->cur = buffer;
2667:
2668: xmlParseDocument(ctxt);
2669: ret = ctxt->doc;
2670: free(ctxt->nodes);
1.16 daniel 2671: free(ctxt);
2672:
1.9 httpng 2673: return(ret);
1.17 daniel 2674: }
2675:
2676:
2677:
2678:
2679: /* Initialize parser context */
2680: void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2681: {
1.19 daniel 2682: int i;
2683:
2684: ctxt->filename = NULL;
2685: ctxt->base = NULL;
2686: ctxt->cur = NULL;
2687: ctxt->line = 1;
2688: ctxt->col = 1;
2689: ctxt->doc = NULL;
2690: ctxt->depth = 0;
2691: ctxt->max_depth = 10;
2692: ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
2693: if (ctxt->nodes == NULL) {
2694: fprintf(stderr, "malloc of %d byte failed\n",
2695: ctxt->max_depth * sizeof(xmlNodePtr));
2696: ctxt->max_depth = 0;
2697: } else {
2698: for (i = 0;i < ctxt->max_depth;i++)
2699: ctxt->nodes[i] = NULL;
2700: }
1.17 daniel 2701: }
2702:
2703:
1.19 daniel 2704: /*
2705: * Clear (release owned resources) and reinitialize context
2706: */
1.17 daniel 2707: void xmlClearParserCtxt(xmlParserCtxtPtr ctx)
2708: {
1.19 daniel 2709: xmlInitParserCtxt(ctx);
1.17 daniel 2710: }
2711:
2712:
1.19 daniel 2713: /*
2714: * Setup the parser context to parse a new buffer; Clears any prior
2715: * contents from the parser context. The buffer parameter must not be
2716: * NULL, but the filename parameter can be
2717: */
1.17 daniel 2718: void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
2719: const char* filename)
2720: {
2721: xmlClearParserCtxt(ctxt);
2722: ctxt->base = buffer;
2723: ctxt->cur = buffer;
2724: ctxt->filename = filename;
2725: }
2726:
2727:
2728:
2729: void xmlReportError(xmlParserCtxtPtr ctx, const CHAR* msg)
2730: {
2731: fputs(msg, stderr);
1.9 httpng 2732: }
Webmaster