Annotation of XML/SGMLparser.c, revision 1.1
1.1 ! veillard 1: /*
! 2: * SGMLparser.c : an attempt to parse Docbook documents
! 3: *
! 4: * See Copyright for the status of this software.
! 5: *
! 6: * Daniel.Veillard@w3.org
! 7: */
! 8:
! 9: #ifdef WIN32
! 10: #include "win32config.h"
! 11: #else
! 12: #include "config.h"
! 13: #endif
! 14:
! 15: #include "xmlversion.h"
! 16: #ifdef LIBXML_SGML_ENABLED
! 17:
! 18: #include <stdio.h>
! 19: #include <string.h>
! 20: #ifdef HAVE_CTYPE_H
! 21: #include <ctype.h>
! 22: #endif
! 23: #ifdef HAVE_STDLIB_H
! 24: #include <stdlib.h>
! 25: #endif
! 26: #ifdef HAVE_SYS_STAT_H
! 27: #include <sys/stat.h>
! 28: #endif
! 29: #ifdef HAVE_FCNTL_H
! 30: #include <fcntl.h>
! 31: #endif
! 32: #ifdef HAVE_UNISTD_H
! 33: #include <unistd.h>
! 34: #endif
! 35: #ifdef HAVE_ZLIB_H
! 36: #include <zlib.h>
! 37: #endif
! 38:
! 39: #include <libxml/xmlmemory.h>
! 40: #include <libxml/tree.h>
! 41: #include <libxml/SGMLparser.h>
! 42: #include <libxml/entities.h>
! 43: #include <libxml/encoding.h>
! 44: #include <libxml/parser.h>
! 45: #include <libxml/valid.h>
! 46: #include <libxml/parserInternals.h>
! 47: #include <libxml/xmlIO.h>
! 48: #include <libxml/SAX.h>
! 49: #include "xml-error.h"
! 50:
! 51: #define SGML_MAX_NAMELEN 1000
! 52: #define INPUT_CHUNK 50
! 53: #define SGML_PARSER_BIG_BUFFER_SIZE 1000
! 54: #define SGML_PARSER_BUFFER_SIZE 100
! 55:
! 56: /* #define DEBUG */
! 57: /* #define DEBUG_PUSH */
! 58:
! 59: /************************************************************************
! 60: * *
! 61: * Parser stacks related functions and macros *
! 62: * *
! 63: ************************************************************************/
! 64:
! 65: /*
! 66: * Generic functions for accessing stacks in the Parser Context.
!     *
!     * PUSH_AND_POP(scope, type, name) expands to the pair
!     * sgml<name>Push() / sgml<name>Pop(), which operate on ctxt->name##Tab
!     * (the slots), ctxt->name##Nr (slots in use), ctxt->name##Max (slots
!     * allocated) and ctxt->name (copy of the value on top).
!     *
!     * Push: when Nr >= Max the table size is doubled with xmlRealloc(); on
!     * failure "realloc failed !" is printed on stderr and 0 is returned.
!     * Otherwise the value is stored in Tab[Nr], copied to ctxt->name, and the
!     * index it was stored at is returned (Nr is incremented afterwards).
!     *
!     * Pop: returns 0 if Nr is negative on entry or after being decremented.
!     * Otherwise ctxt->name becomes the new top (NULL when the stack is now
!     * empty), the vacated slot is zeroed and the removed value is returned.
!     *
!     * NOTE(review), to be confirmed against the callers:
!     * - the xmlRealloc() result is written straight over name##Tab, so on
!     *   failure the previous pointer is overwritten with NULL while name##Max
!     *   has already been doubled;
!     * - the failure value 0 is also what the first successful Push returns;
!     * - doubling assumes name##Max is non-zero when the stack is created;
!     * - Pop on an empty stack (Nr == 0) leaves Nr at -1, and a following Push
!     *   would then store at Tab[-1].
! 67: */
! 68:
! 69: #define PUSH_AND_POP(scope, type, name) \
! 70: scope int sgml##name##Push(sgmlParserCtxtPtr ctxt, type value) { \
! 71: if (ctxt->name##Nr >= ctxt->name##Max) { \
! 72: ctxt->name##Max *= 2; \
! 73: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
! 74: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
! 75: if (ctxt->name##Tab == NULL) { \
! 76: fprintf(stderr, "realloc failed !\n"); \
! 77: return(0); \
! 78: } \
! 79: } \
! 80: ctxt->name##Tab[ctxt->name##Nr] = value; \
! 81: ctxt->name = value; \
! 82: return(ctxt->name##Nr++); \
! 83: } \
! 84: scope type sgml##name##Pop(sgmlParserCtxtPtr ctxt) { \
! 85: type ret; \
! 86: if (ctxt->name##Nr < 0) return(0); \
! 87: ctxt->name##Nr--; \
! 88: if (ctxt->name##Nr < 0) return(0); \
! 89: if (ctxt->name##Nr > 0) \
! 90: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
! 91: else \
! 92: ctxt->name = NULL; \
! 93: ret = ctxt->name##Tab[ctxt->name##Nr]; \
! 94: ctxt->name##Tab[ctxt->name##Nr] = 0; \
! 95: return(ret); \
! 96: } \
! 97:
! 98: PUSH_AND_POP(extern, xmlNodePtr, node) /* sgmlnodePush() / sgmlnodePop() */
! 99: PUSH_AND_POP(extern, xmlChar*, name) /* sgmlnamePush() / sgmlnamePop() */
! 100:
! 101: /*
! 102: * Macros for accessing the content. Those should be used only by the parser,
! 103: * and not exported. All of them expect a variable named ctxt in scope.
! 104: *
! 105: * Dirty macros, i.e. one needs to make assumptions on the context to use them
! 106: *
! 107: * CUR_PTR return the current pointer to the xmlChar to be parsed.
! 108: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
! 109: * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
! 110: * in UNICODE mode. This should be used internally by the parser
! 111: * only to compare to ASCII values otherwise it would break when
! 112: * running with UTF-8 encoding.
! 113: * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
! 114: * to compare on ASCII based substring.
! 115: * UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
! 116: * it should be used only to compare on ASCII based substring.
! 117: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
! 118: * strings within the parser. Adds n to both ctxt->nbChars and the
!      * input cursor.
!      * UPPER the current xmlChar converted to uppercase.
!      * RAW -1 while ctxt->token is set, otherwise the current xmlChar.
!      * SHRINK / GROW call xmlParserInputShrink() / xmlParserInputGrow() on the
!      * current input, GROW asking for INPUT_CHUNK more.
! 119: *
! 120: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
! 121: *
! 122: * CURRENT Returns the current char value, with the full decoding of
! 123: * UTF-8 if we are using this mode. It returns an int.
! 124: * NEXT Skip to the next character, this does the proper decoding
! 125: * in UTF-8 mode. It also pops up unfinished entities on the fly.
! 126: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
!      * NEXTL(l) update line/col from the current byte, clear ctxt->token,
!      * advance the cursor by l and count one more char.
!      * CUR_CHAR(l) sgmlCurrentChar(ctxt, &l).
!      * CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l).
!      * COPY_BUF(l,b,i,v) store v in b[i++] when l == 1, otherwise append it
!      * with xmlCopyChar() and advance i by its return value.
!      *
!      * NOTE(review):
!      * - CURRENT as defined below is a plain cast of the current byte; no
!      *   UTF-8 decoding is visible in the macro itself.
!      * - COPY(to) is described here but no such macro is defined in this
!      *   section.
!      * - NXT and CUR_PTR are each defined twice with identical replacement
!      *   text.
!      * - SKIP expands to an unparenthesised comma expression; NEXT, NEXTL and
!      *   COPY_BUF expand to several statements; NEXT, SKIP_BLANKS, CUR_CHAR and
!      *   CUR_SCHAR already end in ';'. None of these is safe as the sole body
!      *   of an unbraced if/else.
! 127: */
! 128:
! 129: #define UPPER (toupper(*ctxt->input->cur))
! 130:
! 131: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
! 132:
! 133: #define NXT(val) ctxt->input->cur[(val)]
! 134:
! 135: #define UPP(val) (toupper(ctxt->input->cur[(val)]))
! 136:
! 137: #define CUR_PTR ctxt->input->cur
! 138:
! 139: #define SHRINK xmlParserInputShrink(ctxt->input)
! 140:
! 141: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
! 142:
! 143: #define CURRENT ((int) (*ctxt->input->cur))
! 144:
! 145: #define SKIP_BLANKS sgmlSkipBlankChars(ctxt);
! 146:
! 147: #if 0
! 148: #define CUR ((int) (*ctxt->input->cur))
! 149: #define NEXT sgmlNextChar(ctxt);
! 150: #else
! 151: /* Imported from XML; this #else branch is the one that gets compiled */
! 152:
! 153: /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
! 154: #define CUR ((int) (*ctxt->input->cur))
! 155: #define NEXT xmlNextChar(ctxt);ctxt->nbChars++;
! 156:
! 157: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
! 158: #define NXT(val) ctxt->input->cur[(val)]
! 159: #define CUR_PTR ctxt->input->cur
! 160:
! 161:
! 162: #define NEXTL(l) \
! 163: if (*(ctxt->input->cur) == '\n') { \
! 164: ctxt->input->line++; ctxt->input->col = 1; \
! 165: } else ctxt->input->col++; \
! 166: ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++;
! 167:
! 168: /************
! 169: \
! 170: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
! 171: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
! 172: ************/
! 173:
! 174: #define CUR_CHAR(l) sgmlCurrentChar(ctxt, &l);
! 175: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
! 176:
! 177: #define COPY_BUF(l,b,i,v) \
! 178: if (l == 1) b[i++] = (xmlChar) v; \
! 179: else i += xmlCopyChar(l,&b[i],v);
! 180: #endif
! 181:
! 182: /**
! 183: * sgmlCurrentChar:
! 184: * @ctxt: the SGML parser context
! 185: * @len: pointer to the length of the char read; set to 0 when a pending
!      * ctxt->token is returned, to 1..4 otherwise, and left untouched
!      * when the parser is already in the XML_PARSER_EOF state
! 186: *
! 187: * The current char value, if using UTF-8 this may actually span multiple
! 188: * bytes in the input buffer. Implement the end of line normalization:
! 189: * 2.11 End-of-Line Handling
! 190: * If the encoding is unspecified, in the case we find an ISO-Latin-1
! 191: * char, then the encoding converter is plugged in automatically.
!      *
!      * NOTE(review): no end-of-line handling is visible in the body below;
!      * confirm whether it is expected to happen elsewhere.
! 192: *
! 193: * Returns the current char value; its length in bytes is stored in *len
! 194: */
! 195:
! 196: int
! 197: sgmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
! 198: if (ctxt->instate == XML_PARSER_EOF)
! 199: return(0); /* *len is not written on this path */
! 200:
! 201: if (ctxt->token != 0) {
! 202: *len = 0;
! 203: return(ctxt->token);
! 204: }
! 205: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
! 206: /*
! 207: * We are supposed to handle UTF8, check it's valid
! 208: * From rfc2044: encoding of the Unicode values on UTF-8:
! 209: *
! 210: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
! 211: * 0000 0000-0000 007F 0xxxxxxx
! 212: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
! 213: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
! 214: *
! 215: * Check for the 0x110000 limit too
! 216: */
! 217: const unsigned char *cur = ctxt->input->cur;
! 218: unsigned char c;
! 219: unsigned int val;
! 220:
! 221: c = *cur;
! 222: if (c & 0x80) {
! 223: if (cur[1] == 0)
! 224: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); /* NOTE(review): the local cur is not re-read from ctxt->input->cur after growing, here and in the two grow calls below; verify the buffer cannot move */
! 225: if ((cur[1] & 0xc0) != 0x80)
! 226: goto encoding_error;
! 227: if ((c & 0xe0) == 0xe0) { /* lead byte 111xxxxx: three bytes or more */
! 228:
! 229: if (cur[2] == 0)
! 230: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
! 231: if ((cur[2] & 0xc0) != 0x80)
! 232: goto encoding_error;
! 233: if ((c & 0xf0) == 0xf0) { /* lead byte 1111xxxx: four bytes */
! 234: if (cur[3] == 0)
! 235: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
! 236: if (((c & 0xf8) != 0xf0) ||
! 237: ((cur[3] & 0xc0) != 0x80))
! 238: goto encoding_error;
! 239: /* 4-byte code */
! 240: *len = 4;
! 241: val = (cur[0] & 0x7) << 18;
! 242: val |= (cur[1] & 0x3f) << 12;
! 243: val |= (cur[2] & 0x3f) << 6;
! 244: val |= cur[3] & 0x3f;
! 245: } else {
! 246: /* 3-byte code */
! 247: *len = 3;
! 248: val = (cur[0] & 0xf) << 12;
! 249: val |= (cur[1] & 0x3f) << 6;
! 250: val |= cur[2] & 0x3f;
! 251: }
! 252: } else {
! 253: /* 2-byte code; NOTE(review): a lead byte of the form 10xxxxxx also reaches this branch */
! 254: *len = 2;
! 255: val = (cur[0] & 0x1f) << 6;
! 256: val |= cur[1] & 0x3f;
! 257: }
! 258: if (!IS_CHAR(val)) { /* the error is reported but val is still returned below */
! 259: if ((ctxt->sax != NULL) &&
! 260: (ctxt->sax->error != NULL))
! 261: ctxt->sax->error(ctxt->userData,
! 262: "Char 0x%X out of allowed range\n", val);
! 263: ctxt->errNo = XML_ERR_INVALID_ENCODING;
! 264: ctxt->wellFormed = 0;
! 265: ctxt->disableSAX = 1;
! 266: }
! 267: return(val);
! 268: } else {
! 269: /* 1-byte code */
! 270: *len = 1;
! 271: return((int) *ctxt->input->cur);
! 272: }
! 273: }
! 274: /*
! 275: * Assume it's a fixed length encoding (1) with
! 276: * a compatible encoding for the ASCII set, since
! 277: * XML constructs only use < 128 chars
! 278: */
! 279: *len = 1;
! 280: if ((int) *ctxt->input->cur < 0x80)
! 281: return((int) *ctxt->input->cur);
! 282:
! 283: /*
! 284: * A byte >= 0x80 and the charset is not UTF-8: switch the input to
! 285: * ISO-Latin-1 with xmlSwitchEncoding(), mark the charset as UTF-8 and retry
! 286: */
! 286: xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
! 287: ctxt->charset = XML_CHAR_ENCODING_UTF8;
! 288: return(xmlCurrentChar(ctxt, len)); /* NOTE(review): delegates to xmlCurrentChar(), not to this function - confirm intended */
! 289:
! 290: encoding_error:
! 291: /*
! 292: * If we detect an UTF8 error that probably mean that the
! 293: * input encoding didn't get properly advertized in the
! 294: * declaration header. Report the error and switch the encoding
! 295: * to ISO-Latin-1 (if you don't like this policy, just declare the
! 296: * encoding !)
! 297: */
! 298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
! 299: ctxt->sax->error(ctxt->userData,
! 300: "Input is not proper UTF-8, indicate encoding !\n");
! 301: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
! 302: ctxt->input->cur[0], ctxt->input->cur[1],
! 303: ctxt->input->cur[2], ctxt->input->cur[3]); /* assumes four readable bytes at cur - TODO confirm */
! 304: }
! 305: ctxt->errNo = XML_ERR_INVALID_ENCODING; /* unlike the IS_CHAR failure above, wellFormed is not cleared here */
! 306:
! 307: ctxt->charset = XML_CHAR_ENCODING_8859_1;
! 308: *len = 1;
! 309: return((int) *ctxt->input->cur);
! 310: }
! 311:
! 312: /**
! 313: * sgmlNextChar:
! 314: * @ctxt: the SGML parser context
! 315: *
! 316: * Skip to the next input char. Does nothing once the parser state is
!      * XML_PARSER_EOF. If the current byte is 0 and xmlParserInputGrow()
!      * returns <= 0, the current input is popped with xmlPopInput();
!      * otherwise the line/column counters are updated, the cursor moves
!      * forward by exactly one byte, ctxt->nbChars is incremented and the
!      * buffer is grown again if the new current byte is 0.
! 317: */
! 318:
! 319: void
! 320: sgmlNextChar(sgmlParserCtxtPtr ctxt) {
! 321: if (ctxt->instate == XML_PARSER_EOF)
! 322: return;
! 323: if ((*ctxt->input->cur == 0) &&
! 324: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
! 325: xmlPopInput(ctxt);
! 326: } else {
! 327: if (*(ctxt->input->cur) == '\n') {
! 328: ctxt->input->line++; ctxt->input->col = 1;
! 329: } else ctxt->input->col++;
! 330: ctxt->input->cur++; /* one byte, whatever the encoding */
! 331: ctxt->nbChars++;
! 332: if (*ctxt->input->cur == 0)
! 333: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
! 334: }
! 335: }
! 336:
! 337: /**
! 338: * sgmlSkipBlankChars:
! 339: * @ctxt: the SGML parser context
! 340: *
! 341: * Skip all blank characters found at that point in the input stream,
!      * keeping the line/column counters and ctxt->nbChars up to date and
!      * growing the input buffer whenever the cursor lands on a 0 byte.
! 342: *
! 343: * Returns the number of space chars skipped (one per loop iteration)
! 344: */
! 345:
! 346: int
! 347: sgmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
! 348: int res = 0;
! 349:
! 350: while (IS_BLANK(*(ctxt->input->cur))) {
! 351: if ((*ctxt->input->cur == 0) && /* NOTE(review): only reachable if IS_BLANK() accepts 0 - confirm */
! 352: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
! 353: xmlPopInput(ctxt);
! 354: } else {
! 355: if (*(ctxt->input->cur) == '\n') {
! 356: ctxt->input->line++; ctxt->input->col = 1;
! 357: } else ctxt->input->col++;
! 358: ctxt->input->cur++;
! 359: ctxt->nbChars++;
! 360: if (*ctxt->input->cur == 0)
! 361: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
! 362: }
! 363: res++;
! 364: }
! 365: return(res);
! 366: }
! 367:
! 368:
! 369:
! 370: /************************************************************************
! 371: * *
! 372: * The list of SGML elements and their properties *
! 373: * *
! 374: ************************************************************************/
! 375:
! 376: /*
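! 377: * Start Tag: 1 means the start tag can be omitted
! 378: * End Tag: 1 means the end tag can be omitted
! 379: * 2 means it's forbidden (empty elements)
! 380: * Depr: this element is deprecated
! 381: * DTD: 1 means that this element is valid only in the Loose DTD
! 382: * 2 means that this element is valid only in the Frameset DTD
! 383: *
! 384: * Name,Start Tag,End Tag, Empty, Depr., DTD, Description
!     *
!     * NOTE(review): in the rows below the "Depr." column holds values from
!     * 0 to 9 rather than a 0/1 flag, so it appears to carry some other
!     * classification - confirm and rename. The table also lists "colspec",
!     * "row", "tbody", "tgroup" and "thead" twice, and the names "primary "
!     * and "tertiary " carry a trailing space.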
! 385: */
! 386: sgmlElemDesc docbookElementTable[] = {
! 387: { "abbrev", 0, 0, 0, 3, 0, "" }, /* word */
! 388: { "abstract", 0, 0, 0, 9, 0, "" }, /* title */
! 389: { "accel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 390: { "ackno", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 391: { "acronym", 0, 0, 0, 3, 0, "" }, /* word */
! 392: { "action", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 393: { "address", 0, 0, 0, 1, 0, "" },
! 394: { "affiliation",0, 0, 0, 9, 0, "" }, /* shortaffil */
! 395: { "alt", 0, 0, 0, 1, 0, "" },
! 396: { "anchor", 0, 2, 1, 0, 0, "" },
! 397: { "answer", 0, 0, 0, 9, 0, "" }, /* label */
! 398: { "appendix", 0, 0, 0, 9, 0, "" }, /* appendixinfo */
! 399: { "appendixinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 400: { "application",0, 0, 0, 2, 0, "" }, /* para */
! 401: { "area", 0, 2, 1, 0, 0, "" },
! 402: { "areaset", 0, 0, 0, 9, 0, "" }, /* area */
! 403: { "areaspec", 0, 0, 0, 9, 0, "" }, /* area */
! 404: { "arg", 0, 0, 0, 1, 0, "" },
! 405: { "article", 0, 0, 0, 9, 0, "" }, /* div.title.content */
! 406: { "articleinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 407: { "artpagenums",0, 0, 0, 4, 0, "" }, /* docinfo */
! 408: { "attribution",0, 0, 0, 2, 0, "" }, /* para */
! 409: { "audiodata", 0, 2, 1, 0, 0, "" },
! 410: { "audioobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
! 411: { "authorblurb",0, 0, 0, 9, 0, "" }, /* title */
! 412: { "authorgroup",0, 0, 0, 9, 0, "" }, /* author */
! 413: { "authorinitials",0, 0, 0, 4, 0, "" }, /* docinfo */
! 414: { "author", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
! 415: { "beginpage", 0, 2, 1, 0, 0, "" },
! 416: { "bibliodiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
! 417: { "biblioentry",0, 0, 0, 9, 0, "" }, /* articleinfo */
! 418: { "bibliography",0, 0, 0, 9, 0, "" }, /* bibliographyinfo */
! 419: { "bibliographyinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 420: { "bibliomisc", 0, 0, 0, 2, 0, "" }, /* para */
! 421: { "bibliomixed",0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix, bibliomset) */
! 422: { "bibliomset", 0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix; | bibliomset) */
! 423: { "biblioset", 0, 0, 0, 9, 0, "" }, /* bibliocomponent.mix */
! 424: { "blockquote", 0, 0, 0, 9, 0, "" }, /* title */
! 425: { "book", 0, 0, 0, 9, 0, "" }, /* div.title.content */
! 426: { "bookinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
! 427: { "bridgehead", 0, 0, 0, 8, 0, "" }, /* title */
! 428: { "callout", 0, 0, 0, 9, 0, "" }, /* component.mix */
! 429: { "calloutlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 430: { "caption", 0, 0, 0, 9, 0, "" }, /* textobject.mix */
! 431: { "caution", 0, 0, 0, 9, 0, "" }, /* title */
! 432: { "chapter", 0, 0, 0, 9, 0, "" }, /* chapterinfo */
! 433: { "chapterinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 434: { "citation", 0, 0, 0, 2, 0, "" }, /* para */
! 435: { "citerefentry",0, 0, 0, 9, 0, "" }, /* refentrytitle */
! 436: { "citetitle", 0, 0, 0, 2, 0, "" }, /* para */
! 437: { "city", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 438: { "classname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 439: { "classsynopsisinfo",0,0, 0, 9, 0, "" }, /* cptr */
! 440: { "classsynopsis",0, 0, 0, 9, 0, "" }, /* ooclass */
! 441: { "cmdsynopsis",0, 0, 0, 9, 0, "" }, /* command */
! 442: { "co", 0, 2, 1, 0, 0, "" },
! 443: { "collab", 0, 0, 0, 9, 0, "" }, /* collabname */
! 444: { "collabname", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 445: { "colophon", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
! 446: { "colspec", 0, 2, 1, 0, 0, "" },
! 447: { "colspec", 0, 2, 1, 0, 0, "" },
! 448: { "command", 0, 0, 0, 9, 0, "" }, /* cptr */
! 449: { "computeroutput",0, 0, 0, 9, 0, "" }, /* cptr */
! 450: { "confdates", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 451: { "confgroup", 0, 0, 0, 9, 0, "" }, /* confdates */
! 452: { "confnum", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 453: { "confsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
! 454: { "conftitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 455: { "constant", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 456: { "constructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
! 457: { "contractnum",0, 0, 0, 4, 0, "" }, /* docinfo */
! 458: { "contractsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
! 459: { "contrib", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 460: { "copyright", 0, 0, 0, 9, 0, "" }, /* year */
! 461: { "corpauthor", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 462: { "corpname", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 463: { "country", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 464: { "database", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 465: { "date", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 466: { "dedication", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
! 467: { "destructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
! 468: { "edition", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 469: { "editor", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
! 470: { "email", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 471: { "emphasis", 0, 0, 0, 2, 0, "" }, /* para */
! 472: { "entry", 0, 0, 0, 9, 0, "" }, /* tbl.entry.mdl */
! 473: { "entrytbl", 0, 0, 0, 9, 0, "" }, /* tbl.entrytbl.mdl */
! 474: { "envar", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 475: { "epigraph", 0, 0, 0, 9, 0, "" }, /* attribution */
! 476: { "equation", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 477: { "errorcode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 478: { "errorname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 479: { "errortype", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 480: { "example", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 481: { "exceptionname",0, 0, 0, 7, 0, "" }, /* smallcptr */
! 482: { "fax", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 483: { "fieldsynopsis", 0, 0, 0, 9, 0, "" }, /* modifier */
! 484: { "figure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 485: { "filename", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 486: { "firstname", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 487: { "firstterm", 0, 0, 0, 3, 0, "" }, /* word */
! 488: { "footnote", 0, 0, 0, 9, 0, "" }, /* footnote.mix */
! 489: { "footnoteref",0, 2, 1, 0, 0, "" },
! 490: { "foreignphrase",0, 0, 0, 2, 0, "" }, /* para */
! 491: { "formalpara", 0, 0, 0, 9, 0, "" }, /* title */
! 492: { "funcdef", 0, 0, 0, 1, 0, "" },
! 493: { "funcparams", 0, 0, 0, 9, 0, "" }, /* cptr */
! 494: { "funcprototype",0, 0, 0, 9, 0, "" }, /* funcdef */
! 495: { "funcsynopsis",0, 0, 0, 9, 0, "" }, /* funcsynopsisinfo */
! 496: { "funcsynopsisinfo", 0, 0, 0, 9, 0, "" }, /* cptr */
! 497: { "function", 0, 0, 0, 9, 0, "" }, /* cptr */
! 498: { "glossary", 0, 0, 0, 9, 0, "" }, /* glossaryinfo */
! 499: { "glossaryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 500: { "glossdef", 0, 0, 0, 9, 0, "" }, /* glossdef.mix */
! 501: { "glossdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
! 502: { "glossentry", 0, 0, 0, 9, 0, "" }, /* glossterm */
! 503: { "glosslist", 0, 0, 0, 9, 0, "" }, /* glossentry */
! 504: { "glossseealso",0, 0, 0, 2, 0, "" }, /* para */
! 505: { "glosssee", 0, 0, 0, 2, 0, "" }, /* para */
! 506: { "glossterm", 0, 0, 0, 2, 0, "" }, /* para */
! 507: { "graphic", 0, 2, 1, 0, 0, "" },
! 508: { "graphicco", 0, 0, 0, 9, 0, "" }, /* areaspec */
! 509: { "group", 0, 0, 0, 9, 0, "" }, /* arg */
! 510: { "guibutton", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 511: { "guiicon", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 512: { "guilabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 513: { "guimenuitem",0, 0, 0, 7, 0, "" }, /* smallcptr */
! 514: { "guimenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 515: { "guisubmenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 516: { "hardware", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 517: { "highlights", 0, 0, 0, 9, 0, "" }, /* highlights.mix */
! 518: { "holder", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 519: { "honorific", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 520: { "imagedata", 0, 2, 1, 0, 0, "" },
! 521: { "imageobjectco",0, 0, 0, 9, 0, "" }, /* areaspec */
! 522: { "imageobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
! 523: { "important", 0, 0, 0, 9, 0, "" }, /* title */
! 524: { "indexdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
! 525: { "indexentry", 0, 0, 0, 9, 0, "" }, /* primaryie */
! 526: { "index", 0, 0, 0, 9, 0, "" }, /* indexinfo */
! 527: { "indexinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
! 528: { "indexterm", 0, 0, 0, 9, 0, "" }, /* primary */
! 529: { "informalequation",0, 0, 0, 9, 0, "" }, /* equation.content */
! 530: { "informalexample",0, 0, 0, 9, 0, "" }, /* example.mix */
! 531: { "informalfigure",0, 0, 0, 9, 0, "" }, /* figure.mix */
! 532: { "informaltable",0, 0, 0, 9, 0, "" }, /* graphic */
! 533: { "initializer",0, 0, 0, 7, 0, "" }, /* smallcptr */
! 534: { "inlineequation",0, 0, 0, 9, 0, "" }, /* inlineequation.content */
! 535: { "inlinegraphic",0, 2, 1, 0, 0, "" },
! 536: { "inlinemediaobject",0,0, 0, 9, 0, "" }, /* objectinfo */
! 537: { "interfacename",0, 0, 0, 7, 0, "" }, /* smallcptr */
! 538: { "interface", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 539: { "invpartnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
! 540: { "isbn", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 541: { "issn", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 542: { "issuenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 543: { "itemizedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 544: { "itermset", 0, 0, 0, 9, 0, "" }, /* indexterm */
! 545: { "jobtitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 546: { "keycap", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 547: { "keycode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 548: { "keycombo", 0, 0, 0, 9, 0, "" }, /* keycap */
! 549: { "keysym", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 550: { "keyword", 0, 0, 0, 1, 0, "" },
! 551: { "keywordset", 0, 0, 0, 9, 0, "" }, /* keyword */
! 552: { "label", 0, 0, 0, 3, 0, "" }, /* word */
! 553: { "legalnotice",0, 0, 0, 9, 0, "" }, /* title */
! 554: { "lineage", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 555: { "lineannotation",0, 0, 0, 2, 0, "" }, /* para */
! 556: { "link", 0, 0, 0, 2, 0, "" }, /* para */
! 557: { "listitem", 0, 0, 0, 9, 0, "" }, /* component.mix */
! 558: { "literal", 0, 0, 0, 9, 0, "" }, /* cptr */
! 559: { "literallayout",0, 0, 0, 2, 0, "" }, /* para */
! 560: { "lot", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
! 561: { "lotentry", 0, 0, 0, 2, 0, "" }, /* para */
! 562: { "manvolnum", 0, 0, 0, 3, 0, "" }, /* word */
! 563: { "markup", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 564: { "medialabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 565: { "mediaobjectco",0, 0, 0, 9, 0, "" }, /* objectinfo */
! 566: { "mediaobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
! 567: { "member", 0, 0, 0, 2, 0, "" }, /* para */
! 568: { "menuchoice", 0, 0, 0, 9, 0, "" }, /* shortcut */
! 569: { "methodname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 570: { "methodparam",0, 0, 0, 9, 0, "" }, /* modifier */
! 571: { "methodsynopsis",0, 0, 0, 9, 0, "" }, /* modifier */
! 572: { "modespec", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 573: { "modifier", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 574: { "mousebutton",0, 0, 0, 7, 0, "" }, /* smallcptr */
! 575: { "msgaud", 0, 0, 0, 2, 0, "" }, /* para */
! 576: { "msgentry", 0, 0, 0, 9, 0, "" }, /* msg */
! 577: { "msgexplan", 0, 0, 0, 9, 0, "" }, /* title */
! 578: { "msginfo", 0, 0, 0, 9, 0, "" }, /* msglevel */
! 579: { "msglevel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 580: { "msgmain", 0, 0, 0, 9, 0, "" }, /* title */
! 581: { "msgorig", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 582: { "msgrel", 0, 0, 0, 9, 0, "" }, /* title */
! 583: { "msgset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 584: { "msgsub", 0, 0, 0, 9, 0, "" }, /* title */
! 585: { "msgtext", 0, 0, 0, 9, 0, "" }, /* component.mix */
! 586: { "msg", 0, 0, 0, 9, 0, "" }, /* title */
! 587: { "note", 0, 0, 0, 9, 0, "" }, /* title */
! 588: { "objectinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
! 589: { "olink", 0, 0, 0, 2, 0, "" }, /* para */
! 590: { "ooclass", 0, 0, 0, 9, 0, "" }, /* modifier */
! 591: { "ooexception",0, 0, 0, 9, 0, "" }, /* modifier */
! 592: { "oointerface",0, 0, 0, 9, 0, "" }, /* modifier */
! 593: { "optional", 0, 0, 0, 9, 0, "" }, /* cptr */
! 594: { "option", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 595: { "orderedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 596: { "orgdiv", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 597: { "orgname", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 598: { "otheraddr", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 599: { "othercredit",0, 0, 0, 9, 0, "" }, /* person.ident.mix */
! 600: { "othername", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 601: { "pagenums", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 602: { "paramdef", 0, 0, 0, 1, 0, "" },
! 603: { "parameter", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 604: { "para", 0, 0, 0, 2, 0, "" }, /* para */
! 605: { "partinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
! 606: { "partintro", 0, 0, 0, 9, 0, "" }, /* div.title.content */
! 607: { "part", 0, 0, 0, 9, 0, "" }, /* partinfo */
! 608: { "phone", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 609: { "phrase", 0, 0, 0, 2, 0, "" }, /* para */
! 610: { "pob", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 611: { "postcode", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 612: { "prefaceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 613: { "preface", 0, 0, 0, 9, 0, "" }, /* prefaceinfo */
! 614: { "primaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 615: { "primary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 616: { "printhistory",0, 0, 0, 9, 0, "" }, /* para.class */
! 617: { "procedure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 618: { "productname",0, 0, 0, 2, 0, "" }, /* para */
! 619: { "productnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
! 620: { "programlistingco",0, 0, 0, 9, 0, "" }, /* areaspec */
! 621: { "programlisting",0, 0, 0, 2, 0, "" }, /* para */
! 622: { "prompt", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 623: { "property", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 624: { "pubdate", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 625: { "publishername",0, 0, 0, 4, 0, "" }, /* docinfo */
! 626: { "publisher", 0, 0, 0, 9, 0, "" }, /* publishername */
! 627: { "pubsnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 628: { "qandadiv", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 629: { "qandaentry", 0, 0, 0, 9, 0, "" }, /* revhistory */
! 630: { "qandaset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 631: { "question", 0, 0, 0, 9, 0, "" }, /* label */
! 632: { "quote", 0, 0, 0, 2, 0, "" }, /* para */
! 633: { "refclass", 0, 0, 0, 9, 0, "" }, /* refclass.char.mix */
! 634: { "refdescriptor",0, 0, 0, 9, 0, "" }, /* refname.char.mix */
! 635: { "refentryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 636: { "refentry", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
! 637: { "refentrytitle",0, 0, 0, 2, 0, "" }, /* para */
! 638: { "referenceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 639: { "reference", 0, 0, 0, 9, 0, "" }, /* referenceinfo */
! 640: { "refmeta", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
! 641: { "refmiscinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
! 642: { "refnamediv", 0, 0, 0, 9, 0, "" }, /* refdescriptor */
! 643: { "refname", 0, 0, 0, 9, 0, "" }, /* refname.char.mix */
! 644: { "refpurpose", 0, 0, 0, 9, 0, "" }, /* refinline.char.mix */
! 645: { "refsect1info",0, 0, 0, 9, 0, "" }, /* graphic */
! 646: { "refsect1", 0, 0, 0, 9, 0, "" }, /* refsect */
! 647: { "refsect2info",0, 0, 0, 9, 0, "" }, /* graphic */
! 648: { "refsect2", 0, 0, 0, 9, 0, "" }, /* refsect */
! 649: { "refsect3info",0, 0, 0, 9, 0, "" }, /* graphic */
! 650: { "refsect3", 0, 0, 0, 9, 0, "" }, /* refsect */
! 651: { "refsynopsisdivinfo",0,0, 0, 9, 0, "" }, /* graphic */
! 652: { "refsynopsisdiv",0, 0, 0, 9, 0, "" }, /* refsynopsisdivinfo */
! 653: { "releaseinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
! 654: { "remark", 0, 0, 0, 2, 0, "" }, /* para */
! 655: { "replaceable",0, 0, 0, 1, 0, "" },
! 656: { "returnvalue",0, 0, 0, 7, 0, "" }, /* smallcptr */
! 657: { "revdescription",0, 0, 0, 9, 0, "" }, /* revdescription.mix */
! 658: { "revhistory", 0, 0, 0, 9, 0, "" }, /* revision */
! 659: { "revision", 0, 0, 0, 9, 0, "" }, /* revnumber */
! 660: { "revnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 661: { "revremark", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 662: { "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
! 663: { "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
! 664: { "sbr", 0, 2, 1, 0, 0, "" },
! 665: { "screenco", 0, 0, 0, 9, 0, "" }, /* areaspec */
! 666: { "screeninfo", 0, 0, 0, 2, 0, "" }, /* para */
! 667: { "screen", 0, 0, 0, 2, 0, "" }, /* para */
! 668: { "screenshot", 0, 0, 0, 9, 0, "" }, /* screeninfo */
! 669: { "secondaryie",0, 0, 0, 4, 0, "" }, /* ndxterm */
! 670: { "secondary", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 671: { "sect1info", 0, 0, 0, 9, 0, "" }, /* graphic */
! 672: { "sect1", 0, 0, 0, 9, 0, "" }, /* sect */
! 673: { "sect2info", 0, 0, 0, 9, 0, "" }, /* graphic */
! 674: { "sect2", 0, 0, 0, 9, 0, "" }, /* sect */
! 675: { "sect3info", 0, 0, 0, 9, 0, "" }, /* graphic */
! 676: { "sect3", 0, 0, 0, 9, 0, "" }, /* sect */
! 677: { "sect4info", 0, 0, 0, 9, 0, "" }, /* graphic */
! 678: { "sect4", 0, 0, 0, 9, 0, "" }, /* sect */
! 679: { "sect5info", 0, 0, 0, 9, 0, "" }, /* graphic */
! 680: { "sect5", 0, 0, 0, 9, 0, "" }, /* sect */
! 681: { "sectioninfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 682: { "section", 0, 0, 0, 9, 0, "" }, /* sectioninfo */
! 683: { "seealsoie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 684: { "seealso", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 685: { "seeie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 686: { "see", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 687: { "seglistitem",0, 0, 0, 9, 0, "" }, /* seg */
! 688: { "segmentedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 689: { "seg", 0, 0, 0, 2, 0, "" }, /* para */
! 690: { "segtitle", 0, 0, 0, 8, 0, "" }, /* title */
! 691: { "seriesvolnums", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 692: { "set", 0, 0, 0, 9, 0, "" }, /* div.title.content */
! 693: { "setindexinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 694: { "setindex", 0, 0, 0, 9, 0, "" }, /* setindexinfo */
! 695: { "setinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
! 696: { "sgmltag", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 697: { "shortaffil", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 698: { "shortcut", 0, 0, 0, 9, 0, "" }, /* keycap */
! 699: { "sidebarinfo",0, 0, 0, 9, 0, "" }, /* graphic */
! 700: { "sidebar", 0, 0, 0, 9, 0, "" }, /* sidebarinfo */
! 701: { "simpara", 0, 0, 0, 2, 0, "" }, /* para */
! 702: { "simplelist", 0, 0, 0, 9, 0, "" }, /* member */
! 703: { "simplemsgentry", 0, 0, 0, 9, 0, "" }, /* msgtext */
! 704: { "simplesect", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
! 705: { "spanspec", 0, 2, 1, 0, 0, "" },
! 706: { "state", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 707: { "step", 0, 0, 0, 9, 0, "" }, /* title */
! 708: { "street", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 709: { "structfield",0, 0, 0, 7, 0, "" }, /* smallcptr */
! 710: { "structname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 711: { "subjectset", 0, 0, 0, 9, 0, "" }, /* subject */
! 712: { "subject", 0, 0, 0, 9, 0, "" }, /* subjectterm */
! 713: { "subjectterm",0, 0, 0, 1, 0, "" },
! 714: { "subscript", 0, 0, 0, 1, 0, "" },
! 715: { "substeps", 0, 0, 0, 9, 0, "" }, /* step */
! 716: { "subtitle", 0, 0, 0, 8, 0, "" }, /* title */
! 717: { "superscript", 0, 0, 0, 1, 0, "" },
! 718: { "surname", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 719: { "symbol", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 720: { "synopfragment", 0, 0, 0, 9, 0, "" }, /* arg */
! 721: { "synopfragmentref", 0, 0, 0, 1, 0, "" },
! 722: { "synopsis", 0, 0, 0, 2, 0, "" }, /* para */
! 723: { "systemitem", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 724: { "table", 0, 0, 0, 9, 0, "" }, /* tbl.table.mdl */
! 725: /* { "%tbl.table.name;", 0, 0, 0, 9, 0, "" },*/ /* tbl.table.mdl */
! 726: { "tbody", 0, 0, 0, 9, 0, "" }, /* row */
! 727: { "tbody", 0, 0, 0, 9, 0, "" }, /* row */
! 728: { "term", 0, 0, 0, 2, 0, "" }, /* para */
! 729: { "tertiaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 730: { "tertiary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
! 731: { "textobject", 0, 0, 0, 9, 0, "" }, /* objectinfo */
! 732: { "tfoot", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
! 733: { "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
! 734: { "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
! 735: { "thead", 0, 0, 0, 9, 0, "" }, /* row */
! 736: { "thead", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
! 737: { "tip", 0, 0, 0, 9, 0, "" }, /* title */
! 738: { "titleabbrev",0, 0, 0, 8, 0, "" }, /* title */
! 739: { "title", 0, 0, 0, 8, 0, "" }, /* title */
! 740: { "tocback", 0, 0, 0, 2, 0, "" }, /* para */
! 741: { "toc", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
! 742: { "tocchap", 0, 0, 0, 9, 0, "" }, /* tocentry */
! 743: { "tocentry", 0, 0, 0, 2, 0, "" }, /* para */
! 744: { "tocfront", 0, 0, 0, 2, 0, "" }, /* para */
! 745: { "toclevel1", 0, 0, 0, 9, 0, "" }, /* tocentry */
! 746: { "toclevel2", 0, 0, 0, 9, 0, "" }, /* tocentry */
! 747: { "toclevel3", 0, 0, 0, 9, 0, "" }, /* tocentry */
! 748: { "toclevel4", 0, 0, 0, 9, 0, "" }, /* tocentry */
! 749: { "toclevel5", 0, 0, 0, 9, 0, "" }, /* tocentry */
! 750: { "tocpart", 0, 0, 0, 9, 0, "" }, /* tocentry */
! 751: { "token", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 752: { "trademark", 0, 0, 0, 1, 0, "" },
! 753: { "type", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 754: { "ulink", 0, 0, 0, 2, 0, "" }, /* para */
! 755: { "userinput", 0, 0, 0, 9, 0, "" }, /* cptr */
! 756: { "varargs", 0, 2, 1, 0, 0, "" },
! 757: { "variablelist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
! 758: { "varlistentry",0, 0, 0, 9, 0, "" }, /* term */
! 759: { "varname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
! 760: { "videodata", 0, 2, 1, 0, 0, "" },
! 761: { "videoobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
! 762: { "void", 0, 2, 1, 0, 0, "" },
! 763: { "volumenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 764: { "warning", 0, 0, 0, 9, 0, "" }, /* title */
! 765: { "wordasword", 0, 0, 0, 3, 0, "" }, /* word */
! 766: { "xref", 0, 2, 1, 0, 0, "" },
! 767: { "year", 0, 0, 0, 4, 0, "" }, /* docinfo */
! 768: };
! 769:
/*
 * Start tags that imply the end of the current element.
 * The table is a sequence of NULL-terminated groups, one group per line:
 * a tag found on a line implies the end of the current element when the
 * type of that element is listed on the same line.
 * An extra NULL entry closes the whole table.
 */
char *sgmlEquEnd[] = {
    "dt", "dd", "li", "option", NULL,
    "h1", "h2", "h3", "h4", "h5", "h6", NULL,
    "ol", "menu", "dir", "address", "pre", "listing", "xmp", NULL,
    NULL
};
! 781: /*
! 782: * According to the SGML DTD, HR should be added to the 2nd line above, as it
! 783: * is not allowed within an H1, H2, H3, etc. But we should tolerate that case
! 784: * because many documents contain rules in headings...
! 785: */
! 786:
/*
 * Start tags that imply the end of the current element.
 * As read by sgmlInitAutoClose() and sgmlCheckAutoClose(), the table is
 * made of NULL-terminated groups of names: the first name of a group is
 * the newly opened tag, the names following it are the tags it closes.
 * A NULL where a group would start ends the table; no group is
 * registered for now.
 */
char *sgmlStartClose[] = {
    NULL
};
! 793:
/*
 * NULL-terminated list of the SGML elements which are not supposed to
 * have CDATA content and in which a p element will be implied
 * (the list is scanned by sgmlCheckParagraph()). It is empty for now.
 *
 * TODO: extend that list by reading the SGML DTD on
 *       implied paragraphs
 */
static char *sgmlNoContentElements[] = {
    NULL
};
! 804:
! 805:
/*
 * Lookup index over sgmlStartClose: each used slot points to the first
 * name of one group of that table, the slots after the last group are NULL.
 */
static char **sgmlStartCloseIndex[100];

/* Tested before sgmlStartCloseIndex is (re)built; starts out as 0. */
static int sgmlStartCloseIndexinitialized = 0;
! 808:
! 809: /************************************************************************
! 810: * *
! 811: * functions to handle SGML specific data *
! 812: * *
! 813: ************************************************************************/
! 814:
! 815: /**
! 816: * sgmlInitAutoClose:
! 817: *
! 818: * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
! 819: *
! 820: */
! 821: void
! 822: sgmlInitAutoClose(void) {
! 823: int index, i = 0;
! 824:
! 825: if (sgmlStartCloseIndexinitialized) return;
! 826:
! 827: for (index = 0;index < 100;index ++) sgmlStartCloseIndex[index] = NULL;
! 828: index = 0;
! 829: while ((sgmlStartClose[i] != NULL) && (index < 100 - 1)) {
! 830: sgmlStartCloseIndex[index++] = &sgmlStartClose[i];
! 831: while (sgmlStartClose[i] != NULL) i++;
! 832: i++;
! 833: }
! 834: }
! 835:
! 836: /**
! 837: * sgmlTagLookup:
! 838: * @tag: The tag name
! 839: *
! 840: * Lookup the SGML tag in the ElementTable
! 841: *
! 842: * Returns the related sgmlElemDescPtr or NULL if not found.
! 843: */
! 844: sgmlElemDescPtr
! 845: sgmlTagLookup(const xmlChar *tag) {
! 846: int i;
! 847:
! 848: for (i = 0; i < (sizeof(docbookElementTable) /
! 849: sizeof(docbookElementTable[0]));i++) {
! 850: if (!xmlStrcmp(tag, BAD_CAST docbookElementTable[i].name))
! 851: return(&docbookElementTable[i]);
! 852: }
! 853: return(NULL);
! 854: }
! 855:
! 856: /**
! 857: * sgmlCheckAutoClose:
! 858: * @newtag: The new tag name
! 859: * @oldtag: The old tag name
! 860: *
! 861: * Checks wether the new tag is one of the registered valid tags for closing old.
! 862: * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
! 863: *
! 864: * Returns 0 if no, 1 if yes.
! 865: */
! 866: int
! 867: sgmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) {
! 868: int i, index;
! 869: char **close;
! 870:
! 871: if (sgmlStartCloseIndexinitialized == 0) sgmlInitAutoClose();
! 872:
! 873: /* inefficient, but not a big deal */
! 874: for (index = 0; index < 100;index++) {
! 875: close = sgmlStartCloseIndex[index];
! 876: if (close == NULL) return(0);
! 877: if (!xmlStrcmp(BAD_CAST *close, newtag)) break;
! 878: }
! 879:
! 880: i = close - sgmlStartClose;
! 881: i++;
! 882: while (sgmlStartClose[i] != NULL) {
! 883: if (!xmlStrcmp(BAD_CAST sgmlStartClose[i], oldtag)) {
! 884: return(1);
! 885: }
! 886: i++;
! 887: }
! 888: return(0);
! 889: }
! 890:
! 891: /**
! 892: * sgmlAutoCloseOnClose:
! 893: * @ctxt: an SGML parser context
! 894: * @newtag: The new tag name
! 895: *
! 896: * The HTmL DtD allows an ending tag to implicitely close other tags.
! 897: */
! 898: void
! 899: sgmlAutoCloseOnClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
! 900: sgmlElemDescPtr info;
! 901: xmlChar *oldname;
! 902: int i;
! 903:
! 904: if ((newtag[0] == '/') && (newtag[1] == 0))
! 905: return;
! 906:
! 907: #ifdef DEBUG
! 908: fprintf(stderr,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
! 909: for (i = 0;i < ctxt->nameNr;i++)
! 910: fprintf(stderr,"%d : %s\n", i, ctxt->nameTab[i]);
! 911: #endif
! 912:
! 913: for (i = (ctxt->nameNr - 1);i >= 0;i--) {
! 914: if (!xmlStrcmp(newtag, ctxt->nameTab[i])) break;
! 915: }
! 916: if (i < 0) return;
! 917:
! 918: while (xmlStrcmp(newtag, ctxt->name)) {
! 919: info = sgmlTagLookup(ctxt->name);
! 920: if ((info == NULL) || (info->endTag == 1)) {
! 921: #ifdef DEBUG
! 922: fprintf(stderr,"sgmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
! 923: #endif
! 924: } else {
! 925: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 926: ctxt->sax->error(ctxt->userData,
! 927: "Opening and ending tag mismatch: %s and %s\n",
! 928: newtag, ctxt->name);
! 929: ctxt->wellFormed = 0;
! 930: }
! 931: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 932: ctxt->sax->endElement(ctxt->userData, ctxt->name);
! 933: oldname = sgmlnamePop(ctxt);
! 934: if (oldname != NULL) {
! 935: #ifdef DEBUG
! 936: fprintf(stderr,"sgmlAutoCloseOnClose: popped %s\n", oldname);
! 937: #endif
! 938: xmlFree(oldname);
! 939: }
! 940: }
! 941: }
! 942:
! 943: /**
! 944: * sgmlAutoClose:
! 945: * @ctxt: an SGML parser context
! 946: * @newtag: The new tag name or NULL
! 947: *
! 948: * The HTmL DtD allows a tag to implicitely close other tags.
! 949: * The list is kept in sgmlStartClose array. This function is
! 950: * called when a new tag has been detected and generates the
! 951: * appropriates closes if possible/needed.
! 952: * If newtag is NULL this mean we are at the end of the resource
! 953: * and we should check
! 954: */
! 955: void
! 956: sgmlAutoClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
! 957: xmlChar *oldname;
! 958: while ((newtag != NULL) && (ctxt->name != NULL) &&
! 959: (sgmlCheckAutoClose(newtag, ctxt->name))) {
! 960: #ifdef DEBUG
! 961: fprintf(stderr,"sgmlAutoClose: %s closes %s\n", newtag, ctxt->name);
! 962: #endif
! 963: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 964: ctxt->sax->endElement(ctxt->userData, ctxt->name);
! 965: oldname = sgmlnamePop(ctxt);
! 966: if (oldname != NULL) {
! 967: #ifdef DEBUG
! 968: fprintf(stderr,"sgmlAutoClose: popped %s\n", oldname);
! 969: #endif
! 970: xmlFree(oldname);
! 971: }
! 972: }
! 973: #if 0
! 974: if (newtag == NULL) {
! 975: sgmlAutoCloseOnClose(ctxt, BAD_CAST"head");
! 976: sgmlAutoCloseOnClose(ctxt, BAD_CAST"body");
! 977: sgmlAutoCloseOnClose(ctxt, BAD_CAST"sgml");
! 978: }
! 979: while ((newtag == NULL) && (ctxt->name != NULL) &&
! 980: ((!xmlStrcmp(ctxt->name, BAD_CAST"head")) ||
! 981: (!xmlStrcmp(ctxt->name, BAD_CAST"body")) ||
! 982: (!xmlStrcmp(ctxt->name, BAD_CAST"sgml")))) {
! 983: #ifdef DEBUG
! 984: fprintf(stderr,"sgmlAutoClose: EOF closes %s\n", ctxt->name);
! 985: #endif
! 986: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 987: ctxt->sax->endElement(ctxt->userData, ctxt->name);
! 988: oldname = sgmlnamePop(ctxt);
! 989: if (oldname != NULL) {
! 990: #ifdef DEBUG
! 991: fprintf(stderr,"sgmlAutoClose: popped %s\n", oldname);
! 992: #endif
! 993: xmlFree(oldname);
! 994: }
! 995: }
! 996: #endif
! 997: }
! 998:
! 999: /**
! 1000: * sgmlAutoCloseTag:
! 1001: * @doc: the SGML document
! 1002: * @name: The tag name
! 1003: * @elem: the SGML element
! 1004: *
! 1005: * The HTmL DtD allows a tag to implicitely close other tags.
! 1006: * The list is kept in sgmlStartClose array. This function checks
! 1007: * if the element or one of it's children would autoclose the
! 1008: * given tag.
! 1009: *
! 1010: * Returns 1 if autoclose, 0 otherwise
! 1011: */
! 1012: int
! 1013: sgmlAutoCloseTag(sgmlDocPtr doc, const xmlChar *name, sgmlNodePtr elem) {
! 1014: sgmlNodePtr child;
! 1015:
! 1016: if (elem == NULL) return(1);
! 1017: if (!xmlStrcmp(name, elem->name)) return(0);
! 1018: if (sgmlCheckAutoClose(elem->name, name)) return(1);
! 1019: child = elem->children;
! 1020: while (child != NULL) {
! 1021: if (sgmlAutoCloseTag(doc, name, child)) return(1);
! 1022: child = child->next;
! 1023: }
! 1024: return(0);
! 1025: }
! 1026:
! 1027: /**
! 1028: * sgmlIsAutoClosed:
! 1029: * @doc: the SGML document
! 1030: * @elem: the SGML element
! 1031: *
! 1032: * The HTmL DtD allows a tag to implicitely close other tags.
! 1033: * The list is kept in sgmlStartClose array. This function checks
! 1034: * if a tag is autoclosed by one of it's child
! 1035: *
! 1036: * Returns 1 if autoclosed, 0 otherwise
! 1037: */
! 1038: int
! 1039: sgmlIsAutoClosed(sgmlDocPtr doc, sgmlNodePtr elem) {
! 1040: sgmlNodePtr child;
! 1041:
! 1042: if (elem == NULL) return(1);
! 1043: child = elem->children;
! 1044: while (child != NULL) {
! 1045: if (sgmlAutoCloseTag(doc, elem->name, child)) return(1);
! 1046: child = child->next;
! 1047: }
! 1048: return(0);
! 1049: }
! 1050:
! 1051: /**
! 1052: * sgmlCheckImplied:
! 1053: * @ctxt: an SGML parser context
! 1054: * @newtag: The new tag name
! 1055: *
! 1056: * The HTmL DtD allows a tag to exists only implicitely
! 1057: * called when a new tag has been detected and generates the
! 1058: * appropriates implicit tags if missing
! 1059: */
! 1060: void
! 1061: sgmlCheckImplied(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
! 1062: #if 0
! 1063: if (!xmlStrcmp(newtag, BAD_CAST"sgml"))
! 1064: return;
! 1065: if (ctxt->nameNr <= 0) {
! 1066: #ifdef DEBUG
! 1067: fprintf(stderr,"Implied element sgml: pushed sgml\n");
! 1068: #endif
! 1069: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"sgml"));
! 1070: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
! 1071: ctxt->sax->startElement(ctxt->userData, BAD_CAST"sgml", NULL);
! 1072: }
! 1073: if ((!xmlStrcmp(newtag, BAD_CAST"body")) || (!xmlStrcmp(newtag, BAD_CAST"head")))
! 1074: return;
! 1075: if (ctxt->nameNr <= 1) {
! 1076: if ((!xmlStrcmp(newtag, BAD_CAST"script")) ||
! 1077: (!xmlStrcmp(newtag, BAD_CAST"style")) ||
! 1078: (!xmlStrcmp(newtag, BAD_CAST"meta")) ||
! 1079: (!xmlStrcmp(newtag, BAD_CAST"link")) ||
! 1080: (!xmlStrcmp(newtag, BAD_CAST"title")) ||
! 1081: (!xmlStrcmp(newtag, BAD_CAST"base"))) {
! 1082: /*
! 1083: * dropped OBJECT ... i you put it first BODY will be
! 1084: * assumed !
! 1085: */
! 1086: #ifdef DEBUG
! 1087: fprintf(stderr,"Implied element head: pushed head\n");
! 1088: #endif
! 1089: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"head"));
! 1090: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
! 1091: ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
! 1092: } else {
! 1093: #ifdef DEBUG
! 1094: fprintf(stderr,"Implied element body: pushed body\n");
! 1095: #endif
! 1096: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"body"));
! 1097: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
! 1098: ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
! 1099: }
! 1100: }
! 1101: #endif
! 1102: }
! 1103:
! 1104: /**
! 1105: * sgmlCheckParagraph
! 1106: * @ctxt: an SGML parser context
! 1107: *
! 1108: * Check whether a p element need to be implied before inserting
! 1109: * characters in the current element.
! 1110: *
! 1111: * Returns 1 if a paragraph has been inserted, 0 if not and -1
! 1112: * in case of error.
! 1113: */
! 1114:
! 1115: int
! 1116: sgmlCheckParagraph(sgmlParserCtxtPtr ctxt) {
! 1117: const xmlChar *tag;
! 1118: int i;
! 1119:
! 1120: if (ctxt == NULL)
! 1121: return(-1);
! 1122: tag = ctxt->name;
! 1123: if (tag == NULL) {
! 1124: sgmlAutoClose(ctxt, BAD_CAST"p");
! 1125: sgmlCheckImplied(ctxt, BAD_CAST"p");
! 1126: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
! 1127: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
! 1128: ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
! 1129: return(1);
! 1130: }
! 1131: for (i = 0; sgmlNoContentElements[i] != NULL; i++) {
! 1132: if (!xmlStrcmp(tag, BAD_CAST sgmlNoContentElements[i])) {
! 1133: #ifdef DEBUG
! 1134: fprintf(stderr,"Implied element paragraph\n");
! 1135: #endif
! 1136: sgmlAutoClose(ctxt, BAD_CAST"p");
! 1137: sgmlCheckImplied(ctxt, BAD_CAST"p");
! 1138: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
! 1139: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
! 1140: ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
! 1141: return(1);
! 1142: }
! 1143: }
! 1144: return(0);
! 1145: }
! 1146:
! 1147: /************************************************************************
! 1148: * *
! 1149: * The list of SGML predefined entities *
! 1150: * *
! 1151: ************************************************************************/
! 1152:
! 1153:
! 1154: sgmlEntityDesc docbookEntitiesTable[] = {
! 1155: /*
! 1156: * the 4 absolute ones, plus apostrophe.
! 1157: */
! 1158: { 0x0026, "amp", "AMPERSAND" },
! 1159: { 0x003C, "lt", "LESS-THAN SIGN" },
! 1160:
! 1161: /*
! 1162: * Converted with VI macros from docbook ent files
! 1163: */
! 1164: { 0x0021, "excl", "EXCLAMATION MARK" },
! 1165: { 0x0022, "quot", "QUOTATION MARK" },
! 1166: { 0x0023, "num", "NUMBER SIGN" },
! 1167: { 0x0024, "dollar", "DOLLAR SIGN" },
! 1168: { 0x0025, "percnt", "PERCENT SIGN" },
! 1169: { 0x0027, "apos", "APOSTROPHE" },
! 1170: { 0x0028, "lpar", "LEFT PARENTHESIS" },
! 1171: { 0x0029, "rpar", "RIGHT PARENTHESIS" },
! 1172: { 0x002A, "ast", "ASTERISK OPERATOR" },
! 1173: { 0x002B, "plus", "PLUS SIGN" },
! 1174: { 0x002C, "comma", "COMMA" },
! 1175: { 0x002D, "hyphen", "HYPHEN-MINUS" },
! 1176: { 0x002E, "period", "FULL STOP" },
! 1177: { 0x002F, "sol", "SOLIDUS" },
! 1178: { 0x003A, "colon", "COLON" },
! 1179: { 0x003B, "semi", "SEMICOLON" },
! 1180: { 0x003D, "equals", "EQUALS SIGN" },
! 1181: { 0x003E, "gt", "GREATER-THAN SIGN" },
! 1182: { 0x003F, "quest", "QUESTION MARK" },
! 1183: { 0x0040, "commat", "COMMERCIAL AT" },
! 1184: { 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
! 1185: { 0x005C, "bsol", "REVERSE SOLIDUS" },
! 1186: { 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
! 1187: { 0x005E, "circ", "RING OPERATOR" },
! 1188: { 0x005F, "lowbar", "LOW LINE" },
! 1189: { 0x0060, "grave", "GRAVE ACCENT" },
! 1190: { 0x007B, "lcub", "LEFT CURLY BRACKET" },
! 1191: { 0x007C, "verbar", "VERTICAL LINE" },
! 1192: { 0x007D, "rcub", "RIGHT CURLY BRACKET" },
! 1193: { 0x00A0, "nbsp", "NO-BREAK SPACE" },
! 1194: { 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
! 1195: { 0x00A2, "cent", "CENT SIGN" },
! 1196: { 0x00A3, "pound", "POUND SIGN" },
! 1197: { 0x00A4, "curren", "CURRENCY SIGN" },
! 1198: { 0x00A5, "yen", "YEN SIGN" },
! 1199: { 0x00A6, "brvbar", "BROKEN BAR" },
! 1200: { 0x00A7, "sect", "SECTION SIGN" },
! 1201: { 0x00A8, "die", "" },
! 1202: { 0x00A8, "Dot", "" },
! 1203: { 0x00A8, "uml", "" },
! 1204: { 0x00A9, "copy", "COPYRIGHT SIGN" },
! 1205: { 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
! 1206: { 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
! 1207: { 0x00AC, "not", "NOT SIGN" },
! 1208: { 0x00AD, "shy", "SOFT HYPHEN" },
! 1209: { 0x00AE, "reg", "REG TRADE MARK SIGN" },
! 1210: { 0x00AF, "macr", "MACRON" },
! 1211: { 0x00B0, "deg", "DEGREE SIGN" },
! 1212: { 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
! 1213: { 0x00B2, "sup2", "SUPERSCRIPT TWO" },
! 1214: { 0x00B3, "sup3", "SUPERSCRIPT THREE" },
! 1215: { 0x00B4, "acute", "ACUTE ACCENT" },
! 1216: { 0x00B5, "micro", "MICRO SIGN" },
! 1217: { 0x00B6, "para", "PILCROW SIGN" },
! 1218: { 0x00B7, "middot", "MIDDLE DOT" },
! 1219: { 0x00B8, "cedil", "CEDILLA" },
! 1220: { 0x00B9, "sup1", "SUPERSCRIPT ONE" },
! 1221: { 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
! 1222: { 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
! 1223: { 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
! 1224: { 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
! 1225: { 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
! 1226: { 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
! 1227: { 0x00BF, "iquest", "INVERTED QUESTION MARK" },
! 1228: { 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
! 1229: { 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
! 1230: { 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
! 1231: { 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
! 1232: { 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
! 1233: { 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
! 1234: { 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
! 1235: { 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
! 1236: { 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
! 1237: { 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
! 1238: { 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
! 1239: { 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
! 1240: { 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
! 1241: { 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
! 1242: { 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
! 1243: { 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
! 1244: { 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
! 1245: { 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
! 1246: { 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
! 1247: { 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
! 1248: { 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
! 1249: { 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
! 1250: { 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
! 1251: { 0x00D7, "times", "MULTIPLICATION SIGN" },
! 1252: { 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
! 1253: { 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
! 1254: { 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
! 1255: { 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
! 1256: { 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
! 1257: { 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
! 1258: { 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
! 1259: { 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
! 1260: { 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
! 1261: { 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
! 1262: { 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
! 1263: { 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
! 1264: { 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
! 1265: { 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
! 1266: { 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
! 1267: { 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
! 1268: { 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
! 1269: { 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
! 1270: { 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
! 1271: { 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
! 1272: { 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
! 1273: { 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
! 1274: { 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
! 1275: { 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
! 1276: { 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
! 1277: { 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
! 1278: { 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
! 1279: { 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
! 1280: { 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
! 1281: { 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
! 1282: { 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
! 1283: { 0x00F7, "divide", "DIVISION SIGN" },
! 1284: { 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
! 1285: { 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
! 1286: { 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
! 1287: { 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
! 1288: { 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
! 1289: { 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
! 1290: { 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
! 1291: { 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
! 1292: { 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
! 1293: { 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
! 1294: { 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
! 1295: { 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
! 1296: { 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
! 1297: { 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
! 1298: { 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
! 1299: { 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
! 1300: { 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
! 1301: { 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
! 1302: { 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
! 1303: { 0x010B, "cdot", "DOT OPERATOR" },
! 1304: { 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
! 1305: { 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
! 1306: { 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
! 1307: { 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
! 1308: { 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
! 1309: { 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
! 1310: { 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
! 1311: { 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
! 1312: { 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
! 1313: { 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
! 1314: { 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
! 1315: { 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
! 1316: { 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
! 1317: { 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
! 1318: { 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
! 1319: { 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
! 1320: { 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
! 1321: { 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
! 1322: { 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
! 1323: { 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
! 1324: { 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
! 1325: { 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
! 1326: { 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
! 1327: { 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
! 1328: { 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
! 1329: { 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
! 1330: { 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
! 1331: { 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
! 1332: { 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
! 1333: { 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
! 1334: { 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
! 1335: { 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
! 1336: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
! 1337: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
! 1338: { 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
! 1339: { 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
! 1340: { 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
! 1341: { 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
! 1342: { 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
! 1343: { 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
! 1344: { 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
! 1345: { 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
! 1346: { 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
! 1347: { 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
! 1348: { 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
! 1349: { 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
! 1350: { 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
! 1351: { 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
! 1352: { 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
! 1353: { 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
! 1354: { 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
! 1355: { 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
! 1356: { 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
! 1357: { 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
! 1358: { 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
! 1359: { 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
! 1360: { 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
! 1361: { 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
! 1362: { 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
! 1363: { 0x014B, "eng", "LATIN SMALL LETTER ENG" },
! 1364: { 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
! 1365: { 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
! 1366: { 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
! 1367: { 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
! 1368: { 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
! 1369: { 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
! 1370: { 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
! 1371: { 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
! 1372: { 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
! 1373: { 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
! 1374: { 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
! 1375: { 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
! 1376: { 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
! 1377: { 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
! 1378: { 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
! 1379: { 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
! 1380: { 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
! 1381: { 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
! 1382: { 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
! 1383: { 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
! 1384: { 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
! 1385: { 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
! 1386: { 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
! 1387: { 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
! 1388: { 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
! 1389: { 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
! 1390: { 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
! 1391: { 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
! 1392: { 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
! 1393: { 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
! 1394: { 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
! 1395: { 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
! 1396: { 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
! 1397: { 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
! 1398: { 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
! 1399: { 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
! 1400: { 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
! 1401: { 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
! 1402: { 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
! 1403: { 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
! 1404: { 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
! 1405: { 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
! 1406: { 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
! 1407: { 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
! 1408: { 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
! 1409: { 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
! 1410: { 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
! 1411: { 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
! 1412: { 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
! 1413: { 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
! 1414: { 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
! 1415: { 0x02C7, "caron", "CARON" },
! 1416: { 0x02D8, "breve", "BREVE" },
! 1417: { 0x02D9, "dot", "DOT ABOVE" },
! 1418: { 0x02DA, "ring", "RING ABOVE" },
! 1419: { 0x02DB, "ogon", "OGONEK" },
! 1420: { 0x02DC, "tilde", "TILDE" },
! 1421: { 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
! 1422: { 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
! 1423: { 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
! 1424: { 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
! 1425: { 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
! 1426: { 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
! 1427: { 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
! 1428: { 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
! 1429: { 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
! 1430: { 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
! 1431: { 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
! 1432: { 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
! 1433: { 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
! 1434: { 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
! 1435: { 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
! 1436: { 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
! 1437: { 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
! 1438: { 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
! 1439: { 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
! 1440: { 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
! 1441: { 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
! 1442: { 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
! 1443: { 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
! 1444: { 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
! 1445: { 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
! 1446: { 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
! 1447: { 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
! 1448: { 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
! 1449: { 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
! 1450: { 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
! 1451: { 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
! 1452: { 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
! 1453: { 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
! 1454: { 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
! 1455: { 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
! 1456: { 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
! 1457: { 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
! 1458: { 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
! 1459: { 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
! 1460: { 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
! 1461: { 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
! 1462: { 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
! 1463: { 0x03A5, "Ugr", "" },
! 1464: { 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
! 1465: { 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
! 1466: { 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
! 1467: { 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
! 1468: { 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
! 1469: { 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
! 1470: { 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
! 1471: { 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
! 1472: { 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
! 1473: { 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
! 1474: { 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
! 1475: { 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
! 1476: { 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
! 1477: { 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
! 1478: { 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
! 1479: { 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
! 1480: { 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
! 1481: { 0x03B1, "agr", "" },
! 1482: { 0x03B1, "alpha", "" },
! 1483: { 0x03B1, "b.alpha", "" },
! 1484: { 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
! 1485: { 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
! 1486: { 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
! 1487: { 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
! 1488: { 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
! 1489: { 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
! 1490: { 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
! 1491: { 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
! 1492: { 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
! 1493: { 0x03B5, "b.epsi", "" },
! 1494: { 0x03B5, "b.epsis", "" },
! 1495: { 0x03B5, "b.epsiv", "" },
! 1496: { 0x03B5, "egr", "" },
! 1497: { 0x03B5, "epsiv", "" },
! 1498: { 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
! 1499: { 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
! 1500: { 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
! 1501: { 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
! 1502: { 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
! 1503: { 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
! 1504: { 0x03B8, "b.thetas", "" },
! 1505: { 0x03B8, "thetas", "" },
! 1506: { 0x03B8, "thgr", "" },
! 1507: { 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
! 1508: { 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
! 1509: { 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
! 1510: { 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
! 1511: { 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
! 1512: { 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
! 1513: { 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
! 1514: { 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
! 1515: { 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
! 1516: { 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
! 1517: { 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
! 1518: { 0x03BC, "mu", "GREEK SMALL LETTER MU" },
! 1519: { 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
! 1520: { 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
! 1521: { 0x03BD, "nu", "GREEK SMALL LETTER NU" },
! 1522: { 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
! 1523: { 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
! 1524: { 0x03BE, "xi", "GREEK SMALL LETTER XI" },
! 1525: { 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
! 1526: { 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
! 1527: { 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
! 1528: { 0x03C0, "pi", "GREEK SMALL LETTER PI" },
! 1529: { 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
! 1530: { 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
! 1531: { 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
! 1532: { 0x03C2, "b.sigmav", "" },
! 1533: { 0x03C2, "sfgr", "" },
! 1534: { 0x03C2, "sigmav", "" },
! 1535: { 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
! 1536: { 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
! 1537: { 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
! 1538: { 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
! 1539: { 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
! 1540: { 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
! 1541: { 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
! 1542: { 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
! 1543: { 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
! 1544: { 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
! 1545: { 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
! 1546: { 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
! 1547: { 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
! 1548: { 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
! 1549: { 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
! 1550: { 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
! 1551: { 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
! 1552: { 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
! 1553: { 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
! 1554: { 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
! 1555: { 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
! 1556: { 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
! 1557: { 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
! 1558: { 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
! 1559: { 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
! 1560: { 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
! 1561: { 0x03D1, "b.thetav", "" },
! 1562: { 0x03D1, "thetav", "" },
! 1563: { 0x03D2, "b.Upsi", "" },
! 1564: { 0x03D2, "Upsi", "" },
! 1565: { 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
! 1566: { 0x03D5, "phiv", "GREEK PHI SYMBOL" },
! 1567: { 0x03D6, "b.piv", "GREEK PI SYMBOL" },
! 1568: { 0x03D6, "piv", "GREEK PI SYMBOL" },
! 1569: { 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
! 1570: { 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
! 1571: { 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
! 1572: { 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
! 1573: { 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
! 1574: { 0x03F1, "rhov", "GREEK RHO SYMBOL" },
! 1575: { 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
! 1576: { 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
! 1577: { 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
! 1578: { 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
! 1579: { 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
! 1580: { 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
! 1581: { 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
! 1582: { 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
! 1583: { 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
! 1584: { 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
! 1585: { 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
! 1586: { 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
! 1587: { 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
! 1588: { 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
! 1589: { 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
! 1590: { 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
! 1591: { 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
! 1592: { 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
! 1593: { 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
! 1594: { 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
! 1595: { 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
! 1596: { 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
! 1597: { 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
! 1598: { 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
! 1599: { 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
! 1600: { 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
! 1601: { 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
! 1602: { 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
! 1603: { 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
! 1604: { 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
! 1605: { 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
! 1606: { 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
! 1607: { 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
! 1608: { 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
! 1609: { 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
! 1610: { 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
! 1611: { 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
! 1612: { 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
! 1613: { 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
! 1614: { 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
! 1615: { 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
! 1616: { 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
! 1617: { 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
! 1618: { 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
! 1619: { 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
! 1620: { 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
! 1621: { 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
! 1622: { 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
! 1623: { 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
! 1624: { 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
! 1625: { 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
! 1626: { 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
! 1627: { 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
! 1628: { 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
! 1629: { 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
! 1630: { 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
! 1631: { 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
! 1632: { 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
! 1633: { 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
! 1634: { 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
! 1635: { 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
! 1636: { 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
! 1637: { 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
! 1638: { 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
! 1639: { 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
! 1640: { 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
! 1641: { 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
! 1642: { 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
! 1643: { 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
! 1644: { 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
! 1645: { 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
! 1646: { 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
! 1647: { 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
! 1648: { 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
! 1649: { 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
! 1650: { 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
! 1651: { 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
! 1652: { 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
! 1653: { 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
! 1654: { 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
! 1655: { 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
! 1656: { 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
! 1657: { 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
! 1658: { 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
! 1659: { 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
! 1660: { 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
! 1661: { 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
! 1662: { 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
! 1663: { 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
! 1664: { 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
! 1665: { 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
! 1666: { 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
! 1667: { 0x2002, "ensp", "EN SPACE" },
! 1668: { 0x2003, "emsp", "EM SPACE" },
! 1669: { 0x2004, "emsp13", "THREE-PER-EM SPACE" },
! 1670: { 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
! 1671: { 0x2007, "numsp", "FIGURE SPACE" },
! 1672: { 0x2008, "puncsp", "PUNCTUATION SPACE" },
! 1673: { 0x2009, "thinsp", "THIN SPACE" },
! 1674: { 0x200A, "hairsp", "HAIR SPACE" },
! 1675: { 0x2010, "dash", "HYPHEN" },
! 1676: { 0x2013, "ndash", "EN DASH" },
! 1677: { 0x2014, "mdash", "EM DASH" },
! 1678: { 0x2015, "horbar", "HORIZONTAL BAR" },
! 1679: { 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
! 1680: { 0x2018, "lsquo", "" },
! 1681: { 0x2018, "rsquor", "" },
! 1682: { 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
! 1683: { 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
! 1684: { 0x201C, "ldquo", "" },
! 1685: { 0x201C, "rdquor", "" },
! 1686: { 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
! 1687: { 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
! 1688: { 0x2020, "dagger", "DAGGER" },
! 1689: { 0x2021, "Dagger", "DOUBLE DAGGER" },
! 1690: { 0x2022, "bull", "BULLET" },
! 1691: { 0x2025, "nldr", "TWO DOT LEADER" },
! 1692: { 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
! 1693: { 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
! 1694: { 0x2030, "permil", "PER MILLE SIGN" },
! 1695: { 0x2032, "prime", "PRIME" },
! 1696: { 0x2032, "vprime", "PRIME" },
! 1697: { 0x2033, "Prime", "DOUBLE PRIME" },
! 1698: { 0x2034, "tprime", "TRIPLE PRIME" },
! 1699: { 0x2035, "bprime", "REVERSED PRIME" },
! 1700: { 0x2041, "caret", "CARET" },
! 1701: { 0x2043, "hybull", "HYPHEN BULLET" },
! 1702: { 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
! 1703: { 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
! 1704: { 0x2105, "incare", "CARE OF" },
! 1705: { 0x210B, "hamilt", "SCRIPT CAPITAL H" },
! 1706: { 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
! 1707: { 0x2111, "image", "BLACK-LETTER CAPITAL I" },
! 1708: { 0x2112, "lagran", "SCRIPT CAPITAL L" },
! 1709: { 0x2113, "ell", "SCRIPT SMALL L" },
! 1710: { 0x2116, "numero", "NUMERO SIGN" },
! 1711: { 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
! 1712: { 0x2118, "weierp", "SCRIPT CAPITAL P" },
! 1713: { 0x211C, "real", "BLACK-LETTER CAPITAL R" },
! 1714: { 0x211E, "rx", "PRESCRIPTION TAKE" },
! 1715: { 0x2122, "trade", "TRADE MARK SIGN" },
! 1716: { 0x2126, "ohm", "OHM SIGN" },
! 1717: { 0x212B, "angst", "ANGSTROM SIGN" },
! 1718: { 0x212C, "bernou", "SCRIPT CAPITAL B" },
! 1719: { 0x2133, "phmmat", "SCRIPT CAPITAL M" },
! 1720: { 0x2134, "order", "SCRIPT SMALL O" },
! 1721: { 0x2135, "aleph", "ALEF SYMBOL" },
! 1722: { 0x2136, "beth", "BET SYMBOL" },
! 1723: { 0x2137, "gimel", "GIMEL SYMBOL" },
! 1724: { 0x2138, "daleth", "DALET SYMBOL" },
! 1725: { 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
! 1726: { 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
! 1727: { 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
! 1728: { 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
! 1729: { 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
! 1730: { 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
! 1731: { 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
! 1732: { 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
! 1733: { 0x215B, "frac18", "" },
! 1734: { 0x215C, "frac38", "" },
! 1735: { 0x215D, "frac58", "" },
! 1736: { 0x215E, "frac78", "" },
! 1737: { 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
! 1738: { 0x2191, "uarr", "UPWARDS ARROW" },
! 1739: { 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
! 1740: { 0x2193, "darr", "DOWNWARDS ARROW" },
! 1741: { 0x2194, "harr", "LEFT RIGHT ARROW" },
! 1742: { 0x2194, "xhArr", "LEFT RIGHT ARROW" },
! 1743: { 0x2194, "xharr", "LEFT RIGHT ARROW" },
! 1744: { 0x2195, "varr", "UP DOWN ARROW" },
! 1745: { 0x2196, "nwarr", "NORTH WEST ARROW" },
! 1746: { 0x2197, "nearr", "NORTH EAST ARROW" },
! 1747: { 0x2198, "drarr", "SOUTH EAST ARROW" },
! 1748: { 0x2199, "dlarr", "SOUTH WEST ARROW" },
! 1749: { 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
! 1750: { 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
! 1751: { 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
! 1752: { 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
! 1753: { 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
! 1754: { 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
! 1755: { 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
! 1756: { 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
! 1757: { 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
! 1758: { 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
! 1759: { 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
! 1760: { 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
! 1761: { 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
! 1762: { 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
! 1763: { 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
! 1764: { 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
! 1765: { 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
! 1766: { 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
! 1767: { 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
! 1768: { 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
! 1769: { 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
! 1770: { 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
! 1771: { 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
! 1772: { 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
! 1773: { 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
! 1774: { 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
! 1775: { 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
! 1776: { 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
! 1777: { 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
! 1778: { 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
! 1779: { 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
! 1780: { 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
! 1781: { 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
! 1782: { 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
! 1783: { 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
! 1784: { 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
! 1785: { 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
! 1786: { 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
! 1787: { 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
! 1788: { 0x21D0, "lArr", "LEFTWARDS ARROW" },
! 1789: { 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
! 1790: { 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
! 1791: { 0x21D2, "rArr", "RIGHTWARDS ARROW" },
! 1792: { 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
! 1793: { 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
! 1794: { 0x21D4, "hArr", "" },
! 1795: { 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
! 1796: { 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
! 1797: { 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
! 1798: { 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
! 1799: { 0x2200, "forall", "" },
! 1800: { 0x2201, "comp", "COMPLEMENT" },
! 1801: { 0x2202, "part", "" },
! 1802: { 0x2203, "exist", "" },
! 1803: { 0x2204, "nexist", "THERE DOES NOT EXIST" },
! 1804: { 0x2205, "empty", "" },
! 1805: { 0x2207, "nabla", "NABLA" },
! 1806: { 0x2209, "notin", "" },
! 1807: { 0x220A, "epsi", "" },
! 1808: { 0x220A, "epsis", "" },
! 1809: { 0x220A, "isin", "" },
! 1810: { 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
! 1811: { 0x220D, "ni", "" },
! 1812: { 0x220F, "prod", "N-ARY PRODUCT" },
! 1813: { 0x2210, "amalg", "N-ARY COPRODUCT" },
! 1814: { 0x2210, "coprod", "N-ARY COPRODUCT" },
! 1815: { 0x2210, "samalg", "" },
! 1816: { 0x2211, "sum", "N-ARY SUMMATION" },
! 1817: { 0x2212, "minus", "MINUS SIGN" },
! 1818: { 0x2213, "mnplus", "" },
! 1819: { 0x2214, "plusdo", "DOT PLUS" },
! 1820: { 0x2216, "setmn", "SET MINUS" },
! 1821: { 0x2216, "ssetmn", "SET MINUS" },
! 1822: { 0x2217, "lowast", "ASTERISK OPERATOR" },
! 1823: { 0x2218, "compfn", "RING OPERATOR" },
! 1824: { 0x221A, "radic", "" },
! 1825: { 0x221D, "prop", "" },
! 1826: { 0x221D, "vprop", "" },
! 1827: { 0x221E, "infin", "" },
! 1828: { 0x221F, "ang90", "RIGHT ANGLE" },
! 1829: { 0x2220, "ang", "ANGLE" },
! 1830: { 0x2221, "angmsd", "MEASURED ANGLE" },
! 1831: { 0x2222, "angsph", "" },
! 1832: { 0x2223, "mid", "" },
! 1833: { 0x2224, "nmid", "DOES NOT DIVIDE" },
! 1834: { 0x2225, "par", "PARALLEL TO" },
! 1835: { 0x2225, "spar", "PARALLEL TO" },
! 1836: { 0x2226, "npar", "NOT PARALLEL TO" },
! 1837: { 0x2226, "nspar", "NOT PARALLEL TO" },
! 1838: { 0x2227, "and", "" },
! 1839: { 0x2228, "or", "" },
! 1840: { 0x2229, "cap", "" },
! 1841: { 0x222A, "cup", "" },
! 1842: { 0x222B, "int", "" },
! 1843: { 0x222E, "conint", "" },
! 1844: { 0x2234, "there4", "" },
! 1845: { 0x2235, "becaus", "BECAUSE" },
! 1846: { 0x223C, "sim", "" },
! 1847: { 0x223C, "thksim", "TILDE OPERATOR" },
! 1848: { 0x223D, "bsim", "" },
! 1849: { 0x2240, "wreath", "WREATH PRODUCT" },
! 1850: { 0x2241, "nsim", "" },
! 1851: { 0x2243, "sime", "" },
! 1852: { 0x2244, "nsime", "" },
! 1853: { 0x2245, "cong", "" },
! 1854: { 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
! 1855: { 0x2248, "ap", "" },
! 1856: { 0x2248, "thkap", "ALMOST EQUAL TO" },
! 1857: { 0x2249, "nap", "NOT ALMOST EQUAL TO" },
! 1858: { 0x224A, "ape", "" },
! 1859: { 0x224C, "bcong", "ALL EQUAL TO" },
! 1860: { 0x224D, "asymp", "EQUIVALENT TO" },
! 1861: { 0x224E, "bump", "" },
! 1862: { 0x224F, "bumpe", "" },
! 1863: { 0x2250, "esdot", "" },
! 1864: { 0x2251, "eDot", "" },
! 1865: { 0x2252, "efDot", "" },
! 1866: { 0x2253, "erDot", "" },
! 1867: { 0x2254, "colone", "" },
! 1868: { 0x2255, "ecolon", "" },
! 1869: { 0x2256, "ecir", "" },
! 1870: { 0x2257, "cire", "" },
! 1871: { 0x2259, "wedgeq", "ESTIMATES" },
! 1872: { 0x225C, "trie", "" },
! 1873: { 0x2260, "ne", "" },
! 1874: { 0x2261, "equiv", "" },
! 1875: { 0x2262, "nequiv", "NOT IDENTICAL TO" },
! 1876: { 0x2264, "le", "" },
! 1877: { 0x2264, "les", "LESS-THAN OR EQUAL TO" },
! 1878: { 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
! 1879: { 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
! 1880: { 0x2266, "lE", "" },
! 1881: { 0x2267, "gE", "" },
! 1882: { 0x2268, "lnE", "" },
! 1883: { 0x2268, "lne", "" },
! 1884: { 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
! 1885: { 0x2269, "gnE", "" },
! 1886: { 0x2269, "gne", "" },
! 1887: { 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
! 1888: { 0x226A, "Lt", "MUCH LESS-THAN" },
! 1889: { 0x226B, "Gt", "MUCH GREATER-THAN" },
! 1890: { 0x226C, "twixt", "BETWEEN" },
! 1891: { 0x226E, "nlt", "NOT LESS-THAN" },
! 1892: { 0x226F, "ngt", "NOT GREATER-THAN" },
! 1893: { 0x2270, "nlE", "" },
! 1894: { 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
! 1895: { 0x2270, "nles", "" },
! 1896: { 0x2271, "ngE", "" },
! 1897: { 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
! 1898: { 0x2271, "nges", "" },
! 1899: { 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
! 1900: { 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
! 1901: { 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
! 1902: { 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
! 1903: { 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
! 1904: { 0x2277, "gl", "" },
! 1905: { 0x227A, "pr", "" },
! 1906: { 0x227B, "sc", "" },
! 1907: { 0x227C, "cupre", "" },
! 1908: { 0x227C, "pre", "" },
! 1909: { 0x227D, "sccue", "" },
! 1910: { 0x227D, "sce", "" },
! 1911: { 0x227E, "prap", "" },
! 1912: { 0x227E, "prsim", "" },
! 1913: { 0x227F, "scap", "" },
! 1914: { 0x227F, "scsim", "" },
! 1915: { 0x2280, "npr", "DOES NOT PRECEDE" },
! 1916: { 0x2281, "nsc", "DOES NOT SUCCEED" },
! 1917: { 0x2282, "sub", "" },
! 1918: { 0x2283, "sup", "" },
! 1919: { 0x2284, "nsub", "NOT A SUBSET OF" },
! 1920: { 0x2285, "nsup", "NOT A SUPERSET OF" },
! 1921: { 0x2286, "subE", "" },
! 1922: { 0x2286, "sube", "" },
! 1923: { 0x2287, "supE", "" },
! 1924: { 0x2287, "supe", "" },
! 1925: { 0x2288, "nsubE", "" },
! 1926: { 0x2288, "nsube", "" },
! 1927: { 0x2289, "nsupE", "" },
! 1928: { 0x2289, "nsupe", "" },
! 1929: { 0x228A, "subne", "" },
! 1930: { 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
! 1931: { 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
! 1932: { 0x228B, "supnE", "" },
! 1933: { 0x228B, "supne", "" },
! 1934: { 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
! 1935: { 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
! 1936: { 0x228E, "uplus", "MULTISET UNION" },
! 1937: { 0x228F, "sqsub", "" },
! 1938: { 0x2290, "sqsup", "" },
! 1939: { 0x2291, "sqsube", "" },
! 1940: { 0x2292, "sqsupe", "" },
! 1941: { 0x2293, "sqcap", "SQUARE CAP" },
! 1942: { 0x2294, "sqcup", "SQUARE CUP" },
! 1943: { 0x2295, "oplus", "CIRCLED PLUS" },
! 1944: { 0x2296, "ominus", "CIRCLED MINUS" },
! 1945: { 0x2297, "otimes", "CIRCLED TIMES" },
! 1946: { 0x2298, "osol", "CIRCLED DIVISION SLASH" },
! 1947: { 0x2299, "odot", "CIRCLED DOT OPERATOR" },
! 1948: { 0x229A, "ocir", "CIRCLED RING OPERATOR" },
! 1949: { 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
! 1950: { 0x229D, "odash", "CIRCLED DASH" },
! 1951: { 0x229E, "plusb", "SQUARED PLUS" },
! 1952: { 0x229F, "minusb", "SQUARED MINUS" },
! 1953: { 0x22A0, "timesb", "SQUARED TIMES" },
! 1954: { 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
! 1955: { 0x22A2, "vdash", "" },
! 1956: { 0x22A3, "dashv", "" },
! 1957: { 0x22A4, "top", "DOWN TACK" },
! 1958: { 0x22A5, "bottom", "" },
! 1959: { 0x22A5, "perp", "" },
! 1960: { 0x22A7, "models", "MODELS" },
! 1961: { 0x22A8, "vDash", "" },
! 1962: { 0x22A9, "Vdash", "" },
! 1963: { 0x22AA, "Vvdash", "" },
! 1964: { 0x22AC, "nvdash", "DOES NOT PROVE" },
! 1965: { 0x22AD, "nvDash", "NOT TRUE" },
! 1966: { 0x22AE, "nVdash", "DOES NOT FORCE" },
! 1967: { 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
! 1968: { 0x22B2, "vltri", "" },
! 1969: { 0x22B3, "vrtri", "" },
! 1970: { 0x22B4, "ltrie", "" },
! 1971: { 0x22B5, "rtrie", "" },
! 1972: { 0x22B8, "mumap", "MULTIMAP" },
! 1973: { 0x22BA, "intcal", "INTERCALATE" },
! 1974: { 0x22BB, "veebar", "" },
! 1975: { 0x22BC, "barwed", "NAND" },
! 1976: { 0x22C4, "diam", "DIAMOND OPERATOR" },
! 1977: { 0x22C5, "sdot", "DOT OPERATOR" },
! 1978: { 0x22C6, "sstarf", "STAR OPERATOR" },
! 1979: { 0x22C6, "star", "STAR OPERATOR" },
! 1980: { 0x22C7, "divonx", "DIVISION TIMES" },
! 1981: { 0x22C8, "bowtie", "" },
! 1982: { 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
! 1983: { 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
! 1984: { 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
! 1985: { 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
! 1986: { 0x22CD, "bsime", "" },
! 1987: { 0x22CE, "cuvee", "CURLY LOGICAL OR" },
! 1988: { 0x22CF, "cuwed", "CURLY LOGICAL AND" },
! 1989: { 0x22D0, "Sub", "" },
! 1990: { 0x22D1, "Sup", "" },
! 1991: { 0x22D2, "Cap", "DOUBLE INTERSECTION" },
! 1992: { 0x22D3, "Cup", "DOUBLE UNION" },
! 1993: { 0x22D4, "fork", "" },
! 1994: { 0x22D6, "ldot", "" },
! 1995: { 0x22D7, "gsdot", "" },
! 1996: { 0x22D8, "Ll", "" },
! 1997: { 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
! 1998: { 0x22DA, "lEg", "" },
! 1999: { 0x22DA, "leg", "" },
! 2000: { 0x22DB, "gEl", "" },
! 2001: { 0x22DB, "gel", "" },
! 2002: { 0x22DC, "els", "" },
! 2003: { 0x22DD, "egs", "" },
! 2004: { 0x22DE, "cuepr", "" },
! 2005: { 0x22DF, "cuesc", "" },
! 2006: { 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
! 2007: { 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
! 2008: { 0x22E6, "lnsim", "" },
! 2009: { 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
! 2010: { 0x22E8, "prnap", "" },
! 2011: { 0x22E8, "prnsim", "" },
! 2012: { 0x22E9, "scnap", "" },
! 2013: { 0x22E9, "scnsim", "" },
! 2014: { 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
! 2015: { 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
! 2016: { 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
! 2017: { 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
! 2018: { 0x22EE, "vellip", "" },
! 2019: { 0x2306, "Barwed", "PERSPECTIVE" },
! 2020: { 0x2308, "lceil", "LEFT CEILING" },
! 2021: { 0x2309, "rceil", "RIGHT CEILING" },
! 2022: { 0x230A, "lfloor", "LEFT FLOOR" },
! 2023: { 0x230B, "rfloor", "RIGHT FLOOR" },
! 2024: { 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
! 2025: { 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
! 2026: { 0x230E, "urcrop", "TOP RIGHT CROP" },
! 2027: { 0x230F, "ulcrop", "TOP LEFT CROP" },
! 2028: { 0x2315, "telrec", "TELEPHONE RECORDER" },
! 2029: { 0x2316, "target", "POSITION INDICATOR" },
! 2030: { 0x231C, "ulcorn", "TOP LEFT CORNER" },
! 2031: { 0x231D, "urcorn", "TOP RIGHT CORNER" },
! 2032: { 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
! 2033: { 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
! 2034: { 0x2322, "frown", "" },
! 2035: { 0x2322, "sfrown", "FROWN" },
! 2036: { 0x2323, "smile", "" },
! 2037: { 0x2323, "ssmile", "SMILE" },
! 2038: { 0x2423, "blank", "OPEN BOX" },
! 2039: { 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
! 2040: { 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
! 2041: { 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
! 2042: { 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
! 2043: { 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
! 2044: { 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
! 2045: { 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
! 2046: { 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
! 2047: { 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
! 2048: { 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
! 2049: { 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
! 2050: { 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
! 2051: { 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
! 2052: { 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
! 2053: { 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
! 2054: { 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
! 2055: { 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
! 2056: { 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
! 2057: { 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
! 2058: { 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
! 2059: { 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
! 2060: { 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
! 2061: { 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
! 2062: { 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
! 2063: { 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
! 2064: { 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
! 2065: { 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
! 2066: { 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
! 2067: { 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
! 2068: { 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
! 2069: { 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
! 2070: { 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
! 2071: { 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
! 2072: { 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
! 2073: { 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
! 2074: { 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
! 2075: { 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
! 2076: { 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
! 2077: { 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
! 2078: { 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
! 2079: { 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
! 2080: { 0x2580, "uhblk", "UPPER HALF BLOCK" },
! 2081: { 0x2584, "lhblk", "LOWER HALF BLOCK" },
! 2082: { 0x2588, "block", "FULL BLOCK" },
! 2083: { 0x2591, "blk14", "LIGHT SHADE" },
! 2084: { 0x2592, "blk12", "MEDIUM SHADE" },
! 2085: { 0x2593, "blk34", "DARK SHADE" },
! 2086: { 0x25A1, "square", "WHITE SQUARE" },
! 2087: { 0x25A1, "squ", "WHITE SQUARE" },
! 2088: { 0x25AA, "squf", "" },
! 2089: { 0x25AD, "rect", "WHITE RECTANGLE" },
! 2090: { 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
! 2091: { 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
! 2092: { 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
! 2093: { 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
! 2094: { 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
! 2095: { 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
! 2096: { 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
! 2097: { 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
! 2098: { 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
! 2099: { 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
! 2100: { 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
! 2101: { 0x25CA, "loz", "LOZENGE" },
! 2102: { 0x25CB, "cir", "WHITE CIRCLE" },
! 2103: { 0x25CB, "xcirc", "WHITE CIRCLE" },
! 2104: { 0x2605, "starf", "BLACK STAR" },
! 2105: { 0x260E, "phone", "TELEPHONE SIGN" },
! 2106: { 0x2640, "female", "" },
! 2107: { 0x2642, "male", "MALE SIGN" },
! 2108: { 0x2660, "spades", "BLACK SPADE SUIT" },
! 2109: { 0x2663, "clubs", "BLACK CLUB SUIT" },
! 2110: { 0x2665, "hearts", "BLACK HEART SUIT" },
! 2111: { 0x2666, "diams", "BLACK DIAMOND SUIT" },
! 2112: { 0x2669, "sung", "" },
! 2113: { 0x266D, "flat", "MUSIC FLAT SIGN" },
! 2114: { 0x266E, "natur", "MUSIC NATURAL SIGN" },
! 2115: { 0x266F, "sharp", "MUSIC SHARP SIGN" },
! 2116: { 0x2713, "check", "CHECK MARK" },
! 2117: { 0x2717, "cross", "BALLOT X" },
! 2118: { 0x2720, "malt", "MALTESE CROSS" },
! 2119: { 0x2726, "lozf", "" },
! 2120: { 0x2736, "sext", "SIX POINTED BLACK STAR" },
! 2121: { 0x3008, "lang", "" },
! 2122: { 0x3009, "rang", "" },
! 2123: { 0xE291, "rpargt", "" },
! 2124: { 0xE2A2, "lnap", "" },
! 2125: { 0xE2AA, "nsmid", "" },
! 2126: { 0xE2B3, "prnE", "" },
! 2127: { 0xE2B5, "scnE", "" },
! 2128: { 0xE2B8, "vsubnE", "" },
! 2129: { 0xE301, "smid", "" },
! 2130: { 0xE411, "gnap", "" },
! 2131: { 0xFB00, "fflig", "" },
! 2132: { 0xFB01, "filig", "" },
! 2133: { 0xFB02, "fllig", "" },
! 2134: { 0xFB03, "ffilig", "" },
! 2135: { 0xFB04, "ffllig", "" },
! 2136: { 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
! 2137: };
! 2138:
! 2139: /************************************************************************
! 2140: * *
! 2141: * Commodity functions to handle entities *
! 2142: * *
! 2143: ************************************************************************/
! 2144:
/*
 * growBuffer:
 * @buffer: name of the xmlChar * variable to enlarge. A companion variable
 *          named <buffer>_size holding its capacity must be in scope.
 *
 * Macro used to grow the current buffer: the capacity is doubled and the
 * block is reallocated.
 *
 * The result of xmlRealloc() is stored in a temporary first so that the
 * original block is not lost when the reallocation fails. On failure the
 * old block is released and the *enclosing function* returns NULL, so the
 * macro can only be used inside functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *) xmlRealloc(buffer,			\
                         buffer##_size * sizeof(xmlChar));		\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
! 2156:
! 2157: /**
! 2158: * sgmlEntityLookup:
! 2159: * @name: the entity name
! 2160: *
! 2161: * Lookup the given entity in EntitiesTable
! 2162: *
! 2163: * TODO: the linear scan is really ugly, an hash table is really needed.
! 2164: *
! 2165: * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
! 2166: */
! 2167: sgmlEntityDescPtr
! 2168: sgmlEntityLookup(const xmlChar *name) {
! 2169: int i;
! 2170:
! 2171: for (i = 0;i < (sizeof(docbookEntitiesTable)/
! 2172: sizeof(docbookEntitiesTable[0]));i++) {
! 2173: if (!xmlStrcmp(name, BAD_CAST docbookEntitiesTable[i].name)) {
! 2174: #ifdef DEBUG
! 2175: fprintf(stderr,"Found entity %s\n", name);
! 2176: #endif
! 2177: return(&docbookEntitiesTable[i]);
! 2178: }
! 2179: }
! 2180: return(NULL);
! 2181: }
! 2182:
! 2183: /**
! 2184: * sgmlEntityValueLookup:
! 2185: * @value: the entity's unicode value
! 2186: *
! 2187: * Lookup the given entity in EntitiesTable
! 2188: *
! 2189: * TODO: the linear scan is really ugly, an hash table is really needed.
! 2190: *
! 2191: * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
! 2192: */
! 2193: sgmlEntityDescPtr
! 2194: sgmlEntityValueLookup(int value) {
! 2195: int i;
! 2196: #ifdef DEBUG
! 2197: int lv = 0;
! 2198: #endif
! 2199:
! 2200: for (i = 0;i < (sizeof(docbookEntitiesTable)/
! 2201: sizeof(docbookEntitiesTable[0]));i++) {
! 2202: if (docbookEntitiesTable[i].value >= value) {
! 2203: if (docbookEntitiesTable[i].value > value)
! 2204: break;
! 2205: #ifdef DEBUG
! 2206: fprintf(stderr,"Found entity %s\n", docbookEntitiesTable[i].name);
! 2207: #endif
! 2208: return(&docbookEntitiesTable[i]);
! 2209: }
! 2210: #ifdef DEBUG
! 2211: if (lv > docbookEntitiesTable[i].value) {
! 2212: fprintf(stderr, "docbookEntitiesTable[] is not sorted (%d > %d)!\n",
! 2213: lv, docbookEntitiesTable[i].value);
! 2214: }
! 2215: lv = docbookEntitiesTable[i].value;
! 2216: #endif
! 2217: }
! 2218: return(NULL);
! 2219: }
! 2220:
! 2221: /**
! 2222: * UTF8ToSgml:
! 2223: * @out: a pointer to an array of bytes to store the result
! 2224: * @outlen: the length of @out
! 2225: * @in: a pointer to an array of UTF-8 chars
! 2226: * @inlen: the length of @in
! 2227: *
! 2228: * Take a block of UTF-8 chars in and try to convert it to an ASCII
! 2229: * plus SGML entities block of chars out.
! 2230: *
! 2231: * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
! 2232: * The value of @inlen after return is the number of octets consumed
! 2233: * as the return value is positive, else unpredictiable.
! 2234: * The value of @outlen after return is the number of octets consumed.
! 2235: */
! 2236: int
! 2237: UTF8ToSgml(unsigned char* out, int *outlen,
! 2238: const unsigned char* in, int *inlen) {
! 2239: const unsigned char* processed = in;
! 2240: const unsigned char* outend;
! 2241: const unsigned char* outstart = out;
! 2242: const unsigned char* instart = in;
! 2243: const unsigned char* inend;
! 2244: unsigned int c, d;
! 2245: int trailing;
! 2246:
! 2247: if (in == NULL) {
! 2248: /*
! 2249: * initialization nothing to do
! 2250: */
! 2251: *outlen = 0;
! 2252: *inlen = 0;
! 2253: return(0);
! 2254: }
! 2255: inend = in + (*inlen);
! 2256: outend = out + (*outlen);
! 2257: while (in < inend) {
! 2258: d = *in++;
! 2259: if (d < 0x80) { c= d; trailing= 0; }
! 2260: else if (d < 0xC0) {
! 2261: /* trailing byte in leading position */
! 2262: *outlen = out - outstart;
! 2263: *inlen = processed - instart;
! 2264: return(-2);
! 2265: } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
! 2266: else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
! 2267: else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
! 2268: else {
! 2269: /* no chance for this in Ascii */
! 2270: *outlen = out - outstart;
! 2271: *inlen = processed - instart;
! 2272: return(-2);
! 2273: }
! 2274:
! 2275: if (inend - in < trailing) {
! 2276: break;
! 2277: }
! 2278:
! 2279: for ( ; trailing; trailing--) {
! 2280: if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
! 2281: break;
! 2282: c <<= 6;
! 2283: c |= d & 0x3F;
! 2284: }
! 2285:
! 2286: /* assertion: c is a single UTF-4 value */
! 2287: if (c < 0x80) {
! 2288: if (out + 1 >= outend)
! 2289: break;
! 2290: *out++ = c;
! 2291: } else {
! 2292: int len;
! 2293: sgmlEntityDescPtr ent;
! 2294:
! 2295: /*
! 2296: * Try to lookup a predefined SGML entity for it
! 2297: */
! 2298:
! 2299: ent = sgmlEntityValueLookup(c);
! 2300: if (ent == NULL) {
! 2301: /* no chance for this in Ascii */
! 2302: *outlen = out - outstart;
! 2303: *inlen = processed - instart;
! 2304: return(-2);
! 2305: }
! 2306: len = strlen(ent->name);
! 2307: if (out + 2 + len >= outend)
! 2308: break;
! 2309: *out++ = '&';
! 2310: memcpy(out, ent->name, len);
! 2311: out += len;
! 2312: *out++ = ';';
! 2313: }
! 2314: processed = in;
! 2315: }
! 2316: *outlen = out - outstart;
! 2317: *inlen = processed - instart;
! 2318: return(0);
! 2319: }
! 2320:
! 2321: /**
! 2322: * sgmlEncodeEntities:
! 2323: * @out: a pointer to an array of bytes to store the result
! 2324: * @outlen: the length of @out
! 2325: * @in: a pointer to an array of UTF-8 chars
! 2326: * @inlen: the length of @in
! 2327: * @quoteChar: the quote character to escape (' or ") or zero.
! 2328: *
! 2329: * Take a block of UTF-8 chars in and try to convert it to an ASCII
! 2330: * plus SGML entities block of chars out.
! 2331: *
! 2332: * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
! 2333: * The value of @inlen after return is the number of octets consumed
! 2334: * as the return value is positive, else unpredictiable.
! 2335: * The value of @outlen after return is the number of octets consumed.
! 2336: */
! 2337: int
! 2338: sgmlEncodeEntities(unsigned char* out, int *outlen,
! 2339: const unsigned char* in, int *inlen, int quoteChar) {
! 2340: const unsigned char* processed = in;
! 2341: const unsigned char* outend = out + (*outlen);
! 2342: const unsigned char* outstart = out;
! 2343: const unsigned char* instart = in;
! 2344: const unsigned char* inend = in + (*inlen);
! 2345: unsigned int c, d;
! 2346: int trailing;
! 2347:
! 2348: while (in < inend) {
! 2349: d = *in++;
! 2350: if (d < 0x80) { c= d; trailing= 0; }
! 2351: else if (d < 0xC0) {
! 2352: /* trailing byte in leading position */
! 2353: *outlen = out - outstart;
! 2354: *inlen = processed - instart;
! 2355: return(-2);
! 2356: } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
! 2357: else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
! 2358: else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
! 2359: else {
! 2360: /* no chance for this in Ascii */
! 2361: *outlen = out - outstart;
! 2362: *inlen = processed - instart;
! 2363: return(-2);
! 2364: }
! 2365:
! 2366: if (inend - in < trailing)
! 2367: break;
! 2368:
! 2369: while (trailing--) {
! 2370: if (((d= *in++) & 0xC0) != 0x80) {
! 2371: *outlen = out - outstart;
! 2372: *inlen = processed - instart;
! 2373: return(-2);
! 2374: }
! 2375: c <<= 6;
! 2376: c |= d & 0x3F;
! 2377: }
! 2378:
! 2379: /* assertion: c is a single UTF-4 value */
! 2380: if (c < 0x80 && c != quoteChar && c != '&' && c != '<' && c != '>') {
! 2381: if (out >= outend)
! 2382: break;
! 2383: *out++ = c;
! 2384: } else {
! 2385: sgmlEntityDescPtr ent;
! 2386: const char *cp;
! 2387: char nbuf[16];
! 2388: int len;
! 2389:
! 2390: /*
! 2391: * Try to lookup a predefined SGML entity for it
! 2392: */
! 2393: ent = sgmlEntityValueLookup(c);
! 2394: if (ent == NULL) {
! 2395: sprintf(nbuf, "#%u", c);
! 2396: cp = nbuf;
! 2397: }
! 2398: else
! 2399: cp = ent->name;
! 2400: len = strlen(cp);
! 2401: if (out + 2 + len > outend)
! 2402: break;
! 2403: *out++ = '&';
! 2404: memcpy(out, cp, len);
! 2405: out += len;
! 2406: *out++ = ';';
! 2407: }
! 2408: processed = in;
! 2409: }
! 2410: *outlen = out - outstart;
! 2411: *inlen = processed - instart;
! 2412: return(0);
! 2413: }
! 2414:
! 2415: /**
! 2416: * sgmlDecodeEntities:
! 2417: * @ctxt: the parser context
! 2418: * @len: the len to decode (in bytes !), -1 for no size limit
! 2419: * @end: an end marker xmlChar, 0 if none
! 2420: * @end2: an end marker xmlChar, 0 if none
! 2421: * @end3: an end marker xmlChar, 0 if none
! 2422: *
! 2423: * Subtitute the SGML entities by their value
! 2424: *
! 2425: * DEPRECATED !!!!
! 2426: *
! 2427: * Returns A newly allocated string with the substitution done. The caller
! 2428: * must deallocate it !
! 2429: */
! 2430: xmlChar *
! 2431: sgmlDecodeEntities(sgmlParserCtxtPtr ctxt, int len,
! 2432: xmlChar end, xmlChar end2, xmlChar end3) {
! 2433: xmlChar *name = NULL;
! 2434: xmlChar *buffer = NULL;
! 2435: unsigned int buffer_size = 0;
! 2436: unsigned int nbchars = 0;
! 2437: sgmlEntityDescPtr ent;
! 2438: unsigned int max = (unsigned int) len;
! 2439: int c,l;
! 2440:
! 2441: if (ctxt->depth > 40) {
! 2442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2443: ctxt->sax->error(ctxt->userData,
! 2444: "Detected entity reference loop\n");
! 2445: ctxt->wellFormed = 0;
! 2446: ctxt->disableSAX = 1;
! 2447: ctxt->errNo = XML_ERR_ENTITY_LOOP;
! 2448: return(NULL);
! 2449: }
! 2450:
! 2451: /*
! 2452: * allocate a translation buffer.
! 2453: */
! 2454: buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
! 2455: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
! 2456: if (buffer == NULL) {
! 2457: perror("xmlDecodeEntities: malloc failed");
! 2458: return(NULL);
! 2459: }
! 2460:
! 2461: /*
! 2462: * Ok loop until we reach one of the ending char or a size limit.
! 2463: */
! 2464: c = CUR_CHAR(l);
! 2465: while ((nbchars < max) && (c != end) &&
! 2466: (c != end2) && (c != end3)) {
! 2467:
! 2468: if (c == 0) break;
! 2469: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
! 2470: int val = sgmlParseCharRef(ctxt);
! 2471: COPY_BUF(0,buffer,nbchars,val);
! 2472: NEXTL(l);
! 2473: } else if ((c == '&') && (ctxt->token != '&')) {
! 2474: ent = sgmlParseEntityRef(ctxt, &name);
! 2475: if (name != NULL) {
! 2476: if (ent != NULL) {
! 2477: int val = ent->value;
! 2478: COPY_BUF(0,buffer,nbchars,val);
! 2479: NEXTL(l);
! 2480: } else {
! 2481: const xmlChar *cur = name;
! 2482:
! 2483: buffer[nbchars++] = '&';
! 2484: if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
! 2485: growBuffer(buffer);
! 2486: }
! 2487: while (*cur != 0) {
! 2488: buffer[nbchars++] = *cur++;
! 2489: }
! 2490: buffer[nbchars++] = ';';
! 2491: }
! 2492: }
! 2493: } else {
! 2494: COPY_BUF(l,buffer,nbchars,c);
! 2495: NEXTL(l);
! 2496: if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
! 2497: growBuffer(buffer);
! 2498: }
! 2499: }
! 2500: c = CUR_CHAR(l);
! 2501: }
! 2502: buffer[nbchars++] = 0;
! 2503: return(buffer);
! 2504: }
! 2505:
! 2506: /************************************************************************
! 2507: * *
! 2508: * Commodity functions to handle streams *
! 2509: * *
! 2510: ************************************************************************/
! 2511:
! 2512: /**
! 2513: * sgmlFreeInputStream:
! 2514: * @input: an sgmlParserInputPtr
! 2515: *
! 2516: * Free up an input stream.
! 2517: */
! 2518: void
! 2519: sgmlFreeInputStream(sgmlParserInputPtr input) {
! 2520: if (input == NULL) return;
! 2521:
! 2522: if (input->filename != NULL) xmlFree((char *) input->filename);
! 2523: if (input->directory != NULL) xmlFree((char *) input->directory);
! 2524: if ((input->free != NULL) && (input->base != NULL))
! 2525: input->free((xmlChar *) input->base);
! 2526: if (input->buf != NULL)
! 2527: xmlFreeParserInputBuffer(input->buf);
! 2528: memset(input, -1, sizeof(sgmlParserInput));
! 2529: xmlFree(input);
! 2530: }
! 2531:
! 2532: /**
! 2533: * sgmlNewInputStream:
! 2534: * @ctxt: an SGML parser context
! 2535: *
! 2536: * Create a new input stream structure
! 2537: * Returns the new input stream or NULL
! 2538: */
! 2539: sgmlParserInputPtr
! 2540: sgmlNewInputStream(sgmlParserCtxtPtr ctxt) {
! 2541: sgmlParserInputPtr input;
! 2542:
! 2543: input = (xmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
! 2544: if (input == NULL) {
! 2545: ctxt->errNo = XML_ERR_NO_MEMORY;
! 2546: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2547: ctxt->sax->error(ctxt->userData,
! 2548: "malloc: couldn't allocate a new input stream\n");
! 2549: ctxt->errNo = XML_ERR_NO_MEMORY;
! 2550: return(NULL);
! 2551: }
! 2552: memset(input, 0, sizeof(sgmlParserInput));
! 2553: input->filename = NULL;
! 2554: input->directory = NULL;
! 2555: input->base = NULL;
! 2556: input->cur = NULL;
! 2557: input->buf = NULL;
! 2558: input->line = 1;
! 2559: input->col = 1;
! 2560: input->buf = NULL;
! 2561: input->free = NULL;
! 2562: input->version = NULL;
! 2563: input->consumed = 0;
! 2564: input->length = 0;
! 2565: return(input);
! 2566: }
! 2567:
! 2568:
! 2569: /************************************************************************
! 2570: * *
! 2571: * Commodity functions, cleanup needed ? *
! 2572: * *
! 2573: ************************************************************************/
! 2574:
! 2575: /**
! 2576: * areBlanks:
! 2577: * @ctxt: an SGML parser context
! 2578: * @str: a xmlChar *
! 2579: * @len: the size of @str
! 2580: *
! 2581: * Is this a sequence of blank chars that one can ignore ?
! 2582: *
! 2583: * Returns 1 if ignorable 0 otherwise.
! 2584: */
! 2585:
! 2586: static int areBlanks(sgmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
! 2587: int i;
! 2588: xmlNodePtr lastChild;
! 2589:
! 2590: for (i = 0;i < len;i++)
! 2591: if (!(IS_BLANK(str[i]))) return(0);
! 2592:
! 2593: if (CUR == 0) return(1);
! 2594: if (CUR != '<') return(0);
! 2595: if (ctxt->name == NULL)
! 2596: return(1);
! 2597: #if 0
! 2598: if (!xmlStrcmp(ctxt->name, BAD_CAST"sgml"))
! 2599: return(1);
! 2600: if (!xmlStrcmp(ctxt->name, BAD_CAST"head"))
! 2601: return(1);
! 2602: if (!xmlStrcmp(ctxt->name, BAD_CAST"body"))
! 2603: return(1);
! 2604: #endif
! 2605: if (ctxt->node == NULL) return(0);
! 2606: lastChild = xmlGetLastChild(ctxt->node);
! 2607: if (lastChild == NULL) {
! 2608: if (ctxt->node->content != NULL) return(0);
! 2609: } else if (xmlNodeIsText(lastChild))
! 2610: return(0);
! 2611: return(1);
! 2612: }
! 2613:
! 2614: /**
! 2615: * sgmlHandleEntity:
! 2616: * @ctxt: an SGML parser context
! 2617: * @entity: an XML entity pointer.
! 2618: *
! 2619: * Default handling of an SGML entity, call the parser with the
! 2620: * substitution string
! 2621: */
! 2622:
! 2623: void
! 2624: sgmlHandleEntity(sgmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
! 2625: int len;
! 2626:
! 2627: if (entity->content == NULL) {
! 2628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 2629: ctxt->sax->error(ctxt->userData, "sgmlHandleEntity %s: content == NULL\n",
! 2630: entity->name);
! 2631: ctxt->wellFormed = 0;
! 2632: return;
! 2633: }
! 2634: len = xmlStrlen(entity->content);
! 2635:
! 2636: /*
! 2637: * Just handle the content as a set of chars.
! 2638: */
! 2639: sgmlCheckParagraph(ctxt);
! 2640: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
! 2641: ctxt->sax->characters(ctxt->userData, entity->content, len);
! 2642:
! 2643: }
! 2644:
! 2645: /**
! 2646: * sgmlNewDocNoDtD:
! 2647: * @URI: URI for the dtd, or NULL
! 2648: * @ExternalID: the external ID of the DTD, or NULL
! 2649: *
! 2650: * Returns a new document, do not intialize the DTD if not provided
! 2651: */
! 2652: sgmlDocPtr
! 2653: sgmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
! 2654: xmlDocPtr cur;
! 2655:
! 2656: /*
! 2657: * Allocate a new document and fill the fields.
! 2658: */
! 2659: cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
! 2660: if (cur == NULL) {
! 2661: fprintf(stderr, "xmlNewDoc : malloc failed\n");
! 2662: return(NULL);
! 2663: }
! 2664: memset(cur, 0, sizeof(xmlDoc));
! 2665:
! 2666: cur->type = XML_SGML_DOCUMENT_NODE;
! 2667: cur->version = NULL;
! 2668: cur->intSubset = NULL;
! 2669: if ((ExternalID != NULL) ||
! 2670: (URI != NULL))
! 2671: xmlCreateIntSubset(cur, BAD_CAST "SGML", ExternalID, URI);
! 2672: cur->doc = cur;
! 2673: cur->name = NULL;
! 2674: cur->children = NULL;
! 2675: cur->extSubset = NULL;
! 2676: cur->oldNs = NULL;
! 2677: cur->encoding = NULL;
! 2678: cur->standalone = 1;
! 2679: cur->compression = 0;
! 2680: cur->ids = NULL;
! 2681: cur->refs = NULL;
! 2682: #ifndef XML_WITHOUT_CORBA
! 2683: cur->_private = NULL;
! 2684: #endif
! 2685: return(cur);
! 2686: }
! 2687:
! 2688: /**
! 2689: * sgmlNewDoc:
! 2690: * @URI: URI for the dtd, or NULL
! 2691: * @ExternalID: the external ID of the DTD, or NULL
! 2692: *
! 2693: * Returns a new document
! 2694: */
! 2695: sgmlDocPtr
! 2696: sgmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
! 2697: if ((URI == NULL) && (ExternalID == NULL))
! 2698: return(sgmlNewDocNoDtD(
! 2699: BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
! 2700: BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"));
! 2701:
! 2702: return(sgmlNewDocNoDtD(URI, ExternalID));
! 2703: }
! 2704:
! 2705:
! 2706: /************************************************************************
! 2707: * *
! 2708: * The parser itself *
! 2709: * Relates to http://www.w3.org/TR/docbook *
! 2710: * *
! 2711: ************************************************************************/
! 2712:
! 2713: /************************************************************************
! 2714: * *
! 2715: * The parser itself *
! 2716: * *
! 2717: ************************************************************************/
! 2718:
! 2719: /**
! 2720: * sgmlParseSGMLName:
! 2721: * @ctxt: an SGML parser context
! 2722: *
! 2723: * parse an SGML tag or attribute name, note that we convert it to lowercase
! 2724: * since SGML names are not case-sensitive.
! 2725: *
! 2726: * Returns the Tag Name parsed or NULL
! 2727: */
! 2728:
! 2729: xmlChar *
! 2730: sgmlParseSGMLName(sgmlParserCtxtPtr ctxt) {
! 2731: xmlChar *ret = NULL;
! 2732: int i = 0;
! 2733: xmlChar loc[SGML_PARSER_BUFFER_SIZE];
! 2734:
! 2735: if (!IS_LETTER(CUR) && (CUR != '_') &&
! 2736: (CUR != ':')) return(NULL);
! 2737:
! 2738: while ((i < SGML_PARSER_BUFFER_SIZE) &&
! 2739: ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
! 2740: (CUR == ':') || (CUR == '_'))) {
! 2741: if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
! 2742: else loc[i] = CUR;
! 2743: i++;
! 2744:
! 2745: NEXT;
! 2746: }
! 2747:
! 2748: ret = xmlStrndup(loc, i);
! 2749:
! 2750: return(ret);
! 2751: }
! 2752:
! 2753: /**
! 2754: * sgmlParseName:
! 2755: * @ctxt: an SGML parser context
! 2756: *
! 2757: * parse an SGML name, this routine is case sensistive.
! 2758: *
! 2759: * Returns the Name parsed or NULL
! 2760: */
! 2761:
! 2762: xmlChar *
! 2763: sgmlParseName(sgmlParserCtxtPtr ctxt) {
! 2764: xmlChar buf[SGML_MAX_NAMELEN];
! 2765: int len = 0;
! 2766:
! 2767: GROW;
! 2768: if (!IS_LETTER(CUR) && (CUR != '_')) {
! 2769: return(NULL);
! 2770: }
! 2771:
! 2772: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
! 2773: (CUR == '.') || (CUR == '-') ||
! 2774: (CUR == '_') || (CUR == ':') ||
! 2775: (IS_COMBINING(CUR)) ||
! 2776: (IS_EXTENDER(CUR))) {
! 2777: buf[len++] = CUR;
! 2778: NEXT;
! 2779: if (len >= SGML_MAX_NAMELEN) {
! 2780: fprintf(stderr,
! 2781: "sgmlParseName: reached SGML_MAX_NAMELEN limit\n");
! 2782: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
! 2783: (CUR == '.') || (CUR == '-') ||
! 2784: (CUR == '_') || (CUR == ':') ||
! 2785: (IS_COMBINING(CUR)) ||
! 2786: (IS_EXTENDER(CUR)))
! 2787: NEXT;
! 2788: break;
! 2789: }
! 2790: }
! 2791: return(xmlStrndup(buf, len));
! 2792: }
! 2793:
! 2794: /**
! 2795: * sgmlParseSGMLAttribute:
! 2796: * @ctxt: an SGML parser context
! 2797: * @stop: a char stop value
! 2798: *
! 2799: * parse an SGML attribute value till the stop (quote), if
! 2800: * stop is 0 then it stops at the first space
! 2801: *
! 2802: * Returns the attribute parsed or NULL
! 2803: */
! 2804:
! 2805: xmlChar *
! 2806: sgmlParseSGMLAttribute(sgmlParserCtxtPtr ctxt, const xmlChar stop) {
! 2807: #if 0
! 2808: xmlChar buf[SGML_MAX_NAMELEN];
! 2809: int len = 0;
! 2810:
! 2811: GROW;
! 2812: while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
! 2813: if ((stop == 0) && (IS_BLANK(CUR))) break;
! 2814: buf[len++] = CUR;
! 2815: NEXT;
! 2816: if (len >= SGML_MAX_NAMELEN) {
! 2817: fprintf(stderr,
! 2818: "sgmlParseSGMLAttribute: reached SGML_MAX_NAMELEN limit\n");
! 2819: while ((!IS_BLANK(CUR)) && (CUR != '<') &&
! 2820: (CUR != '>') &&
! 2821: (CUR != '\'') && (CUR != '"'))
! 2822: NEXT;
! 2823: break;
! 2824: }
! 2825: }
! 2826: return(xmlStrndup(buf, len));
! 2827: #else
! 2828: xmlChar *buffer = NULL;
! 2829: int buffer_size = 0;
! 2830: xmlChar *out = NULL;
! 2831: xmlChar *name = NULL;
! 2832:
! 2833: xmlChar *cur = NULL;
! 2834: sgmlEntityDescPtr ent;
! 2835:
! 2836: /*
! 2837: * allocate a translation buffer.
! 2838: */
! 2839: buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
! 2840: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
! 2841: if (buffer == NULL) {
! 2842: perror("sgmlParseSGMLAttribute: malloc failed");
! 2843: return(NULL);
! 2844: }
! 2845: out = buffer;
! 2846:
! 2847: /*
! 2848: * Ok loop until we reach one of the ending chars
! 2849: */
! 2850: while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
! 2851: if ((stop == 0) && (IS_BLANK(CUR))) break;
! 2852: if (CUR == '&') {
! 2853: if (NXT(1) == '#') {
! 2854: unsigned int c;
! 2855: int bits;
! 2856:
! 2857: c = sgmlParseCharRef(ctxt);
! 2858: if (c < 0x80)
! 2859: { *out++ = c; bits= -6; }
! 2860: else if (c < 0x800)
! 2861: { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
! 2862: else if (c < 0x10000)
! 2863: { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
! 2864: else
! 2865: { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
! 2866:
! 2867: for ( ; bits >= 0; bits-= 6) {
! 2868: *out++ = ((c >> bits) & 0x3F) | 0x80;
! 2869: }
! 2870: } else {
! 2871: ent = sgmlParseEntityRef(ctxt, &name);
! 2872: if (name == NULL) {
! 2873: *out++ = '&';
! 2874: if (out - buffer > buffer_size - 100) {
! 2875: int index = out - buffer;
! 2876:
! 2877: growBuffer(buffer);
! 2878: out = &buffer[index];
! 2879: }
! 2880: } else if (ent == NULL) {
! 2881: *out++ = '&';
! 2882: cur = name;
! 2883: while (*cur != 0) {
! 2884: if (out - buffer > buffer_size - 100) {
! 2885: int index = out - buffer;
! 2886:
! 2887: growBuffer(buffer);
! 2888: out = &buffer[index];
! 2889: }
! 2890: *out++ = *cur++;
! 2891: }
! 2892: xmlFree(name);
! 2893: } else {
! 2894: unsigned int c;
! 2895: int bits;
! 2896:
! 2897: if (out - buffer > buffer_size - 100) {
! 2898: int index = out - buffer;
! 2899:
! 2900: growBuffer(buffer);
! 2901: out = &buffer[index];
! 2902: }
! 2903: c = (xmlChar)ent->value;
! 2904: if (c < 0x80)
! 2905: { *out++ = c; bits= -6; }
! 2906: else if (c < 0x800)
! 2907: { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
! 2908: else if (c < 0x10000)
! 2909: { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
! 2910: else
! 2911: { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
! 2912:
! 2913: for ( ; bits >= 0; bits-= 6) {
! 2914: *out++ = ((c >> bits) & 0x3F) | 0x80;
! 2915: }
! 2916: xmlFree(name);
! 2917: }
! 2918: }
! 2919: } else {
! 2920: unsigned int c;
! 2921: int bits;
! 2922:
! 2923: if (out - buffer > buffer_size - 100) {
! 2924: int index = out - buffer;
! 2925:
! 2926: growBuffer(buffer);
! 2927: out = &buffer[index];
! 2928: }
! 2929: c = CUR;
! 2930: if (c < 0x80)
! 2931: { *out++ = c; bits= -6; }
! 2932: else if (c < 0x800)
! 2933: { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
! 2934: else if (c < 0x10000)
! 2935: { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
! 2936: else
! 2937: { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
! 2938:
! 2939: for ( ; bits >= 0; bits-= 6) {
! 2940: *out++ = ((c >> bits) & 0x3F) | 0x80;
! 2941: }
! 2942: NEXT;
! 2943: }
! 2944: }
! 2945: *out++ = 0;
! 2946: return(buffer);
! 2947: #endif
! 2948: }
! 2949:
! 2950: /**
! 2951: * sgmlParseNmtoken:
! 2952: * @ctxt: an SGML parser context
! 2953: *
! 2954: * parse an SGML Nmtoken.
! 2955: *
! 2956: * Returns the Nmtoken parsed or NULL
! 2957: */
! 2958:
! 2959: xmlChar *
! 2960: sgmlParseNmtoken(sgmlParserCtxtPtr ctxt) {
! 2961: xmlChar buf[SGML_MAX_NAMELEN];
! 2962: int len = 0;
! 2963:
! 2964: GROW;
! 2965: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
! 2966: (CUR == '.') || (CUR == '-') ||
! 2967: (CUR == '_') || (CUR == ':') ||
! 2968: (IS_COMBINING(CUR)) ||
! 2969: (IS_EXTENDER(CUR))) {
! 2970: buf[len++] = CUR;
! 2971: NEXT;
! 2972: if (len >= SGML_MAX_NAMELEN) {
! 2973: fprintf(stderr,
! 2974: "sgmlParseNmtoken: reached SGML_MAX_NAMELEN limit\n");
! 2975: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
! 2976: (CUR == '.') || (CUR == '-') ||
! 2977: (CUR == '_') || (CUR == ':') ||
! 2978: (IS_COMBINING(CUR)) ||
! 2979: (IS_EXTENDER(CUR)))
! 2980: NEXT;
! 2981: break;
! 2982: }
! 2983: }
! 2984: return(xmlStrndup(buf, len));
! 2985: }
! 2986:
! 2987: /**
! 2988: * sgmlParseEntityRef:
! 2989: * @ctxt: an SGML parser context
! 2990: * @str: location to store the entity name
! 2991: *
! 2992: * parse an SGML ENTITY references
! 2993: *
! 2994: * [68] EntityRef ::= '&' Name ';'
! 2995: *
! 2996: * Returns the associated sgmlEntityDescPtr if found, or NULL otherwise,
! 2997: * if non-NULL *str will have to be freed by the caller.
! 2998: */
! 2999: sgmlEntityDescPtr
! 3000: sgmlParseEntityRef(sgmlParserCtxtPtr ctxt, xmlChar **str) {
! 3001: xmlChar *name;
! 3002: sgmlEntityDescPtr ent = NULL;
! 3003: *str = NULL;
! 3004:
! 3005: if (CUR == '&') {
! 3006: NEXT;
! 3007: name = sgmlParseName(ctxt);
! 3008: if (name == NULL) {
! 3009: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3010: ctxt->sax->error(ctxt->userData, "sgmlParseEntityRef: no name\n");
! 3011: ctxt->wellFormed = 0;
! 3012: } else {
! 3013: GROW;
! 3014: if (CUR == ';') {
! 3015: *str = name;
! 3016:
! 3017: /*
! 3018: * Lookup the entity in the table.
! 3019: */
! 3020: ent = sgmlEntityLookup(name);
! 3021: if (ent != NULL) /* OK that's ugly !!! */
! 3022: NEXT;
! 3023: } else {
! 3024: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3025: ctxt->sax->error(ctxt->userData,
! 3026: "sgmlParseEntityRef: expecting ';'\n");
! 3027: *str = name;
! 3028: }
! 3029: }
! 3030: }
! 3031: return(ent);
! 3032: }
! 3033:
! 3034: /**
! 3035: * sgmlParseAttValue:
! 3036: * @ctxt: an SGML parser context
! 3037: *
! 3038: * parse a value for an attribute
! 3039: * Note: the parser won't do substitution of entities here, this
! 3040: * will be handled later in xmlStringGetNodeList, unless it was
! 3041: * asked for ctxt->replaceEntities != 0
! 3042: *
! 3043: * Returns the AttValue parsed or NULL.
! 3044: */
! 3045:
! 3046: xmlChar *
! 3047: sgmlParseAttValue(sgmlParserCtxtPtr ctxt) {
! 3048: xmlChar *ret = NULL;
! 3049:
! 3050: if (CUR == '"') {
! 3051: NEXT;
! 3052: ret = sgmlParseSGMLAttribute(ctxt, '"');
! 3053: if (CUR != '"') {
! 3054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3055: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
! 3056: ctxt->wellFormed = 0;
! 3057: } else
! 3058: NEXT;
! 3059: } else if (CUR == '\'') {
! 3060: NEXT;
! 3061: ret = sgmlParseSGMLAttribute(ctxt, '\'');
! 3062: if (CUR != '\'') {
! 3063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3064: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
! 3065: ctxt->wellFormed = 0;
! 3066: } else
! 3067: NEXT;
! 3068: } else {
! 3069: /*
! 3070: * That's an SGMLism, the attribute value may not be quoted
! 3071: */
! 3072: ret = sgmlParseSGMLAttribute(ctxt, 0);
! 3073: if (ret == NULL) {
! 3074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3075: ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
! 3076: ctxt->wellFormed = 0;
! 3077: }
! 3078: }
! 3079: return(ret);
! 3080: }
! 3081:
! 3082: /**
! 3083: * sgmlParseSystemLiteral:
! 3084: * @ctxt: an SGML parser context
! 3085: *
! 3086: * parse an SGML Literal
! 3087: *
! 3088: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
! 3089: *
! 3090: * Returns the SystemLiteral parsed or NULL
! 3091: */
! 3092:
! 3093: xmlChar *
! 3094: sgmlParseSystemLiteral(sgmlParserCtxtPtr ctxt) {
! 3095: const xmlChar *q;
! 3096: xmlChar *ret = NULL;
! 3097:
! 3098: if (CUR == '"') {
! 3099: NEXT;
! 3100: q = CUR_PTR;
! 3101: while ((IS_CHAR(CUR)) && (CUR != '"'))
! 3102: NEXT;
! 3103: if (!IS_CHAR(CUR)) {
! 3104: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3105: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
! 3106: ctxt->wellFormed = 0;
! 3107: } else {
! 3108: ret = xmlStrndup(q, CUR_PTR - q);
! 3109: NEXT;
! 3110: }
! 3111: } else if (CUR == '\'') {
! 3112: NEXT;
! 3113: q = CUR_PTR;
! 3114: while ((IS_CHAR(CUR)) && (CUR != '\''))
! 3115: NEXT;
! 3116: if (!IS_CHAR(CUR)) {
! 3117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3118: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
! 3119: ctxt->wellFormed = 0;
! 3120: } else {
! 3121: ret = xmlStrndup(q, CUR_PTR - q);
! 3122: NEXT;
! 3123: }
! 3124: } else {
! 3125: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3126: ctxt->sax->error(ctxt->userData,
! 3127: "SystemLiteral \" or ' expected\n");
! 3128: ctxt->wellFormed = 0;
! 3129: }
! 3130:
! 3131: return(ret);
! 3132: }
! 3133:
! 3134: /**
! 3135: * sgmlParsePubidLiteral:
! 3136: * @ctxt: an SGML parser context
! 3137: *
! 3138: * parse an SGML public literal
! 3139: *
! 3140: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
! 3141: *
! 3142: * Returns the PubidLiteral parsed or NULL.
! 3143: */
! 3144:
! 3145: xmlChar *
! 3146: sgmlParsePubidLiteral(sgmlParserCtxtPtr ctxt) {
! 3147: const xmlChar *q;
! 3148: xmlChar *ret = NULL;
! 3149: /*
! 3150: * Name ::= (Letter | '_') (NameChar)*
! 3151: */
! 3152: if (CUR == '"') {
! 3153: NEXT;
! 3154: q = CUR_PTR;
! 3155: while (IS_PUBIDCHAR(CUR)) NEXT;
! 3156: if (CUR != '"') {
! 3157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3158: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
! 3159: ctxt->wellFormed = 0;
! 3160: } else {
! 3161: ret = xmlStrndup(q, CUR_PTR - q);
! 3162: NEXT;
! 3163: }
! 3164: } else if (CUR == '\'') {
! 3165: NEXT;
! 3166: q = CUR_PTR;
! 3167: while ((IS_LETTER(CUR)) && (CUR != '\''))
! 3168: NEXT;
! 3169: if (!IS_LETTER(CUR)) {
! 3170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3171: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
! 3172: ctxt->wellFormed = 0;
! 3173: } else {
! 3174: ret = xmlStrndup(q, CUR_PTR - q);
! 3175: NEXT;
! 3176: }
! 3177: } else {
! 3178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3179: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
! 3180: ctxt->wellFormed = 0;
! 3181: }
! 3182:
! 3183: return(ret);
! 3184: }
! 3185:
! 3186: /**
! 3187: * sgmlParseCharData:
! 3188: * @ctxt: an SGML parser context
! 3189: * @cdata: int indicating whether we are within a CDATA section
! 3190: *
! 3191: * parse a CharData section.
! 3192: * if we are within a CDATA section ']]>' marks an end of section.
! 3193: *
! 3194: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
! 3195: */
! 3196:
! 3197: void
! 3198: sgmlParseCharData(sgmlParserCtxtPtr ctxt, int cdata) {
! 3199: xmlChar buf[SGML_PARSER_BIG_BUFFER_SIZE + 5];
! 3200: int nbchar = 0;
! 3201: int cur, l;
! 3202:
! 3203: SHRINK;
! 3204: cur = CUR_CHAR(l);
! 3205: while (((cur != '<') || (ctxt->token == '<')) &&
! 3206: ((cur != '&') || (ctxt->token == '&')) &&
! 3207: (IS_CHAR(cur))) {
! 3208: COPY_BUF(l,buf,nbchar,cur);
! 3209: if (nbchar >= SGML_PARSER_BIG_BUFFER_SIZE) {
! 3210: /*
! 3211: * Ok the segment is to be consumed as chars.
! 3212: */
! 3213: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
! 3214: if (areBlanks(ctxt, buf, nbchar)) {
! 3215: if (ctxt->sax->ignorableWhitespace != NULL)
! 3216: ctxt->sax->ignorableWhitespace(ctxt->userData,
! 3217: buf, nbchar);
! 3218: } else {
! 3219: sgmlCheckParagraph(ctxt);
! 3220: if (ctxt->sax->characters != NULL)
! 3221: ctxt->sax->characters(ctxt->userData, buf, nbchar);
! 3222: }
! 3223: }
! 3224: nbchar = 0;
! 3225: }
! 3226: NEXTL(l);
! 3227: cur = CUR_CHAR(l);
! 3228: }
! 3229: if (nbchar != 0) {
! 3230: /*
! 3231: * Ok the segment is to be consumed as chars.
! 3232: */
! 3233: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
! 3234: if (areBlanks(ctxt, buf, nbchar)) {
! 3235: if (ctxt->sax->ignorableWhitespace != NULL)
! 3236: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
! 3237: } else {
! 3238: sgmlCheckParagraph(ctxt);
! 3239: if (ctxt->sax->characters != NULL)
! 3240: ctxt->sax->characters(ctxt->userData, buf, nbchar);
! 3241: }
! 3242: }
! 3243: }
! 3244: }
! 3245:
! 3246: /**
! 3247: * sgmlParseExternalID:
! 3248: * @ctxt: an SGML parser context
! 3249: * @publicID: a xmlChar** receiving PubidLiteral
! 3250: * @strict: indicate whether we should restrict parsing to only
! 3251: * production [75], see NOTE below
! 3252: *
! 3253: * Parse an External ID or a Public ID
! 3254: *
! 3255: * NOTE: Productions [75] and [83] interract badly since [75] can generate
! 3256: * 'PUBLIC' S PubidLiteral S SystemLiteral
! 3257: *
! 3258: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
! 3259: * | 'PUBLIC' S PubidLiteral S SystemLiteral
! 3260: *
! 3261: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
! 3262: *
! 3263: * Returns the function returns SystemLiteral and in the second
! 3264: * case publicID receives PubidLiteral, is strict is off
! 3265: * it is possible to return NULL and have publicID set.
! 3266: */
! 3267:
! 3268: xmlChar *
! 3269: sgmlParseExternalID(sgmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
! 3270: xmlChar *URI = NULL;
! 3271:
! 3272: if ((UPPER == 'S') && (UPP(1) == 'Y') &&
! 3273: (UPP(2) == 'S') && (UPP(3) == 'T') &&
! 3274: (UPP(4) == 'E') && (UPP(5) == 'M')) {
! 3275: SKIP(6);
! 3276: if (!IS_BLANK(CUR)) {
! 3277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3278: ctxt->sax->error(ctxt->userData,
! 3279: "Space required after 'SYSTEM'\n");
! 3280: ctxt->wellFormed = 0;
! 3281: }
! 3282: SKIP_BLANKS;
! 3283: URI = sgmlParseSystemLiteral(ctxt);
! 3284: if (URI == NULL) {
! 3285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3286: ctxt->sax->error(ctxt->userData,
! 3287: "sgmlParseExternalID: SYSTEM, no URI\n");
! 3288: ctxt->wellFormed = 0;
! 3289: }
! 3290: } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
! 3291: (UPP(2) == 'B') && (UPP(3) == 'L') &&
! 3292: (UPP(4) == 'I') && (UPP(5) == 'C')) {
! 3293: SKIP(6);
! 3294: if (!IS_BLANK(CUR)) {
! 3295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3296: ctxt->sax->error(ctxt->userData,
! 3297: "Space required after 'PUBLIC'\n");
! 3298: ctxt->wellFormed = 0;
! 3299: }
! 3300: SKIP_BLANKS;
! 3301: *publicID = sgmlParsePubidLiteral(ctxt);
! 3302: if (*publicID == NULL) {
! 3303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3304: ctxt->sax->error(ctxt->userData,
! 3305: "sgmlParseExternalID: PUBLIC, no Public Identifier\n");
! 3306: ctxt->wellFormed = 0;
! 3307: }
! 3308: SKIP_BLANKS;
! 3309: if ((CUR == '"') || (CUR == '\'')) {
! 3310: URI = sgmlParseSystemLiteral(ctxt);
! 3311: }
! 3312: }
! 3313: return(URI);
! 3314: }
! 3315:
! 3316: /**
! 3317: * sgmlParseComment:
! 3318: * @ctxt: an SGML parser context
! 3319: *
! 3320: * Parse an XML (SGML) comment <!-- .... -->
! 3321: *
! 3322: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
! 3323: */
! 3324: void
! 3325: sgmlParseComment(sgmlParserCtxtPtr ctxt) {
! 3326: xmlChar *buf = NULL;
! 3327: int len;
! 3328: int size = SGML_PARSER_BUFFER_SIZE;
! 3329: int q, ql;
! 3330: int r, rl;
! 3331: int cur, l;
! 3332: xmlParserInputState state;
! 3333:
! 3334: /*
! 3335: * Check that there is a comment right here.
! 3336: */
! 3337: if ((RAW != '<') || (NXT(1) != '!') ||
! 3338: (NXT(2) != '-') || (NXT(3) != '-')) return;
! 3339:
! 3340: state = ctxt->instate;
! 3341: ctxt->instate = XML_PARSER_COMMENT;
! 3342: SHRINK;
! 3343: SKIP(4);
! 3344: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
! 3345: if (buf == NULL) {
! 3346: fprintf(stderr, "malloc of %d byte failed\n", size);
! 3347: ctxt->instate = state;
! 3348: return;
! 3349: }
! 3350: q = CUR_CHAR(ql);
! 3351: NEXTL(ql);
! 3352: r = CUR_CHAR(rl);
! 3353: NEXTL(rl);
! 3354: cur = CUR_CHAR(l);
! 3355: len = 0;
! 3356: while (IS_CHAR(cur) &&
! 3357: ((cur != '>') ||
! 3358: (r != '-') || (q != '-'))) {
! 3359: if (len + 5 >= size) {
! 3360: size *= 2;
! 3361: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
! 3362: if (buf == NULL) {
! 3363: fprintf(stderr, "realloc of %d byte failed\n", size);
! 3364: ctxt->instate = state;
! 3365: return;
! 3366: }
! 3367: }
! 3368: COPY_BUF(ql,buf,len,q);
! 3369: q = r;
! 3370: ql = rl;
! 3371: r = cur;
! 3372: rl = l;
! 3373: NEXTL(l);
! 3374: cur = CUR_CHAR(l);
! 3375: if (cur == 0) {
! 3376: SHRINK;
! 3377: GROW;
! 3378: cur = CUR_CHAR(l);
! 3379: }
! 3380: }
! 3381: buf[len] = 0;
! 3382: if (!IS_CHAR(cur)) {
! 3383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3384: ctxt->sax->error(ctxt->userData,
! 3385: "Comment not terminated \n<!--%.50s\n", buf);
! 3386: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
! 3387: ctxt->wellFormed = 0;
! 3388: xmlFree(buf);
! 3389: } else {
! 3390: NEXT;
! 3391: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
! 3392: (!ctxt->disableSAX))
! 3393: ctxt->sax->comment(ctxt->userData, buf);
! 3394: xmlFree(buf);
! 3395: }
! 3396: ctxt->instate = state;
! 3397: }
! 3398:
! 3399: /**
! 3400: * sgmlParseCharRef:
! 3401: * @ctxt: an SGML parser context
! 3402: *
! 3403: * parse Reference declarations
! 3404: *
! 3405: * [66] CharRef ::= '&#' [0-9]+ ';' |
! 3406: * '&#x' [0-9a-fA-F]+ ';'
! 3407: *
! 3408: * Returns the value parsed (as an int)
! 3409: */
! 3410: int
! 3411: sgmlParseCharRef(sgmlParserCtxtPtr ctxt) {
! 3412: int val = 0;
! 3413:
! 3414: if ((CUR == '&') && (NXT(1) == '#') &&
! 3415: (NXT(2) == 'x')) {
! 3416: SKIP(3);
! 3417: while (CUR != ';') {
! 3418: if ((CUR >= '0') && (CUR <= '9'))
! 3419: val = val * 16 + (CUR - '0');
! 3420: else if ((CUR >= 'a') && (CUR <= 'f'))
! 3421: val = val * 16 + (CUR - 'a') + 10;
! 3422: else if ((CUR >= 'A') && (CUR <= 'F'))
! 3423: val = val * 16 + (CUR - 'A') + 10;
! 3424: else {
! 3425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3426: ctxt->sax->error(ctxt->userData,
! 3427: "sgmlParseCharRef: invalid hexadecimal value\n");
! 3428: ctxt->wellFormed = 0;
! 3429: val = 0;
! 3430: break;
! 3431: }
! 3432: NEXT;
! 3433: }
! 3434: if (CUR == ';')
! 3435: NEXT;
! 3436: } else if ((CUR == '&') && (NXT(1) == '#')) {
! 3437: SKIP(2);
! 3438: while (CUR != ';') {
! 3439: if ((CUR >= '0') && (CUR <= '9'))
! 3440: val = val * 10 + (CUR - '0');
! 3441: else {
! 3442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3443: ctxt->sax->error(ctxt->userData,
! 3444: "sgmlParseCharRef: invalid decimal value\n");
! 3445: ctxt->wellFormed = 0;
! 3446: val = 0;
! 3447: break;
! 3448: }
! 3449: NEXT;
! 3450: }
! 3451: if (CUR == ';')
! 3452: NEXT;
! 3453: } else {
! 3454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3455: ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid value\n");
! 3456: ctxt->wellFormed = 0;
! 3457: }
! 3458: /*
! 3459: * Check the value IS_CHAR ...
! 3460: */
! 3461: if (IS_CHAR(val)) {
! 3462: return(val);
! 3463: } else {
! 3464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3465: ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid xmlChar value %d\n",
! 3466: val);
! 3467: ctxt->wellFormed = 0;
! 3468: }
! 3469: return(0);
! 3470: }
! 3471:
! 3472:
! 3473: /**
! 3474: * sgmlParseDocTypeDecl :
! 3475: * @ctxt: an SGML parser context
! 3476: *
! 3477: * parse a DOCTYPE declaration
! 3478: *
! 3479: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
! 3480: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
! 3481: */
! 3482:
! 3483: void
! 3484: sgmlParseDocTypeDecl(sgmlParserCtxtPtr ctxt) {
! 3485: xmlChar *name;
! 3486: xmlChar *ExternalID = NULL;
! 3487: xmlChar *URI = NULL;
! 3488:
! 3489: /*
! 3490: * We know that '<!DOCTYPE' has been detected.
! 3491: */
! 3492: SKIP(9);
! 3493:
! 3494: SKIP_BLANKS;
! 3495:
! 3496: /*
! 3497: * Parse the DOCTYPE name.
! 3498: */
! 3499: name = sgmlParseName(ctxt);
! 3500: if (name == NULL) {
! 3501: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3502: ctxt->sax->error(ctxt->userData, "sgmlParseDocTypeDecl : no DOCTYPE name !\n");
! 3503: ctxt->wellFormed = 0;
! 3504: }
! 3505: /*
! 3506: * Check that upper(name) == "SGML" !!!!!!!!!!!!!
! 3507: */
! 3508:
! 3509: SKIP_BLANKS;
! 3510:
! 3511: /*
! 3512: * Check for SystemID and ExternalID
! 3513: */
! 3514: URI = sgmlParseExternalID(ctxt, &ExternalID, 0);
! 3515: SKIP_BLANKS;
! 3516:
! 3517: /*
! 3518: * We should be at the end of the DOCTYPE declaration.
! 3519: */
! 3520: if (CUR != '>') {
! 3521: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3522: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
! 3523: ctxt->wellFormed = 0;
! 3524: /* We shouldn't try to resynchronize ... */
! 3525: }
! 3526: NEXT;
! 3527:
! 3528: /*
! 3529: * Create or update the document accordingly to the DOCTYPE
! 3530: */
! 3531: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
! 3532: (!ctxt->disableSAX))
! 3533: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
! 3534:
! 3535: /*
! 3536: * Cleanup, since we don't use all those identifiers
! 3537: */
! 3538: if (URI != NULL) xmlFree(URI);
! 3539: if (ExternalID != NULL) xmlFree(ExternalID);
! 3540: if (name != NULL) xmlFree(name);
! 3541: }
! 3542:
! 3543: /**
! 3544: * sgmlParseAttribute:
! 3545: * @ctxt: an SGML parser context
! 3546: * @value: a xmlChar ** used to store the value of the attribute
! 3547: *
! 3548: * parse an attribute
! 3549: *
! 3550: * [41] Attribute ::= Name Eq AttValue
! 3551: *
! 3552: * [25] Eq ::= S? '=' S?
! 3553: *
! 3554: * With namespace:
! 3555: *
! 3556: * [NS 11] Attribute ::= QName Eq AttValue
! 3557: *
! 3558: * Also the case QName == xmlns:??? is handled independently as a namespace
! 3559: * definition.
! 3560: *
! 3561: * Returns the attribute name, and the value in *value.
! 3562: */
! 3563:
! 3564: xmlChar *
! 3565: sgmlParseAttribute(sgmlParserCtxtPtr ctxt, xmlChar **value) {
! 3566: xmlChar *name, *val = NULL;
! 3567:
! 3568: *value = NULL;
! 3569: name = sgmlParseName(ctxt);
! 3570: if (name == NULL) {
! 3571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3572: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
! 3573: ctxt->wellFormed = 0;
! 3574: return(NULL);
! 3575: }
! 3576:
! 3577: /*
! 3578: * read the value
! 3579: */
! 3580: SKIP_BLANKS;
! 3581: if (CUR == '=') {
! 3582: NEXT;
! 3583: SKIP_BLANKS;
! 3584: val = sgmlParseAttValue(ctxt);
! 3585: /******
! 3586: } else {
! 3587: * TODO : some attribute must have values, some may not
! 3588: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3589: ctxt->sax->warning(ctxt->userData,
! 3590: "No value for attribute %s\n", name); */
! 3591: }
! 3592:
! 3593: *value = val;
! 3594: return(name);
! 3595: }
! 3596:
! 3597: /**
! 3598: * sgmlCheckEncoding:
! 3599: * @ctxt: an SGML parser context
! 3600: * @attvalue: the attribute value
! 3601: *
! 3602: * Checks an http-equiv attribute from a Meta tag to detect
! 3603: * the encoding
! 3604: * If a new encoding is detected the parser is switched to decode
! 3605: * it and pass UTF8
! 3606: */
! 3607: void
! 3608: sgmlCheckEncoding(sgmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
! 3609: const xmlChar *encoding;
! 3610:
! 3611: if ((ctxt == NULL) || (attvalue == NULL))
! 3612: return;
! 3613:
! 3614: encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
! 3615: if (encoding == NULL)
! 3616: encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
! 3617: if (encoding == NULL)
! 3618: encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
! 3619: if (encoding != NULL) {
! 3620: encoding += 8;
! 3621: } else {
! 3622: encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
! 3623: if (encoding == NULL)
! 3624: encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
! 3625: if (encoding == NULL)
! 3626: encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
! 3627: if (encoding != NULL)
! 3628: encoding += 9;
! 3629: }
! 3630: if (encoding != NULL) {
! 3631: xmlCharEncoding enc;
! 3632: xmlCharEncodingHandlerPtr handler;
! 3633:
! 3634: while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
! 3635:
! 3636: if (ctxt->input->encoding != NULL)
! 3637: xmlFree((xmlChar *) ctxt->input->encoding);
! 3638: ctxt->input->encoding = xmlStrdup(encoding);
! 3639:
! 3640: enc = xmlParseCharEncoding((const char *) encoding);
! 3641: /*
! 3642: * registered set of known encodings
! 3643: */
! 3644: if (enc != XML_CHAR_ENCODING_ERROR) {
! 3645: xmlSwitchEncoding(ctxt, enc);
! 3646: ctxt->charset = XML_CHAR_ENCODING_UTF8;
! 3647: } else {
! 3648: /*
! 3649: * fallback for unknown encodings
! 3650: */
! 3651: handler = xmlFindCharEncodingHandler((const char *) encoding);
! 3652: if (handler != NULL) {
! 3653: xmlSwitchToEncoding(ctxt, handler);
! 3654: ctxt->charset = XML_CHAR_ENCODING_UTF8;
! 3655: } else {
! 3656: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
! 3657: }
! 3658: }
! 3659:
! 3660: if ((ctxt->input->buf != NULL) &&
! 3661: (ctxt->input->buf->encoder != NULL) &&
! 3662: (ctxt->input->buf->raw != NULL) &&
! 3663: (ctxt->input->buf->buffer != NULL)) {
! 3664: int nbchars;
! 3665: int processed;
! 3666:
! 3667: /*
! 3668: * convert as much as possible to the parser reading buffer.
! 3669: */
! 3670: processed = ctxt->input->cur - ctxt->input->base;
! 3671: xmlBufferShrink(ctxt->input->buf->buffer, processed);
! 3672: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
! 3673: ctxt->input->buf->buffer,
! 3674: ctxt->input->buf->raw);
! 3675: if (nbchars < 0) {
! 3676: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3677: ctxt->sax->error(ctxt->userData,
! 3678: "sgmlCheckEncoding: encoder error\n");
! 3679: ctxt->errNo = XML_ERR_INVALID_ENCODING;
! 3680: }
! 3681: ctxt->input->base =
! 3682: ctxt->input->cur = ctxt->input->buf->buffer->content;
! 3683: }
! 3684: }
! 3685: }
! 3686:
! 3687: /**
! 3688: * sgmlCheckMeta:
! 3689: * @ctxt: an SGML parser context
! 3690: * @atts: the attributes values
! 3691: *
! 3692: * Checks an attributes from a Meta tag
! 3693: */
! 3694: void
! 3695: sgmlCheckMeta(sgmlParserCtxtPtr ctxt, const xmlChar **atts) {
! 3696: int i;
! 3697: const xmlChar *att, *value;
! 3698: int http = 0;
! 3699: const xmlChar *content = NULL;
! 3700:
! 3701: if ((ctxt == NULL) || (atts == NULL))
! 3702: return;
! 3703:
! 3704: i = 0;
! 3705: att = atts[i++];
! 3706: while (att != NULL) {
! 3707: value = atts[i++];
! 3708: if ((value != NULL) &&
! 3709: ((!xmlStrcmp(att, BAD_CAST"http-equiv")) ||
! 3710: (!xmlStrcmp(att, BAD_CAST"Http-Equiv")) ||
! 3711: (!xmlStrcmp(att, BAD_CAST"HTTP-EQUIV"))) &&
! 3712: ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
! 3713: (!xmlStrcmp(value, BAD_CAST"content-type")) ||
! 3714: (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
! 3715: http = 1;
! 3716: else if ((value != NULL) &&
! 3717: ((!xmlStrcmp(att, BAD_CAST"content")) ||
! 3718: (!xmlStrcmp(att, BAD_CAST"Content")) ||
! 3719: (!xmlStrcmp(att, BAD_CAST"CONTENT"))))
! 3720: content = value;
! 3721: att = atts[i++];
! 3722: }
! 3723: if ((http) && (content != NULL))
! 3724: sgmlCheckEncoding(ctxt, content);
! 3725:
! 3726: }
! 3727:
! 3728: /**
! 3729: * sgmlParseStartTag:
! 3730: * @ctxt: an SGML parser context
! 3731: *
! 3732: * parse a start of tag either for rule element or
! 3733: * EmptyElement. In both case we don't parse the tag closing chars.
! 3734: *
! 3735: * [40] STag ::= '<' Name (S Attribute)* S? '>'
! 3736: *
! 3737: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
! 3738: *
! 3739: * With namespace:
! 3740: *
! 3741: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
! 3742: *
! 3743: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
! 3744: *
! 3745: */
! 3746:
! 3747: void
! 3748: sgmlParseStartTag(sgmlParserCtxtPtr ctxt) {
! 3749: xmlChar *name;
! 3750: xmlChar *attname;
! 3751: xmlChar *attvalue;
! 3752: const xmlChar **atts = NULL;
! 3753: int nbatts = 0;
! 3754: int maxatts = 0;
! 3755: int meta = 0;
! 3756: int i;
! 3757:
! 3758: if (CUR != '<') return;
! 3759: NEXT;
! 3760:
! 3761: GROW;
! 3762: name = sgmlParseSGMLName(ctxt);
! 3763: if (name == NULL) {
! 3764: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3765: ctxt->sax->error(ctxt->userData,
! 3766: "sgmlParseStartTag: invalid element name\n");
! 3767: ctxt->wellFormed = 0;
! 3768: return;
! 3769: }
! 3770: if (!xmlStrcmp(name, BAD_CAST"meta"))
! 3771: meta = 1;
! 3772:
! 3773: /*
! 3774: * Check for auto-closure of SGML elements.
! 3775: */
! 3776: sgmlAutoClose(ctxt, name);
! 3777:
! 3778: /*
! 3779: * Check for implied SGML elements.
! 3780: */
! 3781: sgmlCheckImplied(ctxt, name);
! 3782:
! 3783: /*
! 3784: * Now parse the attributes, it ends up with the ending
! 3785: *
! 3786: * (S Attribute)* S?
! 3787: */
! 3788: SKIP_BLANKS;
! 3789: while ((IS_CHAR(CUR)) &&
! 3790: (CUR != '>') &&
! 3791: ((CUR != '/') || (NXT(1) != '>'))) {
! 3792: long cons = ctxt->nbChars;
! 3793:
! 3794: GROW;
! 3795: attname = sgmlParseAttribute(ctxt, &attvalue);
! 3796: if (attname != NULL) {
! 3797:
! 3798: /*
! 3799: * Well formedness requires at most one declaration of an attribute
! 3800: */
! 3801: for (i = 0; i < nbatts;i += 2) {
! 3802: if (!xmlStrcmp(atts[i], attname)) {
! 3803: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3804: ctxt->sax->error(ctxt->userData,
! 3805: "Attribute %s redefined\n",
! 3806: attname);
! 3807: ctxt->wellFormed = 0;
! 3808: xmlFree(attname);
! 3809: if (attvalue != NULL)
! 3810: xmlFree(attvalue);
! 3811: goto failed;
! 3812: }
! 3813: }
! 3814:
! 3815: /*
! 3816: * Add the pair to atts
! 3817: */
! 3818: if (atts == NULL) {
! 3819: maxatts = 10;
! 3820: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
! 3821: if (atts == NULL) {
! 3822: fprintf(stderr, "malloc of %ld byte failed\n",
! 3823: maxatts * (long)sizeof(xmlChar *));
! 3824: if (name != NULL) xmlFree(name);
! 3825: return;
! 3826: }
! 3827: } else if (nbatts + 4 > maxatts) {
! 3828: maxatts *= 2;
! 3829: atts = (const xmlChar **) xmlRealloc(atts, maxatts * sizeof(xmlChar *));
! 3830: if (atts == NULL) {
! 3831: fprintf(stderr, "realloc of %ld byte failed\n",
! 3832: maxatts * (long)sizeof(xmlChar *));
! 3833: if (name != NULL) xmlFree(name);
! 3834: return;
! 3835: }
! 3836: }
! 3837: atts[nbatts++] = attname;
! 3838: atts[nbatts++] = attvalue;
! 3839: atts[nbatts] = NULL;
! 3840: atts[nbatts + 1] = NULL;
! 3841: }
! 3842:
! 3843: failed:
! 3844: SKIP_BLANKS;
! 3845: if (cons == ctxt->nbChars) {
! 3846: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3847: ctxt->sax->error(ctxt->userData,
! 3848: "sgmlParseStartTag: problem parsing attributes\n");
! 3849: ctxt->wellFormed = 0;
! 3850: break;
! 3851: }
! 3852: }
! 3853:
! 3854: /*
! 3855: * Handle specific association to the META tag
! 3856: */
! 3857: if (meta)
! 3858: sgmlCheckMeta(ctxt, atts);
! 3859:
! 3860: /*
! 3861: * SAX: Start of Element !
! 3862: */
! 3863: sgmlnamePush(ctxt, xmlStrdup(name));
! 3864: #ifdef DEBUG
! 3865: fprintf(stderr,"Start of element %s: pushed %s\n", name, ctxt->name);
! 3866: #endif
! 3867: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
! 3868: ctxt->sax->startElement(ctxt->userData, name, atts);
! 3869:
! 3870: if (atts != NULL) {
! 3871: for (i = 0;i < nbatts;i++) {
! 3872: if (atts[i] != NULL)
! 3873: xmlFree((xmlChar *) atts[i]);
! 3874: }
! 3875: xmlFree((void *) atts);
! 3876: }
! 3877: if (name != NULL) xmlFree(name);
! 3878: }
! 3879:
! 3880: /**
! 3881: * sgmlParseEndTag:
! 3882: * @ctxt: an SGML parser context
! 3883: *
! 3884: * parse an end of tag
! 3885: *
! 3886: * [42] ETag ::= '</' Name S? '>'
! 3887: *
! 3888: * With namespace
! 3889: *
! 3890: * [NS 9] ETag ::= '</' QName S? '>'
! 3891: */
! 3892:
! 3893: void
! 3894: sgmlParseEndTag(sgmlParserCtxtPtr ctxt) {
! 3895: xmlChar *name;
! 3896: xmlChar *oldname;
! 3897: int i;
! 3898:
! 3899: if ((CUR != '<') || (NXT(1) != '/')) {
! 3900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3901: ctxt->sax->error(ctxt->userData, "sgmlParseEndTag: '</' not found\n");
! 3902: ctxt->wellFormed = 0;
! 3903: return;
! 3904: }
! 3905: SKIP(2);
! 3906:
! 3907: name = sgmlParseSGMLName(ctxt);
! 3908: if (name == NULL) {
! 3909: if (CUR == '>') {
! 3910: NEXT;
! 3911: oldname = sgmlnamePop(ctxt);
! 3912: if (oldname != NULL) {
! 3913: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 3914: ctxt->sax->endElement(ctxt->userData, name);
! 3915: #ifdef DEBUG
! 3916: fprintf(stderr,"End of tag </>: popping out %s\n", oldname);
! 3917: #endif
! 3918: xmlFree(oldname);
! 3919: #ifdef DEBUG
! 3920: } else {
! 3921: fprintf(stderr,"End of tag </>: stack empty !!!\n");
! 3922: #endif
! 3923: }
! 3924: return;
! 3925: } else
! 3926: return;
! 3927: }
! 3928:
! 3929: /*
! 3930: * We should definitely be at the ending "S? '>'" part
! 3931: */
! 3932: SKIP_BLANKS;
! 3933: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
! 3934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3935: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
! 3936: ctxt->wellFormed = 0;
! 3937: } else
! 3938: NEXT;
! 3939:
! 3940: /*
! 3941: * If the name read is not one of the element in the parsing stack
! 3942: * then return, it's just an error.
! 3943: */
! 3944: for (i = (ctxt->nameNr - 1);i >= 0;i--) {
! 3945: if (!xmlStrcmp(name, ctxt->nameTab[i])) break;
! 3946: }
! 3947: if (i < 0) {
! 3948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3949: ctxt->sax->error(ctxt->userData,
! 3950: "Unexpected end tag : %s\n", name);
! 3951: xmlFree(name);
! 3952: ctxt->wellFormed = 0;
! 3953: return;
! 3954: }
! 3955:
! 3956:
! 3957: /*
! 3958: * Check for auto-closure of SGML elements.
! 3959: */
! 3960:
! 3961: sgmlAutoCloseOnClose(ctxt, name);
! 3962:
! 3963: /*
! 3964: * Well formedness constraints, opening and closing must match.
! 3965: * With the exception that the autoclose may have popped stuff out
! 3966: * of the stack.
! 3967: */
! 3968: if (((name[0] != '/') || (name[1] != 0)) &&
! 3969: (xmlStrcmp(name, ctxt->name))) {
! 3970: #ifdef DEBUG
! 3971: fprintf(stderr,"End of tag %s: expecting %s\n", name, ctxt->name);
! 3972: #endif
! 3973: if ((ctxt->name != NULL) &&
! 3974: (xmlStrcmp(ctxt->name, name))) {
! 3975: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 3976: ctxt->sax->error(ctxt->userData,
! 3977: "Opening and ending tag mismatch: %s and %s\n",
! 3978: name, ctxt->name);
! 3979: ctxt->wellFormed = 0;
! 3980: }
! 3981: }
! 3982:
! 3983: /*
! 3984: * SAX: End of Tag
! 3985: */
! 3986: oldname = ctxt->name;
! 3987: if (((name[0] == '/') && (name[1] == 0)) ||
! 3988: ((oldname != NULL) && (!xmlStrcmp(oldname, name)))) {
! 3989: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 3990: ctxt->sax->endElement(ctxt->userData, name);
! 3991: oldname = sgmlnamePop(ctxt);
! 3992: if (oldname != NULL) {
! 3993: #ifdef DEBUG
! 3994: fprintf(stderr,"End of tag %s: popping out %s\n", name, oldname);
! 3995: #endif
! 3996: xmlFree(oldname);
! 3997: #ifdef DEBUG
! 3998: } else {
! 3999: fprintf(stderr,"End of tag %s: stack empty !!!\n", name);
! 4000: #endif
! 4001: }
! 4002: }
! 4003:
! 4004: if (name != NULL)
! 4005: xmlFree(name);
! 4006:
! 4007: return;
! 4008: }
! 4009:
! 4010:
! 4011: /**
! 4012: * sgmlParseReference:
! 4013: * @ctxt: an SGML parser context
! 4014: *
! 4015: * parse and handle entity references in content,
! 4016: * this will end-up in a call to character() since this is either a
! 4017: * CharRef, or a predefined entity.
! 4018: */
! 4019: void
! 4020: sgmlParseReference(sgmlParserCtxtPtr ctxt) {
! 4021: sgmlEntityDescPtr ent;
! 4022: xmlChar out[6];
! 4023: xmlChar *name;
! 4024: if (CUR != '&') return;
! 4025:
! 4026: if (NXT(1) == '#') {
! 4027: unsigned int c;
! 4028: int bits, i = 0;
! 4029:
! 4030: c = sgmlParseCharRef(ctxt);
! 4031: if (c < 0x80) { out[i++]= c; bits= -6; }
! 4032: else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
! 4033: else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
! 4034: else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
! 4035:
! 4036: for ( ; bits >= 0; bits-= 6) {
! 4037: out[i++]= ((c >> bits) & 0x3F) | 0x80;
! 4038: }
! 4039: out[i] = 0;
! 4040:
! 4041: sgmlCheckParagraph(ctxt);
! 4042: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
! 4043: ctxt->sax->characters(ctxt->userData, out, i);
! 4044: } else {
! 4045: ent = sgmlParseEntityRef(ctxt, &name);
! 4046: if (name == NULL) {
! 4047: sgmlCheckParagraph(ctxt);
! 4048: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
! 4049: ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
! 4050: return;
! 4051: }
! 4052: if ((ent == NULL) || (ent->value <= 0)) {
! 4053: sgmlCheckParagraph(ctxt);
! 4054: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
! 4055: ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
! 4056: ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
! 4057: /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
! 4058: }
! 4059: } else {
! 4060: unsigned int c;
! 4061: int bits, i = 0;
! 4062:
! 4063: c = ent->value;
! 4064: if (c < 0x80)
! 4065: { out[i++]= c; bits= -6; }
! 4066: else if (c < 0x800)
! 4067: { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
! 4068: else if (c < 0x10000)
! 4069: { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
! 4070: else
! 4071: { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
! 4072:
! 4073: for ( ; bits >= 0; bits-= 6) {
! 4074: out[i++]= ((c >> bits) & 0x3F) | 0x80;
! 4075: }
! 4076: out[i] = 0;
! 4077:
! 4078: sgmlCheckParagraph(ctxt);
! 4079: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
! 4080: ctxt->sax->characters(ctxt->userData, out, i);
! 4081: }
! 4082: xmlFree(name);
! 4083: }
! 4084: }
! 4085:
! 4086: /**
! 4087: * sgmlParseContent:
! 4088: * @ctxt: an SGML parser context
! 4089: * @name: the node name
! 4090: *
! 4091: * Parse a content: comment, sub-element, reference or text.
! 4092: *
! 4093: */
! 4094:
! 4095: void
! 4096: sgmlParseContent(sgmlParserCtxtPtr ctxt) {
! 4097: xmlChar *currentNode;
! 4098: int depth;
! 4099:
! 4100: currentNode = xmlStrdup(ctxt->name);
! 4101: depth = ctxt->nameNr;
! 4102: while (1) {
! 4103: long cons = ctxt->nbChars;
! 4104:
! 4105: GROW;
! 4106: /*
! 4107: * Our tag or one of it's parent or children is ending.
! 4108: */
! 4109: if ((CUR == '<') && (NXT(1) == '/')) {
! 4110: sgmlParseEndTag(ctxt);
! 4111: if (currentNode != NULL) xmlFree(currentNode);
! 4112: return;
! 4113: }
! 4114:
! 4115: /*
! 4116: * Has this node been popped out during parsing of
! 4117: * the next element
! 4118: */
! 4119: if ((xmlStrcmp(currentNode, ctxt->name)) &&
! 4120: (depth >= ctxt->nameNr)) {
! 4121: if (currentNode != NULL) xmlFree(currentNode);
! 4122: return;
! 4123: }
! 4124:
! 4125: /*
! 4126: * Sometimes DOCTYPE arrives in the middle of the document
! 4127: */
! 4128: if ((CUR == '<') && (NXT(1) == '!') &&
! 4129: (UPP(2) == 'D') && (UPP(3) == 'O') &&
! 4130: (UPP(4) == 'C') && (UPP(5) == 'T') &&
! 4131: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
! 4132: (UPP(8) == 'E')) {
! 4133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4134: ctxt->sax->error(ctxt->userData,
! 4135: "Misplaced DOCTYPE declaration\n");
! 4136: ctxt->wellFormed = 0;
! 4137: sgmlParseDocTypeDecl(ctxt);
! 4138: }
! 4139:
! 4140: /*
! 4141: * First case : a comment
! 4142: */
! 4143: if ((CUR == '<') && (NXT(1) == '!') &&
! 4144: (NXT(2) == '-') && (NXT(3) == '-')) {
! 4145: sgmlParseComment(ctxt);
! 4146: }
! 4147:
! 4148: /*
! 4149: * Second case : a sub-element.
! 4150: */
! 4151: else if (CUR == '<') {
! 4152: sgmlParseElement(ctxt);
! 4153: }
! 4154:
! 4155: /*
! 4156: * Third case : a reference. If if has not been resolved,
! 4157: * parsing returns it's Name, create the node
! 4158: */
! 4159: else if (CUR == '&') {
! 4160: sgmlParseReference(ctxt);
! 4161: }
! 4162:
! 4163: /*
! 4164: * Fourth : end of the resource
! 4165: */
! 4166: else if (CUR == 0) {
! 4167: sgmlAutoClose(ctxt, NULL);
! 4168: }
! 4169:
! 4170: /*
! 4171: * Last case, text. Note that References are handled directly.
! 4172: */
! 4173: else {
! 4174: sgmlParseCharData(ctxt, 0);
! 4175: }
! 4176:
! 4177: if (cons == ctxt->nbChars) {
! 4178: if (ctxt->node != NULL) {
! 4179: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4180: ctxt->sax->error(ctxt->userData,
! 4181: "detected an error in element content\n");
! 4182: ctxt->wellFormed = 0;
! 4183: }
! 4184: break;
! 4185: }
! 4186:
! 4187: GROW;
! 4188: }
! 4189: if (currentNode != NULL) xmlFree(currentNode);
! 4190: }
! 4191:
! 4192: /**
! 4193: * sgmlParseElement:
! 4194: * @ctxt: an SGML parser context
! 4195: *
! 4196: * parse an SGML element, this is highly recursive
! 4197: *
! 4198: * [39] element ::= EmptyElemTag | STag content ETag
! 4199: *
! 4200: * [41] Attribute ::= Name Eq AttValue
! 4201: */
! 4202:
! 4203: void
! 4204: sgmlParseElement(sgmlParserCtxtPtr ctxt) {
! 4205: xmlChar *name;
! 4206: xmlChar *currentNode = NULL;
! 4207: sgmlElemDescPtr info;
! 4208: sgmlParserNodeInfo node_info;
! 4209: xmlChar *oldname;
! 4210: int depth = ctxt->nameNr;
! 4211:
! 4212: /* Capture start position */
! 4213: if (ctxt->record_info) {
! 4214: node_info.begin_pos = ctxt->input->consumed +
! 4215: (CUR_PTR - ctxt->input->base);
! 4216: node_info.begin_line = ctxt->input->line;
! 4217: }
! 4218:
! 4219: oldname = xmlStrdup(ctxt->name);
! 4220: sgmlParseStartTag(ctxt);
! 4221: name = ctxt->name;
! 4222: #ifdef DEBUG
! 4223: if (oldname == NULL)
! 4224: fprintf(stderr, "Start of element %s\n", name);
! 4225: else if (name == NULL)
! 4226: fprintf(stderr, "Start of element failed, was %s\n", oldname);
! 4227: else
! 4228: fprintf(stderr, "Start of element %s, was %s\n", name, oldname);
! 4229: #endif
! 4230: if (((depth == ctxt->nameNr) && (!xmlStrcmp(oldname, ctxt->name))) ||
! 4231: (name == NULL)) {
! 4232: if (CUR == '>')
! 4233: NEXT;
! 4234: if (oldname != NULL)
! 4235: xmlFree(oldname);
! 4236: return;
! 4237: }
! 4238: if (oldname != NULL)
! 4239: xmlFree(oldname);
! 4240:
! 4241: /*
! 4242: * Lookup the info for that element.
! 4243: */
! 4244: info = sgmlTagLookup(name);
! 4245: if (info == NULL) {
! 4246: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4247: ctxt->sax->error(ctxt->userData, "Tag %s invalid\n",
! 4248: name);
! 4249: ctxt->wellFormed = 0;
! 4250: } else if (info->depr) {
! 4251: /***************************
! 4252: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
! 4253: ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
! 4254: name);
! 4255: ***************************/
! 4256: }
! 4257:
! 4258: /*
! 4259: * Check for an Empty Element labelled the XML/SGML way
! 4260: */
! 4261: if ((CUR == '/') && (NXT(1) == '>')) {
! 4262: SKIP(2);
! 4263: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 4264: ctxt->sax->endElement(ctxt->userData, name);
! 4265: oldname = sgmlnamePop(ctxt);
! 4266: #ifdef DEBUG
! 4267: fprintf(stderr,"End of tag the XML way: popping out %s\n", oldname);
! 4268: #endif
! 4269: if (oldname != NULL)
! 4270: xmlFree(oldname);
! 4271: return;
! 4272: }
! 4273:
! 4274: if (CUR == '>') {
! 4275: NEXT;
! 4276: } else {
! 4277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4278: ctxt->sax->error(ctxt->userData,
! 4279: "Couldn't find end of Start Tag %s\n",
! 4280: name);
! 4281: ctxt->wellFormed = 0;
! 4282:
! 4283: /*
! 4284: * end of parsing of this node.
! 4285: */
! 4286: if (!xmlStrcmp(name, ctxt->name)) {
! 4287: nodePop(ctxt);
! 4288: oldname = sgmlnamePop(ctxt);
! 4289: #ifdef DEBUG
! 4290: fprintf(stderr,"End of start tag problem: popping out %s\n", oldname);
! 4291: #endif
! 4292: if (oldname != NULL)
! 4293: xmlFree(oldname);
! 4294: }
! 4295:
! 4296: /*
! 4297: * Capture end position and add node
! 4298: */
! 4299: if ( currentNode != NULL && ctxt->record_info ) {
! 4300: node_info.end_pos = ctxt->input->consumed +
! 4301: (CUR_PTR - ctxt->input->base);
! 4302: node_info.end_line = ctxt->input->line;
! 4303: node_info.node = ctxt->node;
! 4304: xmlParserAddNodeInfo(ctxt, &node_info);
! 4305: }
! 4306: return;
! 4307: }
! 4308:
! 4309: /*
! 4310: * Check for an Empty Element from DTD definition
! 4311: */
! 4312: if ((info != NULL) && (info->empty)) {
! 4313: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 4314: ctxt->sax->endElement(ctxt->userData, name);
! 4315: oldname = sgmlnamePop(ctxt);
! 4316: #ifdef DEBUG
! 4317: fprintf(stderr,"End of empty tag %s : popping out %s\n", name, oldname);
! 4318: #endif
! 4319: if (oldname != NULL)
! 4320: xmlFree(oldname);
! 4321: return;
! 4322: }
! 4323:
! 4324: /*
! 4325: * Parse the content of the element:
! 4326: */
! 4327: currentNode = xmlStrdup(ctxt->name);
! 4328: depth = ctxt->nameNr;
! 4329: while (IS_CHAR(CUR)) {
! 4330: sgmlParseContent(ctxt);
! 4331: if (ctxt->nameNr < depth) break;
! 4332: }
! 4333:
! 4334: if (!IS_CHAR(CUR)) {
! 4335: /************
! 4336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4337: ctxt->sax->error(ctxt->userData,
! 4338: "Premature end of data in tag %s\n", currentNode);
! 4339: ctxt->wellFormed = 0;
! 4340: *************/
! 4341:
! 4342: /*
! 4343: * end of parsing of this node.
! 4344: */
! 4345: nodePop(ctxt);
! 4346: oldname = sgmlnamePop(ctxt);
! 4347: #ifdef DEBUG
! 4348: fprintf(stderr,"Premature end of tag %s : popping out %s\n", name, oldname);
! 4349: #endif
! 4350: if (oldname != NULL)
! 4351: xmlFree(oldname);
! 4352: if (currentNode != NULL)
! 4353: xmlFree(currentNode);
! 4354: return;
! 4355: }
! 4356:
! 4357: /*
! 4358: * Capture end position and add node
! 4359: */
! 4360: if ( currentNode != NULL && ctxt->record_info ) {
! 4361: node_info.end_pos = ctxt->input->consumed +
! 4362: (CUR_PTR - ctxt->input->base);
! 4363: node_info.end_line = ctxt->input->line;
! 4364: node_info.node = ctxt->node;
! 4365: xmlParserAddNodeInfo(ctxt, &node_info);
! 4366: }
! 4367: if (currentNode != NULL)
! 4368: xmlFree(currentNode);
! 4369: }
! 4370:
! 4371: /**
! 4372: * sgmlParseDocument :
! 4373: * @ctxt: an SGML parser context
! 4374: *
! 4375: * parse an SGML document (and build a tree if using the standard SAX
! 4376: * interface).
! 4377: *
! 4378: * Returns 0, -1 in case of error. the parser context is augmented
! 4379: * as a result of the parsing.
! 4380: */
! 4381:
! 4382: int
! 4383: sgmlParseDocument(sgmlParserCtxtPtr ctxt) {
! 4384: xmlDtdPtr dtd;
! 4385:
! 4386: sgmlDefaultSAXHandlerInit();
! 4387: ctxt->html = 2;
! 4388:
! 4389: GROW;
! 4390: /*
! 4391: * SAX: beginning of the document processing.
! 4392: */
! 4393: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
! 4394: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
! 4395:
! 4396: /*
! 4397: * Wipe out everything which is before the first '<'
! 4398: */
! 4399: SKIP_BLANKS;
! 4400: if (CUR == 0) {
! 4401: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4402: ctxt->sax->error(ctxt->userData, "Document is empty\n");
! 4403: ctxt->wellFormed = 0;
! 4404: }
! 4405:
! 4406: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
! 4407: ctxt->sax->startDocument(ctxt->userData);
! 4408:
! 4409:
! 4410: /*
! 4411: * Parse possible comments before any content
! 4412: */
! 4413: while ((CUR == '<') && (NXT(1) == '!') &&
! 4414: (NXT(2) == '-') && (NXT(3) == '-')) {
! 4415: sgmlParseComment(ctxt);
! 4416: SKIP_BLANKS;
! 4417: }
! 4418:
! 4419:
! 4420: /*
! 4421: * Then possibly doc type declaration(s) and more Misc
! 4422: * (doctypedecl Misc*)?
! 4423: */
! 4424: if ((CUR == '<') && (NXT(1) == '!') &&
! 4425: (UPP(2) == 'D') && (UPP(3) == 'O') &&
! 4426: (UPP(4) == 'C') && (UPP(5) == 'T') &&
! 4427: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
! 4428: (UPP(8) == 'E')) {
! 4429: sgmlParseDocTypeDecl(ctxt);
! 4430: }
! 4431: SKIP_BLANKS;
! 4432:
! 4433: /*
! 4434: * Parse possible comments before any content
! 4435: */
! 4436: while ((CUR == '<') && (NXT(1) == '!') &&
! 4437: (NXT(2) == '-') && (NXT(3) == '-')) {
! 4438: sgmlParseComment(ctxt);
! 4439: SKIP_BLANKS;
! 4440: }
! 4441:
! 4442: /*
! 4443: * Time to start parsing the tree itself
! 4444: */
! 4445: sgmlParseContent(ctxt);
! 4446:
! 4447: /*
! 4448: * autoclose
! 4449: */
! 4450: if (CUR == 0)
! 4451: sgmlAutoClose(ctxt, NULL);
! 4452:
! 4453:
! 4454: /*
! 4455: * SAX: end of the document processing.
! 4456: */
! 4457: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
! 4458: ctxt->sax->endDocument(ctxt->userData);
! 4459:
! 4460: if (ctxt->myDoc != NULL) {
! 4461: dtd = xmlGetIntSubset(ctxt->myDoc);
! 4462: if (dtd == NULL)
! 4463: ctxt->myDoc->intSubset =
! 4464: xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
! 4465: BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
! 4466: BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
! 4467: }
! 4468: if (! ctxt->wellFormed) return(-1);
! 4469: return(0);
! 4470: }
! 4471:
! 4472:
! 4473: /************************************************************************
! 4474: * *
! 4475: * Parser contexts handling *
! 4476: * *
! 4477: ************************************************************************/
! 4478:
! 4479: /**
! 4480: * xmlInitParserCtxt:
! 4481: * @ctxt: an SGML parser context
! 4482: *
! 4483: * Initialize a parser context
! 4484: */
! 4485:
! 4486: void
! 4487: sgmlInitParserCtxt(sgmlParserCtxtPtr ctxt)
! 4488: {
! 4489: sgmlSAXHandler *sax;
! 4490:
! 4491: if (ctxt == NULL) return;
! 4492: memset(ctxt, 0, sizeof(sgmlParserCtxt));
! 4493:
! 4494: sax = (sgmlSAXHandler *) xmlMalloc(sizeof(sgmlSAXHandler));
! 4495: if (sax == NULL) {
! 4496: fprintf(stderr, "sgmlInitParserCtxt: out of memory\n");
! 4497: }
! 4498: memset(sax, 0, sizeof(sgmlSAXHandler));
! 4499:
! 4500: /* Allocate the Input stack */
! 4501: ctxt->inputTab = (sgmlParserInputPtr *)
! 4502: xmlMalloc(5 * sizeof(sgmlParserInputPtr));
! 4503: if (ctxt->inputTab == NULL) {
! 4504: fprintf(stderr, "sgmlInitParserCtxt: out of memory\n");
! 4505: }
! 4506: ctxt->inputNr = 0;
! 4507: ctxt->inputMax = 5;
! 4508: ctxt->input = NULL;
! 4509: ctxt->version = NULL;
! 4510: ctxt->encoding = NULL;
! 4511: ctxt->standalone = -1;
! 4512: ctxt->instate = XML_PARSER_START;
! 4513:
! 4514: /* Allocate the Node stack */
! 4515: ctxt->nodeTab = (sgmlNodePtr *) xmlMalloc(10 * sizeof(sgmlNodePtr));
! 4516: ctxt->nodeNr = 0;
! 4517: ctxt->nodeMax = 10;
! 4518: ctxt->node = NULL;
! 4519:
! 4520: /* Allocate the Name stack */
! 4521: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
! 4522: ctxt->nameNr = 0;
! 4523: ctxt->nameMax = 10;
! 4524: ctxt->name = NULL;
! 4525:
! 4526: if (sax == NULL) ctxt->sax = &sgmlDefaultSAXHandler;
! 4527: else {
! 4528: ctxt->sax = sax;
! 4529: memcpy(sax, &sgmlDefaultSAXHandler, sizeof(sgmlSAXHandler));
! 4530: }
! 4531: ctxt->userData = ctxt;
! 4532: ctxt->myDoc = NULL;
! 4533: ctxt->wellFormed = 1;
! 4534: ctxt->replaceEntities = 0;
! 4535: ctxt->html = 2;
! 4536: ctxt->record_info = 0;
! 4537: ctxt->validate = 0;
! 4538: ctxt->nbChars = 0;
! 4539: ctxt->checkIndex = 0;
! 4540: xmlInitNodeInfoSeq(&ctxt->node_seq);
! 4541: }
! 4542:
! 4543: /**
! 4544: * sgmlFreeParserCtxt:
! 4545: * @ctxt: an SGML parser context
! 4546: *
! 4547: * Free all the memory used by a parser context. However the parsed
! 4548: * document in ctxt->myDoc is not freed.
! 4549: */
! 4550:
! 4551: void
! 4552: sgmlFreeParserCtxt(sgmlParserCtxtPtr ctxt)
! 4553: {
! 4554: xmlFreeParserCtxt(ctxt);
! 4555: }
! 4556:
! 4557: /**
! 4558: * sgmlCreateDocParserCtxt :
! 4559: * @cur: a pointer to an array of xmlChar
! 4560: * @encoding: a free form C string describing the SGML document encoding, or NULL
! 4561: *
! 4562: * Create a parser context for an SGML document.
! 4563: *
! 4564: * Returns the new parser context or NULL
! 4565: */
! 4566: sgmlParserCtxtPtr
! 4567: sgmlCreateDocParserCtxt(xmlChar *cur, const char *encoding) {
! 4568: sgmlParserCtxtPtr ctxt;
! 4569: sgmlParserInputPtr input;
! 4570: /* sgmlCharEncoding enc; */
! 4571:
! 4572: ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
! 4573: if (ctxt == NULL) {
! 4574: perror("malloc");
! 4575: return(NULL);
! 4576: }
! 4577: sgmlInitParserCtxt(ctxt);
! 4578: input = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
! 4579: if (input == NULL) {
! 4580: perror("malloc");
! 4581: xmlFree(ctxt);
! 4582: return(NULL);
! 4583: }
! 4584: memset(input, 0, sizeof(sgmlParserInput));
! 4585:
! 4586: input->line = 1;
! 4587: input->col = 1;
! 4588: input->base = cur;
! 4589: input->cur = cur;
! 4590:
! 4591: inputPush(ctxt, input);
! 4592: return(ctxt);
! 4593: }
! 4594:
! 4595: /************************************************************************
! 4596: * *
! 4597: * Progressive parsing interfaces *
! 4598: * *
! 4599: ************************************************************************/
! 4600:
! 4601: /**
! 4602: * sgmlParseLookupSequence:
! 4603: * @ctxt: an SGML parser context
! 4604: * @first: the first char to lookup
! 4605: * @next: the next char to lookup or zero
! 4606: * @third: the next char to lookup or zero
! 4607: *
! 4608: * Try to find if a sequence (first, next, third) or just (first next) or
! 4609: * (first) is available in the input stream.
! 4610: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
! 4611: * to avoid rescanning sequences of bytes, it DOES change the state of the
! 4612: * parser, do not use liberally.
! 4613: * This is basically similar to xmlParseLookupSequence()
! 4614: *
! 4615: * Returns the index to the current parsing point if the full sequence
! 4616: * is available, -1 otherwise.
! 4617: */
! 4618: int
! 4619: sgmlParseLookupSequence(sgmlParserCtxtPtr ctxt, xmlChar first,
! 4620: xmlChar next, xmlChar third) {
! 4621: int base, len;
! 4622: sgmlParserInputPtr in;
! 4623: const xmlChar *buf;
! 4624:
! 4625: in = ctxt->input;
! 4626: if (in == NULL) return(-1);
! 4627: base = in->cur - in->base;
! 4628: if (base < 0) return(-1);
! 4629: if (ctxt->checkIndex > base)
! 4630: base = ctxt->checkIndex;
! 4631: if (in->buf == NULL) {
! 4632: buf = in->base;
! 4633: len = in->length;
! 4634: } else {
! 4635: buf = in->buf->buffer->content;
! 4636: len = in->buf->buffer->use;
! 4637: }
! 4638: /* take into account the sequence length */
! 4639: if (third) len -= 2;
! 4640: else if (next) len --;
! 4641: for (;base < len;base++) {
! 4642: if (buf[base] == first) {
! 4643: if (third != 0) {
! 4644: if ((buf[base + 1] != next) ||
! 4645: (buf[base + 2] != third)) continue;
! 4646: } else if (next != 0) {
! 4647: if (buf[base + 1] != next) continue;
! 4648: }
! 4649: ctxt->checkIndex = 0;
! 4650: #ifdef DEBUG_PUSH
! 4651: if (next == 0)
! 4652: fprintf(stderr, "HPP: lookup '%c' found at %d\n",
! 4653: first, base);
! 4654: else if (third == 0)
! 4655: fprintf(stderr, "HPP: lookup '%c%c' found at %d\n",
! 4656: first, next, base);
! 4657: else
! 4658: fprintf(stderr, "HPP: lookup '%c%c%c' found at %d\n",
! 4659: first, next, third, base);
! 4660: #endif
! 4661: return(base - (in->cur - in->base));
! 4662: }
! 4663: }
! 4664: ctxt->checkIndex = base;
! 4665: #ifdef DEBUG_PUSH
! 4666: if (next == 0)
! 4667: fprintf(stderr, "HPP: lookup '%c' failed\n", first);
! 4668: else if (third == 0)
! 4669: fprintf(stderr, "HPP: lookup '%c%c' failed\n", first, next);
! 4670: else
! 4671: fprintf(stderr, "HPP: lookup '%c%c%c' failed\n", first, next, third);
! 4672: #endif
! 4673: return(-1);
! 4674: }
! 4675:
! 4676: /**
! 4677: * sgmlParseTryOrFinish:
! 4678: * @ctxt: an SGML parser context
! 4679: * @terminate: last chunk indicator
! 4680: *
! 4681: * Try to progress on parsing
! 4682: *
! 4683: * Returns zero if no parsing was possible
! 4684: */
! 4685: int
! 4686: sgmlParseTryOrFinish(sgmlParserCtxtPtr ctxt, int terminate) {
! 4687: int ret = 0;
! 4688: sgmlParserInputPtr in;
! 4689: int avail = 0;
! 4690: xmlChar cur, next;
! 4691:
! 4692: #ifdef DEBUG_PUSH
! 4693: switch (ctxt->instate) {
! 4694: case XML_PARSER_EOF:
! 4695: fprintf(stderr, "HPP: try EOF\n"); break;
! 4696: case XML_PARSER_START:
! 4697: fprintf(stderr, "HPP: try START\n"); break;
! 4698: case XML_PARSER_MISC:
! 4699: fprintf(stderr, "HPP: try MISC\n");break;
! 4700: case XML_PARSER_COMMENT:
! 4701: fprintf(stderr, "HPP: try COMMENT\n");break;
! 4702: case XML_PARSER_PROLOG:
! 4703: fprintf(stderr, "HPP: try PROLOG\n");break;
! 4704: case XML_PARSER_START_TAG:
! 4705: fprintf(stderr, "HPP: try START_TAG\n");break;
! 4706: case XML_PARSER_CONTENT:
! 4707: fprintf(stderr, "HPP: try CONTENT\n");break;
! 4708: case XML_PARSER_CDATA_SECTION:
! 4709: fprintf(stderr, "HPP: try CDATA_SECTION\n");break;
! 4710: case XML_PARSER_END_TAG:
! 4711: fprintf(stderr, "HPP: try END_TAG\n");break;
! 4712: case XML_PARSER_ENTITY_DECL:
! 4713: fprintf(stderr, "HPP: try ENTITY_DECL\n");break;
! 4714: case XML_PARSER_ENTITY_VALUE:
! 4715: fprintf(stderr, "HPP: try ENTITY_VALUE\n");break;
! 4716: case XML_PARSER_ATTRIBUTE_VALUE:
! 4717: fprintf(stderr, "HPP: try ATTRIBUTE_VALUE\n");break;
! 4718: case XML_PARSER_DTD:
! 4719: fprintf(stderr, "HPP: try DTD\n");break;
! 4720: case XML_PARSER_EPILOG:
! 4721: fprintf(stderr, "HPP: try EPILOG\n");break;
! 4722: case XML_PARSER_PI:
! 4723: fprintf(stderr, "HPP: try PI\n");break;
! 4724: }
! 4725: #endif
! 4726:
! 4727: while (1) {
! 4728:
! 4729: in = ctxt->input;
! 4730: if (in == NULL) break;
! 4731: if (in->buf == NULL)
! 4732: avail = in->length - (in->cur - in->base);
! 4733: else
! 4734: avail = in->buf->buffer->use - (in->cur - in->base);
! 4735: if ((avail == 0) && (terminate)) {
! 4736: sgmlAutoClose(ctxt, NULL);
! 4737: if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
! 4738: /*
! 4739: * SAX: end of the document processing.
! 4740: */
! 4741: ctxt->instate = XML_PARSER_EOF;
! 4742: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
! 4743: ctxt->sax->endDocument(ctxt->userData);
! 4744: }
! 4745: }
! 4746: if (avail < 1)
! 4747: goto done;
! 4748: switch (ctxt->instate) {
! 4749: case XML_PARSER_EOF:
! 4750: /*
! 4751: * Document parsing is done !
! 4752: */
! 4753: goto done;
! 4754: case XML_PARSER_START:
! 4755: /*
! 4756: * Very first chars read from the document flow.
! 4757: */
! 4758: cur = in->cur[0];
! 4759: if (IS_BLANK(cur)) {
! 4760: SKIP_BLANKS;
! 4761: if (in->buf == NULL)
! 4762: avail = in->length - (in->cur - in->base);
! 4763: else
! 4764: avail = in->buf->buffer->use - (in->cur - in->base);
! 4765: }
! 4766: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
! 4767: ctxt->sax->setDocumentLocator(ctxt->userData,
! 4768: &xmlDefaultSAXLocator);
! 4769: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
! 4770: (!ctxt->disableSAX))
! 4771: ctxt->sax->startDocument(ctxt->userData);
! 4772:
! 4773: cur = in->cur[0];
! 4774: next = in->cur[1];
! 4775: if ((cur == '<') && (next == '!') &&
! 4776: (UPP(2) == 'D') && (UPP(3) == 'O') &&
! 4777: (UPP(4) == 'C') && (UPP(5) == 'T') &&
! 4778: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
! 4779: (UPP(8) == 'E')) {
! 4780: if ((!terminate) &&
! 4781: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
! 4782: goto done;
! 4783: #ifdef DEBUG_PUSH
! 4784: fprintf(stderr, "HPP: Parsing internal subset\n");
! 4785: #endif
! 4786: sgmlParseDocTypeDecl(ctxt);
! 4787: ctxt->instate = XML_PARSER_PROLOG;
! 4788: #ifdef DEBUG_PUSH
! 4789: fprintf(stderr, "HPP: entering PROLOG\n");
! 4790: #endif
! 4791: } else {
! 4792: ctxt->instate = XML_PARSER_MISC;
! 4793: }
! 4794: #ifdef DEBUG_PUSH
! 4795: fprintf(stderr, "HPP: entering MISC\n");
! 4796: #endif
! 4797: break;
! 4798: case XML_PARSER_MISC:
! 4799: SKIP_BLANKS;
! 4800: if (in->buf == NULL)
! 4801: avail = in->length - (in->cur - in->base);
! 4802: else
! 4803: avail = in->buf->buffer->use - (in->cur - in->base);
! 4804: if (avail < 2)
! 4805: goto done;
! 4806: cur = in->cur[0];
! 4807: next = in->cur[1];
! 4808: if ((cur == '<') && (next == '!') &&
! 4809: (in->cur[2] == '-') && (in->cur[3] == '-')) {
! 4810: if ((!terminate) &&
! 4811: (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
! 4812: goto done;
! 4813: #ifdef DEBUG_PUSH
! 4814: fprintf(stderr, "HPP: Parsing Comment\n");
! 4815: #endif
! 4816: sgmlParseComment(ctxt);
! 4817: ctxt->instate = XML_PARSER_MISC;
! 4818: } else if ((cur == '<') && (next == '!') &&
! 4819: (UPP(2) == 'D') && (UPP(3) == 'O') &&
! 4820: (UPP(4) == 'C') && (UPP(5) == 'T') &&
! 4821: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
! 4822: (UPP(8) == 'E')) {
! 4823: if ((!terminate) &&
! 4824: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
! 4825: goto done;
! 4826: #ifdef DEBUG_PUSH
! 4827: fprintf(stderr, "HPP: Parsing internal subset\n");
! 4828: #endif
! 4829: sgmlParseDocTypeDecl(ctxt);
! 4830: ctxt->instate = XML_PARSER_PROLOG;
! 4831: #ifdef DEBUG_PUSH
! 4832: fprintf(stderr, "HPP: entering PROLOG\n");
! 4833: #endif
! 4834: } else if ((cur == '<') && (next == '!') &&
! 4835: (avail < 9)) {
! 4836: goto done;
! 4837: } else {
! 4838: ctxt->instate = XML_PARSER_START_TAG;
! 4839: #ifdef DEBUG_PUSH
! 4840: fprintf(stderr, "HPP: entering START_TAG\n");
! 4841: #endif
! 4842: }
! 4843: break;
! 4844: case XML_PARSER_PROLOG:
! 4845: SKIP_BLANKS;
! 4846: if (in->buf == NULL)
! 4847: avail = in->length - (in->cur - in->base);
! 4848: else
! 4849: avail = in->buf->buffer->use - (in->cur - in->base);
! 4850: if (avail < 2)
! 4851: goto done;
! 4852: cur = in->cur[0];
! 4853: next = in->cur[1];
! 4854: if ((cur == '<') && (next == '!') &&
! 4855: (in->cur[2] == '-') && (in->cur[3] == '-')) {
! 4856: if ((!terminate) &&
! 4857: (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
! 4858: goto done;
! 4859: #ifdef DEBUG_PUSH
! 4860: fprintf(stderr, "HPP: Parsing Comment\n");
! 4861: #endif
! 4862: sgmlParseComment(ctxt);
! 4863: ctxt->instate = XML_PARSER_PROLOG;
! 4864: } else if ((cur == '<') && (next == '!') &&
! 4865: (avail < 4)) {
! 4866: goto done;
! 4867: } else {
! 4868: ctxt->instate = XML_PARSER_START_TAG;
! 4869: #ifdef DEBUG_PUSH
! 4870: fprintf(stderr, "HPP: entering START_TAG\n");
! 4871: #endif
! 4872: }
! 4873: break;
! 4874: case XML_PARSER_EPILOG:
! 4875: if (in->buf == NULL)
! 4876: avail = in->length - (in->cur - in->base);
! 4877: else
! 4878: avail = in->buf->buffer->use - (in->cur - in->base);
! 4879: if (avail < 1)
! 4880: goto done;
! 4881: cur = in->cur[0];
! 4882: if (IS_BLANK(cur)) {
! 4883: sgmlParseCharData(ctxt, 0);
! 4884: goto done;
! 4885: }
! 4886: if (avail < 2)
! 4887: goto done;
! 4888: next = in->cur[1];
! 4889: if ((cur == '<') && (next == '!') &&
! 4890: (in->cur[2] == '-') && (in->cur[3] == '-')) {
! 4891: if ((!terminate) &&
! 4892: (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
! 4893: goto done;
! 4894: #ifdef DEBUG_PUSH
! 4895: fprintf(stderr, "HPP: Parsing Comment\n");
! 4896: #endif
! 4897: sgmlParseComment(ctxt);
! 4898: ctxt->instate = XML_PARSER_EPILOG;
! 4899: } else if ((cur == '<') && (next == '!') &&
! 4900: (avail < 4)) {
! 4901: goto done;
! 4902: } else {
! 4903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4904: ctxt->sax->error(ctxt->userData,
! 4905: "Extra content at the end of the document\n");
! 4906: ctxt->wellFormed = 0;
! 4907: ctxt->errNo = XML_ERR_DOCUMENT_END;
! 4908: ctxt->instate = XML_PARSER_EOF;
! 4909: #ifdef DEBUG_PUSH
! 4910: fprintf(stderr, "HPP: entering EOF\n");
! 4911: #endif
! 4912: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
! 4913: ctxt->sax->endDocument(ctxt->userData);
! 4914: goto done;
! 4915: }
! 4916: break;
! 4917: case XML_PARSER_START_TAG: {
! 4918: xmlChar *name, *oldname;
! 4919: int depth = ctxt->nameNr;
! 4920: sgmlElemDescPtr info;
! 4921:
! 4922: if (avail < 2)
! 4923: goto done;
! 4924: cur = in->cur[0];
! 4925: if (cur != '<') {
! 4926: ctxt->instate = XML_PARSER_CONTENT;
! 4927: #ifdef DEBUG_PUSH
! 4928: fprintf(stderr, "HPP: entering CONTENT\n");
! 4929: #endif
! 4930: break;
! 4931: }
! 4932: if ((!terminate) &&
! 4933: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
! 4934: goto done;
! 4935:
! 4936: oldname = xmlStrdup(ctxt->name);
! 4937: sgmlParseStartTag(ctxt);
! 4938: name = ctxt->name;
! 4939: #ifdef DEBUG
! 4940: if (oldname == NULL)
! 4941: fprintf(stderr, "Start of element %s\n", name);
! 4942: else if (name == NULL)
! 4943: fprintf(stderr, "Start of element failed, was %s\n",
! 4944: oldname);
! 4945: else
! 4946: fprintf(stderr, "Start of element %s, was %s\n",
! 4947: name, oldname);
! 4948: #endif
! 4949: if (((depth == ctxt->nameNr) &&
! 4950: (!xmlStrcmp(oldname, ctxt->name))) ||
! 4951: (name == NULL)) {
! 4952: if (CUR == '>')
! 4953: NEXT;
! 4954: if (oldname != NULL)
! 4955: xmlFree(oldname);
! 4956: break;
! 4957: }
! 4958: if (oldname != NULL)
! 4959: xmlFree(oldname);
! 4960:
! 4961: /*
! 4962: * Lookup the info for that element.
! 4963: */
! 4964: info = sgmlTagLookup(name);
! 4965: if (info == NULL) {
! 4966: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4967: ctxt->sax->error(ctxt->userData, "Tag %s invalid\n",
! 4968: name);
! 4969: ctxt->wellFormed = 0;
! 4970: } else if (info->depr) {
! 4971: /***************************
! 4972: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
! 4973: ctxt->sax->warning(ctxt->userData,
! 4974: "Tag %s is deprecated\n",
! 4975: name);
! 4976: ***************************/
! 4977: }
! 4978:
! 4979: /*
! 4980: * Check for an Empty Element labelled the XML/SGML way
! 4981: */
! 4982: if ((CUR == '/') && (NXT(1) == '>')) {
! 4983: SKIP(2);
! 4984: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 4985: ctxt->sax->endElement(ctxt->userData, name);
! 4986: oldname = sgmlnamePop(ctxt);
! 4987: #ifdef DEBUG
! 4988: fprintf(stderr,"End of tag the XML way: popping out %s\n",
! 4989: oldname);
! 4990: #endif
! 4991: if (oldname != NULL)
! 4992: xmlFree(oldname);
! 4993: ctxt->instate = XML_PARSER_CONTENT;
! 4994: #ifdef DEBUG_PUSH
! 4995: fprintf(stderr, "HPP: entering CONTENT\n");
! 4996: #endif
! 4997: break;
! 4998: }
! 4999:
! 5000: if (CUR == '>') {
! 5001: NEXT;
! 5002: } else {
! 5003: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5004: ctxt->sax->error(ctxt->userData,
! 5005: "Couldn't find end of Start Tag %s\n",
! 5006: name);
! 5007: ctxt->wellFormed = 0;
! 5008:
! 5009: /*
! 5010: * end of parsing of this node.
! 5011: */
! 5012: if (!xmlStrcmp(name, ctxt->name)) {
! 5013: nodePop(ctxt);
! 5014: oldname = sgmlnamePop(ctxt);
! 5015: #ifdef DEBUG
! 5016: fprintf(stderr,
! 5017: "End of start tag problem: popping out %s\n", oldname);
! 5018: #endif
! 5019: if (oldname != NULL)
! 5020: xmlFree(oldname);
! 5021: }
! 5022:
! 5023: ctxt->instate = XML_PARSER_CONTENT;
! 5024: #ifdef DEBUG_PUSH
! 5025: fprintf(stderr, "HPP: entering CONTENT\n");
! 5026: #endif
! 5027: break;
! 5028: }
! 5029:
! 5030: /*
! 5031: * Check for an Empty Element from DTD definition
! 5032: */
! 5033: if ((info != NULL) && (info->empty)) {
! 5034: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
! 5035: ctxt->sax->endElement(ctxt->userData, name);
! 5036: oldname = sgmlnamePop(ctxt);
! 5037: #ifdef DEBUG
! 5038: fprintf(stderr,"End of empty tag %s : popping out %s\n", name, oldname);
! 5039: #endif
! 5040: if (oldname != NULL)
! 5041: xmlFree(oldname);
! 5042: }
! 5043: ctxt->instate = XML_PARSER_CONTENT;
! 5044: #ifdef DEBUG_PUSH
! 5045: fprintf(stderr, "HPP: entering CONTENT\n");
! 5046: #endif
! 5047: break;
! 5048: }
! 5049: case XML_PARSER_CONTENT: {
! 5050: long cons;
! 5051: /*
! 5052: * Handle preparsed entities and charRef
! 5053: */
! 5054: if (ctxt->token != 0) {
! 5055: xmlChar chr[2] = { 0 , 0 } ;
! 5056:
! 5057: chr[0] = (xmlChar) ctxt->token;
! 5058: sgmlCheckParagraph(ctxt);
! 5059: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
! 5060: ctxt->sax->characters(ctxt->userData, chr, 1);
! 5061: ctxt->token = 0;
! 5062: ctxt->checkIndex = 0;
! 5063: }
! 5064: if ((avail == 1) && (terminate)) {
! 5065: cur = in->cur[0];
! 5066: if ((cur != '<') && (cur != '&')) {
! 5067: if (ctxt->sax != NULL) {
! 5068: if (IS_BLANK(cur)) {
! 5069: if (ctxt->sax->ignorableWhitespace != NULL)
! 5070: ctxt->sax->ignorableWhitespace(
! 5071: ctxt->userData, &cur, 1);
! 5072: } else {
! 5073: sgmlCheckParagraph(ctxt);
! 5074: if (ctxt->sax->characters != NULL)
! 5075: ctxt->sax->characters(
! 5076: ctxt->userData, &cur, 1);
! 5077: }
! 5078: }
! 5079: ctxt->token = 0;
! 5080: ctxt->checkIndex = 0;
! 5081: NEXT;
! 5082: }
! 5083: break;
! 5084: }
! 5085: if (avail < 2)
! 5086: goto done;
! 5087: cur = in->cur[0];
! 5088: next = in->cur[1];
! 5089: cons = ctxt->nbChars;
! 5090: /*
! 5091: * Sometimes DOCTYPE arrives in the middle of the document
! 5092: */
! 5093: if ((cur == '<') && (next == '!') &&
! 5094: (UPP(2) == 'D') && (UPP(3) == 'O') &&
! 5095: (UPP(4) == 'C') && (UPP(5) == 'T') &&
! 5096: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
! 5097: (UPP(8) == 'E')) {
! 5098: if ((!terminate) &&
! 5099: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
! 5100: goto done;
! 5101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5102: ctxt->sax->error(ctxt->userData,
! 5103: "Misplaced DOCTYPE declaration\n");
! 5104: ctxt->wellFormed = 0;
! 5105: sgmlParseDocTypeDecl(ctxt);
! 5106: } else if ((cur == '<') && (next == '!') &&
! 5107: (in->cur[2] == '-') && (in->cur[3] == '-')) {
! 5108: if ((!terminate) &&
! 5109: (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
! 5110: goto done;
! 5111: #ifdef DEBUG_PUSH
! 5112: fprintf(stderr, "HPP: Parsing Comment\n");
! 5113: #endif
! 5114: sgmlParseComment(ctxt);
! 5115: ctxt->instate = XML_PARSER_CONTENT;
! 5116: } else if ((cur == '<') && (next == '!') && (avail < 4)) {
! 5117: goto done;
! 5118: } else if ((cur == '<') && (next == '/')) {
! 5119: ctxt->instate = XML_PARSER_END_TAG;
! 5120: ctxt->checkIndex = 0;
! 5121: #ifdef DEBUG_PUSH
! 5122: fprintf(stderr, "HPP: entering END_TAG\n");
! 5123: #endif
! 5124: break;
! 5125: } else if (cur == '<') {
! 5126: ctxt->instate = XML_PARSER_START_TAG;
! 5127: ctxt->checkIndex = 0;
! 5128: #ifdef DEBUG_PUSH
! 5129: fprintf(stderr, "HPP: entering START_TAG\n");
! 5130: #endif
! 5131: break;
! 5132: } else if (cur == '&') {
! 5133: if ((!terminate) &&
! 5134: (sgmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
! 5135: goto done;
! 5136: #ifdef DEBUG_PUSH
! 5137: fprintf(stderr, "HPP: Parsing Reference\n");
! 5138: #endif
! 5139: /* TODO: check generation of subtrees if noent !!! */
! 5140: sgmlParseReference(ctxt);
! 5141: } else {
! 5142: /* TODO Avoid the extra copy, handle directly !!!!!! */
! 5143: /*
! 5144: * Goal of the following test is :
! 5145: * - minimize calls to the SAX 'character' callback
! 5146: * when they are mergeable
! 5147: */
! 5148: if ((ctxt->inputNr == 1) &&
! 5149: (avail < SGML_PARSER_BIG_BUFFER_SIZE)) {
! 5150: if ((!terminate) &&
! 5151: (sgmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
! 5152: goto done;
! 5153: }
! 5154: ctxt->checkIndex = 0;
! 5155: #ifdef DEBUG_PUSH
! 5156: fprintf(stderr, "HPP: Parsing char data\n");
! 5157: #endif
! 5158: sgmlParseCharData(ctxt, 0);
! 5159: }
! 5160: if (cons == ctxt->nbChars) {
! 5161: if (ctxt->node != NULL) {
! 5162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5163: ctxt->sax->error(ctxt->userData,
! 5164: "detected an error in element content\n");
! 5165: ctxt->wellFormed = 0;
! 5166: NEXT;
! 5167: }
! 5168: break;
! 5169: }
! 5170:
! 5171: break;
! 5172: }
! 5173: case XML_PARSER_END_TAG:
! 5174: if (avail < 2)
! 5175: goto done;
! 5176: if ((!terminate) &&
! 5177: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
! 5178: goto done;
! 5179: sgmlParseEndTag(ctxt);
! 5180: if (ctxt->nameNr == 0) {
! 5181: ctxt->instate = XML_PARSER_EPILOG;
! 5182: } else {
! 5183: ctxt->instate = XML_PARSER_CONTENT;
! 5184: }
! 5185: ctxt->checkIndex = 0;
! 5186: #ifdef DEBUG_PUSH
! 5187: fprintf(stderr, "HPP: entering CONTENT\n");
! 5188: #endif
! 5189: break;
! 5190: case XML_PARSER_CDATA_SECTION:
! 5191: fprintf(stderr, "HPP: internal error, state == CDATA\n");
! 5192: ctxt->instate = XML_PARSER_CONTENT;
! 5193: ctxt->checkIndex = 0;
! 5194: #ifdef DEBUG_PUSH
! 5195: fprintf(stderr, "HPP: entering CONTENT\n");
! 5196: #endif
! 5197: break;
! 5198: case XML_PARSER_DTD:
! 5199: fprintf(stderr, "HPP: internal error, state == DTD\n");
! 5200: ctxt->instate = XML_PARSER_CONTENT;
! 5201: ctxt->checkIndex = 0;
! 5202: #ifdef DEBUG_PUSH
! 5203: fprintf(stderr, "HPP: entering CONTENT\n");
! 5204: #endif
! 5205: break;
! 5206: case XML_PARSER_COMMENT:
! 5207: fprintf(stderr, "HPP: internal error, state == COMMENT\n");
! 5208: ctxt->instate = XML_PARSER_CONTENT;
! 5209: ctxt->checkIndex = 0;
! 5210: #ifdef DEBUG_PUSH
! 5211: fprintf(stderr, "HPP: entering CONTENT\n");
! 5212: #endif
! 5213: break;
! 5214: case XML_PARSER_PI:
! 5215: fprintf(stderr, "HPP: internal error, state == PI\n");
! 5216: ctxt->instate = XML_PARSER_CONTENT;
! 5217: ctxt->checkIndex = 0;
! 5218: #ifdef DEBUG_PUSH
! 5219: fprintf(stderr, "HPP: entering CONTENT\n");
! 5220: #endif
! 5221: break;
! 5222: case XML_PARSER_ENTITY_DECL:
! 5223: fprintf(stderr, "HPP: internal error, state == ENTITY_DECL\n");
! 5224: ctxt->instate = XML_PARSER_CONTENT;
! 5225: ctxt->checkIndex = 0;
! 5226: #ifdef DEBUG_PUSH
! 5227: fprintf(stderr, "HPP: entering CONTENT\n");
! 5228: #endif
! 5229: break;
! 5230: case XML_PARSER_ENTITY_VALUE:
! 5231: fprintf(stderr, "HPP: internal error, state == ENTITY_VALUE\n");
! 5232: ctxt->instate = XML_PARSER_CONTENT;
! 5233: ctxt->checkIndex = 0;
! 5234: #ifdef DEBUG_PUSH
! 5235: fprintf(stderr, "HPP: entering DTD\n");
! 5236: #endif
! 5237: break;
! 5238: case XML_PARSER_ATTRIBUTE_VALUE:
! 5239: fprintf(stderr, "HPP: internal error, state == ATTRIBUTE_VALUE\n");
! 5240: ctxt->instate = XML_PARSER_START_TAG;
! 5241: ctxt->checkIndex = 0;
! 5242: #ifdef DEBUG_PUSH
! 5243: fprintf(stderr, "HPP: entering START_TAG\n");
! 5244: #endif
! 5245: break;
! 5246: case XML_PARSER_SYSTEM_LITERAL:
! 5247: fprintf(stderr, "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
! 5248: ctxt->instate = XML_PARSER_CONTENT;
! 5249: ctxt->checkIndex = 0;
! 5250: #ifdef DEBUG_PUSH
! 5251: fprintf(stderr, "HPP: entering CONTENT\n");
! 5252: #endif
! 5253: break;
! 5254: }
! 5255: }
! 5256: done:
! 5257: if ((avail == 0) && (terminate)) {
! 5258: sgmlAutoClose(ctxt, NULL);
! 5259: if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
! 5260: /*
! 5261: * SAX: end of the document processing.
! 5262: */
! 5263: ctxt->instate = XML_PARSER_EOF;
! 5264: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
! 5265: ctxt->sax->endDocument(ctxt->userData);
! 5266: }
! 5267: }
! 5268: if ((ctxt->myDoc != NULL) &&
! 5269: ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
! 5270: (ctxt->instate == XML_PARSER_EPILOG))) {
! 5271: xmlDtdPtr dtd;
! 5272: dtd = xmlGetIntSubset(ctxt->myDoc);
! 5273: if (dtd == NULL)
! 5274: ctxt->myDoc->intSubset =
! 5275: xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
! 5276: BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
! 5277: BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
! 5278: }
! 5279: #ifdef DEBUG_PUSH
! 5280: fprintf(stderr, "HPP: done %d\n", ret);
! 5281: #endif
! 5282: return(ret);
! 5283: }
! 5284:
! 5285: /**
! 5286: * sgmlParseTry:
! 5287: * @ctxt: an SGML parser context
! 5288: *
! 5289: * Try to progress on parsing
! 5290: *
! 5291: * Returns zero if no parsing was possible
! 5292: */
! 5293: int
! 5294: sgmlParseTry(sgmlParserCtxtPtr ctxt) {
! 5295: return(sgmlParseTryOrFinish(ctxt, 0));
! 5296: }
! 5297:
! 5298: /**
! 5299: * sgmlParseChunk:
! 5300: * @ctxt: an XML parser context
! 5301: * @chunk: an char array
! 5302: * @size: the size in byte of the chunk
! 5303: * @terminate: last chunk indicator
! 5304: *
! 5305: * Parse a Chunk of memory
! 5306: *
! 5307: * Returns zero if no error, the xmlParserErrors otherwise.
! 5308: */
! 5309: int
! 5310: sgmlParseChunk(sgmlParserCtxtPtr ctxt, const char *chunk, int size,
! 5311: int terminate) {
! 5312: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
! 5313: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
! 5314: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
! 5315: int cur = ctxt->input->cur - ctxt->input->base;
! 5316:
! 5317: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
! 5318: ctxt->input->base = ctxt->input->buf->buffer->content + base;
! 5319: ctxt->input->cur = ctxt->input->base + cur;
! 5320: #ifdef DEBUG_PUSH
! 5321: fprintf(stderr, "HPP: pushed %d\n", size);
! 5322: #endif
! 5323:
! 5324: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
! 5325: sgmlParseTryOrFinish(ctxt, terminate);
! 5326: } else if (ctxt->instate != XML_PARSER_EOF) {
! 5327: xmlParserInputBufferPush(ctxt->input->buf, 0, "");
! 5328: sgmlParseTryOrFinish(ctxt, terminate);
! 5329: }
! 5330: if (terminate) {
! 5331: if ((ctxt->instate != XML_PARSER_EOF) &&
! 5332: (ctxt->instate != XML_PARSER_EPILOG) &&
! 5333: (ctxt->instate != XML_PARSER_MISC)) {
! 5334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 5335: ctxt->sax->error(ctxt->userData,
! 5336: "Extra content at the end of the document\n");
! 5337: ctxt->wellFormed = 0;
! 5338: ctxt->errNo = XML_ERR_DOCUMENT_END;
! 5339: }
! 5340: if (ctxt->instate != XML_PARSER_EOF) {
! 5341: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
! 5342: ctxt->sax->endDocument(ctxt->userData);
! 5343: }
! 5344: ctxt->instate = XML_PARSER_EOF;
! 5345: }
! 5346: return((xmlParserErrors) ctxt->errNo);
! 5347: }
! 5348:
! 5349: /************************************************************************
! 5350: * *
! 5351: * User entry points *
! 5352: * *
! 5353: ************************************************************************/
! 5354:
! 5355: /**
! 5356: * sgmlCreatePushParserCtxt :
! 5357: * @sax: a SAX handler
! 5358: * @user_data: The user data returned on SAX callbacks
! 5359: * @chunk: a pointer to an array of chars
! 5360: * @size: number of chars in the array
! 5361: * @filename: an optional file name or URI
! 5362: * @enc: an optional encoding
! 5363: *
! 5364: * Create a parser context for using the SGML parser in push mode
! 5365: * To allow content encoding detection, @size should be >= 4
! 5366: * The value of @filename is used for fetching external entities
! 5367: * and error/warning reports.
! 5368: *
! 5369: * Returns the new parser context or NULL
! 5370: */
! 5371: sgmlParserCtxtPtr
! 5372: sgmlCreatePushParserCtxt(sgmlSAXHandlerPtr sax, void *user_data,
! 5373: const char *chunk, int size, const char *filename,
! 5374: xmlCharEncoding enc) {
! 5375: sgmlParserCtxtPtr ctxt;
! 5376: sgmlParserInputPtr inputStream;
! 5377: xmlParserInputBufferPtr buf;
! 5378:
! 5379: buf = xmlAllocParserInputBuffer(enc);
! 5380: if (buf == NULL) return(NULL);
! 5381:
! 5382: ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
! 5383: if (ctxt == NULL) {
! 5384: xmlFree(buf);
! 5385: return(NULL);
! 5386: }
! 5387: memset(ctxt, 0, sizeof(sgmlParserCtxt));
! 5388: sgmlInitParserCtxt(ctxt);
! 5389: if (sax != NULL) {
! 5390: if (ctxt->sax != &sgmlDefaultSAXHandler)
! 5391: xmlFree(ctxt->sax);
! 5392: ctxt->sax = (sgmlSAXHandlerPtr) xmlMalloc(sizeof(sgmlSAXHandler));
! 5393: if (ctxt->sax == NULL) {
! 5394: xmlFree(buf);
! 5395: xmlFree(ctxt);
! 5396: return(NULL);
! 5397: }
! 5398: memcpy(ctxt->sax, sax, sizeof(sgmlSAXHandler));
! 5399: if (user_data != NULL)
! 5400: ctxt->userData = user_data;
! 5401: }
! 5402: if (filename == NULL) {
! 5403: ctxt->directory = NULL;
! 5404: } else {
! 5405: ctxt->directory = xmlParserGetDirectory(filename);
! 5406: }
! 5407:
! 5408: inputStream = sgmlNewInputStream(ctxt);
! 5409: if (inputStream == NULL) {
! 5410: xmlFreeParserCtxt(ctxt);
! 5411: return(NULL);
! 5412: }
! 5413:
! 5414: if (filename == NULL)
! 5415: inputStream->filename = NULL;
! 5416: else
! 5417: inputStream->filename = xmlMemStrdup(filename);
! 5418: inputStream->buf = buf;
! 5419: inputStream->base = inputStream->buf->buffer->content;
! 5420: inputStream->cur = inputStream->buf->buffer->content;
! 5421:
! 5422: inputPush(ctxt, inputStream);
! 5423:
! 5424: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
! 5425: (ctxt->input->buf != NULL)) {
! 5426: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
! 5427: #ifdef DEBUG_PUSH
! 5428: fprintf(stderr, "HPP: pushed %d\n", size);
! 5429: #endif
! 5430: }
! 5431:
! 5432: return(ctxt);
! 5433: }
! 5434:
! 5435: /**
! 5436: * sgmlSAXParseDoc :
! 5437: * @cur: a pointer to an array of xmlChar
! 5438: * @encoding: a free form C string describing the SGML document encoding, or NULL
! 5439: * @sax: the SAX handler block
! 5440: * @userData: if using SAX, this pointer will be provided on callbacks.
! 5441: *
! 5442: * parse an SGML in-memory document and build a tree.
! 5443: * It use the given SAX function block to handle the parsing callback.
! 5444: * If sax is NULL, fallback to the default DOM tree building routines.
! 5445: *
! 5446: * Returns the resulting document tree
! 5447: */
! 5448:
! 5449: sgmlDocPtr
! 5450: sgmlSAXParseDoc(xmlChar *cur, const char *encoding, sgmlSAXHandlerPtr sax, void *userData) {
! 5451: sgmlDocPtr ret;
! 5452: sgmlParserCtxtPtr ctxt;
! 5453:
! 5454: if (cur == NULL) return(NULL);
! 5455:
! 5456:
! 5457: ctxt = sgmlCreateDocParserCtxt(cur, encoding);
! 5458: if (ctxt == NULL) return(NULL);
! 5459: if (sax != NULL) {
! 5460: ctxt->sax = sax;
! 5461: ctxt->userData = userData;
! 5462: }
! 5463:
! 5464: sgmlParseDocument(ctxt);
! 5465: ret = ctxt->myDoc;
! 5466: if (sax != NULL) {
! 5467: ctxt->sax = NULL;
! 5468: ctxt->userData = NULL;
! 5469: }
! 5470: sgmlFreeParserCtxt(ctxt);
! 5471:
! 5472: return(ret);
! 5473: }
! 5474:
! 5475: /**
! 5476: * sgmlParseDoc :
! 5477: * @cur: a pointer to an array of xmlChar
! 5478: * @encoding: a free form C string describing the SGML document encoding, or NULL
! 5479: *
! 5480: * parse an SGML in-memory document and build a tree.
! 5481: *
! 5482: * Returns the resulting document tree
! 5483: */
! 5484:
! 5485: sgmlDocPtr
! 5486: sgmlParseDoc(xmlChar *cur, const char *encoding) {
! 5487: return(sgmlSAXParseDoc(cur, encoding, NULL, NULL));
! 5488: }
! 5489:
! 5490:
! 5491: /**
! 5492: * sgmlCreateFileParserCtxt :
! 5493: * @filename: the filename
! 5494: * @encoding: a free form C string describing the SGML document encoding, or NULL
! 5495: *
! 5496: * Create a parser context for a file content.
! 5497: * Automatic support for ZLIB/Compress compressed document is provided
! 5498: * by default if found at compile-time.
! 5499: *
! 5500: * Returns the new parser context or NULL
! 5501: */
! 5502: sgmlParserCtxtPtr
! 5503: sgmlCreateFileParserCtxt(const char *filename, const char *encoding)
! 5504: {
! 5505: sgmlParserCtxtPtr ctxt;
! 5506: sgmlParserInputPtr inputStream;
! 5507: xmlParserInputBufferPtr buf;
! 5508: /* sgmlCharEncoding enc; */
! 5509:
! 5510: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
! 5511: if (buf == NULL) return(NULL);
! 5512:
! 5513: ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
! 5514: if (ctxt == NULL) {
! 5515: perror("malloc");
! 5516: return(NULL);
! 5517: }
! 5518: memset(ctxt, 0, sizeof(sgmlParserCtxt));
! 5519: sgmlInitParserCtxt(ctxt);
! 5520: inputStream = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
! 5521: if (inputStream == NULL) {
! 5522: perror("malloc");
! 5523: xmlFree(ctxt);
! 5524: return(NULL);
! 5525: }
! 5526: memset(inputStream, 0, sizeof(sgmlParserInput));
! 5527:
! 5528: inputStream->filename = xmlMemStrdup(filename);
! 5529: inputStream->line = 1;
! 5530: inputStream->col = 1;
! 5531: inputStream->buf = buf;
! 5532: inputStream->directory = NULL;
! 5533:
! 5534: inputStream->base = inputStream->buf->buffer->content;
! 5535: inputStream->cur = inputStream->buf->buffer->content;
! 5536: inputStream->free = NULL;
! 5537:
! 5538: inputPush(ctxt, inputStream);
! 5539: return(ctxt);
! 5540: }
! 5541:
! 5542: /**
! 5543: * sgmlSAXParseFile :
! 5544: * @filename: the filename
! 5545: * @encoding: a free form C string describing the SGML document encoding, or NULL
! 5546: * @sax: the SAX handler block
! 5547: * @userData: if using SAX, this pointer will be provided on callbacks.
! 5548: *
! 5549: * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
! 5550: * compressed document is provided by default if found at compile-time.
! 5551: * It use the given SAX function block to handle the parsing callback.
! 5552: * If sax is NULL, fallback to the default DOM tree building routines.
! 5553: *
! 5554: * Returns the resulting document tree
! 5555: */
! 5556:
! 5557: sgmlDocPtr
! 5558: sgmlSAXParseFile(const char *filename, const char *encoding, sgmlSAXHandlerPtr sax,
! 5559: void *userData) {
! 5560: sgmlDocPtr ret;
! 5561: sgmlParserCtxtPtr ctxt;
! 5562: sgmlSAXHandlerPtr oldsax = NULL;
! 5563:
! 5564: ctxt = sgmlCreateFileParserCtxt(filename, encoding);
! 5565: if (ctxt == NULL) return(NULL);
! 5566: if (sax != NULL) {
! 5567: oldsax = ctxt->sax;
! 5568: ctxt->sax = sax;
! 5569: ctxt->userData = userData;
! 5570: }
! 5571:
! 5572: sgmlParseDocument(ctxt);
! 5573:
! 5574: ret = ctxt->myDoc;
! 5575: if (sax != NULL) {
! 5576: ctxt->sax = oldsax;
! 5577: ctxt->userData = NULL;
! 5578: }
! 5579: sgmlFreeParserCtxt(ctxt);
! 5580:
! 5581: return(ret);
! 5582: }
! 5583:
! 5584: /**
! 5585: * sgmlParseFile :
! 5586: * @filename: the filename
! 5587: * @encoding: a free form C string describing the SGML document encoding, or NULL
! 5588: *
! 5589: * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
! 5590: * compressed document is provided by default if found at compile-time.
! 5591: *
! 5592: * Returns the resulting document tree
! 5593: */
! 5594:
! 5595: sgmlDocPtr
! 5596: sgmlParseFile(const char *filename, const char *encoding) {
! 5597: return(sgmlSAXParseFile(filename, encoding, NULL, NULL));
! 5598: }
! 5599:
! 5600: #endif /* LIBXML_SGML_ENABLED */
Webmaster