Annotation of XML/SGMLparser.c, revision 1.3

1.1       veillard    1: /*
                      2:  * SGMLparser.c : an attempt to parse Docbook documents
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
                      9: #ifdef WIN32
                     10: #include "win32config.h"
                     11: #else
                     12: #include "config.h"
                     13: #endif
                     14: 
                     15: #include "xmlversion.h"
                     16: #ifdef LIBXML_SGML_ENABLED
                     17: 
                     18: #include <stdio.h>
                     19: #include <string.h>
                     20: #ifdef HAVE_CTYPE_H
                     21: #include <ctype.h>
                     22: #endif
                     23: #ifdef HAVE_STDLIB_H
                     24: #include <stdlib.h>
                     25: #endif
                     26: #ifdef HAVE_SYS_STAT_H
                     27: #include <sys/stat.h>
                     28: #endif
                     29: #ifdef HAVE_FCNTL_H
                     30: #include <fcntl.h>
                     31: #endif
                     32: #ifdef HAVE_UNISTD_H
                     33: #include <unistd.h>
                     34: #endif
                     35: #ifdef HAVE_ZLIB_H
                     36: #include <zlib.h>
                     37: #endif
                     38: 
                     39: #include <libxml/xmlmemory.h>
                     40: #include <libxml/tree.h>
                     41: #include <libxml/SGMLparser.h>
                     42: #include <libxml/entities.h>
                     43: #include <libxml/encoding.h>
                     44: #include <libxml/parser.h>
                     45: #include <libxml/valid.h>
                     46: #include <libxml/parserInternals.h>
                     47: #include <libxml/xmlIO.h>
                     48: #include <libxml/SAX.h>
1.3     ! veillard   49: #include <libxml/uri.h>
1.1       veillard   50: #include "xml-error.h"
                     51: 
                     52: #define SGML_MAX_NAMELEN 1000
                     53: #define INPUT_CHUNK     50
                     54: #define SGML_PARSER_BIG_BUFFER_SIZE 1000
                     55: #define SGML_PARSER_BUFFER_SIZE 100
                     56: 
                     57: /* #define DEBUG */
                     58: /* #define DEBUG_PUSH */
                     59: 
                     60: /************************************************************************
                     61:  *                                                                     *
                     62:  *             Parser stacks related functions and macros              *
                     63:  *                                                                     *
                     64:  ************************************************************************/
                     65: 
                     66: /*
                     67:  * Generic function for accessing stacks in the Parser Context
                     68:  */
                     69: 
                     70: #define PUSH_AND_POP(scope, type, name)                                        \
                     71: scope int sgml##name##Push(sgmlParserCtxtPtr ctxt, type value) {       \
                     72:     if (ctxt->name##Nr >= ctxt->name##Max) {                           \
                     73:        ctxt->name##Max *= 2;                                           \
                     74:         ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,         \
                     75:                     ctxt->name##Max * sizeof(ctxt->name##Tab[0]));     \
                     76:         if (ctxt->name##Tab == NULL) {                                 \
                     77:            fprintf(stderr, "realloc failed !\n");                      \
                     78:            return(0);                                                  \
                     79:        }                                                               \
                     80:     }                                                                  \
                     81:     ctxt->name##Tab[ctxt->name##Nr] = value;                           \
                     82:     ctxt->name = value;                                                        \
                     83:     return(ctxt->name##Nr++);                                          \
                     84: }                                                                      \
                     85: scope type sgml##name##Pop(sgmlParserCtxtPtr ctxt) {                   \
                     86:     type ret;                                                          \
                     87:     if (ctxt->name##Nr < 0) return(0);                                 \
                     88:     ctxt->name##Nr--;                                                  \
                     89:     if (ctxt->name##Nr < 0) return(0);                                 \
                     90:     if (ctxt->name##Nr > 0)                                            \
                     91:        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
                     92:     else                                                               \
                     93:         ctxt->name = NULL;                                             \
                     94:     ret = ctxt->name##Tab[ctxt->name##Nr];                             \
                     95:     ctxt->name##Tab[ctxt->name##Nr] = 0;                               \
                     96:     return(ret);                                                       \
                     97: }                                                                      \
                     98: 
                     99: PUSH_AND_POP(extern, xmlNodePtr, node)
                    100: PUSH_AND_POP(extern, xmlChar*, name)
                    101: 
                    102: /*
                    103:  * Macros for accessing the content. Those should be used only by the parser,
                    104:  * and not exported.
                    105:  *
                    106:  * Dirty macros, i.e. one need to make assumption on the context to use them
                    107:  *
                    108:  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
                    109:  *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
                    110:  *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
                    111:  *           in UNICODE mode. This should be used internally by the parser
                    112:  *           only to compare to ASCII values otherwise it would break when
                    113:  *           running with UTF-8 encoding.
                     114:  *   NXT(n)  returns the n'th next xmlChar. As with CUR, it should be used
                     115:  *           only to compare against ASCII based substrings.
                    116:  *   UPP(n)  returns the n'th next xmlChar converted to uppercase. Same as CUR
                    117:  *           it should be used only to compare on ASCII based substring.
                    118:  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
                    119:  *           strings within the parser.
                    120:  *
                    121:  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
                    122:  *
                    123:  *   CURRENT Returns the current char value, with the full decoding of
                    124:  *           UTF-8 if we are using this mode. It returns an int.
                    125:  *   NEXT    Skip to the next character, this does the proper decoding
                    126:  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
                    127:  *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
                    128:  */
                    129: 
                    130: #define UPPER (toupper(*ctxt->input->cur))
                    131: 
                    132: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
                    133: 
                    134: #define NXT(val) ctxt->input->cur[(val)]
                    135: 
                    136: #define UPP(val) (toupper(ctxt->input->cur[(val)]))
                    137: 
                    138: #define CUR_PTR ctxt->input->cur
                    139: 
                    140: #define SHRINK  xmlParserInputShrink(ctxt->input)
                    141: 
                    142: #define GROW  xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
                    143: 
                    144: #define CURRENT ((int) (*ctxt->input->cur))
                    145: 
                    146: #define SKIP_BLANKS sgmlSkipBlankChars(ctxt);
                    147: 
                    148: #if 0
                    149: #define CUR ((int) (*ctxt->input->cur))
                    150: #define NEXT sgmlNextChar(ctxt);
                    151: #else
                    152: /* Inported from XML */
                    153: 
                    154: /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
                    155: #define CUR ((int) (*ctxt->input->cur))
                    156: #define NEXT xmlNextChar(ctxt);ctxt->nbChars++;
                    157: 
                    158: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
                    159: #define NXT(val) ctxt->input->cur[(val)]
                    160: #define CUR_PTR ctxt->input->cur
                    161: 
                    162: 
                    163: #define NEXTL(l)                                                       \
                    164:     if (*(ctxt->input->cur) == '\n') {                                 \
                    165:        ctxt->input->line++; ctxt->input->col = 1;                      \
                    166:     } else ctxt->input->col++;                                         \
                    167:     ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++;
                    168:     
                    169: /************
                    170:     \
                    171:     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);    \
                    172:     if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
                    173:  ************/
                    174: 
                    175: #define CUR_CHAR(l) sgmlCurrentChar(ctxt, &l);
                    176: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
                    177: 
                    178: #define COPY_BUF(l,b,i,v)                                              \
                    179:     if (l == 1) b[i++] = (xmlChar) v;                                  \
                    180:     else i += xmlCopyChar(l,&b[i],v);
                    181: #endif
                    182: 
                    183: /**
                    184:  * sgmlCurrentChar:
                    185:  * @ctxt:  the SGML parser context
                    186:  * @len:  pointer to the length of the char read
                    187:  *
                    188:  * The current char value, if using UTF-8 this may actaully span multiple
                    189:  * bytes in the input buffer. Implement the end of line normalization:
                    190:  * 2.11 End-of-Line Handling
                    191:  * If the encoding is unspecified, in the case we find an ISO-Latin-1
                    192:  * char, then the encoding converter is plugged in automatically.
                    193:  *
                    194:  * Returns the current char value and its lenght
                    195:  */
                    196: 
                    197: int
                    198: sgmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
                    199:     if (ctxt->instate == XML_PARSER_EOF)
                    200:        return(0);
                    201: 
                    202:     if (ctxt->token != 0) {
                    203:        *len = 0;
                    204:        return(ctxt->token);
                    205:     }  
                    206:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
                    207:        /*
                    208:         * We are supposed to handle UTF8, check it's valid
                    209:         * From rfc2044: encoding of the Unicode values on UTF-8:
                    210:         *
                    211:         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    212:         * 0000 0000-0000 007F   0xxxxxxx
                    213:         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                    214:         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                    215:         *
                    216:         * Check for the 0x110000 limit too
                    217:         */
                    218:        const unsigned char *cur = ctxt->input->cur;
                    219:        unsigned char c;
                    220:        unsigned int val;
                    221: 
                    222:        c = *cur;
                    223:        if (c & 0x80) {
                    224:            if (cur[1] == 0)
                    225:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    226:            if ((cur[1] & 0xc0) != 0x80)
                    227:                goto encoding_error;
                    228:            if ((c & 0xe0) == 0xe0) {
                    229: 
                    230:                if (cur[2] == 0)
                    231:                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    232:                if ((cur[2] & 0xc0) != 0x80)
                    233:                    goto encoding_error;
                    234:                if ((c & 0xf0) == 0xf0) {
                    235:                    if (cur[3] == 0)
                    236:                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    237:                    if (((c & 0xf8) != 0xf0) ||
                    238:                        ((cur[3] & 0xc0) != 0x80))
                    239:                        goto encoding_error;
                    240:                    /* 4-byte code */
                    241:                    *len = 4;
                    242:                    val = (cur[0] & 0x7) << 18;
                    243:                    val |= (cur[1] & 0x3f) << 12;
                    244:                    val |= (cur[2] & 0x3f) << 6;
                    245:                    val |= cur[3] & 0x3f;
                    246:                } else {
                    247:                  /* 3-byte code */
                    248:                    *len = 3;
                    249:                    val = (cur[0] & 0xf) << 12;
                    250:                    val |= (cur[1] & 0x3f) << 6;
                    251:                    val |= cur[2] & 0x3f;
                    252:                }
                    253:            } else {
                    254:              /* 2-byte code */
                    255:                *len = 2;
                    256:                val = (cur[0] & 0x1f) << 6;
                    257:                val |= cur[1] & 0x3f;
                    258:            }
                    259:            if (!IS_CHAR(val)) {
                    260:                if ((ctxt->sax != NULL) &&
                    261:                    (ctxt->sax->error != NULL))
                    262:                    ctxt->sax->error(ctxt->userData, 
                    263:                                     "Char 0x%X out of allowed range\n", val);
                    264:                ctxt->errNo = XML_ERR_INVALID_ENCODING;
                    265:                ctxt->wellFormed = 0;
                    266:                ctxt->disableSAX = 1;
                    267:            }    
                    268:            return(val);
                    269:        } else {
                    270:            /* 1-byte code */
                    271:            *len = 1;
                    272:            return((int) *ctxt->input->cur);
                    273:        }
                    274:     }
                    275:     /*
                    276:      * Assume it's a fixed lenght encoding (1) with
                    277:      * a compatibke encoding for the ASCII set, since
                    278:      * XML constructs only use < 128 chars
                    279:      */
                    280:     *len = 1;
                    281:     if ((int) *ctxt->input->cur < 0x80)
                    282:        return((int) *ctxt->input->cur);
                    283: 
                    284:     /*
                    285:      * Humm this is bad, do an automatic flow conversion
                    286:      */
                    287:     xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
                    288:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
                    289:     return(xmlCurrentChar(ctxt, len));
                    290: 
                    291: encoding_error:
                    292:     /*
                    293:      * If we detect an UTF8 error that probably mean that the
                    294:      * input encoding didn't get properly advertized in the
                    295:      * declaration header. Report the error and switch the encoding
                    296:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    297:      * encoding !)
                    298:      */
                    299:     if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
                    300:        ctxt->sax->error(ctxt->userData, 
                    301:                         "Input is not proper UTF-8, indicate encoding !\n");
                    302:        ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    303:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    304:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    305:     }
                    306:     ctxt->errNo = XML_ERR_INVALID_ENCODING;
                    307: 
                    308:     ctxt->charset = XML_CHAR_ENCODING_8859_1; 
                    309:     *len = 1;
                    310:     return((int) *ctxt->input->cur);
                    311: }
                    312: 
                    313: /**
                    314:  * sgmlNextChar:
                    315:  * @ctxt:  the SGML parser context
                    316:  *
                    317:  * Skip to the next char input char.
                    318:  */
                    319: 
                    320: void
                    321: sgmlNextChar(sgmlParserCtxtPtr ctxt) {
                    322:     if (ctxt->instate == XML_PARSER_EOF)
                    323:        return;
                    324:     if ((*ctxt->input->cur == 0) &&
                    325:         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
                    326:            xmlPopInput(ctxt);
                    327:     } else {
                    328:         if (*(ctxt->input->cur) == '\n') {
                    329:            ctxt->input->line++; ctxt->input->col = 1;
                    330:        } else ctxt->input->col++;
                    331:        ctxt->input->cur++;
                    332:        ctxt->nbChars++;
                    333:         if (*ctxt->input->cur == 0)
                    334:            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    335:     }
                    336: }
                    337: 
                    338: /**
                    339:  * sgmlSkipBlankChars:
                    340:  * @ctxt:  the SGML parser context
                    341:  *
                    342:  * skip all blanks character found at that point in the input streams.
                    343:  *
                    344:  * Returns the number of space chars skipped
                    345:  */
                    346: 
                    347: int
                    348: sgmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
                    349:     int res = 0;
                    350: 
                    351:     while (IS_BLANK(*(ctxt->input->cur))) {
                    352:        if ((*ctxt->input->cur == 0) &&
                    353:            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
                    354:                xmlPopInput(ctxt);
                    355:        } else {
                    356:            if (*(ctxt->input->cur) == '\n') {
                    357:                ctxt->input->line++; ctxt->input->col = 1;
                    358:            } else ctxt->input->col++;
                    359:            ctxt->input->cur++;
                    360:            ctxt->nbChars++;
                    361:            if (*ctxt->input->cur == 0)
                    362:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    363:        }
                    364:        res++;
                    365:     }
                    366:     return(res);
                    367: }
                    368: 
                    369: 
                    370: 
                    371: /************************************************************************
                    372:  *                                                                     *
                    373:  *             The list of SGML elements and their properties          *
                    374:  *                                                                     *
                    375:  ************************************************************************/
                    376: 
                    377: /*
                     378:  *  Start Tag: 1 means the start tag can be omitted
                     379:  *  End Tag:   1 means the end tag can be omitted
                    380:  *             2 means it's forbidden (empty elements)
                    381:  *  Depr:      this element is deprecated
                    382:  *  DTD:       1 means that this element is valid only in the Loose DTD
                    383:  *             2 means that this element is valid only in the Frameset DTD
                    384:  *
                    385:  * Name,Start Tag,End Tag,  Empty,  Depr.,    DTD, Description
                    386:  */
                    387: sgmlElemDesc  docbookElementTable[] = {
                    388: { "abbrev",    0,      0,      0,      3,      0, "" }, /* word */
                    389: { "abstract",  0,      0,      0,      9,      0, "" }, /* title */
                    390: { "accel",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    391: { "ackno",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    392: { "acronym",   0,      0,      0,      3,      0, "" }, /* word */
                    393: { "action",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    394: { "address",   0,      0,      0,      1,      0, "" },
                    395: { "affiliation",0,     0,      0,      9,      0, "" }, /* shortaffil */
                    396: { "alt",       0,      0,      0,      1,      0, "" },
                    397: { "anchor",    0,      2,      1,      0,      0, "" },
                    398: { "answer",    0,      0,      0,      9,      0, "" }, /* label */
                    399: { "appendix",  0,      0,      0,      9,      0, "" }, /* appendixinfo */
                    400: { "appendixinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    401: { "application",0,     0,      0,      2,      0, "" }, /* para */
                    402: { "area",      0,      2,      1,      0,      0, "" },
                    403: { "areaset",   0,      0,      0,      9,      0, "" }, /* area */
                    404: { "areaspec",  0,      0,      0,      9,      0, "" }, /* area */
                    405: { "arg",       0,      0,      0,      1,      0, "" },
                    406: { "article",   0,      0,      0,      9,      0, "" }, /* div.title.content */
                    407: { "articleinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    408: { "artpagenums",0,     0,      0,      4,      0, "" }, /* docinfo */
                    409: { "attribution",0,     0,      0,      2,      0, "" }, /* para */
                    410: { "audiodata", 0,      2,      1,      0,      0, "" },
                    411: { "audioobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    412: { "authorblurb",0,     0,      0,      9,      0, "" }, /* title */
                    413: { "authorgroup",0,     0,      0,      9,      0, "" }, /* author */
                    414: { "authorinitials",0,  0,      0,      4,      0, "" }, /* docinfo */
                    415: { "author",    0,      0,      0,      9,      0, "" }, /* person.ident.mix */
                    416: { "beginpage", 0,      2,      1,      0,      0, "" },
                    417: { "bibliodiv", 0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    418: { "biblioentry",0,     0,      0,      9,      0, "" }, /* articleinfo */
                    419: { "bibliography",0,    0,      0,      9,      0, "" }, /* bibliographyinfo */
                    420: { "bibliographyinfo",0,        0,      0,      9,      0, "" }, /* graphic */
                    421: { "bibliomisc",        0,      0,      0,      2,      0, "" }, /* para */
                    422: { "bibliomixed",0,     0,      0,      1,      0, "" }, /* %bibliocomponent.mix, bibliomset) */
                    423: { "bibliomset",        0,      0,      0,      1,      0, "" }, /* %bibliocomponent.mix; | bibliomset) */
                    424: { "biblioset", 0,      0,      0,      9,      0, "" }, /* bibliocomponent.mix */
                    425: { "blockquote",        0,      0,      0,      9,      0, "" }, /* title */
                    426: { "book",      0,      0,      0,      9,      0, "" }, /* div.title.content */
                    427: { "bookinfo",  0,      0,      0,      9,      0, "" }, /* graphic */
                    428: { "bridgehead",        0,      0,      0,      8,      0, "" }, /* title */
                    429: { "callout",   0,      0,      0,      9,      0, "" }, /* component.mix */
                    430: { "calloutlist",0,     0,      0,      9,      0, "" }, /* formalobject.title.content */
                    431: { "caption",   0,      0,      0,      9,      0, "" }, /* textobject.mix */
                    432: { "caution",   0,      0,      0,      9,      0, "" }, /* title */
                    433: { "chapter",   0,      0,      0,      9,      0, "" }, /* chapterinfo */
                    434: { "chapterinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    435: { "citation",  0,      0,      0,      2,      0, "" }, /* para */
                    436: { "citerefentry",0,    0,      0,      9,      0, "" }, /* refentrytitle */
                    437: { "citetitle", 0,      0,      0,      2,      0, "" }, /* para */
                    438: { "city",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    439: { "classname", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    440: { "classsynopsisinfo",0,0,     0,      9,      0, "" }, /* cptr */
                    441: { "classsynopsis",0,   0,      0,      9,      0, "" }, /* ooclass */
                    442: { "cmdsynopsis",0,     0,      0,      9,      0, "" }, /* command */
                    443: { "co",                0,      2,      1,      0,      0, "" },
                    444: { "collab",    0,      0,      0,      9,      0, "" }, /* collabname */
                    445: { "collabname",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    446: { "colophon",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    447: { "colspec",   0,      2,      1,      0,      0, "" },
                    448: { "colspec",   0,      2,      1,      0,      0, "" },
                    449: { "command",   0,      0,      0,      9,      0, "" }, /* cptr */
                    450: { "computeroutput",0,  0,      0,      9,      0, "" }, /* cptr */
                    451: { "confdates", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    452: { "confgroup", 0,      0,      0,      9,      0, "" }, /* confdates */
                    453: { "confnum",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    454: { "confsponsor",0,     0,      0,      4,      0, "" }, /* docinfo */
                    455: { "conftitle", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    456: { "constant",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    457: { "constructorsynopsis",0,0,   0,      9,      0, "" }, /* modifier */
                    458: { "contractnum",0,     0,      0,      4,      0, "" }, /* docinfo */
                    459: { "contractsponsor",0, 0,      0,      4,      0, "" }, /* docinfo */
                    460: { "contrib",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    461: { "copyright", 0,      0,      0,      9,      0, "" }, /* year */
                    462: { "corpauthor",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    463: { "corpname",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    464: { "country",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    465: { "database",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    466: { "date",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    467: { "dedication",        0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    468: { "destructorsynopsis",0,0,    0,      9,      0, "" }, /* modifier */
                    469: { "edition",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    470: { "editor",    0,      0,      0,      9,      0, "" }, /* person.ident.mix */
                    471: { "email",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    472: { "emphasis",  0,      0,      0,      2,      0, "" }, /* para */
                    473: { "entry",     0,      0,      0,      9,      0, "" }, /* tbl.entry.mdl */
                    474: { "entrytbl",  0,      0,      0,      9,      0, "" }, /* tbl.entrytbl.mdl */
                    475: { "envar",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    476: { "epigraph",  0,      0,      0,      9,      0, "" }, /* attribution */
                    477: { "equation",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    478: { "errorcode", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    479: { "errorname", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    480: { "errortype", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    481: { "example",   0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    482: { "exceptionname",0,   0,      0,      7,      0, "" }, /* smallcptr */
                    483: { "fax",       0,      0,      0,      4,      0, "" }, /* docinfo */
                    484: { "fieldsynopsis",     0,      0,      0,      9,      0, "" }, /* modifier */
                    485: { "figure",    0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    486: { "filename",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    487: { "firstname", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    488: { "firstterm", 0,      0,      0,      3,      0, "" }, /* word */
                    489: { "footnote",  0,      0,      0,      9,      0, "" }, /* footnote.mix */
                    490: { "footnoteref",0,     2,      1,      0,      0, "" },
                    491: { "foreignphrase",0,   0,      0,      2,      0, "" }, /* para */
                    492: { "formalpara",        0,      0,      0,      9,      0, "" }, /* title */
                    493: { "funcdef",   0,      0,      0,      1,      0, "" },
                    494: { "funcparams",        0,      0,      0,      9,      0, "" }, /* cptr */
                    495: { "funcprototype",0,   0,      0,      9,      0, "" }, /* funcdef */
                    496: { "funcsynopsis",0,    0,      0,      9,      0, "" }, /* funcsynopsisinfo */
                    497: { "funcsynopsisinfo",  0,      0,      0,      9,      0, "" }, /* cptr */
                    498: { "function",  0,      0,      0,      9,      0, "" }, /* cptr */
                    499: { "glossary",  0,      0,      0,      9,      0, "" }, /* glossaryinfo */
                    500: { "glossaryinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    501: { "glossdef",  0,      0,      0,      9,      0, "" }, /* glossdef.mix */
                    502: { "glossdiv",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    503: { "glossentry",        0,      0,      0,      9,      0, "" }, /* glossterm */
                    504: { "glosslist", 0,      0,      0,      9,      0, "" }, /* glossentry */
                    505: { "glossseealso",0,    0,      0,      2,      0, "" }, /* para */
                    506: { "glosssee",  0,      0,      0,      2,      0, "" }, /* para */
                    507: { "glossterm", 0,      0,      0,      2,      0, "" }, /* para */
                    508: { "graphic",   0,      2,      1,      0,      0, "" },
                    509: { "graphicco", 0,      0,      0,      9,      0, "" }, /* areaspec */
                    510: { "group",     0,      0,      0,      9,      0, "" }, /* arg */
                    511: { "guibutton", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    512: { "guiicon",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    513: { "guilabel",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    514: { "guimenuitem",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    515: { "guimenu",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    516: { "guisubmenu",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    517: { "hardware",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    518: { "highlights",        0,      0,      0,      9,      0, "" }, /* highlights.mix */
                    519: { "holder",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    520: { "honorific", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    521: { "imagedata", 0,      2,      1,      0,      0, "" },
                    522: { "imageobjectco",0,   0,      0,      9,      0, "" }, /* areaspec */
                    523: { "imageobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    524: { "important", 0,      0,      0,      9,      0, "" }, /* title */
                    525: { "indexdiv",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    526: { "indexentry",        0,      0,      0,      9,      0, "" }, /* primaryie */
                    527: { "index",     0,      0,      0,      9,      0, "" }, /* indexinfo */
                    528: { "indexinfo", 0,      0,      0,      9,      0, "" }, /* graphic */
                    529: { "indexterm", 0,      0,      0,      9,      0, "" }, /* primary */
                    530: { "informalequation",0,        0,      0,      9,      0, "" }, /* equation.content */
                    531: { "informalexample",0, 0,      0,      9,      0, "" }, /* example.mix */
                    532: { "informalfigure",0,  0,      0,      9,      0, "" }, /* figure.mix */
                    533: { "informaltable",0,   0,      0,      9,      0, "" }, /* graphic */
                    534: { "initializer",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    535: { "inlineequation",0,  0,      0,      9,      0, "" }, /* inlineequation.content */
                    536: { "inlinegraphic",0,   2,      1,      0,      0, "" },
                    537: { "inlinemediaobject",0,0,     0,      9,      0, "" }, /* objectinfo */
                    538: { "interfacename",0,   0,      0,      7,      0, "" }, /* smallcptr */
                    539: { "interface", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    540: { "invpartnumber",0,   0,      0,      4,      0, "" }, /* docinfo */
                    541: { "isbn",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    542: { "issn",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    543: { "issuenum",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    544: { "itemizedlist",0,    0,      0,      9,      0, "" }, /* formalobject.title.content */
                    545: { "itermset",  0,      0,      0,      9,      0, "" }, /* indexterm */
                    546: { "jobtitle",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    547: { "keycap",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    548: { "keycode",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    549: { "keycombo",  0,      0,      0,      9,      0, "" }, /* keycap */
                    550: { "keysym",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    551: { "keyword",   0,      0,      0,      1,      0, "" },
                    552: { "keywordset",        0,      0,      0,      9,      0, "" }, /* keyword */
                    553: { "label",     0,      0,      0,      3,      0, "" }, /* word */
                    554: { "legalnotice",0,     0,      0,      9,      0, "" }, /* title */
                    555: { "lineage",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    556: { "lineannotation",0,  0,      0,      2,      0, "" }, /* para */
                    557: { "link",      0,      0,      0,      2,      0, "" }, /* para */
                    558: { "listitem",  0,      0,      0,      9,      0, "" }, /* component.mix */
                    559: { "literal",   0,      0,      0,      9,      0, "" }, /* cptr */
                    560: { "literallayout",0,   0,      0,      2,      0, "" }, /* para */
                    561: { "lot",       0,      0,      0,      9,      0, "" }, /* bookcomponent.title.content */
                    562: { "lotentry",  0,      0,      0,      2,      0, "" }, /* para */
                    563: { "manvolnum", 0,      0,      0,      3,      0, "" }, /* word */
                    564: { "markup",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    565: { "medialabel",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    566: { "mediaobjectco",0,   0,      0,      9,      0, "" }, /* objectinfo */
                    567: { "mediaobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    568: { "member",    0,      0,      0,      2,      0, "" }, /* para */
                    569: { "menuchoice",        0,      0,      0,      9,      0, "" }, /* shortcut */
                    570: { "methodname",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    571: { "methodparam",0,     0,      0,      9,      0, "" }, /* modifier */
                    572: { "methodsynopsis",0,  0,      0,      9,      0, "" }, /* modifier */
                    573: { "modespec",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    574: { "modifier",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    575: { "mousebutton",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    576: { "msgaud",    0,      0,      0,      2,      0, "" }, /* para */
                    577: { "msgentry",  0,      0,      0,      9,      0, "" }, /* msg */
                    578: { "msgexplan", 0,      0,      0,      9,      0, "" }, /* title */
                    579: { "msginfo",   0,      0,      0,      9,      0, "" }, /* msglevel */
                    580: { "msglevel",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    581: { "msgmain",   0,      0,      0,      9,      0, "" }, /* title */
                    582: { "msgorig",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    583: { "msgrel",    0,      0,      0,      9,      0, "" }, /* title */
                    584: { "msgset",    0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    585: { "msgsub",    0,      0,      0,      9,      0, "" }, /* title */
                    586: { "msgtext",   0,      0,      0,      9,      0, "" }, /* component.mix */
                    587: { "msg",       0,      0,      0,      9,      0, "" }, /* title */
                    588: { "note",      0,      0,      0,      9,      0, "" }, /* title */
                    589: { "objectinfo",        0,      0,      0,      9,      0, "" }, /* graphic */
                    590: { "olink",     0,      0,      0,      2,      0, "" }, /* para */
                    591: { "ooclass",   0,      0,      0,      9,      0, "" }, /* modifier */
                    592: { "ooexception",0,     0,      0,      9,      0, "" }, /* modifier */
                    593: { "oointerface",0,     0,      0,      9,      0, "" }, /* modifier */
                    594: { "optional",  0,      0,      0,      9,      0, "" }, /* cptr */
                    595: { "option",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    596: { "orderedlist",0,     0,      0,      9,      0, "" }, /* formalobject.title.content */
                    597: { "orgdiv",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    598: { "orgname",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    599: { "otheraddr", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    600: { "othercredit",0,     0,      0,      9,      0, "" }, /* person.ident.mix */
                    601: { "othername", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    602: { "pagenums",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    603: { "paramdef",  0,      0,      0,      1,      0, "" },
                    604: { "parameter", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    605: { "para",      0,      0,      0,      2,      0, "" }, /* para */
                    606: { "partinfo",  0,      0,      0,      9,      0, "" }, /* graphic */
                    607: { "partintro", 0,      0,      0,      9,      0, "" }, /* div.title.content */
                    608: { "part",      0,      0,      0,      9,      0, "" }, /* partinfo */
                    609: { "phone",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    610: { "phrase",    0,      0,      0,      2,      0, "" }, /* para */
                    611: { "pob",       0,      0,      0,      4,      0, "" }, /* docinfo */
                    612: { "postcode",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    613: { "prefaceinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    614: { "preface",   0,      0,      0,      9,      0, "" }, /* prefaceinfo */
                    615: { "primaryie", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    616: { "primary  ", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    617: { "printhistory",0,    0,      0,      9,      0, "" }, /* para.class */
                    618: { "procedure", 0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    619: { "productname",0,     0,      0,      2,      0, "" }, /* para */
                    620: { "productnumber",0,   0,      0,      4,      0, "" }, /* docinfo */
                    621: { "programlistingco",0,        0,      0,      9,      0, "" }, /* areaspec */
                    622: { "programlisting",0,  0,      0,      2,      0, "" }, /* para */
                    623: { "prompt",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    624: { "property",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    625: { "pubdate",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    626: { "publishername",0,   0,      0,      4,      0, "" }, /* docinfo */
                    627: { "publisher", 0,      0,      0,      9,      0, "" }, /* publishername */
                    628: { "pubsnumber",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    629: { "qandadiv",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    630: { "qandaentry",        0,      0,      0,      9,      0, "" }, /* revhistory */
                    631: { "qandaset",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    632: { "question",  0,      0,      0,      9,      0, "" }, /* label */
                    633: { "quote",     0,      0,      0,      2,      0, "" }, /* para */
                    634: { "refclass",  0,      0,      0,      9,      0, "" }, /* refclass.char.mix */
                    635: { "refdescriptor",0,   0,      0,      9,      0, "" }, /* refname.char.mix */
                    636: { "refentryinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    637: { "refentry",  0,      0,      0,      9,      0, "" }, /* ndxterm.class */
                    638: { "refentrytitle",0,   0,      0,      2,      0, "" }, /* para */
                    639: { "referenceinfo",0,   0,      0,      9,      0, "" }, /* graphic */
                    640: { "reference", 0,      0,      0,      9,      0, "" }, /* referenceinfo */
                    641: { "refmeta",   0,      0,      0,      9,      0, "" }, /* ndxterm.class */
                    642: { "refmiscinfo",0,     0,      0,      4,      0, "" }, /* docinfo */
                    643: { "refnamediv",        0,      0,      0,      9,      0, "" }, /* refdescriptor */
                    644: { "refname",   0,      0,      0,      9,      0, "" }, /* refname.char.mix */
                    645: { "refpurpose",        0,      0,      0,      9,      0, "" }, /* refinline.char.mix */
                    646: { "refsect1info",0,    0,      0,      9,      0, "" }, /* graphic */
                    647: { "refsect1",  0,      0,      0,      9,      0, "" }, /* refsect */
                    648: { "refsect2info",0,    0,      0,      9,      0, "" }, /* graphic */
                    649: { "refsect2",  0,      0,      0,      9,      0, "" }, /* refsect */
                    650: { "refsect3info",0,    0,      0,      9,      0, "" }, /* graphic */
                    651: { "refsect3",  0,      0,      0,      9,      0, "" }, /* refsect */
                    652: { "refsynopsisdivinfo",0,0,    0,      9,      0, "" }, /* graphic */
                    653: { "refsynopsisdiv",0,  0,      0,      9,      0, "" }, /* refsynopsisdivinfo */
                    654: { "releaseinfo",0,     0,      0,      4,      0, "" }, /* docinfo */
                    655: { "remark",    0,      0,      0,      2,      0, "" }, /* para */
                    656: { "replaceable",0,     0,      0,      1,      0, "" },
                    657: { "returnvalue",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    658: { "revdescription",0,  0,      0,      9,      0, "" }, /* revdescription.mix */
                    659: { "revhistory",        0,      0,      0,      9,      0, "" }, /* revision */
                    660: { "revision",  0,      0,      0,      9,      0, "" }, /* revnumber */
                    661: { "revnumber", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    662: { "revremark", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    663: { "row",       0,      0,      0,      9,      0, "" }, /* tbl.row.mdl */
                    664: { "row",       0,      0,      0,      9,      0, "" }, /* tbl.row.mdl */
                    665: { "sbr",       0,      2,      1,      0,      0, "" },
                    666: { "screenco",  0,      0,      0,      9,      0, "" }, /* areaspec */
                    667: { "screeninfo",        0,      0,      0,      2,      0, "" }, /* para */
                    668: { "screen",    0,      0,      0,      2,      0, "" }, /* para */
                    669: { "screenshot",        0,      0,      0,      9,      0, "" }, /* screeninfo */
                    670: { "secondaryie",0,     0,      0,      4,      0, "" }, /* ndxterm */
                    671: { "secondary", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    672: { "sect1info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    673: { "sect1",     0,      0,      0,      9,      0, "" }, /* sect */
                    674: { "sect2info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    675: { "sect2",     0,      0,      0,      9,      0, "" }, /* sect */
                    676: { "sect3info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    677: { "sect3",     0,      0,      0,      9,      0, "" }, /* sect */
                    678: { "sect4info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    679: { "sect4",     0,      0,      0,      9,      0, "" }, /* sect */
                    680: { "sect5info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    681: { "sect5",     0,      0,      0,      9,      0, "" }, /* sect */
                    682: { "sectioninfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    683: { "section",   0,      0,      0,      9,      0, "" }, /* sectioninfo */
                    684: { "seealsoie", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    685: { "seealso",   0,      0,      0,      4,      0, "" }, /* ndxterm */
                    686: { "seeie",     0,      0,      0,      4,      0, "" }, /* ndxterm */
                    687: { "see",       0,      0,      0,      4,      0, "" }, /* ndxterm */
                    688: { "seglistitem",0,     0,      0,      9,      0, "" }, /* seg */
                    689: { "segmentedlist",0,   0,      0,      9,      0, "" }, /* formalobject.title.content */
                    690: { "seg",       0,      0,      0,      2,      0, "" }, /* para */
                    691: { "segtitle",  0,      0,      0,      8,      0, "" }, /* title */
                    692: { "seriesvolnums",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    693: { "set",       0,      0,      0,      9,      0, "" }, /* div.title.content */
                    694: { "setindexinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    695: { "setindex",  0,      0,      0,      9,      0, "" }, /* setindexinfo */
                    696: { "setinfo",   0,      0,      0,      9,      0, "" }, /* graphic */
                    697: { "sgmltag",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    698: { "shortaffil",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    699: { "shortcut",  0,      0,      0,      9,      0, "" }, /* keycap */
                    700: { "sidebarinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    701: { "sidebar",   0,      0,      0,      9,      0, "" }, /* sidebarinfo */
                    702: { "simpara",   0,      0,      0,      2,      0, "" }, /* para */
                    703: { "simplelist",        0,      0,      0,      9,      0, "" }, /* member */
                    704: { "simplemsgentry",    0,      0,      0,      9,      0, "" }, /* msgtext */
                    705: { "simplesect",        0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    706: { "spanspec",  0,      2,      1,      0,      0, "" },
                    707: { "state",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    708: { "step",      0,      0,      0,      9,      0, "" }, /* title */
                    709: { "street",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    710: { "structfield",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    711: { "structname",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    712: { "subjectset",        0,      0,      0,      9,      0, "" }, /* subject */
                    713: { "subject",   0,      0,      0,      9,      0, "" }, /* subjectterm */
                    714: { "subjectterm",0,     0,      0,      1,      0, "" },
                    715: { "subscript", 0,      0,      0,      1,      0, "" },
                    716: { "substeps",  0,      0,      0,      9,      0, "" }, /* step */
                    717: { "subtitle",  0,      0,      0,      8,      0, "" }, /* title */
                    718: { "superscript",       0,      0,      0,      1,      0, "" },
                    719: { "surname",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    720: { "symbol",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    721: { "synopfragment",     0,      0,      0,      9,      0, "" }, /* arg */
                    722: { "synopfragmentref",  0,      0,      0,      1,      0, "" },
                    723: { "synopsis",  0,      0,      0,      2,      0, "" }, /* para */
                    724: { "systemitem",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    725: { "table",     0,      0,      0,      9,      0, "" }, /* tbl.table.mdl */
                    726: /* { "%tbl.table.name;",       0,      0,      0,      9,      0, "" },*/ /* tbl.table.mdl */
                    727: { "tbody",     0,      0,      0,      9,      0, "" }, /* row */
                    728: { "tbody",     0,      0,      0,      9,      0, "" }, /* row */
                    729: { "term",      0,      0,      0,      2,      0, "" }, /* para */
                    730: { "tertiaryie",        0,      0,      0,      4,      0, "" }, /* ndxterm */
                    731: { "tertiary ", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    732: { "textobject",        0,      0,      0,      9,      0, "" }, /* objectinfo */
                    733: { "tfoot",     0,      0,      0,      9,      0, "" }, /* tbl.hdft.mdl */
                    734: { "tgroup",    0,      0,      0,      9,      0, "" }, /* tbl.tgroup.mdl */
                    735: { "tgroup",    0,      0,      0,      9,      0, "" }, /* tbl.tgroup.mdl */
                    736: { "thead",     0,      0,      0,      9,      0, "" }, /* row */
                    737: { "thead",     0,      0,      0,      9,      0, "" }, /* tbl.hdft.mdl */
                    738: { "tip",       0,      0,      0,      9,      0, "" }, /* title */
                    739: { "titleabbrev",0,     0,      0,      8,      0, "" }, /* title */
                    740: { "title",     0,      0,      0,      8,      0, "" }, /* title */
                    741: { "tocback",   0,      0,      0,      2,      0, "" }, /* para */
                    742: { "toc",       0,      0,      0,      9,      0, "" }, /* bookcomponent.title.content */
                    743: { "tocchap",   0,      0,      0,      9,      0, "" }, /* tocentry */
                    744: { "tocentry",  0,      0,      0,      2,      0, "" }, /* para */
                    745: { "tocfront",  0,      0,      0,      2,      0, "" }, /* para */
                    746: { "toclevel1", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    747: { "toclevel2", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    748: { "toclevel3", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    749: { "toclevel4", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    750: { "toclevel5", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    751: { "tocpart",   0,      0,      0,      9,      0, "" }, /* tocentry */
                    752: { "token",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    753: { "trademark", 0,      0,      0,      1,      0, "" },
                    754: { "type",      0,      0,      0,      7,      0, "" }, /* smallcptr */
                    755: { "ulink",     0,      0,      0,      2,      0, "" }, /* para */
                    756: { "userinput", 0,      0,      0,      9,      0, "" }, /* cptr */
                    757: { "varargs",   0,      2,      1,      0,      0, "" },
                    758: { "variablelist",0,    0,      0,      9,      0, "" }, /* formalobject.title.content */
                    759: { "varlistentry",0,    0,      0,      9,      0, "" }, /* term */
                    760: { "varname",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    761: { "videodata", 0,      2,      1,      0,      0, "" },
                    762: { "videoobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    763: { "void",      0,      2,      1,      0,      0, "" },
                    764: { "volumenum", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    765: { "warning",   0,      0,      0,      9,      0, "" }, /* title */
                    766: { "wordasword",        0,      0,      0,      3,      0, "" }, /* word */
                    767: { "xref",      0,      2,      1,      0,      0, "" },
                    768: { "year",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    769: };
                    770: 
                    /*
                     * Start tags that imply the end of the current element.
                     *
                     * The table is a sequence of NULL-terminated groups, closed by one
                     * extra NULL: any tag of a group implies the end of the current element
                     * if the type of that element belongs to the same group.
                     *
                     * According to the SGML DTD, HR should be added to the second group, as
                     * it is not allowed within H1, H2, H3, etc. It is left out on purpose:
                     * that case should be tolerated because many documents contain rules in
                     * headings.
                     */
                    char *sgmlEquEnd[] = {
                        /* first group */
                        "dt",
                        "dd",
                        "li",
                        "option",
                        NULL,

                        /* second group (see the note on HR above) */
                        "h1",
                        "h2",
                        "h3",
                        "h4",
                        "h5",
                        "h6",
                        NULL,

                        /* third group */
                        "ol",
                        "menu",
                        "dir",
                        "address",
                        "pre",
                        "listing",
                        "xmp",
                        NULL,

                        /* end of table */
                        NULL
                    };
                    787: 
                    /*
                     * Start tags that imply the end of the current element.
                     *
                     * No entries are registered yet: the table only holds its NULL
                     * terminator.
                     */
                    char *sgmlStartClose[] = { NULL };
                    794: 
                    /*
                     * NULL-terminated list of the SGML elements which are not supposed to
                     * have CDATA content and where a p element will be implied.
                     * The list is currently empty.
                     *
                     * TODO: extend this list by reading what the SGML DTD says about
                     *       implied paragraphs
                     */
                    static char *sgmlNoContentElements[] = { NULL };
                    805: 
                    806: 
                    /* Pointers into sgmlStartClose, filled in by sgmlInitAutoClose(). */
                    static char **sgmlStartCloseIndex[100];
                    /* Tested by the auto-close code to know whether the index must be built. */
                    static int sgmlStartCloseIndexinitialized = 0;
                    809: 
                    810: /************************************************************************
                    811:  *                                                                     *
                    812:  *             functions to handle SGML specific data                  *
                    813:  *                                                                     *
                    814:  ************************************************************************/
                    815: 
                    816: /**
                    817:  * sgmlInitAutoClose:
                    818:  *
                    819:  * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
                    820:  *
                    821:  */
                    822: void
                    823: sgmlInitAutoClose(void) {
                    824:     int index, i = 0;
                    825: 
                    826:     if (sgmlStartCloseIndexinitialized) return;
                    827: 
                    828:     for (index = 0;index < 100;index ++) sgmlStartCloseIndex[index] = NULL;
                    829:     index = 0;
                    830:     while ((sgmlStartClose[i] != NULL) && (index < 100 - 1)) {
                    831:         sgmlStartCloseIndex[index++] = &sgmlStartClose[i];
                    832:        while (sgmlStartClose[i] != NULL) i++;
                    833:        i++;
                    834:     }
                    835: }
                    836: 
                    837: /**
                    838:  * sgmlTagLookup:
                    839:  * @tag:  The tag name
                    840:  *
                    841:  * Lookup the SGML tag in the ElementTable
                    842:  *
                    843:  * Returns the related sgmlElemDescPtr or NULL if not found.
                    844:  */
                    845: sgmlElemDescPtr
                    846: sgmlTagLookup(const xmlChar *tag) {
                    847:     int i;
                    848: 
                    849:     for (i = 0; i < (sizeof(docbookElementTable) /
                    850:                      sizeof(docbookElementTable[0]));i++) {
                    851:         if (!xmlStrcmp(tag, BAD_CAST docbookElementTable[i].name))
                    852:            return(&docbookElementTable[i]);
                    853:     }
                    854:     return(NULL);
                    855: }
                    856: 
                    857: /**
                    858:  * sgmlCheckAutoClose:
                    859:  * @newtag:  The new tag name
                    860:  * @oldtag:  The old tag name
                    861:  *
                    862:  * Checks wether the new tag is one of the registered valid tags for closing old.
                    863:  * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
                    864:  *
                    865:  * Returns 0 if no, 1 if yes.
                    866:  */
                    867: int
                    868: sgmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) {
                    869:     int i, index;
                    870:     char **close;
                    871: 
                    872:     if (sgmlStartCloseIndexinitialized == 0) sgmlInitAutoClose();
                    873: 
                    874:     /* inefficient, but not a big deal */
                    875:     for (index = 0; index < 100;index++) {
                    876:         close = sgmlStartCloseIndex[index];
                    877:        if (close == NULL) return(0);
                    878:        if (!xmlStrcmp(BAD_CAST *close, newtag)) break;
                    879:     }
                    880: 
                    881:     i = close - sgmlStartClose;
                    882:     i++;
                    883:     while (sgmlStartClose[i] != NULL) {
                    884:         if (!xmlStrcmp(BAD_CAST sgmlStartClose[i], oldtag)) {
                    885:            return(1);
                    886:        }
                    887:        i++;
                    888:     }
                    889:     return(0);
                    890: }
                    891: 
                    892: /**
                    893:  * sgmlAutoCloseOnClose:
                    894:  * @ctxt:  an SGML parser context
                    895:  * @newtag:  The new tag name
                    896:  *
                    897:  * The HTmL DtD allows an ending tag to implicitely close other tags.
                    898:  */
                    899: void
                    900: sgmlAutoCloseOnClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
                    901:     sgmlElemDescPtr info;
                    902:     xmlChar *oldname;
                    903:     int i;
                    904: 
                    905:     if ((newtag[0] == '/') && (newtag[1] == 0))
                    906:        return;
                    907: 
                    908: #ifdef DEBUG
                    909:     fprintf(stderr,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
                    910:     for (i = 0;i < ctxt->nameNr;i++) 
                    911:         fprintf(stderr,"%d : %s\n", i, ctxt->nameTab[i]);
                    912: #endif
                    913: 
                    914:     for (i = (ctxt->nameNr - 1);i >= 0;i--) {
                    915:         if (!xmlStrcmp(newtag, ctxt->nameTab[i])) break;
                    916:     }
                    917:     if (i < 0) return;
                    918: 
                    919:     while (xmlStrcmp(newtag, ctxt->name)) {
                    920:        info = sgmlTagLookup(ctxt->name);
                    921:        if ((info == NULL) || (info->endTag == 1)) {
                    922: #ifdef DEBUG
                    923:            fprintf(stderr,"sgmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
                    924: #endif
                    925:         } else {
                    926:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    927:                ctxt->sax->error(ctxt->userData,
                    928:                 "Opening and ending tag mismatch: %s and %s\n",
                    929:                                 newtag, ctxt->name);
                    930:            ctxt->wellFormed = 0;
                    931:        }
                    932:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    933:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    934:        oldname = sgmlnamePop(ctxt);
                    935:        if (oldname != NULL) {
                    936: #ifdef DEBUG
                    937:            fprintf(stderr,"sgmlAutoCloseOnClose: popped %s\n", oldname);
                    938: #endif
                    939:            xmlFree(oldname);
                    940:        }       
                    941:     }
                    942: }
                    943: 
                    944: /**
                    945:  * sgmlAutoClose:
                    946:  * @ctxt:  an SGML parser context
                    947:  * @newtag:  The new tag name or NULL
                    948:  *
                    949:  * The HTmL DtD allows a tag to implicitely close other tags.
                    950:  * The list is kept in sgmlStartClose array. This function is
                    951:  * called when a new tag has been detected and generates the
                    952:  * appropriates closes if possible/needed.
                    953:  * If newtag is NULL this mean we are at the end of the resource
                    954:  * and we should check 
                    955:  */
                    956: void
                    957: sgmlAutoClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
                    958:     xmlChar *oldname;
                    959:     while ((newtag != NULL) && (ctxt->name != NULL) && 
                    960:            (sgmlCheckAutoClose(newtag, ctxt->name))) {
                    961: #ifdef DEBUG
                    962:        fprintf(stderr,"sgmlAutoClose: %s closes %s\n", newtag, ctxt->name);
                    963: #endif
                    964:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    965:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    966:        oldname = sgmlnamePop(ctxt);
                    967:        if (oldname != NULL) {
                    968: #ifdef DEBUG
                    969:            fprintf(stderr,"sgmlAutoClose: popped %s\n", oldname);
                    970: #endif
                    971:            xmlFree(oldname);
                    972:         }
                    973:     }
                    974: #if 0
                    975:     if (newtag == NULL) {
                    976:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"head");
                    977:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"body");
                    978:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"sgml");
                    979:     }
                    980:     while ((newtag == NULL) && (ctxt->name != NULL) &&
                    981:           ((!xmlStrcmp(ctxt->name, BAD_CAST"head")) ||
                    982:            (!xmlStrcmp(ctxt->name, BAD_CAST"body")) ||
                    983:            (!xmlStrcmp(ctxt->name, BAD_CAST"sgml")))) {
                    984: #ifdef DEBUG
                    985:        fprintf(stderr,"sgmlAutoClose: EOF closes %s\n", ctxt->name);
                    986: #endif
                    987:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    988:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    989:        oldname = sgmlnamePop(ctxt);
                    990:        if (oldname != NULL) {
                    991: #ifdef DEBUG
                    992:            fprintf(stderr,"sgmlAutoClose: popped %s\n", oldname);
                    993: #endif
                    994:            xmlFree(oldname);
                    995:         }
                    996:    }
                    997: #endif
                    998: }
                    999: 
                   1000: /**
                   1001:  * sgmlAutoCloseTag:
                   1002:  * @doc:  the SGML document
                   1003:  * @name:  The tag name
                   1004:  * @elem:  the SGML element
                   1005:  *
                   1006:  * The HTmL DtD allows a tag to implicitely close other tags.
                   1007:  * The list is kept in sgmlStartClose array. This function checks
                   1008:  * if the element or one of it's children would autoclose the
                   1009:  * given tag.
                   1010:  *
                   1011:  * Returns 1 if autoclose, 0 otherwise
                   1012:  */
                   1013: int
                   1014: sgmlAutoCloseTag(sgmlDocPtr doc, const xmlChar *name, sgmlNodePtr elem) {
                   1015:     sgmlNodePtr child;
                   1016: 
                   1017:     if (elem == NULL) return(1);
                   1018:     if (!xmlStrcmp(name, elem->name)) return(0);
                   1019:     if (sgmlCheckAutoClose(elem->name, name)) return(1);
                   1020:     child = elem->children;
                   1021:     while (child != NULL) {
                   1022:         if (sgmlAutoCloseTag(doc, name, child)) return(1);
                   1023:        child = child->next;
                   1024:     }
                   1025:     return(0);
                   1026: }
                   1027: 
                   1028: /**
                   1029:  * sgmlIsAutoClosed:
                   1030:  * @doc:  the SGML document
                   1031:  * @elem:  the SGML element
                   1032:  *
                   1033:  * The HTmL DtD allows a tag to implicitely close other tags.
                   1034:  * The list is kept in sgmlStartClose array. This function checks
                   1035:  * if a tag is autoclosed by one of it's child
                   1036:  *
                   1037:  * Returns 1 if autoclosed, 0 otherwise
                   1038:  */
                   1039: int
                   1040: sgmlIsAutoClosed(sgmlDocPtr doc, sgmlNodePtr elem) {
                   1041:     sgmlNodePtr child;
                   1042: 
                   1043:     if (elem == NULL) return(1);
                   1044:     child = elem->children;
                   1045:     while (child != NULL) {
                   1046:        if (sgmlAutoCloseTag(doc, elem->name, child)) return(1);
                   1047:        child = child->next;
                   1048:     }
                   1049:     return(0);
                   1050: }
                   1051: 
/**
 * sgmlCheckImplied:
 * @ctxt:  an SGML parser context
 * @newtag:  The new tag name
 *
 * Hook called when a new tag has been detected, meant to generate the
 * implied enclosing elements when they are missing.
 *
 * NOTE: the whole body is currently compiled out with "#if 0", so this
 * function does nothing and both parameters are unused. The disabled
 * code pushes implied "sgml", "head" or "body" elements on the name
 * stack and signals them through the SAX startElement callback.
 */
void
sgmlCheckImplied(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
#if 0
    if (!xmlStrcmp(newtag, BAD_CAST"sgml"))
       return;
    if (ctxt->nameNr <= 0) {
#ifdef DEBUG
       fprintf(stderr,"Implied element sgml: pushed sgml\n");
#endif    
       sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"sgml"));
       if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
           ctxt->sax->startElement(ctxt->userData, BAD_CAST"sgml", NULL);
    }
    if ((!xmlStrcmp(newtag, BAD_CAST"body")) || (!xmlStrcmp(newtag, BAD_CAST"head")))
        return;
    if (ctxt->nameNr <= 1) {
       if ((!xmlStrcmp(newtag, BAD_CAST"script")) ||
           (!xmlStrcmp(newtag, BAD_CAST"style")) ||
           (!xmlStrcmp(newtag, BAD_CAST"meta")) ||
           (!xmlStrcmp(newtag, BAD_CAST"link")) ||
           (!xmlStrcmp(newtag, BAD_CAST"title")) ||
           (!xmlStrcmp(newtag, BAD_CAST"base"))) {
           /* 
            * dropped OBJECT ... if you put it first BODY will be
            * assumed !
            */
#ifdef DEBUG
           fprintf(stderr,"Implied element head: pushed head\n");
#endif    
           sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"head"));
           if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
               ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
       } else {
#ifdef DEBUG
           fprintf(stderr,"Implied element body: pushed body\n");
#endif    
           sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"body"));
           if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
               ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
       }
    }
#endif
}
                   1104: 
                   1105: /**
                   1106:  * sgmlCheckParagraph
                   1107:  * @ctxt:  an SGML parser context
                   1108:  *
                   1109:  * Check whether a p element need to be implied before inserting
                   1110:  * characters in the current element.
                   1111:  *
                   1112:  * Returns 1 if a paragraph has been inserted, 0 if not and -1
                   1113:  *         in case of error.
                   1114:  */
                   1115: 
                   1116: int
                   1117: sgmlCheckParagraph(sgmlParserCtxtPtr ctxt) {
                   1118:     const xmlChar *tag;
                   1119:     int i;
                   1120: 
                   1121:     if (ctxt == NULL)
                   1122:        return(-1);
                   1123:     tag = ctxt->name;
                   1124:     if (tag == NULL) {
                   1125:        sgmlAutoClose(ctxt, BAD_CAST"p");
                   1126:        sgmlCheckImplied(ctxt, BAD_CAST"p");
                   1127:        sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
                   1128:        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1129:            ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
                   1130:        return(1);
                   1131:     }
                   1132:     for (i = 0; sgmlNoContentElements[i] != NULL; i++) {
                   1133:        if (!xmlStrcmp(tag, BAD_CAST sgmlNoContentElements[i])) {
                   1134: #ifdef DEBUG
                   1135:            fprintf(stderr,"Implied element paragraph\n");
                   1136: #endif    
                   1137:            sgmlAutoClose(ctxt, BAD_CAST"p");
                   1138:            sgmlCheckImplied(ctxt, BAD_CAST"p");
                   1139:            sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
                   1140:            if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1141:                ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
                   1142:            return(1);
                   1143:        }
                   1144:     }
                   1145:     return(0);
                   1146: }
                   1147: 
                   1148: /************************************************************************
                   1149:  *                                                                     *
                   1150:  *             The list of SGML predefined entities                    *
                   1151:  *                                                                     *
                   1152:  ************************************************************************/
                   1153: 
                   1154: 
                   1155: sgmlEntityDesc  docbookEntitiesTable[] = {
                   1156: /*
                   1157:  * the 4 absolute ones, plus apostrophe.
                   1158:  */
                   1159: { 0x0026, "amp", "AMPERSAND" },
                   1160: { 0x003C, "lt",        "LESS-THAN SIGN" },
                   1161: 
                   1162: /*
                   1163:  * Converted with VI macros from docbook ent files
                   1164:  */
                   1165: { 0x0021, "excl", "EXCLAMATION MARK" },
                   1166: { 0x0022, "quot", "QUOTATION MARK" },
                   1167: { 0x0023, "num", "NUMBER SIGN" },
                   1168: { 0x0024, "dollar", "DOLLAR SIGN" },
                   1169: { 0x0025, "percnt", "PERCENT SIGN" },
                   1170: { 0x0027, "apos", "APOSTROPHE" },
                   1171: { 0x0028, "lpar", "LEFT PARENTHESIS" },
                   1172: { 0x0029, "rpar", "RIGHT PARENTHESIS" },
                   1173: { 0x002A, "ast", "ASTERISK OPERATOR" },
                   1174: { 0x002B, "plus", "PLUS SIGN" },
                   1175: { 0x002C, "comma", "COMMA" },
                   1176: { 0x002D, "hyphen", "HYPHEN-MINUS" },
                   1177: { 0x002E, "period", "FULL STOP" },
                   1178: { 0x002F, "sol", "SOLIDUS" },
                   1179: { 0x003A, "colon", "COLON" },
                   1180: { 0x003B, "semi", "SEMICOLON" },
                   1181: { 0x003D, "equals", "EQUALS SIGN" },
                   1182: { 0x003E, "gt", "GREATER-THAN SIGN" },
                   1183: { 0x003F, "quest", "QUESTION MARK" },
                   1184: { 0x0040, "commat", "COMMERCIAL AT" },
                   1185: { 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
                   1186: { 0x005C, "bsol", "REVERSE SOLIDUS" },
                   1187: { 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
                   1188: { 0x005E, "circ", "RING OPERATOR" },
                   1189: { 0x005F, "lowbar", "LOW LINE" },
                   1190: { 0x0060, "grave", "GRAVE ACCENT" },
                   1191: { 0x007B, "lcub", "LEFT CURLY BRACKET" },
                   1192: { 0x007C, "verbar", "VERTICAL LINE" },
                   1193: { 0x007D, "rcub", "RIGHT CURLY BRACKET" },
                   1194: { 0x00A0, "nbsp", "NO-BREAK SPACE" },
                   1195: { 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
                   1196: { 0x00A2, "cent", "CENT SIGN" },
                   1197: { 0x00A3, "pound", "POUND SIGN" },
                   1198: { 0x00A4, "curren", "CURRENCY SIGN" },
                   1199: { 0x00A5, "yen", "YEN SIGN" },
                   1200: { 0x00A6, "brvbar", "BROKEN BAR" },
                   1201: { 0x00A7, "sect", "SECTION SIGN" },
                   1202: { 0x00A8, "die", "" },
                   1203: { 0x00A8, "Dot", "" },
                   1204: { 0x00A8, "uml", "" },
                   1205: { 0x00A9, "copy", "COPYRIGHT SIGN" },
                   1206: { 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
                   1207: { 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
                   1208: { 0x00AC, "not", "NOT SIGN" },
                   1209: { 0x00AD, "shy", "SOFT HYPHEN" },
                   1210: { 0x00AE, "reg", "REG TRADE MARK SIGN" },
                   1211: { 0x00AF, "macr", "MACRON" },
                   1212: { 0x00B0, "deg", "DEGREE SIGN" },
                   1213: { 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
                   1214: { 0x00B2, "sup2", "SUPERSCRIPT TWO" },
                   1215: { 0x00B3, "sup3", "SUPERSCRIPT THREE" },
                   1216: { 0x00B4, "acute", "ACUTE ACCENT" },
                   1217: { 0x00B5, "micro", "MICRO SIGN" },
                   1218: { 0x00B6, "para", "PILCROW SIGN" },
                   1219: { 0x00B7, "middot", "MIDDLE DOT" },
                   1220: { 0x00B8, "cedil", "CEDILLA" },
                   1221: { 0x00B9, "sup1", "SUPERSCRIPT ONE" },
                   1222: { 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
                   1223: { 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
                   1224: { 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
                   1225: { 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
                   1226: { 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
                   1227: { 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
                   1228: { 0x00BF, "iquest", "INVERTED QUESTION MARK" },
                   1229: { 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
                   1230: { 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
                   1231: { 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
                   1232: { 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
                   1233: { 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
                   1234: { 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
                   1235: { 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
                   1236: { 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
                   1237: { 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
                   1238: { 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
                   1239: { 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
                   1240: { 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
                   1241: { 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
                   1242: { 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
                   1243: { 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
                   1244: { 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
                   1245: { 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
                   1246: { 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
                   1247: { 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
                   1248: { 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
                   1249: { 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
                   1250: { 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
                   1251: { 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
                   1252: { 0x00D7, "times", "MULTIPLICATION SIGN" },
                   1253: { 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
                   1254: { 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
                   1255: { 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
                   1256: { 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
                   1257: { 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
                   1258: { 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
                   1259: { 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
                   1260: { 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
                   1261: { 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
                   1262: { 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
                   1263: { 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
                   1264: { 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
                   1265: { 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
                   1266: { 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
                   1267: { 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
                   1268: { 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
                   1269: { 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
                   1270: { 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
                   1271: { 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
                   1272: { 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
                   1273: { 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
                   1274: { 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
                   1275: { 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
                   1276: { 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
                   1277: { 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
                   1278: { 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
                   1279: { 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
                   1280: { 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
                   1281: { 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
                   1282: { 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
                   1283: { 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
                   1284: { 0x00F7, "divide", "DIVISION SIGN" },
                   1285: { 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
                   1286: { 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
                   1287: { 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
                   1288: { 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
                   1289: { 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
                   1290: { 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
                   1291: { 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
                   1292: { 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
                   1293: { 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
                   1294: { 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
                   1295: { 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
                   1296: { 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
                   1297: { 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
                   1298: { 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
                   1299: { 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
                   1300: { 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
                   1301: { 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
                   1302: { 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
                   1303: { 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
                   1304: { 0x010B, "cdot", "DOT OPERATOR" },
                   1305: { 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
                   1306: { 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
                   1307: { 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
                   1308: { 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
                   1309: { 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
                   1310: { 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
                   1311: { 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
                   1312: { 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
                   1313: { 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
                   1314: { 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
                   1315: { 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
                   1316: { 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
                   1317: { 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
                   1318: { 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
                   1319: { 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
                   1320: { 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
                   1321: { 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
                   1322: { 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
                   1323: { 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
                   1324: { 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
                   1325: { 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
                   1326: { 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
                   1327: { 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
                   1328: { 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
                   1329: { 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
                   1330: { 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
                   1331: { 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
                   1332: { 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
                   1333: { 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
                   1334: { 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
                   1335: { 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
                   1336: { 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
                   1337: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
                   1338: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
                   1339: { 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
                   1340: { 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
                   1341: { 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
                   1342: { 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
                   1343: { 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
                   1344: { 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
                   1345: { 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
                   1346: { 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
                   1347: { 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
                   1348: { 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
                   1349: { 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
                   1350: { 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
                   1351: { 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
                   1352: { 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
                   1353: { 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
                   1354: { 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
                   1355: { 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
                   1356: { 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
                   1357: { 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
                   1358: { 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
                   1359: { 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
                   1360: { 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
                   1361: { 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
                   1362: { 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
                   1363: { 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
                   1364: { 0x014B, "eng", "LATIN SMALL LETTER ENG" },
                   1365: { 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
                   1366: { 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
                   1367: { 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
                   1368: { 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
                   1369: { 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
                   1370: { 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
                   1371: { 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
                   1372: { 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
                   1373: { 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
                   1374: { 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
                   1375: { 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
                   1376: { 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
                   1377: { 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
                   1378: { 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
                   1379: { 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
                   1380: { 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
                   1381: { 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
                   1382: { 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
                   1383: { 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
                   1384: { 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
                   1385: { 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
                   1386: { 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
                   1387: { 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
                   1388: { 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
                   1389: { 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
                   1390: { 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
                   1391: { 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
                   1392: { 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
                   1393: { 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
                   1394: { 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
                   1395: { 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
                   1396: { 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
                   1397: { 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
                   1398: { 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
                   1399: { 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
                   1400: { 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
                   1401: { 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
                   1402: { 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
                   1403: { 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
                   1404: { 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
                   1405: { 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
                   1406: { 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
                   1407: { 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
                   1408: { 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
                   1409: { 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
                   1410: { 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
                   1411: { 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
                   1412: { 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
                   1413: { 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
                   1414: { 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
                   1415: { 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
                   1416: { 0x02C7, "caron", "CARON" },
                   1417: { 0x02D8, "breve", "BREVE" },
                   1418: { 0x02D9, "dot", "DOT ABOVE" },
                   1419: { 0x02DA, "ring", "RING ABOVE" },
                   1420: { 0x02DB, "ogon", "OGONEK" },
                   1421: { 0x02DC, "tilde", "TILDE" },
                   1422: { 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
                   1423: { 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
                   1424: { 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
                   1425: { 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
                   1426: { 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
                   1427: { 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
                   1428: { 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
                   1429: { 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
                   1430: { 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
                   1431: { 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
                   1432: { 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
                   1433: { 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
                   1434: { 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
                   1435: { 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
                   1436: { 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
                   1437: { 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
                   1438: { 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
                   1439: { 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
                   1440: { 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
                   1441: { 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
                   1442: { 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
                   1443: { 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
                   1444: { 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
                   1445: { 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
                   1446: { 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
                   1447: { 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
                   1448: { 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
                   1449: { 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
                   1450: { 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
                   1451: { 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
                   1452: { 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
                   1453: { 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
                   1454: { 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
                   1455: { 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
                   1456: { 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
                   1457: { 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
                   1458: { 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
                   1459: { 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
                   1460: { 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
                   1461: { 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
                   1462: { 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
                   1463: { 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
                   1464: { 0x03A5, "Ugr", "" },
                   1465: { 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
                   1466: { 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
                   1467: { 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
                   1468: { 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
                   1469: { 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
                   1470: { 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
                   1471: { 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
                   1472: { 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
                   1473: { 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
                   1474: { 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
                   1475: { 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
                   1476: { 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
                   1477: { 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
                   1478: { 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
                   1479: { 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
                   1480: { 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
                   1481: { 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
                   1482: { 0x03B1, "agr", "" },
                   1483: { 0x03B1, "alpha", "" },
                   1484: { 0x03B1, "b.alpha", "" },
                   1485: { 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
                   1486: { 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
                   1487: { 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
                   1488: { 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
                   1489: { 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
                   1490: { 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
                   1491: { 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
                   1492: { 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
                   1493: { 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
                   1494: { 0x03B5, "b.epsi", "" },
                   1495: { 0x03B5, "b.epsis", "" },
                   1496: { 0x03B5, "b.epsiv", "" },
                   1497: { 0x03B5, "egr", "" },
                   1498: { 0x03B5, "epsiv", "" },
                   1499: { 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
                   1500: { 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
                   1501: { 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
                   1502: { 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
                   1503: { 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
                   1504: { 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
                   1505: { 0x03B8, "b.thetas", "" },
                   1506: { 0x03B8, "thetas", "" },
                   1507: { 0x03B8, "thgr", "" },
                   1508: { 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
                   1509: { 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
                   1510: { 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
                   1511: { 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
                   1512: { 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
                   1513: { 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
                   1514: { 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
                   1515: { 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
                   1516: { 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
                   1517: { 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
                   1518: { 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
                   1519: { 0x03BC, "mu", "GREEK SMALL LETTER MU" },
                   1520: { 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
                   1521: { 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
                   1522: { 0x03BD, "nu", "GREEK SMALL LETTER NU" },
                   1523: { 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
                   1524: { 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
                   1525: { 0x03BE, "xi", "GREEK SMALL LETTER XI" },
                   1526: { 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
                   1527: { 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
                   1528: { 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
                   1529: { 0x03C0, "pi", "GREEK SMALL LETTER PI" },
                   1530: { 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
                   1531: { 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
                   1532: { 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
                   1533: { 0x03C2, "b.sigmav", "" },
                   1534: { 0x03C2, "sfgr", "" },
                   1535: { 0x03C2, "sigmav", "" },
                   1536: { 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
                   1537: { 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
                   1538: { 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
                   1539: { 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
                   1540: { 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
                   1541: { 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
                   1542: { 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
                   1543: { 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
                   1544: { 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
                   1545: { 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
                   1546: { 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
                   1547: { 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
                   1548: { 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
                   1549: { 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
                   1550: { 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
                   1551: { 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
                   1552: { 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
                   1553: { 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
                   1554: { 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
                   1555: { 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
                   1556: { 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
                   1557: { 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
                   1558: { 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
                   1559: { 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
                   1560: { 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
                   1561: { 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
                   1562: { 0x03D1, "b.thetav", "" },
                   1563: { 0x03D1, "thetav", "" },
                   1564: { 0x03D2, "b.Upsi", "" },
                   1565: { 0x03D2, "Upsi", "" },
                   1566: { 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
                   1567: { 0x03D5, "phiv", "GREEK PHI SYMBOL" },
                   1568: { 0x03D6, "b.piv", "GREEK PI SYMBOL" },
                   1569: { 0x03D6, "piv", "GREEK PI SYMBOL" },
                   1570: { 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
                   1571: { 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
                   1572: { 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
                   1573: { 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
                   1574: { 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
                   1575: { 0x03F1, "rhov", "GREEK RHO SYMBOL" },
                   1576: { 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
                   1577: { 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
                   1578: { 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
                   1579: { 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
                   1580: { 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
                   1581: { 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
                   1582: { 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
                   1583: { 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
                   1584: { 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
                   1585: { 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
                   1586: { 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
                   1587: { 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
                   1588: { 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
                   1589: { 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
                   1590: { 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
                   1591: { 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
                   1592: { 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
                   1593: { 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
                   1594: { 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
                   1595: { 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
                   1596: { 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
                   1597: { 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
                   1598: { 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
                   1599: { 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
                   1600: { 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
                   1601: { 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
                   1602: { 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
                   1603: { 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
                   1604: { 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
                   1605: { 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
                   1606: { 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
                   1607: { 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
                   1608: { 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
                   1609: { 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
                   1610: { 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
                   1611: { 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
                   1612: { 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
                   1613: { 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
                   1614: { 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
                   1615: { 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
                   1616: { 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
                   1617: { 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
                   1618: { 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
                   1619: { 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
                   1620: { 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
                   1621: { 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
                   1622: { 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
                   1623: { 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
                   1624: { 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
                   1625: { 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
                   1626: { 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
                   1627: { 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
                   1628: { 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
                   1629: { 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
                   1630: { 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
                   1631: { 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
                   1632: { 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
                   1633: { 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
                   1634: { 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
                   1635: { 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
                   1636: { 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
                   1637: { 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
                   1638: { 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
                   1639: { 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
                   1640: { 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
                   1641: { 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
                   1642: { 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
                   1643: { 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
                   1644: { 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
                   1645: { 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
                   1646: { 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
                   1647: { 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
                   1648: { 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
                   1649: { 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
                   1650: { 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
                   1651: { 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
                   1652: { 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
                   1653: { 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
                   1654: { 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
                   1655: { 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
                   1656: { 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
                   1657: { 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
                   1658: { 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
                   1659: { 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
                   1660: { 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
                   1661: { 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
                   1662: { 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
                   1663: { 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
                   1664: { 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
                   1665: { 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
                   1666: { 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
                   1667: { 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
                   1668: { 0x2002, "ensp", "EN SPACE" },
                   1669: { 0x2003, "emsp", "EM SPACE" },
                   1670: { 0x2004, "emsp13", "THREE-PER-EM SPACE" },
                   1671: { 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
                   1672: { 0x2007, "numsp", "FIGURE SPACE" },
                   1673: { 0x2008, "puncsp", "PUNCTUATION SPACE" },
                   1674: { 0x2009, "thinsp", "THIN SPACE" },
                   1675: { 0x200A, "hairsp", "HAIR SPACE" },
                   1676: { 0x2010, "dash", "HYPHEN" },
                   1677: { 0x2013, "ndash", "EN DASH" },
                   1678: { 0x2014, "mdash", "EM DASH" },
                   1679: { 0x2015, "horbar", "HORIZONTAL BAR" },
                   1680: { 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
                   1681: { 0x2018, "lsquo", "" },
                   1682: { 0x2018, "rsquor", "" },
                   1683: { 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
                   1684: { 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
                   1685: { 0x201C, "ldquo", "" },
                   1686: { 0x201C, "rdquor", "" },
                   1687: { 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
                   1688: { 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
                   1689: { 0x2020, "dagger", "DAGGER" },
                   1690: { 0x2021, "Dagger", "DOUBLE DAGGER" },
                   1691: { 0x2022, "bull", "BULLET" },
                   1692: { 0x2025, "nldr", "TWO DOT LEADER" },
                   1693: { 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
                   1694: { 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
                   1695: { 0x2030, "permil", "PER MILLE SIGN" },
                   1696: { 0x2032, "prime", "PRIME" },
                   1697: { 0x2032, "vprime", "PRIME" },
                   1698: { 0x2033, "Prime", "DOUBLE PRIME" },
                   1699: { 0x2034, "tprime", "TRIPLE PRIME" },
                   1700: { 0x2035, "bprime", "REVERSED PRIME" },
                   1701: { 0x2041, "caret", "CARET" },
                   1702: { 0x2043, "hybull", "HYPHEN BULLET" },
                   1703: { 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
                   1704: { 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
                   1705: { 0x2105, "incare", "CARE OF" },
                   1706: { 0x210B, "hamilt", "SCRIPT CAPITAL H" },
                   1707: { 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
                   1708: { 0x2111, "image", "BLACK-LETTER CAPITAL I" },
                   1709: { 0x2112, "lagran", "SCRIPT CAPITAL L" },
                   1710: { 0x2113, "ell", "SCRIPT SMALL L" },
                   1711: { 0x2116, "numero", "NUMERO SIGN" },
                   1712: { 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
                   1713: { 0x2118, "weierp", "SCRIPT CAPITAL P" },
                   1714: { 0x211C, "real", "BLACK-LETTER CAPITAL R" },
                   1715: { 0x211E, "rx", "PRESCRIPTION TAKE" },
                   1716: { 0x2122, "trade", "TRADE MARK SIGN" },
                   1717: { 0x2126, "ohm", "OHM SIGN" },
                   1718: { 0x212B, "angst", "ANGSTROM SIGN" },
                   1719: { 0x212C, "bernou", "SCRIPT CAPITAL B" },
                   1720: { 0x2133, "phmmat", "SCRIPT CAPITAL M" },
                   1721: { 0x2134, "order", "SCRIPT SMALL O" },
                   1722: { 0x2135, "aleph", "ALEF SYMBOL" },
                   1723: { 0x2136, "beth", "BET SYMBOL" },
                   1724: { 0x2137, "gimel", "GIMEL SYMBOL" },
                   1725: { 0x2138, "daleth", "DALET SYMBOL" },
                   1726: { 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
                   1727: { 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
                   1728: { 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
                   1729: { 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
                   1730: { 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
                   1731: { 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
                   1732: { 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
                   1733: { 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
                   1734: { 0x215B, "frac18", "" },
                   1735: { 0x215C, "frac38", "" },
                   1736: { 0x215D, "frac58", "" },
                   1737: { 0x215E, "frac78", "" },
                   1738: { 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
                   1739: { 0x2191, "uarr", "UPWARDS ARROW" },
                   1740: { 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
                   1741: { 0x2193, "darr", "DOWNWARDS ARROW" },
                   1742: { 0x2194, "harr", "LEFT RIGHT ARROW" },
                   1743: { 0x2194, "xhArr", "LEFT RIGHT ARROW" },
                   1744: { 0x2194, "xharr", "LEFT RIGHT ARROW" },
                   1745: { 0x2195, "varr", "UP DOWN ARROW" },
                   1746: { 0x2196, "nwarr", "NORTH WEST ARROW" },
                   1747: { 0x2197, "nearr", "NORTH EAST ARROW" },
                   1748: { 0x2198, "drarr", "SOUTH EAST ARROW" },
                   1749: { 0x2199, "dlarr", "SOUTH WEST ARROW" },
                   1750: { 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
                   1751: { 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
                   1752: { 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
                   1753: { 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
                   1754: { 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
                   1755: { 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
                   1756: { 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
                   1757: { 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
                   1758: { 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
                   1759: { 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
                   1760: { 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
                   1761: { 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
                   1762: { 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
                   1763: { 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
                   1764: { 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
                   1765: { 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
                   1766: { 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
                   1767: { 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
                   1768: { 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
                   1769: { 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
                   1770: { 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
                   1771: { 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
                   1772: { 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
                   1773: { 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
                   1774: { 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
                   1775: { 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
                   1776: { 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
                   1777: { 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
                   1778: { 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
                   1779: { 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
                   1780: { 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
                   1781: { 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
                   1782: { 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
                   1783: { 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
                   1784: { 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
                   1785: { 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
                   1786: { 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
                   1787: { 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
                   1788: { 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
                   1789: { 0x21D0, "lArr", "LEFTWARDS ARROW" },
                   1790: { 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
                   1791: { 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
                   1792: { 0x21D2, "rArr", "RIGHTWARDS ARROW" },
                   1793: { 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
                   1794: { 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
                   1795: { 0x21D4, "hArr", "" },
                   1796: { 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
                   1797: { 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
                   1798: { 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
                   1799: { 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
                   1800: { 0x2200, "forall", "" },
                   1801: { 0x2201, "comp", "COMPLEMENT" },
                   1802: { 0x2202, "part", "" },
                   1803: { 0x2203, "exist", "" },
                   1804: { 0x2204, "nexist", "THERE DOES NOT EXIST" },
                   1805: { 0x2205, "empty", "" },
                   1806: { 0x2207, "nabla", "NABLA" },
                   1807: { 0x2209, "notin", "" },
                   1808: { 0x220A, "epsi", "" },
                   1809: { 0x220A, "epsis", "" },
                   1810: { 0x220A, "isin", "" },
                   1811: { 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
                   1812: { 0x220D, "ni", "" },
                   1813: { 0x220F, "prod", "N-ARY PRODUCT" },
                   1814: { 0x2210, "amalg", "N-ARY COPRODUCT" },
                   1815: { 0x2210, "coprod", "N-ARY COPRODUCT" },
                   1816: { 0x2210, "samalg", "" },
                   1817: { 0x2211, "sum", "N-ARY SUMMATION" },
                   1818: { 0x2212, "minus", "MINUS SIGN" },
                   1819: { 0x2213, "mnplus", "" },
                   1820: { 0x2214, "plusdo", "DOT PLUS" },
                   1821: { 0x2216, "setmn", "SET MINUS" },
                   1822: { 0x2216, "ssetmn", "SET MINUS" },
                   1823: { 0x2217, "lowast", "ASTERISK OPERATOR" },
                   1824: { 0x2218, "compfn", "RING OPERATOR" },
                   1825: { 0x221A, "radic", "" },
                   1826: { 0x221D, "prop", "" },
                   1827: { 0x221D, "vprop", "" },
                   1828: { 0x221E, "infin", "" },
                   1829: { 0x221F, "ang90", "RIGHT ANGLE" },
                   1830: { 0x2220, "ang", "ANGLE" },
                   1831: { 0x2221, "angmsd", "MEASURED ANGLE" },
                   1832: { 0x2222, "angsph", "" },
                   1833: { 0x2223, "mid", "" },
                   1834: { 0x2224, "nmid", "DOES NOT DIVIDE" },
                   1835: { 0x2225, "par", "PARALLEL TO" },
                   1836: { 0x2225, "spar", "PARALLEL TO" },
                   1837: { 0x2226, "npar", "NOT PARALLEL TO" },
                   1838: { 0x2226, "nspar", "NOT PARALLEL TO" },
                   1839: { 0x2227, "and", "" },
                   1840: { 0x2228, "or", "" },
                   1841: { 0x2229, "cap", "" },
                   1842: { 0x222A, "cup", "" },
                   1843: { 0x222B, "int", "" },
                   1844: { 0x222E, "conint", "" },
                   1845: { 0x2234, "there4", "" },
                   1846: { 0x2235, "becaus", "BECAUSE" },
                   1847: { 0x223C, "sim", "" },
                   1848: { 0x223C, "thksim", "TILDE OPERATOR" },
                   1849: { 0x223D, "bsim", "" },
                   1850: { 0x2240, "wreath", "WREATH PRODUCT" },
                   1851: { 0x2241, "nsim", "" },
                   1852: { 0x2243, "sime", "" },
                   1853: { 0x2244, "nsime", "" },
                   1854: { 0x2245, "cong", "" },
                   1855: { 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
                   1856: { 0x2248, "ap", "" },
                   1857: { 0x2248, "thkap", "ALMOST EQUAL TO" },
                   1858: { 0x2249, "nap", "NOT ALMOST EQUAL TO" },
                   1859: { 0x224A, "ape", "" },
                   1860: { 0x224C, "bcong", "ALL EQUAL TO" },
                   1861: { 0x224D, "asymp", "EQUIVALENT TO" },
                   1862: { 0x224E, "bump", "" },
                   1863: { 0x224F, "bumpe", "" },
                   1864: { 0x2250, "esdot", "" },
                   1865: { 0x2251, "eDot", "" },
                   1866: { 0x2252, "efDot", "" },
                   1867: { 0x2253, "erDot", "" },
                   1868: { 0x2254, "colone", "" },
                   1869: { 0x2255, "ecolon", "" },
                   1870: { 0x2256, "ecir", "" },
                   1871: { 0x2257, "cire", "" },
                   1872: { 0x2259, "wedgeq", "ESTIMATES" },
                   1873: { 0x225C, "trie", "" },
                   1874: { 0x2260, "ne", "" },
                   1875: { 0x2261, "equiv", "" },
                   1876: { 0x2262, "nequiv", "NOT IDENTICAL TO" },
                   1877: { 0x2264, "le", "" },
                   1878: { 0x2264, "les", "LESS-THAN OR EQUAL TO" },
                   1879: { 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
                   1880: { 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
                   1881: { 0x2266, "lE", "" },
                   1882: { 0x2267, "gE", "" },
                   1883: { 0x2268, "lnE", "" },
                   1884: { 0x2268, "lne", "" },
                   1885: { 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
                   1886: { 0x2269, "gnE", "" },
                   1887: { 0x2269, "gne", "" },
                   1888: { 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
                   1889: { 0x226A, "Lt", "MUCH LESS-THAN" },
                   1890: { 0x226B, "Gt", "MUCH GREATER-THAN" },
                   1891: { 0x226C, "twixt", "BETWEEN" },
                   1892: { 0x226E, "nlt", "NOT LESS-THAN" },
                   1893: { 0x226F, "ngt", "NOT GREATER-THAN" },
                   1894: { 0x2270, "nlE", "" },
                   1895: { 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
                   1896: { 0x2270, "nles", "" },
                   1897: { 0x2271, "ngE", "" },
                   1898: { 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
                   1899: { 0x2271, "nges", "" },
                   1900: { 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
                   1901: { 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
                   1902: { 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
                   1903: { 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
                   1904: { 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
                   1905: { 0x2277, "gl", "" },
                   1906: { 0x227A, "pr", "" },
                   1907: { 0x227B, "sc", "" },
                   1908: { 0x227C, "cupre", "" },
                   1909: { 0x227C, "pre", "" },
                   1910: { 0x227D, "sccue", "" },
                   1911: { 0x227D, "sce", "" },
                   1912: { 0x227E, "prap", "" },
                   1913: { 0x227E, "prsim", "" },
                   1914: { 0x227F, "scap", "" },
                   1915: { 0x227F, "scsim", "" },
                   1916: { 0x2280, "npr", "DOES NOT PRECEDE" },
                   1917: { 0x2281, "nsc", "DOES NOT SUCCEED" },
                   1918: { 0x2282, "sub", "" },
                   1919: { 0x2283, "sup", "" },
                   1920: { 0x2284, "nsub", "NOT A SUBSET OF" },
                   1921: { 0x2285, "nsup", "NOT A SUPERSET OF" },
                   1922: { 0x2286, "subE", "" },
                   1923: { 0x2286, "sube", "" },
                   1924: { 0x2287, "supE", "" },
                   1925: { 0x2287, "supe", "" },
                   1926: { 0x2288, "nsubE", "" },
                   1927: { 0x2288, "nsube", "" },
                   1928: { 0x2289, "nsupE", "" },
                   1929: { 0x2289, "nsupe", "" },
                   1930: { 0x228A, "subne", "" },
                   1931: { 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
                   1932: { 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
                   1933: { 0x228B, "supnE", "" },
                   1934: { 0x228B, "supne", "" },
                   1935: { 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
                   1936: { 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
                   1937: { 0x228E, "uplus", "MULTISET UNION" },
                   1938: { 0x228F, "sqsub", "" },
                   1939: { 0x2290, "sqsup", "" },
                   1940: { 0x2291, "sqsube", "" },
                   1941: { 0x2292, "sqsupe", "" },
                   1942: { 0x2293, "sqcap", "SQUARE CAP" },
                   1943: { 0x2294, "sqcup", "SQUARE CUP" },
                   1944: { 0x2295, "oplus", "CIRCLED PLUS" },
                   1945: { 0x2296, "ominus", "CIRCLED MINUS" },
                   1946: { 0x2297, "otimes", "CIRCLED TIMES" },
                   1947: { 0x2298, "osol", "CIRCLED DIVISION SLASH" },
                   1948: { 0x2299, "odot", "CIRCLED DOT OPERATOR" },
                   1949: { 0x229A, "ocir", "CIRCLED RING OPERATOR" },
                   1950: { 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
                   1951: { 0x229D, "odash", "CIRCLED DASH" },
                   1952: { 0x229E, "plusb", "SQUARED PLUS" },
                   1953: { 0x229F, "minusb", "SQUARED MINUS" },
                   1954: { 0x22A0, "timesb", "SQUARED TIMES" },
                   1955: { 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
                   1956: { 0x22A2, "vdash", "" },
                   1957: { 0x22A3, "dashv", "" },
                   1958: { 0x22A4, "top", "DOWN TACK" },
                   1959: { 0x22A5, "bottom", "" },
                   1960: { 0x22A5, "perp", "" },
                   1961: { 0x22A7, "models", "MODELS" },
                   1962: { 0x22A8, "vDash", "" },
                   1963: { 0x22A9, "Vdash", "" },
                   1964: { 0x22AA, "Vvdash", "" },
                   1965: { 0x22AC, "nvdash", "DOES NOT PROVE" },
                   1966: { 0x22AD, "nvDash", "NOT TRUE" },
                   1967: { 0x22AE, "nVdash", "DOES NOT FORCE" },
                   1968: { 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
                   1969: { 0x22B2, "vltri", "" },
                   1970: { 0x22B3, "vrtri", "" },
                   1971: { 0x22B4, "ltrie", "" },
                   1972: { 0x22B5, "rtrie", "" },
                   1973: { 0x22B8, "mumap", "MULTIMAP" },
                   1974: { 0x22BA, "intcal", "INTERCALATE" },
                   1975: { 0x22BB, "veebar", "" },
                   1976: { 0x22BC, "barwed", "NAND" },
                   1977: { 0x22C4, "diam", "DIAMOND OPERATOR" },
                   1978: { 0x22C5, "sdot", "DOT OPERATOR" },
                   1979: { 0x22C6, "sstarf", "STAR OPERATOR" },
                   1980: { 0x22C6, "star", "STAR OPERATOR" },
                   1981: { 0x22C7, "divonx", "DIVISION TIMES" },
                   1982: { 0x22C8, "bowtie", "" },
                   1983: { 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
                   1984: { 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
                   1985: { 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
                   1986: { 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
                   1987: { 0x22CD, "bsime", "" },
                   1988: { 0x22CE, "cuvee", "CURLY LOGICAL OR" },
                   1989: { 0x22CF, "cuwed", "CURLY LOGICAL AND" },
                   1990: { 0x22D0, "Sub", "" },
                   1991: { 0x22D1, "Sup", "" },
                   1992: { 0x22D2, "Cap", "DOUBLE INTERSECTION" },
                   1993: { 0x22D3, "Cup", "DOUBLE UNION" },
                   1994: { 0x22D4, "fork", "" },
                   1995: { 0x22D6, "ldot", "" },
                   1996: { 0x22D7, "gsdot", "" },
                   1997: { 0x22D8, "Ll", "" },
                   1998: { 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
                   1999: { 0x22DA, "lEg", "" },
                   2000: { 0x22DA, "leg", "" },
                   2001: { 0x22DB, "gEl", "" },
                   2002: { 0x22DB, "gel", "" },
                   2003: { 0x22DC, "els", "" },
                   2004: { 0x22DD, "egs", "" },
                   2005: { 0x22DE, "cuepr", "" },
                   2006: { 0x22DF, "cuesc", "" },
                   2007: { 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
                   2008: { 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
                   2009: { 0x22E6, "lnsim", "" },
                   2010: { 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
                   2011: { 0x22E8, "prnap", "" },
                   2012: { 0x22E8, "prnsim", "" },
                   2013: { 0x22E9, "scnap", "" },
                   2014: { 0x22E9, "scnsim", "" },
                   2015: { 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
                   2016: { 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
                   2017: { 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
                   2018: { 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
                   2019: { 0x22EE, "vellip", "" },
                   2020: { 0x2306, "Barwed", "PERSPECTIVE" },
                   2021: { 0x2308, "lceil", "LEFT CEILING" },
                   2022: { 0x2309, "rceil", "RIGHT CEILING" },
                   2023: { 0x230A, "lfloor", "LEFT FLOOR" },
                   2024: { 0x230B, "rfloor", "RIGHT FLOOR" },
                   2025: { 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
                   2026: { 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
                   2027: { 0x230E, "urcrop", "TOP RIGHT CROP" },
                   2028: { 0x230F, "ulcrop", "TOP LEFT CROP" },
                   2029: { 0x2315, "telrec", "TELEPHONE RECORDER" },
                   2030: { 0x2316, "target", "POSITION INDICATOR" },
                   2031: { 0x231C, "ulcorn", "TOP LEFT CORNER" },
                   2032: { 0x231D, "urcorn", "TOP RIGHT CORNER" },
                   2033: { 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
                   2034: { 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
                   2035: { 0x2322, "frown", "" },
                   2036: { 0x2322, "sfrown", "FROWN" },
                   2037: { 0x2323, "smile", "" },
                   2038: { 0x2323, "ssmile", "SMILE" },
                   2039: { 0x2423, "blank", "OPEN BOX" },
                   2040: { 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
                   2041: { 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
                   2042: { 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
                   2043: { 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
                   2044: { 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
                   2045: { 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
                   2046: { 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
                   2047: { 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
                   2048: { 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
                   2049: { 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
                   2050: { 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
                   2051: { 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
                   2052: { 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
                   2053: { 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
                   2054: { 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
                   2055: { 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
                   2056: { 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
                   2057: { 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
                   2058: { 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
                   2059: { 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
                   2060: { 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
                   2061: { 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
                   2062: { 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
                   2063: { 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
                   2064: { 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
                   2065: { 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
                   2066: { 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
                   2067: { 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
                   2068: { 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
                   2069: { 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
                   2070: { 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
                   2071: { 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
                   2072: { 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
                   2073: { 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
                   2074: { 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
                   2075: { 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
                   2076: { 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
                   2077: { 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
                   2078: { 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
                   2079: { 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
                   2080: { 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
                   2081: { 0x2580, "uhblk", "UPPER HALF BLOCK" },
                   2082: { 0x2584, "lhblk", "LOWER HALF BLOCK" },
                   2083: { 0x2588, "block", "FULL BLOCK" },
                   2084: { 0x2591, "blk14", "LIGHT SHADE" },
                   2085: { 0x2592, "blk12", "MEDIUM SHADE" },
                   2086: { 0x2593, "blk34", "DARK SHADE" },
                   2087: { 0x25A1, "square", "WHITE SQUARE" },
                   2088: { 0x25A1, "squ", "WHITE SQUARE" },
                   2089: { 0x25AA, "squf", "" },
                   2090: { 0x25AD, "rect", "WHITE RECTANGLE" },
                   2091: { 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
                   2092: { 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
                   2093: { 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
                   2094: { 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
                   2095: { 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
                   2096: { 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
                   2097: { 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
                   2098: { 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
                   2099: { 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
                   2100: { 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
                   2101: { 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
                   2102: { 0x25CA, "loz", "LOZENGE" },
                   2103: { 0x25CB, "cir", "WHITE CIRCLE" },
                   2104: { 0x25CB, "xcirc", "WHITE CIRCLE" },
                   2105: { 0x2605, "starf", "BLACK STAR" },
                   2106: { 0x260E, "phone", "TELEPHONE SIGN" },
                   2107: { 0x2640, "female", "" },
                   2108: { 0x2642, "male", "MALE SIGN" },
                   2109: { 0x2660, "spades", "BLACK SPADE SUIT" },
                   2110: { 0x2663, "clubs", "BLACK CLUB SUIT" },
                   2111: { 0x2665, "hearts", "BLACK HEART SUIT" },
                   2112: { 0x2666, "diams", "BLACK DIAMOND SUIT" },
                   2113: { 0x2669, "sung", "" },
                   2114: { 0x266D, "flat", "MUSIC FLAT SIGN" },
                   2115: { 0x266E, "natur", "MUSIC NATURAL SIGN" },
                   2116: { 0x266F, "sharp", "MUSIC SHARP SIGN" },
                   2117: { 0x2713, "check", "CHECK MARK" },
                   2118: { 0x2717, "cross", "BALLOT X" },
                   2119: { 0x2720, "malt", "MALTESE CROSS" },
                   2120: { 0x2726, "lozf", "" },
                   2121: { 0x2736, "sext", "SIX POINTED BLACK STAR" },
                   2122: { 0x3008, "lang", "" },
                   2123: { 0x3009, "rang", "" },
                   2124: { 0xE291, "rpargt", "" },
                   2125: { 0xE2A2, "lnap", "" },
                   2126: { 0xE2AA, "nsmid", "" },
                   2127: { 0xE2B3, "prnE", "" },
                   2128: { 0xE2B5, "scnE", "" },
                   2129: { 0xE2B8, "vsubnE", "" },
                   2130: { 0xE301, "smid", "" },
                   2131: { 0xE411, "gnap", "" },
                   2132: { 0xFB00, "fflig", "" },
                   2133: { 0xFB01, "filig", "" },
                   2134: { 0xFB02, "fllig", "" },
                   2135: { 0xFB03, "ffilig", "" },
                   2136: { 0xFB04, "ffllig", "" },
                   2137: { 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
                   2138: };
                   2139: 
                   2140: /************************************************************************
                   2141:  *                                                                     *
                   2142:  *             Commodity functions to handle entities                  *
                   2143:  *                                                                     *
                   2144:  ************************************************************************/
                   2145: 
                   /*
                    * growBuffer:
                    * @buffer: name of an xmlChar * variable; a companion variable named
                    *          <buffer>_size must hold its current size in xmlChar units.
                    *
                    * Doubles <buffer>_size and reallocates @buffer to the new size.
                    *
                    * The result of xmlRealloc() is stored in a temporary first, so the
                    * original block is not lost when the reallocation fails (assuming
                    * xmlRealloc() follows realloc() semantics and leaves the old block
                    * untouched on failure).  On failure the error is reported with perror(),
                    * the old block is released and the ENCLOSING FUNCTION returns NULL, so
                    * the macro may only be used inside functions returning a pointer.
                    *
                    * The expansion deliberately stays a plain brace block so that existing
                    * call sites keep compiling unchanged.
                    */
                   #define growBuffer(buffer) {                                         \
                       xmlChar *growBuffer_tmp;                                         \
                       buffer##_size *= 2;                                              \
                       growBuffer_tmp = (xmlChar *) xmlRealloc(buffer,                  \
                                            buffer##_size * sizeof(xmlChar));           \
                       if (growBuffer_tmp == NULL) {                                    \
                           perror("realloc failed");                                    \
                           xmlFree(buffer);                                             \
                           return(NULL);                                                \
                       }                                                                \
                       buffer = growBuffer_tmp;                                         \
                   }
                   2157: 
                   2158: /**
                   2159:  * sgmlEntityLookup:
                   2160:  * @name: the entity name
                   2161:  *
                   2162:  * Lookup the given entity in EntitiesTable
                   2163:  *
                   2164:  * TODO: the linear scan is really ugly, an hash table is really needed.
                   2165:  *
                   2166:  * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
                   2167:  */
                   2168: sgmlEntityDescPtr
                   2169: sgmlEntityLookup(const xmlChar *name) {
                   2170:     int i;
                   2171: 
                   2172:     for (i = 0;i < (sizeof(docbookEntitiesTable)/
                   2173:                     sizeof(docbookEntitiesTable[0]));i++) {
                   2174:         if (!xmlStrcmp(name, BAD_CAST docbookEntitiesTable[i].name)) {
                   2175: #ifdef DEBUG
                   2176:             fprintf(stderr,"Found entity %s\n", name);
                   2177: #endif
                   2178:             return(&docbookEntitiesTable[i]);
                   2179:        }
                   2180:     }
                   2181:     return(NULL);
                   2182: }
                   2183: 
                   2184: /**
                   2185:  * sgmlEntityValueLookup:
                   2186:  * @value: the entity's unicode value
                   2187:  *
                   2188:  * Lookup the given entity in EntitiesTable
                   2189:  *
                   2190:  * TODO: the linear scan is really ugly, an hash table is really needed.
                   2191:  *
                   2192:  * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
                   2193:  */
                   2194: sgmlEntityDescPtr
                   2195: sgmlEntityValueLookup(int value) {
                   2196:     int i;
                   2197: #ifdef DEBUG
                   2198:     int lv = 0;
                   2199: #endif
                   2200: 
                   2201:     for (i = 0;i < (sizeof(docbookEntitiesTable)/
                   2202:                     sizeof(docbookEntitiesTable[0]));i++) {
                   2203:         if (docbookEntitiesTable[i].value >= value) {
                   2204:            if (docbookEntitiesTable[i].value > value)
                   2205:                break;
                   2206: #ifdef DEBUG
                   2207:            fprintf(stderr,"Found entity %s\n", docbookEntitiesTable[i].name);
                   2208: #endif
                   2209:             return(&docbookEntitiesTable[i]);
                   2210:        }
                   2211: #ifdef DEBUG
                   2212:        if (lv > docbookEntitiesTable[i].value) {
                   2213:            fprintf(stderr, "docbookEntitiesTable[] is not sorted (%d > %d)!\n",
                   2214:                    lv, docbookEntitiesTable[i].value);
                   2215:        }
                   2216:        lv = docbookEntitiesTable[i].value;
                   2217: #endif
                   2218:     }
                   2219:     return(NULL);
                   2220: }
                   2221: 
                   2222: /**
                   2223:  * UTF8ToSgml:
                   2224:  * @out:  a pointer to an array of bytes to store the result
                   2225:  * @outlen:  the length of @out
                   2226:  * @in:  a pointer to an array of UTF-8 chars
                   2227:  * @inlen:  the length of @in
                   2228:  *
                   2229:  * Take a block of UTF-8 chars in and try to convert it to an ASCII
                   2230:  * plus SGML entities block of chars out.
                   2231:  *
                   2232:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
                   2233:  * The value of @inlen after return is the number of octets consumed
                   2234:  *     as the return value is positive, else unpredictiable.
                   2235:  * The value of @outlen after return is the number of octets consumed.
                   2236:  */
                   2237: int
                   2238: UTF8ToSgml(unsigned char* out, int *outlen,
                   2239:               const unsigned char* in, int *inlen) {
                   2240:     const unsigned char* processed = in;
                   2241:     const unsigned char* outend;
                   2242:     const unsigned char* outstart = out;
                   2243:     const unsigned char* instart = in;
                   2244:     const unsigned char* inend;
                   2245:     unsigned int c, d;
                   2246:     int trailing;
                   2247: 
                   2248:     if (in == NULL) {
                   2249:         /*
                   2250:         * initialization nothing to do
                   2251:         */
                   2252:        *outlen = 0;
                   2253:        *inlen = 0;
                   2254:        return(0);
                   2255:     }
                   2256:     inend = in + (*inlen);
                   2257:     outend = out + (*outlen);
                   2258:     while (in < inend) {
                   2259:        d = *in++;
                   2260:        if      (d < 0x80)  { c= d; trailing= 0; }
                   2261:        else if (d < 0xC0) {
                   2262:            /* trailing byte in leading position */
                   2263:            *outlen = out - outstart;
                   2264:            *inlen = processed - instart;
                   2265:            return(-2);
                   2266:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                   2267:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                   2268:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                   2269:        else {
                   2270:            /* no chance for this in Ascii */
                   2271:            *outlen = out - outstart;
                   2272:            *inlen = processed - instart;
                   2273:            return(-2);
                   2274:        }
                   2275: 
                   2276:        if (inend - in < trailing) {
                   2277:            break;
                   2278:        } 
                   2279: 
                   2280:        for ( ; trailing; trailing--) {
                   2281:            if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
                   2282:                break;
                   2283:            c <<= 6;
                   2284:            c |= d & 0x3F;
                   2285:        }
                   2286: 
                   2287:        /* assertion: c is a single UTF-4 value */
                   2288:        if (c < 0x80) {
                   2289:            if (out + 1 >= outend)
                   2290:                break;
                   2291:            *out++ = c;
                   2292:        } else {
                   2293:            int len;
                   2294:            sgmlEntityDescPtr ent;
                   2295: 
                   2296:            /*
                   2297:             * Try to lookup a predefined SGML entity for it
                   2298:             */
                   2299: 
                   2300:            ent = sgmlEntityValueLookup(c);
                   2301:            if (ent == NULL) {
                   2302:                /* no chance for this in Ascii */
                   2303:                *outlen = out - outstart;
                   2304:                *inlen = processed - instart;
                   2305:                return(-2);
                   2306:            }
                   2307:            len = strlen(ent->name);
                   2308:            if (out + 2 + len >= outend)
                   2309:                break;
                   2310:            *out++ = '&';
                   2311:            memcpy(out, ent->name, len);
                   2312:            out += len;
                   2313:            *out++ = ';';
                   2314:        }
                   2315:        processed = in;
                   2316:     }
                   2317:     *outlen = out - outstart;
                   2318:     *inlen = processed - instart;
                   2319:     return(0);
                   2320: }
                   2321: 
                   2322: /**
                   2323:  * sgmlEncodeEntities:
                   2324:  * @out:  a pointer to an array of bytes to store the result
                   2325:  * @outlen:  the length of @out
                   2326:  * @in:  a pointer to an array of UTF-8 chars
                   2327:  * @inlen:  the length of @in
                   2328:  * @quoteChar: the quote character to escape (' or ") or zero.
                   2329:  *
                   2330:  * Take a block of UTF-8 chars in and try to convert it to an ASCII
                   2331:  * plus SGML entities block of chars out.
                   2332:  *
                   2333:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
                   2334:  * The value of @inlen after return is the number of octets consumed
                   2335:  *     as the return value is positive, else unpredictiable.
                   2336:  * The value of @outlen after return is the number of octets consumed.
                   2337:  */
                   2338: int
                   2339: sgmlEncodeEntities(unsigned char* out, int *outlen,
                   2340:                   const unsigned char* in, int *inlen, int quoteChar) {
                   2341:     const unsigned char* processed = in;
                   2342:     const unsigned char* outend = out + (*outlen);
                   2343:     const unsigned char* outstart = out;
                   2344:     const unsigned char* instart = in;
                   2345:     const unsigned char* inend = in + (*inlen);
                   2346:     unsigned int c, d;
                   2347:     int trailing;
                   2348: 
                   2349:     while (in < inend) {
                   2350:        d = *in++;
                   2351:        if      (d < 0x80)  { c= d; trailing= 0; }
                   2352:        else if (d < 0xC0) {
                   2353:            /* trailing byte in leading position */
                   2354:            *outlen = out - outstart;
                   2355:            *inlen = processed - instart;
                   2356:            return(-2);
                   2357:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                   2358:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                   2359:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                   2360:        else {
                   2361:            /* no chance for this in Ascii */
                   2362:            *outlen = out - outstart;
                   2363:            *inlen = processed - instart;
                   2364:            return(-2);
                   2365:        }
                   2366: 
                   2367:        if (inend - in < trailing)
                   2368:            break;
                   2369: 
                   2370:        while (trailing--) {
                   2371:            if (((d= *in++) & 0xC0) != 0x80) {
                   2372:                *outlen = out - outstart;
                   2373:                *inlen = processed - instart;
                   2374:                return(-2);
                   2375:            }
                   2376:            c <<= 6;
                   2377:            c |= d & 0x3F;
                   2378:        }
                   2379: 
                   2380:        /* assertion: c is a single UTF-4 value */
                   2381:        if (c < 0x80 && c != quoteChar && c != '&' && c != '<' && c != '>') {
                   2382:            if (out >= outend)
                   2383:                break;
                   2384:            *out++ = c;
                   2385:        } else {
                   2386:            sgmlEntityDescPtr ent;
                   2387:            const char *cp;
                   2388:            char nbuf[16];
                   2389:            int len;
                   2390: 
                   2391:            /*
                   2392:             * Try to lookup a predefined SGML entity for it
                   2393:             */
                   2394:            ent = sgmlEntityValueLookup(c);
                   2395:            if (ent == NULL) {
                   2396:                sprintf(nbuf, "#%u", c);
                   2397:                cp = nbuf;
                   2398:            }
                   2399:            else
                   2400:                cp = ent->name;
                   2401:            len = strlen(cp);
                   2402:            if (out + 2 + len > outend)
                   2403:                break;
                   2404:            *out++ = '&';
                   2405:            memcpy(out, cp, len);
                   2406:            out += len;
                   2407:            *out++ = ';';
                   2408:        }
                   2409:        processed = in;
                   2410:     }
                   2411:     *outlen = out - outstart;
                   2412:     *inlen = processed - instart;
                   2413:     return(0);
                   2414: }
                   2415: 
                   2416: /**
                   2417:  * sgmlDecodeEntities:
                   2418:  * @ctxt:  the parser context
                   2419:  * @len:  the len to decode (in bytes !), -1 for no size limit
                   2420:  * @end:  an end marker xmlChar, 0 if none
                   2421:  * @end2:  an end marker xmlChar, 0 if none
                   2422:  * @end3:  an end marker xmlChar, 0 if none
                   2423:  *
                   2424:  * Subtitute the SGML entities by their value
                   2425:  *
                   2426:  * DEPRECATED !!!!
                   2427:  *
                   2428:  * Returns A newly allocated string with the substitution done. The caller
                   2429:  *      must deallocate it !
                   2430:  */
                   2431: xmlChar *
                   2432: sgmlDecodeEntities(sgmlParserCtxtPtr ctxt, int len,
                   2433:                   xmlChar end, xmlChar  end2, xmlChar end3) {
                   2434:     xmlChar *name = NULL;
                   2435:     xmlChar *buffer = NULL;
                   2436:     unsigned int buffer_size = 0;
                   2437:     unsigned int nbchars = 0;
                   2438:     sgmlEntityDescPtr ent;
                   2439:     unsigned int max = (unsigned int) len;
                   2440:     int c,l;
                   2441: 
                   2442:     if (ctxt->depth > 40) {
                   2443:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2444:            ctxt->sax->error(ctxt->userData,
                   2445:                "Detected entity reference loop\n");
                   2446:        ctxt->wellFormed = 0;
                   2447:        ctxt->disableSAX = 1;
                   2448:        ctxt->errNo = XML_ERR_ENTITY_LOOP;
                   2449:        return(NULL);
                   2450:     }
                   2451: 
                   2452:     /*
                   2453:      * allocate a translation buffer.
                   2454:      */
                   2455:     buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
                   2456:     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
                   2457:     if (buffer == NULL) {
                   2458:        perror("xmlDecodeEntities: malloc failed");
                   2459:        return(NULL);
                   2460:     }
                   2461: 
                   2462:     /*
                   2463:      * Ok loop until we reach one of the ending char or a size limit.
                   2464:      */
                   2465:     c = CUR_CHAR(l);
                   2466:     while ((nbchars < max) && (c != end) &&
                   2467:            (c != end2) && (c != end3)) {
                   2468: 
                   2469:        if (c == 0) break;
                   2470:         if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
                   2471:            int val = sgmlParseCharRef(ctxt);
                   2472:            COPY_BUF(0,buffer,nbchars,val);
                   2473:            NEXTL(l);
                   2474:        } else if ((c == '&') && (ctxt->token != '&')) {
                   2475:            ent = sgmlParseEntityRef(ctxt, &name);
                   2476:            if (name != NULL) {
                   2477:                if (ent != NULL) {
                   2478:                    int val = ent->value;
                   2479:                    COPY_BUF(0,buffer,nbchars,val);
                   2480:                    NEXTL(l);
                   2481:                } else {
                   2482:                    const xmlChar *cur = name;
                   2483: 
                   2484:                    buffer[nbchars++] = '&';
                   2485:                    if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
                   2486:                        growBuffer(buffer);
                   2487:                    }
                   2488:                    while (*cur != 0) {
                   2489:                        buffer[nbchars++] = *cur++;
                   2490:                    }
                   2491:                    buffer[nbchars++] = ';';
                   2492:                }
                   2493:            }
                   2494:        } else {
                   2495:            COPY_BUF(l,buffer,nbchars,c);
                   2496:            NEXTL(l);
                   2497:            if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
                   2498:              growBuffer(buffer);
                   2499:            }
                   2500:        }
                   2501:        c = CUR_CHAR(l);
                   2502:     }
                   2503:     buffer[nbchars++] = 0;
                   2504:     return(buffer);
                   2505: }
                   2506: 
                   2507: /************************************************************************
                   2508:  *                                                                     *
                   2509:  *             Commodity functions to handle streams                   *
                   2510:  *                                                                     *
                   2511:  ************************************************************************/
                   2512: 
                   2513: /**
                   2514:  * sgmlFreeInputStream:
                   2515:  * @input:  an sgmlParserInputPtr
                   2516:  *
                   2517:  * Free up an input stream.
                   2518:  */
                   2519: void
                   2520: sgmlFreeInputStream(sgmlParserInputPtr input) {
                   2521:     if (input == NULL) return;
                   2522: 
                   2523:     if (input->filename != NULL) xmlFree((char *) input->filename);
                   2524:     if (input->directory != NULL) xmlFree((char *) input->directory);
                   2525:     if ((input->free != NULL) && (input->base != NULL))
                   2526:         input->free((xmlChar *) input->base);
                   2527:     if (input->buf != NULL) 
                   2528:         xmlFreeParserInputBuffer(input->buf);
                   2529:     memset(input, -1, sizeof(sgmlParserInput));
                   2530:     xmlFree(input);
                   2531: }
                   2532: 
                   2533: /**
                   2534:  * sgmlNewInputStream:
                   2535:  * @ctxt:  an SGML parser context
                   2536:  *
                   2537:  * Create a new input stream structure
                   2538:  * Returns the new input stream or NULL
                   2539:  */
                   2540: sgmlParserInputPtr
                   2541: sgmlNewInputStream(sgmlParserCtxtPtr ctxt) {
                   2542:     sgmlParserInputPtr input;
                   2543: 
                   2544:     input = (xmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   2545:     if (input == NULL) {
                   2546:         ctxt->errNo = XML_ERR_NO_MEMORY;
                   2547:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2548:            ctxt->sax->error(ctxt->userData, 
                   2549:                             "malloc: couldn't allocate a new input stream\n");
                   2550:        ctxt->errNo = XML_ERR_NO_MEMORY;
                   2551:        return(NULL);
                   2552:     }
                   2553:     memset(input, 0, sizeof(sgmlParserInput));
                   2554:     input->filename = NULL;
                   2555:     input->directory = NULL;
                   2556:     input->base = NULL;
                   2557:     input->cur = NULL;
                   2558:     input->buf = NULL;
                   2559:     input->line = 1;
                   2560:     input->col = 1;
                   2561:     input->buf = NULL;
                   2562:     input->free = NULL;
                   2563:     input->version = NULL;
                   2564:     input->consumed = 0;
                   2565:     input->length = 0;
                   2566:     return(input);
                   2567: }
                   2568: 
                   2569: 
                   2570: /************************************************************************
                   2571:  *                                                                     *
                   2572:  *             Commodity functions, cleanup needed ?                   *
                   2573:  *                                                                     *
                   2574:  ************************************************************************/
                   2575: 
                   2576: /**
                   2577:  * areBlanks:
                   2578:  * @ctxt:  an SGML parser context
                   2579:  * @str:  a xmlChar *
                   2580:  * @len:  the size of @str
                   2581:  *
                   2582:  * Is this a sequence of blank chars that one can ignore ?
                   2583:  *
                   2584:  * Returns 1 if ignorable 0 otherwise.
                   2585:  */
                   2586: 
                   2587: static int areBlanks(sgmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
                   2588:     int i;
                   2589:     xmlNodePtr lastChild;
                   2590: 
                   2591:     for (i = 0;i < len;i++)
                   2592:         if (!(IS_BLANK(str[i]))) return(0);
                   2593: 
                   2594:     if (CUR == 0) return(1);
                   2595:     if (CUR != '<') return(0);
                   2596:     if (ctxt->name == NULL)
                   2597:        return(1);
                   2598: #if 0
                   2599:     if (!xmlStrcmp(ctxt->name, BAD_CAST"sgml"))
                   2600:        return(1);
                   2601:     if (!xmlStrcmp(ctxt->name, BAD_CAST"head"))
                   2602:        return(1);
                   2603:     if (!xmlStrcmp(ctxt->name, BAD_CAST"body"))
                   2604:        return(1);
                   2605: #endif
                   2606:     if (ctxt->node == NULL) return(0);
                   2607:     lastChild = xmlGetLastChild(ctxt->node);
                   2608:     if (lastChild == NULL) {
                   2609:         if (ctxt->node->content != NULL) return(0);
                   2610:     } else if (xmlNodeIsText(lastChild))
                   2611:         return(0);
                   2612:     return(1);
                   2613: }
                   2614: 
                   2615: /**
                   2616:  * sgmlHandleEntity:
                   2617:  * @ctxt:  an SGML parser context
                   2618:  * @entity:  an XML entity pointer.
                   2619:  *
                   2620:  * Default handling of an SGML entity, call the parser with the
                   2621:  * substitution string
                   2622:  */
                   2623: 
                   2624: void
                   2625: sgmlHandleEntity(sgmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
                   2626:     int len;
                   2627: 
                   2628:     if (entity->content == NULL) {
                   2629:         if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2630:            ctxt->sax->error(ctxt->userData, "sgmlHandleEntity %s: content == NULL\n",
                   2631:                       entity->name);
                   2632:        ctxt->wellFormed = 0;
                   2633:         return;
                   2634:     }
                   2635:     len = xmlStrlen(entity->content);
                   2636: 
                   2637:     /*
                   2638:      * Just handle the content as a set of chars.
                   2639:      */
                   2640:     sgmlCheckParagraph(ctxt);
                   2641:     if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   2642:        ctxt->sax->characters(ctxt->userData, entity->content, len);
                   2643: 
                   2644: }
                   2645: 
                   2646: /**
                   2647:  * sgmlNewDocNoDtD:
                   2648:  * @URI:  URI for the dtd, or NULL
                   2649:  * @ExternalID:  the external ID of the DTD, or NULL
                   2650:  *
                   2651:  * Returns a new document, do not intialize the DTD if not provided
                   2652:  */
                   2653: sgmlDocPtr
                   2654: sgmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
                   2655:     xmlDocPtr cur;
                   2656: 
                   2657:     /*
                   2658:      * Allocate a new document and fill the fields.
                   2659:      */
                   2660:     cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
                   2661:     if (cur == NULL) {
                   2662:         fprintf(stderr, "xmlNewDoc : malloc failed\n");
                   2663:        return(NULL);
                   2664:     }
                   2665:     memset(cur, 0, sizeof(xmlDoc));
                   2666: 
                   2667:     cur->type = XML_SGML_DOCUMENT_NODE;
                   2668:     cur->version = NULL;
                   2669:     cur->intSubset = NULL;
                   2670:     if ((ExternalID != NULL) ||
                   2671:        (URI != NULL))
                   2672:        xmlCreateIntSubset(cur, BAD_CAST "SGML", ExternalID, URI);
                   2673:     cur->doc = cur;
                   2674:     cur->name = NULL;
                   2675:     cur->children = NULL; 
                   2676:     cur->extSubset = NULL;
                   2677:     cur->oldNs = NULL;
                   2678:     cur->encoding = NULL;
                   2679:     cur->standalone = 1;
                   2680:     cur->compression = 0;
                   2681:     cur->ids = NULL;
                   2682:     cur->refs = NULL;
                   2683: #ifndef XML_WITHOUT_CORBA
                   2684:     cur->_private = NULL;
                   2685: #endif
                   2686:     return(cur);
                   2687: }
                   2688: 
                   2689: /**
                   2690:  * sgmlNewDoc:
                   2691:  * @URI:  URI for the dtd, or NULL
                   2692:  * @ExternalID:  the external ID of the DTD, or NULL
                   2693:  *
                   2694:  * Returns a new document
                   2695:  */
                   2696: sgmlDocPtr
                   2697: sgmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
                   2698:     if ((URI == NULL) && (ExternalID == NULL))
                   2699:        return(sgmlNewDocNoDtD(
                   2700:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   2701:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"));
                   2702: 
                   2703:     return(sgmlNewDocNoDtD(URI, ExternalID));
                   2704: }
                   2705: 
                   2706: 
                   2707: /************************************************************************
                   2708:  *                                                                     *
                   2709:  *                     The parser itself                               *
                   2710:  *     Relates to http://www.w3.org/TR/docbook                         *
                   2711:  *                                                                     *
                   2712:  ************************************************************************/
                   2713: 
                   2714: /************************************************************************
                   2715:  *                                                                     *
                   2716:  *                     The parser itself                               *
                   2717:  *                                                                     *
                   2718:  ************************************************************************/
                   2719: 
                   2720: /**
                   2721:  * sgmlParseSGMLName:
                   2722:  * @ctxt:  an SGML parser context
                   2723:  *
                   2724:  * parse an SGML tag or attribute name, note that we convert it to lowercase
                   2725:  * since SGML names are not case-sensitive.
                   2726:  *
                   2727:  * Returns the Tag Name parsed or NULL
                   2728:  */
                   2729: 
                   2730: xmlChar *
                   2731: sgmlParseSGMLName(sgmlParserCtxtPtr ctxt) {
                   2732:     xmlChar *ret = NULL;
                   2733:     int i = 0;
                   2734:     xmlChar loc[SGML_PARSER_BUFFER_SIZE];
                   2735: 
                   2736:     if (!IS_LETTER(CUR) && (CUR != '_') &&
                   2737:         (CUR != ':')) return(NULL);
                   2738: 
                   2739:     while ((i < SGML_PARSER_BUFFER_SIZE) &&
                   2740:            ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2741:           (CUR == ':') || (CUR == '_'))) {
                   2742:        if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
                   2743:         else loc[i] = CUR;
                   2744:        i++;
                   2745:        
                   2746:        NEXT;
                   2747:     }
                   2748:     
                   2749:     ret = xmlStrndup(loc, i);
                   2750: 
                   2751:     return(ret);
                   2752: }
                   2753: 
                   2754: /**
                   2755:  * sgmlParseName:
                   2756:  * @ctxt:  an SGML parser context
                   2757:  *
                   2758:  * parse an SGML name, this routine is case sensistive.
                   2759:  *
                   2760:  * Returns the Name parsed or NULL
                   2761:  */
                   2762: 
                   2763: xmlChar *
                   2764: sgmlParseName(sgmlParserCtxtPtr ctxt) {
                   2765:     xmlChar buf[SGML_MAX_NAMELEN];
                   2766:     int len = 0;
                   2767: 
                   2768:     GROW;
                   2769:     if (!IS_LETTER(CUR) && (CUR != '_')) {
                   2770:        return(NULL);
                   2771:     }
                   2772: 
                   2773:     while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2774:            (CUR == '.') || (CUR == '-') ||
                   2775:           (CUR == '_') || (CUR == ':') || 
                   2776:           (IS_COMBINING(CUR)) ||
                   2777:           (IS_EXTENDER(CUR))) {
                   2778:        buf[len++] = CUR;
                   2779:        NEXT;
                   2780:        if (len >= SGML_MAX_NAMELEN) {
                   2781:            fprintf(stderr, 
                   2782:               "sgmlParseName: reached SGML_MAX_NAMELEN limit\n");
                   2783:            while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2784:                   (CUR == '.') || (CUR == '-') ||
                   2785:                   (CUR == '_') || (CUR == ':') || 
                   2786:                   (IS_COMBINING(CUR)) ||
                   2787:                   (IS_EXTENDER(CUR)))
                   2788:                 NEXT;
                   2789:            break;
                   2790:        }
                   2791:     }
                   2792:     return(xmlStrndup(buf, len));
                   2793: }
                   2794: 
                   2795: /**
                   2796:  * sgmlParseSGMLAttribute:
                   2797:  * @ctxt:  an SGML parser context
                   2798:  * @stop:  a char stop value
                   2799:  * 
                   2800:  * parse an SGML attribute value till the stop (quote), if
                   2801:  * stop is 0 then it stops at the first space
                   2802:  *
                   2803:  * Returns the attribute parsed or NULL
                   2804:  */
                   2805: 
                   2806: xmlChar *
                   2807: sgmlParseSGMLAttribute(sgmlParserCtxtPtr ctxt, const xmlChar stop) {
                   2808: #if 0
                   2809:     xmlChar buf[SGML_MAX_NAMELEN];
                   2810:     int len = 0;
                   2811: 
                   2812:     GROW;
                   2813:     while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
                   2814:        if ((stop == 0) && (IS_BLANK(CUR))) break;
                   2815:        buf[len++] = CUR;
                   2816:        NEXT;
                   2817:        if (len >= SGML_MAX_NAMELEN) {
                   2818:            fprintf(stderr, 
                   2819:               "sgmlParseSGMLAttribute: reached SGML_MAX_NAMELEN limit\n");
                   2820:            while ((!IS_BLANK(CUR)) && (CUR != '<') &&
                   2821:                   (CUR != '>') &&
                   2822:                   (CUR != '\'') && (CUR != '"'))
                   2823:                 NEXT;
                   2824:            break;
                   2825:        }
                   2826:     }
                   2827:     return(xmlStrndup(buf, len));
                   2828: #else    
                   2829:     xmlChar *buffer = NULL;
                   2830:     int buffer_size = 0;
                   2831:     xmlChar *out = NULL;
                   2832:     xmlChar *name = NULL;
                   2833: 
                   2834:     xmlChar *cur = NULL;
                   2835:     sgmlEntityDescPtr ent;
                   2836: 
                   2837:     /*
                   2838:      * allocate a translation buffer.
                   2839:      */
                   2840:     buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
                   2841:     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
                   2842:     if (buffer == NULL) {
                   2843:        perror("sgmlParseSGMLAttribute: malloc failed");
                   2844:        return(NULL);
                   2845:     }
                   2846:     out = buffer;
                   2847: 
                   2848:     /*
                   2849:      * Ok loop until we reach one of the ending chars
                   2850:      */
                   2851:     while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
                   2852:        if ((stop == 0) && (IS_BLANK(CUR))) break;
                   2853:         if (CUR == '&') {
                   2854:            if (NXT(1) == '#') {
                   2855:                unsigned int c;
                   2856:                int bits;
                   2857: 
                   2858:                c = sgmlParseCharRef(ctxt);
                   2859:                if      (c <    0x80)
                   2860:                        { *out++  = c;                bits= -6; }
                   2861:                else if (c <   0x800)
                   2862:                        { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2863:                else if (c < 0x10000)
                   2864:                        { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2865:                else                 
                   2866:                        { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2867:         
                   2868:                for ( ; bits >= 0; bits-= 6) {
                   2869:                    *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2870:                }
                   2871:            } else {
                   2872:                ent = sgmlParseEntityRef(ctxt, &name);
                   2873:                if (name == NULL) {
                   2874:                    *out++ = '&';
                   2875:                    if (out - buffer > buffer_size - 100) {
                   2876:                        int index = out - buffer;
                   2877: 
                   2878:                        growBuffer(buffer);
                   2879:                        out = &buffer[index];
                   2880:                    }
                   2881:                } else if (ent == NULL) {
                   2882:                    *out++ = '&';
                   2883:                    cur = name;
                   2884:                    while (*cur != 0) {
                   2885:                        if (out - buffer > buffer_size - 100) {
                   2886:                            int index = out - buffer;
                   2887: 
                   2888:                            growBuffer(buffer);
                   2889:                            out = &buffer[index];
                   2890:                        }
                   2891:                        *out++ = *cur++;
                   2892:                    }
                   2893:                    xmlFree(name);
                   2894:                } else {
                   2895:                    unsigned int c;
                   2896:                    int bits;
                   2897: 
                   2898:                    if (out - buffer > buffer_size - 100) {
                   2899:                        int index = out - buffer;
                   2900: 
                   2901:                        growBuffer(buffer);
                   2902:                        out = &buffer[index];
                   2903:                    }
                   2904:                    c = (xmlChar)ent->value;
                   2905:                    if      (c <    0x80)
                   2906:                        { *out++  = c;                bits= -6; }
                   2907:                    else if (c <   0x800)
                   2908:                        { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2909:                    else if (c < 0x10000)
                   2910:                        { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2911:                    else                 
                   2912:                        { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2913:             
                   2914:                    for ( ; bits >= 0; bits-= 6) {
                   2915:                        *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2916:                    }
                   2917:                    xmlFree(name);
                   2918:                }
                   2919:            }
                   2920:        } else {
                   2921:            unsigned int c;
                   2922:            int bits;
                   2923: 
                   2924:            if (out - buffer > buffer_size - 100) {
                   2925:                int index = out - buffer;
                   2926: 
                   2927:                growBuffer(buffer);
                   2928:                out = &buffer[index];
                   2929:            }
                   2930:            c = CUR;
                   2931:            if      (c <    0x80)
                   2932:                    { *out++  = c;                bits= -6; }
                   2933:            else if (c <   0x800)
                   2934:                    { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2935:            else if (c < 0x10000)
                   2936:                    { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2937:            else                 
                   2938:                    { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2939:      
                   2940:            for ( ; bits >= 0; bits-= 6) {
                   2941:                *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2942:            }
                   2943:            NEXT;
                   2944:        }
                   2945:     }
                   2946:     *out++ = 0;
                   2947:     return(buffer);
                   2948: #endif
                   2949: }
                   2950: 
                   2951: /**
                   2952:  * sgmlParseNmtoken:
                   2953:  * @ctxt:  an SGML parser context
                   2954:  * 
                   2955:  * parse an SGML Nmtoken.
                   2956:  *
                   2957:  * Returns the Nmtoken parsed or NULL
                   2958:  */
                   2959: 
                   2960: xmlChar *
                   2961: sgmlParseNmtoken(sgmlParserCtxtPtr ctxt) {
                   2962:     xmlChar buf[SGML_MAX_NAMELEN];
                   2963:     int len = 0;
                   2964: 
                   2965:     GROW;
                   2966:     while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2967:            (CUR == '.') || (CUR == '-') ||
                   2968:           (CUR == '_') || (CUR == ':') || 
                   2969:           (IS_COMBINING(CUR)) ||
                   2970:           (IS_EXTENDER(CUR))) {
                   2971:        buf[len++] = CUR;
                   2972:        NEXT;
                   2973:        if (len >= SGML_MAX_NAMELEN) {
                   2974:            fprintf(stderr, 
                   2975:               "sgmlParseNmtoken: reached SGML_MAX_NAMELEN limit\n");
                   2976:            while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2977:                   (CUR == '.') || (CUR == '-') ||
                   2978:                   (CUR == '_') || (CUR == ':') || 
                   2979:                   (IS_COMBINING(CUR)) ||
                   2980:                   (IS_EXTENDER(CUR)))
                   2981:                 NEXT;
                   2982:            break;
                   2983:        }
                   2984:     }
                   2985:     return(xmlStrndup(buf, len));
                   2986: }
                   2987: 
                   2988: /**
                   2989:  * sgmlParseEntityRef:
                   2990:  * @ctxt:  an SGML parser context
                   2991:  * @str:  location to store the entity name
                   2992:  *
                   2993:  * parse an SGML ENTITY references
                   2994:  *
                   2995:  * [68] EntityRef ::= '&' Name ';'
                   2996:  *
                   2997:  * Returns the associated sgmlEntityDescPtr if found, or NULL otherwise,
                   2998:  *         if non-NULL *str will have to be freed by the caller.
                   2999:  */
                   3000: sgmlEntityDescPtr
                   3001: sgmlParseEntityRef(sgmlParserCtxtPtr ctxt, xmlChar **str) {
                   3002:     xmlChar *name;
                   3003:     sgmlEntityDescPtr ent = NULL;
                   3004:     *str = NULL;
                   3005: 
                   3006:     if (CUR == '&') {
                   3007:         NEXT;
                   3008:         name = sgmlParseName(ctxt);
                   3009:        if (name == NULL) {
                   3010:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3011:                ctxt->sax->error(ctxt->userData, "sgmlParseEntityRef: no name\n");
                   3012:            ctxt->wellFormed = 0;
                   3013:        } else {
                   3014:            GROW;
                   3015:            if (CUR == ';') {
                   3016:                *str = name;
                   3017: 
                   3018:                /*
                   3019:                 * Lookup the entity in the table.
                   3020:                 */
                   3021:                ent = sgmlEntityLookup(name);
                   3022:                if (ent != NULL) /* OK that's ugly !!! */
                   3023:                    NEXT;
                   3024:            } else {
                   3025:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3026:                    ctxt->sax->error(ctxt->userData,
                   3027:                                     "sgmlParseEntityRef: expecting ';'\n");
                   3028:                *str = name;
                   3029:            }
                   3030:        }
                   3031:     }
                   3032:     return(ent);
                   3033: }
                   3034: 
                   3035: /**
                   3036:  * sgmlParseAttValue:
                   3037:  * @ctxt:  an SGML parser context
                   3038:  *
                   3039:  * parse a value for an attribute
                   3040:  * Note: the parser won't do substitution of entities here, this
                   3041:  * will be handled later in xmlStringGetNodeList, unless it was
                   3042:  * asked for ctxt->replaceEntities != 0 
                   3043:  *
                   3044:  * Returns the AttValue parsed or NULL.
                   3045:  */
                   3046: 
                   3047: xmlChar *
                   3048: sgmlParseAttValue(sgmlParserCtxtPtr ctxt) {
                   3049:     xmlChar *ret = NULL;
                   3050: 
                   3051:     if (CUR == '"') {
                   3052:         NEXT;
                   3053:        ret = sgmlParseSGMLAttribute(ctxt, '"');
                   3054:         if (CUR != '"') {
                   3055:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3056:                ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
                   3057:            ctxt->wellFormed = 0;
                   3058:        } else
                   3059:            NEXT;
                   3060:     } else if (CUR == '\'') {
                   3061:         NEXT;
                   3062:        ret = sgmlParseSGMLAttribute(ctxt, '\'');
                   3063:         if (CUR != '\'') {
                   3064:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3065:                ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
                   3066:            ctxt->wellFormed = 0;
                   3067:        } else
                   3068:            NEXT;
                   3069:     } else {
                   3070:         /*
                   3071:         * That's an SGMLism, the attribute value may not be quoted
                   3072:         */
                   3073:        ret = sgmlParseSGMLAttribute(ctxt, 0);
                   3074:        if (ret == NULL) {
                   3075:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3076:                ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
                   3077:            ctxt->wellFormed = 0;
                   3078:        }
                   3079:     }
                   3080:     return(ret);
                   3081: }
                   3082: 
                   3083: /**
                   3084:  * sgmlParseSystemLiteral:
                   3085:  * @ctxt:  an SGML parser context
                   3086:  * 
                   3087:  * parse an SGML Literal
                   3088:  *
                   3089:  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
                   3090:  *
                   3091:  * Returns the SystemLiteral parsed or NULL
                   3092:  */
                   3093: 
                   3094: xmlChar *
                   3095: sgmlParseSystemLiteral(sgmlParserCtxtPtr ctxt) {
                   3096:     const xmlChar *q;
                   3097:     xmlChar *ret = NULL;
                   3098: 
                   3099:     if (CUR == '"') {
                   3100:         NEXT;
                   3101:        q = CUR_PTR;
                   3102:        while ((IS_CHAR(CUR)) && (CUR != '"'))
                   3103:            NEXT;
                   3104:        if (!IS_CHAR(CUR)) {
                   3105:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3106:                ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
                   3107:            ctxt->wellFormed = 0;
                   3108:        } else {
                   3109:            ret = xmlStrndup(q, CUR_PTR - q);
                   3110:            NEXT;
                   3111:         }
                   3112:     } else if (CUR == '\'') {
                   3113:         NEXT;
                   3114:        q = CUR_PTR;
                   3115:        while ((IS_CHAR(CUR)) && (CUR != '\''))
                   3116:            NEXT;
                   3117:        if (!IS_CHAR(CUR)) {
                   3118:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3119:                ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
                   3120:            ctxt->wellFormed = 0;
                   3121:        } else {
                   3122:            ret = xmlStrndup(q, CUR_PTR - q);
                   3123:            NEXT;
                   3124:         }
                   3125:     } else {
                   3126:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3127:            ctxt->sax->error(ctxt->userData,
                   3128:                             "SystemLiteral \" or ' expected\n");
                   3129:        ctxt->wellFormed = 0;
                   3130:     }
                   3131:     
                   3132:     return(ret);
                   3133: }
                   3134: 
                   3135: /**
                   3136:  * sgmlParsePubidLiteral:
                   3137:  * @ctxt:  an SGML parser context
                   3138:  *
                   3139:  * parse an SGML public literal
                   3140:  *
                   3141:  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
                   3142:  *
                   3143:  * Returns the PubidLiteral parsed or NULL.
                   3144:  */
                   3145: 
                   3146: xmlChar *
                   3147: sgmlParsePubidLiteral(sgmlParserCtxtPtr ctxt) {
                   3148:     const xmlChar *q;
                   3149:     xmlChar *ret = NULL;
                   3150:     /*
                   3151:      * Name ::= (Letter | '_') (NameChar)*
                   3152:      */
                   3153:     if (CUR == '"') {
                   3154:         NEXT;
                   3155:        q = CUR_PTR;
                   3156:        while (IS_PUBIDCHAR(CUR)) NEXT;
                   3157:        if (CUR != '"') {
                   3158:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3159:                ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
                   3160:            ctxt->wellFormed = 0;
                   3161:        } else {
                   3162:            ret = xmlStrndup(q, CUR_PTR - q);
                   3163:            NEXT;
                   3164:        }
                   3165:     } else if (CUR == '\'') {
                   3166:         NEXT;
                   3167:        q = CUR_PTR;
                   3168:        while ((IS_LETTER(CUR)) && (CUR != '\''))
                   3169:            NEXT;
                   3170:        if (!IS_LETTER(CUR)) {
                   3171:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3172:                ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
                   3173:            ctxt->wellFormed = 0;
                   3174:        } else {
                   3175:            ret = xmlStrndup(q, CUR_PTR - q);
                   3176:            NEXT;
                   3177:        }
                   3178:     } else {
                   3179:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3180:            ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
                   3181:        ctxt->wellFormed = 0;
                   3182:     }
                   3183:     
                   3184:     return(ret);
                   3185: }
                   3186: 
                   3187: /**
                   3188:  * sgmlParseCharData:
                   3189:  * @ctxt:  an SGML parser context
                   3190:  * @cdata:  int indicating whether we are within a CDATA section
                   3191:  *
                   3192:  * parse a CharData section.
                   3193:  * if we are within a CDATA section ']]>' marks an end of section.
                   3194:  *
                   3195:  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
                   3196:  */
                   3197: 
                   3198: void
                   3199: sgmlParseCharData(sgmlParserCtxtPtr ctxt, int cdata) {
                   3200:     xmlChar buf[SGML_PARSER_BIG_BUFFER_SIZE + 5];
                   3201:     int nbchar = 0;
                   3202:     int cur, l;
                   3203: 
                   3204:     SHRINK;
                   3205:     cur = CUR_CHAR(l);
                   3206:     while (((cur != '<') || (ctxt->token == '<')) &&
                   3207:            ((cur != '&') || (ctxt->token == '&')) && 
                   3208:           (IS_CHAR(cur))) {
                   3209:        COPY_BUF(l,buf,nbchar,cur);
                   3210:        if (nbchar >= SGML_PARSER_BIG_BUFFER_SIZE) {
                   3211:            /*
                   3212:             * Ok the segment is to be consumed as chars.
                   3213:             */
                   3214:            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                   3215:                if (areBlanks(ctxt, buf, nbchar)) {
                   3216:                    if (ctxt->sax->ignorableWhitespace != NULL)
                   3217:                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                   3218:                                                       buf, nbchar);
                   3219:                } else {
                   3220:                    sgmlCheckParagraph(ctxt);
                   3221:                    if (ctxt->sax->characters != NULL)
                   3222:                        ctxt->sax->characters(ctxt->userData, buf, nbchar);
                   3223:                }
                   3224:            }
                   3225:            nbchar = 0;
                   3226:        }
                   3227:        NEXTL(l);
                   3228:        cur = CUR_CHAR(l);
                   3229:     }
                   3230:     if (nbchar != 0) {
                   3231:        /*
                   3232:         * Ok the segment is to be consumed as chars.
                   3233:         */
                   3234:        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                   3235:            if (areBlanks(ctxt, buf, nbchar)) {
                   3236:                if (ctxt->sax->ignorableWhitespace != NULL)
                   3237:                    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
                   3238:            } else {
                   3239:                sgmlCheckParagraph(ctxt);
                   3240:                if (ctxt->sax->characters != NULL)
                   3241:                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
                   3242:            }
                   3243:        }
                   3244:     }
                   3245: }
                   3246: 
                   3247: /**
                   3248:  * sgmlParseExternalID:
                   3249:  * @ctxt:  an SGML parser context
                   3250:  * @publicID:  a xmlChar** receiving PubidLiteral
                   3251:  * @strict: indicate whether we should restrict parsing to only
                   3252:  *          production [75], see NOTE below
                   3253:  *
                   3254:  * Parse an External ID or a Public ID
                   3255:  *
                   3256:  * NOTE: Productions [75] and [83] interract badly since [75] can generate
                   3257:  *       'PUBLIC' S PubidLiteral S SystemLiteral
                   3258:  *
                   3259:  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
                   3260:  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
                   3261:  *
                   3262:  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
                   3263:  *
                   3264:  * Returns the function returns SystemLiteral and in the second
                   3265:  *                case publicID receives PubidLiteral, is strict is off
                   3266:  *                it is possible to return NULL and have publicID set.
                   3267:  */
                   3268: 
                   3269: xmlChar *
                   3270: sgmlParseExternalID(sgmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
                   3271:     xmlChar *URI = NULL;
                   3272: 
                   3273:     if ((UPPER == 'S') && (UPP(1) == 'Y') &&
                   3274:          (UPP(2) == 'S') && (UPP(3) == 'T') &&
                   3275:         (UPP(4) == 'E') && (UPP(5) == 'M')) {
                   3276:         SKIP(6);
                   3277:        if (!IS_BLANK(CUR)) {
                   3278:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3279:                ctxt->sax->error(ctxt->userData,
                   3280:                    "Space required after 'SYSTEM'\n");
                   3281:            ctxt->wellFormed = 0;
                   3282:        }
                   3283:         SKIP_BLANKS;
                   3284:        URI = sgmlParseSystemLiteral(ctxt);
                   3285:        if (URI == NULL) {
                   3286:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3287:                ctxt->sax->error(ctxt->userData,
                   3288:                  "sgmlParseExternalID: SYSTEM, no URI\n");
                   3289:            ctxt->wellFormed = 0;
                   3290:         }
                   3291:     } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
                   3292:               (UPP(2) == 'B') && (UPP(3) == 'L') &&
                   3293:               (UPP(4) == 'I') && (UPP(5) == 'C')) {
                   3294:         SKIP(6);
                   3295:        if (!IS_BLANK(CUR)) {
                   3296:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3297:                ctxt->sax->error(ctxt->userData,
                   3298:                    "Space required after 'PUBLIC'\n");
                   3299:            ctxt->wellFormed = 0;
                   3300:        }
                   3301:         SKIP_BLANKS;
                   3302:        *publicID = sgmlParsePubidLiteral(ctxt);
                   3303:        if (*publicID == NULL) {
                   3304:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3305:                ctxt->sax->error(ctxt->userData, 
                   3306:                  "sgmlParseExternalID: PUBLIC, no Public Identifier\n");
                   3307:            ctxt->wellFormed = 0;
                   3308:        }
                   3309:         SKIP_BLANKS;
                   3310:         if ((CUR == '"') || (CUR == '\'')) {
                   3311:            URI = sgmlParseSystemLiteral(ctxt);
                   3312:        }
                   3313:     }
                   3314:     return(URI);
                   3315: }
                   3316: 
                   3317: /**
                   3318:  * sgmlParseComment:
                   3319:  * @ctxt:  an SGML parser context
                   3320:  *
                   3321:  * Parse an XML (SGML) comment <!-- .... -->
                   3322:  *
                   3323:  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
                   3324:  */
                   3325: void
                   3326: sgmlParseComment(sgmlParserCtxtPtr ctxt) {
                   3327:     xmlChar *buf = NULL;
                   3328:     int len;
                   3329:     int size = SGML_PARSER_BUFFER_SIZE;
                   3330:     int q, ql;
                   3331:     int r, rl;
                   3332:     int cur, l;
                   3333:     xmlParserInputState state;
                   3334: 
                   3335:     /*
                   3336:      * Check that there is a comment right here.
                   3337:      */
                   3338:     if ((RAW != '<') || (NXT(1) != '!') ||
                   3339:         (NXT(2) != '-') || (NXT(3) != '-')) return;
                   3340: 
                   3341:     state = ctxt->instate;
                   3342:     ctxt->instate = XML_PARSER_COMMENT;
                   3343:     SHRINK;
                   3344:     SKIP(4);
                   3345:     buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
                   3346:     if (buf == NULL) {
                   3347:        fprintf(stderr, "malloc of %d byte failed\n", size);
                   3348:        ctxt->instate = state;
                   3349:        return;
                   3350:     }
                   3351:     q = CUR_CHAR(ql);
                   3352:     NEXTL(ql);
                   3353:     r = CUR_CHAR(rl);
                   3354:     NEXTL(rl);
                   3355:     cur = CUR_CHAR(l);
                   3356:     len = 0;
                   3357:     while (IS_CHAR(cur) &&
                   3358:            ((cur != '>') ||
                   3359:            (r != '-') || (q != '-'))) {
                   3360:        if (len + 5 >= size) {
                   3361:            size *= 2;
                   3362:            buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
                   3363:            if (buf == NULL) {
                   3364:                fprintf(stderr, "realloc of %d byte failed\n", size);
                   3365:                ctxt->instate = state;
                   3366:                return;
                   3367:            }
                   3368:        }
                   3369:        COPY_BUF(ql,buf,len,q);
                   3370:        q = r;
                   3371:        ql = rl;
                   3372:        r = cur;
                   3373:        rl = l;
                   3374:        NEXTL(l);
                   3375:        cur = CUR_CHAR(l);
                   3376:        if (cur == 0) {
                   3377:            SHRINK;
                   3378:            GROW;
                   3379:            cur = CUR_CHAR(l);
                   3380:        }
                   3381:     }
                   3382:     buf[len] = 0;
                   3383:     if (!IS_CHAR(cur)) {
                   3384:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3385:            ctxt->sax->error(ctxt->userData,
                   3386:                             "Comment not terminated \n<!--%.50s\n", buf);
                   3387:        ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
                   3388:        ctxt->wellFormed = 0;
                   3389:        xmlFree(buf);
                   3390:     } else {
                   3391:         NEXT;
                   3392:        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                   3393:            (!ctxt->disableSAX))
                   3394:            ctxt->sax->comment(ctxt->userData, buf);
                   3395:        xmlFree(buf);
                   3396:     }
                   3397:     ctxt->instate = state;
                   3398: }
                   3399: 
                   3400: /**
                   3401:  * sgmlParseCharRef:
                   3402:  * @ctxt:  an SGML parser context
                   3403:  *
                   3404:  * parse Reference declarations
                   3405:  *
                   3406:  * [66] CharRef ::= '&#' [0-9]+ ';' |
                   3407:  *                  '&#x' [0-9a-fA-F]+ ';'
                   3408:  *
                   3409:  * Returns the value parsed (as an int)
                   3410:  */
                   3411: int
                   3412: sgmlParseCharRef(sgmlParserCtxtPtr ctxt) {
                   3413:     int val = 0;
                   3414: 
                   3415:     if ((CUR == '&') && (NXT(1) == '#') &&
                   3416:         (NXT(2) == 'x')) {
                   3417:        SKIP(3);
                   3418:        while (CUR != ';') {
                   3419:            if ((CUR >= '0') && (CUR <= '9')) 
                   3420:                val = val * 16 + (CUR - '0');
                   3421:            else if ((CUR >= 'a') && (CUR <= 'f'))
                   3422:                val = val * 16 + (CUR - 'a') + 10;
                   3423:            else if ((CUR >= 'A') && (CUR <= 'F'))
                   3424:                val = val * 16 + (CUR - 'A') + 10;
                   3425:            else {
                   3426:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3427:                    ctxt->sax->error(ctxt->userData, 
                   3428:                         "sgmlParseCharRef: invalid hexadecimal value\n");
                   3429:                ctxt->wellFormed = 0;
                   3430:                val = 0;
                   3431:                break;
                   3432:            }
                   3433:            NEXT;
                   3434:        }
                   3435:        if (CUR == ';')
                   3436:            NEXT;
                   3437:     } else if  ((CUR == '&') && (NXT(1) == '#')) {
                   3438:        SKIP(2);
                   3439:        while (CUR != ';') {
                   3440:            if ((CUR >= '0') && (CUR <= '9')) 
                   3441:                val = val * 10 + (CUR - '0');
                   3442:            else {
                   3443:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3444:                    ctxt->sax->error(ctxt->userData, 
                   3445:                         "sgmlParseCharRef: invalid decimal value\n");
                   3446:                ctxt->wellFormed = 0;
                   3447:                val = 0;
                   3448:                break;
                   3449:            }
                   3450:            NEXT;
                   3451:        }
                   3452:        if (CUR == ';')
                   3453:            NEXT;
                   3454:     } else {
                   3455:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3456:            ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid value\n");
                   3457:        ctxt->wellFormed = 0;
                   3458:     }
                   3459:     /*
                   3460:      * Check the value IS_CHAR ...
                   3461:      */
                   3462:     if (IS_CHAR(val)) {
                   3463:         return(val);
                   3464:     } else {
                   3465:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3466:            ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid xmlChar value %d\n",
                   3467:                             val);
                   3468:        ctxt->wellFormed = 0;
                   3469:     }
                   3470:     return(0);
                   3471: }
                   3472: 
                   3473: 
                   3474: /**
                   3475:  * sgmlParseDocTypeDecl :
                   3476:  * @ctxt:  an SGML parser context
                   3477:  *
                   3478:  * parse a DOCTYPE declaration
                   3479:  *
                   3480:  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 
                   3481:  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
                   3482:  */
                   3483: 
                   3484: void
                   3485: sgmlParseDocTypeDecl(sgmlParserCtxtPtr ctxt) {
                   3486:     xmlChar *name;
                   3487:     xmlChar *ExternalID = NULL;
                   3488:     xmlChar *URI = NULL;
                   3489: 
                   3490:     /*
                   3491:      * We know that '<!DOCTYPE' has been detected.
                   3492:      */
                   3493:     SKIP(9);
                   3494: 
                   3495:     SKIP_BLANKS;
                   3496: 
                   3497:     /*
                   3498:      * Parse the DOCTYPE name.
                   3499:      */
                   3500:     name = sgmlParseName(ctxt);
                   3501:     if (name == NULL) {
                   3502:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3503:            ctxt->sax->error(ctxt->userData, "sgmlParseDocTypeDecl : no DOCTYPE name !\n");
                   3504:        ctxt->wellFormed = 0;
                   3505:     }
                   3506:     /*
                   3507:      * Check that upper(name) == "SGML" !!!!!!!!!!!!!
                   3508:      */
                   3509: 
                   3510:     SKIP_BLANKS;
                   3511: 
                   3512:     /*
                   3513:      * Check for SystemID and ExternalID
                   3514:      */
                   3515:     URI = sgmlParseExternalID(ctxt, &ExternalID, 0);
                   3516:     SKIP_BLANKS;
                   3517: 
                   3518:     /*
1.2       veillard 3519:      * Create or update the document accordingly to the DOCTYPE
                   3520:      */
                   3521:     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
                   3522:        (!ctxt->disableSAX))
                   3523:        ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
                   3524: 
                   3525:     /*
                   3526:      * Is there any internal subset declarations ?
                   3527:      * they are handled separately in sgmlParseInternalSubset()
                   3528:      */
                   3529:     if (RAW == '[')
                   3530:        return;
                   3531: 
                   3532: 
                   3533:     /*
1.1       veillard 3534:      * We should be at the end of the DOCTYPE declaration.
                   3535:      */
                   3536:     if (CUR != '>') {
                   3537:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3538:            ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
                   3539:        ctxt->wellFormed = 0;
                   3540:         /* We shouldn't try to resynchronize ... */
                   3541:     }
                   3542:     NEXT;
                   3543: 
                   3544:     /*
                   3545:      * Cleanup, since we don't use all those identifiers
                   3546:      */
                   3547:     if (URI != NULL) xmlFree(URI);
                   3548:     if (ExternalID != NULL) xmlFree(ExternalID);
                   3549:     if (name != NULL) xmlFree(name);
                   3550: }
                   3551: 
                   3552: /**
                   3553:  * sgmlParseAttribute:
                   3554:  * @ctxt:  an SGML parser context
                   3555:  * @value:  a xmlChar ** used to store the value of the attribute
                   3556:  *
                   3557:  * parse an attribute
                   3558:  *
                   3559:  * [41] Attribute ::= Name Eq AttValue
                   3560:  *
                   3561:  * [25] Eq ::= S? '=' S?
                   3562:  *
                   3563:  * With namespace:
                   3564:  *
                   3565:  * [NS 11] Attribute ::= QName Eq AttValue
                   3566:  *
                   3567:  * Also the case QName == xmlns:??? is handled independently as a namespace
                   3568:  * definition.
                   3569:  *
                   3570:  * Returns the attribute name, and the value in *value.
                   3571:  */
                   3572: 
                   3573: xmlChar *
                   3574: sgmlParseAttribute(sgmlParserCtxtPtr ctxt, xmlChar **value) {
                   3575:     xmlChar *name, *val = NULL;
                   3576: 
                   3577:     *value = NULL;
                   3578:     name = sgmlParseName(ctxt);
                   3579:     if (name == NULL) {
                   3580:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3581:            ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
                   3582:        ctxt->wellFormed = 0;
                   3583:         return(NULL);
                   3584:     }
                   3585: 
                   3586:     /*
                   3587:      * read the value
                   3588:      */
                   3589:     SKIP_BLANKS;
                   3590:     if (CUR == '=') {
                   3591:         NEXT;
                   3592:        SKIP_BLANKS;
                   3593:        val = sgmlParseAttValue(ctxt);
                   3594:        /******
                   3595:     } else {
                   3596:         * TODO : some attribute must have values, some may not
                   3597:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3598:            ctxt->sax->warning(ctxt->userData,
                   3599:               "No value for attribute %s\n", name); */
                   3600:     }
                   3601: 
                   3602:     *value = val;
                   3603:     return(name);
                   3604: }
                   3605: 
                   3606: /**
                   3607:  * sgmlCheckEncoding:
                   3608:  * @ctxt:  an SGML parser context
                   3609:  * @attvalue: the attribute value
                   3610:  *
                   3611:  * Checks an http-equiv attribute from a Meta tag to detect
                   3612:  * the encoding
                   3613:  * If a new encoding is detected the parser is switched to decode
                   3614:  * it and pass UTF8
                   3615:  */
                   3616: void
                   3617: sgmlCheckEncoding(sgmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
                   3618:     const xmlChar *encoding;
                   3619: 
                   3620:     if ((ctxt == NULL) || (attvalue == NULL))
                   3621:        return;
                   3622: 
                   3623:     encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
                   3624:     if (encoding == NULL) 
                   3625:        encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
                   3626:     if (encoding == NULL) 
                   3627:        encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
                   3628:     if (encoding != NULL) {
                   3629:        encoding += 8;
                   3630:     } else {
                   3631:        encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
                   3632:        if (encoding == NULL) 
                   3633:            encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
                   3634:        if (encoding == NULL) 
                   3635:            encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
                   3636:        if (encoding != NULL)
                   3637:            encoding += 9;
                   3638:     }
                   3639:     if (encoding != NULL) {
                   3640:        xmlCharEncoding enc;
                   3641:        xmlCharEncodingHandlerPtr handler;
                   3642: 
                   3643:        while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
                   3644: 
                   3645:        if (ctxt->input->encoding != NULL)
                   3646:            xmlFree((xmlChar *) ctxt->input->encoding);
                   3647:        ctxt->input->encoding = xmlStrdup(encoding);
                   3648: 
                   3649:        enc = xmlParseCharEncoding((const char *) encoding);
                   3650:        /*
                   3651:         * registered set of known encodings
                   3652:         */
                   3653:        if (enc != XML_CHAR_ENCODING_ERROR) {
                   3654:            xmlSwitchEncoding(ctxt, enc);
                   3655:            ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   3656:        } else {
                   3657:            /*
                   3658:             * fallback for unknown encodings
                   3659:             */
                   3660:            handler = xmlFindCharEncodingHandler((const char *) encoding);
                   3661:            if (handler != NULL) {
                   3662:                xmlSwitchToEncoding(ctxt, handler);
                   3663:                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   3664:            } else {
                   3665:                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                   3666:            }
                   3667:        }
                   3668: 
                   3669:        if ((ctxt->input->buf != NULL) &&
                   3670:            (ctxt->input->buf->encoder != NULL) &&
                   3671:            (ctxt->input->buf->raw != NULL) &&
                   3672:            (ctxt->input->buf->buffer != NULL)) {
                   3673:            int nbchars;
                   3674:            int processed;
                   3675: 
                   3676:            /*
                   3677:             * convert as much as possible to the parser reading buffer.
                   3678:             */
                   3679:            processed = ctxt->input->cur - ctxt->input->base;
                   3680:            xmlBufferShrink(ctxt->input->buf->buffer, processed);
                   3681:            nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
                   3682:                                       ctxt->input->buf->buffer,
                   3683:                                       ctxt->input->buf->raw);
                   3684:            if (nbchars < 0) {
                   3685:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3686:                    ctxt->sax->error(ctxt->userData, 
                   3687:                     "sgmlCheckEncoding: encoder error\n");
                   3688:                ctxt->errNo = XML_ERR_INVALID_ENCODING;
                   3689:            }
                   3690:            ctxt->input->base =
                   3691:            ctxt->input->cur = ctxt->input->buf->buffer->content;
                   3692:        }
                   3693:     }
                   3694: }
                   3695: 
                   3696: /**
                   3697:  * sgmlCheckMeta:
                   3698:  * @ctxt:  an SGML parser context
                   3699:  * @atts:  the attributes values
                   3700:  *
                   3701:  * Checks an attributes from a Meta tag
                   3702:  */
                   3703: void
                   3704: sgmlCheckMeta(sgmlParserCtxtPtr ctxt, const xmlChar **atts) {
                   3705:     int i;
                   3706:     const xmlChar *att, *value;
                   3707:     int http = 0;
                   3708:     const xmlChar *content = NULL;
                   3709: 
                   3710:     if ((ctxt == NULL) || (atts == NULL))
                   3711:        return;
                   3712: 
                   3713:     i = 0;
                   3714:     att = atts[i++];
                   3715:     while (att != NULL) {
                   3716:        value = atts[i++];
                   3717:        if ((value != NULL) &&
                   3718:            ((!xmlStrcmp(att, BAD_CAST"http-equiv")) ||
                   3719:             (!xmlStrcmp(att, BAD_CAST"Http-Equiv")) ||
                   3720:             (!xmlStrcmp(att, BAD_CAST"HTTP-EQUIV"))) &&
                   3721:            ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
                   3722:             (!xmlStrcmp(value, BAD_CAST"content-type")) ||
                   3723:             (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
                   3724:            http = 1;
                   3725:        else if ((value != NULL) &&
                   3726:                 ((!xmlStrcmp(att, BAD_CAST"content")) ||
                   3727:                  (!xmlStrcmp(att, BAD_CAST"Content")) ||
                   3728:                  (!xmlStrcmp(att, BAD_CAST"CONTENT"))))
                   3729:            content = value;
                   3730:        att = atts[i++];
                   3731:     }
                   3732:     if ((http) && (content != NULL))
                   3733:        sgmlCheckEncoding(ctxt, content);
                   3734: 
                   3735: }
                   3736: 
                   3737: /**
                   3738:  * sgmlParseStartTag:
                   3739:  * @ctxt:  an SGML parser context
                   3740:  * 
                   3741:  * parse a start of tag either for rule element or
                   3742:  * EmptyElement. In both case we don't parse the tag closing chars.
                   3743:  *
                   3744:  * [40] STag ::= '<' Name (S Attribute)* S? '>'
                   3745:  *
                   3746:  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
                   3747:  *
                   3748:  * With namespace:
                   3749:  *
                   3750:  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
                   3751:  *
                   3752:  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
                   3753:  *
                   3754:  */
                   3755: 
                   3756: void
                   3757: sgmlParseStartTag(sgmlParserCtxtPtr ctxt) {
                   3758:     xmlChar *name;
                   3759:     xmlChar *attname;
                   3760:     xmlChar *attvalue;
                   3761:     const xmlChar **atts = NULL;
                   3762:     int nbatts = 0;
                   3763:     int maxatts = 0;
                   3764:     int meta = 0;
                   3765:     int i;
                   3766: 
                   3767:     if (CUR != '<') return;
                   3768:     NEXT;
                   3769: 
                   3770:     GROW;
                   3771:     name = sgmlParseSGMLName(ctxt);
                   3772:     if (name == NULL) {
                   3773:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3774:            ctxt->sax->error(ctxt->userData, 
                   3775:             "sgmlParseStartTag: invalid element name\n");
                   3776:        ctxt->wellFormed = 0;
                   3777:         return;
                   3778:     }
                   3779:     if (!xmlStrcmp(name, BAD_CAST"meta"))
                   3780:        meta = 1;
                   3781: 
                   3782:     /*
                   3783:      * Check for auto-closure of SGML elements.
                   3784:      */
                   3785:     sgmlAutoClose(ctxt, name);
                   3786: 
                   3787:     /*
                   3788:      * Check for implied SGML elements.
                   3789:      */
                   3790:     sgmlCheckImplied(ctxt, name);
                   3791: 
                   3792:     /*
                   3793:      * Now parse the attributes, it ends up with the ending
                   3794:      *
                   3795:      * (S Attribute)* S?
                   3796:      */
                   3797:     SKIP_BLANKS;
                   3798:     while ((IS_CHAR(CUR)) &&
                   3799:            (CUR != '>') && 
                   3800:           ((CUR != '/') || (NXT(1) != '>'))) {
                   3801:        long cons = ctxt->nbChars;
                   3802: 
                   3803:        GROW;
                   3804:        attname = sgmlParseAttribute(ctxt, &attvalue);
                   3805:         if (attname != NULL) {
                   3806: 
                   3807:            /*
                   3808:             * Well formedness requires at most one declaration of an attribute
                   3809:             */
                   3810:            for (i = 0; i < nbatts;i += 2) {
                   3811:                if (!xmlStrcmp(atts[i], attname)) {
                   3812:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3813:                        ctxt->sax->error(ctxt->userData,
                   3814:                                         "Attribute %s redefined\n",
                   3815:                                         attname);
                   3816:                    ctxt->wellFormed = 0;
                   3817:                    xmlFree(attname);
                   3818:                    if (attvalue != NULL)
                   3819:                        xmlFree(attvalue);
                   3820:                    goto failed;
                   3821:                }
                   3822:            }
                   3823: 
                   3824:            /*
                   3825:             * Add the pair to atts
                   3826:             */
                   3827:            if (atts == NULL) {
                   3828:                maxatts = 10;
                   3829:                atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
                   3830:                if (atts == NULL) {
                   3831:                    fprintf(stderr, "malloc of %ld byte failed\n",
                   3832:                            maxatts * (long)sizeof(xmlChar *));
                   3833:                    if (name != NULL) xmlFree(name);
                   3834:                    return;
                   3835:                }
                   3836:            } else if (nbatts + 4 > maxatts) {
                   3837:                maxatts *= 2;
                   3838:                atts = (const xmlChar **) xmlRealloc(atts, maxatts * sizeof(xmlChar *));
                   3839:                if (atts == NULL) {
                   3840:                    fprintf(stderr, "realloc of %ld byte failed\n",
                   3841:                            maxatts * (long)sizeof(xmlChar *));
                   3842:                    if (name != NULL) xmlFree(name);
                   3843:                    return;
                   3844:                }
                   3845:            }
                   3846:            atts[nbatts++] = attname;
                   3847:            atts[nbatts++] = attvalue;
                   3848:            atts[nbatts] = NULL;
                   3849:            atts[nbatts + 1] = NULL;
                   3850:        }
                   3851: 
                   3852: failed:
                   3853:        SKIP_BLANKS;
                   3854:         if (cons == ctxt->nbChars) {
                   3855:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3856:                ctxt->sax->error(ctxt->userData, 
                   3857:                 "sgmlParseStartTag: problem parsing attributes\n");
                   3858:            ctxt->wellFormed = 0;
                   3859:            break;
                   3860:        }
                   3861:     }
                   3862: 
                   3863:     /*
                   3864:      * Handle specific association to the META tag
                   3865:      */
                   3866:     if (meta)
                   3867:        sgmlCheckMeta(ctxt, atts);
                   3868: 
                   3869:     /*
                   3870:      * SAX: Start of Element !
                   3871:      */
                   3872:     sgmlnamePush(ctxt, xmlStrdup(name));
                   3873: #ifdef DEBUG
                   3874:     fprintf(stderr,"Start of element %s: pushed %s\n", name, ctxt->name);
                   3875: #endif    
                   3876:     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   3877:         ctxt->sax->startElement(ctxt->userData, name, atts);
                   3878: 
                   3879:     if (atts != NULL) {
                   3880:         for (i = 0;i < nbatts;i++) {
                   3881:            if (atts[i] != NULL)
                   3882:                xmlFree((xmlChar *) atts[i]);
                   3883:        }
                   3884:        xmlFree((void *) atts);
                   3885:     }
                   3886:     if (name != NULL) xmlFree(name);
                   3887: }
                   3888: 
                   3889: /**
                   3890:  * sgmlParseEndTag:
                   3891:  * @ctxt:  an SGML parser context
                   3892:  *
                   3893:  * parse an end of tag
                   3894:  *
                   3895:  * [42] ETag ::= '</' Name S? '>'
                   3896:  *
                   3897:  * With namespace
                   3898:  *
                   3899:  * [NS 9] ETag ::= '</' QName S? '>'
                   3900:  */
                   3901: 
                   3902: void
                   3903: sgmlParseEndTag(sgmlParserCtxtPtr ctxt) {
                   3904:     xmlChar *name;
                   3905:     xmlChar *oldname;
                   3906:     int i;
                   3907: 
                   3908:     if ((CUR != '<') || (NXT(1) != '/')) {
                   3909:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3910:            ctxt->sax->error(ctxt->userData, "sgmlParseEndTag: '</' not found\n");
                   3911:        ctxt->wellFormed = 0;
                   3912:        return;
                   3913:     }
                   3914:     SKIP(2);
                   3915: 
                   3916:     name = sgmlParseSGMLName(ctxt);
                   3917:     if (name == NULL) {
                   3918:        if (CUR == '>') {
                   3919:            NEXT;
                   3920:            oldname = sgmlnamePop(ctxt);
                   3921:            if (oldname != NULL) {
                   3922:                if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   3923:                    ctxt->sax->endElement(ctxt->userData, name);
                   3924: #ifdef DEBUG
                   3925:                fprintf(stderr,"End of tag </>: popping out %s\n", oldname);
                   3926: #endif
                   3927:                xmlFree(oldname);
                   3928: #ifdef DEBUG
                   3929:            } else {
                   3930:                fprintf(stderr,"End of tag </>: stack empty !!!\n");
                   3931: #endif
                   3932:            }
                   3933:            return;
                   3934:        } else
                   3935:            return;
                   3936:     }
                   3937: 
                   3938:     /*
                   3939:      * We should definitely be at the ending "S? '>'" part
                   3940:      */
                   3941:     SKIP_BLANKS;
                   3942:     if ((!IS_CHAR(CUR)) || (CUR != '>')) {
                   3943:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3944:            ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
                   3945:        ctxt->wellFormed = 0;
                   3946:     } else
                   3947:        NEXT;
                   3948: 
                   3949:     /*
                   3950:      * If the name read is not one of the element in the parsing stack
                   3951:      * then return, it's just an error.
                   3952:      */
                   3953:     for (i = (ctxt->nameNr - 1);i >= 0;i--) {
                   3954:         if (!xmlStrcmp(name, ctxt->nameTab[i])) break;
                   3955:     }
                   3956:     if (i < 0) {
                   3957:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3958:            ctxt->sax->error(ctxt->userData,
                   3959:             "Unexpected end tag : %s\n", name);
                   3960:        xmlFree(name);
                   3961:        ctxt->wellFormed = 0;
                   3962:        return;
                   3963:     }
                   3964: 
                   3965: 
                   3966:     /*
                   3967:      * Check for auto-closure of SGML elements.
                   3968:      */
                   3969: 
                   3970:     sgmlAutoCloseOnClose(ctxt, name);
                   3971: 
                   3972:     /*
                   3973:      * Well formedness constraints, opening and closing must match.
                   3974:      * With the exception that the autoclose may have popped stuff out
                   3975:      * of the stack.
                   3976:      */
                   3977:     if (((name[0] != '/') || (name[1] != 0)) &&
                   3978:        (xmlStrcmp(name, ctxt->name))) {
                   3979: #ifdef DEBUG
                   3980:        fprintf(stderr,"End of tag %s: expecting %s\n", name, ctxt->name);
                   3981: #endif
                   3982:         if ((ctxt->name != NULL) && 
                   3983:            (xmlStrcmp(ctxt->name, name))) {
                   3984:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3985:                ctxt->sax->error(ctxt->userData,
                   3986:                 "Opening and ending tag mismatch: %s and %s\n",
                   3987:                                 name, ctxt->name);
                   3988:            ctxt->wellFormed = 0;
                   3989:         }
                   3990:     }
                   3991: 
                   3992:     /*
                   3993:      * SAX: End of Tag
                   3994:      */
                   3995:     oldname = ctxt->name;
                   3996:     if (((name[0] == '/') && (name[1] == 0)) ||
                   3997:        ((oldname != NULL) && (!xmlStrcmp(oldname, name)))) {
                   3998:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   3999:            ctxt->sax->endElement(ctxt->userData, name);
                   4000:        oldname = sgmlnamePop(ctxt);
                   4001:        if (oldname != NULL) {
                   4002: #ifdef DEBUG
                   4003:            fprintf(stderr,"End of tag %s: popping out %s\n", name, oldname);
                   4004: #endif
                   4005:            xmlFree(oldname);
                   4006: #ifdef DEBUG
                   4007:        } else {
                   4008:            fprintf(stderr,"End of tag %s: stack empty !!!\n", name);
                   4009: #endif
                   4010:        }
                   4011:     }
                   4012: 
                   4013:     if (name != NULL)
                   4014:        xmlFree(name);
                   4015: 
                   4016:     return;
                   4017: }
                   4018: 
                   4019: 
                   4020: /**
                   4021:  * sgmlParseReference:
                   4022:  * @ctxt:  an SGML parser context
                   4023:  * 
                   4024:  * parse and handle entity references in content,
                   4025:  * this will end-up in a call to character() since this is either a
                   4026:  * CharRef, or a predefined entity.
                   4027:  */
                   4028: void
                   4029: sgmlParseReference(sgmlParserCtxtPtr ctxt) {
                   4030:     sgmlEntityDescPtr ent;
                   4031:     xmlChar out[6];
                   4032:     xmlChar *name;
                   4033:     if (CUR != '&') return;
                   4034: 
                   4035:     if (NXT(1) == '#') {
                   4036:        unsigned int c;
                   4037:        int bits, i = 0;
                   4038: 
                   4039:        c = sgmlParseCharRef(ctxt);
                   4040:         if      (c <    0x80) { out[i++]= c;                bits= -6; }
                   4041:         else if (c <   0x800) { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   4042:         else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   4043:         else                  { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   4044:  
                   4045:         for ( ; bits >= 0; bits-= 6) {
                   4046:             out[i++]= ((c >> bits) & 0x3F) | 0x80;
                   4047:         }
                   4048:        out[i] = 0;
                   4049: 
                   4050:        sgmlCheckParagraph(ctxt);
                   4051:        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4052:            ctxt->sax->characters(ctxt->userData, out, i);
                   4053:     } else {
                   4054:        ent = sgmlParseEntityRef(ctxt, &name);
                   4055:        if (name == NULL) {
                   4056:            sgmlCheckParagraph(ctxt);
                   4057:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4058:                ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
                   4059:            return;
                   4060:        }
                   4061:        if ((ent == NULL) || (ent->value <= 0)) {
                   4062:            sgmlCheckParagraph(ctxt);
                   4063:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
                   4064:                ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
                   4065:                ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
                   4066:                /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
                   4067:            }
                   4068:        } else {
                   4069:            unsigned int c;
                   4070:            int bits, i = 0;
                   4071: 
                   4072:            c = ent->value;
                   4073:            if      (c <    0x80)
                   4074:                    { out[i++]= c;                bits= -6; }
                   4075:            else if (c <   0x800)
                   4076:                    { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   4077:            else if (c < 0x10000)
                   4078:                    { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   4079:            else                 
                   4080:                    { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   4081:      
                   4082:            for ( ; bits >= 0; bits-= 6) {
                   4083:                out[i++]= ((c >> bits) & 0x3F) | 0x80;
                   4084:            }
                   4085:            out[i] = 0;
                   4086: 
                   4087:            sgmlCheckParagraph(ctxt);
                   4088:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4089:                ctxt->sax->characters(ctxt->userData, out, i);
                   4090:        }
                   4091:        xmlFree(name);
                   4092:     }
                   4093: }
                   4094: 
                   4095: /**
                   4096:  * sgmlParseContent:
                   4097:  * @ctxt:  an SGML parser context
                   4098:  * @name:  the node name
                   4099:  *
                   4100:  * Parse a content: comment, sub-element, reference or text.
                   4101:  *
                   4102:  */
                   4103: 
                   4104: void
                   4105: sgmlParseContent(sgmlParserCtxtPtr ctxt) {
                   4106:     xmlChar *currentNode;
                   4107:     int depth;
                   4108: 
                   4109:     currentNode = xmlStrdup(ctxt->name);
                   4110:     depth = ctxt->nameNr;
                   4111:     while (1) {
                   4112:        long cons = ctxt->nbChars;
                   4113: 
                   4114:         GROW;
                   4115:        /*
                   4116:         * Our tag or one of it's parent or children is ending.
                   4117:         */
                   4118:         if ((CUR == '<') && (NXT(1) == '/')) {
                   4119:            sgmlParseEndTag(ctxt);
                   4120:            if (currentNode != NULL) xmlFree(currentNode);
                   4121:            return;
                   4122:         }
                   4123: 
                   4124:        /*
                   4125:         * Has this node been popped out during parsing of
                   4126:         * the next element
                   4127:         */
                   4128:         if ((xmlStrcmp(currentNode, ctxt->name)) &&
                   4129:            (depth >= ctxt->nameNr)) {
                   4130:            if (currentNode != NULL) xmlFree(currentNode);
                   4131:            return;
                   4132:        }
                   4133: 
                   4134:        /*
                   4135:         * Sometimes DOCTYPE arrives in the middle of the document
                   4136:         */
                   4137:        if ((CUR == '<') && (NXT(1) == '!') &&
                   4138:            (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   4139:            (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   4140:            (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   4141:            (UPP(8) == 'E')) {
                   4142:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4143:                ctxt->sax->error(ctxt->userData,
                   4144:                     "Misplaced DOCTYPE declaration\n");
                   4145:            ctxt->wellFormed = 0;
                   4146:            sgmlParseDocTypeDecl(ctxt);
                   4147:        }
                   4148: 
                   4149:        /*
                   4150:         * First case :  a comment
                   4151:         */
                   4152:        if ((CUR == '<') && (NXT(1) == '!') &&
                   4153:                 (NXT(2) == '-') && (NXT(3) == '-')) {
                   4154:            sgmlParseComment(ctxt);
                   4155:        }
                   4156: 
                   4157:        /*
                   4158:         * Second case :  a sub-element.
                   4159:         */
                   4160:        else if (CUR == '<') {
                   4161:            sgmlParseElement(ctxt);
                   4162:        }
                   4163: 
                   4164:        /*
                   4165:         * Third case : a reference. If if has not been resolved,
                   4166:         *    parsing returns it's Name, create the node 
                   4167:         */
                   4168:        else if (CUR == '&') {
                   4169:            sgmlParseReference(ctxt);
                   4170:        }
                   4171: 
                   4172:        /*
                   4173:         * Fourth : end of the resource
                   4174:         */
                   4175:        else if (CUR == 0) {
                   4176:            sgmlAutoClose(ctxt, NULL);
                   4177:        }
                   4178: 
                   4179:        /*
                   4180:         * Last case, text. Note that References are handled directly.
                   4181:         */
                   4182:        else {
                   4183:            sgmlParseCharData(ctxt, 0);
                   4184:        }
                   4185: 
                   4186:        if (cons == ctxt->nbChars) {
                   4187:            if (ctxt->node != NULL) {
                   4188:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4189:                    ctxt->sax->error(ctxt->userData,
                   4190:                         "detected an error in element content\n");
                   4191:                ctxt->wellFormed = 0;
                   4192:            }
                   4193:             break;
                   4194:        }
                   4195: 
                   4196:         GROW;
                   4197:     }
                   4198:     if (currentNode != NULL) xmlFree(currentNode);
                   4199: }
                   4200: 
                   4201: /**
                   4202:  * sgmlParseElement:
                   4203:  * @ctxt:  an SGML parser context
                   4204:  *
                   4205:  * parse an SGML element, this is highly recursive
                   4206:  *
                   4207:  * [39] element ::= EmptyElemTag | STag content ETag
                   4208:  *
                   4209:  * [41] Attribute ::= Name Eq AttValue
                   4210:  */
                   4211: 
                   4212: void
                   4213: sgmlParseElement(sgmlParserCtxtPtr ctxt) {
                   4214:     xmlChar *name;
                   4215:     xmlChar *currentNode = NULL;
                   4216:     sgmlElemDescPtr info;
                   4217:     sgmlParserNodeInfo node_info;
                   4218:     xmlChar *oldname;
                   4219:     int depth = ctxt->nameNr;
                   4220: 
                   4221:     /* Capture start position */
                   4222:     if (ctxt->record_info) {
                   4223:         node_info.begin_pos = ctxt->input->consumed +
                   4224:                           (CUR_PTR - ctxt->input->base);
                   4225:        node_info.begin_line = ctxt->input->line;
                   4226:     }
                   4227: 
                   4228:     oldname = xmlStrdup(ctxt->name);
                   4229:     sgmlParseStartTag(ctxt);
                   4230:     name = ctxt->name;
                   4231: #ifdef DEBUG
                   4232:     if (oldname == NULL)
                   4233:        fprintf(stderr, "Start of element %s\n", name);
                   4234:     else if (name == NULL)     
                   4235:        fprintf(stderr, "Start of element failed, was %s\n", oldname);
                   4236:     else       
                   4237:        fprintf(stderr, "Start of element %s, was %s\n", name, oldname);
                   4238: #endif
                   4239:     if (((depth == ctxt->nameNr) && (!xmlStrcmp(oldname, ctxt->name))) ||
                   4240:         (name == NULL)) {
                   4241:        if (CUR == '>')
                   4242:            NEXT;
                   4243:        if (oldname != NULL)
                   4244:            xmlFree(oldname);
                   4245:         return;
                   4246:     }
                   4247:     if (oldname != NULL)
                   4248:        xmlFree(oldname);
                   4249: 
                   4250:     /*
                   4251:      * Lookup the info for that element.
                   4252:      */
                   4253:     info = sgmlTagLookup(name);
                   4254:     if (info == NULL) {
                   4255:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4256:            ctxt->sax->error(ctxt->userData, "Tag %s invalid\n",
                   4257:                             name);
                   4258:        ctxt->wellFormed = 0;
                   4259:     } else if (info->depr) {
                   4260: /***************************
                   4261:        if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                   4262:            ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
                   4263:                               name);
                   4264:  ***************************/
                   4265:     }
                   4266: 
                   4267:     /*
                   4268:      * Check for an Empty Element labelled the XML/SGML way
                   4269:      */
                   4270:     if ((CUR == '/') && (NXT(1) == '>')) {
                   4271:         SKIP(2);
                   4272:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   4273:            ctxt->sax->endElement(ctxt->userData, name);
                   4274:        oldname = sgmlnamePop(ctxt);
                   4275: #ifdef DEBUG
                   4276:         fprintf(stderr,"End of tag the XML way: popping out %s\n", oldname);
                   4277: #endif
                   4278:        if (oldname != NULL)
                   4279:            xmlFree(oldname);
                   4280:        return;
                   4281:     }
                   4282: 
                   4283:     if (CUR == '>') {
                   4284:         NEXT;
                   4285:     } else {
                   4286:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4287:            ctxt->sax->error(ctxt->userData,
                   4288:                             "Couldn't find end of Start Tag %s\n",
                   4289:                             name);
                   4290:        ctxt->wellFormed = 0;
                   4291: 
                   4292:        /*
                   4293:         * end of parsing of this node.
                   4294:         */
                   4295:        if (!xmlStrcmp(name, ctxt->name)) { 
                   4296:            nodePop(ctxt);
                   4297:            oldname = sgmlnamePop(ctxt);
                   4298: #ifdef DEBUG
                   4299:            fprintf(stderr,"End of start tag problem: popping out %s\n", oldname);
                   4300: #endif
                   4301:            if (oldname != NULL)
                   4302:                xmlFree(oldname);
                   4303:        }    
                   4304: 
                   4305:        /*
                   4306:         * Capture end position and add node
                   4307:         */
                   4308:        if ( currentNode != NULL && ctxt->record_info ) {
                   4309:           node_info.end_pos = ctxt->input->consumed +
                   4310:                              (CUR_PTR - ctxt->input->base);
                   4311:           node_info.end_line = ctxt->input->line;
                   4312:           node_info.node = ctxt->node;
                   4313:           xmlParserAddNodeInfo(ctxt, &node_info);
                   4314:        }
                   4315:        return;
                   4316:     }
                   4317: 
                   4318:     /*
                   4319:      * Check for an Empty Element from DTD definition
                   4320:      */
                   4321:     if ((info != NULL) && (info->empty)) {
                   4322:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   4323:            ctxt->sax->endElement(ctxt->userData, name);
                   4324:        oldname = sgmlnamePop(ctxt);
                   4325: #ifdef DEBUG
                   4326:        fprintf(stderr,"End of empty tag %s : popping out %s\n", name, oldname);
                   4327: #endif
                   4328:        if (oldname != NULL)
                   4329:            xmlFree(oldname);
                   4330:        return;
                   4331:     }
                   4332: 
                   4333:     /*
                   4334:      * Parse the content of the element:
                   4335:      */
                   4336:     currentNode = xmlStrdup(ctxt->name);
                   4337:     depth = ctxt->nameNr;
                   4338:     while (IS_CHAR(CUR)) {
                   4339:        sgmlParseContent(ctxt);
                   4340:        if (ctxt->nameNr < depth) break; 
                   4341:     }  
                   4342: 
                   4343:     if (!IS_CHAR(CUR)) {
                   4344:        /************
                   4345:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4346:            ctxt->sax->error(ctxt->userData,
                   4347:                 "Premature end of data in tag %s\n", currentNode);
                   4348:        ctxt->wellFormed = 0;
                   4349:         *************/
                   4350: 
                   4351:        /*
                   4352:         * end of parsing of this node.
                   4353:         */
                   4354:        nodePop(ctxt);
                   4355:        oldname = sgmlnamePop(ctxt);
                   4356: #ifdef DEBUG
                   4357:        fprintf(stderr,"Premature end of tag %s : popping out %s\n", name, oldname);
                   4358: #endif
                   4359:        if (oldname != NULL)
                   4360:            xmlFree(oldname);
                   4361:        if (currentNode != NULL)
                   4362:            xmlFree(currentNode);
                   4363:        return;
                   4364:     }
                   4365: 
                   4366:     /*
                   4367:      * Capture end position and add node
                   4368:      */
                   4369:     if ( currentNode != NULL && ctxt->record_info ) {
                   4370:        node_info.end_pos = ctxt->input->consumed +
                   4371:                           (CUR_PTR - ctxt->input->base);
                   4372:        node_info.end_line = ctxt->input->line;
                   4373:        node_info.node = ctxt->node;
                   4374:        xmlParserAddNodeInfo(ctxt, &node_info);
                   4375:     }
                   4376:     if (currentNode != NULL)
                   4377:        xmlFree(currentNode);
                   4378: }
                   4379: 
                   4380: /**
1.3     ! veillard 4381:  * sgmlParseEntityDecl:
        !          4382:  * @ctxt:  an SGML parser context
        !          4383:  *
        !          4384:  * parse <!ENTITY declarations
        !          4385:  *
        !          4386:  */
        !          4387: 
        !          4388: void
        !          4389: sgmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
        !          4390:     xmlChar *name = NULL;
        !          4391:     xmlChar *value = NULL;
        !          4392:     xmlChar *URI = NULL, *literal = NULL;
        !          4393:     xmlChar *ndata = NULL;
        !          4394:     int isParameter = 0;
        !          4395:     xmlChar *orig = NULL;
        !          4396:     
        !          4397:     GROW;
        !          4398:     if ((RAW == '<') && (NXT(1) == '!') &&
        !          4399:         (NXT(2) == 'E') && (NXT(3) == 'N') &&
        !          4400:         (NXT(4) == 'T') && (NXT(5) == 'I') &&
        !          4401:         (NXT(6) == 'T') && (NXT(7) == 'Y')) {
        !          4402:        xmlParserInputPtr input = ctxt->input;
        !          4403:        ctxt->instate = XML_PARSER_ENTITY_DECL;
        !          4404:        SHRINK;
        !          4405:        SKIP(8);
        !          4406:        if (!IS_BLANK(CUR)) {
        !          4407:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4408:                ctxt->sax->error(ctxt->userData,
        !          4409:                                 "Space required after '<!ENTITY'\n");
        !          4410:            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        !          4411:            ctxt->wellFormed = 0;
        !          4412:            ctxt->disableSAX = 1;
        !          4413:        }
        !          4414:        SKIP_BLANKS;
        !          4415: 
        !          4416:        if (RAW == '%') {
        !          4417:            NEXT;
        !          4418:            if (!IS_BLANK(CUR)) {
        !          4419:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4420:                    ctxt->sax->error(ctxt->userData,
        !          4421:                                     "Space required after '%'\n");
        !          4422:                ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        !          4423:                ctxt->wellFormed = 0;
        !          4424:                ctxt->disableSAX = 1;
        !          4425:            }
        !          4426:            SKIP_BLANKS;
        !          4427:            isParameter = 1;
        !          4428:        }
        !          4429: 
        !          4430:         name = xmlParseName(ctxt);
        !          4431:        if (name == NULL) {
        !          4432:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4433:                ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n");
        !          4434:            ctxt->errNo = XML_ERR_NAME_REQUIRED;
        !          4435:            ctxt->wellFormed = 0;
        !          4436:            ctxt->disableSAX = 1;
        !          4437:             return;
        !          4438:        }
        !          4439:        if (!IS_BLANK(CUR)) {
        !          4440:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4441:                ctxt->sax->error(ctxt->userData,
        !          4442:                     "Space required after the entity name\n");
        !          4443:            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        !          4444:            ctxt->wellFormed = 0;
        !          4445:            ctxt->disableSAX = 1;
        !          4446:        }
        !          4447:         SKIP_BLANKS;
        !          4448: 
        !          4449:        /*
        !          4450:         * handle the various case of definitions...
        !          4451:         */
        !          4452:        if (isParameter) {
        !          4453:            if ((RAW == '"') || (RAW == '\'')) {
        !          4454:                value = xmlParseEntityValue(ctxt, &orig);
        !          4455:                if (value) {
        !          4456:                    if ((ctxt->sax != NULL) &&
        !          4457:                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
        !          4458:                        ctxt->sax->entityDecl(ctxt->userData, name,
        !          4459:                                    XML_INTERNAL_PARAMETER_ENTITY,
        !          4460:                                    NULL, NULL, value);
        !          4461:                }
        !          4462:            } else {
        !          4463:                URI = xmlParseExternalID(ctxt, &literal, 1);
        !          4464:                if ((URI == NULL) && (literal == NULL)) {
        !          4465:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4466:                        ctxt->sax->error(ctxt->userData,
        !          4467:                            "Entity value required\n");
        !          4468:                    ctxt->errNo = XML_ERR_VALUE_REQUIRED;
        !          4469:                    ctxt->wellFormed = 0;
        !          4470:                    ctxt->disableSAX = 1;
        !          4471:                }
        !          4472:                if (URI) {
        !          4473:                    xmlURIPtr uri;
        !          4474: 
        !          4475:                    uri = xmlParseURI((const char *) URI);
        !          4476:                    if (uri == NULL) {
        !          4477:                        if ((ctxt->sax != NULL) &&
        !          4478:                            (!ctxt->disableSAX) &&
        !          4479:                            (ctxt->sax->error != NULL))
        !          4480:                            ctxt->sax->error(ctxt->userData,
        !          4481:                                        "Invalid URI: %s\n", URI);
        !          4482:                        ctxt->wellFormed = 0;
        !          4483:                        ctxt->errNo = XML_ERR_INVALID_URI;
        !          4484:                    } else {
        !          4485:                        if (uri->fragment != NULL) {
        !          4486:                            if ((ctxt->sax != NULL) &&
        !          4487:                                (!ctxt->disableSAX) &&
        !          4488:                                (ctxt->sax->error != NULL))
        !          4489:                                ctxt->sax->error(ctxt->userData,
        !          4490:                                            "Fragment not allowed: %s\n", URI);
        !          4491:                            ctxt->wellFormed = 0;
        !          4492:                            ctxt->errNo = XML_ERR_URI_FRAGMENT;
        !          4493:                        } else {
        !          4494:                            if ((ctxt->sax != NULL) &&
        !          4495:                                (!ctxt->disableSAX) &&
        !          4496:                                (ctxt->sax->entityDecl != NULL))
        !          4497:                                ctxt->sax->entityDecl(ctxt->userData, name,
        !          4498:                                            XML_EXTERNAL_PARAMETER_ENTITY,
        !          4499:                                            literal, URI, NULL);
        !          4500:                        }
        !          4501:                        xmlFreeURI(uri);
        !          4502:                    }
        !          4503:                }
        !          4504:            }
        !          4505:        } else {
        !          4506:            if ((RAW == '"') || (RAW == '\'')) {
        !          4507:                value = xmlParseEntityValue(ctxt, &orig);
        !          4508:                if ((ctxt->sax != NULL) &&
        !          4509:                    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
        !          4510:                    ctxt->sax->entityDecl(ctxt->userData, name,
        !          4511:                                XML_INTERNAL_GENERAL_ENTITY,
        !          4512:                                NULL, NULL, value);
        !          4513:            } else {
        !          4514:                URI = xmlParseExternalID(ctxt, &literal, 1);
        !          4515:                if ((URI == NULL) && (literal == NULL)) {
        !          4516:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4517:                        ctxt->sax->error(ctxt->userData,
        !          4518:                            "Entity value required\n");
        !          4519:                    ctxt->errNo = XML_ERR_VALUE_REQUIRED;
        !          4520:                    ctxt->wellFormed = 0;
        !          4521:                    ctxt->disableSAX = 1;
        !          4522:                }
        !          4523:                if (URI) {
        !          4524:                    xmlURIPtr uri;
        !          4525: 
        !          4526:                    uri = xmlParseURI((const char *)URI);
        !          4527:                    if (uri == NULL) {
        !          4528:                        if ((ctxt->sax != NULL) &&
        !          4529:                            (!ctxt->disableSAX) &&
        !          4530:                            (ctxt->sax->error != NULL))
        !          4531:                            ctxt->sax->error(ctxt->userData,
        !          4532:                                        "Invalid URI: %s\n", URI);
        !          4533:                        ctxt->wellFormed = 0;
        !          4534:                        ctxt->errNo = XML_ERR_INVALID_URI;
        !          4535:                    } else {
        !          4536:                        if (uri->fragment != NULL) {
        !          4537:                            if ((ctxt->sax != NULL) &&
        !          4538:                                (!ctxt->disableSAX) &&
        !          4539:                                (ctxt->sax->error != NULL))
        !          4540:                                ctxt->sax->error(ctxt->userData,
        !          4541:                                            "Fragment not allowed: %s\n", URI);
        !          4542:                            ctxt->wellFormed = 0;
        !          4543:                            ctxt->errNo = XML_ERR_URI_FRAGMENT;
        !          4544:                        }
        !          4545:                        xmlFreeURI(uri);
        !          4546:                    }
        !          4547:                }
        !          4548:                if ((RAW != '>') && (!IS_BLANK(CUR))) {
        !          4549:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4550:                        ctxt->sax->error(ctxt->userData,
        !          4551:                            "Space required before content model\n");
        !          4552:                    ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        !          4553:                    ctxt->wellFormed = 0;
        !          4554:                    ctxt->disableSAX = 1;
        !          4555:                }
        !          4556:                SKIP_BLANKS;
        !          4557: 
        !          4558:                /*
        !          4559:                 * SGML specific: here we can get the content model
        !          4560:                 */
        !          4561:                if (RAW != '>') {
        !          4562:                    xmlChar *contmod;
        !          4563: 
        !          4564:                    contmod = xmlParseName(ctxt);
        !          4565: 
        !          4566:                    if (contmod == NULL) {
        !          4567:                        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4568:                            ctxt->sax->error(ctxt->userData,
        !          4569:                                "Could not parse entity content model\n");
        !          4570:                        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        !          4571:                        ctxt->wellFormed = 0;
        !          4572:                        ctxt->disableSAX = 1;
        !          4573:                    } else {
        !          4574:                        if (!xmlStrcmp(contmod, BAD_CAST"NDATA")) {
        !          4575:                            if (!IS_BLANK(CUR)) {
        !          4576:                                if ((ctxt->sax != NULL) &&
        !          4577:                                    (ctxt->sax->error != NULL))
        !          4578:                                    ctxt->sax->error(ctxt->userData,
        !          4579:                                        "Space required after 'NDATA'\n");
        !          4580:                                ctxt->errNo = XML_ERR_SPACE_REQUIRED;
        !          4581:                                ctxt->wellFormed = 0;
        !          4582:                                ctxt->disableSAX = 1;
        !          4583:                            }
        !          4584:                            SKIP_BLANKS;
        !          4585:                            ndata = xmlParseName(ctxt);
        !          4586:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        !          4587:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
        !          4588:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
        !          4589:                                        name, literal, URI, ndata);
        !          4590:                            }
        !          4591:                        } else if (!xmlStrcmp(contmod, BAD_CAST"SUBDOC")) {
        !          4592:                            if ((ctxt->sax != NULL) &&
        !          4593:                                (ctxt->sax->warning != NULL))
        !          4594:                                ctxt->sax->warning(ctxt->userData,
        !          4595:                                    "SUBDOC entities are not supported\n");
        !          4596:                            SKIP_BLANKS;
        !          4597:                            ndata = xmlParseName(ctxt);
        !          4598:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        !          4599:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
        !          4600:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
        !          4601:                                        name, literal, URI, ndata);
        !          4602:                            }
        !          4603:                        } else if (!xmlStrcmp(contmod, BAD_CAST"CDATA")) {
        !          4604:                            if ((ctxt->sax != NULL) &&
        !          4605:                                (ctxt->sax->warning != NULL))
        !          4606:                                ctxt->sax->warning(ctxt->userData,
        !          4607:                                    "CDATA entities are not supported\n");
        !          4608:                            SKIP_BLANKS;
        !          4609:                            ndata = xmlParseName(ctxt);
        !          4610:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        !          4611:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
        !          4612:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
        !          4613:                                        name, literal, URI, ndata);
        !          4614:                            }
        !          4615:                        }
        !          4616:                        xmlFree(contmod);
        !          4617:                    }
        !          4618:                } else {
        !          4619:                    if ((ctxt->sax != NULL) &&
        !          4620:                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
        !          4621:                        ctxt->sax->entityDecl(ctxt->userData, name,
        !          4622:                                    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
        !          4623:                                    literal, URI, NULL);
        !          4624:                }
        !          4625:            }
        !          4626:        }
        !          4627:        SKIP_BLANKS;
        !          4628:        if (RAW != '>') {
        !          4629:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4630:                ctxt->sax->error(ctxt->userData, 
        !          4631:                    "sgmlParseEntityDecl: entity %s not terminated\n", name);
        !          4632:            ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
        !          4633:            ctxt->wellFormed = 0;
        !          4634:            ctxt->disableSAX = 1;
        !          4635:        } else {
        !          4636:            if (input != ctxt->input) {
        !          4637:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4638:                    ctxt->sax->error(ctxt->userData, 
        !          4639: "Entity declaration doesn't start and stop in the same entity\n");
        !          4640:                ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
        !          4641:                ctxt->wellFormed = 0;
        !          4642:                ctxt->disableSAX = 1;
        !          4643:            }
        !          4644:            NEXT;
        !          4645:        }
        !          4646:        if (orig != NULL) {
        !          4647:            /*
        !          4648:             * Ugly mechanism to save the raw entity value.
        !          4649:             */
        !          4650:            xmlEntityPtr cur = NULL;
        !          4651: 
        !          4652:            if (isParameter) {
        !          4653:                if ((ctxt->sax != NULL) &&
        !          4654:                    (ctxt->sax->getParameterEntity != NULL))
        !          4655:                    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
        !          4656:            } else {
        !          4657:                if ((ctxt->sax != NULL) &&
        !          4658:                    (ctxt->sax->getEntity != NULL))
        !          4659:                    cur = ctxt->sax->getEntity(ctxt->userData, name);
        !          4660:            }
        !          4661:             if (cur != NULL) {
        !          4662:                if (cur->orig != NULL)
        !          4663:                    xmlFree(orig);
        !          4664:                else
        !          4665:                    cur->orig = orig;
        !          4666:            } else
        !          4667:                xmlFree(orig);
        !          4668:        }
        !          4669:        if (name != NULL) xmlFree(name);
        !          4670:        if (value != NULL) xmlFree(value);
        !          4671:        if (URI != NULL) xmlFree(URI);
        !          4672:        if (literal != NULL) xmlFree(literal);
        !          4673:        if (ndata != NULL) xmlFree(ndata);
        !          4674:     }
        !          4675: }
        !          4676: 
        !          4677: /**
        !          4678:  * sgmlParseMarkupDecl:
        !          4679:  * @ctxt:  an SGML parser context
        !          4680:  * 
        !          4681:  * parse Markup declarations
        !          4682:  *
        !          4683:  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
        !          4684:  *                     NotationDecl | PI | Comment
        !          4685:  */
        !          4686: void
        !          4687: sgmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
        !          4688:     GROW;
        !          4689:     xmlParseElementDecl(ctxt);
        !          4690:     xmlParseAttributeListDecl(ctxt);
        !          4691:     sgmlParseEntityDecl(ctxt);
        !          4692:     xmlParseNotationDecl(ctxt);
        !          4693:     xmlParsePI(ctxt);
        !          4694:     xmlParseComment(ctxt);
        !          4695:     /*
        !          4696:      * This is only for internal subset. On external entities,
        !          4697:      * the replacement is done before parsing stage
        !          4698:      */
        !          4699:     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
        !          4700:        xmlParsePEReference(ctxt);
        !          4701:     ctxt->instate = XML_PARSER_DTD;
        !          4702: }
        !          4703: 
        !          4704: /**
        !          4705:  * sgmlParseInternalsubset:
        !          4706:  * @ctxt:  an SGML parser context
        !          4707:  *
        !          4708:  * parse the internal subset declaration
        !          4709:  *
        !          4710:  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
        !          4711:  */
        !          4712: 
        !          4713: void
        !          4714: sgmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
        !          4715:     /*
        !          4716:      * Is there any DTD definition ?
        !          4717:      */
        !          4718:     if (RAW == '[') {
        !          4719:         ctxt->instate = XML_PARSER_DTD;
        !          4720:         NEXT;
        !          4721:        /*
        !          4722:         * Parse the succession of Markup declarations and 
        !          4723:         * PEReferences.
        !          4724:         * Subsequence (markupdecl | PEReference | S)*
        !          4725:         */
        !          4726:        while (RAW != ']') {
        !          4727:            const xmlChar *check = CUR_PTR;
        !          4728:            int cons = ctxt->input->consumed;
        !          4729: 
        !          4730:            SKIP_BLANKS;
        !          4731:            sgmlParseMarkupDecl(ctxt);
        !          4732:            xmlParsePEReference(ctxt);
        !          4733: 
        !          4734:            /*
        !          4735:             * Pop-up of finished entities.
        !          4736:             */
        !          4737:            while ((RAW == 0) && (ctxt->inputNr > 1))
        !          4738:                xmlPopInput(ctxt);
        !          4739: 
        !          4740:            if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
        !          4741:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4742:                    ctxt->sax->error(ctxt->userData, 
        !          4743:             "sgmlParseInternalSubset: error detected in Markup declaration\n");
        !          4744:                ctxt->wellFormed = 0;
        !          4745:                ctxt->disableSAX = 1;
        !          4746:                ctxt->errNo = XML_ERR_INTERNAL_ERROR;
        !          4747:                break;
        !          4748:            }
        !          4749:        }
        !          4750:        if (RAW == ']') { 
        !          4751:            NEXT;
        !          4752:            SKIP_BLANKS;
        !          4753:        }
        !          4754:     }
        !          4755: 
        !          4756:     /*
        !          4757:      * We should be at the end of the DOCTYPE declaration.
        !          4758:      */
        !          4759:     if (RAW != '>') {
        !          4760:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        !          4761:            ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
        !          4762:        ctxt->wellFormed = 0;
        !          4763:        ctxt->disableSAX = 1;
        !          4764:        ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
        !          4765:     }
        !          4766:     NEXT;
        !          4767: }
        !          4768: 
        !          4769: /**
1.2       veillard 4770:  * sgmlParseMisc:
                   4771:  * @ctxt:  an XML parser context
                   4772:  * 
                   4773:  * parse an XML Misc* optionnal field.
                   4774:  *
                   4775:  * [27] Misc ::= Comment | PI |  S
                   4776:  */
                   4777: 
                   4778: void
                   4779: sgmlParseMisc(xmlParserCtxtPtr ctxt) {
                   4780:     while (((RAW == '<') && (NXT(1) == '?')) ||
                   4781:            ((RAW == '<') && (NXT(1) == '!') &&
                   4782:            (NXT(2) == '-') && (NXT(3) == '-')) ||
                   4783:            IS_BLANK(CUR)) {
                   4784:         if ((RAW == '<') && (NXT(1) == '?')) {
                   4785:            xmlParsePI(ctxt); /* TODO: SGML PIs differs */
                   4786:        } else if (IS_BLANK(CUR)) {
                   4787:            NEXT;
                   4788:        } else
                   4789:            xmlParseComment(ctxt);
                   4790:     }
                   4791: }
                   4792: 
                   4793: /**
1.1       veillard 4794:  * sgmlParseDocument :
                   4795:  * @ctxt:  an SGML parser context
                   4796:  * 
                   4797:  * parse an SGML document (and build a tree if using the standard SAX
                   4798:  * interface).
                   4799:  *
                   4800:  * Returns 0, -1 in case of error. the parser context is augmented
                   4801:  *                as a result of the parsing.
                   4802:  */
                   4803: 
                   4804: int
                   4805: sgmlParseDocument(sgmlParserCtxtPtr ctxt) {
1.2       veillard 4806:     xmlChar start[4];
                   4807:     xmlCharEncoding enc;
1.1       veillard 4808:     xmlDtdPtr dtd;
                   4809: 
                   4810:     sgmlDefaultSAXHandlerInit();
                   4811:     ctxt->html = 2;
                   4812: 
                   4813:     GROW;
                   4814:     /*
                   4815:      * SAX: beginning of the document processing.
                   4816:      */
                   4817:     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                   4818:         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
                   4819: 
1.2       veillard 4820:     /* 
                   4821:      * Get the 4 first bytes and decode the charset
                   4822:      * if enc != XML_CHAR_ENCODING_NONE
                   4823:      * plug some encoding conversion routines.
                   4824:      */
                   4825:     start[0] = RAW;
                   4826:     start[1] = NXT(1);
                   4827:     start[2] = NXT(2);
                   4828:     start[3] = NXT(3);
                   4829:     enc = xmlDetectCharEncoding(start, 4);
                   4830:     if (enc != XML_CHAR_ENCODING_NONE) {
                   4831:         xmlSwitchEncoding(ctxt, enc);
                   4832:     }
                   4833: 
1.1       veillard 4834:     /*
                   4835:      * Wipe out everything which is before the first '<'
                   4836:      */
                   4837:     SKIP_BLANKS;
                   4838:     if (CUR == 0) {
                   4839:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4840:            ctxt->sax->error(ctxt->userData, "Document is empty\n");
                   4841:        ctxt->wellFormed = 0;
                   4842:     }
                   4843: 
                   4844:     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
                   4845:        ctxt->sax->startDocument(ctxt->userData);
                   4846: 
                   4847: 
                   4848:     /*
1.2       veillard 4849:      * The Misc part of the Prolog
1.1       veillard 4850:      */
1.2       veillard 4851:     GROW;
                   4852:     sgmlParseMisc(ctxt);
1.1       veillard 4853: 
                   4854:     /*
                   4855:      * Then possibly doc type declaration(s) and more Misc
                   4856:      * (doctypedecl Misc*)?
                   4857:      */
1.2       veillard 4858:     GROW;
                   4859:     if ((RAW == '<') && (NXT(1) == '!') &&
                   4860:        (NXT(2) == 'D') && (NXT(3) == 'O') &&
                   4861:        (NXT(4) == 'C') && (NXT(5) == 'T') &&
                   4862:        (NXT(6) == 'Y') && (NXT(7) == 'P') &&
                   4863:        (NXT(8) == 'E')) {
                   4864: 
                   4865:        ctxt->inSubset = 1;
1.1       veillard 4866:        sgmlParseDocTypeDecl(ctxt);
1.2       veillard 4867:        if (RAW == '[') {
                   4868:            ctxt->instate = XML_PARSER_DTD;
1.3     ! veillard 4869:            sgmlParseInternalSubset(ctxt);
1.2       veillard 4870:        }
                   4871: 
                   4872:        /*
                   4873:         * Create and update the external subset.
                   4874:         */
                   4875:        ctxt->inSubset = 2;
                   4876:        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
                   4877:            (!ctxt->disableSAX))
                   4878:            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                   4879:                                      ctxt->extSubSystem, ctxt->extSubURI);
                   4880:        ctxt->inSubset = 0;
                   4881: 
                   4882: 
                   4883:        ctxt->instate = XML_PARSER_PROLOG;
                   4884:        sgmlParseMisc(ctxt);
1.1       veillard 4885:     }
                   4886: 
                   4887:     /*
                   4888:      * Time to start parsing the tree itself
                   4889:      */
                   4890:     sgmlParseContent(ctxt);
                   4891: 
                   4892:     /*
                   4893:      * autoclose
                   4894:      */
                   4895:     if (CUR == 0)
                   4896:        sgmlAutoClose(ctxt, NULL);
                   4897: 
                   4898: 
                   4899:     /*
                   4900:      * SAX: end of the document processing.
                   4901:      */
                   4902:     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   4903:         ctxt->sax->endDocument(ctxt->userData);
                   4904: 
                   4905:     if (ctxt->myDoc != NULL) {
                   4906:        dtd = xmlGetIntSubset(ctxt->myDoc);
                   4907:        if (dtd == NULL)
                   4908:            ctxt->myDoc->intSubset = 
                   4909:                xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", 
                   4910:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   4911:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
                   4912:     }
                   4913:     if (! ctxt->wellFormed) return(-1);
                   4914:     return(0);
                   4915: }
                   4916: 
                   4917: 
                   4918: /************************************************************************
                   4919:  *                                                                     *
                   4920:  *                     Parser contexts handling                        *
                   4921:  *                                                                     *
                   4922:  ************************************************************************/
                   4923: 
                   4924: /**
                   4925:  * xmlInitParserCtxt:
                   4926:  * @ctxt:  an SGML parser context
                   4927:  *
                   4928:  * Initialize a parser context
                   4929:  */
                   4930: 
                   4931: void
                   4932: sgmlInitParserCtxt(sgmlParserCtxtPtr ctxt)
                   4933: {
                   4934:     sgmlSAXHandler *sax;
                   4935: 
                   4936:     if (ctxt == NULL) return;
                   4937:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   4938: 
                   4939:     sax = (sgmlSAXHandler *) xmlMalloc(sizeof(sgmlSAXHandler));
                   4940:     if (sax == NULL) {
                   4941:         fprintf(stderr, "sgmlInitParserCtxt: out of memory\n");
                   4942:     }
                   4943:     memset(sax, 0, sizeof(sgmlSAXHandler));
                   4944: 
                   4945:     /* Allocate the Input stack */
                   4946:     ctxt->inputTab = (sgmlParserInputPtr *) 
                   4947:                       xmlMalloc(5 * sizeof(sgmlParserInputPtr));
                   4948:     if (ctxt->inputTab == NULL) {
                   4949:         fprintf(stderr, "sgmlInitParserCtxt: out of memory\n");
                   4950:     }
                   4951:     ctxt->inputNr = 0;
                   4952:     ctxt->inputMax = 5;
                   4953:     ctxt->input = NULL;
                   4954:     ctxt->version = NULL;
                   4955:     ctxt->encoding = NULL;
                   4956:     ctxt->standalone = -1;
                   4957:     ctxt->instate = XML_PARSER_START;
                   4958: 
                   4959:     /* Allocate the Node stack */
                   4960:     ctxt->nodeTab = (sgmlNodePtr *) xmlMalloc(10 * sizeof(sgmlNodePtr));
                   4961:     ctxt->nodeNr = 0;
                   4962:     ctxt->nodeMax = 10;
                   4963:     ctxt->node = NULL;
                   4964: 
                   4965:     /* Allocate the Name stack */
                   4966:     ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
                   4967:     ctxt->nameNr = 0;
                   4968:     ctxt->nameMax = 10;
                   4969:     ctxt->name = NULL;
                   4970: 
                   4971:     if (sax == NULL) ctxt->sax = &sgmlDefaultSAXHandler;
                   4972:     else {
                   4973:         ctxt->sax = sax;
                   4974:        memcpy(sax, &sgmlDefaultSAXHandler, sizeof(sgmlSAXHandler));
                   4975:     }
                   4976:     ctxt->userData = ctxt;
                   4977:     ctxt->myDoc = NULL;
                   4978:     ctxt->wellFormed = 1;
                   4979:     ctxt->replaceEntities = 0;
                   4980:     ctxt->html = 2;
                   4981:     ctxt->record_info = 0;
                   4982:     ctxt->validate = 0;
                   4983:     ctxt->nbChars = 0;
                   4984:     ctxt->checkIndex = 0;
                   4985:     xmlInitNodeInfoSeq(&ctxt->node_seq);
                   4986: }
                   4987: 
                   4988: /**
                   4989:  * sgmlFreeParserCtxt:
                   4990:  * @ctxt:  an SGML parser context
                   4991:  *
                   4992:  * Free all the memory used by a parser context. However the parsed
                   4993:  * document in ctxt->myDoc is not freed.
                   4994:  */
                   4995: 
                   4996: void
                   4997: sgmlFreeParserCtxt(sgmlParserCtxtPtr ctxt)
                   4998: {
                   4999:     xmlFreeParserCtxt(ctxt);
                   5000: }
                   5001: 
                   5002: /**
                   5003:  * sgmlCreateDocParserCtxt :
                   5004:  * @cur:  a pointer to an array of xmlChar
                   5005:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5006:  *
                   5007:  * Create a parser context for an SGML document.
                   5008:  *
                   5009:  * Returns the new parser context or NULL
                   5010:  */
                   5011: sgmlParserCtxtPtr
                   5012: sgmlCreateDocParserCtxt(xmlChar *cur, const char *encoding) {
                   5013:     sgmlParserCtxtPtr ctxt;
                   5014:     sgmlParserInputPtr input;
                   5015:     /* sgmlCharEncoding enc; */
                   5016: 
                   5017:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   5018:     if (ctxt == NULL) {
                   5019:         perror("malloc");
                   5020:        return(NULL);
                   5021:     }
                   5022:     sgmlInitParserCtxt(ctxt);
                   5023:     input = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   5024:     if (input == NULL) {
                   5025:         perror("malloc");
                   5026:        xmlFree(ctxt);
                   5027:        return(NULL);
                   5028:     }
                   5029:     memset(input, 0, sizeof(sgmlParserInput));
                   5030: 
                   5031:     input->line = 1;
                   5032:     input->col = 1;
                   5033:     input->base = cur;
                   5034:     input->cur = cur;
                   5035: 
                   5036:     inputPush(ctxt, input);
                   5037:     return(ctxt);
                   5038: }
                   5039: 
                   5040: /************************************************************************
                   5041:  *                                                                     *
                   5042:  *             Progressive parsing interfaces                          *
                   5043:  *                                                                     *
                   5044:  ************************************************************************/
                   5045: 
                   5046: /**
                   5047:  * sgmlParseLookupSequence:
                   5048:  * @ctxt:  an SGML parser context
                   5049:  * @first:  the first char to lookup
                   5050:  * @next:  the next char to lookup or zero
                   5051:  * @third:  the next char to lookup or zero
                   5052:  *
                   5053:  * Try to find if a sequence (first, next, third) or  just (first next) or
                   5054:  * (first) is available in the input stream.
                   5055:  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
                   5056:  * to avoid rescanning sequences of bytes, it DOES change the state of the
                   5057:  * parser, do not use liberally.
                   5058:  * This is basically similar to xmlParseLookupSequence()
                   5059:  *
                   5060:  * Returns the index to the current parsing point if the full sequence
                   5061:  *      is available, -1 otherwise.
                   5062:  */
                   5063: int
                   5064: sgmlParseLookupSequence(sgmlParserCtxtPtr ctxt, xmlChar first,
                   5065:                        xmlChar next, xmlChar third) {
                   5066:     int base, len;
                   5067:     sgmlParserInputPtr in;
                   5068:     const xmlChar *buf;
                   5069: 
                   5070:     in = ctxt->input;
                   5071:     if (in == NULL) return(-1);
                   5072:     base = in->cur - in->base;
                   5073:     if (base < 0) return(-1);
                   5074:     if (ctxt->checkIndex > base)
                   5075:         base = ctxt->checkIndex;
                   5076:     if (in->buf == NULL) {
                   5077:        buf = in->base;
                   5078:        len = in->length;
                   5079:     } else {
                   5080:        buf = in->buf->buffer->content;
                   5081:        len = in->buf->buffer->use;
                   5082:     }
                   5083:     /* take into account the sequence length */
                   5084:     if (third) len -= 2;
                   5085:     else if (next) len --;
                   5086:     for (;base < len;base++) {
                   5087:         if (buf[base] == first) {
                   5088:            if (third != 0) {
                   5089:                if ((buf[base + 1] != next) ||
                   5090:                    (buf[base + 2] != third)) continue;
                   5091:            } else if (next != 0) {
                   5092:                if (buf[base + 1] != next) continue;
                   5093:            }
                   5094:            ctxt->checkIndex = 0;
                   5095: #ifdef DEBUG_PUSH
                   5096:            if (next == 0)
                   5097:                fprintf(stderr, "HPP: lookup '%c' found at %d\n",
                   5098:                        first, base);
                   5099:            else if (third == 0)
                   5100:                fprintf(stderr, "HPP: lookup '%c%c' found at %d\n",
                   5101:                        first, next, base);
                   5102:            else 
                   5103:                fprintf(stderr, "HPP: lookup '%c%c%c' found at %d\n",
                   5104:                        first, next, third, base);
                   5105: #endif
                   5106:            return(base - (in->cur - in->base));
                   5107:        }
                   5108:     }
                   5109:     ctxt->checkIndex = base;
                   5110: #ifdef DEBUG_PUSH
                   5111:     if (next == 0)
                   5112:        fprintf(stderr, "HPP: lookup '%c' failed\n", first);
                   5113:     else if (third == 0)
                   5114:        fprintf(stderr, "HPP: lookup '%c%c' failed\n", first, next);
                   5115:     else       
                   5116:        fprintf(stderr, "HPP: lookup '%c%c%c' failed\n", first, next, third);
                   5117: #endif
                   5118:     return(-1);
                   5119: }
                   5120: 
                   5121: /**
                   5122:  * sgmlParseTryOrFinish:
                   5123:  * @ctxt:  an SGML parser context
                   5124:  * @terminate:  last chunk indicator
                   5125:  *
                   5126:  * Try to progress on parsing
                   5127:  *
                   5128:  * Returns zero if no parsing was possible
                   5129:  */
                   5130: int
                   5131: sgmlParseTryOrFinish(sgmlParserCtxtPtr ctxt, int terminate) {
                   5132:     int ret = 0;
                   5133:     sgmlParserInputPtr in;
                   5134:     int avail = 0;
                   5135:     xmlChar cur, next;
                   5136: 
                   5137: #ifdef DEBUG_PUSH
                   5138:     switch (ctxt->instate) {
                   5139:        case XML_PARSER_EOF:
                   5140:            fprintf(stderr, "HPP: try EOF\n"); break;
                   5141:        case XML_PARSER_START:
                   5142:            fprintf(stderr, "HPP: try START\n"); break;
                   5143:        case XML_PARSER_MISC:
                   5144:            fprintf(stderr, "HPP: try MISC\n");break;
                   5145:        case XML_PARSER_COMMENT:
                   5146:            fprintf(stderr, "HPP: try COMMENT\n");break;
                   5147:        case XML_PARSER_PROLOG:
                   5148:            fprintf(stderr, "HPP: try PROLOG\n");break;
                   5149:        case XML_PARSER_START_TAG:
                   5150:            fprintf(stderr, "HPP: try START_TAG\n");break;
                   5151:        case XML_PARSER_CONTENT:
                   5152:            fprintf(stderr, "HPP: try CONTENT\n");break;
                   5153:        case XML_PARSER_CDATA_SECTION:
                   5154:            fprintf(stderr, "HPP: try CDATA_SECTION\n");break;
                   5155:        case XML_PARSER_END_TAG:
                   5156:            fprintf(stderr, "HPP: try END_TAG\n");break;
                   5157:        case XML_PARSER_ENTITY_DECL:
                   5158:            fprintf(stderr, "HPP: try ENTITY_DECL\n");break;
                   5159:        case XML_PARSER_ENTITY_VALUE:
                   5160:            fprintf(stderr, "HPP: try ENTITY_VALUE\n");break;
                   5161:        case XML_PARSER_ATTRIBUTE_VALUE:
                   5162:            fprintf(stderr, "HPP: try ATTRIBUTE_VALUE\n");break;
                   5163:        case XML_PARSER_DTD:
                   5164:            fprintf(stderr, "HPP: try DTD\n");break;
                   5165:        case XML_PARSER_EPILOG:
                   5166:            fprintf(stderr, "HPP: try EPILOG\n");break;
                   5167:        case XML_PARSER_PI:
                   5168:            fprintf(stderr, "HPP: try PI\n");break;
                   5169:     }
                   5170: #endif
                   5171: 
                   5172:     while (1) {
                   5173: 
                   5174:        in = ctxt->input;
                   5175:        if (in == NULL) break;
                   5176:        if (in->buf == NULL)
                   5177:            avail = in->length - (in->cur - in->base);
                   5178:        else
                   5179:            avail = in->buf->buffer->use - (in->cur - in->base);
                   5180:        if ((avail == 0) && (terminate)) {
                   5181:            sgmlAutoClose(ctxt, NULL);
                   5182:            if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 
                   5183:                /*
                   5184:                 * SAX: end of the document processing.
                   5185:                 */
                   5186:                ctxt->instate = XML_PARSER_EOF;
                   5187:                if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5188:                    ctxt->sax->endDocument(ctxt->userData);
                   5189:            }
                   5190:        }
                   5191:         if (avail < 1)
                   5192:            goto done;
                   5193:         switch (ctxt->instate) {
                   5194:             case XML_PARSER_EOF:
                   5195:                /*
                   5196:                 * Document parsing is done !
                   5197:                 */
                   5198:                goto done;
                   5199:             case XML_PARSER_START:
                   5200:                /*
                   5201:                 * Very first chars read from the document flow.
                   5202:                 */
                   5203:                cur = in->cur[0];
                   5204:                if (IS_BLANK(cur)) {
                   5205:                    SKIP_BLANKS;
                   5206:                    if (in->buf == NULL)
                   5207:                        avail = in->length - (in->cur - in->base);
                   5208:                    else
                   5209:                        avail = in->buf->buffer->use - (in->cur - in->base);
                   5210:                }
                   5211:                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                   5212:                    ctxt->sax->setDocumentLocator(ctxt->userData,
                   5213:                                                  &xmlDefaultSAXLocator);
                   5214:                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                   5215:                    (!ctxt->disableSAX))
                   5216:                    ctxt->sax->startDocument(ctxt->userData);
                   5217: 
                   5218:                cur = in->cur[0];
                   5219:                next = in->cur[1];
                   5220:                if ((cur == '<') && (next == '!') &&
                   5221:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5222:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5223:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5224:                    (UPP(8) == 'E')) {
                   5225:                    if ((!terminate) &&
                   5226:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5227:                        goto done;
                   5228: #ifdef DEBUG_PUSH
                   5229:                    fprintf(stderr, "HPP: Parsing internal subset\n");
                   5230: #endif
                   5231:                    sgmlParseDocTypeDecl(ctxt);
                   5232:                    ctxt->instate = XML_PARSER_PROLOG;
                   5233: #ifdef DEBUG_PUSH
                   5234:                    fprintf(stderr, "HPP: entering PROLOG\n");
                   5235: #endif
                   5236:                 } else {
                   5237:                    ctxt->instate = XML_PARSER_MISC;
                   5238:                }
                   5239: #ifdef DEBUG_PUSH
                   5240:                fprintf(stderr, "HPP: entering MISC\n");
                   5241: #endif
                   5242:                break;
                   5243:             case XML_PARSER_MISC:
                   5244:                SKIP_BLANKS;
                   5245:                if (in->buf == NULL)
                   5246:                    avail = in->length - (in->cur - in->base);
                   5247:                else
                   5248:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5249:                if (avail < 2)
                   5250:                    goto done;
                   5251:                cur = in->cur[0];
                   5252:                next = in->cur[1];
                   5253:                if ((cur == '<') && (next == '!') &&
                   5254:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5255:                    if ((!terminate) &&
                   5256:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5257:                        goto done;
                   5258: #ifdef DEBUG_PUSH
                   5259:                    fprintf(stderr, "HPP: Parsing Comment\n");
                   5260: #endif
                   5261:                    sgmlParseComment(ctxt);
                   5262:                    ctxt->instate = XML_PARSER_MISC;
                   5263:                } else if ((cur == '<') && (next == '!') &&
                   5264:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5265:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5266:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5267:                    (UPP(8) == 'E')) {
                   5268:                    if ((!terminate) &&
                   5269:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5270:                        goto done;
                   5271: #ifdef DEBUG_PUSH
                   5272:                    fprintf(stderr, "HPP: Parsing internal subset\n");
                   5273: #endif
                   5274:                    sgmlParseDocTypeDecl(ctxt);
                   5275:                    ctxt->instate = XML_PARSER_PROLOG;
                   5276: #ifdef DEBUG_PUSH
                   5277:                    fprintf(stderr, "HPP: entering PROLOG\n");
                   5278: #endif
                   5279:                } else if ((cur == '<') && (next == '!') &&
                   5280:                           (avail < 9)) {
                   5281:                    goto done;
                   5282:                } else {
                   5283:                    ctxt->instate = XML_PARSER_START_TAG;
                   5284: #ifdef DEBUG_PUSH
                   5285:                    fprintf(stderr, "HPP: entering START_TAG\n");
                   5286: #endif
                   5287:                }
                   5288:                break;
                   5289:             case XML_PARSER_PROLOG:
                   5290:                SKIP_BLANKS;
                   5291:                if (in->buf == NULL)
                   5292:                    avail = in->length - (in->cur - in->base);
                   5293:                else
                   5294:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5295:                if (avail < 2) 
                   5296:                    goto done;
                   5297:                cur = in->cur[0];
                   5298:                next = in->cur[1];
                   5299:                if ((cur == '<') && (next == '!') &&
                   5300:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5301:                    if ((!terminate) &&
                   5302:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5303:                        goto done;
                   5304: #ifdef DEBUG_PUSH
                   5305:                    fprintf(stderr, "HPP: Parsing Comment\n");
                   5306: #endif
                   5307:                    sgmlParseComment(ctxt);
                   5308:                    ctxt->instate = XML_PARSER_PROLOG;
                   5309:                } else if ((cur == '<') && (next == '!') &&
                   5310:                           (avail < 4)) {
                   5311:                    goto done;
                   5312:                } else {
                   5313:                    ctxt->instate = XML_PARSER_START_TAG;
                   5314: #ifdef DEBUG_PUSH
                   5315:                    fprintf(stderr, "HPP: entering START_TAG\n");
                   5316: #endif
                   5317:                }
                   5318:                break;
                   5319:             case XML_PARSER_EPILOG:
                   5320:                if (in->buf == NULL)
                   5321:                    avail = in->length - (in->cur - in->base);
                   5322:                else
                   5323:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5324:                if (avail < 1)
                   5325:                    goto done;
                   5326:                cur = in->cur[0];
                   5327:                if (IS_BLANK(cur)) {
                   5328:                    sgmlParseCharData(ctxt, 0);
                   5329:                    goto done;
                   5330:                }
                   5331:                if (avail < 2)
                   5332:                    goto done;
                   5333:                next = in->cur[1];
                   5334:                if ((cur == '<') && (next == '!') &&
                   5335:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5336:                    if ((!terminate) &&
                   5337:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5338:                        goto done;
                   5339: #ifdef DEBUG_PUSH
                   5340:                    fprintf(stderr, "HPP: Parsing Comment\n");
                   5341: #endif
                   5342:                    sgmlParseComment(ctxt);
                   5343:                    ctxt->instate = XML_PARSER_EPILOG;
                   5344:                } else if ((cur == '<') && (next == '!') &&
                   5345:                           (avail < 4)) {
                   5346:                    goto done;
                   5347:                } else {
                   5348:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5349:                        ctxt->sax->error(ctxt->userData,
                   5350:                            "Extra content at the end of the document\n");
                   5351:                    ctxt->wellFormed = 0;
                   5352:                    ctxt->errNo = XML_ERR_DOCUMENT_END;
                   5353:                    ctxt->instate = XML_PARSER_EOF;
                   5354: #ifdef DEBUG_PUSH
                   5355:                    fprintf(stderr, "HPP: entering EOF\n");
                   5356: #endif
                   5357:                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5358:                        ctxt->sax->endDocument(ctxt->userData);
                   5359:                    goto done;
                   5360:                }
                   5361:                break;
                   5362:             case XML_PARSER_START_TAG: {
                   5363:                xmlChar *name, *oldname;
                   5364:                int depth = ctxt->nameNr;
                   5365:                sgmlElemDescPtr info;
                   5366: 
                   5367:                if (avail < 2)
                   5368:                    goto done;
                   5369:                cur = in->cur[0];
                   5370:                if (cur != '<') {
                   5371:                    ctxt->instate = XML_PARSER_CONTENT;
                   5372: #ifdef DEBUG_PUSH
                   5373:                    fprintf(stderr, "HPP: entering CONTENT\n");
                   5374: #endif
                   5375:                    break;
                   5376:                }
                   5377:                if ((!terminate) &&
                   5378:                    (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5379:                    goto done;
                   5380: 
                   5381:                oldname = xmlStrdup(ctxt->name);
                   5382:                sgmlParseStartTag(ctxt);
                   5383:                name = ctxt->name;
                   5384: #ifdef DEBUG
                   5385:                if (oldname == NULL)
                   5386:                    fprintf(stderr, "Start of element %s\n", name);
                   5387:                else if (name == NULL)  
                   5388:                    fprintf(stderr, "Start of element failed, was %s\n",
                   5389:                            oldname);
                   5390:                else    
                   5391:                    fprintf(stderr, "Start of element %s, was %s\n",
                   5392:                            name, oldname);
                   5393: #endif
                   5394:                if (((depth == ctxt->nameNr) &&
                   5395:                     (!xmlStrcmp(oldname, ctxt->name))) ||
                   5396:                    (name == NULL)) {
                   5397:                    if (CUR == '>')
                   5398:                        NEXT;
                   5399:                    if (oldname != NULL)
                   5400:                        xmlFree(oldname);
                   5401:                    break;
                   5402:                }
                   5403:                if (oldname != NULL)
                   5404:                    xmlFree(oldname);
                   5405: 
                   5406:                /*
                   5407:                 * Lookup the info for that element.
                   5408:                 */
                   5409:                info = sgmlTagLookup(name);
                   5410:                if (info == NULL) {
                   5411:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5412:                        ctxt->sax->error(ctxt->userData, "Tag %s invalid\n",
                   5413:                                         name);
                   5414:                    ctxt->wellFormed = 0;
                   5415:                } else if (info->depr) {
                   5416:                    /***************************
                   5417:                    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                   5418:                        ctxt->sax->warning(ctxt->userData,
                   5419:                                           "Tag %s is deprecated\n",
                   5420:                                           name);
                   5421:                     ***************************/
                   5422:                }
                   5423: 
                   5424:                /*
                   5425:                 * Check for an Empty Element labelled the XML/SGML way
                   5426:                 */
                   5427:                if ((CUR == '/') && (NXT(1) == '>')) {
                   5428:                    SKIP(2);
                   5429:                    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   5430:                        ctxt->sax->endElement(ctxt->userData, name);
                   5431:                    oldname = sgmlnamePop(ctxt);
                   5432: #ifdef DEBUG
                   5433:                    fprintf(stderr,"End of tag the XML way: popping out %s\n",
                   5434:                            oldname);
                   5435: #endif
                   5436:                    if (oldname != NULL)
                   5437:                        xmlFree(oldname);
                   5438:                    ctxt->instate = XML_PARSER_CONTENT;
                   5439: #ifdef DEBUG_PUSH
                   5440:                    fprintf(stderr, "HPP: entering CONTENT\n");
                   5441: #endif
                   5442:                    break;
                   5443:                }
                   5444: 
                   5445:                if (CUR == '>') {
                   5446:                    NEXT;
                   5447:                } else {
                   5448:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5449:                        ctxt->sax->error(ctxt->userData, 
                   5450:                                         "Couldn't find end of Start Tag %s\n",
                   5451:                                         name);
                   5452:                    ctxt->wellFormed = 0;
                   5453: 
                   5454:                    /*
                   5455:                     * end of parsing of this node.
                   5456:                     */
                   5457:                    if (!xmlStrcmp(name, ctxt->name)) { 
                   5458:                        nodePop(ctxt);
                   5459:                        oldname = sgmlnamePop(ctxt);
                   5460: #ifdef DEBUG
                   5461:                        fprintf(stderr,
                   5462:                         "End of start tag problem: popping out %s\n", oldname);
                   5463: #endif
                   5464:                        if (oldname != NULL)
                   5465:                            xmlFree(oldname);
                   5466:                    }    
                   5467: 
                   5468:                    ctxt->instate = XML_PARSER_CONTENT;
                   5469: #ifdef DEBUG_PUSH
                   5470:                    fprintf(stderr, "HPP: entering CONTENT\n");
                   5471: #endif
                   5472:                    break;
                   5473:                }
                   5474: 
                   5475:                /*
                   5476:                 * Check for an Empty Element from DTD definition
                   5477:                 */
                   5478:                if ((info != NULL) && (info->empty)) {
                   5479:                    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   5480:                        ctxt->sax->endElement(ctxt->userData, name);
                   5481:                    oldname = sgmlnamePop(ctxt);
                   5482: #ifdef DEBUG
                   5483:                    fprintf(stderr,"End of empty tag %s : popping out %s\n", name, oldname);
                   5484: #endif
                   5485:                    if (oldname != NULL)
                   5486:                        xmlFree(oldname);
                   5487:                }
                   5488:                ctxt->instate = XML_PARSER_CONTENT;
                   5489: #ifdef DEBUG_PUSH
                   5490:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5491: #endif
                   5492:                 break;
                   5493:            }
                   5494:             case XML_PARSER_CONTENT: {
                   5495:                long cons;
                   5496:                 /*
                   5497:                 * Handle preparsed entities and charRef
                   5498:                 */
                   5499:                if (ctxt->token != 0) {
                   5500:                    xmlChar chr[2] = { 0 , 0 } ;
                   5501: 
                   5502:                    chr[0] = (xmlChar) ctxt->token;
                   5503:                    sgmlCheckParagraph(ctxt);
                   5504:                    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   5505:                        ctxt->sax->characters(ctxt->userData, chr, 1);
                   5506:                    ctxt->token = 0;
                   5507:                    ctxt->checkIndex = 0;
                   5508:                }
                   5509:                if ((avail == 1) && (terminate)) {
                   5510:                    cur = in->cur[0];
                   5511:                    if ((cur != '<') && (cur != '&')) {
                   5512:                        if (ctxt->sax != NULL) {
                   5513:                            if (IS_BLANK(cur)) {
                   5514:                                if (ctxt->sax->ignorableWhitespace != NULL)
                   5515:                                    ctxt->sax->ignorableWhitespace(
                   5516:                                            ctxt->userData, &cur, 1);
                   5517:                            } else {
                   5518:                                sgmlCheckParagraph(ctxt);
                   5519:                                if (ctxt->sax->characters != NULL)
                   5520:                                    ctxt->sax->characters(
                   5521:                                            ctxt->userData, &cur, 1);
                   5522:                            }
                   5523:                        }
                   5524:                        ctxt->token = 0;
                   5525:                        ctxt->checkIndex = 0;
                   5526:                        NEXT;
                   5527:                    }
                   5528:                    break;
                   5529:                }
                   5530:                if (avail < 2)
                   5531:                    goto done;
                   5532:                cur = in->cur[0];
                   5533:                next = in->cur[1];
                   5534:                cons = ctxt->nbChars;
                   5535:                /*
                   5536:                 * Sometimes DOCTYPE arrives in the middle of the document
                   5537:                 */
                   5538:                if ((cur == '<') && (next == '!') &&
                   5539:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5540:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5541:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5542:                    (UPP(8) == 'E')) {
                   5543:                    if ((!terminate) &&
                   5544:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5545:                        goto done;
                   5546:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5547:                        ctxt->sax->error(ctxt->userData,
                   5548:                             "Misplaced DOCTYPE declaration\n");
                   5549:                    ctxt->wellFormed = 0;
                   5550:                    sgmlParseDocTypeDecl(ctxt);
                   5551:                } else if ((cur == '<') && (next == '!') &&
                   5552:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5553:                    if ((!terminate) &&
                   5554:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5555:                        goto done;
                   5556: #ifdef DEBUG_PUSH
                   5557:                    fprintf(stderr, "HPP: Parsing Comment\n");
                   5558: #endif
                   5559:                    sgmlParseComment(ctxt);
                   5560:                    ctxt->instate = XML_PARSER_CONTENT;
                   5561:                } else if ((cur == '<') && (next == '!') && (avail < 4)) {
                   5562:                    goto done;
                   5563:                } else if ((cur == '<') && (next == '/')) {
                   5564:                    ctxt->instate = XML_PARSER_END_TAG;
                   5565:                    ctxt->checkIndex = 0;
                   5566: #ifdef DEBUG_PUSH
                   5567:                    fprintf(stderr, "HPP: entering END_TAG\n");
                   5568: #endif
                   5569:                    break;
                   5570:                } else if (cur == '<') {
                   5571:                    ctxt->instate = XML_PARSER_START_TAG;
                   5572:                    ctxt->checkIndex = 0;
                   5573: #ifdef DEBUG_PUSH
                   5574:                    fprintf(stderr, "HPP: entering START_TAG\n");
                   5575: #endif
                   5576:                    break;
                   5577:                } else if (cur == '&') {
                   5578:                    if ((!terminate) &&
                   5579:                        (sgmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
                   5580:                        goto done;
                   5581: #ifdef DEBUG_PUSH
                   5582:                    fprintf(stderr, "HPP: Parsing Reference\n");
                   5583: #endif
                   5584:                    /* TODO: check generation of subtrees if noent !!! */
                   5585:                    sgmlParseReference(ctxt);
                   5586:                } else {
                   5587:                    /* TODO Avoid the extra copy, handle directly !!!!!! */
                   5588:                    /*
                   5589:                     * Goal of the following test is :
                   5590:                     *  - minimize calls to the SAX 'character' callback
                   5591:                     *    when they are mergeable
                   5592:                     */
                   5593:                    if ((ctxt->inputNr == 1) &&
                   5594:                        (avail < SGML_PARSER_BIG_BUFFER_SIZE)) {
                   5595:                        if ((!terminate) &&
                   5596:                            (sgmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
                   5597:                            goto done;
                   5598:                     }
                   5599:                    ctxt->checkIndex = 0;
                   5600: #ifdef DEBUG_PUSH
                   5601:                    fprintf(stderr, "HPP: Parsing char data\n");
                   5602: #endif
                   5603:                    sgmlParseCharData(ctxt, 0);
                   5604:                }
                   5605:                if (cons == ctxt->nbChars) {
                   5606:                    if (ctxt->node != NULL) {
                   5607:                        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5608:                            ctxt->sax->error(ctxt->userData,
                   5609:                                 "detected an error in element content\n");
                   5610:                        ctxt->wellFormed = 0;
                   5611:                        NEXT;
                   5612:                    }
                   5613:                    break;
                   5614:                }
                   5615: 
                   5616:                break;
                   5617:            }
                   5618:             case XML_PARSER_END_TAG:
                   5619:                if (avail < 2)
                   5620:                    goto done;
                   5621:                if ((!terminate) &&
                   5622:                    (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5623:                    goto done;
                   5624:                sgmlParseEndTag(ctxt);
                   5625:                if (ctxt->nameNr == 0) {
                   5626:                    ctxt->instate = XML_PARSER_EPILOG;
                   5627:                } else {
                   5628:                    ctxt->instate = XML_PARSER_CONTENT;
                   5629:                }
                   5630:                ctxt->checkIndex = 0;
                   5631: #ifdef DEBUG_PUSH
                   5632:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5633: #endif
                   5634:                break;
                   5635:             case XML_PARSER_CDATA_SECTION:
                   5636:                fprintf(stderr, "HPP: internal error, state == CDATA\n");
                   5637:                ctxt->instate = XML_PARSER_CONTENT;
                   5638:                ctxt->checkIndex = 0;
                   5639: #ifdef DEBUG_PUSH
                   5640:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5641: #endif
                   5642:                break;
                   5643:             case XML_PARSER_DTD:
                   5644:                fprintf(stderr, "HPP: internal error, state == DTD\n");
                   5645:                ctxt->instate = XML_PARSER_CONTENT;
                   5646:                ctxt->checkIndex = 0;
                   5647: #ifdef DEBUG_PUSH
                   5648:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5649: #endif
                   5650:                break;
                   5651:             case XML_PARSER_COMMENT:
                   5652:                fprintf(stderr, "HPP: internal error, state == COMMENT\n");
                   5653:                ctxt->instate = XML_PARSER_CONTENT;
                   5654:                ctxt->checkIndex = 0;
                   5655: #ifdef DEBUG_PUSH
                   5656:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5657: #endif
                   5658:                break;
                   5659:             case XML_PARSER_PI:
                   5660:                fprintf(stderr, "HPP: internal error, state == PI\n");
                   5661:                ctxt->instate = XML_PARSER_CONTENT;
                   5662:                ctxt->checkIndex = 0;
                   5663: #ifdef DEBUG_PUSH
                   5664:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5665: #endif
                   5666:                break;
                   5667:             case XML_PARSER_ENTITY_DECL:
                   5668:                fprintf(stderr, "HPP: internal error, state == ENTITY_DECL\n");
                   5669:                ctxt->instate = XML_PARSER_CONTENT;
                   5670:                ctxt->checkIndex = 0;
                   5671: #ifdef DEBUG_PUSH
                   5672:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5673: #endif
                   5674:                break;
                   5675:             case XML_PARSER_ENTITY_VALUE:
                   5676:                fprintf(stderr, "HPP: internal error, state == ENTITY_VALUE\n");
                   5677:                ctxt->instate = XML_PARSER_CONTENT;
                   5678:                ctxt->checkIndex = 0;
                   5679: #ifdef DEBUG_PUSH
                   5680:                fprintf(stderr, "HPP: entering DTD\n");
                   5681: #endif
                   5682:                break;
                   5683:             case XML_PARSER_ATTRIBUTE_VALUE:
                   5684:                fprintf(stderr, "HPP: internal error, state == ATTRIBUTE_VALUE\n");
                   5685:                ctxt->instate = XML_PARSER_START_TAG;
                   5686:                ctxt->checkIndex = 0;
                   5687: #ifdef DEBUG_PUSH
                   5688:                fprintf(stderr, "HPP: entering START_TAG\n");
                   5689: #endif
                   5690:                break;
                   5691:            case XML_PARSER_SYSTEM_LITERAL:
                   5692:                fprintf(stderr, "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
                   5693:                ctxt->instate = XML_PARSER_CONTENT;
                   5694:                ctxt->checkIndex = 0;
                   5695: #ifdef DEBUG_PUSH
                   5696:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5697: #endif
                   5698:                break;
                   5699:        }
                   5700:     }
                   5701: done:    
                   5702:     if ((avail == 0) && (terminate)) {
                   5703:        sgmlAutoClose(ctxt, NULL);
                   5704:        if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 
                   5705:            /*
                   5706:             * SAX: end of the document processing.
                   5707:             */
                   5708:            ctxt->instate = XML_PARSER_EOF;
                   5709:            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5710:                ctxt->sax->endDocument(ctxt->userData);
                   5711:        }
                   5712:     }
                   5713:     if ((ctxt->myDoc != NULL) &&
                   5714:        ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
                   5715:         (ctxt->instate == XML_PARSER_EPILOG))) {
                   5716:        xmlDtdPtr dtd;
                   5717:        dtd = xmlGetIntSubset(ctxt->myDoc);
                   5718:        if (dtd == NULL)
                   5719:            ctxt->myDoc->intSubset = 
                   5720:                xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", 
                   5721:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   5722:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
                   5723:     }
                   5724: #ifdef DEBUG_PUSH
                   5725:     fprintf(stderr, "HPP: done %d\n", ret);
                   5726: #endif
                   5727:     return(ret);
                   5728: }
                   5729: 
                   5730: /**
                   5731:  * sgmlParseTry:
                   5732:  * @ctxt:  an SGML parser context
                   5733:  *
                   5734:  * Try to progress on parsing
                   5735:  *
                   5736:  * Returns zero if no parsing was possible
                   5737:  */
                   5738: int
                   5739: sgmlParseTry(sgmlParserCtxtPtr ctxt) {
                   5740:     return(sgmlParseTryOrFinish(ctxt, 0));
                   5741: }
                   5742: 
                   5743: /**
                   5744:  * sgmlParseChunk:
                   5745:  * @ctxt:  an XML parser context
                   5746:  * @chunk:  an char array
                   5747:  * @size:  the size in byte of the chunk
                   5748:  * @terminate:  last chunk indicator
                   5749:  *
                   5750:  * Parse a Chunk of memory
                   5751:  *
                   5752:  * Returns zero if no error, the xmlParserErrors otherwise.
                   5753:  */
                   5754: int
                   5755: sgmlParseChunk(sgmlParserCtxtPtr ctxt, const char *chunk, int size,
                   5756:               int terminate) {
                   5757:     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
                   5758:         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
                   5759:        int base = ctxt->input->base - ctxt->input->buf->buffer->content;
                   5760:        int cur = ctxt->input->cur - ctxt->input->base;
                   5761:        
                   5762:        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);              
                   5763:        ctxt->input->base = ctxt->input->buf->buffer->content + base;
                   5764:        ctxt->input->cur = ctxt->input->base + cur;
                   5765: #ifdef DEBUG_PUSH
                   5766:        fprintf(stderr, "HPP: pushed %d\n", size);
                   5767: #endif
                   5768: 
                   5769:        if ((terminate) || (ctxt->input->buf->buffer->use > 80))
                   5770:            sgmlParseTryOrFinish(ctxt, terminate);
                   5771:     } else if (ctxt->instate != XML_PARSER_EOF) {
                   5772:        xmlParserInputBufferPush(ctxt->input->buf, 0, "");
                   5773:         sgmlParseTryOrFinish(ctxt, terminate);
                   5774:     }
                   5775:     if (terminate) {
                   5776:        if ((ctxt->instate != XML_PARSER_EOF) &&
                   5777:            (ctxt->instate != XML_PARSER_EPILOG) &&
                   5778:            (ctxt->instate != XML_PARSER_MISC)) {
                   5779:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5780:                ctxt->sax->error(ctxt->userData,
                   5781:                    "Extra content at the end of the document\n");
                   5782:            ctxt->wellFormed = 0;
                   5783:            ctxt->errNo = XML_ERR_DOCUMENT_END;
                   5784:        } 
                   5785:        if (ctxt->instate != XML_PARSER_EOF) {
                   5786:            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5787:                ctxt->sax->endDocument(ctxt->userData);
                   5788:        }
                   5789:        ctxt->instate = XML_PARSER_EOF;
                   5790:     }
                   5791:     return((xmlParserErrors) ctxt->errNo);           
                   5792: }
                   5793: 
                   5794: /************************************************************************
                   5795:  *                                                                     *
                   5796:  *                     User entry points                               *
                   5797:  *                                                                     *
                   5798:  ************************************************************************/
                   5799: 
                   5800: /**
                   5801:  * sgmlCreatePushParserCtxt :
                   5802:  * @sax:  a SAX handler
                   5803:  * @user_data:  The user data returned on SAX callbacks
                   5804:  * @chunk:  a pointer to an array of chars
                   5805:  * @size:  number of chars in the array
                   5806:  * @filename:  an optional file name or URI
                   5807:  * @enc:  an optional encoding
                   5808:  *
                   5809:  * Create a parser context for using the SGML parser in push mode
                   5810:  * To allow content encoding detection, @size should be >= 4
                   5811:  * The value of @filename is used for fetching external entities
                   5812:  * and error/warning reports.
                   5813:  *
                   5814:  * Returns the new parser context or NULL
                   5815:  */
                   5816: sgmlParserCtxtPtr
                   5817: sgmlCreatePushParserCtxt(sgmlSAXHandlerPtr sax, void *user_data, 
                   5818:                          const char *chunk, int size, const char *filename,
                   5819:                         xmlCharEncoding enc) {
                   5820:     sgmlParserCtxtPtr ctxt;
                   5821:     sgmlParserInputPtr inputStream;
                   5822:     xmlParserInputBufferPtr buf;
                   5823: 
                   5824:     buf = xmlAllocParserInputBuffer(enc);
                   5825:     if (buf == NULL) return(NULL);
                   5826: 
                   5827:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   5828:     if (ctxt == NULL) {
                   5829:        xmlFree(buf);
                   5830:        return(NULL);
                   5831:     }
                   5832:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   5833:     sgmlInitParserCtxt(ctxt);
                   5834:     if (sax != NULL) {
                   5835:        if (ctxt->sax != &sgmlDefaultSAXHandler)
                   5836:            xmlFree(ctxt->sax);
                   5837:        ctxt->sax = (sgmlSAXHandlerPtr) xmlMalloc(sizeof(sgmlSAXHandler));
                   5838:        if (ctxt->sax == NULL) {
                   5839:            xmlFree(buf);
                   5840:            xmlFree(ctxt);
                   5841:            return(NULL);
                   5842:        }
                   5843:        memcpy(ctxt->sax, sax, sizeof(sgmlSAXHandler));
                   5844:        if (user_data != NULL)
                   5845:            ctxt->userData = user_data;
                   5846:     }  
                   5847:     if (filename == NULL) {
                   5848:        ctxt->directory = NULL;
                   5849:     } else {
                   5850:         ctxt->directory = xmlParserGetDirectory(filename);
                   5851:     }
                   5852: 
                   5853:     inputStream = sgmlNewInputStream(ctxt);
                   5854:     if (inputStream == NULL) {
                   5855:        xmlFreeParserCtxt(ctxt);
                   5856:        return(NULL);
                   5857:     }
                   5858: 
                   5859:     if (filename == NULL)
                   5860:        inputStream->filename = NULL;
                   5861:     else
                   5862:        inputStream->filename = xmlMemStrdup(filename);
                   5863:     inputStream->buf = buf;
                   5864:     inputStream->base = inputStream->buf->buffer->content;
                   5865:     inputStream->cur = inputStream->buf->buffer->content;
                   5866: 
                   5867:     inputPush(ctxt, inputStream);
                   5868: 
                   5869:     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
                   5870:         (ctxt->input->buf != NULL))  {       
                   5871:        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);              
                   5872: #ifdef DEBUG_PUSH
                   5873:        fprintf(stderr, "HPP: pushed %d\n", size);
                   5874: #endif
                   5875:     }
                   5876: 
                   5877:     return(ctxt);
                   5878: }
                   5879: 
                   5880: /**
                   5881:  * sgmlSAXParseDoc :
                   5882:  * @cur:  a pointer to an array of xmlChar
                   5883:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5884:  * @sax:  the SAX handler block
                   5885:  * @userData: if using SAX, this pointer will be provided on callbacks. 
                   5886:  *
                   5887:  * parse an SGML in-memory document and build a tree.
                   5888:  * It use the given SAX function block to handle the parsing callback.
                   5889:  * If sax is NULL, fallback to the default DOM tree building routines.
                   5890:  * 
                   5891:  * Returns the resulting document tree
                   5892:  */
                   5893: 
                   5894: sgmlDocPtr
                   5895: sgmlSAXParseDoc(xmlChar *cur, const char *encoding, sgmlSAXHandlerPtr sax, void *userData) {
                   5896:     sgmlDocPtr ret;
                   5897:     sgmlParserCtxtPtr ctxt;
                   5898: 
                   5899:     if (cur == NULL) return(NULL);
                   5900: 
                   5901: 
                   5902:     ctxt = sgmlCreateDocParserCtxt(cur, encoding);
                   5903:     if (ctxt == NULL) return(NULL);
                   5904:     if (sax != NULL) { 
                   5905:         ctxt->sax = sax;
                   5906:         ctxt->userData = userData;
                   5907:     }
                   5908: 
                   5909:     sgmlParseDocument(ctxt);
                   5910:     ret = ctxt->myDoc;
                   5911:     if (sax != NULL) {
                   5912:        ctxt->sax = NULL;
                   5913:        ctxt->userData = NULL;
                   5914:     }
                   5915:     sgmlFreeParserCtxt(ctxt);
                   5916:     
                   5917:     return(ret);
                   5918: }
                   5919: 
                   5920: /**
                   5921:  * sgmlParseDoc :
                   5922:  * @cur:  a pointer to an array of xmlChar
                   5923:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5924:  *
                   5925:  * parse an SGML in-memory document and build a tree.
                   5926:  * 
                   5927:  * Returns the resulting document tree
                   5928:  */
                   5929: 
                   5930: sgmlDocPtr
                   5931: sgmlParseDoc(xmlChar *cur, const char *encoding) {
                   5932:     return(sgmlSAXParseDoc(cur, encoding, NULL, NULL));
                   5933: }
                   5934: 
                   5935: 
                   5936: /**
                   5937:  * sgmlCreateFileParserCtxt :
                   5938:  * @filename:  the filename
                   5939:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5940:  *
                   5941:  * Create a parser context for a file content. 
                   5942:  * Automatic support for ZLIB/Compress compressed document is provided
                   5943:  * by default if found at compile-time.
                   5944:  *
                   5945:  * Returns the new parser context or NULL
                   5946:  */
                   5947: sgmlParserCtxtPtr
                   5948: sgmlCreateFileParserCtxt(const char *filename, const char *encoding)
                   5949: {
                   5950:     sgmlParserCtxtPtr ctxt;
                   5951:     sgmlParserInputPtr inputStream;
                   5952:     xmlParserInputBufferPtr buf;
                   5953:     /* sgmlCharEncoding enc; */
                   5954: 
                   5955:     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
                   5956:     if (buf == NULL) return(NULL);
                   5957: 
                   5958:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   5959:     if (ctxt == NULL) {
                   5960:         perror("malloc");
                   5961:        return(NULL);
                   5962:     }
                   5963:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   5964:     sgmlInitParserCtxt(ctxt);
                   5965:     inputStream = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   5966:     if (inputStream == NULL) {
                   5967:         perror("malloc");
                   5968:        xmlFree(ctxt);
                   5969:        return(NULL);
                   5970:     }
                   5971:     memset(inputStream, 0, sizeof(sgmlParserInput));
                   5972: 
                   5973:     inputStream->filename = xmlMemStrdup(filename);
                   5974:     inputStream->line = 1;
                   5975:     inputStream->col = 1;
                   5976:     inputStream->buf = buf;
                   5977:     inputStream->directory = NULL;
                   5978: 
                   5979:     inputStream->base = inputStream->buf->buffer->content;
                   5980:     inputStream->cur = inputStream->buf->buffer->content;
                   5981:     inputStream->free = NULL;
                   5982: 
                   5983:     inputPush(ctxt, inputStream);
                   5984:     return(ctxt);
                   5985: }
                   5986: 
                   5987: /**
                   5988:  * sgmlSAXParseFile :
                   5989:  * @filename:  the filename
                   5990:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5991:  * @sax:  the SAX handler block
                   5992:  * @userData: if using SAX, this pointer will be provided on callbacks. 
                   5993:  *
                   5994:  * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
                   5995:  * compressed document is provided by default if found at compile-time.
                   5996:  * It use the given SAX function block to handle the parsing callback.
                   5997:  * If sax is NULL, fallback to the default DOM tree building routines.
                   5998:  *
                   5999:  * Returns the resulting document tree
                   6000:  */
                   6001: 
                   6002: sgmlDocPtr
                   6003: sgmlSAXParseFile(const char *filename, const char *encoding, sgmlSAXHandlerPtr sax, 
                   6004:                  void *userData) {
                   6005:     sgmlDocPtr ret;
                   6006:     sgmlParserCtxtPtr ctxt;
                   6007:     sgmlSAXHandlerPtr oldsax = NULL;
                   6008: 
                   6009:     ctxt = sgmlCreateFileParserCtxt(filename, encoding);
                   6010:     if (ctxt == NULL) return(NULL);
                   6011:     if (sax != NULL) {
                   6012:        oldsax = ctxt->sax;
                   6013:         ctxt->sax = sax;
                   6014:         ctxt->userData = userData;
                   6015:     }
                   6016: 
                   6017:     sgmlParseDocument(ctxt);
                   6018: 
                   6019:     ret = ctxt->myDoc;
                   6020:     if (sax != NULL) {
                   6021:         ctxt->sax = oldsax;
                   6022:         ctxt->userData = NULL;
                   6023:     }
                   6024:     sgmlFreeParserCtxt(ctxt);
                   6025:     
                   6026:     return(ret);
                   6027: }
                   6028: 
                   6029: /**
                   6030:  * sgmlParseFile :
                   6031:  * @filename:  the filename
                   6032:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   6033:  *
                   6034:  * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
                   6035:  * compressed document is provided by default if found at compile-time.
                   6036:  *
                   6037:  * Returns the resulting document tree
                   6038:  */
                   6039: 
                   6040: sgmlDocPtr
                   6041: sgmlParseFile(const char *filename, const char *encoding) {
                   6042:     return(sgmlSAXParseFile(filename, encoding, NULL, NULL));
                   6043: }
                   6044: 
                   6045: #endif /* LIBXML_SGML_ENABLED */

Webmaster