XML/SGMLparser.c - annotate

Return to SGMLparser.c CVS log
Up to [Public] / XML
Annotation of XML/SGMLparser.c, revision 1.9

1.1       veillard    1: /*
                      2:  * SGMLparser.c : an attempt to parse Docbook documents
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
                      9: #ifdef WIN32
                     10: #include "win32config.h"
                     11: #else
                     12: #include "config.h"
                     13: #endif
                     14: 
                     15: #include "xmlversion.h"
                     16: #ifdef LIBXML_SGML_ENABLED
                     17: 
                     18: #include <stdio.h>
                     19: #include <string.h>
                     20: #ifdef HAVE_CTYPE_H
                     21: #include <ctype.h>
                     22: #endif
                     23: #ifdef HAVE_STDLIB_H
                     24: #include <stdlib.h>
                     25: #endif
                     26: #ifdef HAVE_SYS_STAT_H
                     27: #include <sys/stat.h>
                     28: #endif
                     29: #ifdef HAVE_FCNTL_H
                     30: #include <fcntl.h>
                     31: #endif
                     32: #ifdef HAVE_UNISTD_H
                     33: #include <unistd.h>
                     34: #endif
                     35: #ifdef HAVE_ZLIB_H
                     36: #include <zlib.h>
                     37: #endif
                     38: 
                     39: #include <libxml/xmlmemory.h>
                     40: #include <libxml/tree.h>
                     41: #include <libxml/SGMLparser.h>
                     42: #include <libxml/entities.h>
                     43: #include <libxml/encoding.h>
                     44: #include <libxml/parser.h>
                     45: #include <libxml/valid.h>
                     46: #include <libxml/parserInternals.h>
                     47: #include <libxml/xmlIO.h>
                     48: #include <libxml/SAX.h>
1.3       veillard   49: #include <libxml/uri.h>
1.8       veillard   50: #include <libxml/xmlerror.h>
1.1       veillard   51: 
                     52: #define SGML_MAX_NAMELEN 1000
                     53: #define SGML_PARSER_BIG_BUFFER_SIZE 1000
                     54: #define SGML_PARSER_BUFFER_SIZE 100
                     55: 
                     56: /* #define DEBUG */
                     57: /* #define DEBUG_PUSH */
                     58: 
                     59: /************************************************************************
                     60:  *                                                                     *
                     61:  *             Parser stacks related functions and macros              *
                     62:  *                                                                     *
                     63:  ************************************************************************/
                     64: 
                     65: /*
                     66:  * Generic function for accessing stacks in the Parser Context
                     67:  */
                     68: 
                     69: #define PUSH_AND_POP(scope, type, name)                                        \
                     70: scope int sgml##name##Push(sgmlParserCtxtPtr ctxt, type value) {       \
                     71:     if (ctxt->name##Nr >= ctxt->name##Max) {                           \
                     72:        ctxt->name##Max *= 2;                                           \
                     73:         ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,         \
                     74:                     ctxt->name##Max * sizeof(ctxt->name##Tab[0]));     \
                     75:         if (ctxt->name##Tab == NULL) {                                 \
                     76:            fprintf(stderr, "realloc failed !\n");                      \
                     77:            return(0);                                                  \
                     78:        }                                                               \
                     79:     }                                                                  \
                     80:     ctxt->name##Tab[ctxt->name##Nr] = value;                           \
                     81:     ctxt->name = value;                                                        \
                     82:     return(ctxt->name##Nr++);                                          \
                     83: }                                                                      \
                     84: scope type sgml##name##Pop(sgmlParserCtxtPtr ctxt) {                   \
                     85:     type ret;                                                          \
                     86:     if (ctxt->name##Nr < 0) return(0);                                 \
                     87:     ctxt->name##Nr--;                                                  \
                     88:     if (ctxt->name##Nr < 0) return(0);                                 \
                     89:     if (ctxt->name##Nr > 0)                                            \
                     90:        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
                     91:     else                                                               \
                     92:         ctxt->name = NULL;                                             \
                     93:     ret = ctxt->name##Tab[ctxt->name##Nr];                             \
                     94:     ctxt->name##Tab[ctxt->name##Nr] = 0;                               \
                     95:     return(ret);                                                       \
                     96: }                                                                      \
                     97: 
                     98: PUSH_AND_POP(extern, xmlNodePtr, node)
                     99: PUSH_AND_POP(extern, xmlChar*, name)
                    100: 
                    101: /*
                    102:  * Macros for accessing the content. Those should be used only by the parser,
                    103:  * and not exported.
                    104:  *
                    105:  * Dirty macros, i.e. one need to make assumption on the context to use them
                    106:  *
                    107:  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
                    108:  *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
                    109:  *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
                    110:  *           in UNICODE mode. This should be used internally by the parser
                    111:  *           only to compare to ASCII values otherwise it would break when
                    112:  *           running with UTF-8 encoding.
                    113:  *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
                    114:  *           to compare on ASCII based substring.
                    115:  *   UPP(n)  returns the n'th next xmlChar converted to uppercase. Same as CUR
                    116:  *           it should be used only to compare on ASCII based substring.
                    117:  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
                    118:  *           strings within the parser.
                    119:  *
                    120:  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
                    121:  *
                    122:  *   CURRENT Returns the current char value, with the full decoding of
                    123:  *           UTF-8 if we are using this mode. It returns an int.
                    124:  *   NEXT    Skip to the next character, this does the proper decoding
                    125:  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
                    126:  *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
                    127:  */
                    128: 
                    129: #define UPPER (toupper(*ctxt->input->cur))
                    130: 
                    131: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
                    132: 
                    133: #define NXT(val) ctxt->input->cur[(val)]
                    134: 
                    135: #define UPP(val) (toupper(ctxt->input->cur[(val)]))
                    136: 
                    137: #define CUR_PTR ctxt->input->cur
                    138: 
                    139: #define SHRINK  xmlParserInputShrink(ctxt->input)
                    140: 
                    141: #define GROW  xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
                    142: 
                    143: #define CURRENT ((int) (*ctxt->input->cur))
                    144: 
1.9     ! veillard  145: #define SKIP_BLANKS sgmlSkipBlankChars(ctxt)
1.1       veillard  146: 
                    147: #if 0
                    148: #define CUR ((int) (*ctxt->input->cur))
                    149: #define NEXT sgmlNextChar(ctxt);
                    150: #else
                    151: /* Inported from XML */
                    152: 
                    153: /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
                    154: #define CUR ((int) (*ctxt->input->cur))
1.9     ! veillard  155: #define NEXT xmlNextChar(ctxt),ctxt->nbChars++
1.1       veillard  156: 
                    157: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
                    158: #define NXT(val) ctxt->input->cur[(val)]
                    159: #define CUR_PTR ctxt->input->cur
                    160: 
                    161: 
1.9     ! veillard  162: #define NEXTL(l) do {                                                  \
1.1       veillard  163:     if (*(ctxt->input->cur) == '\n') {                                 \
                    164:        ctxt->input->line++; ctxt->input->col = 1;                      \
                    165:     } else ctxt->input->col++;                                         \
1.9     ! veillard  166:     ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++;           \
        !           167:   } while (0)
1.1       veillard  168:     
                    169: /************
                    170:     \
                    171:     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);    \
                    172:     if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
                    173:  ************/
                    174: 
1.9     ! veillard  175: #define CUR_CHAR(l) sgmlCurrentChar(ctxt, &l)
        !           176: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1.1       veillard  177: 
                    178: #define COPY_BUF(l,b,i,v)                                              \
                    179:     if (l == 1) b[i++] = (xmlChar) v;                                  \
1.9     ! veillard  180:     else i += xmlCopyChar(l,&b[i],v)
1.1       veillard  181: #endif
                    182: 
                    183: /**
                    184:  * sgmlCurrentChar:
                    185:  * @ctxt:  the SGML parser context
                    186:  * @len:  pointer to the length of the char read
                    187:  *
                    188:  * The current char value, if using UTF-8 this may actaully span multiple
                    189:  * bytes in the input buffer. Implement the end of line normalization:
                    190:  * 2.11 End-of-Line Handling
                    191:  * If the encoding is unspecified, in the case we find an ISO-Latin-1
                    192:  * char, then the encoding converter is plugged in automatically.
                    193:  *
                    194:  * Returns the current char value and its lenght
                    195:  */
                    196: 
                    197: int
                    198: sgmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
                    199:     if (ctxt->instate == XML_PARSER_EOF)
                    200:        return(0);
                    201: 
                    202:     if (ctxt->token != 0) {
                    203:        *len = 0;
                    204:        return(ctxt->token);
                    205:     }  
                    206:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
                    207:        /*
                    208:         * We are supposed to handle UTF8, check it's valid
                    209:         * From rfc2044: encoding of the Unicode values on UTF-8:
                    210:         *
                    211:         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    212:         * 0000 0000-0000 007F   0xxxxxxx
                    213:         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                    214:         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                    215:         *
                    216:         * Check for the 0x110000 limit too
                    217:         */
                    218:        const unsigned char *cur = ctxt->input->cur;
                    219:        unsigned char c;
                    220:        unsigned int val;
                    221: 
                    222:        c = *cur;
                    223:        if (c & 0x80) {
                    224:            if (cur[1] == 0)
                    225:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    226:            if ((cur[1] & 0xc0) != 0x80)
                    227:                goto encoding_error;
                    228:            if ((c & 0xe0) == 0xe0) {
                    229: 
                    230:                if (cur[2] == 0)
                    231:                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    232:                if ((cur[2] & 0xc0) != 0x80)
                    233:                    goto encoding_error;
                    234:                if ((c & 0xf0) == 0xf0) {
                    235:                    if (cur[3] == 0)
                    236:                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    237:                    if (((c & 0xf8) != 0xf0) ||
                    238:                        ((cur[3] & 0xc0) != 0x80))
                    239:                        goto encoding_error;
                    240:                    /* 4-byte code */
                    241:                    *len = 4;
                    242:                    val = (cur[0] & 0x7) << 18;
                    243:                    val |= (cur[1] & 0x3f) << 12;
                    244:                    val |= (cur[2] & 0x3f) << 6;
                    245:                    val |= cur[3] & 0x3f;
                    246:                } else {
                    247:                  /* 3-byte code */
                    248:                    *len = 3;
                    249:                    val = (cur[0] & 0xf) << 12;
                    250:                    val |= (cur[1] & 0x3f) << 6;
                    251:                    val |= cur[2] & 0x3f;
                    252:                }
                    253:            } else {
                    254:              /* 2-byte code */
                    255:                *len = 2;
                    256:                val = (cur[0] & 0x1f) << 6;
                    257:                val |= cur[1] & 0x3f;
                    258:            }
                    259:            if (!IS_CHAR(val)) {
1.6       veillard  260:                ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1       veillard  261:                if ((ctxt->sax != NULL) &&
                    262:                    (ctxt->sax->error != NULL))
                    263:                    ctxt->sax->error(ctxt->userData, 
                    264:                                     "Char 0x%X out of allowed range\n", val);
                    265:                ctxt->wellFormed = 0;
                    266:                ctxt->disableSAX = 1;
                    267:            }    
                    268:            return(val);
                    269:        } else {
                    270:            /* 1-byte code */
                    271:            *len = 1;
                    272:            return((int) *ctxt->input->cur);
                    273:        }
                    274:     }
                    275:     /*
                    276:      * Assume it's a fixed lenght encoding (1) with
                    277:      * a compatibke encoding for the ASCII set, since
                    278:      * XML constructs only use < 128 chars
                    279:      */
                    280:     *len = 1;
                    281:     if ((int) *ctxt->input->cur < 0x80)
                    282:        return((int) *ctxt->input->cur);
                    283: 
                    284:     /*
                    285:      * Humm this is bad, do an automatic flow conversion
                    286:      */
                    287:     xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
                    288:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
                    289:     return(xmlCurrentChar(ctxt, len));
                    290: 
                    291: encoding_error:
                    292:     /*
                    293:      * If we detect an UTF8 error that probably mean that the
                    294:      * input encoding didn't get properly advertized in the
                    295:      * declaration header. Report the error and switch the encoding
                    296:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    297:      * encoding !)
                    298:      */
1.6       veillard  299:     ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1       veillard  300:     if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
                    301:        ctxt->sax->error(ctxt->userData, 
                    302:                         "Input is not proper UTF-8, indicate encoding !\n");
                    303:        ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    304:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    305:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    306:     }
                    307: 
                    308:     ctxt->charset = XML_CHAR_ENCODING_8859_1; 
                    309:     *len = 1;
                    310:     return((int) *ctxt->input->cur);
                    311: }
                    312: 
                    313: /**
                    314:  * sgmlNextChar:
                    315:  * @ctxt:  the SGML parser context
                    316:  *
                    317:  * Skip to the next char input char.
                    318:  */
                    319: 
                    320: void
                    321: sgmlNextChar(sgmlParserCtxtPtr ctxt) {
                    322:     if (ctxt->instate == XML_PARSER_EOF)
                    323:        return;
                    324:     if ((*ctxt->input->cur == 0) &&
                    325:         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
                    326:            xmlPopInput(ctxt);
                    327:     } else {
                    328:         if (*(ctxt->input->cur) == '\n') {
                    329:            ctxt->input->line++; ctxt->input->col = 1;
                    330:        } else ctxt->input->col++;
                    331:        ctxt->input->cur++;
                    332:        ctxt->nbChars++;
                    333:         if (*ctxt->input->cur == 0)
                    334:            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    335:     }
                    336: }
                    337: 
                    338: /**
                    339:  * sgmlSkipBlankChars:
                    340:  * @ctxt:  the SGML parser context
                    341:  *
                    342:  * skip all blanks character found at that point in the input streams.
                    343:  *
                    344:  * Returns the number of space chars skipped
                    345:  */
                    346: 
                    347: int
                    348: sgmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
                    349:     int res = 0;
                    350: 
                    351:     while (IS_BLANK(*(ctxt->input->cur))) {
                    352:        if ((*ctxt->input->cur == 0) &&
                    353:            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
                    354:                xmlPopInput(ctxt);
                    355:        } else {
                    356:            if (*(ctxt->input->cur) == '\n') {
                    357:                ctxt->input->line++; ctxt->input->col = 1;
                    358:            } else ctxt->input->col++;
                    359:            ctxt->input->cur++;
                    360:            ctxt->nbChars++;
                    361:            if (*ctxt->input->cur == 0)
                    362:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    363:        }
                    364:        res++;
                    365:     }
                    366:     return(res);
                    367: }
                    368: 
                    369: 
                    370: 
                    371: /************************************************************************
                    372:  *                                                                     *
                    373:  *             The list of SGML elements and their properties          *
                    374:  *                                                                     *
                    375:  ************************************************************************/
                    376: 
                    377: /*
                    378:  *  Start Tag: 1 means the start tag can be omitted
                    379:  *  End Tag:   1 means the end tag can be omitted
                    380:  *             2 means it's forbidden (empty elements)
                    381:  *  Depr:      this element is deprecated
                    382:  *  DTD:       1 means that this element is valid only in the Loose DTD
                    383:  *             2 means that this element is valid only in the Frameset DTD
                    384:  *
                    385:  * Name,Start Tag,End Tag,  Empty,  Depr.,    DTD, Description
                    386:  */
                    387: sgmlElemDesc  docbookElementTable[] = {
                    388: { "abbrev",    0,      0,      0,      3,      0, "" }, /* word */
                    389: { "abstract",  0,      0,      0,      9,      0, "" }, /* title */
                    390: { "accel",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    391: { "ackno",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    392: { "acronym",   0,      0,      0,      3,      0, "" }, /* word */
                    393: { "action",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    394: { "address",   0,      0,      0,      1,      0, "" },
                    395: { "affiliation",0,     0,      0,      9,      0, "" }, /* shortaffil */
                    396: { "alt",       0,      0,      0,      1,      0, "" },
                    397: { "anchor",    0,      2,      1,      0,      0, "" },
                    398: { "answer",    0,      0,      0,      9,      0, "" }, /* label */
                    399: { "appendix",  0,      0,      0,      9,      0, "" }, /* appendixinfo */
                    400: { "appendixinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    401: { "application",0,     0,      0,      2,      0, "" }, /* para */
                    402: { "area",      0,      2,      1,      0,      0, "" },
                    403: { "areaset",   0,      0,      0,      9,      0, "" }, /* area */
                    404: { "areaspec",  0,      0,      0,      9,      0, "" }, /* area */
                    405: { "arg",       0,      0,      0,      1,      0, "" },
                    406: { "article",   0,      0,      0,      9,      0, "" }, /* div.title.content */
                    407: { "articleinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    408: { "artpagenums",0,     0,      0,      4,      0, "" }, /* docinfo */
                    409: { "attribution",0,     0,      0,      2,      0, "" }, /* para */
                    410: { "audiodata", 0,      2,      1,      0,      0, "" },
                    411: { "audioobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    412: { "authorblurb",0,     0,      0,      9,      0, "" }, /* title */
                    413: { "authorgroup",0,     0,      0,      9,      0, "" }, /* author */
                    414: { "authorinitials",0,  0,      0,      4,      0, "" }, /* docinfo */
                    415: { "author",    0,      0,      0,      9,      0, "" }, /* person.ident.mix */
                    416: { "beginpage", 0,      2,      1,      0,      0, "" },
                    417: { "bibliodiv", 0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    418: { "biblioentry",0,     0,      0,      9,      0, "" }, /* articleinfo */
                    419: { "bibliography",0,    0,      0,      9,      0, "" }, /* bibliographyinfo */
                    420: { "bibliographyinfo",0,        0,      0,      9,      0, "" }, /* graphic */
                    421: { "bibliomisc",        0,      0,      0,      2,      0, "" }, /* para */
                    422: { "bibliomixed",0,     0,      0,      1,      0, "" }, /* %bibliocomponent.mix, bibliomset) */
                    423: { "bibliomset",        0,      0,      0,      1,      0, "" }, /* %bibliocomponent.mix; | bibliomset) */
                    424: { "biblioset", 0,      0,      0,      9,      0, "" }, /* bibliocomponent.mix */
                    425: { "blockquote",        0,      0,      0,      9,      0, "" }, /* title */
                    426: { "book",      0,      0,      0,      9,      0, "" }, /* div.title.content */
                    427: { "bookinfo",  0,      0,      0,      9,      0, "" }, /* graphic */
                    428: { "bridgehead",        0,      0,      0,      8,      0, "" }, /* title */
                    429: { "callout",   0,      0,      0,      9,      0, "" }, /* component.mix */
                    430: { "calloutlist",0,     0,      0,      9,      0, "" }, /* formalobject.title.content */
                    431: { "caption",   0,      0,      0,      9,      0, "" }, /* textobject.mix */
                    432: { "caution",   0,      0,      0,      9,      0, "" }, /* title */
                    433: { "chapter",   0,      0,      0,      9,      0, "" }, /* chapterinfo */
                    434: { "chapterinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    435: { "citation",  0,      0,      0,      2,      0, "" }, /* para */
                    436: { "citerefentry",0,    0,      0,      9,      0, "" }, /* refentrytitle */
                    437: { "citetitle", 0,      0,      0,      2,      0, "" }, /* para */
                    438: { "city",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    439: { "classname", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    440: { "classsynopsisinfo",0,0,     0,      9,      0, "" }, /* cptr */
                    441: { "classsynopsis",0,   0,      0,      9,      0, "" }, /* ooclass */
                    442: { "cmdsynopsis",0,     0,      0,      9,      0, "" }, /* command */
                    443: { "co",                0,      2,      1,      0,      0, "" },
                    444: { "collab",    0,      0,      0,      9,      0, "" }, /* collabname */
                    445: { "collabname",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    446: { "colophon",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    447: { "colspec",   0,      2,      1,      0,      0, "" },
                    448: { "colspec",   0,      2,      1,      0,      0, "" },
                    449: { "command",   0,      0,      0,      9,      0, "" }, /* cptr */
                    450: { "computeroutput",0,  0,      0,      9,      0, "" }, /* cptr */
                    451: { "confdates", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    452: { "confgroup", 0,      0,      0,      9,      0, "" }, /* confdates */
                    453: { "confnum",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    454: { "confsponsor",0,     0,      0,      4,      0, "" }, /* docinfo */
                    455: { "conftitle", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    456: { "constant",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    457: { "constructorsynopsis",0,0,   0,      9,      0, "" }, /* modifier */
                    458: { "contractnum",0,     0,      0,      4,      0, "" }, /* docinfo */
                    459: { "contractsponsor",0, 0,      0,      4,      0, "" }, /* docinfo */
                    460: { "contrib",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    461: { "copyright", 0,      0,      0,      9,      0, "" }, /* year */
                    462: { "corpauthor",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    463: { "corpname",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    464: { "country",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    465: { "database",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    466: { "date",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    467: { "dedication",        0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    468: { "destructorsynopsis",0,0,    0,      9,      0, "" }, /* modifier */
                    469: { "edition",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    470: { "editor",    0,      0,      0,      9,      0, "" }, /* person.ident.mix */
                    471: { "email",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    472: { "emphasis",  0,      0,      0,      2,      0, "" }, /* para */
                    473: { "entry",     0,      0,      0,      9,      0, "" }, /* tbl.entry.mdl */
                    474: { "entrytbl",  0,      0,      0,      9,      0, "" }, /* tbl.entrytbl.mdl */
                    475: { "envar",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    476: { "epigraph",  0,      0,      0,      9,      0, "" }, /* attribution */
                    477: { "equation",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    478: { "errorcode", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    479: { "errorname", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    480: { "errortype", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    481: { "example",   0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    482: { "exceptionname",0,   0,      0,      7,      0, "" }, /* smallcptr */
                    483: { "fax",       0,      0,      0,      4,      0, "" }, /* docinfo */
                    484: { "fieldsynopsis",     0,      0,      0,      9,      0, "" }, /* modifier */
                    485: { "figure",    0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    486: { "filename",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    487: { "firstname", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    488: { "firstterm", 0,      0,      0,      3,      0, "" }, /* word */
                    489: { "footnote",  0,      0,      0,      9,      0, "" }, /* footnote.mix */
                    490: { "footnoteref",0,     2,      1,      0,      0, "" },
                    491: { "foreignphrase",0,   0,      0,      2,      0, "" }, /* para */
                    492: { "formalpara",        0,      0,      0,      9,      0, "" }, /* title */
                    493: { "funcdef",   0,      0,      0,      1,      0, "" },
                    494: { "funcparams",        0,      0,      0,      9,      0, "" }, /* cptr */
                    495: { "funcprototype",0,   0,      0,      9,      0, "" }, /* funcdef */
                    496: { "funcsynopsis",0,    0,      0,      9,      0, "" }, /* funcsynopsisinfo */
                    497: { "funcsynopsisinfo",  0,      0,      0,      9,      0, "" }, /* cptr */
                    498: { "function",  0,      0,      0,      9,      0, "" }, /* cptr */
                    499: { "glossary",  0,      0,      0,      9,      0, "" }, /* glossaryinfo */
                    500: { "glossaryinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    501: { "glossdef",  0,      0,      0,      9,      0, "" }, /* glossdef.mix */
                    502: { "glossdiv",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    503: { "glossentry",        0,      0,      0,      9,      0, "" }, /* glossterm */
                    504: { "glosslist", 0,      0,      0,      9,      0, "" }, /* glossentry */
                    505: { "glossseealso",0,    0,      0,      2,      0, "" }, /* para */
                    506: { "glosssee",  0,      0,      0,      2,      0, "" }, /* para */
                    507: { "glossterm", 0,      0,      0,      2,      0, "" }, /* para */
                    508: { "graphic",   0,      2,      1,      0,      0, "" },
                    509: { "graphicco", 0,      0,      0,      9,      0, "" }, /* areaspec */
                    510: { "group",     0,      0,      0,      9,      0, "" }, /* arg */
                    511: { "guibutton", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    512: { "guiicon",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    513: { "guilabel",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    514: { "guimenuitem",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    515: { "guimenu",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    516: { "guisubmenu",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    517: { "hardware",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    518: { "highlights",        0,      0,      0,      9,      0, "" }, /* highlights.mix */
                    519: { "holder",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    520: { "honorific", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    521: { "imagedata", 0,      2,      1,      0,      0, "" },
                    522: { "imageobjectco",0,   0,      0,      9,      0, "" }, /* areaspec */
                    523: { "imageobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    524: { "important", 0,      0,      0,      9,      0, "" }, /* title */
                    525: { "indexdiv",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    526: { "indexentry",        0,      0,      0,      9,      0, "" }, /* primaryie */
                    527: { "index",     0,      0,      0,      9,      0, "" }, /* indexinfo */
                    528: { "indexinfo", 0,      0,      0,      9,      0, "" }, /* graphic */
                    529: { "indexterm", 0,      0,      0,      9,      0, "" }, /* primary */
                    530: { "informalequation",0,        0,      0,      9,      0, "" }, /* equation.content */
                    531: { "informalexample",0, 0,      0,      9,      0, "" }, /* example.mix */
                    532: { "informalfigure",0,  0,      0,      9,      0, "" }, /* figure.mix */
                    533: { "informaltable",0,   0,      0,      9,      0, "" }, /* graphic */
                    534: { "initializer",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    535: { "inlineequation",0,  0,      0,      9,      0, "" }, /* inlineequation.content */
                    536: { "inlinegraphic",0,   2,      1,      0,      0, "" },
                    537: { "inlinemediaobject",0,0,     0,      9,      0, "" }, /* objectinfo */
                    538: { "interfacename",0,   0,      0,      7,      0, "" }, /* smallcptr */
                    539: { "interface", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    540: { "invpartnumber",0,   0,      0,      4,      0, "" }, /* docinfo */
                    541: { "isbn",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    542: { "issn",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    543: { "issuenum",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    544: { "itemizedlist",0,    0,      0,      9,      0, "" }, /* formalobject.title.content */
                    545: { "itermset",  0,      0,      0,      9,      0, "" }, /* indexterm */
                    546: { "jobtitle",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    547: { "keycap",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    548: { "keycode",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    549: { "keycombo",  0,      0,      0,      9,      0, "" }, /* keycap */
                    550: { "keysym",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    551: { "keyword",   0,      0,      0,      1,      0, "" },
                    552: { "keywordset",        0,      0,      0,      9,      0, "" }, /* keyword */
                    553: { "label",     0,      0,      0,      3,      0, "" }, /* word */
                    554: { "legalnotice",0,     0,      0,      9,      0, "" }, /* title */
                    555: { "lineage",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    556: { "lineannotation",0,  0,      0,      2,      0, "" }, /* para */
                    557: { "link",      0,      0,      0,      2,      0, "" }, /* para */
                    558: { "listitem",  0,      0,      0,      9,      0, "" }, /* component.mix */
                    559: { "literal",   0,      0,      0,      9,      0, "" }, /* cptr */
                    560: { "literallayout",0,   0,      0,      2,      0, "" }, /* para */
                    561: { "lot",       0,      0,      0,      9,      0, "" }, /* bookcomponent.title.content */
                    562: { "lotentry",  0,      0,      0,      2,      0, "" }, /* para */
                    563: { "manvolnum", 0,      0,      0,      3,      0, "" }, /* word */
                    564: { "markup",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    565: { "medialabel",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    566: { "mediaobjectco",0,   0,      0,      9,      0, "" }, /* objectinfo */
                    567: { "mediaobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    568: { "member",    0,      0,      0,      2,      0, "" }, /* para */
                    569: { "menuchoice",        0,      0,      0,      9,      0, "" }, /* shortcut */
                    570: { "methodname",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    571: { "methodparam",0,     0,      0,      9,      0, "" }, /* modifier */
                    572: { "methodsynopsis",0,  0,      0,      9,      0, "" }, /* modifier */
                    573: { "modespec",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    574: { "modifier",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    575: { "mousebutton",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    576: { "msgaud",    0,      0,      0,      2,      0, "" }, /* para */
                    577: { "msgentry",  0,      0,      0,      9,      0, "" }, /* msg */
                    578: { "msgexplan", 0,      0,      0,      9,      0, "" }, /* title */
                    579: { "msginfo",   0,      0,      0,      9,      0, "" }, /* msglevel */
                    580: { "msglevel",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    581: { "msgmain",   0,      0,      0,      9,      0, "" }, /* title */
                    582: { "msgorig",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    583: { "msgrel",    0,      0,      0,      9,      0, "" }, /* title */
                    584: { "msgset",    0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    585: { "msgsub",    0,      0,      0,      9,      0, "" }, /* title */
                    586: { "msgtext",   0,      0,      0,      9,      0, "" }, /* component.mix */
                    587: { "msg",       0,      0,      0,      9,      0, "" }, /* title */
                    588: { "note",      0,      0,      0,      9,      0, "" }, /* title */
                    589: { "objectinfo",        0,      0,      0,      9,      0, "" }, /* graphic */
                    590: { "olink",     0,      0,      0,      2,      0, "" }, /* para */
                    591: { "ooclass",   0,      0,      0,      9,      0, "" }, /* modifier */
                    592: { "ooexception",0,     0,      0,      9,      0, "" }, /* modifier */
                    593: { "oointerface",0,     0,      0,      9,      0, "" }, /* modifier */
                    594: { "optional",  0,      0,      0,      9,      0, "" }, /* cptr */
                    595: { "option",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    596: { "orderedlist",0,     0,      0,      9,      0, "" }, /* formalobject.title.content */
                    597: { "orgdiv",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    598: { "orgname",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    599: { "otheraddr", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    600: { "othercredit",0,     0,      0,      9,      0, "" }, /* person.ident.mix */
                    601: { "othername", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    602: { "pagenums",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    603: { "paramdef",  0,      0,      0,      1,      0, "" },
                    604: { "parameter", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    605: { "para",      0,      0,      0,      2,      0, "" }, /* para */
                    606: { "partinfo",  0,      0,      0,      9,      0, "" }, /* graphic */
                    607: { "partintro", 0,      0,      0,      9,      0, "" }, /* div.title.content */
                    608: { "part",      0,      0,      0,      9,      0, "" }, /* partinfo */
                    609: { "phone",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    610: { "phrase",    0,      0,      0,      2,      0, "" }, /* para */
                    611: { "pob",       0,      0,      0,      4,      0, "" }, /* docinfo */
                    612: { "postcode",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    613: { "prefaceinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    614: { "preface",   0,      0,      0,      9,      0, "" }, /* prefaceinfo */
                    615: { "primaryie", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    616: { "primary  ", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    617: { "printhistory",0,    0,      0,      9,      0, "" }, /* para.class */
                    618: { "procedure", 0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    619: { "productname",0,     0,      0,      2,      0, "" }, /* para */
                    620: { "productnumber",0,   0,      0,      4,      0, "" }, /* docinfo */
                    621: { "programlistingco",0,        0,      0,      9,      0, "" }, /* areaspec */
                    622: { "programlisting",0,  0,      0,      2,      0, "" }, /* para */
                    623: { "prompt",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    624: { "property",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    625: { "pubdate",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    626: { "publishername",0,   0,      0,      4,      0, "" }, /* docinfo */
                    627: { "publisher", 0,      0,      0,      9,      0, "" }, /* publishername */
                    628: { "pubsnumber",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    629: { "qandadiv",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    630: { "qandaentry",        0,      0,      0,      9,      0, "" }, /* revhistory */
                    631: { "qandaset",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    632: { "question",  0,      0,      0,      9,      0, "" }, /* label */
                    633: { "quote",     0,      0,      0,      2,      0, "" }, /* para */
                    634: { "refclass",  0,      0,      0,      9,      0, "" }, /* refclass.char.mix */
                    635: { "refdescriptor",0,   0,      0,      9,      0, "" }, /* refname.char.mix */
                    636: { "refentryinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    637: { "refentry",  0,      0,      0,      9,      0, "" }, /* ndxterm.class */
                    638: { "refentrytitle",0,   0,      0,      2,      0, "" }, /* para */
                    639: { "referenceinfo",0,   0,      0,      9,      0, "" }, /* graphic */
                    640: { "reference", 0,      0,      0,      9,      0, "" }, /* referenceinfo */
                    641: { "refmeta",   0,      0,      0,      9,      0, "" }, /* ndxterm.class */
                    642: { "refmiscinfo",0,     0,      0,      4,      0, "" }, /* docinfo */
                    643: { "refnamediv",        0,      0,      0,      9,      0, "" }, /* refdescriptor */
                    644: { "refname",   0,      0,      0,      9,      0, "" }, /* refname.char.mix */
                    645: { "refpurpose",        0,      0,      0,      9,      0, "" }, /* refinline.char.mix */
                    646: { "refsect1info",0,    0,      0,      9,      0, "" }, /* graphic */
                    647: { "refsect1",  0,      0,      0,      9,      0, "" }, /* refsect */
                    648: { "refsect2info",0,    0,      0,      9,      0, "" }, /* graphic */
                    649: { "refsect2",  0,      0,      0,      9,      0, "" }, /* refsect */
                    650: { "refsect3info",0,    0,      0,      9,      0, "" }, /* graphic */
                    651: { "refsect3",  0,      0,      0,      9,      0, "" }, /* refsect */
                    652: { "refsynopsisdivinfo",0,0,    0,      9,      0, "" }, /* graphic */
                    653: { "refsynopsisdiv",0,  0,      0,      9,      0, "" }, /* refsynopsisdivinfo */
                    654: { "releaseinfo",0,     0,      0,      4,      0, "" }, /* docinfo */
                    655: { "remark",    0,      0,      0,      2,      0, "" }, /* para */
                    656: { "replaceable",0,     0,      0,      1,      0, "" },
                    657: { "returnvalue",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    658: { "revdescription",0,  0,      0,      9,      0, "" }, /* revdescription.mix */
                    659: { "revhistory",        0,      0,      0,      9,      0, "" }, /* revision */
                    660: { "revision",  0,      0,      0,      9,      0, "" }, /* revnumber */
                    661: { "revnumber", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    662: { "revremark", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    663: { "row",       0,      0,      0,      9,      0, "" }, /* tbl.row.mdl */
                    664: { "row",       0,      0,      0,      9,      0, "" }, /* tbl.row.mdl */
                    665: { "sbr",       0,      2,      1,      0,      0, "" },
                    666: { "screenco",  0,      0,      0,      9,      0, "" }, /* areaspec */
                    667: { "screeninfo",        0,      0,      0,      2,      0, "" }, /* para */
                    668: { "screen",    0,      0,      0,      2,      0, "" }, /* para */
                    669: { "screenshot",        0,      0,      0,      9,      0, "" }, /* screeninfo */
                    670: { "secondaryie",0,     0,      0,      4,      0, "" }, /* ndxterm */
                    671: { "secondary", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    672: { "sect1info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    673: { "sect1",     0,      0,      0,      9,      0, "" }, /* sect */
                    674: { "sect2info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    675: { "sect2",     0,      0,      0,      9,      0, "" }, /* sect */
                    676: { "sect3info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    677: { "sect3",     0,      0,      0,      9,      0, "" }, /* sect */
                    678: { "sect4info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    679: { "sect4",     0,      0,      0,      9,      0, "" }, /* sect */
                    680: { "sect5info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    681: { "sect5",     0,      0,      0,      9,      0, "" }, /* sect */
                    682: { "sectioninfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    683: { "section",   0,      0,      0,      9,      0, "" }, /* sectioninfo */
                    684: { "seealsoie", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    685: { "seealso",   0,      0,      0,      4,      0, "" }, /* ndxterm */
                    686: { "seeie",     0,      0,      0,      4,      0, "" }, /* ndxterm */
                    687: { "see",       0,      0,      0,      4,      0, "" }, /* ndxterm */
                    688: { "seglistitem",0,     0,      0,      9,      0, "" }, /* seg */
                    689: { "segmentedlist",0,   0,      0,      9,      0, "" }, /* formalobject.title.content */
                    690: { "seg",       0,      0,      0,      2,      0, "" }, /* para */
                    691: { "segtitle",  0,      0,      0,      8,      0, "" }, /* title */
                    692: { "seriesvolnums",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    693: { "set",       0,      0,      0,      9,      0, "" }, /* div.title.content */
                    694: { "setindexinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    695: { "setindex",  0,      0,      0,      9,      0, "" }, /* setindexinfo */
                    696: { "setinfo",   0,      0,      0,      9,      0, "" }, /* graphic */
                    697: { "sgmltag",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    698: { "shortaffil",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    699: { "shortcut",  0,      0,      0,      9,      0, "" }, /* keycap */
                    700: { "sidebarinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    701: { "sidebar",   0,      0,      0,      9,      0, "" }, /* sidebarinfo */
                    702: { "simpara",   0,      0,      0,      2,      0, "" }, /* para */
                    703: { "simplelist",        0,      0,      0,      9,      0, "" }, /* member */
                    704: { "simplemsgentry",    0,      0,      0,      9,      0, "" }, /* msgtext */
                    705: { "simplesect",        0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    706: { "spanspec",  0,      2,      1,      0,      0, "" },
                    707: { "state",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    708: { "step",      0,      0,      0,      9,      0, "" }, /* title */
                    709: { "street",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    710: { "structfield",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    711: { "structname",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    712: { "subjectset",        0,      0,      0,      9,      0, "" }, /* subject */
                    713: { "subject",   0,      0,      0,      9,      0, "" }, /* subjectterm */
                    714: { "subjectterm",0,     0,      0,      1,      0, "" },
                    715: { "subscript", 0,      0,      0,      1,      0, "" },
                    716: { "substeps",  0,      0,      0,      9,      0, "" }, /* step */
                    717: { "subtitle",  0,      0,      0,      8,      0, "" }, /* title */
                    718: { "superscript",       0,      0,      0,      1,      0, "" },
                    719: { "surname",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    720: { "symbol",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    721: { "synopfragment",     0,      0,      0,      9,      0, "" }, /* arg */
                    722: { "synopfragmentref",  0,      0,      0,      1,      0, "" },
                    723: { "synopsis",  0,      0,      0,      2,      0, "" }, /* para */
                    724: { "systemitem",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    725: { "table",     0,      0,      0,      9,      0, "" }, /* tbl.table.mdl */
                    726: /* { "%tbl.table.name;",       0,      0,      0,      9,      0, "" },*/ /* tbl.table.mdl */
                    727: { "tbody",     0,      0,      0,      9,      0, "" }, /* row */
                    728: { "tbody",     0,      0,      0,      9,      0, "" }, /* row */
                    729: { "term",      0,      0,      0,      2,      0, "" }, /* para */
                    730: { "tertiaryie",        0,      0,      0,      4,      0, "" }, /* ndxterm */
                    731: { "tertiary ", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    732: { "textobject",        0,      0,      0,      9,      0, "" }, /* objectinfo */
                    733: { "tfoot",     0,      0,      0,      9,      0, "" }, /* tbl.hdft.mdl */
                    734: { "tgroup",    0,      0,      0,      9,      0, "" }, /* tbl.tgroup.mdl */
                    735: { "tgroup",    0,      0,      0,      9,      0, "" }, /* tbl.tgroup.mdl */
                    736: { "thead",     0,      0,      0,      9,      0, "" }, /* row */
                    737: { "thead",     0,      0,      0,      9,      0, "" }, /* tbl.hdft.mdl */
                    738: { "tip",       0,      0,      0,      9,      0, "" }, /* title */
                    739: { "titleabbrev",0,     0,      0,      8,      0, "" }, /* title */
                    740: { "title",     0,      0,      0,      8,      0, "" }, /* title */
                    741: { "tocback",   0,      0,      0,      2,      0, "" }, /* para */
                    742: { "toc",       0,      0,      0,      9,      0, "" }, /* bookcomponent.title.content */
                    743: { "tocchap",   0,      0,      0,      9,      0, "" }, /* tocentry */
                    744: { "tocentry",  0,      0,      0,      2,      0, "" }, /* para */
                    745: { "tocfront",  0,      0,      0,      2,      0, "" }, /* para */
                    746: { "toclevel1", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    747: { "toclevel2", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    748: { "toclevel3", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    749: { "toclevel4", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    750: { "toclevel5", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    751: { "tocpart",   0,      0,      0,      9,      0, "" }, /* tocentry */
                    752: { "token",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    753: { "trademark", 0,      0,      0,      1,      0, "" },
                    754: { "type",      0,      0,      0,      7,      0, "" }, /* smallcptr */
                    755: { "ulink",     0,      0,      0,      2,      0, "" }, /* para */
                    756: { "userinput", 0,      0,      0,      9,      0, "" }, /* cptr */
                    757: { "varargs",   0,      2,      1,      0,      0, "" },
                    758: { "variablelist",0,    0,      0,      9,      0, "" }, /* formalobject.title.content */
                    759: { "varlistentry",0,    0,      0,      9,      0, "" }, /* term */
                    760: { "varname",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    761: { "videodata", 0,      2,      1,      0,      0, "" },
                    762: { "videoobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    763: { "void",      0,      2,      1,      0,      0, "" },
                    764: { "volumenum", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    765: { "warning",   0,      0,      0,      9,      0, "" }, /* title */
                    766: { "wordasword",        0,      0,      0,      3,      0, "" }, /* word */
                    767: { "xref",      0,      2,      1,      0,      0, "" },
                    768: { "year",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    769: };
                    770: 
                    /*
                     * Groups of start tags that are treated as equivalent for implied ends:
                     * a start tag from one group implies the end of the current element when
                     * the current element's name belongs to the same group.
                     *
                     * Layout: every group is terminated by a NULL entry, and one additional
                     * NULL closes the whole table.
                     *
                     * NOTE(review): the names below are HTML element names, not DocBook ones;
                     * presumably carried over from the HTML parser - confirm they are wanted.
                     */
                    char *sgmlEquEnd[] = {
                        /* list items */
                        "dt", "dd", "li", "option",
                        NULL,
                        /* headings */
                        "h1", "h2", "h3", "h4", "h5", "h6",
                        NULL,
                        /* block containers */
                        "ol", "menu", "dir", "address", "pre", "listing", "xmp",
                        NULL,
                        /* end of table */
                        NULL
                    };
                    782: /*
                    783:  * According to the SGML DTD, HR should be added to the 2nd line above, as it
                    784:  * is not allowed within a H1, H2, H3, etc. But we should tolerate that case
                    785:  * because many documents contain rules in headings...
                    786:  */
                    787: 
                    /*
                     * Start tags that imply the end of the current element.
                     *
                     * sgmlInitAutoClose() scans this table as a sequence of NULL-terminated
                     * groups and stops at the first group that begins with NULL. The table
                     * currently holds no group at all, only that closing NULL.
                     */
                    char *sgmlStartClose[] = { NULL };
                    794: 
                    /*
                     * Names of the SGML elements that are not supposed to carry CDATA
                     * content directly and for which a p element will be implied.
                     * The list is NULL-terminated and currently empty.
                     *
                     * TODO: extend this list by reading the SGML DTD rules on implied
                     *       paragraphs.
                     */
                    static char *sgmlNoContentElements[] = { NULL };
                    805: 
                    806: 
                    /*
                     * Guard tested by sgmlInitAutoClose() and sgmlCheckAutoClose() to decide
                     * whether the lookup index below still has to be built; starts at 0.
                     */
                    static int sgmlStartCloseIndexinitialized = 0;

                    /*
                     * Lookup index over sgmlStartClose: sgmlInitAutoClose() stores in each
                     * used slot the address of the first entry of one group; unused slots
                     * are NULL.
                     */
                    static char **sgmlStartCloseIndex[100];
                    809: 
                    810: /************************************************************************
                    811:  *                                                                     *
                    812:  *             functions to handle SGML specific data                  *
                    813:  *                                                                     *
                    814:  ************************************************************************/
                    815: 
                    816: /**
                    817:  * sgmlInitAutoClose:
                    818:  *
                    819:  * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
                    820:  *
                    821:  */
                    822: void
                    823: sgmlInitAutoClose(void) {
                    824:     int index, i = 0;
                    825: 
                    826:     if (sgmlStartCloseIndexinitialized) return;
                    827: 
                    828:     for (index = 0;index < 100;index ++) sgmlStartCloseIndex[index] = NULL;
                    829:     index = 0;
                    830:     while ((sgmlStartClose[i] != NULL) && (index < 100 - 1)) {
                    831:         sgmlStartCloseIndex[index++] = &sgmlStartClose[i];
                    832:        while (sgmlStartClose[i] != NULL) i++;
                    833:        i++;
                    834:     }
                    835: }
                    836: 
                    837: /**
                    838:  * sgmlTagLookup:
                    839:  * @tag:  The tag name
                    840:  *
                    841:  * Lookup the SGML tag in the ElementTable
                    842:  *
                    843:  * Returns the related sgmlElemDescPtr or NULL if not found.
                    844:  */
                    845: sgmlElemDescPtr
                    846: sgmlTagLookup(const xmlChar *tag) {
                    847:     int i;
                    848: 
                    849:     for (i = 0; i < (sizeof(docbookElementTable) /
                    850:                      sizeof(docbookElementTable[0]));i++) {
1.7       veillard  851:         if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name))
1.1       veillard  852:            return(&docbookElementTable[i]);
                    853:     }
                    854:     return(NULL);
                    855: }
                    856: 
                    857: /**
                    858:  * sgmlCheckAutoClose:
                    859:  * @newtag:  The new tag name
                    860:  * @oldtag:  The old tag name
                    861:  *
                    862:  * Checks wether the new tag is one of the registered valid tags for closing old.
                    863:  * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
                    864:  *
                    865:  * Returns 0 if no, 1 if yes.
                    866:  */
                    867: int
                    868: sgmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) {
                    869:     int i, index;
                    870:     char **close;
                    871: 
                    872:     if (sgmlStartCloseIndexinitialized == 0) sgmlInitAutoClose();
                    873: 
                    874:     /* inefficient, but not a big deal */
                    875:     for (index = 0; index < 100;index++) {
                    876:         close = sgmlStartCloseIndex[index];
                    877:        if (close == NULL) return(0);
1.7       veillard  878:        if (xmlStrEqual(BAD_CAST *close, newtag)) break;
1.1       veillard  879:     }
                    880: 
                    881:     i = close - sgmlStartClose;
                    882:     i++;
                    883:     while (sgmlStartClose[i] != NULL) {
1.7       veillard  884:         if (xmlStrEqual(BAD_CAST sgmlStartClose[i], oldtag)) {
1.1       veillard  885:            return(1);
                    886:        }
                    887:        i++;
                    888:     }
                    889:     return(0);
                    890: }
                    891: 
                    892: /**
                    893:  * sgmlAutoCloseOnClose:
                    894:  * @ctxt:  an SGML parser context
                    895:  * @newtag:  The new tag name
                    896:  *
                    897:  * The HTmL DtD allows an ending tag to implicitely close other tags.
                    898:  */
                    899: void
                    900: sgmlAutoCloseOnClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
                    901:     sgmlElemDescPtr info;
                    902:     xmlChar *oldname;
                    903:     int i;
                    904: 
                    905:     if ((newtag[0] == '/') && (newtag[1] == 0))
                    906:        return;
                    907: 
                    908: #ifdef DEBUG
                    909:     fprintf(stderr,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
                    910:     for (i = 0;i < ctxt->nameNr;i++) 
                    911:         fprintf(stderr,"%d : %s\n", i, ctxt->nameTab[i]);
                    912: #endif
                    913: 
                    914:     for (i = (ctxt->nameNr - 1);i >= 0;i--) {
1.7       veillard  915:         if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
1.1       veillard  916:     }
                    917:     if (i < 0) return;
                    918: 
1.7       veillard  919:     while (!xmlStrEqual(newtag, ctxt->name)) {
1.1       veillard  920:        info = sgmlTagLookup(ctxt->name);
                    921:        if ((info == NULL) || (info->endTag == 1)) {
                    922: #ifdef DEBUG
                    923:            fprintf(stderr,"sgmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
                    924: #endif
                    925:         } else {
                    926:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    927:                ctxt->sax->error(ctxt->userData,
                    928:                 "Opening and ending tag mismatch: %s and %s\n",
                    929:                                 newtag, ctxt->name);
                    930:            ctxt->wellFormed = 0;
                    931:        }
                    932:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    933:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    934:        oldname = sgmlnamePop(ctxt);
                    935:        if (oldname != NULL) {
                    936: #ifdef DEBUG
                    937:            fprintf(stderr,"sgmlAutoCloseOnClose: popped %s\n", oldname);
                    938: #endif
                    939:            xmlFree(oldname);
                    940:        }       
                    941:     }
                    942: }
                    943: 
                    944: /**
                    945:  * sgmlAutoClose:
                    946:  * @ctxt:  an SGML parser context
                    947:  * @newtag:  The new tag name or NULL
                    948:  *
                    949:  * The HTmL DtD allows a tag to implicitely close other tags.
                    950:  * The list is kept in sgmlStartClose array. This function is
                    951:  * called when a new tag has been detected and generates the
                    952:  * appropriates closes if possible/needed.
                    953:  * If newtag is NULL this mean we are at the end of the resource
                    954:  * and we should check 
                    955:  */
                    956: void
                    957: sgmlAutoClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
                    958:     xmlChar *oldname;
                    959:     while ((newtag != NULL) && (ctxt->name != NULL) && 
                    960:            (sgmlCheckAutoClose(newtag, ctxt->name))) {
                    961: #ifdef DEBUG
                    962:        fprintf(stderr,"sgmlAutoClose: %s closes %s\n", newtag, ctxt->name);
                    963: #endif
                    964:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    965:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    966:        oldname = sgmlnamePop(ctxt);
                    967:        if (oldname != NULL) {
                    968: #ifdef DEBUG
                    969:            fprintf(stderr,"sgmlAutoClose: popped %s\n", oldname);
                    970: #endif
                    971:            xmlFree(oldname);
                    972:         }
                    973:     }
                    974: #if 0
                    975:     if (newtag == NULL) {
                    976:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"head");
                    977:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"body");
                    978:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"sgml");
                    979:     }
                    980:     while ((newtag == NULL) && (ctxt->name != NULL) &&
1.7       veillard  981:           ((xmlStrEqual(ctxt->name, BAD_CAST"head")) ||
                    982:            (xmlStrEqual(ctxt->name, BAD_CAST"body")) ||
                    983:            (xmlStrEqual(ctxt->name, BAD_CAST"sgml")))) {
1.1       veillard  984: #ifdef DEBUG
                    985:        fprintf(stderr,"sgmlAutoClose: EOF closes %s\n", ctxt->name);
                    986: #endif
                    987:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    988:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    989:        oldname = sgmlnamePop(ctxt);
                    990:        if (oldname != NULL) {
                    991: #ifdef DEBUG
                    992:            fprintf(stderr,"sgmlAutoClose: popped %s\n", oldname);
                    993: #endif
                    994:            xmlFree(oldname);
                    995:         }
                    996:    }
                    997: #endif
                    998: }
                    999: 
                   1000: /**
                   1001:  * sgmlAutoCloseTag:
                   1002:  * @doc:  the SGML document
                   1003:  * @name:  The tag name
                   1004:  * @elem:  the SGML element
                   1005:  *
                   1006:  * The HTmL DtD allows a tag to implicitely close other tags.
                   1007:  * The list is kept in sgmlStartClose array. This function checks
                   1008:  * if the element or one of it's children would autoclose the
                   1009:  * given tag.
                   1010:  *
                   1011:  * Returns 1 if autoclose, 0 otherwise
                   1012:  */
                   1013: int
                   1014: sgmlAutoCloseTag(sgmlDocPtr doc, const xmlChar *name, sgmlNodePtr elem) {
                   1015:     sgmlNodePtr child;
                   1016: 
                   1017:     if (elem == NULL) return(1);
1.7       veillard 1018:     if (xmlStrEqual(name, elem->name)) return(0);
1.1       veillard 1019:     if (sgmlCheckAutoClose(elem->name, name)) return(1);
                   1020:     child = elem->children;
                   1021:     while (child != NULL) {
                   1022:         if (sgmlAutoCloseTag(doc, name, child)) return(1);
                   1023:        child = child->next;
                   1024:     }
                   1025:     return(0);
                   1026: }
                   1027: 
                   1028: /**
                   1029:  * sgmlIsAutoClosed:
                   1030:  * @doc:  the SGML document
                   1031:  * @elem:  the SGML element
                   1032:  *
                   1033:  * The HTmL DtD allows a tag to implicitely close other tags.
                   1034:  * The list is kept in sgmlStartClose array. This function checks
                   1035:  * if a tag is autoclosed by one of it's child
                   1036:  *
                   1037:  * Returns 1 if autoclosed, 0 otherwise
                   1038:  */
                   1039: int
                   1040: sgmlIsAutoClosed(sgmlDocPtr doc, sgmlNodePtr elem) {
                   1041:     sgmlNodePtr child;
                   1042: 
                   1043:     if (elem == NULL) return(1);
                   1044:     child = elem->children;
                   1045:     while (child != NULL) {
                   1046:        if (sgmlAutoCloseTag(doc, elem->name, child)) return(1);
                   1047:        child = child->next;
                   1048:     }
                   1049:     return(0);
                   1050: }
                   1051: 
                   1052: /**
                   1053:  * sgmlCheckImplied:
                   1054:  * @ctxt:  an SGML parser context
                   1055:  * @newtag:  The new tag name
                   1056:  *
                   1057:  * The HTmL DtD allows a tag to exists only implicitely
                   1058:  * called when a new tag has been detected and generates the
                   1059:  * appropriates implicit tags if missing
                   1060:  */
                   1061: void
                   1062: sgmlCheckImplied(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
                   1063: #if 0
1.7       veillard 1064:     if (xmlStrEqual(newtag, BAD_CAST"sgml"))
1.1       veillard 1065:        return;
                   1066:     if (ctxt->nameNr <= 0) {
                   1067: #ifdef DEBUG
                   1068:        fprintf(stderr,"Implied element sgml: pushed sgml\n");
                   1069: #endif    
                   1070:        sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"sgml"));
                   1071:        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1072:            ctxt->sax->startElement(ctxt->userData, BAD_CAST"sgml", NULL);
                   1073:     }
1.7       veillard 1074:     if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
1.1       veillard 1075:         return;
                   1076:     if (ctxt->nameNr <= 1) {
1.7       veillard 1077:        if ((xmlStrEqual(newtag, BAD_CAST"script")) ||
                   1078:            (xmlStrEqual(newtag, BAD_CAST"style")) ||
                   1079:            (xmlStrEqual(newtag, BAD_CAST"meta")) ||
                   1080:            (xmlStrEqual(newtag, BAD_CAST"link")) ||
                   1081:            (xmlStrEqual(newtag, BAD_CAST"title")) ||
                   1082:            (xmlStrEqual(newtag, BAD_CAST"base"))) {
1.1       veillard 1083:            /* 
                   1084:             * dropped OBJECT ... i you put it first BODY will be
                   1085:             * assumed !
                   1086:             */
                   1087: #ifdef DEBUG
                   1088:            fprintf(stderr,"Implied element head: pushed head\n");
                   1089: #endif    
                   1090:            sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"head"));
                   1091:            if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1092:                ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
                   1093:        } else {
                   1094: #ifdef DEBUG
                   1095:            fprintf(stderr,"Implied element body: pushed body\n");
                   1096: #endif    
                   1097:            sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"body"));
                   1098:            if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1099:                ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
                   1100:        }
                   1101:     }
                   1102: #endif
                   1103: }
                   1104: 
                   1105: /**
                   1106:  * sgmlCheckParagraph
                   1107:  * @ctxt:  an SGML parser context
                   1108:  *
                   1109:  * Check whether a p element need to be implied before inserting
                   1110:  * characters in the current element.
                   1111:  *
                   1112:  * Returns 1 if a paragraph has been inserted, 0 if not and -1
                   1113:  *         in case of error.
                   1114:  */
                   1115: 
                   1116: int
                   1117: sgmlCheckParagraph(sgmlParserCtxtPtr ctxt) {
                   1118:     const xmlChar *tag;
                   1119:     int i;
                   1120: 
                   1121:     if (ctxt == NULL)
                   1122:        return(-1);
                   1123:     tag = ctxt->name;
                   1124:     if (tag == NULL) {
                   1125:        sgmlAutoClose(ctxt, BAD_CAST"p");
                   1126:        sgmlCheckImplied(ctxt, BAD_CAST"p");
                   1127:        sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
                   1128:        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1129:            ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
                   1130:        return(1);
                   1131:     }
                   1132:     for (i = 0; sgmlNoContentElements[i] != NULL; i++) {
1.7       veillard 1133:        if (xmlStrEqual(tag, BAD_CAST sgmlNoContentElements[i])) {
1.1       veillard 1134: #ifdef DEBUG
                   1135:            fprintf(stderr,"Implied element paragraph\n");
                   1136: #endif    
                   1137:            sgmlAutoClose(ctxt, BAD_CAST"p");
                   1138:            sgmlCheckImplied(ctxt, BAD_CAST"p");
                   1139:            sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
                   1140:            if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1141:                ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
                   1142:            return(1);
                   1143:        }
                   1144:     }
                   1145:     return(0);
                   1146: }
                   1147: 
                   1148: /************************************************************************
                   1149:  *                                                                     *
                   1150:  *             The list of SGML predefined entities                    *
                   1151:  *                                                                     *
                   1152:  ************************************************************************/
                   1153: 
                   1154: 
                   1155: sgmlEntityDesc  docbookEntitiesTable[] = {
                   1156: /*
                   1157:  * the 4 absolute ones, plus apostrophe.
                   1158:  */
                   1159: { 0x0026, "amp", "AMPERSAND" },
                   1160: { 0x003C, "lt",        "LESS-THAN SIGN" },
                   1161: 
                   1162: /*
                   1163:  * Converted with VI macros from docbook ent files
                   1164:  */
                   1165: { 0x0021, "excl", "EXCLAMATION MARK" },
                   1166: { 0x0022, "quot", "QUOTATION MARK" },
                   1167: { 0x0023, "num", "NUMBER SIGN" },
                   1168: { 0x0024, "dollar", "DOLLAR SIGN" },
                   1169: { 0x0025, "percnt", "PERCENT SIGN" },
                   1170: { 0x0027, "apos", "APOSTROPHE" },
                   1171: { 0x0028, "lpar", "LEFT PARENTHESIS" },
                   1172: { 0x0029, "rpar", "RIGHT PARENTHESIS" },
                   1173: { 0x002A, "ast", "ASTERISK OPERATOR" },
                   1174: { 0x002B, "plus", "PLUS SIGN" },
                   1175: { 0x002C, "comma", "COMMA" },
                   1176: { 0x002D, "hyphen", "HYPHEN-MINUS" },
                   1177: { 0x002E, "period", "FULL STOP" },
                   1178: { 0x002F, "sol", "SOLIDUS" },
                   1179: { 0x003A, "colon", "COLON" },
                   1180: { 0x003B, "semi", "SEMICOLON" },
                   1181: { 0x003D, "equals", "EQUALS SIGN" },
                   1182: { 0x003E, "gt", "GREATER-THAN SIGN" },
                   1183: { 0x003F, "quest", "QUESTION MARK" },
                   1184: { 0x0040, "commat", "COMMERCIAL AT" },
                   1185: { 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
                   1186: { 0x005C, "bsol", "REVERSE SOLIDUS" },
                   1187: { 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
                   1188: { 0x005E, "circ", "RING OPERATOR" },
                   1189: { 0x005F, "lowbar", "LOW LINE" },
                   1190: { 0x0060, "grave", "GRAVE ACCENT" },
                   1191: { 0x007B, "lcub", "LEFT CURLY BRACKET" },
                   1192: { 0x007C, "verbar", "VERTICAL LINE" },
                   1193: { 0x007D, "rcub", "RIGHT CURLY BRACKET" },
                   1194: { 0x00A0, "nbsp", "NO-BREAK SPACE" },
                   1195: { 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
                   1196: { 0x00A2, "cent", "CENT SIGN" },
                   1197: { 0x00A3, "pound", "POUND SIGN" },
                   1198: { 0x00A4, "curren", "CURRENCY SIGN" },
                   1199: { 0x00A5, "yen", "YEN SIGN" },
                   1200: { 0x00A6, "brvbar", "BROKEN BAR" },
                   1201: { 0x00A7, "sect", "SECTION SIGN" },
                   1202: { 0x00A8, "die", "" },
                   1203: { 0x00A8, "Dot", "" },
                   1204: { 0x00A8, "uml", "" },
                   1205: { 0x00A9, "copy", "COPYRIGHT SIGN" },
                   1206: { 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
                   1207: { 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
                   1208: { 0x00AC, "not", "NOT SIGN" },
                   1209: { 0x00AD, "shy", "SOFT HYPHEN" },
                   1210: { 0x00AE, "reg", "REG TRADE MARK SIGN" },
                   1211: { 0x00AF, "macr", "MACRON" },
                   1212: { 0x00B0, "deg", "DEGREE SIGN" },
                   1213: { 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
                   1214: { 0x00B2, "sup2", "SUPERSCRIPT TWO" },
                   1215: { 0x00B3, "sup3", "SUPERSCRIPT THREE" },
                   1216: { 0x00B4, "acute", "ACUTE ACCENT" },
                   1217: { 0x00B5, "micro", "MICRO SIGN" },
                   1218: { 0x00B6, "para", "PILCROW SIGN" },
                   1219: { 0x00B7, "middot", "MIDDLE DOT" },
                   1220: { 0x00B8, "cedil", "CEDILLA" },
                   1221: { 0x00B9, "sup1", "SUPERSCRIPT ONE" },
                   1222: { 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
                   1223: { 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
                   1224: { 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
                   1225: { 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
                   1226: { 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
                   1227: { 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
                   1228: { 0x00BF, "iquest", "INVERTED QUESTION MARK" },
                   1229: { 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
                   1230: { 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
                   1231: { 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
                   1232: { 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
                   1233: { 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
                   1234: { 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
                   1235: { 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
                   1236: { 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
                   1237: { 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
                   1238: { 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
                   1239: { 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
                   1240: { 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
                   1241: { 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
                   1242: { 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
                   1243: { 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
                   1244: { 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
                   1245: { 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
                   1246: { 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
                   1247: { 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
                   1248: { 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
                   1249: { 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
                   1250: { 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
                   1251: { 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
                   1252: { 0x00D7, "times", "MULTIPLICATION SIGN" },
                   1253: { 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
                   1254: { 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
                   1255: { 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
                   1256: { 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
                   1257: { 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
                   1258: { 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
                   1259: { 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
                   1260: { 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
                   1261: { 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
                   1262: { 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
                   1263: { 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
                   1264: { 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
                   1265: { 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
                   1266: { 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
                   1267: { 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
                   1268: { 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
                   1269: { 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
                   1270: { 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
                   1271: { 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
                   1272: { 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
                   1273: { 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
                   1274: { 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
                   1275: { 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
                   1276: { 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
                   1277: { 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
                   1278: { 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
                   1279: { 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
                   1280: { 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
                   1281: { 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
                   1282: { 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
                   1283: { 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
                   1284: { 0x00F7, "divide", "DIVISION SIGN" },
                   1285: { 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
                   1286: { 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
                   1287: { 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
                   1288: { 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
                   1289: { 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
                   1290: { 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
                   1291: { 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
                   1292: { 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
                   1293: { 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
                   1294: { 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
                   1295: { 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
                   1296: { 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
                   1297: { 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
                   1298: { 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
                   1299: { 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
                   1300: { 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
                   1301: { 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
                   1302: { 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
                   1303: { 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
                   1304: { 0x010B, "cdot", "DOT OPERATOR" },
                   1305: { 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
                   1306: { 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
                   1307: { 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
                   1308: { 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
                   1309: { 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
                   1310: { 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
                   1311: { 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
                   1312: { 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
                   1313: { 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
                   1314: { 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
                   1315: { 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
                   1316: { 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
                   1317: { 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
                   1318: { 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
                   1319: { 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
                   1320: { 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
                   1321: { 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
                   1322: { 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
                   1323: { 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
                   1324: { 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
                   1325: { 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
                   1326: { 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
                   1327: { 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
                   1328: { 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
                   1329: { 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
                   1330: { 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
                   1331: { 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
                   1332: { 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
                   1333: { 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
                   1334: { 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
                   1335: { 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
                   1336: { 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
                   1337: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
                   1338: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
                   1339: { 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
                   1340: { 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
                   1341: { 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
                   1342: { 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
                   1343: { 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
                   1344: { 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
                   1345: { 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
                   1346: { 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
                   1347: { 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
                   1348: { 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
                   1349: { 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
                   1350: { 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
                   1351: { 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
                   1352: { 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
                   1353: { 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
                   1354: { 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
                   1355: { 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
                   1356: { 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
                   1357: { 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
                   1358: { 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
                   1359: { 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
                   1360: { 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
                   1361: { 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
                   1362: { 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
                   1363: { 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
                   1364: { 0x014B, "eng", "LATIN SMALL LETTER ENG" },
                   1365: { 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
                   1366: { 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
                   1367: { 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
                   1368: { 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
                   1369: { 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
                   1370: { 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
                   1371: { 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
                   1372: { 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
                   1373: { 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
                   1374: { 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
                   1375: { 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
                   1376: { 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
                   1377: { 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
                   1378: { 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
                   1379: { 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
                   1380: { 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
                   1381: { 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
                   1382: { 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
                   1383: { 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
                   1384: { 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
                   1385: { 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
                   1386: { 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
                   1387: { 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
                   1388: { 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
                   1389: { 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
                   1390: { 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
                   1391: { 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
                   1392: { 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
                   1393: { 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
                   1394: { 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
                   1395: { 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
                   1396: { 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
                   1397: { 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
                   1398: { 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
                   1399: { 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
                   1400: { 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
                   1401: { 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
                   1402: { 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
                   1403: { 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
                   1404: { 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
                   1405: { 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
                   1406: { 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
                   1407: { 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
                   1408: { 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
                   1409: { 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
                   1410: { 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
                   1411: { 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
                   1412: { 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
                   1413: { 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
                   1414: { 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
                   1415: { 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
                   1416: { 0x02C7, "caron", "CARON" },
                   1417: { 0x02D8, "breve", "BREVE" },
                   1418: { 0x02D9, "dot", "DOT ABOVE" },
                   1419: { 0x02DA, "ring", "RING ABOVE" },
                   1420: { 0x02DB, "ogon", "OGONEK" },
                   1421: { 0x02DC, "tilde", "TILDE" },
                   1422: { 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
                   1423: { 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
                   1424: { 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
                   1425: { 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
                   1426: { 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
                   1427: { 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
                   1428: { 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
                   1429: { 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
                   1430: { 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
                   1431: { 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
                   1432: { 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
                   1433: { 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
                   1434: { 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
                   1435: { 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
                   1436: { 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
                   1437: { 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
                   1438: { 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
                   1439: { 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
                   1440: { 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
                   1441: { 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
                   1442: { 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
                   1443: { 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
                   1444: { 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
                   1445: { 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
                   1446: { 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
                   1447: { 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
                   1448: { 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
                   1449: { 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
                   1450: { 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
                   1451: { 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
                   1452: { 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
                   1453: { 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
                   1454: { 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
                   1455: { 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
                   1456: { 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
                   1457: { 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
                   1458: { 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
                   1459: { 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
                   1460: { 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
                   1461: { 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
                   1462: { 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
                   1463: { 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
                   1464: { 0x03A5, "Ugr", "" },
                   1465: { 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
                   1466: { 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
                   1467: { 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
                   1468: { 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
                   1469: { 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
                   1470: { 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
                   1471: { 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
                   1472: { 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
                   1473: { 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
                   1474: { 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
                   1475: { 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
                   1476: { 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
                   1477: { 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
                   1478: { 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
                   1479: { 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
                   1480: { 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
                   1481: { 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
                   1482: { 0x03B1, "agr", "" },
                   1483: { 0x03B1, "alpha", "" },
                   1484: { 0x03B1, "b.alpha", "" },
                   1485: { 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
                   1486: { 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
                   1487: { 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
                   1488: { 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
                   1489: { 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
                   1490: { 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
                   1491: { 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
                   1492: { 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
                   1493: { 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
                   1494: { 0x03B5, "b.epsi", "" },
                   1495: { 0x03B5, "b.epsis", "" },
                   1496: { 0x03B5, "b.epsiv", "" },
                   1497: { 0x03B5, "egr", "" },
                   1498: { 0x03B5, "epsiv", "" },
                   1499: { 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
                   1500: { 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
                   1501: { 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
                   1502: { 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
                   1503: { 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
                   1504: { 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
                   1505: { 0x03B8, "b.thetas", "" },
                   1506: { 0x03B8, "thetas", "" },
                   1507: { 0x03B8, "thgr", "" },
                   1508: { 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
                   1509: { 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
                   1510: { 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
                   1511: { 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
                   1512: { 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
                   1513: { 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
                   1514: { 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
                   1515: { 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
                   1516: { 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
                   1517: { 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
                   1518: { 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
                   1519: { 0x03BC, "mu", "GREEK SMALL LETTER MU" },
                   1520: { 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
                   1521: { 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
                   1522: { 0x03BD, "nu", "GREEK SMALL LETTER NU" },
                   1523: { 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
                   1524: { 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
                   1525: { 0x03BE, "xi", "GREEK SMALL LETTER XI" },
                   1526: { 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
                   1527: { 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
                   1528: { 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
                   1529: { 0x03C0, "pi", "GREEK SMALL LETTER PI" },
                   1530: { 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
                   1531: { 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
                   1532: { 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
                   1533: { 0x03C2, "b.sigmav", "" },
                   1534: { 0x03C2, "sfgr", "" },
                   1535: { 0x03C2, "sigmav", "" },
                   1536: { 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
                   1537: { 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
                   1538: { 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
                   1539: { 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
                   1540: { 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
                   1541: { 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
                   1542: { 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
                   1543: { 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
                   1544: { 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
                   1545: { 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
                   1546: { 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
                   1547: { 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
                   1548: { 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
                   1549: { 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
                   1550: { 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
                   1551: { 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
                   1552: { 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
                   1553: { 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
                   1554: { 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
                   1555: { 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
                   1556: { 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
                   1557: { 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
                   1558: { 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
                   1559: { 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
                   1560: { 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
                   1561: { 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
                   1562: { 0x03D1, "b.thetav", "" },
                   1563: { 0x03D1, "thetav", "" },
                   1564: { 0x03D2, "b.Upsi", "" },
                   1565: { 0x03D2, "Upsi", "" },
                   1566: { 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
                   1567: { 0x03D5, "phiv", "GREEK PHI SYMBOL" },
                   1568: { 0x03D6, "b.piv", "GREEK PI SYMBOL" },
                   1569: { 0x03D6, "piv", "GREEK PI SYMBOL" },
                   1570: { 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
                   1571: { 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
                   1572: { 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
                   1573: { 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
                   1574: { 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
                   1575: { 0x03F1, "rhov", "GREEK RHO SYMBOL" },
                   1576: { 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
                   1577: { 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
                   1578: { 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
                   1579: { 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
                   1580: { 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
                   1581: { 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
                   1582: { 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
                   1583: { 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
                   1584: { 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
                   1585: { 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
                   1586: { 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
                   1587: { 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
                   1588: { 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
                   1589: { 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
                   1590: { 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
                   1591: { 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
                   1592: { 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
                   1593: { 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
                   1594: { 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
                   1595: { 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
                   1596: { 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
                   1597: { 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
                   1598: { 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
                   1599: { 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
                   1600: { 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
                   1601: { 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
                   1602: { 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
                   1603: { 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
                   1604: { 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
                   1605: { 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
                   1606: { 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
                   1607: { 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
                   1608: { 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
                   1609: { 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
                   1610: { 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
                   1611: { 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
                   1612: { 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
                   1613: { 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
                   1614: { 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
                   1615: { 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
                   1616: { 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
                   1617: { 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
                   1618: { 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
                   1619: { 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
                   1620: { 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
                   1621: { 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
                   1622: { 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
                   1623: { 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
                   1624: { 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
                   1625: { 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
                   1626: { 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
                   1627: { 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
                   1628: { 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
                   1629: { 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
                   1630: { 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
                   1631: { 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
                   1632: { 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
                   1633: { 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
                   1634: { 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
                   1635: { 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
                   1636: { 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
                   1637: { 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
                   1638: { 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
                   1639: { 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
                   1640: { 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
                   1641: { 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
                   1642: { 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
                   1643: { 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
                   1644: { 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
                   1645: { 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
                   1646: { 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
                   1647: { 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
                   1648: { 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
                   1649: { 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
                   1650: { 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
                   1651: { 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
                   1652: { 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
                   1653: { 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
                   1654: { 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
                   1655: { 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
                   1656: { 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
                   1657: { 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
                   1658: { 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
                   1659: { 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
                   1660: { 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
                   1661: { 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
                   1662: { 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
                   1663: { 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
                   1664: { 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
                   1665: { 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
                   1666: { 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
                   1667: { 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
                   1668: { 0x2002, "ensp", "EN SPACE" },
                   1669: { 0x2003, "emsp", "EM SPACE" },
                   1670: { 0x2004, "emsp13", "THREE-PER-EM SPACE" },
                   1671: { 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
                   1672: { 0x2007, "numsp", "FIGURE SPACE" },
                   1673: { 0x2008, "puncsp", "PUNCTUATION SPACE" },
                   1674: { 0x2009, "thinsp", "THIN SPACE" },
                   1675: { 0x200A, "hairsp", "HAIR SPACE" },
                   1676: { 0x2010, "dash", "HYPHEN" },
                   1677: { 0x2013, "ndash", "EN DASH" },
                   1678: { 0x2014, "mdash", "EM DASH" },
                   1679: { 0x2015, "horbar", "HORIZONTAL BAR" },
                   1680: { 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
                   1681: { 0x2018, "lsquo", "" },
                   1682: { 0x2018, "rsquor", "" },
                   1683: { 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
                   1684: { 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
                   1685: { 0x201C, "ldquo", "" },
                   1686: { 0x201C, "rdquor", "" },
                   1687: { 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
                   1688: { 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
                   1689: { 0x2020, "dagger", "DAGGER" },
                   1690: { 0x2021, "Dagger", "DOUBLE DAGGER" },
                   1691: { 0x2022, "bull", "BULLET" },
                   1692: { 0x2025, "nldr", "TWO DOT LEADER" },
                   1693: { 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
                   1694: { 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
                   1695: { 0x2030, "permil", "PER MILLE SIGN" },
                   1696: { 0x2032, "prime", "PRIME" },
                   1697: { 0x2032, "vprime", "PRIME" },
                   1698: { 0x2033, "Prime", "DOUBLE PRIME" },
                   1699: { 0x2034, "tprime", "TRIPLE PRIME" },
                   1700: { 0x2035, "bprime", "REVERSED PRIME" },
                   1701: { 0x2041, "caret", "CARET" },
                   1702: { 0x2043, "hybull", "HYPHEN BULLET" },
                   1703: { 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
                   1704: { 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
                   1705: { 0x2105, "incare", "CARE OF" },
                   1706: { 0x210B, "hamilt", "SCRIPT CAPITAL H" },
                   1707: { 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
                   1708: { 0x2111, "image", "BLACK-LETTER CAPITAL I" },
                   1709: { 0x2112, "lagran", "SCRIPT CAPITAL L" },
                   1710: { 0x2113, "ell", "SCRIPT SMALL L" },
                   1711: { 0x2116, "numero", "NUMERO SIGN" },
                   1712: { 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
                   1713: { 0x2118, "weierp", "SCRIPT CAPITAL P" },
                   1714: { 0x211C, "real", "BLACK-LETTER CAPITAL R" },
                   1715: { 0x211E, "rx", "PRESCRIPTION TAKE" },
                   1716: { 0x2122, "trade", "TRADE MARK SIGN" },
                   1717: { 0x2126, "ohm", "OHM SIGN" },
                   1718: { 0x212B, "angst", "ANGSTROM SIGN" },
                   1719: { 0x212C, "bernou", "SCRIPT CAPITAL B" },
                   1720: { 0x2133, "phmmat", "SCRIPT CAPITAL M" },
                   1721: { 0x2134, "order", "SCRIPT SMALL O" },
                   1722: { 0x2135, "aleph", "ALEF SYMBOL" },
                   1723: { 0x2136, "beth", "BET SYMBOL" },
                   1724: { 0x2137, "gimel", "GIMEL SYMBOL" },
                   1725: { 0x2138, "daleth", "DALET SYMBOL" },
                   1726: { 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
                   1727: { 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
                   1728: { 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
                   1729: { 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
                   1730: { 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
                   1731: { 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
                   1732: { 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
                   1733: { 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
                   1734: { 0x215B, "frac18", "" },
                   1735: { 0x215C, "frac38", "" },
                   1736: { 0x215D, "frac58", "" },
                   1737: { 0x215E, "frac78", "" },
                   1738: { 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
                   1739: { 0x2191, "uarr", "UPWARDS ARROW" },
                   1740: { 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
                   1741: { 0x2193, "darr", "DOWNWARDS ARROW" },
                   1742: { 0x2194, "harr", "LEFT RIGHT ARROW" },
                   1743: { 0x2194, "xhArr", "LEFT RIGHT ARROW" },
                   1744: { 0x2194, "xharr", "LEFT RIGHT ARROW" },
                   1745: { 0x2195, "varr", "UP DOWN ARROW" },
                   1746: { 0x2196, "nwarr", "NORTH WEST ARROW" },
                   1747: { 0x2197, "nearr", "NORTH EAST ARROW" },
                   1748: { 0x2198, "drarr", "SOUTH EAST ARROW" },
                   1749: { 0x2199, "dlarr", "SOUTH WEST ARROW" },
                   1750: { 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
                   1751: { 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
                   1752: { 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
                   1753: { 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
                   1754: { 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
                   1755: { 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
                   1756: { 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
                   1757: { 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
                   1758: { 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
                   1759: { 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
                   1760: { 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
                   1761: { 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
                   1762: { 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
                   1763: { 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
                   1764: { 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
                   1765: { 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
                   1766: { 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
                   1767: { 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
                   1768: { 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
                   1769: { 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
                   1770: { 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
                   1771: { 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
                   1772: { 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
                   1773: { 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
                   1774: { 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
                   1775: { 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
                   1776: { 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
                   1777: { 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
                   1778: { 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
                   1779: { 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
                   1780: { 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
                   1781: { 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
                   1782: { 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
                   1783: { 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
                   1784: { 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
                   1785: { 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
                   1786: { 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
                   1787: { 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
                   1788: { 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
                   1789: { 0x21D0, "lArr", "LEFTWARDS ARROW" },
                   1790: { 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
                   1791: { 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
                   1792: { 0x21D2, "rArr", "RIGHTWARDS ARROW" },
                   1793: { 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
                   1794: { 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
                   1795: { 0x21D4, "hArr", "" },
                   1796: { 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
                   1797: { 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
                   1798: { 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
                   1799: { 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
                   1800: { 0x2200, "forall", "" },
                   1801: { 0x2201, "comp", "COMPLEMENT" },
                   1802: { 0x2202, "part", "" },
                   1803: { 0x2203, "exist", "" },
                   1804: { 0x2204, "nexist", "THERE DOES NOT EXIST" },
                   1805: { 0x2205, "empty", "" },
                   1806: { 0x2207, "nabla", "NABLA" },
                   1807: { 0x2209, "notin", "" },
                   1808: { 0x220A, "epsi", "" },
                   1809: { 0x220A, "epsis", "" },
                   1810: { 0x220A, "isin", "" },
                   1811: { 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
                   1812: { 0x220D, "ni", "" },
                   1813: { 0x220F, "prod", "N-ARY PRODUCT" },
                   1814: { 0x2210, "amalg", "N-ARY COPRODUCT" },
                   1815: { 0x2210, "coprod", "N-ARY COPRODUCT" },
                   1816: { 0x2210, "samalg", "" },
                   1817: { 0x2211, "sum", "N-ARY SUMMATION" },
                   1818: { 0x2212, "minus", "MINUS SIGN" },
                   1819: { 0x2213, "mnplus", "" },
                   1820: { 0x2214, "plusdo", "DOT PLUS" },
                   1821: { 0x2216, "setmn", "SET MINUS" },
                   1822: { 0x2216, "ssetmn", "SET MINUS" },
                   1823: { 0x2217, "lowast", "ASTERISK OPERATOR" },
                   1824: { 0x2218, "compfn", "RING OPERATOR" },
                   1825: { 0x221A, "radic", "" },
                   1826: { 0x221D, "prop", "" },
                   1827: { 0x221D, "vprop", "" },
                   1828: { 0x221E, "infin", "" },
                   1829: { 0x221F, "ang90", "RIGHT ANGLE" },
                   1830: { 0x2220, "ang", "ANGLE" },
                   1831: { 0x2221, "angmsd", "MEASURED ANGLE" },
                   1832: { 0x2222, "angsph", "" },
                   1833: { 0x2223, "mid", "" },
                   1834: { 0x2224, "nmid", "DOES NOT DIVIDE" },
                   1835: { 0x2225, "par", "PARALLEL TO" },
                   1836: { 0x2225, "spar", "PARALLEL TO" },
                   1837: { 0x2226, "npar", "NOT PARALLEL TO" },
                   1838: { 0x2226, "nspar", "NOT PARALLEL TO" },
                   1839: { 0x2227, "and", "" },
                   1840: { 0x2228, "or", "" },
                   1841: { 0x2229, "cap", "" },
                   1842: { 0x222A, "cup", "" },
                   1843: { 0x222B, "int", "" },
                   1844: { 0x222E, "conint", "" },
                   1845: { 0x2234, "there4", "" },
                   1846: { 0x2235, "becaus", "BECAUSE" },
                   1847: { 0x223C, "sim", "" },
                   1848: { 0x223C, "thksim", "TILDE OPERATOR" },
                   1849: { 0x223D, "bsim", "" },
                   1850: { 0x2240, "wreath", "WREATH PRODUCT" },
                   1851: { 0x2241, "nsim", "" },
                   1852: { 0x2243, "sime", "" },
                   1853: { 0x2244, "nsime", "" },
                   1854: { 0x2245, "cong", "" },
                   1855: { 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
                   1856: { 0x2248, "ap", "" },
                   1857: { 0x2248, "thkap", "ALMOST EQUAL TO" },
                   1858: { 0x2249, "nap", "NOT ALMOST EQUAL TO" },
                   1859: { 0x224A, "ape", "" },
                   1860: { 0x224C, "bcong", "ALL EQUAL TO" },
                   1861: { 0x224D, "asymp", "EQUIVALENT TO" },
                   1862: { 0x224E, "bump", "" },
                   1863: { 0x224F, "bumpe", "" },
                   1864: { 0x2250, "esdot", "" },
                   1865: { 0x2251, "eDot", "" },
                   1866: { 0x2252, "efDot", "" },
                   1867: { 0x2253, "erDot", "" },
                   1868: { 0x2254, "colone", "" },
                   1869: { 0x2255, "ecolon", "" },
                   1870: { 0x2256, "ecir", "" },
                   1871: { 0x2257, "cire", "" },
                   1872: { 0x2259, "wedgeq", "ESTIMATES" },
                   1873: { 0x225C, "trie", "" },
                   1874: { 0x2260, "ne", "" },
                   1875: { 0x2261, "equiv", "" },
                   1876: { 0x2262, "nequiv", "NOT IDENTICAL TO" },
                   1877: { 0x2264, "le", "" },
                   1878: { 0x2264, "les", "LESS-THAN OR EQUAL TO" },
                   1879: { 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
                   1880: { 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
                   1881: { 0x2266, "lE", "" },
                   1882: { 0x2267, "gE", "" },
                   1883: { 0x2268, "lnE", "" },
                   1884: { 0x2268, "lne", "" },
                   1885: { 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
                   1886: { 0x2269, "gnE", "" },
                   1887: { 0x2269, "gne", "" },
                   1888: { 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
                   1889: { 0x226A, "Lt", "MUCH LESS-THAN" },
                   1890: { 0x226B, "Gt", "MUCH GREATER-THAN" },
                   1891: { 0x226C, "twixt", "BETWEEN" },
                   1892: { 0x226E, "nlt", "NOT LESS-THAN" },
                   1893: { 0x226F, "ngt", "NOT GREATER-THAN" },
                   1894: { 0x2270, "nlE", "" },
                   1895: { 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
                   1896: { 0x2270, "nles", "" },
                   1897: { 0x2271, "ngE", "" },
                   1898: { 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
                   1899: { 0x2271, "nges", "" },
                   1900: { 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
                   1901: { 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
                   1902: { 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
                   1903: { 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
                   1904: { 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
                   1905: { 0x2277, "gl", "" },
                   1906: { 0x227A, "pr", "" },
                   1907: { 0x227B, "sc", "" },
                   1908: { 0x227C, "cupre", "" },
                   1909: { 0x227C, "pre", "" },
                   1910: { 0x227D, "sccue", "" },
                   1911: { 0x227D, "sce", "" },
                   1912: { 0x227E, "prap", "" },
                   1913: { 0x227E, "prsim", "" },
                   1914: { 0x227F, "scap", "" },
                   1915: { 0x227F, "scsim", "" },
                   1916: { 0x2280, "npr", "DOES NOT PRECEDE" },
                   1917: { 0x2281, "nsc", "DOES NOT SUCCEED" },
                   1918: { 0x2282, "sub", "" },
                   1919: { 0x2283, "sup", "" },
                   1920: { 0x2284, "nsub", "NOT A SUBSET OF" },
                   1921: { 0x2285, "nsup", "NOT A SUPERSET OF" },
                   1922: { 0x2286, "subE", "" },
                   1923: { 0x2286, "sube", "" },
                   1924: { 0x2287, "supE", "" },
                   1925: { 0x2287, "supe", "" },
                   1926: { 0x2288, "nsubE", "" },
                   1927: { 0x2288, "nsube", "" },
                   1928: { 0x2289, "nsupE", "" },
                   1929: { 0x2289, "nsupe", "" },
                   1930: { 0x228A, "subne", "" },
                   1931: { 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
                   1932: { 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
                   1933: { 0x228B, "supnE", "" },
                   1934: { 0x228B, "supne", "" },
                   1935: { 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
                   1936: { 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
                   1937: { 0x228E, "uplus", "MULTISET UNION" },
                   1938: { 0x228F, "sqsub", "" },
                   1939: { 0x2290, "sqsup", "" },
                   1940: { 0x2291, "sqsube", "" },
                   1941: { 0x2292, "sqsupe", "" },
                   1942: { 0x2293, "sqcap", "SQUARE CAP" },
                   1943: { 0x2294, "sqcup", "SQUARE CUP" },
                   1944: { 0x2295, "oplus", "CIRCLED PLUS" },
                   1945: { 0x2296, "ominus", "CIRCLED MINUS" },
                   1946: { 0x2297, "otimes", "CIRCLED TIMES" },
                   1947: { 0x2298, "osol", "CIRCLED DIVISION SLASH" },
                   1948: { 0x2299, "odot", "CIRCLED DOT OPERATOR" },
                   1949: { 0x229A, "ocir", "CIRCLED RING OPERATOR" },
                   1950: { 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
                   1951: { 0x229D, "odash", "CIRCLED DASH" },
                   1952: { 0x229E, "plusb", "SQUARED PLUS" },
                   1953: { 0x229F, "minusb", "SQUARED MINUS" },
                   1954: { 0x22A0, "timesb", "SQUARED TIMES" },
                   1955: { 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
                   1956: { 0x22A2, "vdash", "" },
                   1957: { 0x22A3, "dashv", "" },
                   1958: { 0x22A4, "top", "DOWN TACK" },
                   1959: { 0x22A5, "bottom", "" },
                   1960: { 0x22A5, "perp", "" },
                   1961: { 0x22A7, "models", "MODELS" },
                   1962: { 0x22A8, "vDash", "" },
                   1963: { 0x22A9, "Vdash", "" },
                   1964: { 0x22AA, "Vvdash", "" },
                   1965: { 0x22AC, "nvdash", "DOES NOT PROVE" },
                   1966: { 0x22AD, "nvDash", "NOT TRUE" },
                   1967: { 0x22AE, "nVdash", "DOES NOT FORCE" },
                   1968: { 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
                   1969: { 0x22B2, "vltri", "" },
                   1970: { 0x22B3, "vrtri", "" },
                   1971: { 0x22B4, "ltrie", "" },
                   1972: { 0x22B5, "rtrie", "" },
                   1973: { 0x22B8, "mumap", "MULTIMAP" },
                   1974: { 0x22BA, "intcal", "INTERCALATE" },
                   1975: { 0x22BB, "veebar", "" },
                   1976: { 0x22BC, "barwed", "NAND" },
                   1977: { 0x22C4, "diam", "DIAMOND OPERATOR" },
                   1978: { 0x22C5, "sdot", "DOT OPERATOR" },
                   1979: { 0x22C6, "sstarf", "STAR OPERATOR" },
                   1980: { 0x22C6, "star", "STAR OPERATOR" },
                   1981: { 0x22C7, "divonx", "DIVISION TIMES" },
                   1982: { 0x22C8, "bowtie", "" },
                   1983: { 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
                   1984: { 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
                   1985: { 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
                   1986: { 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
                   1987: { 0x22CD, "bsime", "" },
                   1988: { 0x22CE, "cuvee", "CURLY LOGICAL OR" },
                   1989: { 0x22CF, "cuwed", "CURLY LOGICAL AND" },
                   1990: { 0x22D0, "Sub", "" },
                   1991: { 0x22D1, "Sup", "" },
                   1992: { 0x22D2, "Cap", "DOUBLE INTERSECTION" },
                   1993: { 0x22D3, "Cup", "DOUBLE UNION" },
                   1994: { 0x22D4, "fork", "" },
                   1995: { 0x22D6, "ldot", "" },
                   1996: { 0x22D7, "gsdot", "" },
                   1997: { 0x22D8, "Ll", "" },
                   1998: { 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
                   1999: { 0x22DA, "lEg", "" },
                   2000: { 0x22DA, "leg", "" },
                   2001: { 0x22DB, "gEl", "" },
                   2002: { 0x22DB, "gel", "" },
                   2003: { 0x22DC, "els", "" },
                   2004: { 0x22DD, "egs", "" },
                   2005: { 0x22DE, "cuepr", "" },
                   2006: { 0x22DF, "cuesc", "" },
                   2007: { 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
                   2008: { 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
                   2009: { 0x22E6, "lnsim", "" },
                   2010: { 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
                   2011: { 0x22E8, "prnap", "" },
                   2012: { 0x22E8, "prnsim", "" },
                   2013: { 0x22E9, "scnap", "" },
                   2014: { 0x22E9, "scnsim", "" },
                   2015: { 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
                   2016: { 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
                   2017: { 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
                   2018: { 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
                   2019: { 0x22EE, "vellip", "" },
                   2020: { 0x2306, "Barwed", "PERSPECTIVE" },
                   2021: { 0x2308, "lceil", "LEFT CEILING" },
                   2022: { 0x2309, "rceil", "RIGHT CEILING" },
                   2023: { 0x230A, "lfloor", "LEFT FLOOR" },
                   2024: { 0x230B, "rfloor", "RIGHT FLOOR" },
                   2025: { 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
                   2026: { 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
                   2027: { 0x230E, "urcrop", "TOP RIGHT CROP" },
                   2028: { 0x230F, "ulcrop", "TOP LEFT CROP" },
                   2029: { 0x2315, "telrec", "TELEPHONE RECORDER" },
                   2030: { 0x2316, "target", "POSITION INDICATOR" },
                   2031: { 0x231C, "ulcorn", "TOP LEFT CORNER" },
                   2032: { 0x231D, "urcorn", "TOP RIGHT CORNER" },
                   2033: { 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
                   2034: { 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
                   2035: { 0x2322, "frown", "" },
                   2036: { 0x2322, "sfrown", "FROWN" },
                   2037: { 0x2323, "smile", "" },
                   2038: { 0x2323, "ssmile", "SMILE" },
                   2039: { 0x2423, "blank", "OPEN BOX" },
                   2040: { 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
                   2041: { 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
                   2042: { 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
                   2043: { 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
                   2044: { 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
                   2045: { 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
                   2046: { 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
                   2047: { 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
                   2048: { 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
                   2049: { 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
                   2050: { 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
                   2051: { 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
                   2052: { 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
                   2053: { 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
                   2054: { 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
                   2055: { 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
                   2056: { 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
                   2057: { 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
                   2058: { 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
                   2059: { 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
                   2060: { 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
                   2061: { 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
                   2062: { 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
                   2063: { 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
                   2064: { 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
                   2065: { 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
                   2066: { 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
                   2067: { 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
                   2068: { 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
                   2069: { 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
                   2070: { 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
                   2071: { 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
                   2072: { 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
                   2073: { 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
                   2074: { 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
                   2075: { 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
                   2076: { 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
                   2077: { 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
                   2078: { 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
                   2079: { 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
                   2080: { 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
                   2081: { 0x2580, "uhblk", "UPPER HALF BLOCK" },
                   2082: { 0x2584, "lhblk", "LOWER HALF BLOCK" },
                   2083: { 0x2588, "block", "FULL BLOCK" },
                   2084: { 0x2591, "blk14", "LIGHT SHADE" },
                   2085: { 0x2592, "blk12", "MEDIUM SHADE" },
                   2086: { 0x2593, "blk34", "DARK SHADE" },
                   2087: { 0x25A1, "square", "WHITE SQUARE" },
                   2088: { 0x25A1, "squ", "WHITE SQUARE" },
                   2089: { 0x25AA, "squf", "" },
                   2090: { 0x25AD, "rect", "WHITE RECTANGLE" },
                   2091: { 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
                   2092: { 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
                   2093: { 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
                   2094: { 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
                   2095: { 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
                   2096: { 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
                   2097: { 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
                   2098: { 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
                   2099: { 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
                   2100: { 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
                   2101: { 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
                   2102: { 0x25CA, "loz", "LOZENGE" },
                   2103: { 0x25CB, "cir", "WHITE CIRCLE" },
                   2104: { 0x25CB, "xcirc", "WHITE CIRCLE" },
                   2105: { 0x2605, "starf", "BLACK STAR" },
                   2106: { 0x260E, "phone", "TELEPHONE SIGN" },
                   2107: { 0x2640, "female", "" },
                   2108: { 0x2642, "male", "MALE SIGN" },
                   2109: { 0x2660, "spades", "BLACK SPADE SUIT" },
                   2110: { 0x2663, "clubs", "BLACK CLUB SUIT" },
                   2111: { 0x2665, "hearts", "BLACK HEART SUIT" },
                   2112: { 0x2666, "diams", "BLACK DIAMOND SUIT" },
                   2113: { 0x2669, "sung", "" },
                   2114: { 0x266D, "flat", "MUSIC FLAT SIGN" },
                   2115: { 0x266E, "natur", "MUSIC NATURAL SIGN" },
                   2116: { 0x266F, "sharp", "MUSIC SHARP SIGN" },
                   2117: { 0x2713, "check", "CHECK MARK" },
                   2118: { 0x2717, "cross", "BALLOT X" },
                   2119: { 0x2720, "malt", "MALTESE CROSS" },
                   2120: { 0x2726, "lozf", "" },
                   2121: { 0x2736, "sext", "SIX POINTED BLACK STAR" },
                   2122: { 0x3008, "lang", "" },
                   2123: { 0x3009, "rang", "" },
                   2124: { 0xE291, "rpargt", "" },
                   2125: { 0xE2A2, "lnap", "" },
                   2126: { 0xE2AA, "nsmid", "" },
                   2127: { 0xE2B3, "prnE", "" },
                   2128: { 0xE2B5, "scnE", "" },
                   2129: { 0xE2B8, "vsubnE", "" },
                   2130: { 0xE301, "smid", "" },
                   2131: { 0xE411, "gnap", "" },
                   2132: { 0xFB00, "fflig", "" },
                   2133: { 0xFB01, "filig", "" },
                   2134: { 0xFB02, "fllig", "" },
                   2135: { 0xFB03, "ffilig", "" },
                   2136: { 0xFB04, "ffllig", "" },
                   2137: { 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
                   2138: };
                   2139: 
/************************************************************************
 *                                                                     *
 *              Utility functions to handle entities                   *
 *                                                                     *
 ************************************************************************/
                   2145: 
                   2146: /*
                   2147:  * Macro used to grow the current buffer.
                   2148:  */
                   2149: #define growBuffer(buffer) {                                           \
                   2150:     buffer##_size *= 2;                                                        \
                   2151:     buffer = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));  \
                   2152:     if (buffer == NULL) {                                              \
                   2153:        perror("realloc failed");                                       \
                   2154:        return(NULL);                                                   \
                   2155:     }                                                                  \
                   2156: }
                   2157: 
                   2158: /**
                   2159:  * sgmlEntityLookup:
                   2160:  * @name: the entity name
                   2161:  *
                   2162:  * Lookup the given entity in EntitiesTable
                   2163:  *
                   2164:  * TODO: the linear scan is really ugly, an hash table is really needed.
                   2165:  *
                   2166:  * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
                   2167:  */
                   2168: sgmlEntityDescPtr
                   2169: sgmlEntityLookup(const xmlChar *name) {
                   2170:     int i;
                   2171: 
                   2172:     for (i = 0;i < (sizeof(docbookEntitiesTable)/
                   2173:                     sizeof(docbookEntitiesTable[0]));i++) {
1.7       veillard 2174:         if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) {
1.1       veillard 2175: #ifdef DEBUG
                   2176:             fprintf(stderr,"Found entity %s\n", name);
                   2177: #endif
                   2178:             return(&docbookEntitiesTable[i]);
                   2179:        }
                   2180:     }
                   2181:     return(NULL);
                   2182: }
                   2183: 
                   2184: /**
                   2185:  * sgmlEntityValueLookup:
                   2186:  * @value: the entity's unicode value
                   2187:  *
                   2188:  * Lookup the given entity in EntitiesTable
                   2189:  *
                   2190:  * TODO: the linear scan is really ugly, an hash table is really needed.
                   2191:  *
                   2192:  * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
                   2193:  */
                   2194: sgmlEntityDescPtr
                   2195: sgmlEntityValueLookup(int value) {
                   2196:     int i;
                   2197: #ifdef DEBUG
                   2198:     int lv = 0;
                   2199: #endif
                   2200: 
                   2201:     for (i = 0;i < (sizeof(docbookEntitiesTable)/
                   2202:                     sizeof(docbookEntitiesTable[0]));i++) {
                   2203:         if (docbookEntitiesTable[i].value >= value) {
                   2204:            if (docbookEntitiesTable[i].value > value)
                   2205:                break;
                   2206: #ifdef DEBUG
                   2207:            fprintf(stderr,"Found entity %s\n", docbookEntitiesTable[i].name);
                   2208: #endif
                   2209:             return(&docbookEntitiesTable[i]);
                   2210:        }
                   2211: #ifdef DEBUG
                   2212:        if (lv > docbookEntitiesTable[i].value) {
                   2213:            fprintf(stderr, "docbookEntitiesTable[] is not sorted (%d > %d)!\n",
                   2214:                    lv, docbookEntitiesTable[i].value);
                   2215:        }
                   2216:        lv = docbookEntitiesTable[i].value;
                   2217: #endif
                   2218:     }
                   2219:     return(NULL);
                   2220: }
                   2221: 
                   2222: /**
                   2223:  * UTF8ToSgml:
                   2224:  * @out:  a pointer to an array of bytes to store the result
                   2225:  * @outlen:  the length of @out
                   2226:  * @in:  a pointer to an array of UTF-8 chars
                   2227:  * @inlen:  the length of @in
                   2228:  *
                   2229:  * Take a block of UTF-8 chars in and try to convert it to an ASCII
                   2230:  * plus SGML entities block of chars out.
                   2231:  *
                   2232:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
                   2233:  * The value of @inlen after return is the number of octets consumed
                   2234:  *     as the return value is positive, else unpredictiable.
                   2235:  * The value of @outlen after return is the number of octets consumed.
                   2236:  */
                   2237: int
                   2238: UTF8ToSgml(unsigned char* out, int *outlen,
                   2239:               const unsigned char* in, int *inlen) {
                   2240:     const unsigned char* processed = in;
                   2241:     const unsigned char* outend;
                   2242:     const unsigned char* outstart = out;
                   2243:     const unsigned char* instart = in;
                   2244:     const unsigned char* inend;
                   2245:     unsigned int c, d;
                   2246:     int trailing;
                   2247: 
                   2248:     if (in == NULL) {
                   2249:         /*
                   2250:         * initialization nothing to do
                   2251:         */
                   2252:        *outlen = 0;
                   2253:        *inlen = 0;
                   2254:        return(0);
                   2255:     }
                   2256:     inend = in + (*inlen);
                   2257:     outend = out + (*outlen);
                   2258:     while (in < inend) {
                   2259:        d = *in++;
                   2260:        if      (d < 0x80)  { c= d; trailing= 0; }
                   2261:        else if (d < 0xC0) {
                   2262:            /* trailing byte in leading position */
                   2263:            *outlen = out - outstart;
                   2264:            *inlen = processed - instart;
                   2265:            return(-2);
                   2266:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                   2267:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                   2268:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                   2269:        else {
                   2270:            /* no chance for this in Ascii */
                   2271:            *outlen = out - outstart;
                   2272:            *inlen = processed - instart;
                   2273:            return(-2);
                   2274:        }
                   2275: 
                   2276:        if (inend - in < trailing) {
                   2277:            break;
                   2278:        } 
                   2279: 
                   2280:        for ( ; trailing; trailing--) {
                   2281:            if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
                   2282:                break;
                   2283:            c <<= 6;
                   2284:            c |= d & 0x3F;
                   2285:        }
                   2286: 
                   2287:        /* assertion: c is a single UTF-4 value */
                   2288:        if (c < 0x80) {
                   2289:            if (out + 1 >= outend)
                   2290:                break;
                   2291:            *out++ = c;
                   2292:        } else {
                   2293:            int len;
                   2294:            sgmlEntityDescPtr ent;
                   2295: 
                   2296:            /*
                   2297:             * Try to lookup a predefined SGML entity for it
                   2298:             */
                   2299: 
                   2300:            ent = sgmlEntityValueLookup(c);
                   2301:            if (ent == NULL) {
                   2302:                /* no chance for this in Ascii */
                   2303:                *outlen = out - outstart;
                   2304:                *inlen = processed - instart;
                   2305:                return(-2);
                   2306:            }
                   2307:            len = strlen(ent->name);
                   2308:            if (out + 2 + len >= outend)
                   2309:                break;
                   2310:            *out++ = '&';
                   2311:            memcpy(out, ent->name, len);
                   2312:            out += len;
                   2313:            *out++ = ';';
                   2314:        }
                   2315:        processed = in;
                   2316:     }
                   2317:     *outlen = out - outstart;
                   2318:     *inlen = processed - instart;
                   2319:     return(0);
                   2320: }
                   2321: 
                   2322: /**
                   2323:  * sgmlEncodeEntities:
                   2324:  * @out:  a pointer to an array of bytes to store the result
                   2325:  * @outlen:  the length of @out
                   2326:  * @in:  a pointer to an array of UTF-8 chars
                   2327:  * @inlen:  the length of @in
                   2328:  * @quoteChar: the quote character to escape (' or ") or zero.
                   2329:  *
                   2330:  * Take a block of UTF-8 chars in and try to convert it to an ASCII
                   2331:  * plus SGML entities block of chars out.
                   2332:  *
                   2333:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
                   2334:  * The value of @inlen after return is the number of octets consumed
                   2335:  *     as the return value is positive, else unpredictiable.
                   2336:  * The value of @outlen after return is the number of octets consumed.
                   2337:  */
                   2338: int
                   2339: sgmlEncodeEntities(unsigned char* out, int *outlen,
                   2340:                   const unsigned char* in, int *inlen, int quoteChar) {
                   2341:     const unsigned char* processed = in;
                   2342:     const unsigned char* outend = out + (*outlen);
                   2343:     const unsigned char* outstart = out;
                   2344:     const unsigned char* instart = in;
                   2345:     const unsigned char* inend = in + (*inlen);
                   2346:     unsigned int c, d;
                   2347:     int trailing;
                   2348: 
                   2349:     while (in < inend) {
                   2350:        d = *in++;
                   2351:        if      (d < 0x80)  { c= d; trailing= 0; }
                   2352:        else if (d < 0xC0) {
                   2353:            /* trailing byte in leading position */
                   2354:            *outlen = out - outstart;
                   2355:            *inlen = processed - instart;
                   2356:            return(-2);
                   2357:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                   2358:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                   2359:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                   2360:        else {
                   2361:            /* no chance for this in Ascii */
                   2362:            *outlen = out - outstart;
                   2363:            *inlen = processed - instart;
                   2364:            return(-2);
                   2365:        }
                   2366: 
                   2367:        if (inend - in < trailing)
                   2368:            break;
                   2369: 
                   2370:        while (trailing--) {
                   2371:            if (((d= *in++) & 0xC0) != 0x80) {
                   2372:                *outlen = out - outstart;
                   2373:                *inlen = processed - instart;
                   2374:                return(-2);
                   2375:            }
                   2376:            c <<= 6;
                   2377:            c |= d & 0x3F;
                   2378:        }
                   2379: 
                   2380:        /* assertion: c is a single UTF-4 value */
                   2381:        if (c < 0x80 && c != quoteChar && c != '&' && c != '<' && c != '>') {
                   2382:            if (out >= outend)
                   2383:                break;
                   2384:            *out++ = c;
                   2385:        } else {
                   2386:            sgmlEntityDescPtr ent;
                   2387:            const char *cp;
                   2388:            char nbuf[16];
                   2389:            int len;
                   2390: 
                   2391:            /*
                   2392:             * Try to lookup a predefined SGML entity for it
                   2393:             */
                   2394:            ent = sgmlEntityValueLookup(c);
                   2395:            if (ent == NULL) {
                   2396:                sprintf(nbuf, "#%u", c);
                   2397:                cp = nbuf;
                   2398:            }
                   2399:            else
                   2400:                cp = ent->name;
                   2401:            len = strlen(cp);
                   2402:            if (out + 2 + len > outend)
                   2403:                break;
                   2404:            *out++ = '&';
                   2405:            memcpy(out, cp, len);
                   2406:            out += len;
                   2407:            *out++ = ';';
                   2408:        }
                   2409:        processed = in;
                   2410:     }
                   2411:     *outlen = out - outstart;
                   2412:     *inlen = processed - instart;
                   2413:     return(0);
                   2414: }
                   2415: 
                   2416: /**
                   2417:  * sgmlDecodeEntities:
                   2418:  * @ctxt:  the parser context
                   2419:  * @len:  the len to decode (in bytes !), -1 for no size limit
                   2420:  * @end:  an end marker xmlChar, 0 if none
                   2421:  * @end2:  an end marker xmlChar, 0 if none
                   2422:  * @end3:  an end marker xmlChar, 0 if none
                   2423:  *
                   2424:  * Subtitute the SGML entities by their value
                   2425:  *
                   2426:  * DEPRECATED !!!!
                   2427:  *
                   2428:  * Returns A newly allocated string with the substitution done. The caller
                   2429:  *      must deallocate it !
                   2430:  */
                   2431: xmlChar *
                   2432: sgmlDecodeEntities(sgmlParserCtxtPtr ctxt, int len,
                   2433:                   xmlChar end, xmlChar  end2, xmlChar end3) {
                   2434:     xmlChar *name = NULL;
                   2435:     xmlChar *buffer = NULL;
                   2436:     unsigned int buffer_size = 0;
                   2437:     unsigned int nbchars = 0;
                   2438:     sgmlEntityDescPtr ent;
                   2439:     unsigned int max = (unsigned int) len;
                   2440:     int c,l;
                   2441: 
                   2442:     if (ctxt->depth > 40) {
1.6       veillard 2443:        ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.1       veillard 2444:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2445:            ctxt->sax->error(ctxt->userData,
                   2446:                "Detected entity reference loop\n");
                   2447:        ctxt->wellFormed = 0;
                   2448:        ctxt->disableSAX = 1;
                   2449:        return(NULL);
                   2450:     }
                   2451: 
                   2452:     /*
                   2453:      * allocate a translation buffer.
                   2454:      */
                   2455:     buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
                   2456:     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
                   2457:     if (buffer == NULL) {
                   2458:        perror("xmlDecodeEntities: malloc failed");
                   2459:        return(NULL);
                   2460:     }
                   2461: 
                   2462:     /*
                   2463:      * Ok loop until we reach one of the ending char or a size limit.
                   2464:      */
                   2465:     c = CUR_CHAR(l);
                   2466:     while ((nbchars < max) && (c != end) &&
                   2467:            (c != end2) && (c != end3)) {
                   2468: 
                   2469:        if (c == 0) break;
                   2470:         if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
                   2471:            int val = sgmlParseCharRef(ctxt);
                   2472:            COPY_BUF(0,buffer,nbchars,val);
                   2473:            NEXTL(l);
                   2474:        } else if ((c == '&') && (ctxt->token != '&')) {
                   2475:            ent = sgmlParseEntityRef(ctxt, &name);
                   2476:            if (name != NULL) {
                   2477:                if (ent != NULL) {
                   2478:                    int val = ent->value;
                   2479:                    COPY_BUF(0,buffer,nbchars,val);
                   2480:                    NEXTL(l);
                   2481:                } else {
                   2482:                    const xmlChar *cur = name;
                   2483: 
                   2484:                    buffer[nbchars++] = '&';
                   2485:                    if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
                   2486:                        growBuffer(buffer);
                   2487:                    }
                   2488:                    while (*cur != 0) {
                   2489:                        buffer[nbchars++] = *cur++;
                   2490:                    }
                   2491:                    buffer[nbchars++] = ';';
                   2492:                }
                   2493:            }
                   2494:        } else {
                   2495:            COPY_BUF(l,buffer,nbchars,c);
                   2496:            NEXTL(l);
                   2497:            if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
                   2498:              growBuffer(buffer);
                   2499:            }
                   2500:        }
                   2501:        c = CUR_CHAR(l);
                   2502:     }
                   2503:     buffer[nbchars++] = 0;
                   2504:     return(buffer);
                   2505: }
                   2506: 
                   2507: /************************************************************************
                   2508:  *                                                                     *
                   2509:  *             Commodity functions to handle streams                   *
                   2510:  *                                                                     *
                   2511:  ************************************************************************/
                   2512: 
                   2513: /**
                   2514:  * sgmlFreeInputStream:
                   2515:  * @input:  an sgmlParserInputPtr
                   2516:  *
                   2517:  * Free up an input stream.
                   2518:  */
                   2519: void
                   2520: sgmlFreeInputStream(sgmlParserInputPtr input) {
                   2521:     if (input == NULL) return;
                   2522: 
                   2523:     if (input->filename != NULL) xmlFree((char *) input->filename);
                   2524:     if (input->directory != NULL) xmlFree((char *) input->directory);
                   2525:     if ((input->free != NULL) && (input->base != NULL))
                   2526:         input->free((xmlChar *) input->base);
                   2527:     if (input->buf != NULL) 
                   2528:         xmlFreeParserInputBuffer(input->buf);
                   2529:     memset(input, -1, sizeof(sgmlParserInput));
                   2530:     xmlFree(input);
                   2531: }
                   2532: 
                   2533: /**
                   2534:  * sgmlNewInputStream:
                   2535:  * @ctxt:  an SGML parser context
                   2536:  *
                   2537:  * Create a new input stream structure
                   2538:  * Returns the new input stream or NULL
                   2539:  */
                   2540: sgmlParserInputPtr
                   2541: sgmlNewInputStream(sgmlParserCtxtPtr ctxt) {
                   2542:     sgmlParserInputPtr input;
                   2543: 
                   2544:     input = (xmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   2545:     if (input == NULL) {
                   2546:         ctxt->errNo = XML_ERR_NO_MEMORY;
                   2547:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2548:            ctxt->sax->error(ctxt->userData, 
                   2549:                             "malloc: couldn't allocate a new input stream\n");
                   2550:        return(NULL);
                   2551:     }
                   2552:     memset(input, 0, sizeof(sgmlParserInput));
                   2553:     input->filename = NULL;
                   2554:     input->directory = NULL;
                   2555:     input->base = NULL;
                   2556:     input->cur = NULL;
                   2557:     input->buf = NULL;
                   2558:     input->line = 1;
                   2559:     input->col = 1;
                   2560:     input->buf = NULL;
                   2561:     input->free = NULL;
                   2562:     input->version = NULL;
                   2563:     input->consumed = 0;
                   2564:     input->length = 0;
                   2565:     return(input);
                   2566: }
                   2567: 
                   2568: 
                   2569: /************************************************************************
                   2570:  *                                                                     *
                   2571:  *             Commodity functions, cleanup needed ?                   *
                   2572:  *                                                                     *
                   2573:  ************************************************************************/
                   2574: 
                   2575: /**
                   2576:  * areBlanks:
                   2577:  * @ctxt:  an SGML parser context
                   2578:  * @str:  a xmlChar *
                   2579:  * @len:  the size of @str
                   2580:  *
                   2581:  * Is this a sequence of blank chars that one can ignore ?
                   2582:  *
                   2583:  * Returns 1 if ignorable 0 otherwise.
                   2584:  */
                   2585: 
                   2586: static int areBlanks(sgmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
                   2587:     int i;
                   2588:     xmlNodePtr lastChild;
                   2589: 
                   2590:     for (i = 0;i < len;i++)
                   2591:         if (!(IS_BLANK(str[i]))) return(0);
                   2592: 
                   2593:     if (CUR == 0) return(1);
                   2594:     if (CUR != '<') return(0);
                   2595:     if (ctxt->name == NULL)
                   2596:        return(1);
                   2597: #if 0
1.7       veillard 2598:     if (xmlStrEqual(ctxt->name, BAD_CAST"sgml"))
1.1       veillard 2599:        return(1);
1.7       veillard 2600:     if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
1.1       veillard 2601:        return(1);
1.7       veillard 2602:     if (xmlStrEqual(ctxt->name, BAD_CAST"body"))
1.1       veillard 2603:        return(1);
                   2604: #endif
                   2605:     if (ctxt->node == NULL) return(0);
                   2606:     lastChild = xmlGetLastChild(ctxt->node);
                   2607:     if (lastChild == NULL) {
                   2608:         if (ctxt->node->content != NULL) return(0);
                   2609:     } else if (xmlNodeIsText(lastChild))
                   2610:         return(0);
                   2611:     return(1);
                   2612: }
                   2613: 
                   2614: /**
                   2615:  * sgmlHandleEntity:
                   2616:  * @ctxt:  an SGML parser context
                   2617:  * @entity:  an XML entity pointer.
                   2618:  *
                   2619:  * Default handling of an SGML entity, call the parser with the
                   2620:  * substitution string
                   2621:  */
                   2622: 
                   2623: void
                   2624: sgmlHandleEntity(sgmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
                   2625:     int len;
                   2626: 
                   2627:     if (entity->content == NULL) {
                   2628:         if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2629:            ctxt->sax->error(ctxt->userData, "sgmlHandleEntity %s: content == NULL\n",
                   2630:                       entity->name);
                   2631:        ctxt->wellFormed = 0;
                   2632:         return;
                   2633:     }
                   2634:     len = xmlStrlen(entity->content);
                   2635: 
                   2636:     /*
                   2637:      * Just handle the content as a set of chars.
                   2638:      */
                   2639:     sgmlCheckParagraph(ctxt);
                   2640:     if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   2641:        ctxt->sax->characters(ctxt->userData, entity->content, len);
                   2642: 
                   2643: }
                   2644: 
                   2645: /**
                   2646:  * sgmlNewDocNoDtD:
                   2647:  * @URI:  URI for the dtd, or NULL
                   2648:  * @ExternalID:  the external ID of the DTD, or NULL
                   2649:  *
                   2650:  * Returns a new document, do not intialize the DTD if not provided
                   2651:  */
                   2652: sgmlDocPtr
                   2653: sgmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
                   2654:     xmlDocPtr cur;
                   2655: 
                   2656:     /*
                   2657:      * Allocate a new document and fill the fields.
                   2658:      */
                   2659:     cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
                   2660:     if (cur == NULL) {
                   2661:         fprintf(stderr, "xmlNewDoc : malloc failed\n");
                   2662:        return(NULL);
                   2663:     }
                   2664:     memset(cur, 0, sizeof(xmlDoc));
                   2665: 
                   2666:     cur->type = XML_SGML_DOCUMENT_NODE;
                   2667:     cur->version = NULL;
                   2668:     cur->intSubset = NULL;
                   2669:     if ((ExternalID != NULL) ||
                   2670:        (URI != NULL))
                   2671:        xmlCreateIntSubset(cur, BAD_CAST "SGML", ExternalID, URI);
                   2672:     cur->doc = cur;
                   2673:     cur->name = NULL;
                   2674:     cur->children = NULL; 
                   2675:     cur->extSubset = NULL;
                   2676:     cur->oldNs = NULL;
                   2677:     cur->encoding = NULL;
                   2678:     cur->standalone = 1;
                   2679:     cur->compression = 0;
                   2680:     cur->ids = NULL;
                   2681:     cur->refs = NULL;
                   2682: #ifndef XML_WITHOUT_CORBA
                   2683:     cur->_private = NULL;
                   2684: #endif
                   2685:     return(cur);
                   2686: }
                   2687: 
                   2688: /**
                   2689:  * sgmlNewDoc:
                   2690:  * @URI:  URI for the dtd, or NULL
                   2691:  * @ExternalID:  the external ID of the DTD, or NULL
                   2692:  *
                   2693:  * Returns a new document
                   2694:  */
                   2695: sgmlDocPtr
                   2696: sgmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
                   2697:     if ((URI == NULL) && (ExternalID == NULL))
                   2698:        return(sgmlNewDocNoDtD(
                   2699:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   2700:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"));
                   2701: 
                   2702:     return(sgmlNewDocNoDtD(URI, ExternalID));
                   2703: }
                   2704: 
                   2705: 
                   2706: /************************************************************************
                   2707:  *                                                                     *
                   2708:  *                     The parser itself                               *
                   2709:  *     Relates to http://www.w3.org/TR/docbook                         *
                   2710:  *                                                                     *
                   2711:  ************************************************************************/
                   2712: 
                   2713: /************************************************************************
                   2714:  *                                                                     *
                   2715:  *                     The parser itself                               *
                   2716:  *                                                                     *
                   2717:  ************************************************************************/
                   2718: 
                   2719: /**
                   2720:  * sgmlParseSGMLName:
                   2721:  * @ctxt:  an SGML parser context
                   2722:  *
                   2723:  * parse an SGML tag or attribute name, note that we convert it to lowercase
                   2724:  * since SGML names are not case-sensitive.
                   2725:  *
                   2726:  * Returns the Tag Name parsed or NULL
                   2727:  */
                   2728: 
                   2729: xmlChar *
                   2730: sgmlParseSGMLName(sgmlParserCtxtPtr ctxt) {
                   2731:     xmlChar *ret = NULL;
                   2732:     int i = 0;
                   2733:     xmlChar loc[SGML_PARSER_BUFFER_SIZE];
                   2734: 
                   2735:     if (!IS_LETTER(CUR) && (CUR != '_') &&
                   2736:         (CUR != ':')) return(NULL);
                   2737: 
                   2738:     while ((i < SGML_PARSER_BUFFER_SIZE) &&
                   2739:            ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2740:           (CUR == ':') || (CUR == '_'))) {
                   2741:        if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
                   2742:         else loc[i] = CUR;
                   2743:        i++;
                   2744:        
                   2745:        NEXT;
                   2746:     }
                   2747:     
                   2748:     ret = xmlStrndup(loc, i);
                   2749: 
                   2750:     return(ret);
                   2751: }
                   2752: 
                   2753: /**
                   2754:  * sgmlParseName:
                   2755:  * @ctxt:  an SGML parser context
                   2756:  *
                   2757:  * parse an SGML name, this routine is case sensistive.
                   2758:  *
                   2759:  * Returns the Name parsed or NULL
                   2760:  */
                   2761: 
                   2762: xmlChar *
                   2763: sgmlParseName(sgmlParserCtxtPtr ctxt) {
                   2764:     xmlChar buf[SGML_MAX_NAMELEN];
                   2765:     int len = 0;
                   2766: 
                   2767:     GROW;
                   2768:     if (!IS_LETTER(CUR) && (CUR != '_')) {
                   2769:        return(NULL);
                   2770:     }
                   2771: 
                   2772:     while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2773:            (CUR == '.') || (CUR == '-') ||
                   2774:           (CUR == '_') || (CUR == ':') || 
                   2775:           (IS_COMBINING(CUR)) ||
                   2776:           (IS_EXTENDER(CUR))) {
                   2777:        buf[len++] = CUR;
                   2778:        NEXT;
                   2779:        if (len >= SGML_MAX_NAMELEN) {
                   2780:            fprintf(stderr, 
                   2781:               "sgmlParseName: reached SGML_MAX_NAMELEN limit\n");
                   2782:            while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2783:                   (CUR == '.') || (CUR == '-') ||
                   2784:                   (CUR == '_') || (CUR == ':') || 
                   2785:                   (IS_COMBINING(CUR)) ||
                   2786:                   (IS_EXTENDER(CUR)))
                   2787:                 NEXT;
                   2788:            break;
                   2789:        }
                   2790:     }
                   2791:     return(xmlStrndup(buf, len));
                   2792: }
                   2793: 
                   2794: /**
                   2795:  * sgmlParseSGMLAttribute:
                   2796:  * @ctxt:  an SGML parser context
                   2797:  * @stop:  a char stop value
                   2798:  * 
                   2799:  * parse an SGML attribute value till the stop (quote), if
                   2800:  * stop is 0 then it stops at the first space
                   2801:  *
                   2802:  * Returns the attribute parsed or NULL
                   2803:  */
                   2804: 
                   2805: xmlChar *
                   2806: sgmlParseSGMLAttribute(sgmlParserCtxtPtr ctxt, const xmlChar stop) {
                   2807: #if 0
                   2808:     xmlChar buf[SGML_MAX_NAMELEN];
                   2809:     int len = 0;
                   2810: 
                   2811:     GROW;
                   2812:     while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
                   2813:        if ((stop == 0) && (IS_BLANK(CUR))) break;
                   2814:        buf[len++] = CUR;
                   2815:        NEXT;
                   2816:        if (len >= SGML_MAX_NAMELEN) {
                   2817:            fprintf(stderr, 
                   2818:               "sgmlParseSGMLAttribute: reached SGML_MAX_NAMELEN limit\n");
                   2819:            while ((!IS_BLANK(CUR)) && (CUR != '<') &&
                   2820:                   (CUR != '>') &&
                   2821:                   (CUR != '\'') && (CUR != '"'))
                   2822:                 NEXT;
                   2823:            break;
                   2824:        }
                   2825:     }
                   2826:     return(xmlStrndup(buf, len));
                   2827: #else    
                   2828:     xmlChar *buffer = NULL;
                   2829:     int buffer_size = 0;
                   2830:     xmlChar *out = NULL;
                   2831:     xmlChar *name = NULL;
                   2832: 
                   2833:     xmlChar *cur = NULL;
                   2834:     sgmlEntityDescPtr ent;
                   2835: 
                   2836:     /*
                   2837:      * allocate a translation buffer.
                   2838:      */
                   2839:     buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
                   2840:     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
                   2841:     if (buffer == NULL) {
                   2842:        perror("sgmlParseSGMLAttribute: malloc failed");
                   2843:        return(NULL);
                   2844:     }
                   2845:     out = buffer;
                   2846: 
                   2847:     /*
                   2848:      * Ok loop until we reach one of the ending chars
                   2849:      */
                   2850:     while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
                   2851:        if ((stop == 0) && (IS_BLANK(CUR))) break;
                   2852:         if (CUR == '&') {
                   2853:            if (NXT(1) == '#') {
                   2854:                unsigned int c;
                   2855:                int bits;
                   2856: 
                   2857:                c = sgmlParseCharRef(ctxt);
                   2858:                if      (c <    0x80)
                   2859:                        { *out++  = c;                bits= -6; }
                   2860:                else if (c <   0x800)
                   2861:                        { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2862:                else if (c < 0x10000)
                   2863:                        { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2864:                else                 
                   2865:                        { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2866:         
                   2867:                for ( ; bits >= 0; bits-= 6) {
                   2868:                    *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2869:                }
                   2870:            } else {
                   2871:                ent = sgmlParseEntityRef(ctxt, &name);
                   2872:                if (name == NULL) {
                   2873:                    *out++ = '&';
                   2874:                    if (out - buffer > buffer_size - 100) {
                   2875:                        int index = out - buffer;
                   2876: 
                   2877:                        growBuffer(buffer);
                   2878:                        out = &buffer[index];
                   2879:                    }
                   2880:                } else if (ent == NULL) {
                   2881:                    *out++ = '&';
                   2882:                    cur = name;
                   2883:                    while (*cur != 0) {
                   2884:                        if (out - buffer > buffer_size - 100) {
                   2885:                            int index = out - buffer;
                   2886: 
                   2887:                            growBuffer(buffer);
                   2888:                            out = &buffer[index];
                   2889:                        }
                   2890:                        *out++ = *cur++;
                   2891:                    }
                   2892:                    xmlFree(name);
                   2893:                } else {
                   2894:                    unsigned int c;
                   2895:                    int bits;
                   2896: 
                   2897:                    if (out - buffer > buffer_size - 100) {
                   2898:                        int index = out - buffer;
                   2899: 
                   2900:                        growBuffer(buffer);
                   2901:                        out = &buffer[index];
                   2902:                    }
                   2903:                    c = (xmlChar)ent->value;
                   2904:                    if      (c <    0x80)
                   2905:                        { *out++  = c;                bits= -6; }
                   2906:                    else if (c <   0x800)
                   2907:                        { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2908:                    else if (c < 0x10000)
                   2909:                        { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2910:                    else                 
                   2911:                        { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2912:             
                   2913:                    for ( ; bits >= 0; bits-= 6) {
                   2914:                        *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2915:                    }
                   2916:                    xmlFree(name);
                   2917:                }
                   2918:            }
                   2919:        } else {
                   2920:            unsigned int c;
                   2921:            int bits;
                   2922: 
                   2923:            if (out - buffer > buffer_size - 100) {
                   2924:                int index = out - buffer;
                   2925: 
                   2926:                growBuffer(buffer);
                   2927:                out = &buffer[index];
                   2928:            }
                   2929:            c = CUR;
                   2930:            if      (c <    0x80)
                   2931:                    { *out++  = c;                bits= -6; }
                   2932:            else if (c <   0x800)
                   2933:                    { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2934:            else if (c < 0x10000)
                   2935:                    { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2936:            else                 
                   2937:                    { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2938:      
                   2939:            for ( ; bits >= 0; bits-= 6) {
                   2940:                *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2941:            }
                   2942:            NEXT;
                   2943:        }
                   2944:     }
                   2945:     *out++ = 0;
                   2946:     return(buffer);
                   2947: #endif
                   2948: }
                   2949: 
                   2950: /**
                   2951:  * sgmlParseNmtoken:
                   2952:  * @ctxt:  an SGML parser context
                   2953:  *
                   2954:  * Parse an SGML Nmtoken: a run of letters, digits, '.', '-', '_', ':',
                          * combining characters and extenders starting at the current input
                          * position. The characters are gathered in a fixed-size stack buffer.
                   2955:  *
                          * At most SGML_MAX_NAMELEN characters are kept: once the buffer is
                          * full a message is printed on stderr, the remaining token characters
                          * are consumed and dropped, and the truncated token is returned.
                          *
                   2956:  * Returns the result of xmlStrndup() on the collected characters.
                          * NOTE(review): presumably a fresh copy owned by the caller, and NULL
                          * only if the duplication fails -- confirm against xmlStrndup().
                   2957:  */
                   2958: 
                   2959: xmlChar *
                   2960: sgmlParseNmtoken(sgmlParserCtxtPtr ctxt) {
                   2961:     xmlChar buf[SGML_MAX_NAMELEN];
                   2962:     int len = 0;
                   2963: 
                   2964:     GROW;
                   2965:     while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2966:            (CUR == '.') || (CUR == '-') ||
                   2967:           (CUR == '_') || (CUR == ':') || 
                   2968:           (IS_COMBINING(CUR)) ||
                   2969:           (IS_EXTENDER(CUR))) {
                   2970:        buf[len++] = CUR;
                   2971:        NEXT;
                   2972:        if (len >= SGML_MAX_NAMELEN) {
                                    /*
                                     * Buffer is full: report it, then swallow the rest of the
                                     * token so parsing resumes after it.
                                     */
                   2973:            fprintf(stderr, 
                   2974:               "sgmlParseNmtoken: reached SGML_MAX_NAMELEN limit\n");
                   2975:            while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2976:                   (CUR == '.') || (CUR == '-') ||
                   2977:                   (CUR == '_') || (CUR == ':') || 
                   2978:                   (IS_COMBINING(CUR)) ||
                   2979:                   (IS_EXTENDER(CUR)))
                   2980:                 NEXT;
                   2981:            break;
                   2982:        }
                   2983:     }
                             /* buf is not NUL terminated: only the first len bytes are copied. */
                   2984:     return(xmlStrndup(buf, len));
                   2985: }
                   2986: 
                   2987: /**
                   2988:  * sgmlParseEntityRef:
                   2989:  * @ctxt:  an SGML parser context
                   2990:  * @str:  location to store the entity name
                   2991:  *
                   2992:  * Parse an SGML entity reference.
                   2993:  *
                   2994:  * [68] EntityRef ::= '&' Name ';'
                   2995:  *
                          * *str is first reset to NULL. If the current character is not '&'
                          * nothing is consumed. Otherwise the '&' and the name are consumed and
                          * *str receives the name, whether or not a ';' follows. The ';' itself
                          * is consumed only when the name is found by sgmlEntityLookup(); for
                          * an unknown entity it is left in the input.
                          *
                          * A missing name is reported through the SAX error callback and clears
                          * ctxt->wellFormed. A missing ';' is reported too, but ctxt->wellFormed
                          * is left unchanged.
                          *
                   2996:  * Returns the associated sgmlEntityDescPtr if found, or NULL otherwise,
                   2997:  *         if non-NULL *str will have to be freed by the caller.
                   2998:  */
                   2999: sgmlEntityDescPtr
                   3000: sgmlParseEntityRef(sgmlParserCtxtPtr ctxt, xmlChar **str) {
                   3001:     xmlChar *name;
                   3002:     sgmlEntityDescPtr ent = NULL;
                   3003:     *str = NULL;
                   3004: 
                   3005:     if (CUR == '&') {
                   3006:         NEXT;
                   3007:         name = sgmlParseName(ctxt);
                   3008:        if (name == NULL) {
                   3009:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3010:                ctxt->sax->error(ctxt->userData, "sgmlParseEntityRef: no name\n");
                   3011:            ctxt->wellFormed = 0;
                   3012:        } else {
                   3013:            GROW;
                   3014:            if (CUR == ';') {
                   3015:                *str = name;
                   3016: 
                   3017:                /*
                   3018:                 * Lookup the entity in the table.
                   3019:                 */
                   3020:                ent = sgmlEntityLookup(name);
                   3021:                if (ent != NULL) /* OK that's ugly !!! */
                   3022:                    NEXT;
                   3023:            } else {
                                        /* No ';' after the name: report it but still hand the name back. */
                   3024:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3025:                    ctxt->sax->error(ctxt->userData,
                   3026:                                     "sgmlParseEntityRef: expecting ';'\n");
                   3027:                *str = name;
                   3028:            }
                   3029:        }
                   3030:     }
                   3031:     return(ent);
                   3032: }
                   3033: 
                   3034: /**
                   3035:  * sgmlParseAttValue:
                   3036:  * @ctxt:  an SGML parser context
                   3037:  *
                   3038:  * Parse a value for an attribute. The value may be delimited by double
                          * quotes, by single quotes, or (an SGMLism) not be quoted at all. In
                          * every case the content is read by sgmlParseSGMLAttribute(), which is
                          * given the expected closing delimiter (0 for an unquoted value).
                          *
                   3039:  * Note: the parser won't do substitution of entities here, this
                   3040:  * will be handled later in xmlStringGetNodeList, unless it was
                   3041:  * asked for ctxt->replaceEntities != 0
                   3042:  *
                          * A missing closing quote, or an unquoted value for which
                          * sgmlParseSGMLAttribute() returns NULL, is reported through the SAX
                          * error callback and clears ctxt->wellFormed. When the closing quote
                          * is missing, whatever sgmlParseSGMLAttribute() returned is still
                          * handed back.
                          *
                   3043:  * Returns the AttValue parsed or NULL.
                   3044:  */
                   3045: 
                   3046: xmlChar *
                   3047: sgmlParseAttValue(sgmlParserCtxtPtr ctxt) {
                   3048:     xmlChar *ret = NULL;
                   3049: 
                   3050:     if (CUR == '"') {
                   3051:         NEXT;
                   3052:        ret = sgmlParseSGMLAttribute(ctxt, '"');
                   3053:         if (CUR != '"') {
                                    /* The diagnostic names the delimiter that is actually missing. */
                   3054:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3055:                ctxt->sax->error(ctxt->userData, "AttValue: \" expected\n");
                   3056:            ctxt->wellFormed = 0;
                   3057:        } else
                   3058:            NEXT;
                   3059:     } else if (CUR == '\'') {
                   3060:         NEXT;
                   3061:        ret = sgmlParseSGMLAttribute(ctxt, '\'');
                   3062:         if (CUR != '\'') {
                   3063:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3064:                ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
                   3065:            ctxt->wellFormed = 0;
                   3066:        } else
                   3067:            NEXT;
                   3068:     } else {
                   3069:         /*
                   3070:         * That's an SGMLism, the attribute value may not be quoted
                   3071:         */
                   3072:        ret = sgmlParseSGMLAttribute(ctxt, 0);
                   3073:        if (ret == NULL) {
                   3074:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3075:                ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
                   3076:            ctxt->wellFormed = 0;
                   3077:        }
                   3078:     }
                   3079:     return(ret);
                   3080: }
                   3081: 
                   3082: /**
                   3083:  * sgmlParseSystemLiteral:
                   3084:  * @ctxt:  an SGML parser context
                   3085:  *
                   3086:  * Parse an SGML system literal: a string enclosed in double or single
                          * quotes. The opening quote selects the closing delimiter; every
                          * character accepted by IS_CHAR up to that delimiter belongs to the
                          * literal.
                   3087:  *
                   3088:  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
                   3089:  *
                          * On success the closing quote is consumed. If the scan stops on a
                          * character rejected by IS_CHAR before the closing quote, or if the
                          * input does not start with a quote, the problem is reported through
                          * the SAX error callback and ctxt->wellFormed is cleared.
                          *
                   3090:  * Returns the SystemLiteral parsed (the text between the quotes,
                          * duplicated with xmlStrndup()) or NULL
                   3091:  */
                   3092: 
                   3093: xmlChar *
                   3094: sgmlParseSystemLiteral(sgmlParserCtxtPtr ctxt) {
                   3095:     const xmlChar *q;
                   3096:     xmlChar *ret = NULL;
                   3097: 
                   3098:     if (CUR == '"') {
                   3099:         NEXT;
                   3100:        q = CUR_PTR;
                   3101:        while ((IS_CHAR(CUR)) && (CUR != '"'))
                   3102:            NEXT;
                                /* Stopped on a non-character rather than on the closing quote. */
                   3103:        if (!IS_CHAR(CUR)) {
                   3104:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3105:                ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
                   3106:            ctxt->wellFormed = 0;
                   3107:        } else {
                   3108:            ret = xmlStrndup(q, CUR_PTR - q);
                   3109:            NEXT;
                   3110:         }
                   3111:     } else if (CUR == '\'') {
                   3112:         NEXT;
                   3113:        q = CUR_PTR;
                   3114:        while ((IS_CHAR(CUR)) && (CUR != '\''))
                   3115:            NEXT;
                   3116:        if (!IS_CHAR(CUR)) {
                   3117:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3118:                ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
                   3119:            ctxt->wellFormed = 0;
                   3120:        } else {
                   3121:            ret = xmlStrndup(q, CUR_PTR - q);
                   3122:            NEXT;
                   3123:         }
                   3124:     } else {
                   3125:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3126:            ctxt->sax->error(ctxt->userData,
                   3127:                             "SystemLiteral \" or ' expected\n");
                   3128:        ctxt->wellFormed = 0;
                   3129:     }
                   3130:     
                   3131:     return(ret);
                   3132: }
                   3133: 
                   3134: /**
                   3135:  * sgmlParsePubidLiteral:
                   3136:  * @ctxt:  an SGML parser context
                   3137:  *
                   3138:  * Parse an SGML public literal: a run of PubidChar enclosed in double
                          * or single quotes.
                   3139:  *
                   3140:  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
                   3141:  *
                          * On success the closing quote is consumed. A literal that does not
                          * end with its opening delimiter, or input that does not start with a
                          * quote, is reported through the SAX error callback and clears
                          * ctxt->wellFormed.
                          *
                   3142:  * Returns the PubidLiteral parsed (the text between the quotes,
                          * duplicated with xmlStrndup()) or NULL.
                   3143:  */
                   3144: 
                   3145: xmlChar *
                   3146: sgmlParsePubidLiteral(sgmlParserCtxtPtr ctxt) {
                   3147:     const xmlChar *q;
                   3148:     xmlChar *ret = NULL;
                   3149:     /*
                   3150:      * The opening quote selects which delimiter closes the literal.
                   3151:      */
                   3152:     if (CUR == '"') {
                   3153:         NEXT;
                   3154:        q = CUR_PTR;
                   3155:        while (IS_PUBIDCHAR(CUR)) NEXT;
                   3156:        if (CUR != '"') {
                   3157:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3158:                ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
                   3159:            ctxt->wellFormed = 0;
                   3160:        } else {
                   3161:            ret = xmlStrndup(q, CUR_PTR - q);
                   3162:            NEXT;
                   3163:        }
                   3164:     } else if (CUR == '\'') {
                   3165:         NEXT;
                   3166:        q = CUR_PTR;
                                /*
                                 * Production [12] excludes the apostrophe from the content of
                                 * a single-quoted literal, so it is tested explicitly in case
                                 * IS_PUBIDCHAR accepts it (assumed from XML production [13]).
                                 */
                   3167:        while ((IS_PUBIDCHAR(CUR)) && (CUR != '\''))
                   3168:            NEXT;
                                /* The literal is complete only if the scan stopped on the quote. */
                   3169:        if (CUR != '\'') {
                   3170:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3171:                ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
                   3172:            ctxt->wellFormed = 0;
                   3173:        } else {
                   3174:            ret = xmlStrndup(q, CUR_PTR - q);
                   3175:            NEXT;
                   3176:        }
                   3177:     } else {
                   3178:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3179:            ctxt->sax->error(ctxt->userData, "PubidLiteral \" or ' expected\n");
                   3180:        ctxt->wellFormed = 0;
                   3181:     }
                   3182:     
                   3183:     return(ret);
                   3184: }
                   3185: 
                   3186: /**
                   3187:  * sgmlParseCharData:
                   3188:  * @ctxt:  an SGML parser context
                   3189:  * @cdata:  int indicating whether we are within a CDATA section
                   3190:  *
                   3191:  * Parse a CharData section and hand it to the SAX layer.
                          *
                          * Characters are read until a '<' or '&' is met (unless ctxt->token
                          * currently holds that same character) or until a character rejected
                          * by IS_CHAR. They are accumulated in a stack buffer which is flushed
                          * each time it reaches SGML_PARSER_BIG_BUFFER_SIZE, and once more at
                          * the end. A chunk for which areBlanks() is true goes to the
                          * ignorableWhitespace callback; any other chunk goes to the
                          * characters callback after sgmlCheckParagraph() has been called.
                          * Nothing is delivered when ctxt->sax is NULL or ctxt->disableSAX is set.
                          *
                   3192:  * NOTE(review): @cdata is not referenced in the body, so the ']]>'
                          * end-of-section rule of the production below is not enforced here.
                   3193:  *
                   3194:  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
                   3195:  */
                   3196: 
                   3197: void
                   3198: sgmlParseCharData(sgmlParserCtxtPtr ctxt, int cdata) {
                             /*
                              * NOTE(review): the 5 spare bytes presumably leave room for the
                              * last multi-byte sequence stored by COPY_BUF before the size
                              * check runs -- confirm against the COPY_BUF definition.
                              */
                   3199:     xmlChar buf[SGML_PARSER_BIG_BUFFER_SIZE + 5];
                   3200:     int nbchar = 0;
                   3201:     int cur, l;
                   3202: 
                   3203:     SHRINK;
                   3204:     cur = CUR_CHAR(l);
                   3205:     while (((cur != '<') || (ctxt->token == '<')) &&
                   3206:            ((cur != '&') || (ctxt->token == '&')) && 
                   3207:           (IS_CHAR(cur))) {
                   3208:        COPY_BUF(l,buf,nbchar,cur);
                   3209:        if (nbchar >= SGML_PARSER_BIG_BUFFER_SIZE) {
                   3210:            /*
                   3211:             * Ok the segment is to be consumed as chars.
                   3212:             */
                   3213:            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                   3214:                if (areBlanks(ctxt, buf, nbchar)) {
                   3215:                    if (ctxt->sax->ignorableWhitespace != NULL)
                   3216:                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                   3217:                                                       buf, nbchar);
                   3218:                } else {
                   3219:                    sgmlCheckParagraph(ctxt);
                   3220:                    if (ctxt->sax->characters != NULL)
                   3221:                        ctxt->sax->characters(ctxt->userData, buf, nbchar);
                   3222:                }
                   3223:            }
                                    /* Buffer delivered (or dropped): start filling it again. */
                   3224:            nbchar = 0;
                   3225:        }
                   3226:        NEXTL(l);
                   3227:        cur = CUR_CHAR(l);
                   3228:     }
                   3229:     if (nbchar != 0) {
                   3230:        /*
                   3231:         * Ok the segment is to be consumed as chars.
                   3232:         */
                   3233:        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                   3234:            if (areBlanks(ctxt, buf, nbchar)) {
                   3235:                if (ctxt->sax->ignorableWhitespace != NULL)
                   3236:                    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
                   3237:            } else {
                   3238:                sgmlCheckParagraph(ctxt);
                   3239:                if (ctxt->sax->characters != NULL)
                   3240:                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
                   3241:            }
                   3242:        }
                   3243:     }
                   3244: }
                   3245: 
                   3246: /**
                   3247:  * sgmlParseExternalID:
                   3248:  * @ctxt:  an SGML parser context
                   3249:  * @publicID:  a xmlChar** receiving PubidLiteral
                   3250:  * @strict: indicate whether we should restrict parsing to only
                   3251:  *          production [75], see NOTE below
                   3252:  *
                   3253:  * Parse an External ID or a Public ID. The SYSTEM and PUBLIC keywords
                          * are matched through the UPPER/UPP macros.
                   3254:  *
                   3255:  * NOTE: Productions [75] and [83] interact badly since [75] can generate
                   3256:  *       'PUBLIC' S PubidLiteral S SystemLiteral
                   3257:  *
                          * NOTE(review): @strict is not referenced in the body; after PUBLIC the
                          * system literal is always optional.
                          *
                   3258:  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
                   3259:  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
                   3260:  *
                   3261:  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
                   3262:  *
                          * *publicID is written only in the PUBLIC case; it is left untouched
                          * otherwise. Missing blanks after the keyword, a missing system
                          * literal after SYSTEM and a missing public identifier after PUBLIC
                          * are reported through the SAX error callback and clear
                          * ctxt->wellFormed.
                          *
                   3263:  * Returns the SystemLiteral, and in the second
                   3264:  *                case publicID receives PubidLiteral; if strict is off
                   3265:  *                it is possible to return NULL and have publicID set.
                   3266:  */
                   3267: 
                   3268: xmlChar *
                   3269: sgmlParseExternalID(sgmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
                   3270:     xmlChar *URI = NULL;
                   3271: 
                   3272:     if ((UPPER == 'S') && (UPP(1) == 'Y') &&
                   3273:          (UPP(2) == 'S') && (UPP(3) == 'T') &&
                   3274:         (UPP(4) == 'E') && (UPP(5) == 'M')) {
                   3275:         SKIP(6);
                   3276:        if (!IS_BLANK(CUR)) {
                   3277:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3278:                ctxt->sax->error(ctxt->userData,
                   3279:                    "Space required after 'SYSTEM'\n");
                   3280:            ctxt->wellFormed = 0;
                   3281:        }
                   3282:         SKIP_BLANKS;
                   3283:        URI = sgmlParseSystemLiteral(ctxt);
                   3284:        if (URI == NULL) {
                   3285:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3286:                ctxt->sax->error(ctxt->userData,
                   3287:                  "sgmlParseExternalID: SYSTEM, no URI\n");
                   3288:            ctxt->wellFormed = 0;
                   3289:         }
                   3290:     } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
                   3291:               (UPP(2) == 'B') && (UPP(3) == 'L') &&
                   3292:               (UPP(4) == 'I') && (UPP(5) == 'C')) {
                   3293:         SKIP(6);
                   3294:        if (!IS_BLANK(CUR)) {
                   3295:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3296:                ctxt->sax->error(ctxt->userData,
                   3297:                    "Space required after 'PUBLIC'\n");
                   3298:            ctxt->wellFormed = 0;
                   3299:        }
                   3300:         SKIP_BLANKS;
                   3301:        *publicID = sgmlParsePubidLiteral(ctxt);
                   3302:        if (*publicID == NULL) {
                   3303:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3304:                ctxt->sax->error(ctxt->userData, 
                   3305:                  "sgmlParseExternalID: PUBLIC, no Public Identifier\n");
                   3306:            ctxt->wellFormed = 0;
                   3307:        }
                   3308:         SKIP_BLANKS;
                                 /* The system literal is optional here: only parse it if a quote follows. */
                   3309:         if ((CUR == '"') || (CUR == '\'')) {
                   3310:            URI = sgmlParseSystemLiteral(ctxt);
                   3311:        }
                   3312:     }
                   3313:     return(URI);
                   3314: }
                   3315: 
                   3316: /**
                   3317:  * sgmlParseComment:
                   3318:  * @ctxt:  an SGML parser context
                   3319:  *
                   3320:  * Parse an XML (SGML) comment <!-- .... --> and pass its content to the
                          * SAX comment callback. Nothing is done when the input does not start
                          * with "<!--".
                   3321:  *
                   3322:  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
                          *
                          * The scan keeps a three character window (q, r, cur). q is appended
                          * to the buffer only after the window has moved on, so the "--" of
                          * the closing "-->" never reaches the buffer. ctxt->instate is set to
                          * XML_PARSER_COMMENT while scanning and restored on every exit path.
                          *
                          * A comment that is not terminated is reported through the SAX error
                          * callback, sets ctxt->errNo to XML_ERR_COMMENT_NOT_FINISHED and
                          * clears ctxt->wellFormed. An allocation failure is reported on
                          * stderr and abandons the comment.
                   3323:  */
                   3324: void
                   3325: sgmlParseComment(sgmlParserCtxtPtr ctxt) {
                   3326:     xmlChar *buf = NULL;
                   3327:     int len;
                   3328:     int size = SGML_PARSER_BUFFER_SIZE;
                   3329:     int q, ql;
                   3330:     int r, rl;
                   3331:     int cur, l;
                   3332:     xmlParserInputState state;
                   3333: 
                   3334:     /*
                   3335:      * Check that there is a comment right here.
                   3336:      */
                   3337:     if ((RAW != '<') || (NXT(1) != '!') ||
                   3338:         (NXT(2) != '-') || (NXT(3) != '-')) return;
                   3339: 
                   3340:     state = ctxt->instate;
                   3341:     ctxt->instate = XML_PARSER_COMMENT;
                   3342:     SHRINK;
                   3343:     SKIP(4);
                   3344:     buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
                   3345:     if (buf == NULL) {
                   3346:        fprintf(stderr, "malloc of %d byte failed\n", size);
                   3347:        ctxt->instate = state;
                   3348:        return;
                   3349:     }
                   3350:     q = CUR_CHAR(ql);
                   3351:     NEXTL(ql);
                   3352:     r = CUR_CHAR(rl);
                   3353:     NEXTL(rl);
                   3354:     cur = CUR_CHAR(l);
                   3355:     len = 0;
                   3356:     while (IS_CHAR(cur) &&
                   3357:            ((cur != '>') ||
                   3358:            (r != '-') || (q != '-'))) {
                   3359:        if (len + 5 >= size) {
                                    xmlChar *tmp;

                   3360:            size *= 2;
                                    /*
                                     * Keep the old pointer until the reallocation is known
                                     * to have succeeded, so the buffer can be released
                                     * instead of leaked on failure.
                                     */
                   3361:            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
                   3362:            if (tmp == NULL) {
                   3363:                fprintf(stderr, "realloc of %d byte failed\n", size);
                                        xmlFree(buf);
                   3364:                ctxt->instate = state;
                   3365:                return;
                   3366:            }
                                    buf = tmp;
                   3367:        }
                   3368:        COPY_BUF(ql,buf,len,q);
                   3369:        q = r;
                   3370:        ql = rl;
                   3371:        r = cur;
                   3372:        rl = l;
                   3373:        NEXTL(l);
                   3374:        cur = CUR_CHAR(l);
                   3375:        if (cur == 0) {
                   3376:            SHRINK;
                   3377:            GROW;
                   3378:            cur = CUR_CHAR(l);
                   3379:        }
                   3380:     }
                   3381:     buf[len] = 0;
                   3382:     if (!IS_CHAR(cur)) {
1.6       veillard 3383:        ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.1       veillard 3384:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3385:            ctxt->sax->error(ctxt->userData,
                   3386:                             "Comment not terminated \n<!--%.50s\n", buf);
                   3387:        ctxt->wellFormed = 0;
                   3388:        xmlFree(buf);
                   3389:     } else {
                                 /* cur is the '>' closing the comment: consume it. */
                   3390:         NEXT;
                   3391:        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                   3392:            (!ctxt->disableSAX))
                   3393:            ctxt->sax->comment(ctxt->userData, buf);
                   3394:        xmlFree(buf);
                   3395:     }
                   3396:     ctxt->instate = state;
                   3397: }
                   3398: 
                   3399: /**
                   3400:  * sgmlParseCharRef:
                   3401:  * @ctxt:  an SGML parser context
                   3402:  *
                   3403:  * Parse a numeric character reference, decimal or hexadecimal.
                   3404:  *
                   3405:  * [66] CharRef ::= '&#' [0-9]+ ';' |
                   3406:  *                  '&#x' [0-9a-fA-F]+ ';'
                   3407:  *
                          * The terminating ';' is consumed when present. An invalid digit, a
                          * missing "&#" prefix and a value rejected by IS_CHAR are each
                          * reported through the SAX error callback and clear ctxt->wellFormed.
                          *
                          * The accumulated value saturates at 0x110000, one past the highest
                          * Unicode code point, so arbitrarily long digit runs cannot overflow
                          * the int; such a value is expected to fail the final IS_CHAR test.
                          *
                   3408:  * Returns the value parsed (as an int), or 0 if it is not a valid
                          * character.
                   3409:  */
                   3410: int
                   3411: sgmlParseCharRef(sgmlParserCtxtPtr ctxt) {
                   3412:     int val = 0;
                   3413: 
                   3414:     if ((CUR == '&') && (NXT(1) == '#') &&
                   3415:         (NXT(2) == 'x')) {
                   3416:        SKIP(3);
                   3417:        while (CUR != ';') {
                   3418:            if ((CUR >= '0') && (CUR <= '9')) 
                   3419:                val = val * 16 + (CUR - '0');
                   3420:            else if ((CUR >= 'a') && (CUR <= 'f'))
                   3421:                val = val * 16 + (CUR - 'a') + 10;
                   3422:            else if ((CUR >= 'A') && (CUR <= 'F'))
                   3423:                val = val * 16 + (CUR - 'A') + 10;
                   3424:            else {
                   3425:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3426:                    ctxt->sax->error(ctxt->userData, 
                   3427:                         "sgmlParseCharRef: invalid hexadecimal value\n");
                   3428:                ctxt->wellFormed = 0;
                   3429:                val = 0;
                   3430:                break;
                   3431:            }
                                    /* Saturate so that the next multiplication cannot overflow. */
                                    if (val > 0x10FFFF)
                                        val = 0x110000;
                   3432:            NEXT;
                   3433:        }
                   3434:        if (CUR == ';')
                   3435:            NEXT;
                   3436:     } else if  ((CUR == '&') && (NXT(1) == '#')) {
                   3437:        SKIP(2);
                   3438:        while (CUR != ';') {
                   3439:            if ((CUR >= '0') && (CUR <= '9')) 
                   3440:                val = val * 10 + (CUR - '0');
                   3441:            else {
                   3442:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3443:                    ctxt->sax->error(ctxt->userData, 
                   3444:                         "sgmlParseCharRef: invalid decimal value\n");
                   3445:                ctxt->wellFormed = 0;
                   3446:                val = 0;
                   3447:                break;
                   3448:            }
                                    /* Saturate so that the next multiplication cannot overflow. */
                                    if (val > 0x10FFFF)
                                        val = 0x110000;
                   3449:            NEXT;
                   3450:        }
                   3451:        if (CUR == ';')
                   3452:            NEXT;
                   3453:     } else {
                   3454:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3455:            ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid value\n");
                   3456:        ctxt->wellFormed = 0;
                   3457:     }
                   3458:     /*
                   3459:      * Check the value IS_CHAR ...
                   3460:      */
                   3461:     if (IS_CHAR(val)) {
                   3462:         return(val);
                   3463:     } else {
                   3464:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3465:            ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid xmlChar value %d\n",
                   3466:                             val);
                   3467:        ctxt->wellFormed = 0;
                   3468:     }
                   3469:     return(0);
                   3470: }
                   3471: 
                   3472: 
                   3473: /**
                   3474:  * sgmlParseDocTypeDecl :
                   3475:  * @ctxt:  an SGML parser context
                   3476:  *
                   3477:  * parse a DOCTYPE declaration
                   3478:  *
                   3479:  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 
                   3480:  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
                   3481:  */
                   3482: 
                   3483: void
                   3484: sgmlParseDocTypeDecl(sgmlParserCtxtPtr ctxt) {
                   3485:     xmlChar *name;
                   3486:     xmlChar *ExternalID = NULL;
                   3487:     xmlChar *URI = NULL;
                   3488: 
                   3489:     /*
                   3490:      * We know that '<!DOCTYPE' has been detected.
                   3491:      */
                   3492:     SKIP(9);
                   3493: 
                   3494:     SKIP_BLANKS;
                   3495: 
                   3496:     /*
                   3497:      * Parse the DOCTYPE name.
                   3498:      */
                   3499:     name = sgmlParseName(ctxt);
                   3500:     if (name == NULL) {
                   3501:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3502:            ctxt->sax->error(ctxt->userData, "sgmlParseDocTypeDecl : no DOCTYPE name !\n");
                   3503:        ctxt->wellFormed = 0;
                   3504:     }
                   3505:     /*
                   3506:      * Check that upper(name) == "SGML" !!!!!!!!!!!!!
                   3507:      */
                   3508: 
                   3509:     SKIP_BLANKS;
                   3510: 
                   3511:     /*
                   3512:      * Check for SystemID and ExternalID
                   3513:      */
                   3514:     URI = sgmlParseExternalID(ctxt, &ExternalID, 0);
                   3515:     SKIP_BLANKS;
                   3516: 
                   3517:     /*
1.2       veillard 3518:      * Create or update the document accordingly to the DOCTYPE
                   3519:      */
                   3520:     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
                   3521:        (!ctxt->disableSAX))
                   3522:        ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
                   3523: 
                   3524:     /*
                   3525:      * Is there any internal subset declarations ?
                   3526:      * they are handled separately in sgmlParseInternalSubset()
                   3527:      */
                   3528:     if (RAW == '[')
                   3529:        return;
                   3530: 
                   3531: 
                   3532:     /*
1.1       veillard 3533:      * We should be at the end of the DOCTYPE declaration.
                   3534:      */
                   3535:     if (CUR != '>') {
                   3536:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3537:            ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
                   3538:        ctxt->wellFormed = 0;
                   3539:         /* We shouldn't try to resynchronize ... */
                   3540:     }
                   3541:     NEXT;
                   3542: 
                   3543:     /*
                   3544:      * Cleanup, since we don't use all those identifiers
                   3545:      */
                   3546:     if (URI != NULL) xmlFree(URI);
                   3547:     if (ExternalID != NULL) xmlFree(ExternalID);
                   3548:     if (name != NULL) xmlFree(name);
                   3549: }
                   3550: 
                   3551: /**
                   3552:  * sgmlParseAttribute:
                   3553:  * @ctxt:  an SGML parser context
                   3554:  * @value:  a xmlChar ** used to store the value of the attribute
                   3555:  *
                   3556:  * parse an attribute
                   3557:  *
                   3558:  * [41] Attribute ::= Name Eq AttValue
                   3559:  *
                   3560:  * [25] Eq ::= S? '=' S?
                   3561:  *
                   3562:  * With namespace:
                   3563:  *
                   3564:  * [NS 11] Attribute ::= QName Eq AttValue
                   3565:  *
                   3566:  * Also the case QName == xmlns:??? is handled independently as a namespace
                   3567:  * definition.
                   3568:  *
                   3569:  * Returns the attribute name, and the value in *value.
                   3570:  */
                   3571: 
                   3572: xmlChar *
                   3573: sgmlParseAttribute(sgmlParserCtxtPtr ctxt, xmlChar **value) {
                   3574:     xmlChar *name, *val = NULL;
                   3575: 
                   3576:     *value = NULL;
                   3577:     name = sgmlParseName(ctxt);
                   3578:     if (name == NULL) {
                   3579:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3580:            ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
                   3581:        ctxt->wellFormed = 0;
                   3582:         return(NULL);
                   3583:     }
                   3584: 
                   3585:     /*
                   3586:      * read the value
                   3587:      */
                   3588:     SKIP_BLANKS;
                   3589:     if (CUR == '=') {
                   3590:         NEXT;
                   3591:        SKIP_BLANKS;
                   3592:        val = sgmlParseAttValue(ctxt);
                   3593:        /******
                   3594:     } else {
                   3595:         * TODO : some attribute must have values, some may not
                   3596:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3597:            ctxt->sax->warning(ctxt->userData,
                   3598:               "No value for attribute %s\n", name); */
                   3599:     }
                   3600: 
                   3601:     *value = val;
                   3602:     return(name);
                   3603: }
                   3604: 
                   3605: /**
                   3606:  * sgmlCheckEncoding:
                   3607:  * @ctxt:  an SGML parser context
                   3608:  * @attvalue: the attribute value
                   3609:  *
                   3610:  * Checks an http-equiv attribute from a Meta tag to detect
                   3611:  * the encoding
                   3612:  * If a new encoding is detected the parser is switched to decode
                   3613:  * it and pass UTF8
                   3614:  */
                   3615: void
                   3616: sgmlCheckEncoding(sgmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
                   3617:     const xmlChar *encoding;
                   3618: 
                   3619:     if ((ctxt == NULL) || (attvalue == NULL))
                   3620:        return;
                   3621: 
                   3622:     encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
                   3623:     if (encoding == NULL) 
                   3624:        encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
                   3625:     if (encoding == NULL) 
                   3626:        encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
                   3627:     if (encoding != NULL) {
                   3628:        encoding += 8;
                   3629:     } else {
                   3630:        encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
                   3631:        if (encoding == NULL) 
                   3632:            encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
                   3633:        if (encoding == NULL) 
                   3634:            encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
                   3635:        if (encoding != NULL)
                   3636:            encoding += 9;
                   3637:     }
                   3638:     if (encoding != NULL) {
                   3639:        xmlCharEncoding enc;
                   3640:        xmlCharEncodingHandlerPtr handler;
                   3641: 
                   3642:        while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
                   3643: 
                   3644:        if (ctxt->input->encoding != NULL)
                   3645:            xmlFree((xmlChar *) ctxt->input->encoding);
                   3646:        ctxt->input->encoding = xmlStrdup(encoding);
                   3647: 
                   3648:        enc = xmlParseCharEncoding((const char *) encoding);
                   3649:        /*
                   3650:         * registered set of known encodings
                   3651:         */
                   3652:        if (enc != XML_CHAR_ENCODING_ERROR) {
                   3653:            xmlSwitchEncoding(ctxt, enc);
                   3654:            ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   3655:        } else {
                   3656:            /*
                   3657:             * fallback for unknown encodings
                   3658:             */
                   3659:            handler = xmlFindCharEncodingHandler((const char *) encoding);
                   3660:            if (handler != NULL) {
                   3661:                xmlSwitchToEncoding(ctxt, handler);
                   3662:                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   3663:            } else {
                   3664:                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                   3665:            }
                   3666:        }
                   3667: 
                   3668:        if ((ctxt->input->buf != NULL) &&
                   3669:            (ctxt->input->buf->encoder != NULL) &&
                   3670:            (ctxt->input->buf->raw != NULL) &&
                   3671:            (ctxt->input->buf->buffer != NULL)) {
                   3672:            int nbchars;
                   3673:            int processed;
                   3674: 
                   3675:            /*
                   3676:             * convert as much as possible to the parser reading buffer.
                   3677:             */
                   3678:            processed = ctxt->input->cur - ctxt->input->base;
                   3679:            xmlBufferShrink(ctxt->input->buf->buffer, processed);
                   3680:            nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
                   3681:                                       ctxt->input->buf->buffer,
                   3682:                                       ctxt->input->buf->raw);
                   3683:            if (nbchars < 0) {
1.6       veillard 3684:                ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1       veillard 3685:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3686:                    ctxt->sax->error(ctxt->userData, 
                   3687:                     "sgmlCheckEncoding: encoder error\n");
                   3688:            }
                   3689:            ctxt->input->base =
                   3690:            ctxt->input->cur = ctxt->input->buf->buffer->content;
                   3691:        }
                   3692:     }
                   3693: }
                   3694: 
                   3695: /**
                   3696:  * sgmlCheckMeta:
                   3697:  * @ctxt:  an SGML parser context
                   3698:  * @atts:  the attributes values
                   3699:  *
                   3700:  * Checks an attributes from a Meta tag
                   3701:  */
                   3702: void
                   3703: sgmlCheckMeta(sgmlParserCtxtPtr ctxt, const xmlChar **atts) {
                   3704:     int i;
                   3705:     const xmlChar *att, *value;
                   3706:     int http = 0;
                   3707:     const xmlChar *content = NULL;
                   3708: 
                   3709:     if ((ctxt == NULL) || (atts == NULL))
                   3710:        return;
                   3711: 
                   3712:     i = 0;
                   3713:     att = atts[i++];
                   3714:     while (att != NULL) {
                   3715:        value = atts[i++];
                   3716:        if ((value != NULL) &&
1.7       veillard 3717:            ((xmlStrEqual(att, BAD_CAST"http-equiv")) ||
                   3718:             (xmlStrEqual(att, BAD_CAST"Http-Equiv")) ||
                   3719:             (xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) &&
                   3720:            ((xmlStrEqual(value, BAD_CAST"Content-Type")) ||
                   3721:             (xmlStrEqual(value, BAD_CAST"content-type")) ||
                   3722:             (xmlStrEqual(value, BAD_CAST"CONTENT-TYPE"))))
1.1       veillard 3723:            http = 1;
                   3724:        else if ((value != NULL) &&
1.7       veillard 3725:                 ((xmlStrEqual(att, BAD_CAST"content")) ||
                   3726:                  (xmlStrEqual(att, BAD_CAST"Content")) ||
                   3727:                  (xmlStrEqual(att, BAD_CAST"CONTENT"))))
1.1       veillard 3728:            content = value;
                   3729:        att = atts[i++];
                   3730:     }
                   3731:     if ((http) && (content != NULL))
                   3732:        sgmlCheckEncoding(ctxt, content);
                   3733: 
                   3734: }
                   3735: 
                   3736: /**
                   3737:  * sgmlParseStartTag:
                   3738:  * @ctxt:  an SGML parser context
                   3739:  * 
                   3740:  * parse a start of tag either for rule element or
                   3741:  * EmptyElement. In both case we don't parse the tag closing chars.
                   3742:  *
                   3743:  * [40] STag ::= '<' Name (S Attribute)* S? '>'
                   3744:  *
                   3745:  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
                   3746:  *
                   3747:  * With namespace:
                   3748:  *
                   3749:  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
                   3750:  *
                   3751:  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
                   3752:  *
                   3753:  */
                   3754: 
                   3755: void
                   3756: sgmlParseStartTag(sgmlParserCtxtPtr ctxt) {
                   3757:     xmlChar *name;
                   3758:     xmlChar *attname;
                   3759:     xmlChar *attvalue;
                   3760:     const xmlChar **atts = NULL;
                   3761:     int nbatts = 0;
                   3762:     int maxatts = 0;
                   3763:     int meta = 0;
                   3764:     int i;
                   3765: 
                   3766:     if (CUR != '<') return;
                   3767:     NEXT;
                   3768: 
                   3769:     GROW;
                   3770:     name = sgmlParseSGMLName(ctxt);
                   3771:     if (name == NULL) {
                   3772:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3773:            ctxt->sax->error(ctxt->userData, 
                   3774:             "sgmlParseStartTag: invalid element name\n");
                   3775:        ctxt->wellFormed = 0;
                   3776:         return;
                   3777:     }
1.7       veillard 3778:     if (xmlStrEqual(name, BAD_CAST"meta"))
1.1       veillard 3779:        meta = 1;
                   3780: 
                   3781:     /*
                   3782:      * Check for auto-closure of SGML elements.
                   3783:      */
                   3784:     sgmlAutoClose(ctxt, name);
                   3785: 
                   3786:     /*
                   3787:      * Check for implied SGML elements.
                   3788:      */
                   3789:     sgmlCheckImplied(ctxt, name);
                   3790: 
                   3791:     /*
                   3792:      * Now parse the attributes, it ends up with the ending
                   3793:      *
                   3794:      * (S Attribute)* S?
                   3795:      */
                   3796:     SKIP_BLANKS;
                   3797:     while ((IS_CHAR(CUR)) &&
                   3798:            (CUR != '>') && 
                   3799:           ((CUR != '/') || (NXT(1) != '>'))) {
                   3800:        long cons = ctxt->nbChars;
                   3801: 
                   3802:        GROW;
                   3803:        attname = sgmlParseAttribute(ctxt, &attvalue);
                   3804:         if (attname != NULL) {
                   3805: 
                   3806:            /*
                   3807:             * Well formedness requires at most one declaration of an attribute
                   3808:             */
                   3809:            for (i = 0; i < nbatts;i += 2) {
1.7       veillard 3810:                if (xmlStrEqual(atts[i], attname)) {
1.1       veillard 3811:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3812:                        ctxt->sax->error(ctxt->userData,
                   3813:                                         "Attribute %s redefined\n",
                   3814:                                         attname);
                   3815:                    ctxt->wellFormed = 0;
                   3816:                    xmlFree(attname);
                   3817:                    if (attvalue != NULL)
                   3818:                        xmlFree(attvalue);
                   3819:                    goto failed;
                   3820:                }
                   3821:            }
                   3822: 
                   3823:            /*
                   3824:             * Add the pair to atts
                   3825:             */
                   3826:            if (atts == NULL) {
                   3827:                maxatts = 10;
                   3828:                atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
                   3829:                if (atts == NULL) {
                   3830:                    fprintf(stderr, "malloc of %ld byte failed\n",
                   3831:                            maxatts * (long)sizeof(xmlChar *));
                   3832:                    if (name != NULL) xmlFree(name);
                   3833:                    return;
                   3834:                }
                   3835:            } else if (nbatts + 4 > maxatts) {
                   3836:                maxatts *= 2;
                   3837:                atts = (const xmlChar **) xmlRealloc(atts, maxatts * sizeof(xmlChar *));
                   3838:                if (atts == NULL) {
                   3839:                    fprintf(stderr, "realloc of %ld byte failed\n",
                   3840:                            maxatts * (long)sizeof(xmlChar *));
                   3841:                    if (name != NULL) xmlFree(name);
                   3842:                    return;
                   3843:                }
                   3844:            }
                   3845:            atts[nbatts++] = attname;
                   3846:            atts[nbatts++] = attvalue;
                   3847:            atts[nbatts] = NULL;
                   3848:            atts[nbatts + 1] = NULL;
                   3849:        }
                   3850: 
                   3851: failed:
                   3852:        SKIP_BLANKS;
                   3853:         if (cons == ctxt->nbChars) {
                   3854:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3855:                ctxt->sax->error(ctxt->userData, 
                   3856:                 "sgmlParseStartTag: problem parsing attributes\n");
                   3857:            ctxt->wellFormed = 0;
                   3858:            break;
                   3859:        }
                   3860:     }
                   3861: 
                   3862:     /*
                   3863:      * Handle specific association to the META tag
                   3864:      */
                   3865:     if (meta)
                   3866:        sgmlCheckMeta(ctxt, atts);
                   3867: 
                   3868:     /*
                   3869:      * SAX: Start of Element !
                   3870:      */
                   3871:     sgmlnamePush(ctxt, xmlStrdup(name));
                   3872: #ifdef DEBUG
                   3873:     fprintf(stderr,"Start of element %s: pushed %s\n", name, ctxt->name);
                   3874: #endif    
                   3875:     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   3876:         ctxt->sax->startElement(ctxt->userData, name, atts);
                   3877: 
                   3878:     if (atts != NULL) {
                   3879:         for (i = 0;i < nbatts;i++) {
                   3880:            if (atts[i] != NULL)
                   3881:                xmlFree((xmlChar *) atts[i]);
                   3882:        }
                   3883:        xmlFree((void *) atts);
                   3884:     }
                   3885:     if (name != NULL) xmlFree(name);
                   3886: }
                   3887: 
                   3888: /**
                   3889:  * sgmlParseEndTag:
                   3890:  * @ctxt:  an SGML parser context
                   3891:  *
                   3892:  * parse an end of tag
                   3893:  *
                   3894:  * [42] ETag ::= '</' Name S? '>'
                   3895:  *
                   3896:  * With namespace
                   3897:  *
                   3898:  * [NS 9] ETag ::= '</' QName S? '>'
                   3899:  */
                   3900: 
                   3901: void
                   3902: sgmlParseEndTag(sgmlParserCtxtPtr ctxt) {
                   3903:     xmlChar *name;
                   3904:     xmlChar *oldname;
                   3905:     int i;
                   3906: 
                   3907:     if ((CUR != '<') || (NXT(1) != '/')) {
                   3908:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3909:            ctxt->sax->error(ctxt->userData, "sgmlParseEndTag: '</' not found\n");
                   3910:        ctxt->wellFormed = 0;
                   3911:        return;
                   3912:     }
                   3913:     SKIP(2);
                   3914: 
                   3915:     name = sgmlParseSGMLName(ctxt);
                   3916:     if (name == NULL) {
                   3917:        if (CUR == '>') {
                   3918:            NEXT;
                   3919:            oldname = sgmlnamePop(ctxt);
                   3920:            if (oldname != NULL) {
                   3921:                if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   3922:                    ctxt->sax->endElement(ctxt->userData, name);
                   3923: #ifdef DEBUG
                   3924:                fprintf(stderr,"End of tag </>: popping out %s\n", oldname);
                   3925: #endif
                   3926:                xmlFree(oldname);
                   3927: #ifdef DEBUG
                   3928:            } else {
                   3929:                fprintf(stderr,"End of tag </>: stack empty !!!\n");
                   3930: #endif
                   3931:            }
                   3932:            return;
                   3933:        } else
                   3934:            return;
                   3935:     }
                   3936: 
                   3937:     /*
                   3938:      * We should definitely be at the ending "S? '>'" part
                   3939:      */
                   3940:     SKIP_BLANKS;
                   3941:     if ((!IS_CHAR(CUR)) || (CUR != '>')) {
                   3942:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3943:            ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
                   3944:        ctxt->wellFormed = 0;
                   3945:     } else
                   3946:        NEXT;
                   3947: 
                   3948:     /*
                   3949:      * If the name read is not one of the element in the parsing stack
                   3950:      * then return, it's just an error.
                   3951:      */
                   3952:     for (i = (ctxt->nameNr - 1);i >= 0;i--) {
1.7       veillard 3953:         if (xmlStrEqual(name, ctxt->nameTab[i])) break;
1.1       veillard 3954:     }
                   3955:     if (i < 0) {
                   3956:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3957:            ctxt->sax->error(ctxt->userData,
                   3958:             "Unexpected end tag : %s\n", name);
                   3959:        xmlFree(name);
                   3960:        ctxt->wellFormed = 0;
                   3961:        return;
                   3962:     }
                   3963: 
                   3964: 
                   3965:     /*
                   3966:      * Check for auto-closure of SGML elements.
                   3967:      */
                   3968: 
                   3969:     sgmlAutoCloseOnClose(ctxt, name);
                   3970: 
                   3971:     /*
                   3972:      * Well formedness constraints, opening and closing must match.
                   3973:      * With the exception that the autoclose may have popped stuff out
                   3974:      * of the stack.
                   3975:      */
                   3976:     if (((name[0] != '/') || (name[1] != 0)) &&
1.7       veillard 3977:        (!xmlStrEqual(name, ctxt->name))) {
1.1       veillard 3978: #ifdef DEBUG
                   3979:        fprintf(stderr,"End of tag %s: expecting %s\n", name, ctxt->name);
                   3980: #endif
                   3981:         if ((ctxt->name != NULL) && 
1.7       veillard 3982:            (!xmlStrEqual(ctxt->name, name))) {
1.1       veillard 3983:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3984:                ctxt->sax->error(ctxt->userData,
                   3985:                 "Opening and ending tag mismatch: %s and %s\n",
                   3986:                                 name, ctxt->name);
                   3987:            ctxt->wellFormed = 0;
                   3988:         }
                   3989:     }
                   3990: 
                   3991:     /*
                   3992:      * SAX: End of Tag
                   3993:      */
                   3994:     oldname = ctxt->name;
                   3995:     if (((name[0] == '/') && (name[1] == 0)) ||
1.7       veillard 3996:        ((oldname != NULL) && (xmlStrEqual(oldname, name)))) {
1.1       veillard 3997:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   3998:            ctxt->sax->endElement(ctxt->userData, name);
                   3999:        oldname = sgmlnamePop(ctxt);
                   4000:        if (oldname != NULL) {
                   4001: #ifdef DEBUG
                   4002:            fprintf(stderr,"End of tag %s: popping out %s\n", name, oldname);
                   4003: #endif
                   4004:            xmlFree(oldname);
                   4005: #ifdef DEBUG
                   4006:        } else {
                   4007:            fprintf(stderr,"End of tag %s: stack empty !!!\n", name);
                   4008: #endif
                   4009:        }
                   4010:     }
                   4011: 
                   4012:     if (name != NULL)
                   4013:        xmlFree(name);
                   4014: 
                   4015:     return;
                   4016: }
                   4017: 
                   4018: 
                   4019: /**
                   4020:  * sgmlParseReference:
                   4021:  * @ctxt:  an SGML parser context
                   4022:  * 
                   4023:  * parse and handle entity references in content,
                   4024:  * this will end-up in a call to character() since this is either a
                   4025:  * CharRef, or a predefined entity.
                   4026:  */
                   4027: void
                   4028: sgmlParseReference(sgmlParserCtxtPtr ctxt) {
                   4029:     sgmlEntityDescPtr ent;
                   4030:     xmlChar out[6];
                   4031:     xmlChar *name;
                   4032:     if (CUR != '&') return;
                   4033: 
                   4034:     if (NXT(1) == '#') {
                   4035:        unsigned int c;
                   4036:        int bits, i = 0;
                   4037: 
                   4038:        c = sgmlParseCharRef(ctxt);
                   4039:         if      (c <    0x80) { out[i++]= c;                bits= -6; }
                   4040:         else if (c <   0x800) { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   4041:         else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   4042:         else                  { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   4043:  
                   4044:         for ( ; bits >= 0; bits-= 6) {
                   4045:             out[i++]= ((c >> bits) & 0x3F) | 0x80;
                   4046:         }
                   4047:        out[i] = 0;
                   4048: 
                   4049:        sgmlCheckParagraph(ctxt);
                   4050:        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4051:            ctxt->sax->characters(ctxt->userData, out, i);
                   4052:     } else {
                   4053:        ent = sgmlParseEntityRef(ctxt, &name);
                   4054:        if (name == NULL) {
                   4055:            sgmlCheckParagraph(ctxt);
                   4056:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4057:                ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
                   4058:            return;
                   4059:        }
                   4060:        if ((ent == NULL) || (ent->value <= 0)) {
                   4061:            sgmlCheckParagraph(ctxt);
                   4062:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
                   4063:                ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
                   4064:                ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
                   4065:                /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
                   4066:            }
                   4067:        } else {
                   4068:            unsigned int c;
                   4069:            int bits, i = 0;
                   4070: 
                   4071:            c = ent->value;
                   4072:            if      (c <    0x80)
                   4073:                    { out[i++]= c;                bits= -6; }
                   4074:            else if (c <   0x800)
                   4075:                    { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   4076:            else if (c < 0x10000)
                   4077:                    { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   4078:            else                 
                   4079:                    { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   4080:      
                   4081:            for ( ; bits >= 0; bits-= 6) {
                   4082:                out[i++]= ((c >> bits) & 0x3F) | 0x80;
                   4083:            }
                   4084:            out[i] = 0;
                   4085: 
                   4086:            sgmlCheckParagraph(ctxt);
                   4087:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4088:                ctxt->sax->characters(ctxt->userData, out, i);
                   4089:        }
                   4090:        xmlFree(name);
                   4091:     }
                   4092: }
                   4093: 
                   4094: /**
                   4095:  * sgmlParseContent:
                   4096:  * @ctxt:  an SGML parser context
                   4097:  * @name:  the node name
                   4098:  *
                   4099:  * Parse a content: comment, sub-element, reference or text.
                   4100:  *
                   4101:  */
                   4102: 
                   4103: void
                   4104: sgmlParseContent(sgmlParserCtxtPtr ctxt) {
                   4105:     xmlChar *currentNode;
                   4106:     int depth;
                   4107: 
                   4108:     currentNode = xmlStrdup(ctxt->name);
                   4109:     depth = ctxt->nameNr;
                   4110:     while (1) {
                   4111:        long cons = ctxt->nbChars;
                   4112: 
                   4113:         GROW;
                   4114:        /*
                   4115:         * Our tag or one of it's parent or children is ending.
                   4116:         */
                   4117:         if ((CUR == '<') && (NXT(1) == '/')) {
                   4118:            sgmlParseEndTag(ctxt);
                   4119:            if (currentNode != NULL) xmlFree(currentNode);
                   4120:            return;
                   4121:         }
                   4122: 
                   4123:        /*
                   4124:         * Has this node been popped out during parsing of
                   4125:         * the next element
                   4126:         */
1.7       veillard 4127:         if ((!xmlStrEqual(currentNode, ctxt->name)) &&
1.1       veillard 4128:            (depth >= ctxt->nameNr)) {
                   4129:            if (currentNode != NULL) xmlFree(currentNode);
                   4130:            return;
                   4131:        }
                   4132: 
                   4133:        /*
                   4134:         * Sometimes DOCTYPE arrives in the middle of the document
                   4135:         */
                   4136:        if ((CUR == '<') && (NXT(1) == '!') &&
                   4137:            (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   4138:            (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   4139:            (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   4140:            (UPP(8) == 'E')) {
                   4141:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4142:                ctxt->sax->error(ctxt->userData,
                   4143:                     "Misplaced DOCTYPE declaration\n");
                   4144:            ctxt->wellFormed = 0;
                   4145:            sgmlParseDocTypeDecl(ctxt);
                   4146:        }
                   4147: 
                   4148:        /*
                   4149:         * First case :  a comment
                   4150:         */
                   4151:        if ((CUR == '<') && (NXT(1) == '!') &&
                   4152:                 (NXT(2) == '-') && (NXT(3) == '-')) {
                   4153:            sgmlParseComment(ctxt);
                   4154:        }
                   4155: 
                   4156:        /*
                   4157:         * Second case :  a sub-element.
                   4158:         */
                   4159:        else if (CUR == '<') {
                   4160:            sgmlParseElement(ctxt);
                   4161:        }
                   4162: 
                   4163:        /*
                   4164:         * Third case : a reference. If if has not been resolved,
                   4165:         *    parsing returns it's Name, create the node 
                   4166:         */
                   4167:        else if (CUR == '&') {
                   4168:            sgmlParseReference(ctxt);
                   4169:        }
                   4170: 
                   4171:        /*
                   4172:         * Fourth : end of the resource
                   4173:         */
                   4174:        else if (CUR == 0) {
                   4175:            sgmlAutoClose(ctxt, NULL);
                   4176:        }
                   4177: 
                   4178:        /*
                   4179:         * Last case, text. Note that References are handled directly.
                   4180:         */
                   4181:        else {
                   4182:            sgmlParseCharData(ctxt, 0);
                   4183:        }
                   4184: 
                   4185:        if (cons == ctxt->nbChars) {
                   4186:            if (ctxt->node != NULL) {
                   4187:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4188:                    ctxt->sax->error(ctxt->userData,
                   4189:                         "detected an error in element content\n");
                   4190:                ctxt->wellFormed = 0;
                   4191:            }
                   4192:             break;
                   4193:        }
                   4194: 
                   4195:         GROW;
                   4196:     }
                   4197:     if (currentNode != NULL) xmlFree(currentNode);
                   4198: }
                   4199: 
                   4200: /**
                   4201:  * sgmlParseElement:
                   4202:  * @ctxt:  an SGML parser context
                   4203:  *
                   4204:  * parse an SGML element, this is highly recursive
                   4205:  *
                   4206:  * [39] element ::= EmptyElemTag | STag content ETag
                   4207:  *
                   4208:  * [41] Attribute ::= Name Eq AttValue
                   4209:  */
                   4210: 
                   4211: void
                   4212: sgmlParseElement(sgmlParserCtxtPtr ctxt) {
                   4213:     xmlChar *name;
                   4214:     xmlChar *currentNode = NULL;
                   4215:     sgmlElemDescPtr info;
                   4216:     sgmlParserNodeInfo node_info;
                   4217:     xmlChar *oldname;
                   4218:     int depth = ctxt->nameNr;
                   4219: 
                   4220:     /* Capture start position */
                   4221:     if (ctxt->record_info) {
                   4222:         node_info.begin_pos = ctxt->input->consumed +
                   4223:                           (CUR_PTR - ctxt->input->base);
                   4224:        node_info.begin_line = ctxt->input->line;
                   4225:     }
                   4226: 
                   4227:     oldname = xmlStrdup(ctxt->name);
                   4228:     sgmlParseStartTag(ctxt);
                   4229:     name = ctxt->name;
                   4230: #ifdef DEBUG
                   4231:     if (oldname == NULL)
                   4232:        fprintf(stderr, "Start of element %s\n", name);
                   4233:     else if (name == NULL)     
                   4234:        fprintf(stderr, "Start of element failed, was %s\n", oldname);
                   4235:     else       
                   4236:        fprintf(stderr, "Start of element %s, was %s\n", name, oldname);
                   4237: #endif
1.7       veillard 4238:     if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) ||
1.1       veillard 4239:         (name == NULL)) {
                   4240:        if (CUR == '>')
                   4241:            NEXT;
                   4242:        if (oldname != NULL)
                   4243:            xmlFree(oldname);
                   4244:         return;
                   4245:     }
                   4246:     if (oldname != NULL)
                   4247:        xmlFree(oldname);
                   4248: 
                   4249:     /*
                   4250:      * Lookup the info for that element.
                   4251:      */
                   4252:     info = sgmlTagLookup(name);
                   4253:     if (info == NULL) {
                   4254:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.4       veillard 4255:            ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
1.1       veillard 4256:                             name);
                   4257:        ctxt->wellFormed = 0;
                   4258:     } else if (info->depr) {
                   4259: /***************************
                   4260:        if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                   4261:            ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
                   4262:                               name);
                   4263:  ***************************/
                   4264:     }
                   4265: 
                   4266:     /*
                   4267:      * Check for an Empty Element labelled the XML/SGML way
                   4268:      */
                   4269:     if ((CUR == '/') && (NXT(1) == '>')) {
                   4270:         SKIP(2);
                   4271:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   4272:            ctxt->sax->endElement(ctxt->userData, name);
                   4273:        oldname = sgmlnamePop(ctxt);
                   4274: #ifdef DEBUG
                   4275:         fprintf(stderr,"End of tag the XML way: popping out %s\n", oldname);
                   4276: #endif
                   4277:        if (oldname != NULL)
                   4278:            xmlFree(oldname);
                   4279:        return;
                   4280:     }
                   4281: 
                   4282:     if (CUR == '>') {
                   4283:         NEXT;
                   4284:     } else {
                   4285:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4286:            ctxt->sax->error(ctxt->userData,
                   4287:                             "Couldn't find end of Start Tag %s\n",
                   4288:                             name);
                   4289:        ctxt->wellFormed = 0;
                   4290: 
                   4291:        /*
                   4292:         * end of parsing of this node.
                   4293:         */
1.7       veillard 4294:        if (xmlStrEqual(name, ctxt->name)) { 
1.1       veillard 4295:            nodePop(ctxt);
                   4296:            oldname = sgmlnamePop(ctxt);
                   4297: #ifdef DEBUG
                   4298:            fprintf(stderr,"End of start tag problem: popping out %s\n", oldname);
                   4299: #endif
                   4300:            if (oldname != NULL)
                   4301:                xmlFree(oldname);
                   4302:        }    
                   4303: 
                   4304:        /*
                   4305:         * Capture end position and add node
                   4306:         */
                   4307:        if ( currentNode != NULL && ctxt->record_info ) {
                   4308:           node_info.end_pos = ctxt->input->consumed +
                   4309:                              (CUR_PTR - ctxt->input->base);
                   4310:           node_info.end_line = ctxt->input->line;
                   4311:           node_info.node = ctxt->node;
                   4312:           xmlParserAddNodeInfo(ctxt, &node_info);
                   4313:        }
                   4314:        return;
                   4315:     }
                   4316: 
                   4317:     /*
                   4318:      * Check for an Empty Element from DTD definition
                   4319:      */
                   4320:     if ((info != NULL) && (info->empty)) {
                   4321:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   4322:            ctxt->sax->endElement(ctxt->userData, name);
                   4323:        oldname = sgmlnamePop(ctxt);
                   4324: #ifdef DEBUG
                   4325:        fprintf(stderr,"End of empty tag %s : popping out %s\n", name, oldname);
                   4326: #endif
                   4327:        if (oldname != NULL)
                   4328:            xmlFree(oldname);
                   4329:        return;
                   4330:     }
                   4331: 
                   4332:     /*
                   4333:      * Parse the content of the element:
                   4334:      */
                   4335:     currentNode = xmlStrdup(ctxt->name);
                   4336:     depth = ctxt->nameNr;
                   4337:     while (IS_CHAR(CUR)) {
                   4338:        sgmlParseContent(ctxt);
                   4339:        if (ctxt->nameNr < depth) break; 
                   4340:     }  
                   4341: 
                   4342:     if (!IS_CHAR(CUR)) {
                   4343:        /************
                   4344:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4345:            ctxt->sax->error(ctxt->userData,
                   4346:                 "Premature end of data in tag %s\n", currentNode);
                   4347:        ctxt->wellFormed = 0;
                   4348:         *************/
                   4349: 
                   4350:        /*
                   4351:         * end of parsing of this node.
                   4352:         */
                   4353:        nodePop(ctxt);
                   4354:        oldname = sgmlnamePop(ctxt);
                   4355: #ifdef DEBUG
                   4356:        fprintf(stderr,"Premature end of tag %s : popping out %s\n", name, oldname);
                   4357: #endif
                   4358:        if (oldname != NULL)
                   4359:            xmlFree(oldname);
                   4360:        if (currentNode != NULL)
                   4361:            xmlFree(currentNode);
                   4362:        return;
                   4363:     }
                   4364: 
                   4365:     /*
                   4366:      * Capture end position and add node
                   4367:      */
                   4368:     if ( currentNode != NULL && ctxt->record_info ) {
                   4369:        node_info.end_pos = ctxt->input->consumed +
                   4370:                           (CUR_PTR - ctxt->input->base);
                   4371:        node_info.end_line = ctxt->input->line;
                   4372:        node_info.node = ctxt->node;
                   4373:        xmlParserAddNodeInfo(ctxt, &node_info);
                   4374:     }
                   4375:     if (currentNode != NULL)
                   4376:        xmlFree(currentNode);
                   4377: }
                   4378: 
                   4379: /**
1.3       veillard 4380:  * sgmlParseEntityDecl:
                   4381:  * @ctxt:  an SGML parser context
                   4382:  *
                   4383:  * parse <!ENTITY declarations
                   4384:  *
                   4385:  */
                   4386: 
                   4387: void
                   4388: sgmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
                   4389:     xmlChar *name = NULL;
                   4390:     xmlChar *value = NULL;
                   4391:     xmlChar *URI = NULL, *literal = NULL;
                   4392:     xmlChar *ndata = NULL;
                   4393:     int isParameter = 0;
                   4394:     xmlChar *orig = NULL;
                   4395:     
                   4396:     GROW;
                   4397:     if ((RAW == '<') && (NXT(1) == '!') &&
                   4398:         (NXT(2) == 'E') && (NXT(3) == 'N') &&
                   4399:         (NXT(4) == 'T') && (NXT(5) == 'I') &&
                   4400:         (NXT(6) == 'T') && (NXT(7) == 'Y')) {
                   4401:        xmlParserInputPtr input = ctxt->input;
                   4402:        ctxt->instate = XML_PARSER_ENTITY_DECL;
                   4403:        SHRINK;
                   4404:        SKIP(8);
                   4405:        if (!IS_BLANK(CUR)) {
1.6       veillard 4406:            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4407:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4408:                ctxt->sax->error(ctxt->userData,
                   4409:                                 "Space required after '<!ENTITY'\n");
                   4410:            ctxt->wellFormed = 0;
                   4411:            ctxt->disableSAX = 1;
                   4412:        }
                   4413:        SKIP_BLANKS;
                   4414: 
                   4415:        if (RAW == '%') {
                   4416:            NEXT;
                   4417:            if (!IS_BLANK(CUR)) {
1.6       veillard 4418:                ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4419:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4420:                    ctxt->sax->error(ctxt->userData,
                   4421:                                     "Space required after '%'\n");
                   4422:                ctxt->wellFormed = 0;
                   4423:                ctxt->disableSAX = 1;
                   4424:            }
                   4425:            SKIP_BLANKS;
                   4426:            isParameter = 1;
                   4427:        }
                   4428: 
                   4429:         name = xmlParseName(ctxt);
                   4430:        if (name == NULL) {
1.6       veillard 4431:            ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.3       veillard 4432:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4433:                ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n");
                   4434:            ctxt->wellFormed = 0;
                   4435:            ctxt->disableSAX = 1;
                   4436:             return;
                   4437:        }
                   4438:        if (!IS_BLANK(CUR)) {
1.6       veillard 4439:            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4440:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4441:                ctxt->sax->error(ctxt->userData,
                   4442:                     "Space required after the entity name\n");
                   4443:            ctxt->wellFormed = 0;
                   4444:            ctxt->disableSAX = 1;
                   4445:        }
                   4446:         SKIP_BLANKS;
                   4447: 
                   4448:        /*
                   4449:         * handle the various case of definitions...
                   4450:         */
                   4451:        if (isParameter) {
                   4452:            if ((RAW == '"') || (RAW == '\'')) {
                   4453:                value = xmlParseEntityValue(ctxt, &orig);
                   4454:                if (value) {
                   4455:                    if ((ctxt->sax != NULL) &&
                   4456:                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                   4457:                        ctxt->sax->entityDecl(ctxt->userData, name,
                   4458:                                    XML_INTERNAL_PARAMETER_ENTITY,
                   4459:                                    NULL, NULL, value);
                   4460:                }
                   4461:            } else {
                   4462:                URI = xmlParseExternalID(ctxt, &literal, 1);
                   4463:                if ((URI == NULL) && (literal == NULL)) {
1.6       veillard 4464:                    ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.3       veillard 4465:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4466:                        ctxt->sax->error(ctxt->userData,
                   4467:                            "Entity value required\n");
                   4468:                    ctxt->wellFormed = 0;
                   4469:                    ctxt->disableSAX = 1;
                   4470:                }
                   4471:                if (URI) {
                   4472:                    xmlURIPtr uri;
                   4473: 
                   4474:                    uri = xmlParseURI((const char *) URI);
                   4475:                    if (uri == NULL) {
1.6       veillard 4476:                        ctxt->errNo = XML_ERR_INVALID_URI;
1.3       veillard 4477:                        if ((ctxt->sax != NULL) &&
                   4478:                            (!ctxt->disableSAX) &&
                   4479:                            (ctxt->sax->error != NULL))
                   4480:                            ctxt->sax->error(ctxt->userData,
                   4481:                                        "Invalid URI: %s\n", URI);
                   4482:                        ctxt->wellFormed = 0;
                   4483:                    } else {
                   4484:                        if (uri->fragment != NULL) {
1.6       veillard 4485:                            ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.3       veillard 4486:                            if ((ctxt->sax != NULL) &&
                   4487:                                (!ctxt->disableSAX) &&
                   4488:                                (ctxt->sax->error != NULL))
                   4489:                                ctxt->sax->error(ctxt->userData,
                   4490:                                            "Fragment not allowed: %s\n", URI);
                   4491:                            ctxt->wellFormed = 0;
                   4492:                        } else {
                   4493:                            if ((ctxt->sax != NULL) &&
                   4494:                                (!ctxt->disableSAX) &&
                   4495:                                (ctxt->sax->entityDecl != NULL))
                   4496:                                ctxt->sax->entityDecl(ctxt->userData, name,
                   4497:                                            XML_EXTERNAL_PARAMETER_ENTITY,
                   4498:                                            literal, URI, NULL);
                   4499:                        }
                   4500:                        xmlFreeURI(uri);
                   4501:                    }
                   4502:                }
                   4503:            }
                   4504:        } else {
                   4505:            if ((RAW == '"') || (RAW == '\'')) {
                   4506:                value = xmlParseEntityValue(ctxt, &orig);
                   4507:                if ((ctxt->sax != NULL) &&
                   4508:                    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                   4509:                    ctxt->sax->entityDecl(ctxt->userData, name,
                   4510:                                XML_INTERNAL_GENERAL_ENTITY,
                   4511:                                NULL, NULL, value);
                   4512:            } else {
                   4513:                URI = xmlParseExternalID(ctxt, &literal, 1);
                   4514:                if ((URI == NULL) && (literal == NULL)) {
1.6       veillard 4515:                    ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.3       veillard 4516:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4517:                        ctxt->sax->error(ctxt->userData,
                   4518:                            "Entity value required\n");
                   4519:                    ctxt->wellFormed = 0;
                   4520:                    ctxt->disableSAX = 1;
                   4521:                }
                   4522:                if (URI) {
                   4523:                    xmlURIPtr uri;
                   4524: 
                   4525:                    uri = xmlParseURI((const char *)URI);
                   4526:                    if (uri == NULL) {
1.6       veillard 4527:                        ctxt->errNo = XML_ERR_INVALID_URI;
1.3       veillard 4528:                        if ((ctxt->sax != NULL) &&
                   4529:                            (!ctxt->disableSAX) &&
                   4530:                            (ctxt->sax->error != NULL))
                   4531:                            ctxt->sax->error(ctxt->userData,
                   4532:                                        "Invalid URI: %s\n", URI);
                   4533:                        ctxt->wellFormed = 0;
                   4534:                    } else {
                   4535:                        if (uri->fragment != NULL) {
1.6       veillard 4536:                            ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.3       veillard 4537:                            if ((ctxt->sax != NULL) &&
                   4538:                                (!ctxt->disableSAX) &&
                   4539:                                (ctxt->sax->error != NULL))
                   4540:                                ctxt->sax->error(ctxt->userData,
                   4541:                                            "Fragment not allowed: %s\n", URI);
                   4542:                            ctxt->wellFormed = 0;
                   4543:                        }
                   4544:                        xmlFreeURI(uri);
                   4545:                    }
                   4546:                }
                   4547:                if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.6       veillard 4548:                    ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4549:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4550:                        ctxt->sax->error(ctxt->userData,
                   4551:                            "Space required before content model\n");
                   4552:                    ctxt->wellFormed = 0;
                   4553:                    ctxt->disableSAX = 1;
                   4554:                }
                   4555:                SKIP_BLANKS;
                   4556: 
                   4557:                /*
                   4558:                 * SGML specific: here we can get the content model
                   4559:                 */
                   4560:                if (RAW != '>') {
                   4561:                    xmlChar *contmod;
                   4562: 
                   4563:                    contmod = xmlParseName(ctxt);
                   4564: 
                   4565:                    if (contmod == NULL) {
1.6       veillard 4566:                        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4567:                        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4568:                            ctxt->sax->error(ctxt->userData,
                   4569:                                "Could not parse entity content model\n");
                   4570:                        ctxt->wellFormed = 0;
                   4571:                        ctxt->disableSAX = 1;
                   4572:                    } else {
1.7       veillard 4573:                        if (xmlStrEqual(contmod, BAD_CAST"NDATA")) {
1.3       veillard 4574:                            if (!IS_BLANK(CUR)) {
1.6       veillard 4575:                                ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4576:                                if ((ctxt->sax != NULL) &&
                   4577:                                    (ctxt->sax->error != NULL))
                   4578:                                    ctxt->sax->error(ctxt->userData,
                   4579:                                        "Space required after 'NDATA'\n");
                   4580:                                ctxt->wellFormed = 0;
                   4581:                                ctxt->disableSAX = 1;
                   4582:                            }
                   4583:                            SKIP_BLANKS;
                   4584:                            ndata = xmlParseName(ctxt);
                   4585:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                   4586:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
                   4587:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
                   4588:                                        name, literal, URI, ndata);
                   4589:                            }
1.7       veillard 4590:                        } else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) {
1.3       veillard 4591:                            if ((ctxt->sax != NULL) &&
                   4592:                                (ctxt->sax->warning != NULL))
                   4593:                                ctxt->sax->warning(ctxt->userData,
                   4594:                                    "SUBDOC entities are not supported\n");
                   4595:                            SKIP_BLANKS;
                   4596:                            ndata = xmlParseName(ctxt);
                   4597:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                   4598:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
                   4599:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
                   4600:                                        name, literal, URI, ndata);
                   4601:                            }
1.7       veillard 4602:                        } else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) {
1.3       veillard 4603:                            if ((ctxt->sax != NULL) &&
                   4604:                                (ctxt->sax->warning != NULL))
                   4605:                                ctxt->sax->warning(ctxt->userData,
                   4606:                                    "CDATA entities are not supported\n");
                   4607:                            SKIP_BLANKS;
                   4608:                            ndata = xmlParseName(ctxt);
                   4609:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                   4610:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
                   4611:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
                   4612:                                        name, literal, URI, ndata);
                   4613:                            }
                   4614:                        }
                   4615:                        xmlFree(contmod);
                   4616:                    }
                   4617:                } else {
                   4618:                    if ((ctxt->sax != NULL) &&
                   4619:                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                   4620:                        ctxt->sax->entityDecl(ctxt->userData, name,
                   4621:                                    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
                   4622:                                    literal, URI, NULL);
                   4623:                }
                   4624:            }
                   4625:        }
                   4626:        SKIP_BLANKS;
                   4627:        if (RAW != '>') {
1.6       veillard 4628:            ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.3       veillard 4629:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4630:                ctxt->sax->error(ctxt->userData, 
                   4631:                    "sgmlParseEntityDecl: entity %s not terminated\n", name);
                   4632:            ctxt->wellFormed = 0;
                   4633:            ctxt->disableSAX = 1;
                   4634:        } else {
                   4635:            if (input != ctxt->input) {
1.6       veillard 4636:                ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.3       veillard 4637:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4638:                    ctxt->sax->error(ctxt->userData, 
                   4639: "Entity declaration doesn't start and stop in the same entity\n");
                   4640:                ctxt->wellFormed = 0;
                   4641:                ctxt->disableSAX = 1;
                   4642:            }
                   4643:            NEXT;
                   4644:        }
                   4645:        if (orig != NULL) {
                   4646:            /*
                   4647:             * Ugly mechanism to save the raw entity value.
                   4648:             */
                   4649:            xmlEntityPtr cur = NULL;
                   4650: 
                   4651:            if (isParameter) {
                   4652:                if ((ctxt->sax != NULL) &&
                   4653:                    (ctxt->sax->getParameterEntity != NULL))
                   4654:                    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
                   4655:            } else {
                   4656:                if ((ctxt->sax != NULL) &&
                   4657:                    (ctxt->sax->getEntity != NULL))
                   4658:                    cur = ctxt->sax->getEntity(ctxt->userData, name);
                   4659:            }
                   4660:             if (cur != NULL) {
                   4661:                if (cur->orig != NULL)
                   4662:                    xmlFree(orig);
                   4663:                else
                   4664:                    cur->orig = orig;
                   4665:            } else
                   4666:                xmlFree(orig);
                   4667:        }
                   4668:        if (name != NULL) xmlFree(name);
                   4669:        if (value != NULL) xmlFree(value);
                   4670:        if (URI != NULL) xmlFree(URI);
                   4671:        if (literal != NULL) xmlFree(literal);
                   4672:        if (ndata != NULL) xmlFree(ndata);
                   4673:     }
                   4674: }
                   4675: 
                   4676: /**
                   4677:  * sgmlParseMarkupDecl:
                   4678:  * @ctxt:  an SGML parser context
                   4679:  * 
                   4680:  * parse Markup declarations
                   4681:  *
                   4682:  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
                   4683:  *                     NotationDecl | PI | Comment
                   4684:  */
                   4685: void
                   4686: sgmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
                   4687:     GROW;
                   4688:     xmlParseElementDecl(ctxt);
                   4689:     xmlParseAttributeListDecl(ctxt);
                   4690:     sgmlParseEntityDecl(ctxt);
                   4691:     xmlParseNotationDecl(ctxt);
                   4692:     xmlParsePI(ctxt);
                   4693:     xmlParseComment(ctxt);
                   4694:     /*
                   4695:      * This is only for internal subset. On external entities,
                   4696:      * the replacement is done before parsing stage
                   4697:      */
                   4698:     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
                   4699:        xmlParsePEReference(ctxt);
                   4700:     ctxt->instate = XML_PARSER_DTD;
                   4701: }
                   4702: 
                   4703: /**
                   4704:  * sgmlParseInternalsubset:
                   4705:  * @ctxt:  an SGML parser context
                   4706:  *
                   4707:  * parse the internal subset declaration
                   4708:  *
                   4709:  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
                   4710:  */
                   4711: 
                   4712: void
                   4713: sgmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
                   4714:     /*
                   4715:      * Is there any DTD definition ?
                   4716:      */
                   4717:     if (RAW == '[') {
                   4718:         ctxt->instate = XML_PARSER_DTD;
                   4719:         NEXT;
                   4720:        /*
                   4721:         * Parse the succession of Markup declarations and 
                   4722:         * PEReferences.
                   4723:         * Subsequence (markupdecl | PEReference | S)*
                   4724:         */
                   4725:        while (RAW != ']') {
                   4726:            const xmlChar *check = CUR_PTR;
                   4727:            int cons = ctxt->input->consumed;
                   4728: 
                   4729:            SKIP_BLANKS;
                   4730:            sgmlParseMarkupDecl(ctxt);
                   4731:            xmlParsePEReference(ctxt);
                   4732: 
                   4733:            /*
                   4734:             * Pop-up of finished entities.
                   4735:             */
                   4736:            while ((RAW == 0) && (ctxt->inputNr > 1))
                   4737:                xmlPopInput(ctxt);
                   4738: 
                   4739:            if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.6       veillard 4740:                ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.3       veillard 4741:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4742:                    ctxt->sax->error(ctxt->userData, 
                   4743:             "sgmlParseInternalSubset: error detected in Markup declaration\n");
                   4744:                ctxt->wellFormed = 0;
                   4745:                ctxt->disableSAX = 1;
                   4746:                break;
                   4747:            }
                   4748:        }
                   4749:        if (RAW == ']') { 
                   4750:            NEXT;
                   4751:            SKIP_BLANKS;
                   4752:        }
                   4753:     }
                   4754: 
                   4755:     /*
                   4756:      * We should be at the end of the DOCTYPE declaration.
                   4757:      */
                   4758:     if (RAW != '>') {
1.6       veillard 4759:        ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.3       veillard 4760:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4761:            ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
                   4762:        ctxt->wellFormed = 0;
                   4763:        ctxt->disableSAX = 1;
                   4764:     }
                   4765:     NEXT;
                   4766: }
                   4767: 
                   4768: /**
1.2       veillard 4769:  * sgmlParseMisc:
                   4770:  * @ctxt:  an XML parser context
                   4771:  * 
                   4772:  * parse an XML Misc* optionnal field.
                   4773:  *
                   4774:  * [27] Misc ::= Comment | PI |  S
                   4775:  */
                   4776: 
                   4777: void
                   4778: sgmlParseMisc(xmlParserCtxtPtr ctxt) {
                   4779:     while (((RAW == '<') && (NXT(1) == '?')) ||
                   4780:            ((RAW == '<') && (NXT(1) == '!') &&
                   4781:            (NXT(2) == '-') && (NXT(3) == '-')) ||
                   4782:            IS_BLANK(CUR)) {
                   4783:         if ((RAW == '<') && (NXT(1) == '?')) {
                   4784:            xmlParsePI(ctxt); /* TODO: SGML PIs differs */
                   4785:        } else if (IS_BLANK(CUR)) {
                   4786:            NEXT;
                   4787:        } else
                   4788:            xmlParseComment(ctxt);
                   4789:     }
                   4790: }
                   4791: 
                   4792: /**
1.1       veillard 4793:  * sgmlParseDocument :
                   4794:  * @ctxt:  an SGML parser context
                   4795:  * 
                   4796:  * parse an SGML document (and build a tree if using the standard SAX
                   4797:  * interface).
                   4798:  *
                   4799:  * Returns 0, -1 in case of error. the parser context is augmented
                   4800:  *                as a result of the parsing.
                   4801:  */
                   4802: 
                   4803: int
                   4804: sgmlParseDocument(sgmlParserCtxtPtr ctxt) {
1.2       veillard 4805:     xmlChar start[4];
                   4806:     xmlCharEncoding enc;
1.1       veillard 4807:     xmlDtdPtr dtd;
                   4808: 
                   4809:     sgmlDefaultSAXHandlerInit();
                   4810:     ctxt->html = 2;
                   4811: 
                   4812:     GROW;
                   4813:     /*
                   4814:      * SAX: beginning of the document processing.
                   4815:      */
                   4816:     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                   4817:         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
                   4818: 
1.2       veillard 4819:     /* 
                   4820:      * Get the 4 first bytes and decode the charset
                   4821:      * if enc != XML_CHAR_ENCODING_NONE
                   4822:      * plug some encoding conversion routines.
                   4823:      */
                   4824:     start[0] = RAW;
                   4825:     start[1] = NXT(1);
                   4826:     start[2] = NXT(2);
                   4827:     start[3] = NXT(3);
                   4828:     enc = xmlDetectCharEncoding(start, 4);
                   4829:     if (enc != XML_CHAR_ENCODING_NONE) {
                   4830:         xmlSwitchEncoding(ctxt, enc);
                   4831:     }
                   4832: 
1.1       veillard 4833:     /*
                   4834:      * Wipe out everything which is before the first '<'
                   4835:      */
                   4836:     SKIP_BLANKS;
                   4837:     if (CUR == 0) {
                   4838:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4839:            ctxt->sax->error(ctxt->userData, "Document is empty\n");
                   4840:        ctxt->wellFormed = 0;
                   4841:     }
                   4842: 
                   4843:     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
                   4844:        ctxt->sax->startDocument(ctxt->userData);
                   4845: 
                   4846: 
                   4847:     /*
1.2       veillard 4848:      * The Misc part of the Prolog
1.1       veillard 4849:      */
1.2       veillard 4850:     GROW;
                   4851:     sgmlParseMisc(ctxt);
1.1       veillard 4852: 
                   4853:     /*
                   4854:      * Then possibly doc type declaration(s) and more Misc
                   4855:      * (doctypedecl Misc*)?
                   4856:      */
1.2       veillard 4857:     GROW;
                   4858:     if ((RAW == '<') && (NXT(1) == '!') &&
                   4859:        (NXT(2) == 'D') && (NXT(3) == 'O') &&
                   4860:        (NXT(4) == 'C') && (NXT(5) == 'T') &&
                   4861:        (NXT(6) == 'Y') && (NXT(7) == 'P') &&
                   4862:        (NXT(8) == 'E')) {
                   4863: 
                   4864:        ctxt->inSubset = 1;
1.1       veillard 4865:        sgmlParseDocTypeDecl(ctxt);
1.2       veillard 4866:        if (RAW == '[') {
                   4867:            ctxt->instate = XML_PARSER_DTD;
1.3       veillard 4868:            sgmlParseInternalSubset(ctxt);
1.2       veillard 4869:        }
                   4870: 
                   4871:        /*
                   4872:         * Create and update the external subset.
                   4873:         */
                   4874:        ctxt->inSubset = 2;
                   4875:        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
                   4876:            (!ctxt->disableSAX))
                   4877:            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                   4878:                                      ctxt->extSubSystem, ctxt->extSubURI);
                   4879:        ctxt->inSubset = 0;
                   4880: 
                   4881: 
                   4882:        ctxt->instate = XML_PARSER_PROLOG;
                   4883:        sgmlParseMisc(ctxt);
1.1       veillard 4884:     }
                   4885: 
                   4886:     /*
                   4887:      * Time to start parsing the tree itself
                   4888:      */
                   4889:     sgmlParseContent(ctxt);
                   4890: 
                   4891:     /*
                   4892:      * autoclose
                   4893:      */
                   4894:     if (CUR == 0)
                   4895:        sgmlAutoClose(ctxt, NULL);
                   4896: 
                   4897: 
                   4898:     /*
                   4899:      * SAX: end of the document processing.
                   4900:      */
                   4901:     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   4902:         ctxt->sax->endDocument(ctxt->userData);
                   4903: 
                   4904:     if (ctxt->myDoc != NULL) {
                   4905:        dtd = xmlGetIntSubset(ctxt->myDoc);
                   4906:        if (dtd == NULL)
                   4907:            ctxt->myDoc->intSubset = 
                   4908:                xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", 
                   4909:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   4910:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
                   4911:     }
                   4912:     if (! ctxt->wellFormed) return(-1);
                   4913:     return(0);
                   4914: }
                   4915: 
                   4916: 
                   4917: /************************************************************************
                   4918:  *                                                                     *
                   4919:  *                     Parser contexts handling                        *
                   4920:  *                                                                     *
                   4921:  ************************************************************************/
                   4922: 
                   4923: /**
                   4924:  * xmlInitParserCtxt:
                   4925:  * @ctxt:  an SGML parser context
                   4926:  *
                   4927:  * Initialize a parser context
                   4928:  */
                   4929: 
                   4930: void
                   4931: sgmlInitParserCtxt(sgmlParserCtxtPtr ctxt)
                   4932: {
                   4933:     sgmlSAXHandler *sax;
                   4934: 
                   4935:     if (ctxt == NULL) return;
                   4936:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   4937: 
                   4938:     sax = (sgmlSAXHandler *) xmlMalloc(sizeof(sgmlSAXHandler));
                   4939:     if (sax == NULL) {
                   4940:         fprintf(stderr, "sgmlInitParserCtxt: out of memory\n");
                   4941:     }
                   4942:     memset(sax, 0, sizeof(sgmlSAXHandler));
                   4943: 
                   4944:     /* Allocate the Input stack */
                   4945:     ctxt->inputTab = (sgmlParserInputPtr *) 
                   4946:                       xmlMalloc(5 * sizeof(sgmlParserInputPtr));
                   4947:     if (ctxt->inputTab == NULL) {
                   4948:         fprintf(stderr, "sgmlInitParserCtxt: out of memory\n");
                   4949:     }
                   4950:     ctxt->inputNr = 0;
                   4951:     ctxt->inputMax = 5;
                   4952:     ctxt->input = NULL;
                   4953:     ctxt->version = NULL;
                   4954:     ctxt->encoding = NULL;
                   4955:     ctxt->standalone = -1;
                   4956:     ctxt->instate = XML_PARSER_START;
                   4957: 
                   4958:     /* Allocate the Node stack */
                   4959:     ctxt->nodeTab = (sgmlNodePtr *) xmlMalloc(10 * sizeof(sgmlNodePtr));
                   4960:     ctxt->nodeNr = 0;
                   4961:     ctxt->nodeMax = 10;
                   4962:     ctxt->node = NULL;
                   4963: 
                   4964:     /* Allocate the Name stack */
                   4965:     ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
                   4966:     ctxt->nameNr = 0;
                   4967:     ctxt->nameMax = 10;
                   4968:     ctxt->name = NULL;
                   4969: 
                   4970:     if (sax == NULL) ctxt->sax = &sgmlDefaultSAXHandler;
                   4971:     else {
                   4972:         ctxt->sax = sax;
                   4973:        memcpy(sax, &sgmlDefaultSAXHandler, sizeof(sgmlSAXHandler));
                   4974:     }
                   4975:     ctxt->userData = ctxt;
                   4976:     ctxt->myDoc = NULL;
                   4977:     ctxt->wellFormed = 1;
                   4978:     ctxt->replaceEntities = 0;
                   4979:     ctxt->html = 2;
                   4980:     ctxt->record_info = 0;
                   4981:     ctxt->validate = 0;
                   4982:     ctxt->nbChars = 0;
                   4983:     ctxt->checkIndex = 0;
                   4984:     xmlInitNodeInfoSeq(&ctxt->node_seq);
                   4985: }
                   4986: 
                   4987: /**
                   4988:  * sgmlFreeParserCtxt:
                   4989:  * @ctxt:  an SGML parser context
                   4990:  *
                   4991:  * Free all the memory used by a parser context. However the parsed
                   4992:  * document in ctxt->myDoc is not freed.
                   4993:  */
                   4994: 
                   4995: void
                   4996: sgmlFreeParserCtxt(sgmlParserCtxtPtr ctxt)
                   4997: {
                   4998:     xmlFreeParserCtxt(ctxt);
                   4999: }
                   5000: 
                   5001: /**
                   5002:  * sgmlCreateDocParserCtxt :
                   5003:  * @cur:  a pointer to an array of xmlChar
                   5004:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5005:  *
                   5006:  * Create a parser context for an SGML document.
                   5007:  *
                   5008:  * Returns the new parser context or NULL
                   5009:  */
                   5010: sgmlParserCtxtPtr
                   5011: sgmlCreateDocParserCtxt(xmlChar *cur, const char *encoding) {
                   5012:     sgmlParserCtxtPtr ctxt;
                   5013:     sgmlParserInputPtr input;
                   5014:     /* sgmlCharEncoding enc; */
                   5015: 
                   5016:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   5017:     if (ctxt == NULL) {
                   5018:         perror("malloc");
                   5019:        return(NULL);
                   5020:     }
                   5021:     sgmlInitParserCtxt(ctxt);
                   5022:     input = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   5023:     if (input == NULL) {
                   5024:         perror("malloc");
                   5025:        xmlFree(ctxt);
                   5026:        return(NULL);
                   5027:     }
                   5028:     memset(input, 0, sizeof(sgmlParserInput));
                   5029: 
                   5030:     input->line = 1;
                   5031:     input->col = 1;
                   5032:     input->base = cur;
                   5033:     input->cur = cur;
                   5034: 
                   5035:     inputPush(ctxt, input);
                   5036:     return(ctxt);
                   5037: }
                   5038: 
                   5039: /************************************************************************
                   5040:  *                                                                     *
                   5041:  *             Progressive parsing interfaces                          *
                   5042:  *                                                                     *
                   5043:  ************************************************************************/
                   5044: 
                   5045: /**
                   5046:  * sgmlParseLookupSequence:
                   5047:  * @ctxt:  an SGML parser context
                   5048:  * @first:  the first char to lookup
                   5049:  * @next:  the next char to lookup or zero
                   5050:  * @third:  the next char to lookup or zero
                   5051:  *
                   5052:  * Try to find if a sequence (first, next, third) or  just (first next) or
                   5053:  * (first) is available in the input stream.
                   5054:  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
                   5055:  * to avoid rescanning sequences of bytes, it DOES change the state of the
                   5056:  * parser, do not use liberally.
                   5057:  * This is basically similar to xmlParseLookupSequence()
                   5058:  *
                   5059:  * Returns the index to the current parsing point if the full sequence
                   5060:  *      is available, -1 otherwise.
                   5061:  */
                   5062: int
                   5063: sgmlParseLookupSequence(sgmlParserCtxtPtr ctxt, xmlChar first,
                   5064:                        xmlChar next, xmlChar third) {
                   5065:     int base, len;
                   5066:     sgmlParserInputPtr in;
                   5067:     const xmlChar *buf;
                   5068: 
                   5069:     in = ctxt->input;
                   5070:     if (in == NULL) return(-1);
                   5071:     base = in->cur - in->base;
                   5072:     if (base < 0) return(-1);
                   5073:     if (ctxt->checkIndex > base)
                   5074:         base = ctxt->checkIndex;
                   5075:     if (in->buf == NULL) {
                   5076:        buf = in->base;
                   5077:        len = in->length;
                   5078:     } else {
                   5079:        buf = in->buf->buffer->content;
                   5080:        len = in->buf->buffer->use;
                   5081:     }
                   5082:     /* take into account the sequence length */
                   5083:     if (third) len -= 2;
                   5084:     else if (next) len --;
                   5085:     for (;base < len;base++) {
                   5086:         if (buf[base] == first) {
                   5087:            if (third != 0) {
                   5088:                if ((buf[base + 1] != next) ||
                   5089:                    (buf[base + 2] != third)) continue;
                   5090:            } else if (next != 0) {
                   5091:                if (buf[base + 1] != next) continue;
                   5092:            }
                   5093:            ctxt->checkIndex = 0;
                   5094: #ifdef DEBUG_PUSH
                   5095:            if (next == 0)
                   5096:                fprintf(stderr, "HPP: lookup '%c' found at %d\n",
                   5097:                        first, base);
                   5098:            else if (third == 0)
                   5099:                fprintf(stderr, "HPP: lookup '%c%c' found at %d\n",
                   5100:                        first, next, base);
                   5101:            else 
                   5102:                fprintf(stderr, "HPP: lookup '%c%c%c' found at %d\n",
                   5103:                        first, next, third, base);
                   5104: #endif
                   5105:            return(base - (in->cur - in->base));
                   5106:        }
                   5107:     }
                   5108:     ctxt->checkIndex = base;
                   5109: #ifdef DEBUG_PUSH
                   5110:     if (next == 0)
                   5111:        fprintf(stderr, "HPP: lookup '%c' failed\n", first);
                   5112:     else if (third == 0)
                   5113:        fprintf(stderr, "HPP: lookup '%c%c' failed\n", first, next);
                   5114:     else       
                   5115:        fprintf(stderr, "HPP: lookup '%c%c%c' failed\n", first, next, third);
                   5116: #endif
                   5117:     return(-1);
                   5118: }
                   5119: 
                   5120: /**
                   5121:  * sgmlParseTryOrFinish:
                   5122:  * @ctxt:  an SGML parser context
                   5123:  * @terminate:  last chunk indicator
                   5124:  *
                   5125:  * Try to progress on parsing
                   5126:  *
                   5127:  * Returns zero if no parsing was possible
                   5128:  */
                   5129: int
                   5130: sgmlParseTryOrFinish(sgmlParserCtxtPtr ctxt, int terminate) {
                   5131:     int ret = 0;
                   5132:     sgmlParserInputPtr in;
                   5133:     int avail = 0;
                   5134:     xmlChar cur, next;
                   5135: 
                   5136: #ifdef DEBUG_PUSH
                   5137:     switch (ctxt->instate) {
                   5138:        case XML_PARSER_EOF:
                   5139:            fprintf(stderr, "HPP: try EOF\n"); break;
                   5140:        case XML_PARSER_START:
                   5141:            fprintf(stderr, "HPP: try START\n"); break;
                   5142:        case XML_PARSER_MISC:
                   5143:            fprintf(stderr, "HPP: try MISC\n");break;
                   5144:        case XML_PARSER_COMMENT:
                   5145:            fprintf(stderr, "HPP: try COMMENT\n");break;
                   5146:        case XML_PARSER_PROLOG:
                   5147:            fprintf(stderr, "HPP: try PROLOG\n");break;
                   5148:        case XML_PARSER_START_TAG:
                   5149:            fprintf(stderr, "HPP: try START_TAG\n");break;
                   5150:        case XML_PARSER_CONTENT:
                   5151:            fprintf(stderr, "HPP: try CONTENT\n");break;
                   5152:        case XML_PARSER_CDATA_SECTION:
                   5153:            fprintf(stderr, "HPP: try CDATA_SECTION\n");break;
                   5154:        case XML_PARSER_END_TAG:
                   5155:            fprintf(stderr, "HPP: try END_TAG\n");break;
                   5156:        case XML_PARSER_ENTITY_DECL:
                   5157:            fprintf(stderr, "HPP: try ENTITY_DECL\n");break;
                   5158:        case XML_PARSER_ENTITY_VALUE:
                   5159:            fprintf(stderr, "HPP: try ENTITY_VALUE\n");break;
                   5160:        case XML_PARSER_ATTRIBUTE_VALUE:
                   5161:            fprintf(stderr, "HPP: try ATTRIBUTE_VALUE\n");break;
                   5162:        case XML_PARSER_DTD:
                   5163:            fprintf(stderr, "HPP: try DTD\n");break;
                   5164:        case XML_PARSER_EPILOG:
                   5165:            fprintf(stderr, "HPP: try EPILOG\n");break;
                   5166:        case XML_PARSER_PI:
                   5167:            fprintf(stderr, "HPP: try PI\n");break;
                   5168:     }
                   5169: #endif
                   5170: 
                   5171:     while (1) {
                   5172: 
                   5173:        in = ctxt->input;
                   5174:        if (in == NULL) break;
                   5175:        if (in->buf == NULL)
                   5176:            avail = in->length - (in->cur - in->base);
                   5177:        else
                   5178:            avail = in->buf->buffer->use - (in->cur - in->base);
                   5179:        if ((avail == 0) && (terminate)) {
                   5180:            sgmlAutoClose(ctxt, NULL);
                   5181:            if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 
                   5182:                /*
                   5183:                 * SAX: end of the document processing.
                   5184:                 */
                   5185:                ctxt->instate = XML_PARSER_EOF;
                   5186:                if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5187:                    ctxt->sax->endDocument(ctxt->userData);
                   5188:            }
                   5189:        }
                   5190:         if (avail < 1)
                   5191:            goto done;
                   5192:         switch (ctxt->instate) {
                   5193:             case XML_PARSER_EOF:
                   5194:                /*
                   5195:                 * Document parsing is done !
                   5196:                 */
                   5197:                goto done;
                   5198:             case XML_PARSER_START:
                   5199:                /*
                   5200:                 * Very first chars read from the document flow.
                   5201:                 */
                   5202:                cur = in->cur[0];
                   5203:                if (IS_BLANK(cur)) {
                   5204:                    SKIP_BLANKS;
                   5205:                    if (in->buf == NULL)
                   5206:                        avail = in->length - (in->cur - in->base);
                   5207:                    else
                   5208:                        avail = in->buf->buffer->use - (in->cur - in->base);
                   5209:                }
                   5210:                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                   5211:                    ctxt->sax->setDocumentLocator(ctxt->userData,
                   5212:                                                  &xmlDefaultSAXLocator);
                   5213:                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                   5214:                    (!ctxt->disableSAX))
                   5215:                    ctxt->sax->startDocument(ctxt->userData);
                   5216: 
                   5217:                cur = in->cur[0];
                   5218:                next = in->cur[1];
                   5219:                if ((cur == '<') && (next == '!') &&
                   5220:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5221:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5222:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5223:                    (UPP(8) == 'E')) {
                   5224:                    if ((!terminate) &&
                   5225:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5226:                        goto done;
                   5227: #ifdef DEBUG_PUSH
                   5228:                    fprintf(stderr, "HPP: Parsing internal subset\n");
                   5229: #endif
                   5230:                    sgmlParseDocTypeDecl(ctxt);
                   5231:                    ctxt->instate = XML_PARSER_PROLOG;
                   5232: #ifdef DEBUG_PUSH
                   5233:                    fprintf(stderr, "HPP: entering PROLOG\n");
                   5234: #endif
                   5235:                 } else {
                   5236:                    ctxt->instate = XML_PARSER_MISC;
                   5237:                }
                   5238: #ifdef DEBUG_PUSH
                   5239:                fprintf(stderr, "HPP: entering MISC\n");
                   5240: #endif
                   5241:                break;
                   5242:             case XML_PARSER_MISC:
                   5243:                SKIP_BLANKS;
                   5244:                if (in->buf == NULL)
                   5245:                    avail = in->length - (in->cur - in->base);
                   5246:                else
                   5247:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5248:                if (avail < 2)
                   5249:                    goto done;
                   5250:                cur = in->cur[0];
                   5251:                next = in->cur[1];
                   5252:                if ((cur == '<') && (next == '!') &&
                   5253:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5254:                    if ((!terminate) &&
                   5255:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5256:                        goto done;
                   5257: #ifdef DEBUG_PUSH
                   5258:                    fprintf(stderr, "HPP: Parsing Comment\n");
                   5259: #endif
                   5260:                    sgmlParseComment(ctxt);
                   5261:                    ctxt->instate = XML_PARSER_MISC;
                   5262:                } else if ((cur == '<') && (next == '!') &&
                   5263:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5264:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5265:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5266:                    (UPP(8) == 'E')) {
                   5267:                    if ((!terminate) &&
                   5268:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5269:                        goto done;
                   5270: #ifdef DEBUG_PUSH
                   5271:                    fprintf(stderr, "HPP: Parsing internal subset\n");
                   5272: #endif
                   5273:                    sgmlParseDocTypeDecl(ctxt);
                   5274:                    ctxt->instate = XML_PARSER_PROLOG;
                   5275: #ifdef DEBUG_PUSH
                   5276:                    fprintf(stderr, "HPP: entering PROLOG\n");
                   5277: #endif
                   5278:                } else if ((cur == '<') && (next == '!') &&
                   5279:                           (avail < 9)) {
                   5280:                    goto done;
                   5281:                } else {
                   5282:                    ctxt->instate = XML_PARSER_START_TAG;
                   5283: #ifdef DEBUG_PUSH
                   5284:                    fprintf(stderr, "HPP: entering START_TAG\n");
                   5285: #endif
                   5286:                }
                   5287:                break;
                   5288:             case XML_PARSER_PROLOG:
                   5289:                SKIP_BLANKS;
                   5290:                if (in->buf == NULL)
                   5291:                    avail = in->length - (in->cur - in->base);
                   5292:                else
                   5293:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5294:                if (avail < 2) 
                   5295:                    goto done;
                   5296:                cur = in->cur[0];
                   5297:                next = in->cur[1];
                   5298:                if ((cur == '<') && (next == '!') &&
                   5299:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5300:                    if ((!terminate) &&
                   5301:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5302:                        goto done;
                   5303: #ifdef DEBUG_PUSH
                   5304:                    fprintf(stderr, "HPP: Parsing Comment\n");
                   5305: #endif
                   5306:                    sgmlParseComment(ctxt);
                   5307:                    ctxt->instate = XML_PARSER_PROLOG;
                   5308:                } else if ((cur == '<') && (next == '!') &&
                   5309:                           (avail < 4)) {
                   5310:                    goto done;
                   5311:                } else {
                   5312:                    ctxt->instate = XML_PARSER_START_TAG;
                   5313: #ifdef DEBUG_PUSH
                   5314:                    fprintf(stderr, "HPP: entering START_TAG\n");
                   5315: #endif
                   5316:                }
                   5317:                break;
                   5318:             case XML_PARSER_EPILOG:
                   5319:                if (in->buf == NULL)
                   5320:                    avail = in->length - (in->cur - in->base);
                   5321:                else
                   5322:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5323:                if (avail < 1)
                   5324:                    goto done;
                   5325:                cur = in->cur[0];
                   5326:                if (IS_BLANK(cur)) {
                   5327:                    sgmlParseCharData(ctxt, 0);
                   5328:                    goto done;
                   5329:                }
                   5330:                if (avail < 2)
                   5331:                    goto done;
                   5332:                next = in->cur[1];
                   5333:                if ((cur == '<') && (next == '!') &&
                   5334:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5335:                    if ((!terminate) &&
                   5336:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5337:                        goto done;
                   5338: #ifdef DEBUG_PUSH
                   5339:                    fprintf(stderr, "HPP: Parsing Comment\n");
                   5340: #endif
                   5341:                    sgmlParseComment(ctxt);
                   5342:                    ctxt->instate = XML_PARSER_EPILOG;
                   5343:                } else if ((cur == '<') && (next == '!') &&
                   5344:                           (avail < 4)) {
                   5345:                    goto done;
                   5346:                } else {
1.6       veillard 5347:                    ctxt->errNo = XML_ERR_DOCUMENT_END;
1.1       veillard 5348:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5349:                        ctxt->sax->error(ctxt->userData,
                   5350:                            "Extra content at the end of the document\n");
                   5351:                    ctxt->wellFormed = 0;
                   5352:                    ctxt->instate = XML_PARSER_EOF;
                   5353: #ifdef DEBUG_PUSH
                   5354:                    fprintf(stderr, "HPP: entering EOF\n");
                   5355: #endif
                   5356:                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5357:                        ctxt->sax->endDocument(ctxt->userData);
                   5358:                    goto done;
                   5359:                }
                   5360:                break;
                   5361:             case XML_PARSER_START_TAG: {
                   5362:                xmlChar *name, *oldname;
                   5363:                int depth = ctxt->nameNr;
                   5364:                sgmlElemDescPtr info;
                   5365: 
                   5366:                if (avail < 2)
                   5367:                    goto done;
                   5368:                cur = in->cur[0];
                   5369:                if (cur != '<') {
                   5370:                    ctxt->instate = XML_PARSER_CONTENT;
                   5371: #ifdef DEBUG_PUSH
                   5372:                    fprintf(stderr, "HPP: entering CONTENT\n");
                   5373: #endif
                   5374:                    break;
                   5375:                }
                   5376:                if ((!terminate) &&
                   5377:                    (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5378:                    goto done;
                   5379: 
                   5380:                oldname = xmlStrdup(ctxt->name);
                   5381:                sgmlParseStartTag(ctxt);
                   5382:                name = ctxt->name;
                   5383: #ifdef DEBUG
                   5384:                if (oldname == NULL)
                   5385:                    fprintf(stderr, "Start of element %s\n", name);
                   5386:                else if (name == NULL)  
                   5387:                    fprintf(stderr, "Start of element failed, was %s\n",
                   5388:                            oldname);
                   5389:                else    
                   5390:                    fprintf(stderr, "Start of element %s, was %s\n",
                   5391:                            name, oldname);
                   5392: #endif
                   5393:                if (((depth == ctxt->nameNr) &&
1.7       veillard 5394:                     (xmlStrEqual(oldname, ctxt->name))) ||
1.1       veillard 5395:                    (name == NULL)) {
                   5396:                    if (CUR == '>')
                   5397:                        NEXT;
                   5398:                    if (oldname != NULL)
                   5399:                        xmlFree(oldname);
                   5400:                    break;
                   5401:                }
                   5402:                if (oldname != NULL)
                   5403:                    xmlFree(oldname);
                   5404: 
                   5405:                /*
                   5406:                 * Lookup the info for that element.
                   5407:                 */
                   5408:                info = sgmlTagLookup(name);
                   5409:                if (info == NULL) {
                   5410:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.4       veillard 5411:                        ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
1.1       veillard 5412:                                         name);
                   5413:                    ctxt->wellFormed = 0;
                   5414:                } else if (info->depr) {
                   5415:                    /***************************
                   5416:                    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                   5417:                        ctxt->sax->warning(ctxt->userData,
                   5418:                                           "Tag %s is deprecated\n",
                   5419:                                           name);
                   5420:                     ***************************/
                   5421:                }
                   5422: 
                   5423:                /*
                   5424:                 * Check for an Empty Element labelled the XML/SGML way
                   5425:                 */
                   5426:                if ((CUR == '/') && (NXT(1) == '>')) {
                   5427:                    SKIP(2);
                   5428:                    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   5429:                        ctxt->sax->endElement(ctxt->userData, name);
                   5430:                    oldname = sgmlnamePop(ctxt);
                   5431: #ifdef DEBUG
                   5432:                    fprintf(stderr,"End of tag the XML way: popping out %s\n",
                   5433:                            oldname);
                   5434: #endif
                   5435:                    if (oldname != NULL)
                   5436:                        xmlFree(oldname);
                   5437:                    ctxt->instate = XML_PARSER_CONTENT;
                   5438: #ifdef DEBUG_PUSH
                   5439:                    fprintf(stderr, "HPP: entering CONTENT\n");
                   5440: #endif
                   5441:                    break;
                   5442:                }
                   5443: 
                   5444:                if (CUR == '>') {
                   5445:                    NEXT;
                   5446:                } else {
                   5447:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5448:                        ctxt->sax->error(ctxt->userData, 
                   5449:                                         "Couldn't find end of Start Tag %s\n",
                   5450:                                         name);
                   5451:                    ctxt->wellFormed = 0;
                   5452: 
                   5453:                    /*
                   5454:                     * end of parsing of this node.
                   5455:                     */
1.7       veillard 5456:                    if (xmlStrEqual(name, ctxt->name)) { 
1.1       veillard 5457:                        nodePop(ctxt);
                   5458:                        oldname = sgmlnamePop(ctxt);
                   5459: #ifdef DEBUG
                   5460:                        fprintf(stderr,
                   5461:                         "End of start tag problem: popping out %s\n", oldname);
                   5462: #endif
                   5463:                        if (oldname != NULL)
                   5464:                            xmlFree(oldname);
                   5465:                    }    
                   5466: 
                   5467:                    ctxt->instate = XML_PARSER_CONTENT;
                   5468: #ifdef DEBUG_PUSH
                   5469:                    fprintf(stderr, "HPP: entering CONTENT\n");
                   5470: #endif
                   5471:                    break;
                   5472:                }
                   5473: 
                   5474:                /*
                   5475:                 * Check for an Empty Element from DTD definition
                   5476:                 */
                   5477:                if ((info != NULL) && (info->empty)) {
                   5478:                    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   5479:                        ctxt->sax->endElement(ctxt->userData, name);
                   5480:                    oldname = sgmlnamePop(ctxt);
                   5481: #ifdef DEBUG
                   5482:                    fprintf(stderr,"End of empty tag %s : popping out %s\n", name, oldname);
                   5483: #endif
                   5484:                    if (oldname != NULL)
                   5485:                        xmlFree(oldname);
                   5486:                }
                   5487:                ctxt->instate = XML_PARSER_CONTENT;
                   5488: #ifdef DEBUG_PUSH
                   5489:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5490: #endif
                   5491:                 break;
                   5492:            }
                   5493:             case XML_PARSER_CONTENT: {
                   5494:                long cons;
                   5495:                 /*
                   5496:                 * Handle preparsed entities and charRef
                   5497:                 */
                   5498:                if (ctxt->token != 0) {
                   5499:                    xmlChar chr[2] = { 0 , 0 } ;
                   5500: 
                   5501:                    chr[0] = (xmlChar) ctxt->token;
                   5502:                    sgmlCheckParagraph(ctxt);
                   5503:                    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   5504:                        ctxt->sax->characters(ctxt->userData, chr, 1);
                   5505:                    ctxt->token = 0;
                   5506:                    ctxt->checkIndex = 0;
                   5507:                }
                   5508:                if ((avail == 1) && (terminate)) {
                   5509:                    cur = in->cur[0];
                   5510:                    if ((cur != '<') && (cur != '&')) {
                   5511:                        if (ctxt->sax != NULL) {
                   5512:                            if (IS_BLANK(cur)) {
                   5513:                                if (ctxt->sax->ignorableWhitespace != NULL)
                   5514:                                    ctxt->sax->ignorableWhitespace(
                   5515:                                            ctxt->userData, &cur, 1);
                   5516:                            } else {
                   5517:                                sgmlCheckParagraph(ctxt);
                   5518:                                if (ctxt->sax->characters != NULL)
                   5519:                                    ctxt->sax->characters(
                   5520:                                            ctxt->userData, &cur, 1);
                   5521:                            }
                   5522:                        }
                   5523:                        ctxt->token = 0;
                   5524:                        ctxt->checkIndex = 0;
                   5525:                        NEXT;
                   5526:                    }
                   5527:                    break;
                   5528:                }
                   5529:                if (avail < 2)
                   5530:                    goto done;
                   5531:                cur = in->cur[0];
                   5532:                next = in->cur[1];
                   5533:                cons = ctxt->nbChars;
                   5534:                /*
                   5535:                 * Sometimes DOCTYPE arrives in the middle of the document
                   5536:                 */
                   5537:                if ((cur == '<') && (next == '!') &&
                   5538:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5539:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5540:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5541:                    (UPP(8) == 'E')) {
                   5542:                    if ((!terminate) &&
                   5543:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5544:                        goto done;
                   5545:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5546:                        ctxt->sax->error(ctxt->userData,
                   5547:                             "Misplaced DOCTYPE declaration\n");
                   5548:                    ctxt->wellFormed = 0;
                   5549:                    sgmlParseDocTypeDecl(ctxt);
                   5550:                } else if ((cur == '<') && (next == '!') &&
                   5551:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5552:                    if ((!terminate) &&
                   5553:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5554:                        goto done;
                   5555: #ifdef DEBUG_PUSH
                   5556:                    fprintf(stderr, "HPP: Parsing Comment\n");
                   5557: #endif
                   5558:                    sgmlParseComment(ctxt);
                   5559:                    ctxt->instate = XML_PARSER_CONTENT;
                   5560:                } else if ((cur == '<') && (next == '!') && (avail < 4)) {
                   5561:                    goto done;
                   5562:                } else if ((cur == '<') && (next == '/')) {
                   5563:                    ctxt->instate = XML_PARSER_END_TAG;
                   5564:                    ctxt->checkIndex = 0;
                   5565: #ifdef DEBUG_PUSH
                   5566:                    fprintf(stderr, "HPP: entering END_TAG\n");
                   5567: #endif
                   5568:                    break;
                   5569:                } else if (cur == '<') {
                   5570:                    ctxt->instate = XML_PARSER_START_TAG;
                   5571:                    ctxt->checkIndex = 0;
                   5572: #ifdef DEBUG_PUSH
                   5573:                    fprintf(stderr, "HPP: entering START_TAG\n");
                   5574: #endif
                   5575:                    break;
                   5576:                } else if (cur == '&') {
                   5577:                    if ((!terminate) &&
                   5578:                        (sgmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
                   5579:                        goto done;
                   5580: #ifdef DEBUG_PUSH
                   5581:                    fprintf(stderr, "HPP: Parsing Reference\n");
                   5582: #endif
                   5583:                    /* TODO: check generation of subtrees if noent !!! */
                   5584:                    sgmlParseReference(ctxt);
                   5585:                } else {
                   5586:                    /* TODO Avoid the extra copy, handle directly !!!!!! */
                   5587:                    /*
                   5588:                     * Goal of the following test is :
                   5589:                     *  - minimize calls to the SAX 'character' callback
                   5590:                     *    when they are mergeable
                   5591:                     */
                   5592:                    if ((ctxt->inputNr == 1) &&
                   5593:                        (avail < SGML_PARSER_BIG_BUFFER_SIZE)) {
                   5594:                        if ((!terminate) &&
                   5595:                            (sgmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
                   5596:                            goto done;
                   5597:                     }
                   5598:                    ctxt->checkIndex = 0;
                   5599: #ifdef DEBUG_PUSH
                   5600:                    fprintf(stderr, "HPP: Parsing char data\n");
                   5601: #endif
                   5602:                    sgmlParseCharData(ctxt, 0);
                   5603:                }
                   5604:                if (cons == ctxt->nbChars) {
                   5605:                    if (ctxt->node != NULL) {
                   5606:                        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5607:                            ctxt->sax->error(ctxt->userData,
                   5608:                                 "detected an error in element content\n");
                   5609:                        ctxt->wellFormed = 0;
                   5610:                        NEXT;
                   5611:                    }
                   5612:                    break;
                   5613:                }
                   5614: 
                   5615:                break;
                   5616:            }
                   5617:             case XML_PARSER_END_TAG:
                   5618:                if (avail < 2)
                   5619:                    goto done;
                   5620:                if ((!terminate) &&
                   5621:                    (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5622:                    goto done;
                   5623:                sgmlParseEndTag(ctxt);
                   5624:                if (ctxt->nameNr == 0) {
                   5625:                    ctxt->instate = XML_PARSER_EPILOG;
                   5626:                } else {
                   5627:                    ctxt->instate = XML_PARSER_CONTENT;
                   5628:                }
                   5629:                ctxt->checkIndex = 0;
                   5630: #ifdef DEBUG_PUSH
                   5631:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5632: #endif
                   5633:                break;
                   5634:             case XML_PARSER_CDATA_SECTION:
                   5635:                fprintf(stderr, "HPP: internal error, state == CDATA\n");
                   5636:                ctxt->instate = XML_PARSER_CONTENT;
                   5637:                ctxt->checkIndex = 0;
                   5638: #ifdef DEBUG_PUSH
                   5639:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5640: #endif
                   5641:                break;
                   5642:             case XML_PARSER_DTD:
                   5643:                fprintf(stderr, "HPP: internal error, state == DTD\n");
                   5644:                ctxt->instate = XML_PARSER_CONTENT;
                   5645:                ctxt->checkIndex = 0;
                   5646: #ifdef DEBUG_PUSH
                   5647:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5648: #endif
                   5649:                break;
                   5650:             case XML_PARSER_COMMENT:
                   5651:                fprintf(stderr, "HPP: internal error, state == COMMENT\n");
                   5652:                ctxt->instate = XML_PARSER_CONTENT;
                   5653:                ctxt->checkIndex = 0;
                   5654: #ifdef DEBUG_PUSH
                   5655:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5656: #endif
                   5657:                break;
                   5658:             case XML_PARSER_PI:
                   5659:                fprintf(stderr, "HPP: internal error, state == PI\n");
                   5660:                ctxt->instate = XML_PARSER_CONTENT;
                   5661:                ctxt->checkIndex = 0;
                   5662: #ifdef DEBUG_PUSH
                   5663:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5664: #endif
                   5665:                break;
                   5666:             case XML_PARSER_ENTITY_DECL:
                   5667:                fprintf(stderr, "HPP: internal error, state == ENTITY_DECL\n");
                   5668:                ctxt->instate = XML_PARSER_CONTENT;
                   5669:                ctxt->checkIndex = 0;
                   5670: #ifdef DEBUG_PUSH
                   5671:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5672: #endif
                   5673:                break;
                   5674:             case XML_PARSER_ENTITY_VALUE:
                   5675:                fprintf(stderr, "HPP: internal error, state == ENTITY_VALUE\n");
                   5676:                ctxt->instate = XML_PARSER_CONTENT;
                   5677:                ctxt->checkIndex = 0;
                   5678: #ifdef DEBUG_PUSH
                   5679:                fprintf(stderr, "HPP: entering DTD\n");
                   5680: #endif
                   5681:                break;
                   5682:             case XML_PARSER_ATTRIBUTE_VALUE:
                   5683:                fprintf(stderr, "HPP: internal error, state == ATTRIBUTE_VALUE\n");
                   5684:                ctxt->instate = XML_PARSER_START_TAG;
                   5685:                ctxt->checkIndex = 0;
                   5686: #ifdef DEBUG_PUSH
                   5687:                fprintf(stderr, "HPP: entering START_TAG\n");
                   5688: #endif
                   5689:                break;
                   5690:            case XML_PARSER_SYSTEM_LITERAL:
                   5691:                fprintf(stderr, "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
                   5692:                ctxt->instate = XML_PARSER_CONTENT;
                   5693:                ctxt->checkIndex = 0;
                   5694: #ifdef DEBUG_PUSH
                   5695:                fprintf(stderr, "HPP: entering CONTENT\n");
                   5696: #endif
                   5697:                break;
                   5698:        }
                   5699:     }
                   5700: done:    
                   5701:     if ((avail == 0) && (terminate)) {
                   5702:        sgmlAutoClose(ctxt, NULL);
                   5703:        if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 
                   5704:            /*
                   5705:             * SAX: end of the document processing.
                   5706:             */
                   5707:            ctxt->instate = XML_PARSER_EOF;
                   5708:            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5709:                ctxt->sax->endDocument(ctxt->userData);
                   5710:        }
                   5711:     }
                   5712:     if ((ctxt->myDoc != NULL) &&
                   5713:        ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
                   5714:         (ctxt->instate == XML_PARSER_EPILOG))) {
                   5715:        xmlDtdPtr dtd;
                   5716:        dtd = xmlGetIntSubset(ctxt->myDoc);
                   5717:        if (dtd == NULL)
                   5718:            ctxt->myDoc->intSubset = 
                   5719:                xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", 
                   5720:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   5721:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
                   5722:     }
                   5723: #ifdef DEBUG_PUSH
                   5724:     fprintf(stderr, "HPP: done %d\n", ret);
                   5725: #endif
                   5726:     return(ret);
                   5727: }
                   5728: 
                   5729: /**
                   5730:  * sgmlParseTry:
                   5731:  * @ctxt:  an SGML parser context
                   5732:  *
                   5733:  * Try to progress on parsing
                   5734:  *
                   5735:  * Returns zero if no parsing was possible
                   5736:  */
                   5737: int
                   5738: sgmlParseTry(sgmlParserCtxtPtr ctxt) {
                   5739:     return(sgmlParseTryOrFinish(ctxt, 0));
                   5740: }
                   5741: 
                   5742: /**
                   5743:  * sgmlParseChunk:
                   5744:  * @ctxt:  an XML parser context
                   5745:  * @chunk:  an char array
                   5746:  * @size:  the size in byte of the chunk
                   5747:  * @terminate:  last chunk indicator
                   5748:  *
                   5749:  * Parse a Chunk of memory
                   5750:  *
                   5751:  * Returns zero if no error, the xmlParserErrors otherwise.
                   5752:  */
                   5753: int
                   5754: sgmlParseChunk(sgmlParserCtxtPtr ctxt, const char *chunk, int size,
                   5755:               int terminate) {
                   5756:     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
                   5757:         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
                   5758:        int base = ctxt->input->base - ctxt->input->buf->buffer->content;
                   5759:        int cur = ctxt->input->cur - ctxt->input->base;
                   5760:        
                   5761:        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);              
                   5762:        ctxt->input->base = ctxt->input->buf->buffer->content + base;
                   5763:        ctxt->input->cur = ctxt->input->base + cur;
                   5764: #ifdef DEBUG_PUSH
                   5765:        fprintf(stderr, "HPP: pushed %d\n", size);
                   5766: #endif
                   5767: 
                   5768:        if ((terminate) || (ctxt->input->buf->buffer->use > 80))
                   5769:            sgmlParseTryOrFinish(ctxt, terminate);
                   5770:     } else if (ctxt->instate != XML_PARSER_EOF) {
                   5771:        xmlParserInputBufferPush(ctxt->input->buf, 0, "");
                   5772:         sgmlParseTryOrFinish(ctxt, terminate);
                   5773:     }
                   5774:     if (terminate) {
                   5775:        if ((ctxt->instate != XML_PARSER_EOF) &&
                   5776:            (ctxt->instate != XML_PARSER_EPILOG) &&
                   5777:            (ctxt->instate != XML_PARSER_MISC)) {
1.6       veillard 5778:            ctxt->errNo = XML_ERR_DOCUMENT_END;
1.1       veillard 5779:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5780:                ctxt->sax->error(ctxt->userData,
                   5781:                    "Extra content at the end of the document\n");
                   5782:            ctxt->wellFormed = 0;
                   5783:        } 
                   5784:        if (ctxt->instate != XML_PARSER_EOF) {
                   5785:            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5786:                ctxt->sax->endDocument(ctxt->userData);
                   5787:        }
                   5788:        ctxt->instate = XML_PARSER_EOF;
                   5789:     }
                   5790:     return((xmlParserErrors) ctxt->errNo);           
                   5791: }
                   5792: 
                   5793: /************************************************************************
                   5794:  *                                                                     *
                   5795:  *                     User entry points                               *
                   5796:  *                                                                     *
                   5797:  ************************************************************************/
                   5798: 
                   5799: /**
                   5800:  * sgmlCreatePushParserCtxt :
                   5801:  * @sax:  a SAX handler
                   5802:  * @user_data:  The user data returned on SAX callbacks
                   5803:  * @chunk:  a pointer to an array of chars
                   5804:  * @size:  number of chars in the array
                   5805:  * @filename:  an optional file name or URI
                   5806:  * @enc:  an optional encoding
                   5807:  *
                   5808:  * Create a parser context for using the SGML parser in push mode
                   5809:  * To allow content encoding detection, @size should be >= 4
                   5810:  * The value of @filename is used for fetching external entities
                   5811:  * and error/warning reports.
                   5812:  *
                   5813:  * Returns the new parser context or NULL
                   5814:  */
                   5815: sgmlParserCtxtPtr
                   5816: sgmlCreatePushParserCtxt(sgmlSAXHandlerPtr sax, void *user_data, 
                   5817:                          const char *chunk, int size, const char *filename,
                   5818:                         xmlCharEncoding enc) {
                   5819:     sgmlParserCtxtPtr ctxt;
                   5820:     sgmlParserInputPtr inputStream;
                   5821:     xmlParserInputBufferPtr buf;
                   5822: 
                   5823:     buf = xmlAllocParserInputBuffer(enc);
                   5824:     if (buf == NULL) return(NULL);
                   5825: 
                   5826:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   5827:     if (ctxt == NULL) {
                   5828:        xmlFree(buf);
                   5829:        return(NULL);
                   5830:     }
                   5831:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   5832:     sgmlInitParserCtxt(ctxt);
                   5833:     if (sax != NULL) {
                   5834:        if (ctxt->sax != &sgmlDefaultSAXHandler)
                   5835:            xmlFree(ctxt->sax);
                   5836:        ctxt->sax = (sgmlSAXHandlerPtr) xmlMalloc(sizeof(sgmlSAXHandler));
                   5837:        if (ctxt->sax == NULL) {
                   5838:            xmlFree(buf);
                   5839:            xmlFree(ctxt);
                   5840:            return(NULL);
                   5841:        }
                   5842:        memcpy(ctxt->sax, sax, sizeof(sgmlSAXHandler));
                   5843:        if (user_data != NULL)
                   5844:            ctxt->userData = user_data;
                   5845:     }  
                   5846:     if (filename == NULL) {
                   5847:        ctxt->directory = NULL;
                   5848:     } else {
                   5849:         ctxt->directory = xmlParserGetDirectory(filename);
                   5850:     }
                   5851: 
                   5852:     inputStream = sgmlNewInputStream(ctxt);
                   5853:     if (inputStream == NULL) {
                   5854:        xmlFreeParserCtxt(ctxt);
                   5855:        return(NULL);
                   5856:     }
                   5857: 
                   5858:     if (filename == NULL)
                   5859:        inputStream->filename = NULL;
                   5860:     else
                   5861:        inputStream->filename = xmlMemStrdup(filename);
                   5862:     inputStream->buf = buf;
                   5863:     inputStream->base = inputStream->buf->buffer->content;
                   5864:     inputStream->cur = inputStream->buf->buffer->content;
                   5865: 
                   5866:     inputPush(ctxt, inputStream);
                   5867: 
                   5868:     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
                   5869:         (ctxt->input->buf != NULL))  {       
                   5870:        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);              
                   5871: #ifdef DEBUG_PUSH
                   5872:        fprintf(stderr, "HPP: pushed %d\n", size);
                   5873: #endif
                   5874:     }
                   5875: 
                   5876:     return(ctxt);
                   5877: }
                   5878: 
                   5879: /**
                   5880:  * sgmlSAXParseDoc :
                   5881:  * @cur:  a pointer to an array of xmlChar
                   5882:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5883:  * @sax:  the SAX handler block
                   5884:  * @userData: if using SAX, this pointer will be provided on callbacks. 
                   5885:  *
                   5886:  * parse an SGML in-memory document and build a tree.
                   5887:  * It use the given SAX function block to handle the parsing callback.
                   5888:  * If sax is NULL, fallback to the default DOM tree building routines.
                   5889:  * 
                   5890:  * Returns the resulting document tree
                   5891:  */
                   5892: 
                   5893: sgmlDocPtr
                   5894: sgmlSAXParseDoc(xmlChar *cur, const char *encoding, sgmlSAXHandlerPtr sax, void *userData) {
                   5895:     sgmlDocPtr ret;
                   5896:     sgmlParserCtxtPtr ctxt;
                   5897: 
                   5898:     if (cur == NULL) return(NULL);
                   5899: 
                   5900: 
                   5901:     ctxt = sgmlCreateDocParserCtxt(cur, encoding);
                   5902:     if (ctxt == NULL) return(NULL);
                   5903:     if (sax != NULL) { 
                   5904:         ctxt->sax = sax;
                   5905:         ctxt->userData = userData;
                   5906:     }
                   5907: 
                   5908:     sgmlParseDocument(ctxt);
                   5909:     ret = ctxt->myDoc;
                   5910:     if (sax != NULL) {
                   5911:        ctxt->sax = NULL;
                   5912:        ctxt->userData = NULL;
                   5913:     }
                   5914:     sgmlFreeParserCtxt(ctxt);
                   5915:     
                   5916:     return(ret);
                   5917: }
                   5918: 
                   5919: /**
                   5920:  * sgmlParseDoc :
                   5921:  * @cur:  a pointer to an array of xmlChar
                   5922:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5923:  *
                   5924:  * parse an SGML in-memory document and build a tree.
                   5925:  * 
                   5926:  * Returns the resulting document tree
                   5927:  */
                   5928: 
                   5929: sgmlDocPtr
                   5930: sgmlParseDoc(xmlChar *cur, const char *encoding) {
                   5931:     return(sgmlSAXParseDoc(cur, encoding, NULL, NULL));
                   5932: }
                   5933: 
                   5934: 
                   5935: /**
                   5936:  * sgmlCreateFileParserCtxt :
                   5937:  * @filename:  the filename
                   5938:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5939:  *
                   5940:  * Create a parser context for a file content. 
                   5941:  * Automatic support for ZLIB/Compress compressed document is provided
                   5942:  * by default if found at compile-time.
                   5943:  *
                   5944:  * Returns the new parser context or NULL
                   5945:  */
                   5946: sgmlParserCtxtPtr
                   5947: sgmlCreateFileParserCtxt(const char *filename, const char *encoding)
                   5948: {
                   5949:     sgmlParserCtxtPtr ctxt;
                   5950:     sgmlParserInputPtr inputStream;
                   5951:     xmlParserInputBufferPtr buf;
                   5952:     /* sgmlCharEncoding enc; */
                   5953: 
                   5954:     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
                   5955:     if (buf == NULL) return(NULL);
                   5956: 
                   5957:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   5958:     if (ctxt == NULL) {
                   5959:         perror("malloc");
                   5960:        return(NULL);
                   5961:     }
                   5962:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   5963:     sgmlInitParserCtxt(ctxt);
                   5964:     inputStream = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   5965:     if (inputStream == NULL) {
                   5966:         perror("malloc");
                   5967:        xmlFree(ctxt);
                   5968:        return(NULL);
                   5969:     }
                   5970:     memset(inputStream, 0, sizeof(sgmlParserInput));
                   5971: 
                   5972:     inputStream->filename = xmlMemStrdup(filename);
                   5973:     inputStream->line = 1;
                   5974:     inputStream->col = 1;
                   5975:     inputStream->buf = buf;
                   5976:     inputStream->directory = NULL;
                   5977: 
                   5978:     inputStream->base = inputStream->buf->buffer->content;
                   5979:     inputStream->cur = inputStream->buf->buffer->content;
                   5980:     inputStream->free = NULL;
                   5981: 
                   5982:     inputPush(ctxt, inputStream);
                   5983:     return(ctxt);
                   5984: }
                   5985: 
                   5986: /**
                   5987:  * sgmlSAXParseFile :
                   5988:  * @filename:  the filename
                   5989:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5990:  * @sax:  the SAX handler block
                   5991:  * @userData: if using SAX, this pointer will be provided on callbacks. 
                   5992:  *
                   5993:  * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
                   5994:  * compressed document is provided by default if found at compile-time.
                   5995:  * It use the given SAX function block to handle the parsing callback.
                   5996:  * If sax is NULL, fallback to the default DOM tree building routines.
                   5997:  *
                   5998:  * Returns the resulting document tree
                   5999:  */
                   6000: 
                   6001: sgmlDocPtr
                   6002: sgmlSAXParseFile(const char *filename, const char *encoding, sgmlSAXHandlerPtr sax, 
                   6003:                  void *userData) {
                   6004:     sgmlDocPtr ret;
                   6005:     sgmlParserCtxtPtr ctxt;
                   6006:     sgmlSAXHandlerPtr oldsax = NULL;
                   6007: 
                   6008:     ctxt = sgmlCreateFileParserCtxt(filename, encoding);
                   6009:     if (ctxt == NULL) return(NULL);
                   6010:     if (sax != NULL) {
                   6011:        oldsax = ctxt->sax;
                   6012:         ctxt->sax = sax;
                   6013:         ctxt->userData = userData;
                   6014:     }
                   6015: 
                   6016:     sgmlParseDocument(ctxt);
                   6017: 
                   6018:     ret = ctxt->myDoc;
                   6019:     if (sax != NULL) {
                   6020:         ctxt->sax = oldsax;
                   6021:         ctxt->userData = NULL;
                   6022:     }
                   6023:     sgmlFreeParserCtxt(ctxt);
                   6024:     
                   6025:     return(ret);
                   6026: }
                   6027: 
                   6028: /**
                   6029:  * sgmlParseFile :
                   6030:  * @filename:  the filename
                   6031:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   6032:  *
                   6033:  * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
                   6034:  * compressed document is provided by default if found at compile-time.
                   6035:  *
                   6036:  * Returns the resulting document tree
                   6037:  */
                   6038: 
                   6039: sgmlDocPtr
                   6040: sgmlParseFile(const char *filename, const char *encoding) {
                   6041:     return(sgmlSAXParseFile(filename, encoding, NULL, NULL));
                   6042: }
                   6043: 
                   6044: #endif /* LIBXML_SGML_ENABLED */
Webmaster