Annotation of XML/SGMLparser.c, revision 1.10

1.1       veillard    1: /*
                      2:  * SGMLparser.c : an attempt to parse Docbook documents
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
                      9: #ifdef WIN32
                     10: #include "win32config.h"
                     11: #else
                     12: #include "config.h"
                     13: #endif
                     14: 
                     15: #include "xmlversion.h"
                     16: #ifdef LIBXML_SGML_ENABLED
                     17: 
                     18: #include <stdio.h>
                     19: #include <string.h>
                     20: #ifdef HAVE_CTYPE_H
                     21: #include <ctype.h>
                     22: #endif
                     23: #ifdef HAVE_STDLIB_H
                     24: #include <stdlib.h>
                     25: #endif
                     26: #ifdef HAVE_SYS_STAT_H
                     27: #include <sys/stat.h>
                     28: #endif
                     29: #ifdef HAVE_FCNTL_H
                     30: #include <fcntl.h>
                     31: #endif
                     32: #ifdef HAVE_UNISTD_H
                     33: #include <unistd.h>
                     34: #endif
                     35: #ifdef HAVE_ZLIB_H
                     36: #include <zlib.h>
                     37: #endif
                     38: 
                     39: #include <libxml/xmlmemory.h>
                     40: #include <libxml/tree.h>
                     41: #include <libxml/SGMLparser.h>
                     42: #include <libxml/entities.h>
                     43: #include <libxml/encoding.h>
                     44: #include <libxml/parser.h>
                     45: #include <libxml/valid.h>
                     46: #include <libxml/parserInternals.h>
                     47: #include <libxml/xmlIO.h>
                     48: #include <libxml/SAX.h>
1.3       veillard   49: #include <libxml/uri.h>
1.8       veillard   50: #include <libxml/xmlerror.h>
1.1       veillard   51: 
                     52: #define SGML_MAX_NAMELEN 1000
                     53: #define SGML_PARSER_BIG_BUFFER_SIZE 1000
                     54: #define SGML_PARSER_BUFFER_SIZE 100
                     55: 
                     56: /* #define DEBUG */
                     57: /* #define DEBUG_PUSH */
                     58: 
                     59: /************************************************************************
                     60:  *                                                                     *
                     61:  *             Parser stacks related functions and macros              *
                     62:  *                                                                     *
                     63:  ************************************************************************/
                     64: 
                     65: /*
                     66:  * Generic function for accessing stacks in the Parser Context
                     67:  */
                     68: 
                     69: #define PUSH_AND_POP(scope, type, name)                                        \
                     70: scope int sgml##name##Push(sgmlParserCtxtPtr ctxt, type value) {       \
                     71:     if (ctxt->name##Nr >= ctxt->name##Max) {                           \
                     72:        ctxt->name##Max *= 2;                                           \
                     73:         ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,         \
                     74:                     ctxt->name##Max * sizeof(ctxt->name##Tab[0]));     \
                     75:         if (ctxt->name##Tab == NULL) {                                 \
1.10    ! veillard   76:            xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");                      \
1.1       veillard   77:            return(0);                                                  \
                     78:        }                                                               \
                     79:     }                                                                  \
                     80:     ctxt->name##Tab[ctxt->name##Nr] = value;                           \
                     81:     ctxt->name = value;                                                        \
                     82:     return(ctxt->name##Nr++);                                          \
                     83: }                                                                      \
                     84: scope type sgml##name##Pop(sgmlParserCtxtPtr ctxt) {                   \
                     85:     type ret;                                                          \
                     86:     if (ctxt->name##Nr < 0) return(0);                                 \
                     87:     ctxt->name##Nr--;                                                  \
                     88:     if (ctxt->name##Nr < 0) return(0);                                 \
                     89:     if (ctxt->name##Nr > 0)                                            \
                     90:        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
                     91:     else                                                               \
                     92:         ctxt->name = NULL;                                             \
                     93:     ret = ctxt->name##Tab[ctxt->name##Nr];                             \
                     94:     ctxt->name##Tab[ctxt->name##Nr] = 0;                               \
                     95:     return(ret);                                                       \
                     96: }                                                                      \
                     97: 
                     98: PUSH_AND_POP(extern, xmlNodePtr, node)
                     99: PUSH_AND_POP(extern, xmlChar*, name)
                    100: 
                    101: /*
                    102:  * Macros for accessing the content. Those should be used only by the parser,
                    103:  * and not exported.
                    104:  *
                    105:  * Dirty macros, i.e. one need to make assumption on the context to use them
                    106:  *
                    107:  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
                    108:  *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
                    109:  *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
                    110:  *           in UNICODE mode. This should be used internally by the parser
                    111:  *           only to compare to ASCII values otherwise it would break when
                    112:  *           running with UTF-8 encoding.
                    113:  *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
                    114:  *           to compare on ASCII based substring.
                    115:  *   UPP(n)  returns the n'th next xmlChar converted to uppercase. Same as CUR
                    116:  *           it should be used only to compare on ASCII based substring.
                    117:  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
                    118:  *           strings within the parser.
                    119:  *
                    120:  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
                    121:  *
                    122:  *   CURRENT Returns the current char value, with the full decoding of
                    123:  *           UTF-8 if we are using this mode. It returns an int.
                    124:  *   NEXT    Skip to the next character, this does the proper decoding
                    125:  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
                    126:  *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
                    127:  */
                    128: 
                    129: #define UPPER (toupper(*ctxt->input->cur))
                    130: 
                    131: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
                    132: 
                    133: #define NXT(val) ctxt->input->cur[(val)]
                    134: 
                    135: #define UPP(val) (toupper(ctxt->input->cur[(val)]))
                    136: 
                    137: #define CUR_PTR ctxt->input->cur
                    138: 
                    139: #define SHRINK  xmlParserInputShrink(ctxt->input)
                    140: 
                    141: #define GROW  xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
                    142: 
                    143: #define CURRENT ((int) (*ctxt->input->cur))
                    144: 
1.9       veillard  145: #define SKIP_BLANKS sgmlSkipBlankChars(ctxt)
1.1       veillard  146: 
                    147: #if 0
                    148: #define CUR ((int) (*ctxt->input->cur))
                    149: #define NEXT sgmlNextChar(ctxt);
                    150: #else
                    151: /* Inported from XML */
                    152: 
                    153: /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
                    154: #define CUR ((int) (*ctxt->input->cur))
1.9       veillard  155: #define NEXT xmlNextChar(ctxt),ctxt->nbChars++
1.1       veillard  156: 
                    157: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
                    158: #define NXT(val) ctxt->input->cur[(val)]
                    159: #define CUR_PTR ctxt->input->cur
                    160: 
                    161: 
1.9       veillard  162: #define NEXTL(l) do {                                                  \
1.1       veillard  163:     if (*(ctxt->input->cur) == '\n') {                                 \
                    164:        ctxt->input->line++; ctxt->input->col = 1;                      \
                    165:     } else ctxt->input->col++;                                         \
1.9       veillard  166:     ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++;           \
                    167:   } while (0)
1.1       veillard  168:     
                    169: /************
                    170:     \
                    171:     if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);    \
                    172:     if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
                    173:  ************/
                    174: 
1.9       veillard  175: #define CUR_CHAR(l) sgmlCurrentChar(ctxt, &l)
                    176: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1.1       veillard  177: 
                    178: #define COPY_BUF(l,b,i,v)                                              \
                    179:     if (l == 1) b[i++] = (xmlChar) v;                                  \
1.9       veillard  180:     else i += xmlCopyChar(l,&b[i],v)
1.1       veillard  181: #endif
                    182: 
                    183: /**
                    184:  * sgmlCurrentChar:
                    185:  * @ctxt:  the SGML parser context
                    186:  * @len:  pointer to the length of the char read
                    187:  *
                    188:  * The current char value, if using UTF-8 this may actaully span multiple
                    189:  * bytes in the input buffer. Implement the end of line normalization:
                    190:  * 2.11 End-of-Line Handling
                    191:  * If the encoding is unspecified, in the case we find an ISO-Latin-1
                    192:  * char, then the encoding converter is plugged in automatically.
                    193:  *
                    194:  * Returns the current char value and its lenght
                    195:  */
                    196: 
                    197: int
                    198: sgmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
                    199:     if (ctxt->instate == XML_PARSER_EOF)
                    200:        return(0);
                    201: 
                    202:     if (ctxt->token != 0) {
                    203:        *len = 0;
                    204:        return(ctxt->token);
                    205:     }  
                    206:     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
                    207:        /*
                    208:         * We are supposed to handle UTF8, check it's valid
                    209:         * From rfc2044: encoding of the Unicode values on UTF-8:
                    210:         *
                    211:         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                    212:         * 0000 0000-0000 007F   0xxxxxxx
                    213:         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                    214:         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                    215:         *
                    216:         * Check for the 0x110000 limit too
                    217:         */
                    218:        const unsigned char *cur = ctxt->input->cur;
                    219:        unsigned char c;
                    220:        unsigned int val;
                    221: 
                    222:        c = *cur;
                    223:        if (c & 0x80) {
                    224:            if (cur[1] == 0)
                    225:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    226:            if ((cur[1] & 0xc0) != 0x80)
                    227:                goto encoding_error;
                    228:            if ((c & 0xe0) == 0xe0) {
                    229: 
                    230:                if (cur[2] == 0)
                    231:                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    232:                if ((cur[2] & 0xc0) != 0x80)
                    233:                    goto encoding_error;
                    234:                if ((c & 0xf0) == 0xf0) {
                    235:                    if (cur[3] == 0)
                    236:                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    237:                    if (((c & 0xf8) != 0xf0) ||
                    238:                        ((cur[3] & 0xc0) != 0x80))
                    239:                        goto encoding_error;
                    240:                    /* 4-byte code */
                    241:                    *len = 4;
                    242:                    val = (cur[0] & 0x7) << 18;
                    243:                    val |= (cur[1] & 0x3f) << 12;
                    244:                    val |= (cur[2] & 0x3f) << 6;
                    245:                    val |= cur[3] & 0x3f;
                    246:                } else {
                    247:                  /* 3-byte code */
                    248:                    *len = 3;
                    249:                    val = (cur[0] & 0xf) << 12;
                    250:                    val |= (cur[1] & 0x3f) << 6;
                    251:                    val |= cur[2] & 0x3f;
                    252:                }
                    253:            } else {
                    254:              /* 2-byte code */
                    255:                *len = 2;
                    256:                val = (cur[0] & 0x1f) << 6;
                    257:                val |= cur[1] & 0x3f;
                    258:            }
                    259:            if (!IS_CHAR(val)) {
1.6       veillard  260:                ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1       veillard  261:                if ((ctxt->sax != NULL) &&
                    262:                    (ctxt->sax->error != NULL))
                    263:                    ctxt->sax->error(ctxt->userData, 
                    264:                                     "Char 0x%X out of allowed range\n", val);
                    265:                ctxt->wellFormed = 0;
                    266:                ctxt->disableSAX = 1;
                    267:            }    
                    268:            return(val);
                    269:        } else {
                    270:            /* 1-byte code */
                    271:            *len = 1;
                    272:            return((int) *ctxt->input->cur);
                    273:        }
                    274:     }
                    275:     /*
                    276:      * Assume it's a fixed lenght encoding (1) with
                    277:      * a compatibke encoding for the ASCII set, since
                    278:      * XML constructs only use < 128 chars
                    279:      */
                    280:     *len = 1;
                    281:     if ((int) *ctxt->input->cur < 0x80)
                    282:        return((int) *ctxt->input->cur);
                    283: 
                    284:     /*
                    285:      * Humm this is bad, do an automatic flow conversion
                    286:      */
                    287:     xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
                    288:     ctxt->charset = XML_CHAR_ENCODING_UTF8;
                    289:     return(xmlCurrentChar(ctxt, len));
                    290: 
                    291: encoding_error:
                    292:     /*
                    293:      * If we detect an UTF8 error that probably mean that the
                    294:      * input encoding didn't get properly advertized in the
                    295:      * declaration header. Report the error and switch the encoding
                    296:      * to ISO-Latin-1 (if you don't like this policy, just declare the
                    297:      * encoding !)
                    298:      */
1.6       veillard  299:     ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1       veillard  300:     if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
                    301:        ctxt->sax->error(ctxt->userData, 
                    302:                         "Input is not proper UTF-8, indicate encoding !\n");
                    303:        ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    304:                        ctxt->input->cur[0], ctxt->input->cur[1],
                    305:                        ctxt->input->cur[2], ctxt->input->cur[3]);
                    306:     }
                    307: 
                    308:     ctxt->charset = XML_CHAR_ENCODING_8859_1; 
                    309:     *len = 1;
                    310:     return((int) *ctxt->input->cur);
                    311: }
                    312: 
                    313: /**
                    314:  * sgmlNextChar:
                    315:  * @ctxt:  the SGML parser context
                    316:  *
                    317:  * Skip to the next char input char.
                    318:  */
                    319: 
                    320: void
                    321: sgmlNextChar(sgmlParserCtxtPtr ctxt) {
                    322:     if (ctxt->instate == XML_PARSER_EOF)
                    323:        return;
                    324:     if ((*ctxt->input->cur == 0) &&
                    325:         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
                    326:            xmlPopInput(ctxt);
                    327:     } else {
                    328:         if (*(ctxt->input->cur) == '\n') {
                    329:            ctxt->input->line++; ctxt->input->col = 1;
                    330:        } else ctxt->input->col++;
                    331:        ctxt->input->cur++;
                    332:        ctxt->nbChars++;
                    333:         if (*ctxt->input->cur == 0)
                    334:            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    335:     }
                    336: }
                    337: 
                    338: /**
                    339:  * sgmlSkipBlankChars:
                    340:  * @ctxt:  the SGML parser context
                    341:  *
                    342:  * skip all blanks character found at that point in the input streams.
                    343:  *
                    344:  * Returns the number of space chars skipped
                    345:  */
                    346: 
                    347: int
                    348: sgmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
                    349:     int res = 0;
                    350: 
                    351:     while (IS_BLANK(*(ctxt->input->cur))) {
                    352:        if ((*ctxt->input->cur == 0) &&
                    353:            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
                    354:                xmlPopInput(ctxt);
                    355:        } else {
                    356:            if (*(ctxt->input->cur) == '\n') {
                    357:                ctxt->input->line++; ctxt->input->col = 1;
                    358:            } else ctxt->input->col++;
                    359:            ctxt->input->cur++;
                    360:            ctxt->nbChars++;
                    361:            if (*ctxt->input->cur == 0)
                    362:                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    363:        }
                    364:        res++;
                    365:     }
                    366:     return(res);
                    367: }
                    368: 
                    369: 
                    370: 
                    371: /************************************************************************
                    372:  *                                                                     *
                    373:  *             The list of SGML elements and their properties          *
                    374:  *                                                                     *
                    375:  ************************************************************************/
                    376: 
                    377: /*
                    378:  *  Start Tag: 1 means the start tag can be omitted
                    379:  *  End Tag:   1 means the end tag can be omitted
                    380:  *             2 means it's forbidden (empty elements)
                    381:  *  Depr:      this element is deprecated
                    382:  *  DTD:       1 means that this element is valid only in the Loose DTD
                    383:  *             2 means that this element is valid only in the Frameset DTD
                    384:  *
                    385:  * Name,Start Tag,End Tag,  Empty,  Depr.,    DTD, Description
                    386:  */
                    387: sgmlElemDesc  docbookElementTable[] = {
                    388: { "abbrev",    0,      0,      0,      3,      0, "" }, /* word */
                    389: { "abstract",  0,      0,      0,      9,      0, "" }, /* title */
                    390: { "accel",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    391: { "ackno",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    392: { "acronym",   0,      0,      0,      3,      0, "" }, /* word */
                    393: { "action",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    394: { "address",   0,      0,      0,      1,      0, "" },
                    395: { "affiliation",0,     0,      0,      9,      0, "" }, /* shortaffil */
                    396: { "alt",       0,      0,      0,      1,      0, "" },
                    397: { "anchor",    0,      2,      1,      0,      0, "" },
                    398: { "answer",    0,      0,      0,      9,      0, "" }, /* label */
                    399: { "appendix",  0,      0,      0,      9,      0, "" }, /* appendixinfo */
                    400: { "appendixinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    401: { "application",0,     0,      0,      2,      0, "" }, /* para */
                    402: { "area",      0,      2,      1,      0,      0, "" },
                    403: { "areaset",   0,      0,      0,      9,      0, "" }, /* area */
                    404: { "areaspec",  0,      0,      0,      9,      0, "" }, /* area */
                    405: { "arg",       0,      0,      0,      1,      0, "" },
                    406: { "article",   0,      0,      0,      9,      0, "" }, /* div.title.content */
                    407: { "articleinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    408: { "artpagenums",0,     0,      0,      4,      0, "" }, /* docinfo */
                    409: { "attribution",0,     0,      0,      2,      0, "" }, /* para */
                    410: { "audiodata", 0,      2,      1,      0,      0, "" },
                    411: { "audioobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    412: { "authorblurb",0,     0,      0,      9,      0, "" }, /* title */
                    413: { "authorgroup",0,     0,      0,      9,      0, "" }, /* author */
                    414: { "authorinitials",0,  0,      0,      4,      0, "" }, /* docinfo */
                    415: { "author",    0,      0,      0,      9,      0, "" }, /* person.ident.mix */
                    416: { "beginpage", 0,      2,      1,      0,      0, "" },
                    417: { "bibliodiv", 0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    418: { "biblioentry",0,     0,      0,      9,      0, "" }, /* articleinfo */
                    419: { "bibliography",0,    0,      0,      9,      0, "" }, /* bibliographyinfo */
                    420: { "bibliographyinfo",0,        0,      0,      9,      0, "" }, /* graphic */
                    421: { "bibliomisc",        0,      0,      0,      2,      0, "" }, /* para */
                    422: { "bibliomixed",0,     0,      0,      1,      0, "" }, /* %bibliocomponent.mix, bibliomset) */
                    423: { "bibliomset",        0,      0,      0,      1,      0, "" }, /* %bibliocomponent.mix; | bibliomset) */
                    424: { "biblioset", 0,      0,      0,      9,      0, "" }, /* bibliocomponent.mix */
                    425: { "blockquote",        0,      0,      0,      9,      0, "" }, /* title */
                    426: { "book",      0,      0,      0,      9,      0, "" }, /* div.title.content */
                    427: { "bookinfo",  0,      0,      0,      9,      0, "" }, /* graphic */
                    428: { "bridgehead",        0,      0,      0,      8,      0, "" }, /* title */
                    429: { "callout",   0,      0,      0,      9,      0, "" }, /* component.mix */
                    430: { "calloutlist",0,     0,      0,      9,      0, "" }, /* formalobject.title.content */
                    431: { "caption",   0,      0,      0,      9,      0, "" }, /* textobject.mix */
                    432: { "caution",   0,      0,      0,      9,      0, "" }, /* title */
                    433: { "chapter",   0,      0,      0,      9,      0, "" }, /* chapterinfo */
                    434: { "chapterinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    435: { "citation",  0,      0,      0,      2,      0, "" }, /* para */
                    436: { "citerefentry",0,    0,      0,      9,      0, "" }, /* refentrytitle */
                    437: { "citetitle", 0,      0,      0,      2,      0, "" }, /* para */
                    438: { "city",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    439: { "classname", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    440: { "classsynopsisinfo",0,0,     0,      9,      0, "" }, /* cptr */
                    441: { "classsynopsis",0,   0,      0,      9,      0, "" }, /* ooclass */
                    442: { "cmdsynopsis",0,     0,      0,      9,      0, "" }, /* command */
                    443: { "co",                0,      2,      1,      0,      0, "" },
                    444: { "collab",    0,      0,      0,      9,      0, "" }, /* collabname */
                    445: { "collabname",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    446: { "colophon",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    447: { "colspec",   0,      2,      1,      0,      0, "" },
                    448: { "colspec",   0,      2,      1,      0,      0, "" },
                    449: { "command",   0,      0,      0,      9,      0, "" }, /* cptr */
                    450: { "computeroutput",0,  0,      0,      9,      0, "" }, /* cptr */
                    451: { "confdates", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    452: { "confgroup", 0,      0,      0,      9,      0, "" }, /* confdates */
                    453: { "confnum",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    454: { "confsponsor",0,     0,      0,      4,      0, "" }, /* docinfo */
                    455: { "conftitle", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    456: { "constant",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    457: { "constructorsynopsis",0,0,   0,      9,      0, "" }, /* modifier */
                    458: { "contractnum",0,     0,      0,      4,      0, "" }, /* docinfo */
                    459: { "contractsponsor",0, 0,      0,      4,      0, "" }, /* docinfo */
                    460: { "contrib",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    461: { "copyright", 0,      0,      0,      9,      0, "" }, /* year */
                    462: { "corpauthor",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    463: { "corpname",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    464: { "country",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    465: { "database",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    466: { "date",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    467: { "dedication",        0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    468: { "destructorsynopsis",0,0,    0,      9,      0, "" }, /* modifier */
                    469: { "edition",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    470: { "editor",    0,      0,      0,      9,      0, "" }, /* person.ident.mix */
                    471: { "email",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    472: { "emphasis",  0,      0,      0,      2,      0, "" }, /* para */
                    473: { "entry",     0,      0,      0,      9,      0, "" }, /* tbl.entry.mdl */
                    474: { "entrytbl",  0,      0,      0,      9,      0, "" }, /* tbl.entrytbl.mdl */
                    475: { "envar",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    476: { "epigraph",  0,      0,      0,      9,      0, "" }, /* attribution */
                    477: { "equation",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    478: { "errorcode", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    479: { "errorname", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    480: { "errortype", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    481: { "example",   0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    482: { "exceptionname",0,   0,      0,      7,      0, "" }, /* smallcptr */
                    483: { "fax",       0,      0,      0,      4,      0, "" }, /* docinfo */
                    484: { "fieldsynopsis",     0,      0,      0,      9,      0, "" }, /* modifier */
                    485: { "figure",    0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    486: { "filename",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    487: { "firstname", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    488: { "firstterm", 0,      0,      0,      3,      0, "" }, /* word */
                    489: { "footnote",  0,      0,      0,      9,      0, "" }, /* footnote.mix */
                    490: { "footnoteref",0,     2,      1,      0,      0, "" },
                    491: { "foreignphrase",0,   0,      0,      2,      0, "" }, /* para */
                    492: { "formalpara",        0,      0,      0,      9,      0, "" }, /* title */
                    493: { "funcdef",   0,      0,      0,      1,      0, "" },
                    494: { "funcparams",        0,      0,      0,      9,      0, "" }, /* cptr */
                    495: { "funcprototype",0,   0,      0,      9,      0, "" }, /* funcdef */
                    496: { "funcsynopsis",0,    0,      0,      9,      0, "" }, /* funcsynopsisinfo */
                    497: { "funcsynopsisinfo",  0,      0,      0,      9,      0, "" }, /* cptr */
                    498: { "function",  0,      0,      0,      9,      0, "" }, /* cptr */
                    499: { "glossary",  0,      0,      0,      9,      0, "" }, /* glossaryinfo */
                    500: { "glossaryinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    501: { "glossdef",  0,      0,      0,      9,      0, "" }, /* glossdef.mix */
                    502: { "glossdiv",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    503: { "glossentry",        0,      0,      0,      9,      0, "" }, /* glossterm */
                    504: { "glosslist", 0,      0,      0,      9,      0, "" }, /* glossentry */
                    505: { "glossseealso",0,    0,      0,      2,      0, "" }, /* para */
                    506: { "glosssee",  0,      0,      0,      2,      0, "" }, /* para */
                    507: { "glossterm", 0,      0,      0,      2,      0, "" }, /* para */
                    508: { "graphic",   0,      2,      1,      0,      0, "" },
                    509: { "graphicco", 0,      0,      0,      9,      0, "" }, /* areaspec */
                    510: { "group",     0,      0,      0,      9,      0, "" }, /* arg */
                    511: { "guibutton", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    512: { "guiicon",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    513: { "guilabel",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    514: { "guimenuitem",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    515: { "guimenu",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    516: { "guisubmenu",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    517: { "hardware",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    518: { "highlights",        0,      0,      0,      9,      0, "" }, /* highlights.mix */
                    519: { "holder",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    520: { "honorific", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    521: { "imagedata", 0,      2,      1,      0,      0, "" },
                    522: { "imageobjectco",0,   0,      0,      9,      0, "" }, /* areaspec */
                    523: { "imageobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    524: { "important", 0,      0,      0,      9,      0, "" }, /* title */
                    525: { "indexdiv",  0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    526: { "indexentry",        0,      0,      0,      9,      0, "" }, /* primaryie */
                    527: { "index",     0,      0,      0,      9,      0, "" }, /* indexinfo */
                    528: { "indexinfo", 0,      0,      0,      9,      0, "" }, /* graphic */
                    529: { "indexterm", 0,      0,      0,      9,      0, "" }, /* primary */
                    530: { "informalequation",0,        0,      0,      9,      0, "" }, /* equation.content */
                    531: { "informalexample",0, 0,      0,      9,      0, "" }, /* example.mix */
                    532: { "informalfigure",0,  0,      0,      9,      0, "" }, /* figure.mix */
                    533: { "informaltable",0,   0,      0,      9,      0, "" }, /* graphic */
                    534: { "initializer",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    535: { "inlineequation",0,  0,      0,      9,      0, "" }, /* inlineequation.content */
                    536: { "inlinegraphic",0,   2,      1,      0,      0, "" },
                    537: { "inlinemediaobject",0,0,     0,      9,      0, "" }, /* objectinfo */
                    538: { "interfacename",0,   0,      0,      7,      0, "" }, /* smallcptr */
                    539: { "interface", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    540: { "invpartnumber",0,   0,      0,      4,      0, "" }, /* docinfo */
                    541: { "isbn",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    542: { "issn",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    543: { "issuenum",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    544: { "itemizedlist",0,    0,      0,      9,      0, "" }, /* formalobject.title.content */
                    545: { "itermset",  0,      0,      0,      9,      0, "" }, /* indexterm */
                    546: { "jobtitle",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    547: { "keycap",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    548: { "keycode",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    549: { "keycombo",  0,      0,      0,      9,      0, "" }, /* keycap */
                    550: { "keysym",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    551: { "keyword",   0,      0,      0,      1,      0, "" },
                    552: { "keywordset",        0,      0,      0,      9,      0, "" }, /* keyword */
                    553: { "label",     0,      0,      0,      3,      0, "" }, /* word */
                    554: { "legalnotice",0,     0,      0,      9,      0, "" }, /* title */
                    555: { "lineage",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    556: { "lineannotation",0,  0,      0,      2,      0, "" }, /* para */
                    557: { "link",      0,      0,      0,      2,      0, "" }, /* para */
                    558: { "listitem",  0,      0,      0,      9,      0, "" }, /* component.mix */
                    559: { "literal",   0,      0,      0,      9,      0, "" }, /* cptr */
                    560: { "literallayout",0,   0,      0,      2,      0, "" }, /* para */
                    561: { "lot",       0,      0,      0,      9,      0, "" }, /* bookcomponent.title.content */
                    562: { "lotentry",  0,      0,      0,      2,      0, "" }, /* para */
                    563: { "manvolnum", 0,      0,      0,      3,      0, "" }, /* word */
                    564: { "markup",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    565: { "medialabel",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    566: { "mediaobjectco",0,   0,      0,      9,      0, "" }, /* objectinfo */
                    567: { "mediaobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    568: { "member",    0,      0,      0,      2,      0, "" }, /* para */
                    569: { "menuchoice",        0,      0,      0,      9,      0, "" }, /* shortcut */
                    570: { "methodname",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    571: { "methodparam",0,     0,      0,      9,      0, "" }, /* modifier */
                    572: { "methodsynopsis",0,  0,      0,      9,      0, "" }, /* modifier */
                    573: { "modespec",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    574: { "modifier",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    575: { "mousebutton",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    576: { "msgaud",    0,      0,      0,      2,      0, "" }, /* para */
                    577: { "msgentry",  0,      0,      0,      9,      0, "" }, /* msg */
                    578: { "msgexplan", 0,      0,      0,      9,      0, "" }, /* title */
                    579: { "msginfo",   0,      0,      0,      9,      0, "" }, /* msglevel */
                    580: { "msglevel",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    581: { "msgmain",   0,      0,      0,      9,      0, "" }, /* title */
                    582: { "msgorig",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    583: { "msgrel",    0,      0,      0,      9,      0, "" }, /* title */
                    584: { "msgset",    0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    585: { "msgsub",    0,      0,      0,      9,      0, "" }, /* title */
                    586: { "msgtext",   0,      0,      0,      9,      0, "" }, /* component.mix */
                    587: { "msg",       0,      0,      0,      9,      0, "" }, /* title */
                    588: { "note",      0,      0,      0,      9,      0, "" }, /* title */
                    589: { "objectinfo",        0,      0,      0,      9,      0, "" }, /* graphic */
                    590: { "olink",     0,      0,      0,      2,      0, "" }, /* para */
                    591: { "ooclass",   0,      0,      0,      9,      0, "" }, /* modifier */
                    592: { "ooexception",0,     0,      0,      9,      0, "" }, /* modifier */
                    593: { "oointerface",0,     0,      0,      9,      0, "" }, /* modifier */
                    594: { "optional",  0,      0,      0,      9,      0, "" }, /* cptr */
                    595: { "option",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    596: { "orderedlist",0,     0,      0,      9,      0, "" }, /* formalobject.title.content */
                    597: { "orgdiv",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    598: { "orgname",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    599: { "otheraddr", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    600: { "othercredit",0,     0,      0,      9,      0, "" }, /* person.ident.mix */
                    601: { "othername", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    602: { "pagenums",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    603: { "paramdef",  0,      0,      0,      1,      0, "" },
                    604: { "parameter", 0,      0,      0,      7,      0, "" }, /* smallcptr */
                    605: { "para",      0,      0,      0,      2,      0, "" }, /* para */
                    606: { "partinfo",  0,      0,      0,      9,      0, "" }, /* graphic */
                    607: { "partintro", 0,      0,      0,      9,      0, "" }, /* div.title.content */
                    608: { "part",      0,      0,      0,      9,      0, "" }, /* partinfo */
                    609: { "phone",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    610: { "phrase",    0,      0,      0,      2,      0, "" }, /* para */
                    611: { "pob",       0,      0,      0,      4,      0, "" }, /* docinfo */
                    612: { "postcode",  0,      0,      0,      4,      0, "" }, /* docinfo */
                    613: { "prefaceinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    614: { "preface",   0,      0,      0,      9,      0, "" }, /* prefaceinfo */
                    615: { "primaryie", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    616: { "primary  ", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    617: { "printhistory",0,    0,      0,      9,      0, "" }, /* para.class */
                    618: { "procedure", 0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    619: { "productname",0,     0,      0,      2,      0, "" }, /* para */
                    620: { "productnumber",0,   0,      0,      4,      0, "" }, /* docinfo */
                    621: { "programlistingco",0,        0,      0,      9,      0, "" }, /* areaspec */
                    622: { "programlisting",0,  0,      0,      2,      0, "" }, /* para */
                    623: { "prompt",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    624: { "property",  0,      0,      0,      7,      0, "" }, /* smallcptr */
                    625: { "pubdate",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    626: { "publishername",0,   0,      0,      4,      0, "" }, /* docinfo */
                    627: { "publisher", 0,      0,      0,      9,      0, "" }, /* publishername */
                    628: { "pubsnumber",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    629: { "qandadiv",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    630: { "qandaentry",        0,      0,      0,      9,      0, "" }, /* revhistory */
                    631: { "qandaset",  0,      0,      0,      9,      0, "" }, /* formalobject.title.content */
                    632: { "question",  0,      0,      0,      9,      0, "" }, /* label */
                    633: { "quote",     0,      0,      0,      2,      0, "" }, /* para */
                    634: { "refclass",  0,      0,      0,      9,      0, "" }, /* refclass.char.mix */
                    635: { "refdescriptor",0,   0,      0,      9,      0, "" }, /* refname.char.mix */
                    636: { "refentryinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    637: { "refentry",  0,      0,      0,      9,      0, "" }, /* ndxterm.class */
                    638: { "refentrytitle",0,   0,      0,      2,      0, "" }, /* para */
                    639: { "referenceinfo",0,   0,      0,      9,      0, "" }, /* graphic */
                    640: { "reference", 0,      0,      0,      9,      0, "" }, /* referenceinfo */
                    641: { "refmeta",   0,      0,      0,      9,      0, "" }, /* ndxterm.class */
                    642: { "refmiscinfo",0,     0,      0,      4,      0, "" }, /* docinfo */
                    643: { "refnamediv",        0,      0,      0,      9,      0, "" }, /* refdescriptor */
                    644: { "refname",   0,      0,      0,      9,      0, "" }, /* refname.char.mix */
                    645: { "refpurpose",        0,      0,      0,      9,      0, "" }, /* refinline.char.mix */
                    646: { "refsect1info",0,    0,      0,      9,      0, "" }, /* graphic */
                    647: { "refsect1",  0,      0,      0,      9,      0, "" }, /* refsect */
                    648: { "refsect2info",0,    0,      0,      9,      0, "" }, /* graphic */
                    649: { "refsect2",  0,      0,      0,      9,      0, "" }, /* refsect */
                    650: { "refsect3info",0,    0,      0,      9,      0, "" }, /* graphic */
                    651: { "refsect3",  0,      0,      0,      9,      0, "" }, /* refsect */
                    652: { "refsynopsisdivinfo",0,0,    0,      9,      0, "" }, /* graphic */
                    653: { "refsynopsisdiv",0,  0,      0,      9,      0, "" }, /* refsynopsisdivinfo */
                    654: { "releaseinfo",0,     0,      0,      4,      0, "" }, /* docinfo */
                    655: { "remark",    0,      0,      0,      2,      0, "" }, /* para */
                    656: { "replaceable",0,     0,      0,      1,      0, "" },
                    657: { "returnvalue",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    658: { "revdescription",0,  0,      0,      9,      0, "" }, /* revdescription.mix */
                    659: { "revhistory",        0,      0,      0,      9,      0, "" }, /* revision */
                    660: { "revision",  0,      0,      0,      9,      0, "" }, /* revnumber */
                    661: { "revnumber", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    662: { "revremark", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    663: { "row",       0,      0,      0,      9,      0, "" }, /* tbl.row.mdl */
                    664: { "row",       0,      0,      0,      9,      0, "" }, /* tbl.row.mdl */
                    665: { "sbr",       0,      2,      1,      0,      0, "" },
                    666: { "screenco",  0,      0,      0,      9,      0, "" }, /* areaspec */
                    667: { "screeninfo",        0,      0,      0,      2,      0, "" }, /* para */
                    668: { "screen",    0,      0,      0,      2,      0, "" }, /* para */
                    669: { "screenshot",        0,      0,      0,      9,      0, "" }, /* screeninfo */
                    670: { "secondaryie",0,     0,      0,      4,      0, "" }, /* ndxterm */
                    671: { "secondary", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    672: { "sect1info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    673: { "sect1",     0,      0,      0,      9,      0, "" }, /* sect */
                    674: { "sect2info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    675: { "sect2",     0,      0,      0,      9,      0, "" }, /* sect */
                    676: { "sect3info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    677: { "sect3",     0,      0,      0,      9,      0, "" }, /* sect */
                    678: { "sect4info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    679: { "sect4",     0,      0,      0,      9,      0, "" }, /* sect */
                    680: { "sect5info", 0,      0,      0,      9,      0, "" }, /* graphic */
                    681: { "sect5",     0,      0,      0,      9,      0, "" }, /* sect */
                    682: { "sectioninfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    683: { "section",   0,      0,      0,      9,      0, "" }, /* sectioninfo */
                    684: { "seealsoie", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    685: { "seealso",   0,      0,      0,      4,      0, "" }, /* ndxterm */
                    686: { "seeie",     0,      0,      0,      4,      0, "" }, /* ndxterm */
                    687: { "see",       0,      0,      0,      4,      0, "" }, /* ndxterm */
                    688: { "seglistitem",0,     0,      0,      9,      0, "" }, /* seg */
                    689: { "segmentedlist",0,   0,      0,      9,      0, "" }, /* formalobject.title.content */
                    690: { "seg",       0,      0,      0,      2,      0, "" }, /* para */
                    691: { "segtitle",  0,      0,      0,      8,      0, "" }, /* title */
                    692: { "seriesvolnums",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    693: { "set",       0,      0,      0,      9,      0, "" }, /* div.title.content */
                    694: { "setindexinfo",0,    0,      0,      9,      0, "" }, /* graphic */
                    695: { "setindex",  0,      0,      0,      9,      0, "" }, /* setindexinfo */
                    696: { "setinfo",   0,      0,      0,      9,      0, "" }, /* graphic */
                    697: { "sgmltag",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    698: { "shortaffil",        0,      0,      0,      4,      0, "" }, /* docinfo */
                    699: { "shortcut",  0,      0,      0,      9,      0, "" }, /* keycap */
                    700: { "sidebarinfo",0,     0,      0,      9,      0, "" }, /* graphic */
                    701: { "sidebar",   0,      0,      0,      9,      0, "" }, /* sidebarinfo */
                    702: { "simpara",   0,      0,      0,      2,      0, "" }, /* para */
                    703: { "simplelist",        0,      0,      0,      9,      0, "" }, /* member */
                    704: { "simplemsgentry",    0,      0,      0,      9,      0, "" }, /* msgtext */
                    705: { "simplesect",        0,      0,      0,      9,      0, "" }, /* sect.title.content */
                    706: { "spanspec",  0,      2,      1,      0,      0, "" },
                    707: { "state",     0,      0,      0,      4,      0, "" }, /* docinfo */
                    708: { "step",      0,      0,      0,      9,      0, "" }, /* title */
                    709: { "street",    0,      0,      0,      4,      0, "" }, /* docinfo */
                    710: { "structfield",0,     0,      0,      7,      0, "" }, /* smallcptr */
                    711: { "structname",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    712: { "subjectset",        0,      0,      0,      9,      0, "" }, /* subject */
                    713: { "subject",   0,      0,      0,      9,      0, "" }, /* subjectterm */
                    714: { "subjectterm",0,     0,      0,      1,      0, "" },
                    715: { "subscript", 0,      0,      0,      1,      0, "" },
                    716: { "substeps",  0,      0,      0,      9,      0, "" }, /* step */
                    717: { "subtitle",  0,      0,      0,      8,      0, "" }, /* title */
                    718: { "superscript",       0,      0,      0,      1,      0, "" },
                    719: { "surname",   0,      0,      0,      4,      0, "" }, /* docinfo */
                    720: { "symbol",    0,      0,      0,      7,      0, "" }, /* smallcptr */
                    721: { "synopfragment",     0,      0,      0,      9,      0, "" }, /* arg */
                    722: { "synopfragmentref",  0,      0,      0,      1,      0, "" },
                    723: { "synopsis",  0,      0,      0,      2,      0, "" }, /* para */
                    724: { "systemitem",        0,      0,      0,      7,      0, "" }, /* smallcptr */
                    725: { "table",     0,      0,      0,      9,      0, "" }, /* tbl.table.mdl */
                    726: /* { "%tbl.table.name;",       0,      0,      0,      9,      0, "" },*/ /* tbl.table.mdl */
                    727: { "tbody",     0,      0,      0,      9,      0, "" }, /* row */
                    728: { "tbody",     0,      0,      0,      9,      0, "" }, /* row */
                    729: { "term",      0,      0,      0,      2,      0, "" }, /* para */
                    730: { "tertiaryie",        0,      0,      0,      4,      0, "" }, /* ndxterm */
                    731: { "tertiary ", 0,      0,      0,      4,      0, "" }, /* ndxterm */
                    732: { "textobject",        0,      0,      0,      9,      0, "" }, /* objectinfo */
                    733: { "tfoot",     0,      0,      0,      9,      0, "" }, /* tbl.hdft.mdl */
                    734: { "tgroup",    0,      0,      0,      9,      0, "" }, /* tbl.tgroup.mdl */
                    735: { "tgroup",    0,      0,      0,      9,      0, "" }, /* tbl.tgroup.mdl */
                    736: { "thead",     0,      0,      0,      9,      0, "" }, /* row */
                    737: { "thead",     0,      0,      0,      9,      0, "" }, /* tbl.hdft.mdl */
                    738: { "tip",       0,      0,      0,      9,      0, "" }, /* title */
                    739: { "titleabbrev",0,     0,      0,      8,      0, "" }, /* title */
                    740: { "title",     0,      0,      0,      8,      0, "" }, /* title */
                    741: { "tocback",   0,      0,      0,      2,      0, "" }, /* para */
                    742: { "toc",       0,      0,      0,      9,      0, "" }, /* bookcomponent.title.content */
                    743: { "tocchap",   0,      0,      0,      9,      0, "" }, /* tocentry */
                    744: { "tocentry",  0,      0,      0,      2,      0, "" }, /* para */
                    745: { "tocfront",  0,      0,      0,      2,      0, "" }, /* para */
                    746: { "toclevel1", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    747: { "toclevel2", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    748: { "toclevel3", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    749: { "toclevel4", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    750: { "toclevel5", 0,      0,      0,      9,      0, "" }, /* tocentry */
                    751: { "tocpart",   0,      0,      0,      9,      0, "" }, /* tocentry */
                    752: { "token",     0,      0,      0,      7,      0, "" }, /* smallcptr */
                    753: { "trademark", 0,      0,      0,      1,      0, "" },
                    754: { "type",      0,      0,      0,      7,      0, "" }, /* smallcptr */
                    755: { "ulink",     0,      0,      0,      2,      0, "" }, /* para */
                    756: { "userinput", 0,      0,      0,      9,      0, "" }, /* cptr */
                    757: { "varargs",   0,      2,      1,      0,      0, "" },
                    758: { "variablelist",0,    0,      0,      9,      0, "" }, /* formalobject.title.content */
                    759: { "varlistentry",0,    0,      0,      9,      0, "" }, /* term */
                    760: { "varname",   0,      0,      0,      7,      0, "" }, /* smallcptr */
                    761: { "videodata", 0,      2,      1,      0,      0, "" },
                    762: { "videoobject",0,     0,      0,      9,      0, "" }, /* objectinfo */
                    763: { "void",      0,      2,      1,      0,      0, "" },
                    764: { "volumenum", 0,      0,      0,      4,      0, "" }, /* docinfo */
                    765: { "warning",   0,      0,      0,      9,      0, "" }, /* title */
                    766: { "wordasword",        0,      0,      0,      3,      0, "" }, /* word */
                    767: { "xref",      0,      2,      1,      0,      0, "" },
                    768: { "year",      0,      0,      0,      4,      0, "" }, /* docinfo */
                    769: };
                    770: 
                    771: /*
                    772:  * start tags that imply the end of a current element
                    773:  * any tag of each line implies the end of the current element if the type of
                    774:  * that element is in the same line
                    775:  */
                    776: char *sgmlEquEnd[] = {
                    777: "dt", "dd", "li", "option", NULL,
                    778: "h1", "h2", "h3", "h4", "h5", "h6", NULL,
                    779: "ol", "menu", "dir", "address", "pre", "listing", "xmp", NULL,
                    780: NULL
                    781: };
                    782: /*
                    783:  * acording the SGML DTD, HR should be added to the 2nd line above, as it
                    784:  * is not allowed within a H1, H2, H3, etc. But we should tolerate that case
                    785:  * because many documents contain rules in headings...
                    786:  */
                    787: 
                    788: /*
                    789:  * start tags that imply the end of current element
                    790:  */
                    791: char *sgmlStartClose[] = {
                    792: NULL
                    793: };
                    794: 
                    795: /*
                    796:  * The list of SGML elements which are supposed not to have
                    797:  * CDATA content and where a p element will be implied
                    798:  *
                    799:  * TODO: extend that list by reading the SGML SGML DtD on
                    800:  *       implied paragraph
                    801:  */
                    802: static char *sgmlNoContentElements[] = {
                    803:     NULL
                    804: };
                    805: 
                    806: 
                    807: static char** sgmlStartCloseIndex[100];
                    808: static int sgmlStartCloseIndexinitialized = 0;
                    809: 
                    810: /************************************************************************
                    811:  *                                                                     *
                    812:  *             functions to handle SGML specific data                  *
                    813:  *                                                                     *
                    814:  ************************************************************************/
                    815: 
                    816: /**
                    817:  * sgmlInitAutoClose:
                    818:  *
                    819:  * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
                    820:  *
                    821:  */
                    822: void
                    823: sgmlInitAutoClose(void) {
                    824:     int index, i = 0;
                    825: 
                    826:     if (sgmlStartCloseIndexinitialized) return;
                    827: 
                    828:     for (index = 0;index < 100;index ++) sgmlStartCloseIndex[index] = NULL;
                    829:     index = 0;
                    830:     while ((sgmlStartClose[i] != NULL) && (index < 100 - 1)) {
                    831:         sgmlStartCloseIndex[index++] = &sgmlStartClose[i];
                    832:        while (sgmlStartClose[i] != NULL) i++;
                    833:        i++;
                    834:     }
                    835: }
                    836: 
                    837: /**
                    838:  * sgmlTagLookup:
                    839:  * @tag:  The tag name
                    840:  *
                    841:  * Lookup the SGML tag in the ElementTable
                    842:  *
                    843:  * Returns the related sgmlElemDescPtr or NULL if not found.
                    844:  */
                    845: sgmlElemDescPtr
                    846: sgmlTagLookup(const xmlChar *tag) {
                    847:     int i;
                    848: 
                    849:     for (i = 0; i < (sizeof(docbookElementTable) /
                    850:                      sizeof(docbookElementTable[0]));i++) {
1.7       veillard  851:         if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name))
1.1       veillard  852:            return(&docbookElementTable[i]);
                    853:     }
                    854:     return(NULL);
                    855: }
                    856: 
                    857: /**
                    858:  * sgmlCheckAutoClose:
                    859:  * @newtag:  The new tag name
                    860:  * @oldtag:  The old tag name
                    861:  *
                    862:  * Checks wether the new tag is one of the registered valid tags for closing old.
                    863:  * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
                    864:  *
                    865:  * Returns 0 if no, 1 if yes.
                    866:  */
                    867: int
                    868: sgmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) {
                    869:     int i, index;
                    870:     char **close;
                    871: 
                    872:     if (sgmlStartCloseIndexinitialized == 0) sgmlInitAutoClose();
                    873: 
                    874:     /* inefficient, but not a big deal */
                    875:     for (index = 0; index < 100;index++) {
                    876:         close = sgmlStartCloseIndex[index];
                    877:        if (close == NULL) return(0);
1.7       veillard  878:        if (xmlStrEqual(BAD_CAST *close, newtag)) break;
1.1       veillard  879:     }
                    880: 
                    881:     i = close - sgmlStartClose;
                    882:     i++;
                    883:     while (sgmlStartClose[i] != NULL) {
1.7       veillard  884:         if (xmlStrEqual(BAD_CAST sgmlStartClose[i], oldtag)) {
1.1       veillard  885:            return(1);
                    886:        }
                    887:        i++;
                    888:     }
                    889:     return(0);
                    890: }
                    891: 
                    892: /**
                    893:  * sgmlAutoCloseOnClose:
                    894:  * @ctxt:  an SGML parser context
                    895:  * @newtag:  The new tag name
                    896:  *
                    897:  * The HTmL DtD allows an ending tag to implicitely close other tags.
                    898:  */
                    899: void
                    900: sgmlAutoCloseOnClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
                    901:     sgmlElemDescPtr info;
                    902:     xmlChar *oldname;
                    903:     int i;
                    904: 
                    905:     if ((newtag[0] == '/') && (newtag[1] == 0))
                    906:        return;
                    907: 
                    908: #ifdef DEBUG
1.10    ! veillard  909:     xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
1.1       veillard  910:     for (i = 0;i < ctxt->nameNr;i++) 
1.10    ! veillard  911:         xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
1.1       veillard  912: #endif
                    913: 
                    914:     for (i = (ctxt->nameNr - 1);i >= 0;i--) {
1.7       veillard  915:         if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
1.1       veillard  916:     }
                    917:     if (i < 0) return;
                    918: 
1.7       veillard  919:     while (!xmlStrEqual(newtag, ctxt->name)) {
1.1       veillard  920:        info = sgmlTagLookup(ctxt->name);
                    921:        if ((info == NULL) || (info->endTag == 1)) {
                    922: #ifdef DEBUG
1.10    ! veillard  923:            xmlGenericError(xmlGenericErrorContext,"sgmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
1.1       veillard  924: #endif
                    925:         } else {
                    926:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    927:                ctxt->sax->error(ctxt->userData,
                    928:                 "Opening and ending tag mismatch: %s and %s\n",
                    929:                                 newtag, ctxt->name);
                    930:            ctxt->wellFormed = 0;
                    931:        }
                    932:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    933:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    934:        oldname = sgmlnamePop(ctxt);
                    935:        if (oldname != NULL) {
                    936: #ifdef DEBUG
1.10    ! veillard  937:            xmlGenericError(xmlGenericErrorContext,"sgmlAutoCloseOnClose: popped %s\n", oldname);
1.1       veillard  938: #endif
                    939:            xmlFree(oldname);
                    940:        }       
                    941:     }
                    942: }
                    943: 
                    944: /**
                    945:  * sgmlAutoClose:
                    946:  * @ctxt:  an SGML parser context
                    947:  * @newtag:  The new tag name or NULL
                    948:  *
                    949:  * The HTmL DtD allows a tag to implicitely close other tags.
                    950:  * The list is kept in sgmlStartClose array. This function is
                    951:  * called when a new tag has been detected and generates the
                    952:  * appropriates closes if possible/needed.
                    953:  * If newtag is NULL this mean we are at the end of the resource
                    954:  * and we should check 
                    955:  */
                    956: void
                    957: sgmlAutoClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
                    958:     xmlChar *oldname;
                    959:     while ((newtag != NULL) && (ctxt->name != NULL) && 
                    960:            (sgmlCheckAutoClose(newtag, ctxt->name))) {
                    961: #ifdef DEBUG
1.10    ! veillard  962:        xmlGenericError(xmlGenericErrorContext,"sgmlAutoClose: %s closes %s\n", newtag, ctxt->name);
1.1       veillard  963: #endif
                    964:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    965:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    966:        oldname = sgmlnamePop(ctxt);
                    967:        if (oldname != NULL) {
                    968: #ifdef DEBUG
1.10    ! veillard  969:            xmlGenericError(xmlGenericErrorContext,"sgmlAutoClose: popped %s\n", oldname);
1.1       veillard  970: #endif
                    971:            xmlFree(oldname);
                    972:         }
                    973:     }
                    974: #if 0
                    975:     if (newtag == NULL) {
                    976:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"head");
                    977:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"body");
                    978:        sgmlAutoCloseOnClose(ctxt, BAD_CAST"sgml");
                    979:     }
                    980:     while ((newtag == NULL) && (ctxt->name != NULL) &&
1.7       veillard  981:           ((xmlStrEqual(ctxt->name, BAD_CAST"head")) ||
                    982:            (xmlStrEqual(ctxt->name, BAD_CAST"body")) ||
                    983:            (xmlStrEqual(ctxt->name, BAD_CAST"sgml")))) {
1.1       veillard  984: #ifdef DEBUG
1.10    ! veillard  985:        xmlGenericError(xmlGenericErrorContext,"sgmlAutoClose: EOF closes %s\n", ctxt->name);
1.1       veillard  986: #endif
                    987:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                    988:            ctxt->sax->endElement(ctxt->userData, ctxt->name);
                    989:        oldname = sgmlnamePop(ctxt);
                    990:        if (oldname != NULL) {
                    991: #ifdef DEBUG
1.10    ! veillard  992:            xmlGenericError(xmlGenericErrorContext,"sgmlAutoClose: popped %s\n", oldname);
1.1       veillard  993: #endif
                    994:            xmlFree(oldname);
                    995:         }
                    996:    }
                    997: #endif
                    998: }
                    999: 
                   1000: /**
                   1001:  * sgmlAutoCloseTag:
                   1002:  * @doc:  the SGML document
                   1003:  * @name:  The tag name
                   1004:  * @elem:  the SGML element
                   1005:  *
                   1006:  * The HTmL DtD allows a tag to implicitely close other tags.
                   1007:  * The list is kept in sgmlStartClose array. This function checks
                   1008:  * if the element or one of it's children would autoclose the
                   1009:  * given tag.
                   1010:  *
                   1011:  * Returns 1 if autoclose, 0 otherwise
                   1012:  */
                   1013: int
                   1014: sgmlAutoCloseTag(sgmlDocPtr doc, const xmlChar *name, sgmlNodePtr elem) {
                   1015:     sgmlNodePtr child;
                   1016: 
                   1017:     if (elem == NULL) return(1);
1.7       veillard 1018:     if (xmlStrEqual(name, elem->name)) return(0);
1.1       veillard 1019:     if (sgmlCheckAutoClose(elem->name, name)) return(1);
                   1020:     child = elem->children;
                   1021:     while (child != NULL) {
                   1022:         if (sgmlAutoCloseTag(doc, name, child)) return(1);
                   1023:        child = child->next;
                   1024:     }
                   1025:     return(0);
                   1026: }
                   1027: 
                   1028: /**
                   1029:  * sgmlIsAutoClosed:
                   1030:  * @doc:  the SGML document
                   1031:  * @elem:  the SGML element
                   1032:  *
                   1033:  * The HTmL DtD allows a tag to implicitely close other tags.
                   1034:  * The list is kept in sgmlStartClose array. This function checks
                   1035:  * if a tag is autoclosed by one of it's child
                   1036:  *
                   1037:  * Returns 1 if autoclosed, 0 otherwise
                   1038:  */
                   1039: int
                   1040: sgmlIsAutoClosed(sgmlDocPtr doc, sgmlNodePtr elem) {
                   1041:     sgmlNodePtr child;
                   1042: 
                   1043:     if (elem == NULL) return(1);
                   1044:     child = elem->children;
                   1045:     while (child != NULL) {
                   1046:        if (sgmlAutoCloseTag(doc, elem->name, child)) return(1);
                   1047:        child = child->next;
                   1048:     }
                   1049:     return(0);
                   1050: }
                   1051: 
                   1052: /**
                   1053:  * sgmlCheckImplied:
                   1054:  * @ctxt:  an SGML parser context
                   1055:  * @newtag:  The new tag name
                   1056:  *
                   1057:  * The HTmL DtD allows a tag to exists only implicitely
                   1058:  * called when a new tag has been detected and generates the
                   1059:  * appropriates implicit tags if missing
                   1060:  */
                   1061: void
                   1062: sgmlCheckImplied(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
                   1063: #if 0
1.7       veillard 1064:     if (xmlStrEqual(newtag, BAD_CAST"sgml"))
1.1       veillard 1065:        return;
                   1066:     if (ctxt->nameNr <= 0) {
                   1067: #ifdef DEBUG
1.10    ! veillard 1068:        xmlGenericError(xmlGenericErrorContext,"Implied element sgml: pushed sgml\n");
1.1       veillard 1069: #endif    
                   1070:        sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"sgml"));
                   1071:        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1072:            ctxt->sax->startElement(ctxt->userData, BAD_CAST"sgml", NULL);
                   1073:     }
1.7       veillard 1074:     if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
1.1       veillard 1075:         return;
                   1076:     if (ctxt->nameNr <= 1) {
1.7       veillard 1077:        if ((xmlStrEqual(newtag, BAD_CAST"script")) ||
                   1078:            (xmlStrEqual(newtag, BAD_CAST"style")) ||
                   1079:            (xmlStrEqual(newtag, BAD_CAST"meta")) ||
                   1080:            (xmlStrEqual(newtag, BAD_CAST"link")) ||
                   1081:            (xmlStrEqual(newtag, BAD_CAST"title")) ||
                   1082:            (xmlStrEqual(newtag, BAD_CAST"base"))) {
1.1       veillard 1083:            /* 
                   1084:             * dropped OBJECT ... i you put it first BODY will be
                   1085:             * assumed !
                   1086:             */
                   1087: #ifdef DEBUG
1.10    ! veillard 1088:            xmlGenericError(xmlGenericErrorContext,"Implied element head: pushed head\n");
1.1       veillard 1089: #endif    
                   1090:            sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"head"));
                   1091:            if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1092:                ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
                   1093:        } else {
                   1094: #ifdef DEBUG
1.10    ! veillard 1095:            xmlGenericError(xmlGenericErrorContext,"Implied element body: pushed body\n");
1.1       veillard 1096: #endif    
                   1097:            sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"body"));
                   1098:            if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1099:                ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
                   1100:        }
                   1101:     }
                   1102: #endif
                   1103: }
                   1104: 
                   1105: /**
                   1106:  * sgmlCheckParagraph
                   1107:  * @ctxt:  an SGML parser context
                   1108:  *
                   1109:  * Check whether a p element need to be implied before inserting
                   1110:  * characters in the current element.
                   1111:  *
                   1112:  * Returns 1 if a paragraph has been inserted, 0 if not and -1
                   1113:  *         in case of error.
                   1114:  */
                   1115: 
                   1116: int
                   1117: sgmlCheckParagraph(sgmlParserCtxtPtr ctxt) {
                   1118:     const xmlChar *tag;
                   1119:     int i;
                   1120: 
                   1121:     if (ctxt == NULL)
                   1122:        return(-1);
                   1123:     tag = ctxt->name;
                   1124:     if (tag == NULL) {
                   1125:        sgmlAutoClose(ctxt, BAD_CAST"p");
                   1126:        sgmlCheckImplied(ctxt, BAD_CAST"p");
                   1127:        sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
                   1128:        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1129:            ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
                   1130:        return(1);
                   1131:     }
                   1132:     for (i = 0; sgmlNoContentElements[i] != NULL; i++) {
1.7       veillard 1133:        if (xmlStrEqual(tag, BAD_CAST sgmlNoContentElements[i])) {
1.1       veillard 1134: #ifdef DEBUG
1.10    ! veillard 1135:            xmlGenericError(xmlGenericErrorContext,"Implied element paragraph\n");
1.1       veillard 1136: #endif    
                   1137:            sgmlAutoClose(ctxt, BAD_CAST"p");
                   1138:            sgmlCheckImplied(ctxt, BAD_CAST"p");
                   1139:            sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
                   1140:            if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   1141:                ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
                   1142:            return(1);
                   1143:        }
                   1144:     }
                   1145:     return(0);
                   1146: }
                   1147: 
                   1148: /************************************************************************
                   1149:  *                                                                     *
                   1150:  *             The list of SGML predefined entities                    *
                   1151:  *                                                                     *
                   1152:  ************************************************************************/
                   1153: 
                   1154: 
                   1155: sgmlEntityDesc  docbookEntitiesTable[] = {
                   1156: /*
                   1157:  * the 4 absolute ones, plus apostrophe.
                   1158:  */
                   1159: { 0x0026, "amp", "AMPERSAND" },
                   1160: { 0x003C, "lt",        "LESS-THAN SIGN" },
                   1161: 
                   1162: /*
                   1163:  * Converted with VI macros from docbook ent files
                   1164:  */
                   1165: { 0x0021, "excl", "EXCLAMATION MARK" },
                   1166: { 0x0022, "quot", "QUOTATION MARK" },
                   1167: { 0x0023, "num", "NUMBER SIGN" },
                   1168: { 0x0024, "dollar", "DOLLAR SIGN" },
                   1169: { 0x0025, "percnt", "PERCENT SIGN" },
                   1170: { 0x0027, "apos", "APOSTROPHE" },
                   1171: { 0x0028, "lpar", "LEFT PARENTHESIS" },
                   1172: { 0x0029, "rpar", "RIGHT PARENTHESIS" },
                   1173: { 0x002A, "ast", "ASTERISK OPERATOR" },
                   1174: { 0x002B, "plus", "PLUS SIGN" },
                   1175: { 0x002C, "comma", "COMMA" },
                   1176: { 0x002D, "hyphen", "HYPHEN-MINUS" },
                   1177: { 0x002E, "period", "FULL STOP" },
                   1178: { 0x002F, "sol", "SOLIDUS" },
                   1179: { 0x003A, "colon", "COLON" },
                   1180: { 0x003B, "semi", "SEMICOLON" },
                   1181: { 0x003D, "equals", "EQUALS SIGN" },
                   1182: { 0x003E, "gt", "GREATER-THAN SIGN" },
                   1183: { 0x003F, "quest", "QUESTION MARK" },
                   1184: { 0x0040, "commat", "COMMERCIAL AT" },
                   1185: { 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
                   1186: { 0x005C, "bsol", "REVERSE SOLIDUS" },
                   1187: { 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
                   1188: { 0x005E, "circ", "RING OPERATOR" },
                   1189: { 0x005F, "lowbar", "LOW LINE" },
                   1190: { 0x0060, "grave", "GRAVE ACCENT" },
                   1191: { 0x007B, "lcub", "LEFT CURLY BRACKET" },
                   1192: { 0x007C, "verbar", "VERTICAL LINE" },
                   1193: { 0x007D, "rcub", "RIGHT CURLY BRACKET" },
                   1194: { 0x00A0, "nbsp", "NO-BREAK SPACE" },
                   1195: { 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
                   1196: { 0x00A2, "cent", "CENT SIGN" },
                   1197: { 0x00A3, "pound", "POUND SIGN" },
                   1198: { 0x00A4, "curren", "CURRENCY SIGN" },
                   1199: { 0x00A5, "yen", "YEN SIGN" },
                   1200: { 0x00A6, "brvbar", "BROKEN BAR" },
                   1201: { 0x00A7, "sect", "SECTION SIGN" },
                   1202: { 0x00A8, "die", "" },
                   1203: { 0x00A8, "Dot", "" },
                   1204: { 0x00A8, "uml", "" },
                   1205: { 0x00A9, "copy", "COPYRIGHT SIGN" },
                   1206: { 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
                   1207: { 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
                   1208: { 0x00AC, "not", "NOT SIGN" },
                   1209: { 0x00AD, "shy", "SOFT HYPHEN" },
                   1210: { 0x00AE, "reg", "REG TRADE MARK SIGN" },
                   1211: { 0x00AF, "macr", "MACRON" },
                   1212: { 0x00B0, "deg", "DEGREE SIGN" },
                   1213: { 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
                   1214: { 0x00B2, "sup2", "SUPERSCRIPT TWO" },
                   1215: { 0x00B3, "sup3", "SUPERSCRIPT THREE" },
                   1216: { 0x00B4, "acute", "ACUTE ACCENT" },
                   1217: { 0x00B5, "micro", "MICRO SIGN" },
                   1218: { 0x00B6, "para", "PILCROW SIGN" },
                   1219: { 0x00B7, "middot", "MIDDLE DOT" },
                   1220: { 0x00B8, "cedil", "CEDILLA" },
                   1221: { 0x00B9, "sup1", "SUPERSCRIPT ONE" },
                   1222: { 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
                   1223: { 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
                   1224: { 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
                   1225: { 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
                   1226: { 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
                   1227: { 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
                   1228: { 0x00BF, "iquest", "INVERTED QUESTION MARK" },
                   1229: { 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
                   1230: { 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
                   1231: { 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
                   1232: { 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
                   1233: { 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
                   1234: { 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
                   1235: { 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
                   1236: { 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
                   1237: { 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
                   1238: { 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
                   1239: { 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
                   1240: { 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
                   1241: { 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
                   1242: { 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
                   1243: { 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
                   1244: { 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
                   1245: { 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
                   1246: { 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
                   1247: { 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
                   1248: { 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
                   1249: { 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
                   1250: { 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
                   1251: { 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
                   1252: { 0x00D7, "times", "MULTIPLICATION SIGN" },
                   1253: { 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
                   1254: { 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
                   1255: { 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
                   1256: { 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
                   1257: { 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
                   1258: { 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
                   1259: { 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
                   1260: { 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
                   1261: { 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
                   1262: { 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
                   1263: { 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
                   1264: { 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
                   1265: { 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
                   1266: { 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
                   1267: { 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
                   1268: { 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
                   1269: { 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
                   1270: { 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
                   1271: { 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
                   1272: { 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
                   1273: { 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
                   1274: { 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
                   1275: { 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
                   1276: { 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
                   1277: { 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
                   1278: { 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
                   1279: { 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
                   1280: { 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
                   1281: { 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
                   1282: { 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
                   1283: { 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
                   1284: { 0x00F7, "divide", "DIVISION SIGN" },
                   1285: { 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
                   1286: { 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
                   1287: { 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
                   1288: { 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
                   1289: { 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
                   1290: { 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
                   1291: { 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
                   1292: { 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
                   1293: { 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
                   1294: { 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
                   1295: { 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
                   1296: { 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
                   1297: { 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
                   1298: { 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
                   1299: { 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
                   1300: { 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
                   1301: { 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
                   1302: { 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
                   1303: { 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
                   1304: { 0x010B, "cdot", "DOT OPERATOR" },
                   1305: { 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
                   1306: { 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
                   1307: { 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
                   1308: { 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
                   1309: { 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
                   1310: { 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
                   1311: { 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
                   1312: { 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
                   1313: { 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
                   1314: { 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
                   1315: { 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
                   1316: { 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
                   1317: { 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
                   1318: { 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
                   1319: { 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
                   1320: { 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
                   1321: { 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
                   1322: { 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
                   1323: { 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
                   1324: { 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
                   1325: { 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
                   1326: { 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
                   1327: { 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
                   1328: { 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
                   1329: { 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
                   1330: { 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
                   1331: { 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
                   1332: { 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
                   1333: { 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
                   1334: { 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
                   1335: { 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
                   1336: { 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
                   1337: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
                   1338: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
                   1339: { 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
                   1340: { 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
                   1341: { 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
                   1342: { 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
                   1343: { 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
                   1344: { 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
                   1345: { 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
                   1346: { 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
                   1347: { 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
                   1348: { 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
                   1349: { 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
                   1350: { 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
                   1351: { 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
                   1352: { 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
                   1353: { 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
                   1354: { 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
                   1355: { 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
                   1356: { 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
                   1357: { 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
                   1358: { 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
                   1359: { 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
                   1360: { 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
                   1361: { 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
                   1362: { 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
                   1363: { 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
                   1364: { 0x014B, "eng", "LATIN SMALL LETTER ENG" },
                   1365: { 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
                   1366: { 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
                   1367: { 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
                   1368: { 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
                   1369: { 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
                   1370: { 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
                   1371: { 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
                   1372: { 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
                   1373: { 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
                   1374: { 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
                   1375: { 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
                   1376: { 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
                   1377: { 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
                   1378: { 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
                   1379: { 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
                   1380: { 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
                   1381: { 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
                   1382: { 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
                   1383: { 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
                   1384: { 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
                   1385: { 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
                   1386: { 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
                   1387: { 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
                   1388: { 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
                   1389: { 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
                   1390: { 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
                   1391: { 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
                   1392: { 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
                   1393: { 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
                   1394: { 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
                   1395: { 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
                   1396: { 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
                   1397: { 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
                   1398: { 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
                   1399: { 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
                   1400: { 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
                   1401: { 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
                   1402: { 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
                   1403: { 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
                   1404: { 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
                   1405: { 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
                   1406: { 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
                   1407: { 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
                   1408: { 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
                   1409: { 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
                   1410: { 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
                   1411: { 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
                   1412: { 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
                   1413: { 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
                   1414: { 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
                   1415: { 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
                   1416: { 0x02C7, "caron", "CARON" },
                   1417: { 0x02D8, "breve", "BREVE" },
                   1418: { 0x02D9, "dot", "DOT ABOVE" },
                   1419: { 0x02DA, "ring", "RING ABOVE" },
                   1420: { 0x02DB, "ogon", "OGONEK" },
                   1421: { 0x02DC, "tilde", "TILDE" },
                   1422: { 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
                   1423: { 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
                   1424: { 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
                   1425: { 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
                   1426: { 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
                   1427: { 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
                   1428: { 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
                   1429: { 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
                   1430: { 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
                   1431: { 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
                   1432: { 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
                   1433: { 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
                   1434: { 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
                   1435: { 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
                   1436: { 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
                   1437: { 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
                   1438: { 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
                   1439: { 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
                   1440: { 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
                   1441: { 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
                   1442: { 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
                   1443: { 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
                   1444: { 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
                   1445: { 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
                   1446: { 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
                   1447: { 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
                   1448: { 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
                   1449: { 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
                   1450: { 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
                   1451: { 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
                   1452: { 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
                   1453: { 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
                   1454: { 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
                   1455: { 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
                   1456: { 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
                   1457: { 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
                   1458: { 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
                   1459: { 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
                   1460: { 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
                   1461: { 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
                   1462: { 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
                   1463: { 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
                   1464: { 0x03A5, "Ugr", "" },
                   1465: { 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
                   1466: { 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
                   1467: { 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
                   1468: { 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
                   1469: { 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
                   1470: { 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
                   1471: { 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
                   1472: { 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
                   1473: { 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
                   1474: { 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
                   1475: { 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
                   1476: { 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
                   1477: { 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
                   1478: { 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
                   1479: { 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
                   1480: { 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
                   1481: { 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
                   1482: { 0x03B1, "agr", "" },
                   1483: { 0x03B1, "alpha", "" },
                   1484: { 0x03B1, "b.alpha", "" },
                   1485: { 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
                   1486: { 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
                   1487: { 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
                   1488: { 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
                   1489: { 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
                   1490: { 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
                   1491: { 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
                   1492: { 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
                   1493: { 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
                   1494: { 0x03B5, "b.epsi", "" },
                   1495: { 0x03B5, "b.epsis", "" },
                   1496: { 0x03B5, "b.epsiv", "" },
                   1497: { 0x03B5, "egr", "" },
                   1498: { 0x03B5, "epsiv", "" },
                   1499: { 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
                   1500: { 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
                   1501: { 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
                   1502: { 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
                   1503: { 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
                   1504: { 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
                   1505: { 0x03B8, "b.thetas", "" },
                   1506: { 0x03B8, "thetas", "" },
                   1507: { 0x03B8, "thgr", "" },
                   1508: { 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
                   1509: { 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
                   1510: { 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
                   1511: { 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
                   1512: { 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
                   1513: { 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
                   1514: { 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
                   1515: { 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
                   1516: { 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
                   1517: { 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
                   1518: { 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
                   1519: { 0x03BC, "mu", "GREEK SMALL LETTER MU" },
                   1520: { 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
                   1521: { 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
                   1522: { 0x03BD, "nu", "GREEK SMALL LETTER NU" },
                   1523: { 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
                   1524: { 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
                   1525: { 0x03BE, "xi", "GREEK SMALL LETTER XI" },
                   1526: { 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
                   1527: { 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
                   1528: { 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
                   1529: { 0x03C0, "pi", "GREEK SMALL LETTER PI" },
                   1530: { 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
                   1531: { 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
                   1532: { 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
                   1533: { 0x03C2, "b.sigmav", "" },
                   1534: { 0x03C2, "sfgr", "" },
                   1535: { 0x03C2, "sigmav", "" },
                   1536: { 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
                   1537: { 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
                   1538: { 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
                   1539: { 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
                   1540: { 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
                   1541: { 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
                   1542: { 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
                   1543: { 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
                   1544: { 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
                   1545: { 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
                   1546: { 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
                   1547: { 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
                   1548: { 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
                   1549: { 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
                   1550: { 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
                   1551: { 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
                   1552: { 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
                   1553: { 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
                   1554: { 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
                   1555: { 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
                   1556: { 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
                   1557: { 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
                   1558: { 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
                   1559: { 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
                   1560: { 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
                   1561: { 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
                   1562: { 0x03D1, "b.thetav", "" },
                   1563: { 0x03D1, "thetav", "" },
                   1564: { 0x03D2, "b.Upsi", "" },
                   1565: { 0x03D2, "Upsi", "" },
                   1566: { 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
                   1567: { 0x03D5, "phiv", "GREEK PHI SYMBOL" },
                   1568: { 0x03D6, "b.piv", "GREEK PI SYMBOL" },
                   1569: { 0x03D6, "piv", "GREEK PI SYMBOL" },
                   1570: { 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
                   1571: { 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
                   1572: { 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
                   1573: { 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
                   1574: { 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
                   1575: { 0x03F1, "rhov", "GREEK RHO SYMBOL" },
                   1576: { 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
                   1577: { 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
                   1578: { 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
                   1579: { 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
                   1580: { 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
                   1581: { 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
                   1582: { 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
                   1583: { 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
                   1584: { 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
                   1585: { 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
                   1586: { 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
                   1587: { 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
                   1588: { 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
                   1589: { 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
                   1590: { 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
                   1591: { 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
                   1592: { 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
                   1593: { 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
                   1594: { 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
                   1595: { 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
                   1596: { 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
                   1597: { 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
                   1598: { 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
                   1599: { 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
                   1600: { 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
                   1601: { 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
                   1602: { 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
                   1603: { 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
                   1604: { 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
                   1605: { 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
                   1606: { 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
                   1607: { 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
                   1608: { 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
                   1609: { 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
                   1610: { 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
                   1611: { 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
                   1612: { 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
                   1613: { 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
                   1614: { 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
                   1615: { 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
                   1616: { 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
                   1617: { 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
                   1618: { 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
                   1619: { 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
                   1620: { 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
                   1621: { 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
                   1622: { 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
                   1623: { 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
                   1624: { 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
                   1625: { 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
                   1626: { 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
                   1627: { 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
                   1628: { 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
                   1629: { 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
                   1630: { 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
                   1631: { 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
                   1632: { 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
                   1633: { 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
                   1634: { 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
                   1635: { 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
                   1636: { 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
                   1637: { 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
                   1638: { 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
                   1639: { 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
                   1640: { 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
                   1641: { 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
                   1642: { 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
                   1643: { 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
                   1644: { 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
                   1645: { 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
                   1646: { 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
                   1647: { 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
                   1648: { 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
                   1649: { 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
                   1650: { 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
                   1651: { 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
                   1652: { 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
                   1653: { 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
                   1654: { 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
                   1655: { 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
                   1656: { 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
                   1657: { 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
                   1658: { 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
                   1659: { 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
                   1660: { 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
                   1661: { 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
                   1662: { 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
                   1663: { 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
                   1664: { 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
                   1665: { 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
                   1666: { 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
                   1667: { 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
                   1668: { 0x2002, "ensp", "EN SPACE" },
                   1669: { 0x2003, "emsp", "EM SPACE" },
                   1670: { 0x2004, "emsp13", "THREE-PER-EM SPACE" },
                   1671: { 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
                   1672: { 0x2007, "numsp", "FIGURE SPACE" },
                   1673: { 0x2008, "puncsp", "PUNCTUATION SPACE" },
                   1674: { 0x2009, "thinsp", "THIN SPACE" },
                   1675: { 0x200A, "hairsp", "HAIR SPACE" },
                   1676: { 0x2010, "dash", "HYPHEN" },
                   1677: { 0x2013, "ndash", "EN DASH" },
                   1678: { 0x2014, "mdash", "EM DASH" },
                   1679: { 0x2015, "horbar", "HORIZONTAL BAR" },
                   1680: { 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
                   1681: { 0x2018, "lsquo", "" },
                   1682: { 0x2018, "rsquor", "" },
                   1683: { 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
                   1684: { 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
                   1685: { 0x201C, "ldquo", "" },
                   1686: { 0x201C, "rdquor", "" },
                   1687: { 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
                   1688: { 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
                   1689: { 0x2020, "dagger", "DAGGER" },
                   1690: { 0x2021, "Dagger", "DOUBLE DAGGER" },
                   1691: { 0x2022, "bull", "BULLET" },
                   1692: { 0x2025, "nldr", "TWO DOT LEADER" },
                   1693: { 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
                   1694: { 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
                   1695: { 0x2030, "permil", "PER MILLE SIGN" },
                   1696: { 0x2032, "prime", "PRIME" },
                   1697: { 0x2032, "vprime", "PRIME" },
                   1698: { 0x2033, "Prime", "DOUBLE PRIME" },
                   1699: { 0x2034, "tprime", "TRIPLE PRIME" },
                   1700: { 0x2035, "bprime", "REVERSED PRIME" },
                   1701: { 0x2041, "caret", "CARET" },
                   1702: { 0x2043, "hybull", "HYPHEN BULLET" },
                   1703: { 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
                   1704: { 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
                   1705: { 0x2105, "incare", "CARE OF" },
                   1706: { 0x210B, "hamilt", "SCRIPT CAPITAL H" },
                   1707: { 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
                   1708: { 0x2111, "image", "BLACK-LETTER CAPITAL I" },
                   1709: { 0x2112, "lagran", "SCRIPT CAPITAL L" },
                   1710: { 0x2113, "ell", "SCRIPT SMALL L" },
                   1711: { 0x2116, "numero", "NUMERO SIGN" },
                   1712: { 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
                   1713: { 0x2118, "weierp", "SCRIPT CAPITAL P" },
                   1714: { 0x211C, "real", "BLACK-LETTER CAPITAL R" },
                   1715: { 0x211E, "rx", "PRESCRIPTION TAKE" },
                   1716: { 0x2122, "trade", "TRADE MARK SIGN" },
                   1717: { 0x2126, "ohm", "OHM SIGN" },
                   1718: { 0x212B, "angst", "ANGSTROM SIGN" },
                   1719: { 0x212C, "bernou", "SCRIPT CAPITAL B" },
                   1720: { 0x2133, "phmmat", "SCRIPT CAPITAL M" },
                   1721: { 0x2134, "order", "SCRIPT SMALL O" },
                   1722: { 0x2135, "aleph", "ALEF SYMBOL" },
                   1723: { 0x2136, "beth", "BET SYMBOL" },
                   1724: { 0x2137, "gimel", "GIMEL SYMBOL" },
                   1725: { 0x2138, "daleth", "DALET SYMBOL" },
                   1726: { 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
                   1727: { 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
                   1728: { 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
                   1729: { 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
                   1730: { 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
                   1731: { 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
                   1732: { 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
                   1733: { 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
                   1734: { 0x215B, "frac18", "" },
                   1735: { 0x215C, "frac38", "" },
                   1736: { 0x215D, "frac58", "" },
                   1737: { 0x215E, "frac78", "" },
                   1738: { 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
                   1739: { 0x2191, "uarr", "UPWARDS ARROW" },
                   1740: { 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
                   1741: { 0x2193, "darr", "DOWNWARDS ARROW" },
                   1742: { 0x2194, "harr", "LEFT RIGHT ARROW" },
                   1743: { 0x2194, "xhArr", "LEFT RIGHT ARROW" },
                   1744: { 0x2194, "xharr", "LEFT RIGHT ARROW" },
                   1745: { 0x2195, "varr", "UP DOWN ARROW" },
                   1746: { 0x2196, "nwarr", "NORTH WEST ARROW" },
                   1747: { 0x2197, "nearr", "NORTH EAST ARROW" },
                   1748: { 0x2198, "drarr", "SOUTH EAST ARROW" },
                   1749: { 0x2199, "dlarr", "SOUTH WEST ARROW" },
                   1750: { 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
                   1751: { 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
                   1752: { 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
                   1753: { 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
                   1754: { 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
                   1755: { 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
                   1756: { 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
                   1757: { 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
                   1758: { 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
                   1759: { 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
                   1760: { 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
                   1761: { 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
                   1762: { 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
                   1763: { 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
                   1764: { 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
                   1765: { 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
                   1766: { 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
                   1767: { 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
                   1768: { 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
                   1769: { 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
                   1770: { 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
                   1771: { 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
                   1772: { 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
                   1773: { 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
                   1774: { 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
                   1775: { 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
                   1776: { 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
                   1777: { 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
                   1778: { 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
                   1779: { 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
                   1780: { 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
                   1781: { 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
                   1782: { 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
                   1783: { 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
                   1784: { 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
                   1785: { 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
                   1786: { 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
                   1787: { 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
                   1788: { 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
                   1789: { 0x21D0, "lArr", "LEFTWARDS ARROW" },
                   1790: { 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
                   1791: { 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
                   1792: { 0x21D2, "rArr", "RIGHTWARDS ARROW" },
                   1793: { 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
                   1794: { 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
                   1795: { 0x21D4, "hArr", "" },
                   1796: { 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
                   1797: { 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
                   1798: { 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
                   1799: { 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
                   1800: { 0x2200, "forall", "" },
                   1801: { 0x2201, "comp", "COMPLEMENT" },
                   1802: { 0x2202, "part", "" },
                   1803: { 0x2203, "exist", "" },
                   1804: { 0x2204, "nexist", "THERE DOES NOT EXIST" },
                   1805: { 0x2205, "empty", "" },
                   1806: { 0x2207, "nabla", "NABLA" },
                   1807: { 0x2209, "notin", "" },
                   1808: { 0x220A, "epsi", "" },
                   1809: { 0x220A, "epsis", "" },
                   1810: { 0x220A, "isin", "" },
                   1811: { 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
                   1812: { 0x220D, "ni", "" },
                   1813: { 0x220F, "prod", "N-ARY PRODUCT" },
                   1814: { 0x2210, "amalg", "N-ARY COPRODUCT" },
                   1815: { 0x2210, "coprod", "N-ARY COPRODUCT" },
                   1816: { 0x2210, "samalg", "" },
                   1817: { 0x2211, "sum", "N-ARY SUMMATION" },
                   1818: { 0x2212, "minus", "MINUS SIGN" },
                   1819: { 0x2213, "mnplus", "" },
                   1820: { 0x2214, "plusdo", "DOT PLUS" },
                   1821: { 0x2216, "setmn", "SET MINUS" },
                   1822: { 0x2216, "ssetmn", "SET MINUS" },
                   1823: { 0x2217, "lowast", "ASTERISK OPERATOR" },
                   1824: { 0x2218, "compfn", "RING OPERATOR" },
                   1825: { 0x221A, "radic", "" },
                   1826: { 0x221D, "prop", "" },
                   1827: { 0x221D, "vprop", "" },
                   1828: { 0x221E, "infin", "" },
                   1829: { 0x221F, "ang90", "RIGHT ANGLE" },
                   1830: { 0x2220, "ang", "ANGLE" },
                   1831: { 0x2221, "angmsd", "MEASURED ANGLE" },
                   1832: { 0x2222, "angsph", "" },
                   1833: { 0x2223, "mid", "" },
                   1834: { 0x2224, "nmid", "DOES NOT DIVIDE" },
                   1835: { 0x2225, "par", "PARALLEL TO" },
                   1836: { 0x2225, "spar", "PARALLEL TO" },
                   1837: { 0x2226, "npar", "NOT PARALLEL TO" },
                   1838: { 0x2226, "nspar", "NOT PARALLEL TO" },
                   1839: { 0x2227, "and", "" },
                   1840: { 0x2228, "or", "" },
                   1841: { 0x2229, "cap", "" },
                   1842: { 0x222A, "cup", "" },
                   1843: { 0x222B, "int", "" },
                   1844: { 0x222E, "conint", "" },
                   1845: { 0x2234, "there4", "" },
                   1846: { 0x2235, "becaus", "BECAUSE" },
                   1847: { 0x223C, "sim", "" },
                   1848: { 0x223C, "thksim", "TILDE OPERATOR" },
                   1849: { 0x223D, "bsim", "" },
                   1850: { 0x2240, "wreath", "WREATH PRODUCT" },
                   1851: { 0x2241, "nsim", "" },
                   1852: { 0x2243, "sime", "" },
                   1853: { 0x2244, "nsime", "" },
                   1854: { 0x2245, "cong", "" },
                   1855: { 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
                   1856: { 0x2248, "ap", "" },
                   1857: { 0x2248, "thkap", "ALMOST EQUAL TO" },
                   1858: { 0x2249, "nap", "NOT ALMOST EQUAL TO" },
                   1859: { 0x224A, "ape", "" },
                   1860: { 0x224C, "bcong", "ALL EQUAL TO" },
                   1861: { 0x224D, "asymp", "EQUIVALENT TO" },
                   1862: { 0x224E, "bump", "" },
                   1863: { 0x224F, "bumpe", "" },
                   1864: { 0x2250, "esdot", "" },
                   1865: { 0x2251, "eDot", "" },
                   1866: { 0x2252, "efDot", "" },
                   1867: { 0x2253, "erDot", "" },
                   1868: { 0x2254, "colone", "" },
                   1869: { 0x2255, "ecolon", "" },
                   1870: { 0x2256, "ecir", "" },
                   1871: { 0x2257, "cire", "" },
                   1872: { 0x2259, "wedgeq", "ESTIMATES" },
                   1873: { 0x225C, "trie", "" },
                   1874: { 0x2260, "ne", "" },
                   1875: { 0x2261, "equiv", "" },
                   1876: { 0x2262, "nequiv", "NOT IDENTICAL TO" },
                   1877: { 0x2264, "le", "" },
                   1878: { 0x2264, "les", "LESS-THAN OR EQUAL TO" },
                   1879: { 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
                   1880: { 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
                   1881: { 0x2266, "lE", "" },
                   1882: { 0x2267, "gE", "" },
                   1883: { 0x2268, "lnE", "" },
                   1884: { 0x2268, "lne", "" },
                   1885: { 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
                   1886: { 0x2269, "gnE", "" },
                   1887: { 0x2269, "gne", "" },
                   1888: { 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
                   1889: { 0x226A, "Lt", "MUCH LESS-THAN" },
                   1890: { 0x226B, "Gt", "MUCH GREATER-THAN" },
                   1891: { 0x226C, "twixt", "BETWEEN" },
                   1892: { 0x226E, "nlt", "NOT LESS-THAN" },
                   1893: { 0x226F, "ngt", "NOT GREATER-THAN" },
                   1894: { 0x2270, "nlE", "" },
                   1895: { 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
                   1896: { 0x2270, "nles", "" },
                   1897: { 0x2271, "ngE", "" },
                   1898: { 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
                   1899: { 0x2271, "nges", "" },
                   1900: { 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
                   1901: { 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
                   1902: { 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
                   1903: { 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
                   1904: { 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
                   1905: { 0x2277, "gl", "" },
                   1906: { 0x227A, "pr", "" },
                   1907: { 0x227B, "sc", "" },
                   1908: { 0x227C, "cupre", "" },
                   1909: { 0x227C, "pre", "" },
                   1910: { 0x227D, "sccue", "" },
                   1911: { 0x227D, "sce", "" },
                   1912: { 0x227E, "prap", "" },
                   1913: { 0x227E, "prsim", "" },
                   1914: { 0x227F, "scap", "" },
                   1915: { 0x227F, "scsim", "" },
                   1916: { 0x2280, "npr", "DOES NOT PRECEDE" },
                   1917: { 0x2281, "nsc", "DOES NOT SUCCEED" },
                   1918: { 0x2282, "sub", "" },
                   1919: { 0x2283, "sup", "" },
                   1920: { 0x2284, "nsub", "NOT A SUBSET OF" },
                   1921: { 0x2285, "nsup", "NOT A SUPERSET OF" },
                   1922: { 0x2286, "subE", "" },
                   1923: { 0x2286, "sube", "" },
                   1924: { 0x2287, "supE", "" },
                   1925: { 0x2287, "supe", "" },
                   1926: { 0x2288, "nsubE", "" },
                   1927: { 0x2288, "nsube", "" },
                   1928: { 0x2289, "nsupE", "" },
                   1929: { 0x2289, "nsupe", "" },
                   1930: { 0x228A, "subne", "" },
                   1931: { 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
                   1932: { 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
                   1933: { 0x228B, "supnE", "" },
                   1934: { 0x228B, "supne", "" },
                   1935: { 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
                   1936: { 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
                   1937: { 0x228E, "uplus", "MULTISET UNION" },
                   1938: { 0x228F, "sqsub", "" },
                   1939: { 0x2290, "sqsup", "" },
                   1940: { 0x2291, "sqsube", "" },
                   1941: { 0x2292, "sqsupe", "" },
                   1942: { 0x2293, "sqcap", "SQUARE CAP" },
                   1943: { 0x2294, "sqcup", "SQUARE CUP" },
                   1944: { 0x2295, "oplus", "CIRCLED PLUS" },
                   1945: { 0x2296, "ominus", "CIRCLED MINUS" },
                   1946: { 0x2297, "otimes", "CIRCLED TIMES" },
                   1947: { 0x2298, "osol", "CIRCLED DIVISION SLASH" },
                   1948: { 0x2299, "odot", "CIRCLED DOT OPERATOR" },
                   1949: { 0x229A, "ocir", "CIRCLED RING OPERATOR" },
                   1950: { 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
                   1951: { 0x229D, "odash", "CIRCLED DASH" },
                   1952: { 0x229E, "plusb", "SQUARED PLUS" },
                   1953: { 0x229F, "minusb", "SQUARED MINUS" },
                   1954: { 0x22A0, "timesb", "SQUARED TIMES" },
                   1955: { 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
                   1956: { 0x22A2, "vdash", "" },
                   1957: { 0x22A3, "dashv", "" },
                   1958: { 0x22A4, "top", "DOWN TACK" },
                   1959: { 0x22A5, "bottom", "" },
                   1960: { 0x22A5, "perp", "" },
                   1961: { 0x22A7, "models", "MODELS" },
                   1962: { 0x22A8, "vDash", "" },
                   1963: { 0x22A9, "Vdash", "" },
                   1964: { 0x22AA, "Vvdash", "" },
                   1965: { 0x22AC, "nvdash", "DOES NOT PROVE" },
                   1966: { 0x22AD, "nvDash", "NOT TRUE" },
                   1967: { 0x22AE, "nVdash", "DOES NOT FORCE" },
                   1968: { 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
                   1969: { 0x22B2, "vltri", "" },
                   1970: { 0x22B3, "vrtri", "" },
                   1971: { 0x22B4, "ltrie", "" },
                   1972: { 0x22B5, "rtrie", "" },
                   1973: { 0x22B8, "mumap", "MULTIMAP" },
                   1974: { 0x22BA, "intcal", "INTERCALATE" },
                   1975: { 0x22BB, "veebar", "" },
                   1976: { 0x22BC, "barwed", "NAND" },
                   1977: { 0x22C4, "diam", "DIAMOND OPERATOR" },
                   1978: { 0x22C5, "sdot", "DOT OPERATOR" },
                   1979: { 0x22C6, "sstarf", "STAR OPERATOR" },
                   1980: { 0x22C6, "star", "STAR OPERATOR" },
                   1981: { 0x22C7, "divonx", "DIVISION TIMES" },
                   1982: { 0x22C8, "bowtie", "" },
                   1983: { 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
                   1984: { 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
                   1985: { 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
                   1986: { 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
                   1987: { 0x22CD, "bsime", "" },
                   1988: { 0x22CE, "cuvee", "CURLY LOGICAL OR" },
                   1989: { 0x22CF, "cuwed", "CURLY LOGICAL AND" },
                   1990: { 0x22D0, "Sub", "" },
                   1991: { 0x22D1, "Sup", "" },
                   1992: { 0x22D2, "Cap", "DOUBLE INTERSECTION" },
                   1993: { 0x22D3, "Cup", "DOUBLE UNION" },
                   1994: { 0x22D4, "fork", "" },
                   1995: { 0x22D6, "ldot", "" },
                   1996: { 0x22D7, "gsdot", "" },
                   1997: { 0x22D8, "Ll", "" },
                   1998: { 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
                   1999: { 0x22DA, "lEg", "" },
                   2000: { 0x22DA, "leg", "" },
                   2001: { 0x22DB, "gEl", "" },
                   2002: { 0x22DB, "gel", "" },
                   2003: { 0x22DC, "els", "" },
                   2004: { 0x22DD, "egs", "" },
                   2005: { 0x22DE, "cuepr", "" },
                   2006: { 0x22DF, "cuesc", "" },
                   2007: { 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
                   2008: { 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
                   2009: { 0x22E6, "lnsim", "" },
                   2010: { 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
                   2011: { 0x22E8, "prnap", "" },
                   2012: { 0x22E8, "prnsim", "" },
                   2013: { 0x22E9, "scnap", "" },
                   2014: { 0x22E9, "scnsim", "" },
                   2015: { 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
                   2016: { 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
                   2017: { 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
                   2018: { 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
                   2019: { 0x22EE, "vellip", "" },
                   2020: { 0x2306, "Barwed", "PERSPECTIVE" },
                   2021: { 0x2308, "lceil", "LEFT CEILING" },
                   2022: { 0x2309, "rceil", "RIGHT CEILING" },
                   2023: { 0x230A, "lfloor", "LEFT FLOOR" },
                   2024: { 0x230B, "rfloor", "RIGHT FLOOR" },
                   2025: { 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
                   2026: { 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
                   2027: { 0x230E, "urcrop", "TOP RIGHT CROP" },
                   2028: { 0x230F, "ulcrop", "TOP LEFT CROP" },
                   2029: { 0x2315, "telrec", "TELEPHONE RECORDER" },
                   2030: { 0x2316, "target", "POSITION INDICATOR" },
                   2031: { 0x231C, "ulcorn", "TOP LEFT CORNER" },
                   2032: { 0x231D, "urcorn", "TOP RIGHT CORNER" },
                   2033: { 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
                   2034: { 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
                   2035: { 0x2322, "frown", "" },
                   2036: { 0x2322, "sfrown", "FROWN" },
                   2037: { 0x2323, "smile", "" },
                   2038: { 0x2323, "ssmile", "SMILE" },
                   2039: { 0x2423, "blank", "OPEN BOX" },
                   2040: { 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
                   2041: { 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
                   2042: { 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
                   2043: { 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
                   2044: { 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
                   2045: { 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
                   2046: { 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
                   2047: { 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
                   2048: { 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
                   2049: { 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
                   2050: { 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
                   2051: { 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
                   2052: { 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
                   2053: { 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
                   2054: { 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
                   2055: { 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
                   2056: { 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
                   2057: { 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
                   2058: { 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
                   2059: { 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
                   2060: { 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
                   2061: { 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
                   2062: { 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
                   2063: { 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
                   2064: { 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
                   2065: { 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
                   2066: { 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
                   2067: { 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
                   2068: { 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
                   2069: { 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
                   2070: { 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
                   2071: { 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
                   2072: { 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
                   2073: { 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
                   2074: { 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
                   2075: { 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
                   2076: { 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
                   2077: { 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
                   2078: { 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
                   2079: { 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
                   2080: { 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
                   2081: { 0x2580, "uhblk", "UPPER HALF BLOCK" },
                   2082: { 0x2584, "lhblk", "LOWER HALF BLOCK" },
                   2083: { 0x2588, "block", "FULL BLOCK" },
                   2084: { 0x2591, "blk14", "LIGHT SHADE" },
                   2085: { 0x2592, "blk12", "MEDIUM SHADE" },
                   2086: { 0x2593, "blk34", "DARK SHADE" },
                   2087: { 0x25A1, "square", "WHITE SQUARE" },
                   2088: { 0x25A1, "squ", "WHITE SQUARE" },
                   2089: { 0x25AA, "squf", "" },
                   2090: { 0x25AD, "rect", "WHITE RECTANGLE" },
                   2091: { 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
                   2092: { 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
                   2093: { 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
                   2094: { 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
                   2095: { 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
                   2096: { 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
                   2097: { 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
                   2098: { 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
                   2099: { 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
                   2100: { 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
                   2101: { 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
                   2102: { 0x25CA, "loz", "LOZENGE" },
                   2103: { 0x25CB, "cir", "WHITE CIRCLE" },
                   2104: { 0x25CB, "xcirc", "WHITE CIRCLE" },
                   2105: { 0x2605, "starf", "BLACK STAR" },
                   2106: { 0x260E, "phone", "TELEPHONE SIGN" },
                   2107: { 0x2640, "female", "" },
                   2108: { 0x2642, "male", "MALE SIGN" },
                   2109: { 0x2660, "spades", "BLACK SPADE SUIT" },
                   2110: { 0x2663, "clubs", "BLACK CLUB SUIT" },
                   2111: { 0x2665, "hearts", "BLACK HEART SUIT" },
                   2112: { 0x2666, "diams", "BLACK DIAMOND SUIT" },
                   2113: { 0x2669, "sung", "" },
                   2114: { 0x266D, "flat", "MUSIC FLAT SIGN" },
                   2115: { 0x266E, "natur", "MUSIC NATURAL SIGN" },
                   2116: { 0x266F, "sharp", "MUSIC SHARP SIGN" },
                   2117: { 0x2713, "check", "CHECK MARK" },
                   2118: { 0x2717, "cross", "BALLOT X" },
                   2119: { 0x2720, "malt", "MALTESE CROSS" },
                   2120: { 0x2726, "lozf", "" },
                   2121: { 0x2736, "sext", "SIX POINTED BLACK STAR" },
                   2122: { 0x3008, "lang", "" },
                   2123: { 0x3009, "rang", "" },
                   2124: { 0xE291, "rpargt", "" },
                   2125: { 0xE2A2, "lnap", "" },
                   2126: { 0xE2AA, "nsmid", "" },
                   2127: { 0xE2B3, "prnE", "" },
                   2128: { 0xE2B5, "scnE", "" },
                   2129: { 0xE2B8, "vsubnE", "" },
                   2130: { 0xE301, "smid", "" },
                   2131: { 0xE411, "gnap", "" },
                   2132: { 0xFB00, "fflig", "" },
                   2133: { 0xFB01, "filig", "" },
                   2134: { 0xFB02, "fllig", "" },
                   2135: { 0xFB03, "ffilig", "" },
                   2136: { 0xFB04, "ffllig", "" },
                   2137: { 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
                   2138: };
                   2139: 
                   2140: /************************************************************************
                   2141:  *                                                                     *
                   2142:  *             Commodity functions to handle entities                  *
                   2143:  *                                                                     *
                   2144:  ************************************************************************/
                   2145: 
                   /*
                    * Macro used to grow the current buffer.
                    *
                    * Doubles <buffer>_size and reallocates <buffer> to hold that many
                    * xmlChar. The result of xmlRealloc() is first stored in a temporary:
                    * if the allocation fails the previous block is released instead of
                    * being leaked, and the enclosing function returns NULL.
                    * Because of that embedded return(NULL) the macro can only be expanded
                    * inside a function returning a pointer, where both <buffer> and
                    * <buffer>_size are visible.
                    */
                   #define growBuffer(buffer) {                                           \
                       xmlChar *buffer##_grown;                                           \
                       buffer##_size *= 2;                                                \
                       buffer##_grown = (xmlChar *) xmlRealloc(buffer,                    \
                                              buffer##_size * sizeof(xmlChar));           \
                       if (buffer##_grown == NULL) {                                      \
                           /* report first: releasing the block may disturb errno */      \
                           perror("realloc failed");                                      \
                           xmlFree(buffer);                                               \
                           return(NULL);                                                  \
                       }                                                                  \
                       buffer = buffer##_grown;                                           \
                   }
                   2157: 
                   2158: /**
                   2159:  * sgmlEntityLookup:
                   2160:  * @name: the entity name
                   2161:  *
                   2162:  * Lookup the given entity in EntitiesTable
                   2163:  *
                   2164:  * TODO: the linear scan is really ugly, an hash table is really needed.
                   2165:  *
                   2166:  * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
                   2167:  */
                   2168: sgmlEntityDescPtr
                   2169: sgmlEntityLookup(const xmlChar *name) {
                   2170:     int i;
                   2171: 
                   2172:     for (i = 0;i < (sizeof(docbookEntitiesTable)/
                   2173:                     sizeof(docbookEntitiesTable[0]));i++) {
1.7       veillard 2174:         if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) {
1.1       veillard 2175: #ifdef DEBUG
1.10    ! veillard 2176:             xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name);
1.1       veillard 2177: #endif
                   2178:             return(&docbookEntitiesTable[i]);
                   2179:        }
                   2180:     }
                   2181:     return(NULL);
                   2182: }
                   2183: 
                   2184: /**
                   2185:  * sgmlEntityValueLookup:
                   2186:  * @value: the entity's unicode value
                   2187:  *
                   2188:  * Lookup the given entity in EntitiesTable
                   2189:  *
                   2190:  * TODO: the linear scan is really ugly, an hash table is really needed.
                   2191:  *
                   2192:  * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
                   2193:  */
                   2194: sgmlEntityDescPtr
                   2195: sgmlEntityValueLookup(int value) {
                   2196:     int i;
                   2197: #ifdef DEBUG
                   2198:     int lv = 0;
                   2199: #endif
                   2200: 
                   2201:     for (i = 0;i < (sizeof(docbookEntitiesTable)/
                   2202:                     sizeof(docbookEntitiesTable[0]));i++) {
                   2203:         if (docbookEntitiesTable[i].value >= value) {
                   2204:            if (docbookEntitiesTable[i].value > value)
                   2205:                break;
                   2206: #ifdef DEBUG
1.10    ! veillard 2207:            xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", docbookEntitiesTable[i].name);
1.1       veillard 2208: #endif
                   2209:             return(&docbookEntitiesTable[i]);
                   2210:        }
                   2211: #ifdef DEBUG
                   2212:        if (lv > docbookEntitiesTable[i].value) {
1.10    ! veillard 2213:            xmlGenericError(xmlGenericErrorContext,
        !          2214:                    "docbookEntitiesTable[] is not sorted (%d > %d)!\n",
1.1       veillard 2215:                    lv, docbookEntitiesTable[i].value);
                   2216:        }
                   2217:        lv = docbookEntitiesTable[i].value;
                   2218: #endif
                   2219:     }
                   2220:     return(NULL);
                   2221: }
                   2222: 
                   2223: /**
                   2224:  * UTF8ToSgml:
                   2225:  * @out:  a pointer to an array of bytes to store the result
                   2226:  * @outlen:  the length of @out
                   2227:  * @in:  a pointer to an array of UTF-8 chars
                   2228:  * @inlen:  the length of @in
                   2229:  *
                   2230:  * Take a block of UTF-8 chars in and try to convert it to an ASCII
                   2231:  * plus SGML entities block of chars out.
                   2232:  *
                   2233:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
                   2234:  * The value of @inlen after return is the number of octets consumed
                   2235:  *     as the return value is positive, else unpredictiable.
                   2236:  * The value of @outlen after return is the number of octets consumed.
                   2237:  */
                   2238: int
                   2239: UTF8ToSgml(unsigned char* out, int *outlen,
                   2240:               const unsigned char* in, int *inlen) {
                   2241:     const unsigned char* processed = in;
                   2242:     const unsigned char* outend;
                   2243:     const unsigned char* outstart = out;
                   2244:     const unsigned char* instart = in;
                   2245:     const unsigned char* inend;
                   2246:     unsigned int c, d;
                   2247:     int trailing;
                   2248: 
                   2249:     if (in == NULL) {
                   2250:         /*
                   2251:         * initialization nothing to do
                   2252:         */
                   2253:        *outlen = 0;
                   2254:        *inlen = 0;
                   2255:        return(0);
                   2256:     }
                   2257:     inend = in + (*inlen);
                   2258:     outend = out + (*outlen);
                   2259:     while (in < inend) {
                   2260:        d = *in++;
                   2261:        if      (d < 0x80)  { c= d; trailing= 0; }
                   2262:        else if (d < 0xC0) {
                   2263:            /* trailing byte in leading position */
                   2264:            *outlen = out - outstart;
                   2265:            *inlen = processed - instart;
                   2266:            return(-2);
                   2267:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                   2268:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                   2269:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                   2270:        else {
                   2271:            /* no chance for this in Ascii */
                   2272:            *outlen = out - outstart;
                   2273:            *inlen = processed - instart;
                   2274:            return(-2);
                   2275:        }
                   2276: 
                   2277:        if (inend - in < trailing) {
                   2278:            break;
                   2279:        } 
                   2280: 
                   2281:        for ( ; trailing; trailing--) {
                   2282:            if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
                   2283:                break;
                   2284:            c <<= 6;
                   2285:            c |= d & 0x3F;
                   2286:        }
                   2287: 
                   2288:        /* assertion: c is a single UTF-4 value */
                   2289:        if (c < 0x80) {
                   2290:            if (out + 1 >= outend)
                   2291:                break;
                   2292:            *out++ = c;
                   2293:        } else {
                   2294:            int len;
                   2295:            sgmlEntityDescPtr ent;
                   2296: 
                   2297:            /*
                   2298:             * Try to lookup a predefined SGML entity for it
                   2299:             */
                   2300: 
                   2301:            ent = sgmlEntityValueLookup(c);
                   2302:            if (ent == NULL) {
                   2303:                /* no chance for this in Ascii */
                   2304:                *outlen = out - outstart;
                   2305:                *inlen = processed - instart;
                   2306:                return(-2);
                   2307:            }
                   2308:            len = strlen(ent->name);
                   2309:            if (out + 2 + len >= outend)
                   2310:                break;
                   2311:            *out++ = '&';
                   2312:            memcpy(out, ent->name, len);
                   2313:            out += len;
                   2314:            *out++ = ';';
                   2315:        }
                   2316:        processed = in;
                   2317:     }
                   2318:     *outlen = out - outstart;
                   2319:     *inlen = processed - instart;
                   2320:     return(0);
                   2321: }
                   2322: 
                   2323: /**
                   2324:  * sgmlEncodeEntities:
                   2325:  * @out:  a pointer to an array of bytes to store the result
                   2326:  * @outlen:  the length of @out
                   2327:  * @in:  a pointer to an array of UTF-8 chars
                   2328:  * @inlen:  the length of @in
                   2329:  * @quoteChar: the quote character to escape (' or ") or zero.
                   2330:  *
                   2331:  * Take a block of UTF-8 chars in and try to convert it to an ASCII
                   2332:  * plus SGML entities block of chars out.
                   2333:  *
                   2334:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
                   2335:  * The value of @inlen after return is the number of octets consumed
                   2336:  *     as the return value is positive, else unpredictiable.
                   2337:  * The value of @outlen after return is the number of octets consumed.
                   2338:  */
                   2339: int
                   2340: sgmlEncodeEntities(unsigned char* out, int *outlen,
                   2341:                   const unsigned char* in, int *inlen, int quoteChar) {
                   2342:     const unsigned char* processed = in;
                   2343:     const unsigned char* outend = out + (*outlen);
                   2344:     const unsigned char* outstart = out;
                   2345:     const unsigned char* instart = in;
                   2346:     const unsigned char* inend = in + (*inlen);
                   2347:     unsigned int c, d;
                   2348:     int trailing;
                   2349: 
                   2350:     while (in < inend) {
                   2351:        d = *in++;
                   2352:        if      (d < 0x80)  { c= d; trailing= 0; }
                   2353:        else if (d < 0xC0) {
                   2354:            /* trailing byte in leading position */
                   2355:            *outlen = out - outstart;
                   2356:            *inlen = processed - instart;
                   2357:            return(-2);
                   2358:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                   2359:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                   2360:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                   2361:        else {
                   2362:            /* no chance for this in Ascii */
                   2363:            *outlen = out - outstart;
                   2364:            *inlen = processed - instart;
                   2365:            return(-2);
                   2366:        }
                   2367: 
                   2368:        if (inend - in < trailing)
                   2369:            break;
                   2370: 
                   2371:        while (trailing--) {
                   2372:            if (((d= *in++) & 0xC0) != 0x80) {
                   2373:                *outlen = out - outstart;
                   2374:                *inlen = processed - instart;
                   2375:                return(-2);
                   2376:            }
                   2377:            c <<= 6;
                   2378:            c |= d & 0x3F;
                   2379:        }
                   2380: 
                   2381:        /* assertion: c is a single UTF-4 value */
                   2382:        if (c < 0x80 && c != quoteChar && c != '&' && c != '<' && c != '>') {
                   2383:            if (out >= outend)
                   2384:                break;
                   2385:            *out++ = c;
                   2386:        } else {
                   2387:            sgmlEntityDescPtr ent;
                   2388:            const char *cp;
                   2389:            char nbuf[16];
                   2390:            int len;
                   2391: 
                   2392:            /*
                   2393:             * Try to lookup a predefined SGML entity for it
                   2394:             */
                   2395:            ent = sgmlEntityValueLookup(c);
                   2396:            if (ent == NULL) {
                   2397:                sprintf(nbuf, "#%u", c);
                   2398:                cp = nbuf;
                   2399:            }
                   2400:            else
                   2401:                cp = ent->name;
                   2402:            len = strlen(cp);
                   2403:            if (out + 2 + len > outend)
                   2404:                break;
                   2405:            *out++ = '&';
                   2406:            memcpy(out, cp, len);
                   2407:            out += len;
                   2408:            *out++ = ';';
                   2409:        }
                   2410:        processed = in;
                   2411:     }
                   2412:     *outlen = out - outstart;
                   2413:     *inlen = processed - instart;
                   2414:     return(0);
                   2415: }
                   2416: 
                   2417: /**
                   2418:  * sgmlDecodeEntities:
                   2419:  * @ctxt:  the parser context
                   2420:  * @len:  the len to decode (in bytes !), -1 for no size limit
                   2421:  * @end:  an end marker xmlChar, 0 if none
                   2422:  * @end2:  an end marker xmlChar, 0 if none
                   2423:  * @end3:  an end marker xmlChar, 0 if none
                   2424:  *
                   2425:  * Subtitute the SGML entities by their value
                   2426:  *
                   2427:  * DEPRECATED !!!!
                   2428:  *
                   2429:  * Returns A newly allocated string with the substitution done. The caller
                   2430:  *      must deallocate it !
                   2431:  */
                   2432: xmlChar *
                   2433: sgmlDecodeEntities(sgmlParserCtxtPtr ctxt, int len,
                   2434:                   xmlChar end, xmlChar  end2, xmlChar end3) {
                   2435:     xmlChar *name = NULL;
                   2436:     xmlChar *buffer = NULL;
                   2437:     unsigned int buffer_size = 0;
                   2438:     unsigned int nbchars = 0;
                   2439:     sgmlEntityDescPtr ent;
                   2440:     unsigned int max = (unsigned int) len;
                   2441:     int c,l;
                   2442: 
                   2443:     if (ctxt->depth > 40) {
1.6       veillard 2444:        ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.1       veillard 2445:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2446:            ctxt->sax->error(ctxt->userData,
                   2447:                "Detected entity reference loop\n");
                   2448:        ctxt->wellFormed = 0;
                   2449:        ctxt->disableSAX = 1;
                   2450:        return(NULL);
                   2451:     }
                   2452: 
                   2453:     /*
                   2454:      * allocate a translation buffer.
                   2455:      */
                   2456:     buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
                   2457:     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
                   2458:     if (buffer == NULL) {
                   2459:        perror("xmlDecodeEntities: malloc failed");
                   2460:        return(NULL);
                   2461:     }
                   2462: 
                   2463:     /*
                   2464:      * Ok loop until we reach one of the ending char or a size limit.
                   2465:      */
                   2466:     c = CUR_CHAR(l);
                   2467:     while ((nbchars < max) && (c != end) &&
                   2468:            (c != end2) && (c != end3)) {
                   2469: 
                   2470:        if (c == 0) break;
                   2471:         if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
                   2472:            int val = sgmlParseCharRef(ctxt);
                   2473:            COPY_BUF(0,buffer,nbchars,val);
                   2474:            NEXTL(l);
                   2475:        } else if ((c == '&') && (ctxt->token != '&')) {
                   2476:            ent = sgmlParseEntityRef(ctxt, &name);
                   2477:            if (name != NULL) {
                   2478:                if (ent != NULL) {
                   2479:                    int val = ent->value;
                   2480:                    COPY_BUF(0,buffer,nbchars,val);
                   2481:                    NEXTL(l);
                   2482:                } else {
                   2483:                    const xmlChar *cur = name;
                   2484: 
                   2485:                    buffer[nbchars++] = '&';
                   2486:                    if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
                   2487:                        growBuffer(buffer);
                   2488:                    }
                   2489:                    while (*cur != 0) {
                   2490:                        buffer[nbchars++] = *cur++;
                   2491:                    }
                   2492:                    buffer[nbchars++] = ';';
                   2493:                }
                   2494:            }
                   2495:        } else {
                   2496:            COPY_BUF(l,buffer,nbchars,c);
                   2497:            NEXTL(l);
                   2498:            if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
                   2499:              growBuffer(buffer);
                   2500:            }
                   2501:        }
                   2502:        c = CUR_CHAR(l);
                   2503:     }
                   2504:     buffer[nbchars++] = 0;
                   2505:     return(buffer);
                   2506: }
                   2507: 
                   2508: /************************************************************************
                   2509:  *                                                                     *
                   2510:  *             Commodity functions to handle streams                   *
                   2511:  *                                                                     *
                   2512:  ************************************************************************/
                   2513: 
                   2514: /**
                   2515:  * sgmlFreeInputStream:
                   2516:  * @input:  an sgmlParserInputPtr
                   2517:  *
                   2518:  * Free up an input stream.
                   2519:  */
                   2520: void
                   2521: sgmlFreeInputStream(sgmlParserInputPtr input) {
                   2522:     if (input == NULL) return;
                   2523: 
                   2524:     if (input->filename != NULL) xmlFree((char *) input->filename);
                   2525:     if (input->directory != NULL) xmlFree((char *) input->directory);
                   2526:     if ((input->free != NULL) && (input->base != NULL))
                   2527:         input->free((xmlChar *) input->base);
                   2528:     if (input->buf != NULL) 
                   2529:         xmlFreeParserInputBuffer(input->buf);
                   2530:     memset(input, -1, sizeof(sgmlParserInput));
                   2531:     xmlFree(input);
                   2532: }
                   2533: 
                   2534: /**
                   2535:  * sgmlNewInputStream:
                   2536:  * @ctxt:  an SGML parser context
                   2537:  *
                   2538:  * Create a new input stream structure
                   2539:  * Returns the new input stream or NULL
                   2540:  */
                   2541: sgmlParserInputPtr
                   2542: sgmlNewInputStream(sgmlParserCtxtPtr ctxt) {
                   2543:     sgmlParserInputPtr input;
                   2544: 
                   2545:     input = (xmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   2546:     if (input == NULL) {
                   2547:         ctxt->errNo = XML_ERR_NO_MEMORY;
                   2548:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2549:            ctxt->sax->error(ctxt->userData, 
                   2550:                             "malloc: couldn't allocate a new input stream\n");
                   2551:        return(NULL);
                   2552:     }
                   2553:     memset(input, 0, sizeof(sgmlParserInput));
                   2554:     input->filename = NULL;
                   2555:     input->directory = NULL;
                   2556:     input->base = NULL;
                   2557:     input->cur = NULL;
                   2558:     input->buf = NULL;
                   2559:     input->line = 1;
                   2560:     input->col = 1;
                   2561:     input->buf = NULL;
                   2562:     input->free = NULL;
                   2563:     input->version = NULL;
                   2564:     input->consumed = 0;
                   2565:     input->length = 0;
                   2566:     return(input);
                   2567: }
                   2568: 
                   2569: 
                   2570: /************************************************************************
                   2571:  *                                                                     *
                   2572:  *             Commodity functions, cleanup needed ?                   *
                   2573:  *                                                                     *
                   2574:  ************************************************************************/
                   2575: 
                   2576: /**
                   2577:  * areBlanks:
                   2578:  * @ctxt:  an SGML parser context
                   2579:  * @str:  a xmlChar *
                   2580:  * @len:  the size of @str
                   2581:  *
                   2582:  * Is this a sequence of blank chars that one can ignore ?
                   2583:  *
                   2584:  * Returns 1 if ignorable 0 otherwise.
                   2585:  */
                   2586: 
                   2587: static int areBlanks(sgmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
                   2588:     int i;
                   2589:     xmlNodePtr lastChild;
                   2590: 
                   2591:     for (i = 0;i < len;i++)
                   2592:         if (!(IS_BLANK(str[i]))) return(0);
                   2593: 
                   2594:     if (CUR == 0) return(1);
                   2595:     if (CUR != '<') return(0);
                   2596:     if (ctxt->name == NULL)
                   2597:        return(1);
                   2598: #if 0
1.7       veillard 2599:     if (xmlStrEqual(ctxt->name, BAD_CAST"sgml"))
1.1       veillard 2600:        return(1);
1.7       veillard 2601:     if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
1.1       veillard 2602:        return(1);
1.7       veillard 2603:     if (xmlStrEqual(ctxt->name, BAD_CAST"body"))
1.1       veillard 2604:        return(1);
                   2605: #endif
                   2606:     if (ctxt->node == NULL) return(0);
                   2607:     lastChild = xmlGetLastChild(ctxt->node);
                   2608:     if (lastChild == NULL) {
                   2609:         if (ctxt->node->content != NULL) return(0);
                   2610:     } else if (xmlNodeIsText(lastChild))
                   2611:         return(0);
                   2612:     return(1);
                   2613: }
                   2614: 
                   2615: /**
                   2616:  * sgmlHandleEntity:
                   2617:  * @ctxt:  an SGML parser context
                   2618:  * @entity:  an XML entity pointer.
                   2619:  *
                   2620:  * Default handling of an SGML entity, call the parser with the
                   2621:  * substitution string
                   2622:  */
                   2623: 
                   2624: void
                   2625: sgmlHandleEntity(sgmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
                   2626:     int len;
                   2627: 
                   2628:     if (entity->content == NULL) {
                   2629:         if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   2630:            ctxt->sax->error(ctxt->userData, "sgmlHandleEntity %s: content == NULL\n",
                   2631:                       entity->name);
                   2632:        ctxt->wellFormed = 0;
                   2633:         return;
                   2634:     }
                   2635:     len = xmlStrlen(entity->content);
                   2636: 
                   2637:     /*
                   2638:      * Just handle the content as a set of chars.
                   2639:      */
                   2640:     sgmlCheckParagraph(ctxt);
                   2641:     if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   2642:        ctxt->sax->characters(ctxt->userData, entity->content, len);
                   2643: 
                   2644: }
                   2645: 
                   2646: /**
                   2647:  * sgmlNewDocNoDtD:
                   2648:  * @URI:  URI for the dtd, or NULL
                   2649:  * @ExternalID:  the external ID of the DTD, or NULL
                   2650:  *
                   2651:  * Returns a new document, do not intialize the DTD if not provided
                   2652:  */
                   2653: sgmlDocPtr
                   2654: sgmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
                   2655:     xmlDocPtr cur;
                   2656: 
                   2657:     /*
                   2658:      * Allocate a new document and fill the fields.
                   2659:      */
                   2660:     cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
                   2661:     if (cur == NULL) {
1.10    ! veillard 2662:         xmlGenericError(xmlGenericErrorContext,
        !          2663:                "xmlNewDoc : malloc failed\n");
1.1       veillard 2664:        return(NULL);
                   2665:     }
                   2666:     memset(cur, 0, sizeof(xmlDoc));
                   2667: 
                   2668:     cur->type = XML_SGML_DOCUMENT_NODE;
                   2669:     cur->version = NULL;
                   2670:     cur->intSubset = NULL;
                   2671:     if ((ExternalID != NULL) ||
                   2672:        (URI != NULL))
                   2673:        xmlCreateIntSubset(cur, BAD_CAST "SGML", ExternalID, URI);
                   2674:     cur->doc = cur;
                   2675:     cur->name = NULL;
                   2676:     cur->children = NULL; 
                   2677:     cur->extSubset = NULL;
                   2678:     cur->oldNs = NULL;
                   2679:     cur->encoding = NULL;
                   2680:     cur->standalone = 1;
                   2681:     cur->compression = 0;
                   2682:     cur->ids = NULL;
                   2683:     cur->refs = NULL;
                   2684: #ifndef XML_WITHOUT_CORBA
                   2685:     cur->_private = NULL;
                   2686: #endif
                   2687:     return(cur);
                   2688: }
                   2689: 
                   2690: /**
                   2691:  * sgmlNewDoc:
                   2692:  * @URI:  URI for the dtd, or NULL
                   2693:  * @ExternalID:  the external ID of the DTD, or NULL
                   2694:  *
                   2695:  * Returns a new document
                   2696:  */
                   2697: sgmlDocPtr
                   2698: sgmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
                   2699:     if ((URI == NULL) && (ExternalID == NULL))
                   2700:        return(sgmlNewDocNoDtD(
                   2701:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   2702:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"));
                   2703: 
                   2704:     return(sgmlNewDocNoDtD(URI, ExternalID));
                   2705: }
                   2706: 
                   2707: 
                   2708: /************************************************************************
                   2709:  *                                                                     *
                   2710:  *                     The parser itself                               *
                   2711:  *     Relates to http://www.w3.org/TR/docbook                         *
                   2712:  *                                                                     *
                   2713:  ************************************************************************/
                   2714: 
                   2715: /************************************************************************
                   2716:  *                                                                     *
                   2717:  *                     The parser itself                               *
                   2718:  *                                                                     *
                   2719:  ************************************************************************/
                   2720: 
                   2721: /**
                   2722:  * sgmlParseSGMLName:
                   2723:  * @ctxt:  an SGML parser context
                   2724:  *
                   2725:  * parse an SGML tag or attribute name, note that we convert it to lowercase
                   2726:  * since SGML names are not case-sensitive.
                   2727:  *
                   2728:  * Returns the Tag Name parsed or NULL
                   2729:  */
                   2730: 
                   2731: xmlChar *
                   2732: sgmlParseSGMLName(sgmlParserCtxtPtr ctxt) {
                   2733:     xmlChar *ret = NULL;
                   2734:     int i = 0;
                   2735:     xmlChar loc[SGML_PARSER_BUFFER_SIZE];
                   2736: 
                   2737:     if (!IS_LETTER(CUR) && (CUR != '_') &&
                   2738:         (CUR != ':')) return(NULL);
                   2739: 
                   2740:     while ((i < SGML_PARSER_BUFFER_SIZE) &&
                   2741:            ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2742:           (CUR == ':') || (CUR == '_'))) {
                   2743:        if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
                   2744:         else loc[i] = CUR;
                   2745:        i++;
                   2746:        
                   2747:        NEXT;
                   2748:     }
                   2749:     
                   2750:     ret = xmlStrndup(loc, i);
                   2751: 
                   2752:     return(ret);
                   2753: }
                   2754: 
                   2755: /**
                   2756:  * sgmlParseName:
                   2757:  * @ctxt:  an SGML parser context
                   2758:  *
                   2759:  * parse an SGML name, this routine is case sensistive.
                   2760:  *
                   2761:  * Returns the Name parsed or NULL
                   2762:  */
                   2763: 
                   2764: xmlChar *
                   2765: sgmlParseName(sgmlParserCtxtPtr ctxt) {
                   2766:     xmlChar buf[SGML_MAX_NAMELEN];
                   2767:     int len = 0;
                   2768: 
                   2769:     GROW;
                   2770:     if (!IS_LETTER(CUR) && (CUR != '_')) {
                   2771:        return(NULL);
                   2772:     }
                   2773: 
                   2774:     while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2775:            (CUR == '.') || (CUR == '-') ||
                   2776:           (CUR == '_') || (CUR == ':') || 
                   2777:           (IS_COMBINING(CUR)) ||
                   2778:           (IS_EXTENDER(CUR))) {
                   2779:        buf[len++] = CUR;
                   2780:        NEXT;
                   2781:        if (len >= SGML_MAX_NAMELEN) {
1.10    ! veillard 2782:            xmlGenericError(xmlGenericErrorContext, 
1.1       veillard 2783:               "sgmlParseName: reached SGML_MAX_NAMELEN limit\n");
                   2784:            while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2785:                   (CUR == '.') || (CUR == '-') ||
                   2786:                   (CUR == '_') || (CUR == ':') || 
                   2787:                   (IS_COMBINING(CUR)) ||
                   2788:                   (IS_EXTENDER(CUR)))
                   2789:                 NEXT;
                   2790:            break;
                   2791:        }
                   2792:     }
                   2793:     return(xmlStrndup(buf, len));
                   2794: }
                   2795: 
                   2796: /**
                   2797:  * sgmlParseSGMLAttribute:
                   2798:  * @ctxt:  an SGML parser context
                   2799:  * @stop:  a char stop value
                   2800:  * 
                   2801:  * parse an SGML attribute value till the stop (quote), if
                   2802:  * stop is 0 then it stops at the first space
                   2803:  *
                   2804:  * Returns the attribute parsed or NULL
                   2805:  */
                   2806: 
                   2807: xmlChar *
                   2808: sgmlParseSGMLAttribute(sgmlParserCtxtPtr ctxt, const xmlChar stop) {
                   2809: #if 0
                   2810:     xmlChar buf[SGML_MAX_NAMELEN];
                   2811:     int len = 0;
                   2812: 
                   2813:     GROW;
                   2814:     while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
                   2815:        if ((stop == 0) && (IS_BLANK(CUR))) break;
                   2816:        buf[len++] = CUR;
                   2817:        NEXT;
                   2818:        if (len >= SGML_MAX_NAMELEN) {
1.10    ! veillard 2819:            xmlGenericError(xmlGenericErrorContext, 
1.1       veillard 2820:               "sgmlParseSGMLAttribute: reached SGML_MAX_NAMELEN limit\n");
                   2821:            while ((!IS_BLANK(CUR)) && (CUR != '<') &&
                   2822:                   (CUR != '>') &&
                   2823:                   (CUR != '\'') && (CUR != '"'))
                   2824:                 NEXT;
                   2825:            break;
                   2826:        }
                   2827:     }
                   2828:     return(xmlStrndup(buf, len));
                   2829: #else    
                   2830:     xmlChar *buffer = NULL;
                   2831:     int buffer_size = 0;
                   2832:     xmlChar *out = NULL;
                   2833:     xmlChar *name = NULL;
                   2834: 
                   2835:     xmlChar *cur = NULL;
                   2836:     sgmlEntityDescPtr ent;
                   2837: 
                   2838:     /*
                   2839:      * allocate a translation buffer.
                   2840:      */
                   2841:     buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
                   2842:     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
                   2843:     if (buffer == NULL) {
                   2844:        perror("sgmlParseSGMLAttribute: malloc failed");
                   2845:        return(NULL);
                   2846:     }
                   2847:     out = buffer;
                   2848: 
                   2849:     /*
                   2850:      * Ok loop until we reach one of the ending chars
                   2851:      */
                   2852:     while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
                   2853:        if ((stop == 0) && (IS_BLANK(CUR))) break;
                   2854:         if (CUR == '&') {
                   2855:            if (NXT(1) == '#') {
                   2856:                unsigned int c;
                   2857:                int bits;
                   2858: 
                   2859:                c = sgmlParseCharRef(ctxt);
                   2860:                if      (c <    0x80)
                   2861:                        { *out++  = c;                bits= -6; }
                   2862:                else if (c <   0x800)
                   2863:                        { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2864:                else if (c < 0x10000)
                   2865:                        { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2866:                else                 
                   2867:                        { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2868:         
                   2869:                for ( ; bits >= 0; bits-= 6) {
                   2870:                    *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2871:                }
                   2872:            } else {
                   2873:                ent = sgmlParseEntityRef(ctxt, &name);
                   2874:                if (name == NULL) {
                   2875:                    *out++ = '&';
                   2876:                    if (out - buffer > buffer_size - 100) {
                   2877:                        int index = out - buffer;
                   2878: 
                   2879:                        growBuffer(buffer);
                   2880:                        out = &buffer[index];
                   2881:                    }
                   2882:                } else if (ent == NULL) {
                   2883:                    *out++ = '&';
                   2884:                    cur = name;
                   2885:                    while (*cur != 0) {
                   2886:                        if (out - buffer > buffer_size - 100) {
                   2887:                            int index = out - buffer;
                   2888: 
                   2889:                            growBuffer(buffer);
                   2890:                            out = &buffer[index];
                   2891:                        }
                   2892:                        *out++ = *cur++;
                   2893:                    }
                   2894:                    xmlFree(name);
                   2895:                } else {
                   2896:                    unsigned int c;
                   2897:                    int bits;
                   2898: 
                   2899:                    if (out - buffer > buffer_size - 100) {
                   2900:                        int index = out - buffer;
                   2901: 
                   2902:                        growBuffer(buffer);
                   2903:                        out = &buffer[index];
                   2904:                    }
                   2905:                    c = (xmlChar)ent->value;
                   2906:                    if      (c <    0x80)
                   2907:                        { *out++  = c;                bits= -6; }
                   2908:                    else if (c <   0x800)
                   2909:                        { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2910:                    else if (c < 0x10000)
                   2911:                        { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2912:                    else                 
                   2913:                        { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2914:             
                   2915:                    for ( ; bits >= 0; bits-= 6) {
                   2916:                        *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2917:                    }
                   2918:                    xmlFree(name);
                   2919:                }
                   2920:            }
                   2921:        } else {
                   2922:            unsigned int c;
                   2923:            int bits;
                   2924: 
                   2925:            if (out - buffer > buffer_size - 100) {
                   2926:                int index = out - buffer;
                   2927: 
                   2928:                growBuffer(buffer);
                   2929:                out = &buffer[index];
                   2930:            }
                   2931:            c = CUR;
                   2932:            if      (c <    0x80)
                   2933:                    { *out++  = c;                bits= -6; }
                   2934:            else if (c <   0x800)
                   2935:                    { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   2936:            else if (c < 0x10000)
                   2937:                    { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   2938:            else                 
                   2939:                    { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   2940:      
                   2941:            for ( ; bits >= 0; bits-= 6) {
                   2942:                *out++  = ((c >> bits) & 0x3F) | 0x80;
                   2943:            }
                   2944:            NEXT;
                   2945:        }
                   2946:     }
                   2947:     *out++ = 0;
                   2948:     return(buffer);
                   2949: #endif
                   2950: }
                   2951: 
                   2952: /**
                   2953:  * sgmlParseNmtoken:
                   2954:  * @ctxt:  an SGML parser context
                   2955:  * 
                   2956:  * parse an SGML Nmtoken.
                   2957:  *
                   2958:  * Returns the Nmtoken parsed or NULL
                   2959:  */
                   2960: 
                   2961: xmlChar *
                   2962: sgmlParseNmtoken(sgmlParserCtxtPtr ctxt) {
                   2963:     xmlChar buf[SGML_MAX_NAMELEN];
                   2964:     int len = 0;
                   2965: 
                   2966:     GROW;
                   2967:     while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2968:            (CUR == '.') || (CUR == '-') ||
                   2969:           (CUR == '_') || (CUR == ':') || 
                   2970:           (IS_COMBINING(CUR)) ||
                   2971:           (IS_EXTENDER(CUR))) {
                   2972:        buf[len++] = CUR;
                   2973:        NEXT;
                   2974:        if (len >= SGML_MAX_NAMELEN) {
1.10    ! veillard 2975:            xmlGenericError(xmlGenericErrorContext, 
1.1       veillard 2976:               "sgmlParseNmtoken: reached SGML_MAX_NAMELEN limit\n");
                   2977:            while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
                   2978:                   (CUR == '.') || (CUR == '-') ||
                   2979:                   (CUR == '_') || (CUR == ':') || 
                   2980:                   (IS_COMBINING(CUR)) ||
                   2981:                   (IS_EXTENDER(CUR)))
                   2982:                 NEXT;
                   2983:            break;
                   2984:        }
                   2985:     }
                   2986:     return(xmlStrndup(buf, len));
                   2987: }
                   2988: 
                   2989: /**
                   2990:  * sgmlParseEntityRef:
                   2991:  * @ctxt:  an SGML parser context
                   2992:  * @str:  location to store the entity name
                   2993:  *
                   2994:  * parse an SGML ENTITY references
                   2995:  *
                   2996:  * [68] EntityRef ::= '&' Name ';'
                   2997:  *
                   2998:  * Returns the associated sgmlEntityDescPtr if found, or NULL otherwise,
                   2999:  *         if non-NULL *str will have to be freed by the caller.
                   3000:  */
                   3001: sgmlEntityDescPtr
                   3002: sgmlParseEntityRef(sgmlParserCtxtPtr ctxt, xmlChar **str) {
                   3003:     xmlChar *name;
                   3004:     sgmlEntityDescPtr ent = NULL;
                   3005:     *str = NULL;
                   3006: 
                   3007:     if (CUR == '&') {
                   3008:         NEXT;
                   3009:         name = sgmlParseName(ctxt);
                   3010:        if (name == NULL) {
                   3011:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3012:                ctxt->sax->error(ctxt->userData, "sgmlParseEntityRef: no name\n");
                   3013:            ctxt->wellFormed = 0;
                   3014:        } else {
                   3015:            GROW;
                   3016:            if (CUR == ';') {
                   3017:                *str = name;
                   3018: 
                   3019:                /*
                   3020:                 * Lookup the entity in the table.
                   3021:                 */
                   3022:                ent = sgmlEntityLookup(name);
                   3023:                if (ent != NULL) /* OK that's ugly !!! */
                   3024:                    NEXT;
                   3025:            } else {
                   3026:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3027:                    ctxt->sax->error(ctxt->userData,
                   3028:                                     "sgmlParseEntityRef: expecting ';'\n");
                   3029:                *str = name;
                   3030:            }
                   3031:        }
                   3032:     }
                   3033:     return(ent);
                   3034: }
                   3035: 
                   3036: /**
                   3037:  * sgmlParseAttValue:
                   3038:  * @ctxt:  an SGML parser context
                   3039:  *
                   3040:  * parse a value for an attribute
                   3041:  * Note: the parser won't do substitution of entities here, this
                   3042:  * will be handled later in xmlStringGetNodeList, unless it was
                   3043:  * asked for ctxt->replaceEntities != 0 
                   3044:  *
                   3045:  * Returns the AttValue parsed or NULL.
                   3046:  */
                   3047: 
                   3048: xmlChar *
                   3049: sgmlParseAttValue(sgmlParserCtxtPtr ctxt) {
                   3050:     xmlChar *ret = NULL;
                   3051: 
                   3052:     if (CUR == '"') {
                   3053:         NEXT;
                   3054:        ret = sgmlParseSGMLAttribute(ctxt, '"');
                   3055:         if (CUR != '"') {
                   3056:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3057:                ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
                   3058:            ctxt->wellFormed = 0;
                   3059:        } else
                   3060:            NEXT;
                   3061:     } else if (CUR == '\'') {
                   3062:         NEXT;
                   3063:        ret = sgmlParseSGMLAttribute(ctxt, '\'');
                   3064:         if (CUR != '\'') {
                   3065:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3066:                ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
                   3067:            ctxt->wellFormed = 0;
                   3068:        } else
                   3069:            NEXT;
                   3070:     } else {
                   3071:         /*
                   3072:         * That's an SGMLism, the attribute value may not be quoted
                   3073:         */
                   3074:        ret = sgmlParseSGMLAttribute(ctxt, 0);
                   3075:        if (ret == NULL) {
                   3076:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3077:                ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
                   3078:            ctxt->wellFormed = 0;
                   3079:        }
                   3080:     }
                   3081:     return(ret);
                   3082: }
                   3083: 
                   3084: /**
                   3085:  * sgmlParseSystemLiteral:
                   3086:  * @ctxt:  an SGML parser context
                   3087:  * 
                   3088:  * parse an SGML Literal
                   3089:  *
                   3090:  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
                   3091:  *
                   3092:  * Returns the SystemLiteral parsed or NULL
                   3093:  */
                   3094: 
                   3095: xmlChar *
                   3096: sgmlParseSystemLiteral(sgmlParserCtxtPtr ctxt) {
                   3097:     const xmlChar *q;
                   3098:     xmlChar *ret = NULL;
                   3099: 
                   3100:     if (CUR == '"') {
                   3101:         NEXT;
                   3102:        q = CUR_PTR;
                   3103:        while ((IS_CHAR(CUR)) && (CUR != '"'))
                   3104:            NEXT;
                   3105:        if (!IS_CHAR(CUR)) {
                   3106:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3107:                ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
                   3108:            ctxt->wellFormed = 0;
                   3109:        } else {
                   3110:            ret = xmlStrndup(q, CUR_PTR - q);
                   3111:            NEXT;
                   3112:         }
                   3113:     } else if (CUR == '\'') {
                   3114:         NEXT;
                   3115:        q = CUR_PTR;
                   3116:        while ((IS_CHAR(CUR)) && (CUR != '\''))
                   3117:            NEXT;
                   3118:        if (!IS_CHAR(CUR)) {
                   3119:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3120:                ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
                   3121:            ctxt->wellFormed = 0;
                   3122:        } else {
                   3123:            ret = xmlStrndup(q, CUR_PTR - q);
                   3124:            NEXT;
                   3125:         }
                   3126:     } else {
                   3127:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3128:            ctxt->sax->error(ctxt->userData,
                   3129:                             "SystemLiteral \" or ' expected\n");
                   3130:        ctxt->wellFormed = 0;
                   3131:     }
                   3132:     
                   3133:     return(ret);
                   3134: }
                   3135: 
                   3136: /**
                   3137:  * sgmlParsePubidLiteral:
                   3138:  * @ctxt:  an SGML parser context
                   3139:  *
                   3140:  * parse an SGML public literal
                   3141:  *
                   3142:  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
                   3143:  *
                   3144:  * Returns the PubidLiteral parsed or NULL.
                   3145:  */
                   3146: 
                   3147: xmlChar *
                   3148: sgmlParsePubidLiteral(sgmlParserCtxtPtr ctxt) {
                   3149:     const xmlChar *q;
                   3150:     xmlChar *ret = NULL;
                   3151:     /*
                   3152:      * Name ::= (Letter | '_') (NameChar)*
                   3153:      */
                   3154:     if (CUR == '"') {
                   3155:         NEXT;
                   3156:        q = CUR_PTR;
                   3157:        while (IS_PUBIDCHAR(CUR)) NEXT;
                   3158:        if (CUR != '"') {
                   3159:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3160:                ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
                   3161:            ctxt->wellFormed = 0;
                   3162:        } else {
                   3163:            ret = xmlStrndup(q, CUR_PTR - q);
                   3164:            NEXT;
                   3165:        }
                   3166:     } else if (CUR == '\'') {
                   3167:         NEXT;
                   3168:        q = CUR_PTR;
                   3169:        while ((IS_LETTER(CUR)) && (CUR != '\''))
                   3170:            NEXT;
                   3171:        if (!IS_LETTER(CUR)) {
                   3172:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3173:                ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
                   3174:            ctxt->wellFormed = 0;
                   3175:        } else {
                   3176:            ret = xmlStrndup(q, CUR_PTR - q);
                   3177:            NEXT;
                   3178:        }
                   3179:     } else {
                   3180:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3181:            ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
                   3182:        ctxt->wellFormed = 0;
                   3183:     }
                   3184:     
                   3185:     return(ret);
                   3186: }
                   3187: 
                   3188: /**
                   3189:  * sgmlParseCharData:
                   3190:  * @ctxt:  an SGML parser context
                   3191:  * @cdata:  int indicating whether we are within a CDATA section
                   3192:  *
                   3193:  * parse a CharData section.
                   3194:  * if we are within a CDATA section ']]>' marks an end of section.
                   3195:  *
                   3196:  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
                   3197:  */
                   3198: 
                   3199: void
                   3200: sgmlParseCharData(sgmlParserCtxtPtr ctxt, int cdata) {
                   3201:     xmlChar buf[SGML_PARSER_BIG_BUFFER_SIZE + 5];
                   3202:     int nbchar = 0;
                   3203:     int cur, l;
                   3204: 
                   3205:     SHRINK;
                   3206:     cur = CUR_CHAR(l);
                   3207:     while (((cur != '<') || (ctxt->token == '<')) &&
                   3208:            ((cur != '&') || (ctxt->token == '&')) && 
                   3209:           (IS_CHAR(cur))) {
                   3210:        COPY_BUF(l,buf,nbchar,cur);
                   3211:        if (nbchar >= SGML_PARSER_BIG_BUFFER_SIZE) {
                   3212:            /*
                   3213:             * Ok the segment is to be consumed as chars.
                   3214:             */
                   3215:            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                   3216:                if (areBlanks(ctxt, buf, nbchar)) {
                   3217:                    if (ctxt->sax->ignorableWhitespace != NULL)
                   3218:                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                   3219:                                                       buf, nbchar);
                   3220:                } else {
                   3221:                    sgmlCheckParagraph(ctxt);
                   3222:                    if (ctxt->sax->characters != NULL)
                   3223:                        ctxt->sax->characters(ctxt->userData, buf, nbchar);
                   3224:                }
                   3225:            }
                   3226:            nbchar = 0;
                   3227:        }
                   3228:        NEXTL(l);
                   3229:        cur = CUR_CHAR(l);
                   3230:     }
                   3231:     if (nbchar != 0) {
                   3232:        /*
                   3233:         * Ok the segment is to be consumed as chars.
                   3234:         */
                   3235:        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                   3236:            if (areBlanks(ctxt, buf, nbchar)) {
                   3237:                if (ctxt->sax->ignorableWhitespace != NULL)
                   3238:                    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
                   3239:            } else {
                   3240:                sgmlCheckParagraph(ctxt);
                   3241:                if (ctxt->sax->characters != NULL)
                   3242:                    ctxt->sax->characters(ctxt->userData, buf, nbchar);
                   3243:            }
                   3244:        }
                   3245:     }
                   3246: }
                   3247: 
                   3248: /**
                   3249:  * sgmlParseExternalID:
                   3250:  * @ctxt:  an SGML parser context
                   3251:  * @publicID:  a xmlChar** receiving PubidLiteral
                   3252:  * @strict: indicate whether we should restrict parsing to only
                   3253:  *          production [75], see NOTE below
                   3254:  *
                   3255:  * Parse an External ID or a Public ID
                   3256:  *
                   3257:  * NOTE: Productions [75] and [83] interract badly since [75] can generate
                   3258:  *       'PUBLIC' S PubidLiteral S SystemLiteral
                   3259:  *
                   3260:  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
                   3261:  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
                   3262:  *
                   3263:  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
                   3264:  *
                   3265:  * Returns the function returns SystemLiteral and in the second
                   3266:  *                case publicID receives PubidLiteral, is strict is off
                   3267:  *                it is possible to return NULL and have publicID set.
                   3268:  */
                   3269: 
                   3270: xmlChar *
                   3271: sgmlParseExternalID(sgmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
                   3272:     xmlChar *URI = NULL;
                   3273: 
                   3274:     if ((UPPER == 'S') && (UPP(1) == 'Y') &&
                   3275:          (UPP(2) == 'S') && (UPP(3) == 'T') &&
                   3276:         (UPP(4) == 'E') && (UPP(5) == 'M')) {
                   3277:         SKIP(6);
                   3278:        if (!IS_BLANK(CUR)) {
                   3279:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3280:                ctxt->sax->error(ctxt->userData,
                   3281:                    "Space required after 'SYSTEM'\n");
                   3282:            ctxt->wellFormed = 0;
                   3283:        }
                   3284:         SKIP_BLANKS;
                   3285:        URI = sgmlParseSystemLiteral(ctxt);
                   3286:        if (URI == NULL) {
                   3287:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3288:                ctxt->sax->error(ctxt->userData,
                   3289:                  "sgmlParseExternalID: SYSTEM, no URI\n");
                   3290:            ctxt->wellFormed = 0;
                   3291:         }
                   3292:     } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
                   3293:               (UPP(2) == 'B') && (UPP(3) == 'L') &&
                   3294:               (UPP(4) == 'I') && (UPP(5) == 'C')) {
                   3295:         SKIP(6);
                   3296:        if (!IS_BLANK(CUR)) {
                   3297:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3298:                ctxt->sax->error(ctxt->userData,
                   3299:                    "Space required after 'PUBLIC'\n");
                   3300:            ctxt->wellFormed = 0;
                   3301:        }
                   3302:         SKIP_BLANKS;
                   3303:        *publicID = sgmlParsePubidLiteral(ctxt);
                   3304:        if (*publicID == NULL) {
                   3305:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3306:                ctxt->sax->error(ctxt->userData, 
                   3307:                  "sgmlParseExternalID: PUBLIC, no Public Identifier\n");
                   3308:            ctxt->wellFormed = 0;
                   3309:        }
                   3310:         SKIP_BLANKS;
                   3311:         if ((CUR == '"') || (CUR == '\'')) {
                   3312:            URI = sgmlParseSystemLiteral(ctxt);
                   3313:        }
                   3314:     }
                   3315:     return(URI);
                   3316: }
                   3317: 
                   3318: /**
                   3319:  * sgmlParseComment:
                   3320:  * @ctxt:  an SGML parser context
                   3321:  *
                   3322:  * Parse an XML (SGML) comment <!-- .... -->
                   3323:  *
                   3324:  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
                   3325:  */
                   3326: void
                   3327: sgmlParseComment(sgmlParserCtxtPtr ctxt) {
                   3328:     xmlChar *buf = NULL;
                   3329:     int len;
                   3330:     int size = SGML_PARSER_BUFFER_SIZE;
                   3331:     int q, ql;
                   3332:     int r, rl;
                   3333:     int cur, l;
                   3334:     xmlParserInputState state;
                   3335: 
                   3336:     /*
                   3337:      * Check that there is a comment right here.
                   3338:      */
                   3339:     if ((RAW != '<') || (NXT(1) != '!') ||
                   3340:         (NXT(2) != '-') || (NXT(3) != '-')) return;
                   3341: 
                   3342:     state = ctxt->instate;
                   3343:     ctxt->instate = XML_PARSER_COMMENT;
                   3344:     SHRINK;
                   3345:     SKIP(4);
                   3346:     buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
                   3347:     if (buf == NULL) {
1.10    ! veillard 3348:        xmlGenericError(xmlGenericErrorContext,
        !          3349:                "malloc of %d byte failed\n", size);
1.1       veillard 3350:        ctxt->instate = state;
                   3351:        return;
                   3352:     }
                   3353:     q = CUR_CHAR(ql);
                   3354:     NEXTL(ql);
                   3355:     r = CUR_CHAR(rl);
                   3356:     NEXTL(rl);
                   3357:     cur = CUR_CHAR(l);
                   3358:     len = 0;
                   3359:     while (IS_CHAR(cur) &&
                   3360:            ((cur != '>') ||
                   3361:            (r != '-') || (q != '-'))) {
                   3362:        if (len + 5 >= size) {
                   3363:            size *= 2;
                   3364:            buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
                   3365:            if (buf == NULL) {
1.10    ! veillard 3366:                xmlGenericError(xmlGenericErrorContext,
        !          3367:                        "realloc of %d byte failed\n", size);
1.1       veillard 3368:                ctxt->instate = state;
                   3369:                return;
                   3370:            }
                   3371:        }
                   3372:        COPY_BUF(ql,buf,len,q);
                   3373:        q = r;
                   3374:        ql = rl;
                   3375:        r = cur;
                   3376:        rl = l;
                   3377:        NEXTL(l);
                   3378:        cur = CUR_CHAR(l);
                   3379:        if (cur == 0) {
                   3380:            SHRINK;
                   3381:            GROW;
                   3382:            cur = CUR_CHAR(l);
                   3383:        }
                   3384:     }
                   3385:     buf[len] = 0;
                   3386:     if (!IS_CHAR(cur)) {
1.6       veillard 3387:        ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.1       veillard 3388:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3389:            ctxt->sax->error(ctxt->userData,
                   3390:                             "Comment not terminated \n<!--%.50s\n", buf);
                   3391:        ctxt->wellFormed = 0;
                   3392:        xmlFree(buf);
                   3393:     } else {
                   3394:         NEXT;
                   3395:        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                   3396:            (!ctxt->disableSAX))
                   3397:            ctxt->sax->comment(ctxt->userData, buf);
                   3398:        xmlFree(buf);
                   3399:     }
                   3400:     ctxt->instate = state;
                   3401: }
                   3402: 
                   3403: /**
                   3404:  * sgmlParseCharRef:
                   3405:  * @ctxt:  an SGML parser context
                   3406:  *
                   3407:  * parse Reference declarations
                   3408:  *
                   3409:  * [66] CharRef ::= '&#' [0-9]+ ';' |
                   3410:  *                  '&#x' [0-9a-fA-F]+ ';'
                   3411:  *
                   3412:  * Returns the value parsed (as an int)
                   3413:  */
                   3414: int
                   3415: sgmlParseCharRef(sgmlParserCtxtPtr ctxt) {
                   3416:     int val = 0;
                   3417: 
                   3418:     if ((CUR == '&') && (NXT(1) == '#') &&
                   3419:         (NXT(2) == 'x')) {
                   3420:        SKIP(3);
                   3421:        while (CUR != ';') {
                   3422:            if ((CUR >= '0') && (CUR <= '9')) 
                   3423:                val = val * 16 + (CUR - '0');
                   3424:            else if ((CUR >= 'a') && (CUR <= 'f'))
                   3425:                val = val * 16 + (CUR - 'a') + 10;
                   3426:            else if ((CUR >= 'A') && (CUR <= 'F'))
                   3427:                val = val * 16 + (CUR - 'A') + 10;
                   3428:            else {
                   3429:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3430:                    ctxt->sax->error(ctxt->userData, 
                   3431:                         "sgmlParseCharRef: invalid hexadecimal value\n");
                   3432:                ctxt->wellFormed = 0;
                   3433:                val = 0;
                   3434:                break;
                   3435:            }
                   3436:            NEXT;
                   3437:        }
                   3438:        if (CUR == ';')
                   3439:            NEXT;
                   3440:     } else if  ((CUR == '&') && (NXT(1) == '#')) {
                   3441:        SKIP(2);
                   3442:        while (CUR != ';') {
                   3443:            if ((CUR >= '0') && (CUR <= '9')) 
                   3444:                val = val * 10 + (CUR - '0');
                   3445:            else {
                   3446:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3447:                    ctxt->sax->error(ctxt->userData, 
                   3448:                         "sgmlParseCharRef: invalid decimal value\n");
                   3449:                ctxt->wellFormed = 0;
                   3450:                val = 0;
                   3451:                break;
                   3452:            }
                   3453:            NEXT;
                   3454:        }
                   3455:        if (CUR == ';')
                   3456:            NEXT;
                   3457:     } else {
                   3458:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3459:            ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid value\n");
                   3460:        ctxt->wellFormed = 0;
                   3461:     }
                   3462:     /*
                   3463:      * Check the value IS_CHAR ...
                   3464:      */
                   3465:     if (IS_CHAR(val)) {
                   3466:         return(val);
                   3467:     } else {
                   3468:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3469:            ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid xmlChar value %d\n",
                   3470:                             val);
                   3471:        ctxt->wellFormed = 0;
                   3472:     }
                   3473:     return(0);
                   3474: }
                   3475: 
                   3476: 
                   3477: /**
                   3478:  * sgmlParseDocTypeDecl :
                   3479:  * @ctxt:  an SGML parser context
                   3480:  *
                   3481:  * parse a DOCTYPE declaration
                   3482:  *
                   3483:  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 
                   3484:  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
                   3485:  */
                   3486: 
                   3487: void
                   3488: sgmlParseDocTypeDecl(sgmlParserCtxtPtr ctxt) {
                   3489:     xmlChar *name;
                   3490:     xmlChar *ExternalID = NULL;
                   3491:     xmlChar *URI = NULL;
                   3492: 
                   3493:     /*
                   3494:      * We know that '<!DOCTYPE' has been detected.
                   3495:      */
                   3496:     SKIP(9);
                   3497: 
                   3498:     SKIP_BLANKS;
                   3499: 
                   3500:     /*
                   3501:      * Parse the DOCTYPE name.
                   3502:      */
                   3503:     name = sgmlParseName(ctxt);
                   3504:     if (name == NULL) {
                   3505:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3506:            ctxt->sax->error(ctxt->userData, "sgmlParseDocTypeDecl : no DOCTYPE name !\n");
                   3507:        ctxt->wellFormed = 0;
                   3508:     }
                   3509:     /*
                   3510:      * Check that upper(name) == "SGML" !!!!!!!!!!!!!
                   3511:      */
                   3512: 
                   3513:     SKIP_BLANKS;
                   3514: 
                   3515:     /*
                   3516:      * Check for SystemID and ExternalID
                   3517:      */
                   3518:     URI = sgmlParseExternalID(ctxt, &ExternalID, 0);
                   3519:     SKIP_BLANKS;
                   3520: 
                   3521:     /*
1.2       veillard 3522:      * Create or update the document accordingly to the DOCTYPE
                   3523:      */
                   3524:     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
                   3525:        (!ctxt->disableSAX))
                   3526:        ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
                   3527: 
                   3528:     /*
                   3529:      * Is there any internal subset declarations ?
                   3530:      * they are handled separately in sgmlParseInternalSubset()
                   3531:      */
                   3532:     if (RAW == '[')
                   3533:        return;
                   3534: 
                   3535: 
                   3536:     /*
1.1       veillard 3537:      * We should be at the end of the DOCTYPE declaration.
                   3538:      */
                   3539:     if (CUR != '>') {
                   3540:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3541:            ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
                   3542:        ctxt->wellFormed = 0;
                   3543:         /* We shouldn't try to resynchronize ... */
                   3544:     }
                   3545:     NEXT;
                   3546: 
                   3547:     /*
                   3548:      * Cleanup, since we don't use all those identifiers
                   3549:      */
                   3550:     if (URI != NULL) xmlFree(URI);
                   3551:     if (ExternalID != NULL) xmlFree(ExternalID);
                   3552:     if (name != NULL) xmlFree(name);
                   3553: }
                   3554: 
                   3555: /**
                   3556:  * sgmlParseAttribute:
                   3557:  * @ctxt:  an SGML parser context
                   3558:  * @value:  a xmlChar ** used to store the value of the attribute
                   3559:  *
                   3560:  * parse an attribute
                   3561:  *
                   3562:  * [41] Attribute ::= Name Eq AttValue
                   3563:  *
                   3564:  * [25] Eq ::= S? '=' S?
                   3565:  *
                   3566:  * With namespace:
                   3567:  *
                   3568:  * [NS 11] Attribute ::= QName Eq AttValue
                   3569:  *
                   3570:  * Also the case QName == xmlns:??? is handled independently as a namespace
                   3571:  * definition.
                   3572:  *
                   3573:  * Returns the attribute name, and the value in *value.
                   3574:  */
                   3575: 
                   3576: xmlChar *
                   3577: sgmlParseAttribute(sgmlParserCtxtPtr ctxt, xmlChar **value) {
                   3578:     xmlChar *name, *val = NULL;
                   3579: 
                   3580:     *value = NULL;
                   3581:     name = sgmlParseName(ctxt);
                   3582:     if (name == NULL) {
                   3583:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3584:            ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
                   3585:        ctxt->wellFormed = 0;
                   3586:         return(NULL);
                   3587:     }
                   3588: 
                   3589:     /*
                   3590:      * read the value
                   3591:      */
                   3592:     SKIP_BLANKS;
                   3593:     if (CUR == '=') {
                   3594:         NEXT;
                   3595:        SKIP_BLANKS;
                   3596:        val = sgmlParseAttValue(ctxt);
                   3597:        /******
                   3598:     } else {
                   3599:         * TODO : some attribute must have values, some may not
                   3600:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3601:            ctxt->sax->warning(ctxt->userData,
                   3602:               "No value for attribute %s\n", name); */
                   3603:     }
                   3604: 
                   3605:     *value = val;
                   3606:     return(name);
                   3607: }
                   3608: 
                   3609: /**
                   3610:  * sgmlCheckEncoding:
                   3611:  * @ctxt:  an SGML parser context
                   3612:  * @attvalue: the attribute value
                   3613:  *
                   3614:  * Checks an http-equiv attribute from a Meta tag to detect
                   3615:  * the encoding
                   3616:  * If a new encoding is detected the parser is switched to decode
                   3617:  * it and pass UTF8
                   3618:  */
                   3619: void
                   3620: sgmlCheckEncoding(sgmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
                   3621:     const xmlChar *encoding;
                   3622: 
                   3623:     if ((ctxt == NULL) || (attvalue == NULL))
                   3624:        return;
                   3625: 
                   3626:     encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
                   3627:     if (encoding == NULL) 
                   3628:        encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
                   3629:     if (encoding == NULL) 
                   3630:        encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
                   3631:     if (encoding != NULL) {
                   3632:        encoding += 8;
                   3633:     } else {
                   3634:        encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
                   3635:        if (encoding == NULL) 
                   3636:            encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
                   3637:        if (encoding == NULL) 
                   3638:            encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
                   3639:        if (encoding != NULL)
                   3640:            encoding += 9;
                   3641:     }
                   3642:     if (encoding != NULL) {
                   3643:        xmlCharEncoding enc;
                   3644:        xmlCharEncodingHandlerPtr handler;
                   3645: 
                   3646:        while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
                   3647: 
                   3648:        if (ctxt->input->encoding != NULL)
                   3649:            xmlFree((xmlChar *) ctxt->input->encoding);
                   3650:        ctxt->input->encoding = xmlStrdup(encoding);
                   3651: 
                   3652:        enc = xmlParseCharEncoding((const char *) encoding);
                   3653:        /*
                   3654:         * registered set of known encodings
                   3655:         */
                   3656:        if (enc != XML_CHAR_ENCODING_ERROR) {
                   3657:            xmlSwitchEncoding(ctxt, enc);
                   3658:            ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   3659:        } else {
                   3660:            /*
                   3661:             * fallback for unknown encodings
                   3662:             */
                   3663:            handler = xmlFindCharEncodingHandler((const char *) encoding);
                   3664:            if (handler != NULL) {
                   3665:                xmlSwitchToEncoding(ctxt, handler);
                   3666:                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                   3667:            } else {
                   3668:                ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
                   3669:            }
                   3670:        }
                   3671: 
                   3672:        if ((ctxt->input->buf != NULL) &&
                   3673:            (ctxt->input->buf->encoder != NULL) &&
                   3674:            (ctxt->input->buf->raw != NULL) &&
                   3675:            (ctxt->input->buf->buffer != NULL)) {
                   3676:            int nbchars;
                   3677:            int processed;
                   3678: 
                   3679:            /*
                   3680:             * convert as much as possible to the parser reading buffer.
                   3681:             */
                   3682:            processed = ctxt->input->cur - ctxt->input->base;
                   3683:            xmlBufferShrink(ctxt->input->buf->buffer, processed);
                   3684:            nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
                   3685:                                       ctxt->input->buf->buffer,
                   3686:                                       ctxt->input->buf->raw);
                   3687:            if (nbchars < 0) {
1.6       veillard 3688:                ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1       veillard 3689:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3690:                    ctxt->sax->error(ctxt->userData, 
                   3691:                     "sgmlCheckEncoding: encoder error\n");
                   3692:            }
                   3693:            ctxt->input->base =
                   3694:            ctxt->input->cur = ctxt->input->buf->buffer->content;
                   3695:        }
                   3696:     }
                   3697: }
                   3698: 
                   3699: /**
                   3700:  * sgmlCheckMeta:
                   3701:  * @ctxt:  an SGML parser context
                   3702:  * @atts:  the attributes values
                   3703:  *
                   3704:  * Checks an attributes from a Meta tag
                   3705:  */
                   3706: void
                   3707: sgmlCheckMeta(sgmlParserCtxtPtr ctxt, const xmlChar **atts) {
                   3708:     int i;
                   3709:     const xmlChar *att, *value;
                   3710:     int http = 0;
                   3711:     const xmlChar *content = NULL;
                   3712: 
                   3713:     if ((ctxt == NULL) || (atts == NULL))
                   3714:        return;
                   3715: 
                   3716:     i = 0;
                   3717:     att = atts[i++];
                   3718:     while (att != NULL) {
                   3719:        value = atts[i++];
                   3720:        if ((value != NULL) &&
1.7       veillard 3721:            ((xmlStrEqual(att, BAD_CAST"http-equiv")) ||
                   3722:             (xmlStrEqual(att, BAD_CAST"Http-Equiv")) ||
                   3723:             (xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) &&
                   3724:            ((xmlStrEqual(value, BAD_CAST"Content-Type")) ||
                   3725:             (xmlStrEqual(value, BAD_CAST"content-type")) ||
                   3726:             (xmlStrEqual(value, BAD_CAST"CONTENT-TYPE"))))
1.1       veillard 3727:            http = 1;
                   3728:        else if ((value != NULL) &&
1.7       veillard 3729:                 ((xmlStrEqual(att, BAD_CAST"content")) ||
                   3730:                  (xmlStrEqual(att, BAD_CAST"Content")) ||
                   3731:                  (xmlStrEqual(att, BAD_CAST"CONTENT"))))
1.1       veillard 3732:            content = value;
                   3733:        att = atts[i++];
                   3734:     }
                   3735:     if ((http) && (content != NULL))
                   3736:        sgmlCheckEncoding(ctxt, content);
                   3737: 
                   3738: }
                   3739: 
                   3740: /**
                   3741:  * sgmlParseStartTag:
                   3742:  * @ctxt:  an SGML parser context
                   3743:  * 
                   3744:  * parse a start of tag either for rule element or
                   3745:  * EmptyElement. In both case we don't parse the tag closing chars.
                   3746:  *
                   3747:  * [40] STag ::= '<' Name (S Attribute)* S? '>'
                   3748:  *
                   3749:  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
                   3750:  *
                   3751:  * With namespace:
                   3752:  *
                   3753:  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
                   3754:  *
                   3755:  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
                   3756:  *
                   3757:  */
                   3758: 
                   3759: void
                   3760: sgmlParseStartTag(sgmlParserCtxtPtr ctxt) {
                   3761:     xmlChar *name;
                   3762:     xmlChar *attname;
                   3763:     xmlChar *attvalue;
                   3764:     const xmlChar **atts = NULL;
                   3765:     int nbatts = 0;
                   3766:     int maxatts = 0;
                   3767:     int meta = 0;
                   3768:     int i;
                   3769: 
                   3770:     if (CUR != '<') return;
                   3771:     NEXT;
                   3772: 
                   3773:     GROW;
                   3774:     name = sgmlParseSGMLName(ctxt);
                   3775:     if (name == NULL) {
                   3776:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3777:            ctxt->sax->error(ctxt->userData, 
                   3778:             "sgmlParseStartTag: invalid element name\n");
                   3779:        ctxt->wellFormed = 0;
                   3780:         return;
                   3781:     }
1.7       veillard 3782:     if (xmlStrEqual(name, BAD_CAST"meta"))
1.1       veillard 3783:        meta = 1;
                   3784: 
                   3785:     /*
                   3786:      * Check for auto-closure of SGML elements.
                   3787:      */
                   3788:     sgmlAutoClose(ctxt, name);
                   3789: 
                   3790:     /*
                   3791:      * Check for implied SGML elements.
                   3792:      */
                   3793:     sgmlCheckImplied(ctxt, name);
                   3794: 
                   3795:     /*
                   3796:      * Now parse the attributes, it ends up with the ending
                   3797:      *
                   3798:      * (S Attribute)* S?
                   3799:      */
                   3800:     SKIP_BLANKS;
                   3801:     while ((IS_CHAR(CUR)) &&
                   3802:            (CUR != '>') && 
                   3803:           ((CUR != '/') || (NXT(1) != '>'))) {
                   3804:        long cons = ctxt->nbChars;
                   3805: 
                   3806:        GROW;
                   3807:        attname = sgmlParseAttribute(ctxt, &attvalue);
                   3808:         if (attname != NULL) {
                   3809: 
                   3810:            /*
                   3811:             * Well formedness requires at most one declaration of an attribute
                   3812:             */
                   3813:            for (i = 0; i < nbatts;i += 2) {
1.7       veillard 3814:                if (xmlStrEqual(atts[i], attname)) {
1.1       veillard 3815:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3816:                        ctxt->sax->error(ctxt->userData,
                   3817:                                         "Attribute %s redefined\n",
                   3818:                                         attname);
                   3819:                    ctxt->wellFormed = 0;
                   3820:                    xmlFree(attname);
                   3821:                    if (attvalue != NULL)
                   3822:                        xmlFree(attvalue);
                   3823:                    goto failed;
                   3824:                }
                   3825:            }
                   3826: 
                   3827:            /*
                   3828:             * Add the pair to atts
                   3829:             */
                   3830:            if (atts == NULL) {
                   3831:                maxatts = 10;
                   3832:                atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
                   3833:                if (atts == NULL) {
1.10    ! veillard 3834:                    xmlGenericError(xmlGenericErrorContext,
        !          3835:                            "malloc of %ld byte failed\n",
1.1       veillard 3836:                            maxatts * (long)sizeof(xmlChar *));
                   3837:                    if (name != NULL) xmlFree(name);
                   3838:                    return;
                   3839:                }
                   3840:            } else if (nbatts + 4 > maxatts) {
                   3841:                maxatts *= 2;
                   3842:                atts = (const xmlChar **) xmlRealloc(atts, maxatts * sizeof(xmlChar *));
                   3843:                if (atts == NULL) {
1.10    ! veillard 3844:                    xmlGenericError(xmlGenericErrorContext,
        !          3845:                            "realloc of %ld byte failed\n",
1.1       veillard 3846:                            maxatts * (long)sizeof(xmlChar *));
                   3847:                    if (name != NULL) xmlFree(name);
                   3848:                    return;
                   3849:                }
                   3850:            }
                   3851:            atts[nbatts++] = attname;
                   3852:            atts[nbatts++] = attvalue;
                   3853:            atts[nbatts] = NULL;
                   3854:            atts[nbatts + 1] = NULL;
                   3855:        }
                   3856: 
                   3857: failed:
                   3858:        SKIP_BLANKS;
                   3859:         if (cons == ctxt->nbChars) {
                   3860:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3861:                ctxt->sax->error(ctxt->userData, 
                   3862:                 "sgmlParseStartTag: problem parsing attributes\n");
                   3863:            ctxt->wellFormed = 0;
                   3864:            break;
                   3865:        }
                   3866:     }
                   3867: 
                   3868:     /*
                   3869:      * Handle specific association to the META tag
                   3870:      */
                   3871:     if (meta)
                   3872:        sgmlCheckMeta(ctxt, atts);
                   3873: 
                   3874:     /*
                   3875:      * SAX: Start of Element !
                   3876:      */
                   3877:     sgmlnamePush(ctxt, xmlStrdup(name));
                   3878: #ifdef DEBUG
1.10    ! veillard 3879:     xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name);
1.1       veillard 3880: #endif    
                   3881:     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
                   3882:         ctxt->sax->startElement(ctxt->userData, name, atts);
                   3883: 
                   3884:     if (atts != NULL) {
                   3885:         for (i = 0;i < nbatts;i++) {
                   3886:            if (atts[i] != NULL)
                   3887:                xmlFree((xmlChar *) atts[i]);
                   3888:        }
                   3889:        xmlFree((void *) atts);
                   3890:     }
                   3891:     if (name != NULL) xmlFree(name);
                   3892: }
                   3893: 
                   3894: /**
                   3895:  * sgmlParseEndTag:
                   3896:  * @ctxt:  an SGML parser context
                   3897:  *
                   3898:  * parse an end of tag
                   3899:  *
                   3900:  * [42] ETag ::= '</' Name S? '>'
                   3901:  *
                   3902:  * With namespace
                   3903:  *
                   3904:  * [NS 9] ETag ::= '</' QName S? '>'
                   3905:  */
                   3906: 
                   3907: void
                   3908: sgmlParseEndTag(sgmlParserCtxtPtr ctxt) {
                   3909:     xmlChar *name;
                   3910:     xmlChar *oldname;
                   3911:     int i;
                   3912: 
                   3913:     if ((CUR != '<') || (NXT(1) != '/')) {
                   3914:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3915:            ctxt->sax->error(ctxt->userData, "sgmlParseEndTag: '</' not found\n");
                   3916:        ctxt->wellFormed = 0;
                   3917:        return;
                   3918:     }
                   3919:     SKIP(2);
                   3920: 
                   3921:     name = sgmlParseSGMLName(ctxt);
                   3922:     if (name == NULL) {
                   3923:        if (CUR == '>') {
                   3924:            NEXT;
                   3925:            oldname = sgmlnamePop(ctxt);
                   3926:            if (oldname != NULL) {
                   3927:                if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   3928:                    ctxt->sax->endElement(ctxt->userData, name);
                   3929: #ifdef DEBUG
1.10    ! veillard 3930:                xmlGenericError(xmlGenericErrorContext,"End of tag </>: popping out %s\n", oldname);
1.1       veillard 3931: #endif
                   3932:                xmlFree(oldname);
                   3933: #ifdef DEBUG
                   3934:            } else {
1.10    ! veillard 3935:                xmlGenericError(xmlGenericErrorContext,"End of tag </>: stack empty !!!\n");
1.1       veillard 3936: #endif
                   3937:            }
                   3938:            return;
                   3939:        } else
                   3940:            return;
                   3941:     }
                   3942: 
                   3943:     /*
                   3944:      * We should definitely be at the ending "S? '>'" part
                   3945:      */
                   3946:     SKIP_BLANKS;
                   3947:     if ((!IS_CHAR(CUR)) || (CUR != '>')) {
                   3948:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3949:            ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
                   3950:        ctxt->wellFormed = 0;
                   3951:     } else
                   3952:        NEXT;
                   3953: 
                   3954:     /*
                   3955:      * If the name read is not one of the element in the parsing stack
                   3956:      * then return, it's just an error.
                   3957:      */
                   3958:     for (i = (ctxt->nameNr - 1);i >= 0;i--) {
1.7       veillard 3959:         if (xmlStrEqual(name, ctxt->nameTab[i])) break;
1.1       veillard 3960:     }
                   3961:     if (i < 0) {
                   3962:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3963:            ctxt->sax->error(ctxt->userData,
                   3964:             "Unexpected end tag : %s\n", name);
                   3965:        xmlFree(name);
                   3966:        ctxt->wellFormed = 0;
                   3967:        return;
                   3968:     }
                   3969: 
                   3970: 
                   3971:     /*
                   3972:      * Check for auto-closure of SGML elements.
                   3973:      */
                   3974: 
                   3975:     sgmlAutoCloseOnClose(ctxt, name);
                   3976: 
                   3977:     /*
                   3978:      * Well formedness constraints, opening and closing must match.
                   3979:      * With the exception that the autoclose may have popped stuff out
                   3980:      * of the stack.
                   3981:      */
                   3982:     if (((name[0] != '/') || (name[1] != 0)) &&
1.7       veillard 3983:        (!xmlStrEqual(name, ctxt->name))) {
1.1       veillard 3984: #ifdef DEBUG
1.10    ! veillard 3985:        xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name);
1.1       veillard 3986: #endif
                   3987:         if ((ctxt->name != NULL) && 
1.7       veillard 3988:            (!xmlStrEqual(ctxt->name, name))) {
1.1       veillard 3989:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   3990:                ctxt->sax->error(ctxt->userData,
                   3991:                 "Opening and ending tag mismatch: %s and %s\n",
                   3992:                                 name, ctxt->name);
                   3993:            ctxt->wellFormed = 0;
                   3994:         }
                   3995:     }
                   3996: 
                   3997:     /*
                   3998:      * SAX: End of Tag
                   3999:      */
                   4000:     oldname = ctxt->name;
                   4001:     if (((name[0] == '/') && (name[1] == 0)) ||
1.7       veillard 4002:        ((oldname != NULL) && (xmlStrEqual(oldname, name)))) {
1.1       veillard 4003:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   4004:            ctxt->sax->endElement(ctxt->userData, name);
                   4005:        oldname = sgmlnamePop(ctxt);
                   4006:        if (oldname != NULL) {
                   4007: #ifdef DEBUG
1.10    ! veillard 4008:            xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname);
1.1       veillard 4009: #endif
                   4010:            xmlFree(oldname);
                   4011: #ifdef DEBUG
                   4012:        } else {
1.10    ! veillard 4013:            xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name);
1.1       veillard 4014: #endif
                   4015:        }
                   4016:     }
                   4017: 
                   4018:     if (name != NULL)
                   4019:        xmlFree(name);
                   4020: 
                   4021:     return;
                   4022: }
                   4023: 
                   4024: 
                   4025: /**
                   4026:  * sgmlParseReference:
                   4027:  * @ctxt:  an SGML parser context
                   4028:  * 
                   4029:  * parse and handle entity references in content,
                   4030:  * this will end-up in a call to character() since this is either a
                   4031:  * CharRef, or a predefined entity.
                   4032:  */
                   4033: void
                   4034: sgmlParseReference(sgmlParserCtxtPtr ctxt) {
                   4035:     sgmlEntityDescPtr ent;
                   4036:     xmlChar out[6];
                   4037:     xmlChar *name;
                   4038:     if (CUR != '&') return;
                   4039: 
                   4040:     if (NXT(1) == '#') {
                   4041:        unsigned int c;
                   4042:        int bits, i = 0;
                   4043: 
                   4044:        c = sgmlParseCharRef(ctxt);
                   4045:         if      (c <    0x80) { out[i++]= c;                bits= -6; }
                   4046:         else if (c <   0x800) { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   4047:         else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   4048:         else                  { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   4049:  
                   4050:         for ( ; bits >= 0; bits-= 6) {
                   4051:             out[i++]= ((c >> bits) & 0x3F) | 0x80;
                   4052:         }
                   4053:        out[i] = 0;
                   4054: 
                   4055:        sgmlCheckParagraph(ctxt);
                   4056:        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4057:            ctxt->sax->characters(ctxt->userData, out, i);
                   4058:     } else {
                   4059:        ent = sgmlParseEntityRef(ctxt, &name);
                   4060:        if (name == NULL) {
                   4061:            sgmlCheckParagraph(ctxt);
                   4062:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4063:                ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
                   4064:            return;
                   4065:        }
                   4066:        if ((ent == NULL) || (ent->value <= 0)) {
                   4067:            sgmlCheckParagraph(ctxt);
                   4068:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
                   4069:                ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
                   4070:                ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
                   4071:                /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
                   4072:            }
                   4073:        } else {
                   4074:            unsigned int c;
                   4075:            int bits, i = 0;
                   4076: 
                   4077:            c = ent->value;
                   4078:            if      (c <    0x80)
                   4079:                    { out[i++]= c;                bits= -6; }
                   4080:            else if (c <   0x800)
                   4081:                    { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                   4082:            else if (c < 0x10000)
                   4083:                    { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                   4084:            else                 
                   4085:                    { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                   4086:      
                   4087:            for ( ; bits >= 0; bits-= 6) {
                   4088:                out[i++]= ((c >> bits) & 0x3F) | 0x80;
                   4089:            }
                   4090:            out[i] = 0;
                   4091: 
                   4092:            sgmlCheckParagraph(ctxt);
                   4093:            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   4094:                ctxt->sax->characters(ctxt->userData, out, i);
                   4095:        }
                   4096:        xmlFree(name);
                   4097:     }
                   4098: }
                   4099: 
                   4100: /**
                   4101:  * sgmlParseContent:
                   4102:  * @ctxt:  an SGML parser context
                   4103:  * @name:  the node name
                   4104:  *
                   4105:  * Parse a content: comment, sub-element, reference or text.
                   4106:  *
                   4107:  */
                   4108: 
                   4109: void
                   4110: sgmlParseContent(sgmlParserCtxtPtr ctxt) {
                   4111:     xmlChar *currentNode;
                   4112:     int depth;
                   4113: 
                   4114:     currentNode = xmlStrdup(ctxt->name);
                   4115:     depth = ctxt->nameNr;
                   4116:     while (1) {
                   4117:        long cons = ctxt->nbChars;
                   4118: 
                   4119:         GROW;
                   4120:        /*
                   4121:         * Our tag or one of it's parent or children is ending.
                   4122:         */
                   4123:         if ((CUR == '<') && (NXT(1) == '/')) {
                   4124:            sgmlParseEndTag(ctxt);
                   4125:            if (currentNode != NULL) xmlFree(currentNode);
                   4126:            return;
                   4127:         }
                   4128: 
                   4129:        /*
                   4130:         * Has this node been popped out during parsing of
                   4131:         * the next element
                   4132:         */
1.7       veillard 4133:         if ((!xmlStrEqual(currentNode, ctxt->name)) &&
1.1       veillard 4134:            (depth >= ctxt->nameNr)) {
                   4135:            if (currentNode != NULL) xmlFree(currentNode);
                   4136:            return;
                   4137:        }
                   4138: 
                   4139:        /*
                   4140:         * Sometimes DOCTYPE arrives in the middle of the document
                   4141:         */
                   4142:        if ((CUR == '<') && (NXT(1) == '!') &&
                   4143:            (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   4144:            (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   4145:            (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   4146:            (UPP(8) == 'E')) {
                   4147:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4148:                ctxt->sax->error(ctxt->userData,
                   4149:                     "Misplaced DOCTYPE declaration\n");
                   4150:            ctxt->wellFormed = 0;
                   4151:            sgmlParseDocTypeDecl(ctxt);
                   4152:        }
                   4153: 
                   4154:        /*
                   4155:         * First case :  a comment
                   4156:         */
                   4157:        if ((CUR == '<') && (NXT(1) == '!') &&
                   4158:                 (NXT(2) == '-') && (NXT(3) == '-')) {
                   4159:            sgmlParseComment(ctxt);
                   4160:        }
                   4161: 
                   4162:        /*
                   4163:         * Second case :  a sub-element.
                   4164:         */
                   4165:        else if (CUR == '<') {
                   4166:            sgmlParseElement(ctxt);
                   4167:        }
                   4168: 
                   4169:        /*
                   4170:         * Third case : a reference. If if has not been resolved,
                   4171:         *    parsing returns it's Name, create the node 
                   4172:         */
                   4173:        else if (CUR == '&') {
                   4174:            sgmlParseReference(ctxt);
                   4175:        }
                   4176: 
                   4177:        /*
                   4178:         * Fourth : end of the resource
                   4179:         */
                   4180:        else if (CUR == 0) {
                   4181:            sgmlAutoClose(ctxt, NULL);
                   4182:        }
                   4183: 
                   4184:        /*
                   4185:         * Last case, text. Note that References are handled directly.
                   4186:         */
                   4187:        else {
                   4188:            sgmlParseCharData(ctxt, 0);
                   4189:        }
                   4190: 
                   4191:        if (cons == ctxt->nbChars) {
                   4192:            if (ctxt->node != NULL) {
                   4193:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4194:                    ctxt->sax->error(ctxt->userData,
                   4195:                         "detected an error in element content\n");
                   4196:                ctxt->wellFormed = 0;
                   4197:            }
                   4198:             break;
                   4199:        }
                   4200: 
                   4201:         GROW;
                   4202:     }
                   4203:     if (currentNode != NULL) xmlFree(currentNode);
                   4204: }
                   4205: 
                   4206: /**
                   4207:  * sgmlParseElement:
                   4208:  * @ctxt:  an SGML parser context
                   4209:  *
                   4210:  * parse an SGML element, this is highly recursive
                   4211:  *
                   4212:  * [39] element ::= EmptyElemTag | STag content ETag
                   4213:  *
                   4214:  * [41] Attribute ::= Name Eq AttValue
                   4215:  */
                   4216: 
                   4217: void
                   4218: sgmlParseElement(sgmlParserCtxtPtr ctxt) {
                   4219:     xmlChar *name;
                   4220:     xmlChar *currentNode = NULL;
                   4221:     sgmlElemDescPtr info;
                   4222:     sgmlParserNodeInfo node_info;
                   4223:     xmlChar *oldname;
                   4224:     int depth = ctxt->nameNr;
                   4225: 
                   4226:     /* Capture start position */
                   4227:     if (ctxt->record_info) {
                   4228:         node_info.begin_pos = ctxt->input->consumed +
                   4229:                           (CUR_PTR - ctxt->input->base);
                   4230:        node_info.begin_line = ctxt->input->line;
                   4231:     }
                   4232: 
                   4233:     oldname = xmlStrdup(ctxt->name);
                   4234:     sgmlParseStartTag(ctxt);
                   4235:     name = ctxt->name;
                   4236: #ifdef DEBUG
                   4237:     if (oldname == NULL)
1.10    ! veillard 4238:        xmlGenericError(xmlGenericErrorContext,
        !          4239:                "Start of element %s\n", name);
1.1       veillard 4240:     else if (name == NULL)     
1.10    ! veillard 4241:        xmlGenericError(xmlGenericErrorContext,
        !          4242:                "Start of element failed, was %s\n", oldname);
1.1       veillard 4243:     else       
1.10    ! veillard 4244:        xmlGenericError(xmlGenericErrorContext,
        !          4245:                "Start of element %s, was %s\n", name, oldname);
1.1       veillard 4246: #endif
1.7       veillard 4247:     if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) ||
1.1       veillard 4248:         (name == NULL)) {
                   4249:        if (CUR == '>')
                   4250:            NEXT;
                   4251:        if (oldname != NULL)
                   4252:            xmlFree(oldname);
                   4253:         return;
                   4254:     }
                   4255:     if (oldname != NULL)
                   4256:        xmlFree(oldname);
                   4257: 
                   4258:     /*
                   4259:      * Lookup the info for that element.
                   4260:      */
                   4261:     info = sgmlTagLookup(name);
                   4262:     if (info == NULL) {
                   4263:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.4       veillard 4264:            ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
1.1       veillard 4265:                             name);
                   4266:        ctxt->wellFormed = 0;
                   4267:     } else if (info->depr) {
                   4268: /***************************
                   4269:        if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                   4270:            ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
                   4271:                               name);
                   4272:  ***************************/
                   4273:     }
                   4274: 
                   4275:     /*
                   4276:      * Check for an Empty Element labelled the XML/SGML way
                   4277:      */
                   4278:     if ((CUR == '/') && (NXT(1) == '>')) {
                   4279:         SKIP(2);
                   4280:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   4281:            ctxt->sax->endElement(ctxt->userData, name);
                   4282:        oldname = sgmlnamePop(ctxt);
                   4283: #ifdef DEBUG
1.10    ! veillard 4284:         xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname);
1.1       veillard 4285: #endif
                   4286:        if (oldname != NULL)
                   4287:            xmlFree(oldname);
                   4288:        return;
                   4289:     }
                   4290: 
                   4291:     if (CUR == '>') {
                   4292:         NEXT;
                   4293:     } else {
                   4294:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4295:            ctxt->sax->error(ctxt->userData,
                   4296:                             "Couldn't find end of Start Tag %s\n",
                   4297:                             name);
                   4298:        ctxt->wellFormed = 0;
                   4299: 
                   4300:        /*
                   4301:         * end of parsing of this node.
                   4302:         */
1.7       veillard 4303:        if (xmlStrEqual(name, ctxt->name)) { 
1.1       veillard 4304:            nodePop(ctxt);
                   4305:            oldname = sgmlnamePop(ctxt);
                   4306: #ifdef DEBUG
1.10    ! veillard 4307:            xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname);
1.1       veillard 4308: #endif
                   4309:            if (oldname != NULL)
                   4310:                xmlFree(oldname);
                   4311:        }    
                   4312: 
                   4313:        /*
                   4314:         * Capture end position and add node
                   4315:         */
                   4316:        if ( currentNode != NULL && ctxt->record_info ) {
                   4317:           node_info.end_pos = ctxt->input->consumed +
                   4318:                              (CUR_PTR - ctxt->input->base);
                   4319:           node_info.end_line = ctxt->input->line;
                   4320:           node_info.node = ctxt->node;
                   4321:           xmlParserAddNodeInfo(ctxt, &node_info);
                   4322:        }
                   4323:        return;
                   4324:     }
                   4325: 
                   4326:     /*
                   4327:      * Check for an Empty Element from DTD definition
                   4328:      */
                   4329:     if ((info != NULL) && (info->empty)) {
                   4330:        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   4331:            ctxt->sax->endElement(ctxt->userData, name);
                   4332:        oldname = sgmlnamePop(ctxt);
                   4333: #ifdef DEBUG
1.10    ! veillard 4334:        xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
1.1       veillard 4335: #endif
                   4336:        if (oldname != NULL)
                   4337:            xmlFree(oldname);
                   4338:        return;
                   4339:     }
                   4340: 
                   4341:     /*
                   4342:      * Parse the content of the element:
                   4343:      */
                   4344:     currentNode = xmlStrdup(ctxt->name);
                   4345:     depth = ctxt->nameNr;
                   4346:     while (IS_CHAR(CUR)) {
                   4347:        sgmlParseContent(ctxt);
                   4348:        if (ctxt->nameNr < depth) break; 
                   4349:     }  
                   4350: 
                   4351:     if (!IS_CHAR(CUR)) {
                   4352:        /************
                   4353:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4354:            ctxt->sax->error(ctxt->userData,
                   4355:                 "Premature end of data in tag %s\n", currentNode);
                   4356:        ctxt->wellFormed = 0;
                   4357:         *************/
                   4358: 
                   4359:        /*
                   4360:         * end of parsing of this node.
                   4361:         */
                   4362:        nodePop(ctxt);
                   4363:        oldname = sgmlnamePop(ctxt);
                   4364: #ifdef DEBUG
1.10    ! veillard 4365:        xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname);
1.1       veillard 4366: #endif
                   4367:        if (oldname != NULL)
                   4368:            xmlFree(oldname);
                   4369:        if (currentNode != NULL)
                   4370:            xmlFree(currentNode);
                   4371:        return;
                   4372:     }
                   4373: 
                   4374:     /*
                   4375:      * Capture end position and add node
                   4376:      */
                   4377:     if ( currentNode != NULL && ctxt->record_info ) {
                   4378:        node_info.end_pos = ctxt->input->consumed +
                   4379:                           (CUR_PTR - ctxt->input->base);
                   4380:        node_info.end_line = ctxt->input->line;
                   4381:        node_info.node = ctxt->node;
                   4382:        xmlParserAddNodeInfo(ctxt, &node_info);
                   4383:     }
                   4384:     if (currentNode != NULL)
                   4385:        xmlFree(currentNode);
                   4386: }
                   4387: 
                   4388: /**
1.3       veillard 4389:  * sgmlParseEntityDecl:
                   4390:  * @ctxt:  an SGML parser context
                   4391:  *
                   4392:  * parse <!ENTITY declarations
                   4393:  *
                   4394:  */
                   4395: 
                   4396: void
                   4397: sgmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
                   4398:     xmlChar *name = NULL;
                   4399:     xmlChar *value = NULL;
                   4400:     xmlChar *URI = NULL, *literal = NULL;
                   4401:     xmlChar *ndata = NULL;
                   4402:     int isParameter = 0;
                   4403:     xmlChar *orig = NULL;
                   4404:     
                   4405:     GROW;
                   4406:     if ((RAW == '<') && (NXT(1) == '!') &&
                   4407:         (NXT(2) == 'E') && (NXT(3) == 'N') &&
                   4408:         (NXT(4) == 'T') && (NXT(5) == 'I') &&
                   4409:         (NXT(6) == 'T') && (NXT(7) == 'Y')) {
                   4410:        xmlParserInputPtr input = ctxt->input;
                   4411:        ctxt->instate = XML_PARSER_ENTITY_DECL;
                   4412:        SHRINK;
                   4413:        SKIP(8);
                   4414:        if (!IS_BLANK(CUR)) {
1.6       veillard 4415:            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4416:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4417:                ctxt->sax->error(ctxt->userData,
                   4418:                                 "Space required after '<!ENTITY'\n");
                   4419:            ctxt->wellFormed = 0;
                   4420:            ctxt->disableSAX = 1;
                   4421:        }
                   4422:        SKIP_BLANKS;
                   4423: 
                   4424:        if (RAW == '%') {
                   4425:            NEXT;
                   4426:            if (!IS_BLANK(CUR)) {
1.6       veillard 4427:                ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4428:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4429:                    ctxt->sax->error(ctxt->userData,
                   4430:                                     "Space required after '%'\n");
                   4431:                ctxt->wellFormed = 0;
                   4432:                ctxt->disableSAX = 1;
                   4433:            }
                   4434:            SKIP_BLANKS;
                   4435:            isParameter = 1;
                   4436:        }
                   4437: 
                   4438:         name = xmlParseName(ctxt);
                   4439:        if (name == NULL) {
1.6       veillard 4440:            ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.3       veillard 4441:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4442:                ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n");
                   4443:            ctxt->wellFormed = 0;
                   4444:            ctxt->disableSAX = 1;
                   4445:             return;
                   4446:        }
                   4447:        if (!IS_BLANK(CUR)) {
1.6       veillard 4448:            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4449:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4450:                ctxt->sax->error(ctxt->userData,
                   4451:                     "Space required after the entity name\n");
                   4452:            ctxt->wellFormed = 0;
                   4453:            ctxt->disableSAX = 1;
                   4454:        }
                   4455:         SKIP_BLANKS;
                   4456: 
                   4457:        /*
                   4458:         * handle the various case of definitions...
                   4459:         */
                   4460:        if (isParameter) {
                   4461:            if ((RAW == '"') || (RAW == '\'')) {
                   4462:                value = xmlParseEntityValue(ctxt, &orig);
                   4463:                if (value) {
                   4464:                    if ((ctxt->sax != NULL) &&
                   4465:                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                   4466:                        ctxt->sax->entityDecl(ctxt->userData, name,
                   4467:                                    XML_INTERNAL_PARAMETER_ENTITY,
                   4468:                                    NULL, NULL, value);
                   4469:                }
                   4470:            } else {
                   4471:                URI = xmlParseExternalID(ctxt, &literal, 1);
                   4472:                if ((URI == NULL) && (literal == NULL)) {
1.6       veillard 4473:                    ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.3       veillard 4474:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4475:                        ctxt->sax->error(ctxt->userData,
                   4476:                            "Entity value required\n");
                   4477:                    ctxt->wellFormed = 0;
                   4478:                    ctxt->disableSAX = 1;
                   4479:                }
                   4480:                if (URI) {
                   4481:                    xmlURIPtr uri;
                   4482: 
                   4483:                    uri = xmlParseURI((const char *) URI);
                   4484:                    if (uri == NULL) {
1.6       veillard 4485:                        ctxt->errNo = XML_ERR_INVALID_URI;
1.3       veillard 4486:                        if ((ctxt->sax != NULL) &&
                   4487:                            (!ctxt->disableSAX) &&
                   4488:                            (ctxt->sax->error != NULL))
                   4489:                            ctxt->sax->error(ctxt->userData,
                   4490:                                        "Invalid URI: %s\n", URI);
                   4491:                        ctxt->wellFormed = 0;
                   4492:                    } else {
                   4493:                        if (uri->fragment != NULL) {
1.6       veillard 4494:                            ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.3       veillard 4495:                            if ((ctxt->sax != NULL) &&
                   4496:                                (!ctxt->disableSAX) &&
                   4497:                                (ctxt->sax->error != NULL))
                   4498:                                ctxt->sax->error(ctxt->userData,
                   4499:                                            "Fragment not allowed: %s\n", URI);
                   4500:                            ctxt->wellFormed = 0;
                   4501:                        } else {
                   4502:                            if ((ctxt->sax != NULL) &&
                   4503:                                (!ctxt->disableSAX) &&
                   4504:                                (ctxt->sax->entityDecl != NULL))
                   4505:                                ctxt->sax->entityDecl(ctxt->userData, name,
                   4506:                                            XML_EXTERNAL_PARAMETER_ENTITY,
                   4507:                                            literal, URI, NULL);
                   4508:                        }
                   4509:                        xmlFreeURI(uri);
                   4510:                    }
                   4511:                }
                   4512:            }
                   4513:        } else {
                   4514:            if ((RAW == '"') || (RAW == '\'')) {
                   4515:                value = xmlParseEntityValue(ctxt, &orig);
                   4516:                if ((ctxt->sax != NULL) &&
                   4517:                    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                   4518:                    ctxt->sax->entityDecl(ctxt->userData, name,
                   4519:                                XML_INTERNAL_GENERAL_ENTITY,
                   4520:                                NULL, NULL, value);
                   4521:            } else {
                   4522:                URI = xmlParseExternalID(ctxt, &literal, 1);
                   4523:                if ((URI == NULL) && (literal == NULL)) {
1.6       veillard 4524:                    ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.3       veillard 4525:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4526:                        ctxt->sax->error(ctxt->userData,
                   4527:                            "Entity value required\n");
                   4528:                    ctxt->wellFormed = 0;
                   4529:                    ctxt->disableSAX = 1;
                   4530:                }
                   4531:                if (URI) {
                   4532:                    xmlURIPtr uri;
                   4533: 
                   4534:                    uri = xmlParseURI((const char *)URI);
                   4535:                    if (uri == NULL) {
1.6       veillard 4536:                        ctxt->errNo = XML_ERR_INVALID_URI;
1.3       veillard 4537:                        if ((ctxt->sax != NULL) &&
                   4538:                            (!ctxt->disableSAX) &&
                   4539:                            (ctxt->sax->error != NULL))
                   4540:                            ctxt->sax->error(ctxt->userData,
                   4541:                                        "Invalid URI: %s\n", URI);
                   4542:                        ctxt->wellFormed = 0;
                   4543:                    } else {
                   4544:                        if (uri->fragment != NULL) {
1.6       veillard 4545:                            ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.3       veillard 4546:                            if ((ctxt->sax != NULL) &&
                   4547:                                (!ctxt->disableSAX) &&
                   4548:                                (ctxt->sax->error != NULL))
                   4549:                                ctxt->sax->error(ctxt->userData,
                   4550:                                            "Fragment not allowed: %s\n", URI);
                   4551:                            ctxt->wellFormed = 0;
                   4552:                        }
                   4553:                        xmlFreeURI(uri);
                   4554:                    }
                   4555:                }
                   4556:                if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.6       veillard 4557:                    ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4558:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4559:                        ctxt->sax->error(ctxt->userData,
                   4560:                            "Space required before content model\n");
                   4561:                    ctxt->wellFormed = 0;
                   4562:                    ctxt->disableSAX = 1;
                   4563:                }
                   4564:                SKIP_BLANKS;
                   4565: 
                   4566:                /*
                   4567:                 * SGML specific: here we can get the content model
                   4568:                 */
                   4569:                if (RAW != '>') {
                   4570:                    xmlChar *contmod;
                   4571: 
                   4572:                    contmod = xmlParseName(ctxt);
                   4573: 
                   4574:                    if (contmod == NULL) {
1.6       veillard 4575:                        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4576:                        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4577:                            ctxt->sax->error(ctxt->userData,
                   4578:                                "Could not parse entity content model\n");
                   4579:                        ctxt->wellFormed = 0;
                   4580:                        ctxt->disableSAX = 1;
                   4581:                    } else {
1.7       veillard 4582:                        if (xmlStrEqual(contmod, BAD_CAST"NDATA")) {
1.3       veillard 4583:                            if (!IS_BLANK(CUR)) {
1.6       veillard 4584:                                ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3       veillard 4585:                                if ((ctxt->sax != NULL) &&
                   4586:                                    (ctxt->sax->error != NULL))
                   4587:                                    ctxt->sax->error(ctxt->userData,
                   4588:                                        "Space required after 'NDATA'\n");
                   4589:                                ctxt->wellFormed = 0;
                   4590:                                ctxt->disableSAX = 1;
                   4591:                            }
                   4592:                            SKIP_BLANKS;
                   4593:                            ndata = xmlParseName(ctxt);
                   4594:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                   4595:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
                   4596:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
                   4597:                                        name, literal, URI, ndata);
                   4598:                            }
1.7       veillard 4599:                        } else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) {
1.3       veillard 4600:                            if ((ctxt->sax != NULL) &&
                   4601:                                (ctxt->sax->warning != NULL))
                   4602:                                ctxt->sax->warning(ctxt->userData,
                   4603:                                    "SUBDOC entities are not supported\n");
                   4604:                            SKIP_BLANKS;
                   4605:                            ndata = xmlParseName(ctxt);
                   4606:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                   4607:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
                   4608:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
                   4609:                                        name, literal, URI, ndata);
                   4610:                            }
1.7       veillard 4611:                        } else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) {
1.3       veillard 4612:                            if ((ctxt->sax != NULL) &&
                   4613:                                (ctxt->sax->warning != NULL))
                   4614:                                ctxt->sax->warning(ctxt->userData,
                   4615:                                    "CDATA entities are not supported\n");
                   4616:                            SKIP_BLANKS;
                   4617:                            ndata = xmlParseName(ctxt);
                   4618:                            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                   4619:                                (ctxt->sax->unparsedEntityDecl != NULL)) {
                   4620:                                ctxt->sax->unparsedEntityDecl(ctxt->userData,
                   4621:                                        name, literal, URI, ndata);
                   4622:                            }
                   4623:                        }
                   4624:                        xmlFree(contmod);
                   4625:                    }
                   4626:                } else {
                   4627:                    if ((ctxt->sax != NULL) &&
                   4628:                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                   4629:                        ctxt->sax->entityDecl(ctxt->userData, name,
                   4630:                                    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
                   4631:                                    literal, URI, NULL);
                   4632:                }
                   4633:            }
                   4634:        }
                   4635:        SKIP_BLANKS;
                   4636:        if (RAW != '>') {
1.6       veillard 4637:            ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.3       veillard 4638:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4639:                ctxt->sax->error(ctxt->userData, 
                   4640:                    "sgmlParseEntityDecl: entity %s not terminated\n", name);
                   4641:            ctxt->wellFormed = 0;
                   4642:            ctxt->disableSAX = 1;
                   4643:        } else {
                   4644:            if (input != ctxt->input) {
1.6       veillard 4645:                ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.3       veillard 4646:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4647:                    ctxt->sax->error(ctxt->userData, 
                   4648: "Entity declaration doesn't start and stop in the same entity\n");
                   4649:                ctxt->wellFormed = 0;
                   4650:                ctxt->disableSAX = 1;
                   4651:            }
                   4652:            NEXT;
                   4653:        }
                   4654:        if (orig != NULL) {
                   4655:            /*
                   4656:             * Ugly mechanism to save the raw entity value.
                   4657:             */
                   4658:            xmlEntityPtr cur = NULL;
                   4659: 
                   4660:            if (isParameter) {
                   4661:                if ((ctxt->sax != NULL) &&
                   4662:                    (ctxt->sax->getParameterEntity != NULL))
                   4663:                    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
                   4664:            } else {
                   4665:                if ((ctxt->sax != NULL) &&
                   4666:                    (ctxt->sax->getEntity != NULL))
                   4667:                    cur = ctxt->sax->getEntity(ctxt->userData, name);
                   4668:            }
                   4669:             if (cur != NULL) {
                   4670:                if (cur->orig != NULL)
                   4671:                    xmlFree(orig);
                   4672:                else
                   4673:                    cur->orig = orig;
                   4674:            } else
                   4675:                xmlFree(orig);
                   4676:        }
                   4677:        if (name != NULL) xmlFree(name);
                   4678:        if (value != NULL) xmlFree(value);
                   4679:        if (URI != NULL) xmlFree(URI);
                   4680:        if (literal != NULL) xmlFree(literal);
                   4681:        if (ndata != NULL) xmlFree(ndata);
                   4682:     }
                   4683: }
                   4684: 
                   4685: /**
                   4686:  * sgmlParseMarkupDecl:
                   4687:  * @ctxt:  an SGML parser context
                   4688:  * 
                   4689:  * parse Markup declarations
                   4690:  *
                   4691:  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
                   4692:  *                     NotationDecl | PI | Comment
                   4693:  */
                   4694: void
                   4695: sgmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
                   4696:     GROW;
                   4697:     xmlParseElementDecl(ctxt);
                   4698:     xmlParseAttributeListDecl(ctxt);
                   4699:     sgmlParseEntityDecl(ctxt);
                   4700:     xmlParseNotationDecl(ctxt);
                   4701:     xmlParsePI(ctxt);
                   4702:     xmlParseComment(ctxt);
                   4703:     /*
                   4704:      * This is only for internal subset. On external entities,
                   4705:      * the replacement is done before parsing stage
                   4706:      */
                   4707:     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
                   4708:        xmlParsePEReference(ctxt);
                   4709:     ctxt->instate = XML_PARSER_DTD;
                   4710: }
                   4711: 
                   4712: /**
                   4713:  * sgmlParseInternalsubset:
                   4714:  * @ctxt:  an SGML parser context
                   4715:  *
                   4716:  * parse the internal subset declaration
                   4717:  *
                   4718:  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
                   4719:  */
                   4720: 
                   4721: void
                   4722: sgmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
                   4723:     /*
                   4724:      * Is there any DTD definition ?
                   4725:      */
                   4726:     if (RAW == '[') {
                   4727:         ctxt->instate = XML_PARSER_DTD;
                   4728:         NEXT;
                   4729:        /*
                   4730:         * Parse the succession of Markup declarations and 
                   4731:         * PEReferences.
                   4732:         * Subsequence (markupdecl | PEReference | S)*
                   4733:         */
                   4734:        while (RAW != ']') {
                   4735:            const xmlChar *check = CUR_PTR;
                   4736:            int cons = ctxt->input->consumed;
                   4737: 
                   4738:            SKIP_BLANKS;
                   4739:            sgmlParseMarkupDecl(ctxt);
                   4740:            xmlParsePEReference(ctxt);
                   4741: 
                   4742:            /*
                   4743:             * Pop-up of finished entities.
                   4744:             */
                   4745:            while ((RAW == 0) && (ctxt->inputNr > 1))
                   4746:                xmlPopInput(ctxt);
                   4747: 
                   4748:            if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.6       veillard 4749:                ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.3       veillard 4750:                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4751:                    ctxt->sax->error(ctxt->userData, 
                   4752:             "sgmlParseInternalSubset: error detected in Markup declaration\n");
                   4753:                ctxt->wellFormed = 0;
                   4754:                ctxt->disableSAX = 1;
                   4755:                break;
                   4756:            }
                   4757:        }
                   4758:        if (RAW == ']') { 
                   4759:            NEXT;
                   4760:            SKIP_BLANKS;
                   4761:        }
                   4762:     }
                   4763: 
                   4764:     /*
                   4765:      * We should be at the end of the DOCTYPE declaration.
                   4766:      */
                   4767:     if (RAW != '>') {
1.6       veillard 4768:        ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.3       veillard 4769:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4770:            ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
                   4771:        ctxt->wellFormed = 0;
                   4772:        ctxt->disableSAX = 1;
                   4773:     }
                   4774:     NEXT;
                   4775: }
                   4776: 
                   4777: /**
1.2       veillard 4778:  * sgmlParseMisc:
                   4779:  * @ctxt:  an XML parser context
                   4780:  * 
                   4781:  * parse an XML Misc* optionnal field.
                   4782:  *
                   4783:  * [27] Misc ::= Comment | PI |  S
                   4784:  */
                   4785: 
                   4786: void
                   4787: sgmlParseMisc(xmlParserCtxtPtr ctxt) {
                   4788:     while (((RAW == '<') && (NXT(1) == '?')) ||
                   4789:            ((RAW == '<') && (NXT(1) == '!') &&
                   4790:            (NXT(2) == '-') && (NXT(3) == '-')) ||
                   4791:            IS_BLANK(CUR)) {
                   4792:         if ((RAW == '<') && (NXT(1) == '?')) {
                   4793:            xmlParsePI(ctxt); /* TODO: SGML PIs differs */
                   4794:        } else if (IS_BLANK(CUR)) {
                   4795:            NEXT;
                   4796:        } else
                   4797:            xmlParseComment(ctxt);
                   4798:     }
                   4799: }
                   4800: 
                   4801: /**
1.1       veillard 4802:  * sgmlParseDocument :
                   4803:  * @ctxt:  an SGML parser context
                   4804:  * 
                   4805:  * parse an SGML document (and build a tree if using the standard SAX
                   4806:  * interface).
                   4807:  *
                   4808:  * Returns 0, -1 in case of error. the parser context is augmented
                   4809:  *                as a result of the parsing.
                   4810:  */
                   4811: 
                   4812: int
                   4813: sgmlParseDocument(sgmlParserCtxtPtr ctxt) {
1.2       veillard 4814:     xmlChar start[4];
                   4815:     xmlCharEncoding enc;
1.1       veillard 4816:     xmlDtdPtr dtd;
                   4817: 
                   4818:     sgmlDefaultSAXHandlerInit();
                   4819:     ctxt->html = 2;
                   4820: 
                   4821:     GROW;
                   4822:     /*
                   4823:      * SAX: beginning of the document processing.
                   4824:      */
                   4825:     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                   4826:         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
                   4827: 
1.2       veillard 4828:     /* 
                   4829:      * Get the 4 first bytes and decode the charset
                   4830:      * if enc != XML_CHAR_ENCODING_NONE
                   4831:      * plug some encoding conversion routines.
                   4832:      */
                   4833:     start[0] = RAW;
                   4834:     start[1] = NXT(1);
                   4835:     start[2] = NXT(2);
                   4836:     start[3] = NXT(3);
                   4837:     enc = xmlDetectCharEncoding(start, 4);
                   4838:     if (enc != XML_CHAR_ENCODING_NONE) {
                   4839:         xmlSwitchEncoding(ctxt, enc);
                   4840:     }
                   4841: 
1.1       veillard 4842:     /*
                   4843:      * Wipe out everything which is before the first '<'
                   4844:      */
                   4845:     SKIP_BLANKS;
                   4846:     if (CUR == 0) {
                   4847:        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   4848:            ctxt->sax->error(ctxt->userData, "Document is empty\n");
                   4849:        ctxt->wellFormed = 0;
                   4850:     }
                   4851: 
                   4852:     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
                   4853:        ctxt->sax->startDocument(ctxt->userData);
                   4854: 
                   4855: 
                   4856:     /*
1.2       veillard 4857:      * The Misc part of the Prolog
1.1       veillard 4858:      */
1.2       veillard 4859:     GROW;
                   4860:     sgmlParseMisc(ctxt);
1.1       veillard 4861: 
                   4862:     /*
                   4863:      * Then possibly doc type declaration(s) and more Misc
                   4864:      * (doctypedecl Misc*)?
                   4865:      */
1.2       veillard 4866:     GROW;
                   4867:     if ((RAW == '<') && (NXT(1) == '!') &&
                   4868:        (NXT(2) == 'D') && (NXT(3) == 'O') &&
                   4869:        (NXT(4) == 'C') && (NXT(5) == 'T') &&
                   4870:        (NXT(6) == 'Y') && (NXT(7) == 'P') &&
                   4871:        (NXT(8) == 'E')) {
                   4872: 
                   4873:        ctxt->inSubset = 1;
1.1       veillard 4874:        sgmlParseDocTypeDecl(ctxt);
1.2       veillard 4875:        if (RAW == '[') {
                   4876:            ctxt->instate = XML_PARSER_DTD;
1.3       veillard 4877:            sgmlParseInternalSubset(ctxt);
1.2       veillard 4878:        }
                   4879: 
                   4880:        /*
                   4881:         * Create and update the external subset.
                   4882:         */
                   4883:        ctxt->inSubset = 2;
                   4884:        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
                   4885:            (!ctxt->disableSAX))
                   4886:            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                   4887:                                      ctxt->extSubSystem, ctxt->extSubURI);
                   4888:        ctxt->inSubset = 0;
                   4889: 
                   4890: 
                   4891:        ctxt->instate = XML_PARSER_PROLOG;
                   4892:        sgmlParseMisc(ctxt);
1.1       veillard 4893:     }
                   4894: 
                   4895:     /*
                   4896:      * Time to start parsing the tree itself
                   4897:      */
                   4898:     sgmlParseContent(ctxt);
                   4899: 
                   4900:     /*
                   4901:      * autoclose
                   4902:      */
                   4903:     if (CUR == 0)
                   4904:        sgmlAutoClose(ctxt, NULL);
                   4905: 
                   4906: 
                   4907:     /*
                   4908:      * SAX: end of the document processing.
                   4909:      */
                   4910:     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   4911:         ctxt->sax->endDocument(ctxt->userData);
                   4912: 
                   4913:     if (ctxt->myDoc != NULL) {
                   4914:        dtd = xmlGetIntSubset(ctxt->myDoc);
                   4915:        if (dtd == NULL)
                   4916:            ctxt->myDoc->intSubset = 
                   4917:                xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", 
                   4918:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   4919:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
                   4920:     }
                   4921:     if (! ctxt->wellFormed) return(-1);
                   4922:     return(0);
                   4923: }
                   4924: 
                   4925: 
                   4926: /************************************************************************
                   4927:  *                                                                     *
                   4928:  *                     Parser contexts handling                        *
                   4929:  *                                                                     *
                   4930:  ************************************************************************/
                   4931: 
                   4932: /**
                   4933:  * xmlInitParserCtxt:
                   4934:  * @ctxt:  an SGML parser context
                   4935:  *
                   4936:  * Initialize a parser context
                   4937:  */
                   4938: 
                   4939: void
                   4940: sgmlInitParserCtxt(sgmlParserCtxtPtr ctxt)
                   4941: {
                   4942:     sgmlSAXHandler *sax;
                   4943: 
                   4944:     if (ctxt == NULL) return;
                   4945:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   4946: 
                   4947:     sax = (sgmlSAXHandler *) xmlMalloc(sizeof(sgmlSAXHandler));
                   4948:     if (sax == NULL) {
1.10    ! veillard 4949:         xmlGenericError(xmlGenericErrorContext,
        !          4950:                "sgmlInitParserCtxt: out of memory\n");
1.1       veillard 4951:     }
                   4952:     memset(sax, 0, sizeof(sgmlSAXHandler));
                   4953: 
                   4954:     /* Allocate the Input stack */
                   4955:     ctxt->inputTab = (sgmlParserInputPtr *) 
                   4956:                       xmlMalloc(5 * sizeof(sgmlParserInputPtr));
                   4957:     if (ctxt->inputTab == NULL) {
1.10    ! veillard 4958:         xmlGenericError(xmlGenericErrorContext,
        !          4959:                "sgmlInitParserCtxt: out of memory\n");
1.1       veillard 4960:     }
                   4961:     ctxt->inputNr = 0;
                   4962:     ctxt->inputMax = 5;
                   4963:     ctxt->input = NULL;
                   4964:     ctxt->version = NULL;
                   4965:     ctxt->encoding = NULL;
                   4966:     ctxt->standalone = -1;
                   4967:     ctxt->instate = XML_PARSER_START;
                   4968: 
                   4969:     /* Allocate the Node stack */
                   4970:     ctxt->nodeTab = (sgmlNodePtr *) xmlMalloc(10 * sizeof(sgmlNodePtr));
                   4971:     ctxt->nodeNr = 0;
                   4972:     ctxt->nodeMax = 10;
                   4973:     ctxt->node = NULL;
                   4974: 
                   4975:     /* Allocate the Name stack */
                   4976:     ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
                   4977:     ctxt->nameNr = 0;
                   4978:     ctxt->nameMax = 10;
                   4979:     ctxt->name = NULL;
                   4980: 
                   4981:     if (sax == NULL) ctxt->sax = &sgmlDefaultSAXHandler;
                   4982:     else {
                   4983:         ctxt->sax = sax;
                   4984:        memcpy(sax, &sgmlDefaultSAXHandler, sizeof(sgmlSAXHandler));
                   4985:     }
                   4986:     ctxt->userData = ctxt;
                   4987:     ctxt->myDoc = NULL;
                   4988:     ctxt->wellFormed = 1;
                   4989:     ctxt->replaceEntities = 0;
                   4990:     ctxt->html = 2;
                   4991:     ctxt->record_info = 0;
                   4992:     ctxt->validate = 0;
                   4993:     ctxt->nbChars = 0;
                   4994:     ctxt->checkIndex = 0;
                   4995:     xmlInitNodeInfoSeq(&ctxt->node_seq);
                   4996: }
                   4997: 
                   4998: /**
                   4999:  * sgmlFreeParserCtxt:
                   5000:  * @ctxt:  an SGML parser context
                   5001:  *
                   5002:  * Free all the memory used by a parser context. However the parsed
                   5003:  * document in ctxt->myDoc is not freed.
                   5004:  */
                   5005: 
                   5006: void
                   5007: sgmlFreeParserCtxt(sgmlParserCtxtPtr ctxt)
                   5008: {
                   5009:     xmlFreeParserCtxt(ctxt);
                   5010: }
                   5011: 
                   5012: /**
                   5013:  * sgmlCreateDocParserCtxt :
                   5014:  * @cur:  a pointer to an array of xmlChar
                   5015:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5016:  *
                   5017:  * Create a parser context for an SGML document.
                   5018:  *
                   5019:  * Returns the new parser context or NULL
                   5020:  */
                   5021: sgmlParserCtxtPtr
                   5022: sgmlCreateDocParserCtxt(xmlChar *cur, const char *encoding) {
                   5023:     sgmlParserCtxtPtr ctxt;
                   5024:     sgmlParserInputPtr input;
                   5025:     /* sgmlCharEncoding enc; */
                   5026: 
                   5027:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   5028:     if (ctxt == NULL) {
                   5029:         perror("malloc");
                   5030:        return(NULL);
                   5031:     }
                   5032:     sgmlInitParserCtxt(ctxt);
                   5033:     input = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   5034:     if (input == NULL) {
                   5035:         perror("malloc");
                   5036:        xmlFree(ctxt);
                   5037:        return(NULL);
                   5038:     }
                   5039:     memset(input, 0, sizeof(sgmlParserInput));
                   5040: 
                   5041:     input->line = 1;
                   5042:     input->col = 1;
                   5043:     input->base = cur;
                   5044:     input->cur = cur;
                   5045: 
                   5046:     inputPush(ctxt, input);
                   5047:     return(ctxt);
                   5048: }
                   5049: 
                   5050: /************************************************************************
                   5051:  *                                                                     *
                   5052:  *             Progressive parsing interfaces                          *
                   5053:  *                                                                     *
                   5054:  ************************************************************************/
                   5055: 
                   5056: /**
                   5057:  * sgmlParseLookupSequence:
                   5058:  * @ctxt:  an SGML parser context
                   5059:  * @first:  the first char to lookup
                   5060:  * @next:  the next char to lookup or zero
                   5061:  * @third:  the next char to lookup or zero
                   5062:  *
                   5063:  * Try to find if a sequence (first, next, third) or  just (first next) or
                   5064:  * (first) is available in the input stream.
                   5065:  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
                   5066:  * to avoid rescanning sequences of bytes, it DOES change the state of the
                   5067:  * parser, do not use liberally.
                   5068:  * This is basically similar to xmlParseLookupSequence()
                   5069:  *
                   5070:  * Returns the index to the current parsing point if the full sequence
                   5071:  *      is available, -1 otherwise.
                   5072:  */
                   5073: int
                   5074: sgmlParseLookupSequence(sgmlParserCtxtPtr ctxt, xmlChar first,
                   5075:                        xmlChar next, xmlChar third) {
                   5076:     int base, len;
                   5077:     sgmlParserInputPtr in;
                   5078:     const xmlChar *buf;
                   5079: 
                   5080:     in = ctxt->input;
                   5081:     if (in == NULL) return(-1);
                   5082:     base = in->cur - in->base;
                   5083:     if (base < 0) return(-1);
                   5084:     if (ctxt->checkIndex > base)
                   5085:         base = ctxt->checkIndex;
                   5086:     if (in->buf == NULL) {
                   5087:        buf = in->base;
                   5088:        len = in->length;
                   5089:     } else {
                   5090:        buf = in->buf->buffer->content;
                   5091:        len = in->buf->buffer->use;
                   5092:     }
                   5093:     /* take into account the sequence length */
                   5094:     if (third) len -= 2;
                   5095:     else if (next) len --;
                   5096:     for (;base < len;base++) {
                   5097:         if (buf[base] == first) {
                   5098:            if (third != 0) {
                   5099:                if ((buf[base + 1] != next) ||
                   5100:                    (buf[base + 2] != third)) continue;
                   5101:            } else if (next != 0) {
                   5102:                if (buf[base + 1] != next) continue;
                   5103:            }
                   5104:            ctxt->checkIndex = 0;
                   5105: #ifdef DEBUG_PUSH
                   5106:            if (next == 0)
1.10    ! veillard 5107:                xmlGenericError(xmlGenericErrorContext,
        !          5108:                        "HPP: lookup '%c' found at %d\n",
1.1       veillard 5109:                        first, base);
                   5110:            else if (third == 0)
1.10    ! veillard 5111:                xmlGenericError(xmlGenericErrorContext,
        !          5112:                        "HPP: lookup '%c%c' found at %d\n",
1.1       veillard 5113:                        first, next, base);
                   5114:            else 
1.10    ! veillard 5115:                xmlGenericError(xmlGenericErrorContext,
        !          5116:                        "HPP: lookup '%c%c%c' found at %d\n",
1.1       veillard 5117:                        first, next, third, base);
                   5118: #endif
                   5119:            return(base - (in->cur - in->base));
                   5120:        }
                   5121:     }
                   5122:     ctxt->checkIndex = base;
                   5123: #ifdef DEBUG_PUSH
                   5124:     if (next == 0)
1.10    ! veillard 5125:        xmlGenericError(xmlGenericErrorContext,
        !          5126:                "HPP: lookup '%c' failed\n", first);
1.1       veillard 5127:     else if (third == 0)
1.10    ! veillard 5128:        xmlGenericError(xmlGenericErrorContext,
        !          5129:                "HPP: lookup '%c%c' failed\n", first, next);
1.1       veillard 5130:     else       
1.10    ! veillard 5131:        xmlGenericError(xmlGenericErrorContext,
        !          5132:                "HPP: lookup '%c%c%c' failed\n", first, next, third);
1.1       veillard 5133: #endif
                   5134:     return(-1);
                   5135: }
                   5136: 
                   5137: /**
                   5138:  * sgmlParseTryOrFinish:
                   5139:  * @ctxt:  an SGML parser context
                   5140:  * @terminate:  last chunk indicator
                   5141:  *
                   5142:  * Try to progress on parsing
                   5143:  *
                   5144:  * Returns zero if no parsing was possible
                   5145:  */
                   5146: int
                   5147: sgmlParseTryOrFinish(sgmlParserCtxtPtr ctxt, int terminate) {
                   5148:     int ret = 0;
                   5149:     sgmlParserInputPtr in;
                   5150:     int avail = 0;
                   5151:     xmlChar cur, next;
                   5152: 
                   5153: #ifdef DEBUG_PUSH
                   5154:     switch (ctxt->instate) {
                   5155:        case XML_PARSER_EOF:
1.10    ! veillard 5156:            xmlGenericError(xmlGenericErrorContext,
        !          5157:                    "HPP: try EOF\n"); break;
1.1       veillard 5158:        case XML_PARSER_START:
1.10    ! veillard 5159:            xmlGenericError(xmlGenericErrorContext,
        !          5160:                    "HPP: try START\n"); break;
1.1       veillard 5161:        case XML_PARSER_MISC:
1.10    ! veillard 5162:            xmlGenericError(xmlGenericErrorContext,
        !          5163:                    "HPP: try MISC\n");break;
1.1       veillard 5164:        case XML_PARSER_COMMENT:
1.10    ! veillard 5165:            xmlGenericError(xmlGenericErrorContext,
        !          5166:                    "HPP: try COMMENT\n");break;
1.1       veillard 5167:        case XML_PARSER_PROLOG:
1.10    ! veillard 5168:            xmlGenericError(xmlGenericErrorContext,
        !          5169:                    "HPP: try PROLOG\n");break;
1.1       veillard 5170:        case XML_PARSER_START_TAG:
1.10    ! veillard 5171:            xmlGenericError(xmlGenericErrorContext,
        !          5172:                    "HPP: try START_TAG\n");break;
1.1       veillard 5173:        case XML_PARSER_CONTENT:
1.10    ! veillard 5174:            xmlGenericError(xmlGenericErrorContext,
        !          5175:                    "HPP: try CONTENT\n");break;
1.1       veillard 5176:        case XML_PARSER_CDATA_SECTION:
1.10    ! veillard 5177:            xmlGenericError(xmlGenericErrorContext,
        !          5178:                    "HPP: try CDATA_SECTION\n");break;
1.1       veillard 5179:        case XML_PARSER_END_TAG:
1.10    ! veillard 5180:            xmlGenericError(xmlGenericErrorContext,
        !          5181:                    "HPP: try END_TAG\n");break;
1.1       veillard 5182:        case XML_PARSER_ENTITY_DECL:
1.10    ! veillard 5183:            xmlGenericError(xmlGenericErrorContext,
        !          5184:                    "HPP: try ENTITY_DECL\n");break;
1.1       veillard 5185:        case XML_PARSER_ENTITY_VALUE:
1.10    ! veillard 5186:            xmlGenericError(xmlGenericErrorContext,
        !          5187:                    "HPP: try ENTITY_VALUE\n");break;
1.1       veillard 5188:        case XML_PARSER_ATTRIBUTE_VALUE:
1.10    ! veillard 5189:            xmlGenericError(xmlGenericErrorContext,
        !          5190:                    "HPP: try ATTRIBUTE_VALUE\n");break;
1.1       veillard 5191:        case XML_PARSER_DTD:
1.10    ! veillard 5192:            xmlGenericError(xmlGenericErrorContext,
        !          5193:                    "HPP: try DTD\n");break;
1.1       veillard 5194:        case XML_PARSER_EPILOG:
1.10    ! veillard 5195:            xmlGenericError(xmlGenericErrorContext,
        !          5196:                    "HPP: try EPILOG\n");break;
1.1       veillard 5197:        case XML_PARSER_PI:
1.10    ! veillard 5198:            xmlGenericError(xmlGenericErrorContext,
        !          5199:                    "HPP: try PI\n");break;
1.1       veillard 5200:     }
                   5201: #endif
                   5202: 
                   5203:     while (1) {
                   5204: 
                   5205:        in = ctxt->input;
                   5206:        if (in == NULL) break;
                   5207:        if (in->buf == NULL)
                   5208:            avail = in->length - (in->cur - in->base);
                   5209:        else
                   5210:            avail = in->buf->buffer->use - (in->cur - in->base);
                   5211:        if ((avail == 0) && (terminate)) {
                   5212:            sgmlAutoClose(ctxt, NULL);
                   5213:            if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 
                   5214:                /*
                   5215:                 * SAX: end of the document processing.
                   5216:                 */
                   5217:                ctxt->instate = XML_PARSER_EOF;
                   5218:                if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5219:                    ctxt->sax->endDocument(ctxt->userData);
                   5220:            }
                   5221:        }
                   5222:         if (avail < 1)
                   5223:            goto done;
                   5224:         switch (ctxt->instate) {
                   5225:             case XML_PARSER_EOF:
                   5226:                /*
                   5227:                 * Document parsing is done !
                   5228:                 */
                   5229:                goto done;
                   5230:             case XML_PARSER_START:
                   5231:                /*
                   5232:                 * Very first chars read from the document flow.
                   5233:                 */
                   5234:                cur = in->cur[0];
                   5235:                if (IS_BLANK(cur)) {
                   5236:                    SKIP_BLANKS;
                   5237:                    if (in->buf == NULL)
                   5238:                        avail = in->length - (in->cur - in->base);
                   5239:                    else
                   5240:                        avail = in->buf->buffer->use - (in->cur - in->base);
                   5241:                }
                   5242:                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                   5243:                    ctxt->sax->setDocumentLocator(ctxt->userData,
                   5244:                                                  &xmlDefaultSAXLocator);
                   5245:                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                   5246:                    (!ctxt->disableSAX))
                   5247:                    ctxt->sax->startDocument(ctxt->userData);
                   5248: 
                   5249:                cur = in->cur[0];
                   5250:                next = in->cur[1];
                   5251:                if ((cur == '<') && (next == '!') &&
                   5252:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5253:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5254:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5255:                    (UPP(8) == 'E')) {
                   5256:                    if ((!terminate) &&
                   5257:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5258:                        goto done;
                   5259: #ifdef DEBUG_PUSH
1.10    ! veillard 5260:                    xmlGenericError(xmlGenericErrorContext,
        !          5261:                            "HPP: Parsing internal subset\n");
1.1       veillard 5262: #endif
                   5263:                    sgmlParseDocTypeDecl(ctxt);
                   5264:                    ctxt->instate = XML_PARSER_PROLOG;
                   5265: #ifdef DEBUG_PUSH
1.10    ! veillard 5266:                    xmlGenericError(xmlGenericErrorContext,
        !          5267:                            "HPP: entering PROLOG\n");
1.1       veillard 5268: #endif
                   5269:                 } else {
                   5270:                    ctxt->instate = XML_PARSER_MISC;
                   5271:                }
                   5272: #ifdef DEBUG_PUSH
1.10    ! veillard 5273:                xmlGenericError(xmlGenericErrorContext,
        !          5274:                        "HPP: entering MISC\n");
1.1       veillard 5275: #endif
                   5276:                break;
                   5277:             case XML_PARSER_MISC:
                   5278:                SKIP_BLANKS;
                   5279:                if (in->buf == NULL)
                   5280:                    avail = in->length - (in->cur - in->base);
                   5281:                else
                   5282:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5283:                if (avail < 2)
                   5284:                    goto done;
                   5285:                cur = in->cur[0];
                   5286:                next = in->cur[1];
                   5287:                if ((cur == '<') && (next == '!') &&
                   5288:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5289:                    if ((!terminate) &&
                   5290:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5291:                        goto done;
                   5292: #ifdef DEBUG_PUSH
1.10    ! veillard 5293:                    xmlGenericError(xmlGenericErrorContext,
        !          5294:                            "HPP: Parsing Comment\n");
1.1       veillard 5295: #endif
                   5296:                    sgmlParseComment(ctxt);
                   5297:                    ctxt->instate = XML_PARSER_MISC;
                   5298:                } else if ((cur == '<') && (next == '!') &&
                   5299:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5300:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5301:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5302:                    (UPP(8) == 'E')) {
                   5303:                    if ((!terminate) &&
                   5304:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5305:                        goto done;
                   5306: #ifdef DEBUG_PUSH
1.10    ! veillard 5307:                    xmlGenericError(xmlGenericErrorContext,
        !          5308:                            "HPP: Parsing internal subset\n");
1.1       veillard 5309: #endif
                   5310:                    sgmlParseDocTypeDecl(ctxt);
                   5311:                    ctxt->instate = XML_PARSER_PROLOG;
                   5312: #ifdef DEBUG_PUSH
1.10    ! veillard 5313:                    xmlGenericError(xmlGenericErrorContext,
        !          5314:                            "HPP: entering PROLOG\n");
1.1       veillard 5315: #endif
                   5316:                } else if ((cur == '<') && (next == '!') &&
                   5317:                           (avail < 9)) {
                   5318:                    goto done;
                   5319:                } else {
                   5320:                    ctxt->instate = XML_PARSER_START_TAG;
                   5321: #ifdef DEBUG_PUSH
1.10    ! veillard 5322:                    xmlGenericError(xmlGenericErrorContext,
        !          5323:                            "HPP: entering START_TAG\n");
1.1       veillard 5324: #endif
                   5325:                }
                   5326:                break;
                   5327:             case XML_PARSER_PROLOG:
                   5328:                SKIP_BLANKS;
                   5329:                if (in->buf == NULL)
                   5330:                    avail = in->length - (in->cur - in->base);
                   5331:                else
                   5332:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5333:                if (avail < 2) 
                   5334:                    goto done;
                   5335:                cur = in->cur[0];
                   5336:                next = in->cur[1];
                   5337:                if ((cur == '<') && (next == '!') &&
                   5338:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5339:                    if ((!terminate) &&
                   5340:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5341:                        goto done;
                   5342: #ifdef DEBUG_PUSH
1.10    ! veillard 5343:                    xmlGenericError(xmlGenericErrorContext,
        !          5344:                            "HPP: Parsing Comment\n");
1.1       veillard 5345: #endif
                   5346:                    sgmlParseComment(ctxt);
                   5347:                    ctxt->instate = XML_PARSER_PROLOG;
                   5348:                } else if ((cur == '<') && (next == '!') &&
                   5349:                           (avail < 4)) {
                   5350:                    goto done;
                   5351:                } else {
                   5352:                    ctxt->instate = XML_PARSER_START_TAG;
                   5353: #ifdef DEBUG_PUSH
1.10    ! veillard 5354:                    xmlGenericError(xmlGenericErrorContext,
        !          5355:                            "HPP: entering START_TAG\n");
1.1       veillard 5356: #endif
                   5357:                }
                   5358:                break;
                   5359:             case XML_PARSER_EPILOG:
                   5360:                if (in->buf == NULL)
                   5361:                    avail = in->length - (in->cur - in->base);
                   5362:                else
                   5363:                    avail = in->buf->buffer->use - (in->cur - in->base);
                   5364:                if (avail < 1)
                   5365:                    goto done;
                   5366:                cur = in->cur[0];
                   5367:                if (IS_BLANK(cur)) {
                   5368:                    sgmlParseCharData(ctxt, 0);
                   5369:                    goto done;
                   5370:                }
                   5371:                if (avail < 2)
                   5372:                    goto done;
                   5373:                next = in->cur[1];
                   5374:                if ((cur == '<') && (next == '!') &&
                   5375:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5376:                    if ((!terminate) &&
                   5377:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5378:                        goto done;
                   5379: #ifdef DEBUG_PUSH
1.10    ! veillard 5380:                    xmlGenericError(xmlGenericErrorContext,
        !          5381:                            "HPP: Parsing Comment\n");
1.1       veillard 5382: #endif
                   5383:                    sgmlParseComment(ctxt);
                   5384:                    ctxt->instate = XML_PARSER_EPILOG;
                   5385:                } else if ((cur == '<') && (next == '!') &&
                   5386:                           (avail < 4)) {
                   5387:                    goto done;
                   5388:                } else {
1.6       veillard 5389:                    ctxt->errNo = XML_ERR_DOCUMENT_END;
1.1       veillard 5390:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5391:                        ctxt->sax->error(ctxt->userData,
                   5392:                            "Extra content at the end of the document\n");
                   5393:                    ctxt->wellFormed = 0;
                   5394:                    ctxt->instate = XML_PARSER_EOF;
                   5395: #ifdef DEBUG_PUSH
1.10    ! veillard 5396:                    xmlGenericError(xmlGenericErrorContext,
        !          5397:                            "HPP: entering EOF\n");
1.1       veillard 5398: #endif
                   5399:                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5400:                        ctxt->sax->endDocument(ctxt->userData);
                   5401:                    goto done;
                   5402:                }
                   5403:                break;
                   5404:             case XML_PARSER_START_TAG: {
                   5405:                xmlChar *name, *oldname;
                   5406:                int depth = ctxt->nameNr;
                   5407:                sgmlElemDescPtr info;
                   5408: 
                   5409:                if (avail < 2)
                   5410:                    goto done;
                   5411:                cur = in->cur[0];
                   5412:                if (cur != '<') {
                   5413:                    ctxt->instate = XML_PARSER_CONTENT;
                   5414: #ifdef DEBUG_PUSH
1.10    ! veillard 5415:                    xmlGenericError(xmlGenericErrorContext,
        !          5416:                            "HPP: entering CONTENT\n");
1.1       veillard 5417: #endif
                   5418:                    break;
                   5419:                }
                   5420:                if ((!terminate) &&
                   5421:                    (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5422:                    goto done;
                   5423: 
                   5424:                oldname = xmlStrdup(ctxt->name);
                   5425:                sgmlParseStartTag(ctxt);
                   5426:                name = ctxt->name;
                   5427: #ifdef DEBUG
                   5428:                if (oldname == NULL)
1.10    ! veillard 5429:                    xmlGenericError(xmlGenericErrorContext,
        !          5430:                            "Start of element %s\n", name);
1.1       veillard 5431:                else if (name == NULL)  
1.10    ! veillard 5432:                    xmlGenericError(xmlGenericErrorContext,
        !          5433:                            "Start of element failed, was %s\n",
1.1       veillard 5434:                            oldname);
                   5435:                else    
1.10    ! veillard 5436:                    xmlGenericError(xmlGenericErrorContext,
        !          5437:                            "Start of element %s, was %s\n",
1.1       veillard 5438:                            name, oldname);
                   5439: #endif
                   5440:                if (((depth == ctxt->nameNr) &&
1.7       veillard 5441:                     (xmlStrEqual(oldname, ctxt->name))) ||
1.1       veillard 5442:                    (name == NULL)) {
                   5443:                    if (CUR == '>')
                   5444:                        NEXT;
                   5445:                    if (oldname != NULL)
                   5446:                        xmlFree(oldname);
                   5447:                    break;
                   5448:                }
                   5449:                if (oldname != NULL)
                   5450:                    xmlFree(oldname);
                   5451: 
                   5452:                /*
                   5453:                 * Lookup the info for that element.
                   5454:                 */
                   5455:                info = sgmlTagLookup(name);
                   5456:                if (info == NULL) {
                   5457:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.4       veillard 5458:                        ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
1.1       veillard 5459:                                         name);
                   5460:                    ctxt->wellFormed = 0;
                   5461:                } else if (info->depr) {
                   5462:                    /***************************
                   5463:                    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                   5464:                        ctxt->sax->warning(ctxt->userData,
                   5465:                                           "Tag %s is deprecated\n",
                   5466:                                           name);
                   5467:                     ***************************/
                   5468:                }
                   5469: 
                   5470:                /*
                   5471:                 * Check for an Empty Element labelled the XML/SGML way
                   5472:                 */
                   5473:                if ((CUR == '/') && (NXT(1) == '>')) {
                   5474:                    SKIP(2);
                   5475:                    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   5476:                        ctxt->sax->endElement(ctxt->userData, name);
                   5477:                    oldname = sgmlnamePop(ctxt);
                   5478: #ifdef DEBUG
1.10    ! veillard 5479:                    xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n",
1.1       veillard 5480:                            oldname);
                   5481: #endif
                   5482:                    if (oldname != NULL)
                   5483:                        xmlFree(oldname);
                   5484:                    ctxt->instate = XML_PARSER_CONTENT;
                   5485: #ifdef DEBUG_PUSH
1.10    ! veillard 5486:                    xmlGenericError(xmlGenericErrorContext,
        !          5487:                            "HPP: entering CONTENT\n");
1.1       veillard 5488: #endif
                   5489:                    break;
                   5490:                }
                   5491: 
                   5492:                if (CUR == '>') {
                   5493:                    NEXT;
                   5494:                } else {
                   5495:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5496:                        ctxt->sax->error(ctxt->userData, 
                   5497:                                         "Couldn't find end of Start Tag %s\n",
                   5498:                                         name);
                   5499:                    ctxt->wellFormed = 0;
                   5500: 
                   5501:                    /*
                   5502:                     * end of parsing of this node.
                   5503:                     */
1.7       veillard 5504:                    if (xmlStrEqual(name, ctxt->name)) { 
1.1       veillard 5505:                        nodePop(ctxt);
                   5506:                        oldname = sgmlnamePop(ctxt);
                   5507: #ifdef DEBUG
1.10    ! veillard 5508:                        xmlGenericError(xmlGenericErrorContext,
1.1       veillard 5509:                         "End of start tag problem: popping out %s\n", oldname);
                   5510: #endif
                   5511:                        if (oldname != NULL)
                   5512:                            xmlFree(oldname);
                   5513:                    }    
                   5514: 
                   5515:                    ctxt->instate = XML_PARSER_CONTENT;
                   5516: #ifdef DEBUG_PUSH
1.10    ! veillard 5517:                    xmlGenericError(xmlGenericErrorContext,
        !          5518:                            "HPP: entering CONTENT\n");
1.1       veillard 5519: #endif
                   5520:                    break;
                   5521:                }
                   5522: 
                   5523:                /*
                   5524:                 * Check for an Empty Element from DTD definition
                   5525:                 */
                   5526:                if ((info != NULL) && (info->empty)) {
                   5527:                    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
                   5528:                        ctxt->sax->endElement(ctxt->userData, name);
                   5529:                    oldname = sgmlnamePop(ctxt);
                   5530: #ifdef DEBUG
1.10    ! veillard 5531:                    xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
1.1       veillard 5532: #endif
                   5533:                    if (oldname != NULL)
                   5534:                        xmlFree(oldname);
                   5535:                }
                   5536:                ctxt->instate = XML_PARSER_CONTENT;
                   5537: #ifdef DEBUG_PUSH
1.10    ! veillard 5538:                xmlGenericError(xmlGenericErrorContext,
        !          5539:                        "HPP: entering CONTENT\n");
1.1       veillard 5540: #endif
                   5541:                 break;
                   5542:            }
                   5543:             case XML_PARSER_CONTENT: {
                   5544:                long cons;
                   5545:                 /*
                   5546:                 * Handle preparsed entities and charRef
                   5547:                 */
                   5548:                if (ctxt->token != 0) {
                   5549:                    xmlChar chr[2] = { 0 , 0 } ;
                   5550: 
                   5551:                    chr[0] = (xmlChar) ctxt->token;
                   5552:                    sgmlCheckParagraph(ctxt);
                   5553:                    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
                   5554:                        ctxt->sax->characters(ctxt->userData, chr, 1);
                   5555:                    ctxt->token = 0;
                   5556:                    ctxt->checkIndex = 0;
                   5557:                }
                   5558:                if ((avail == 1) && (terminate)) {
                   5559:                    cur = in->cur[0];
                   5560:                    if ((cur != '<') && (cur != '&')) {
                   5561:                        if (ctxt->sax != NULL) {
                   5562:                            if (IS_BLANK(cur)) {
                   5563:                                if (ctxt->sax->ignorableWhitespace != NULL)
                   5564:                                    ctxt->sax->ignorableWhitespace(
                   5565:                                            ctxt->userData, &cur, 1);
                   5566:                            } else {
                   5567:                                sgmlCheckParagraph(ctxt);
                   5568:                                if (ctxt->sax->characters != NULL)
                   5569:                                    ctxt->sax->characters(
                   5570:                                            ctxt->userData, &cur, 1);
                   5571:                            }
                   5572:                        }
                   5573:                        ctxt->token = 0;
                   5574:                        ctxt->checkIndex = 0;
                   5575:                        NEXT;
                   5576:                    }
                   5577:                    break;
                   5578:                }
                   5579:                if (avail < 2)
                   5580:                    goto done;
                   5581:                cur = in->cur[0];
                   5582:                next = in->cur[1];
                   5583:                cons = ctxt->nbChars;
                   5584:                /*
                   5585:                 * Sometimes DOCTYPE arrives in the middle of the document
                   5586:                 */
                   5587:                if ((cur == '<') && (next == '!') &&
                   5588:                    (UPP(2) == 'D') && (UPP(3) == 'O') &&
                   5589:                    (UPP(4) == 'C') && (UPP(5) == 'T') &&
                   5590:                    (UPP(6) == 'Y') && (UPP(7) == 'P') &&
                   5591:                    (UPP(8) == 'E')) {
                   5592:                    if ((!terminate) &&
                   5593:                        (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5594:                        goto done;
                   5595:                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5596:                        ctxt->sax->error(ctxt->userData,
                   5597:                             "Misplaced DOCTYPE declaration\n");
                   5598:                    ctxt->wellFormed = 0;
                   5599:                    sgmlParseDocTypeDecl(ctxt);
                   5600:                } else if ((cur == '<') && (next == '!') &&
                   5601:                    (in->cur[2] == '-') && (in->cur[3] == '-')) {
                   5602:                    if ((!terminate) &&
                   5603:                        (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                   5604:                        goto done;
                   5605: #ifdef DEBUG_PUSH
1.10    ! veillard 5606:                    xmlGenericError(xmlGenericErrorContext,
        !          5607:                            "HPP: Parsing Comment\n");
1.1       veillard 5608: #endif
                   5609:                    sgmlParseComment(ctxt);
                   5610:                    ctxt->instate = XML_PARSER_CONTENT;
                   5611:                } else if ((cur == '<') && (next == '!') && (avail < 4)) {
                   5612:                    goto done;
                   5613:                } else if ((cur == '<') && (next == '/')) {
                   5614:                    ctxt->instate = XML_PARSER_END_TAG;
                   5615:                    ctxt->checkIndex = 0;
                   5616: #ifdef DEBUG_PUSH
1.10    ! veillard 5617:                    xmlGenericError(xmlGenericErrorContext,
        !          5618:                            "HPP: entering END_TAG\n");
1.1       veillard 5619: #endif
                   5620:                    break;
                   5621:                } else if (cur == '<') {
                   5622:                    ctxt->instate = XML_PARSER_START_TAG;
                   5623:                    ctxt->checkIndex = 0;
                   5624: #ifdef DEBUG_PUSH
1.10    ! veillard 5625:                    xmlGenericError(xmlGenericErrorContext,
        !          5626:                            "HPP: entering START_TAG\n");
1.1       veillard 5627: #endif
                   5628:                    break;
                   5629:                } else if (cur == '&') {
                   5630:                    if ((!terminate) &&
                   5631:                        (sgmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
                   5632:                        goto done;
                   5633: #ifdef DEBUG_PUSH
1.10    ! veillard 5634:                    xmlGenericError(xmlGenericErrorContext,
        !          5635:                            "HPP: Parsing Reference\n");
1.1       veillard 5636: #endif
                   5637:                    /* TODO: check generation of subtrees if noent !!! */
                   5638:                    sgmlParseReference(ctxt);
                   5639:                } else {
                   5640:                    /* TODO Avoid the extra copy, handle directly !!!!!! */
                   5641:                    /*
                   5642:                     * Goal of the following test is :
                   5643:                     *  - minimize calls to the SAX 'character' callback
                   5644:                     *    when they are mergeable
                   5645:                     */
                   5646:                    if ((ctxt->inputNr == 1) &&
                   5647:                        (avail < SGML_PARSER_BIG_BUFFER_SIZE)) {
                   5648:                        if ((!terminate) &&
                   5649:                            (sgmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
                   5650:                            goto done;
                   5651:                     }
                   5652:                    ctxt->checkIndex = 0;
                   5653: #ifdef DEBUG_PUSH
1.10    ! veillard 5654:                    xmlGenericError(xmlGenericErrorContext,
        !          5655:                            "HPP: Parsing char data\n");
1.1       veillard 5656: #endif
                   5657:                    sgmlParseCharData(ctxt, 0);
                   5658:                }
                   5659:                if (cons == ctxt->nbChars) {
                   5660:                    if (ctxt->node != NULL) {
                   5661:                        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5662:                            ctxt->sax->error(ctxt->userData,
                   5663:                                 "detected an error in element content\n");
                   5664:                        ctxt->wellFormed = 0;
                   5665:                        NEXT;
                   5666:                    }
                   5667:                    break;
                   5668:                }
                   5669: 
                   5670:                break;
                   5671:            }
                   5672:             case XML_PARSER_END_TAG:
                   5673:                if (avail < 2)
                   5674:                    goto done;
                   5675:                if ((!terminate) &&
                   5676:                    (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                   5677:                    goto done;
                   5678:                sgmlParseEndTag(ctxt);
                   5679:                if (ctxt->nameNr == 0) {
                   5680:                    ctxt->instate = XML_PARSER_EPILOG;
                   5681:                } else {
                   5682:                    ctxt->instate = XML_PARSER_CONTENT;
                   5683:                }
                   5684:                ctxt->checkIndex = 0;
                   5685: #ifdef DEBUG_PUSH
1.10    ! veillard 5686:                xmlGenericError(xmlGenericErrorContext,
        !          5687:                        "HPP: entering CONTENT\n");
1.1       veillard 5688: #endif
                   5689:                break;
                   5690:             case XML_PARSER_CDATA_SECTION:
1.10    ! veillard 5691:                xmlGenericError(xmlGenericErrorContext,
        !          5692:                        "HPP: internal error, state == CDATA\n");
1.1       veillard 5693:                ctxt->instate = XML_PARSER_CONTENT;
                   5694:                ctxt->checkIndex = 0;
                   5695: #ifdef DEBUG_PUSH
1.10    ! veillard 5696:                xmlGenericError(xmlGenericErrorContext,
        !          5697:                        "HPP: entering CONTENT\n");
1.1       veillard 5698: #endif
                   5699:                break;
                   5700:             case XML_PARSER_DTD:
1.10    ! veillard 5701:                xmlGenericError(xmlGenericErrorContext,
        !          5702:                        "HPP: internal error, state == DTD\n");
1.1       veillard 5703:                ctxt->instate = XML_PARSER_CONTENT;
                   5704:                ctxt->checkIndex = 0;
                   5705: #ifdef DEBUG_PUSH
1.10    ! veillard 5706:                xmlGenericError(xmlGenericErrorContext,
        !          5707:                        "HPP: entering CONTENT\n");
1.1       veillard 5708: #endif
                   5709:                break;
                   5710:             case XML_PARSER_COMMENT:
1.10    ! veillard 5711:                xmlGenericError(xmlGenericErrorContext,
        !          5712:                        "HPP: internal error, state == COMMENT\n");
1.1       veillard 5713:                ctxt->instate = XML_PARSER_CONTENT;
                   5714:                ctxt->checkIndex = 0;
                   5715: #ifdef DEBUG_PUSH
1.10    ! veillard 5716:                xmlGenericError(xmlGenericErrorContext,
        !          5717:                        "HPP: entering CONTENT\n");
1.1       veillard 5718: #endif
                   5719:                break;
                   5720:             case XML_PARSER_PI:
1.10    ! veillard 5721:                xmlGenericError(xmlGenericErrorContext,
        !          5722:                        "HPP: internal error, state == PI\n");
1.1       veillard 5723:                ctxt->instate = XML_PARSER_CONTENT;
                   5724:                ctxt->checkIndex = 0;
                   5725: #ifdef DEBUG_PUSH
1.10    ! veillard 5726:                xmlGenericError(xmlGenericErrorContext,
        !          5727:                        "HPP: entering CONTENT\n");
1.1       veillard 5728: #endif
                   5729:                break;
                   5730:             case XML_PARSER_ENTITY_DECL:
1.10    ! veillard 5731:                xmlGenericError(xmlGenericErrorContext,
        !          5732:                        "HPP: internal error, state == ENTITY_DECL\n");
1.1       veillard 5733:                ctxt->instate = XML_PARSER_CONTENT;
                   5734:                ctxt->checkIndex = 0;
                   5735: #ifdef DEBUG_PUSH
1.10    ! veillard 5736:                xmlGenericError(xmlGenericErrorContext,
        !          5737:                        "HPP: entering CONTENT\n");
1.1       veillard 5738: #endif
                   5739:                break;
                   5740:             case XML_PARSER_ENTITY_VALUE:
1.10    ! veillard 5741:                xmlGenericError(xmlGenericErrorContext,
        !          5742:                        "HPP: internal error, state == ENTITY_VALUE\n");
1.1       veillard 5743:                ctxt->instate = XML_PARSER_CONTENT;
                   5744:                ctxt->checkIndex = 0;
                   5745: #ifdef DEBUG_PUSH
1.10    ! veillard 5746:                xmlGenericError(xmlGenericErrorContext,
        !          5747:                        "HPP: entering DTD\n");
1.1       veillard 5748: #endif
                   5749:                break;
                   5750:             case XML_PARSER_ATTRIBUTE_VALUE:
1.10    ! veillard 5751:                xmlGenericError(xmlGenericErrorContext,
        !          5752:                        "HPP: internal error, state == ATTRIBUTE_VALUE\n");
1.1       veillard 5753:                ctxt->instate = XML_PARSER_START_TAG;
                   5754:                ctxt->checkIndex = 0;
                   5755: #ifdef DEBUG_PUSH
1.10    ! veillard 5756:                xmlGenericError(xmlGenericErrorContext,
        !          5757:                        "HPP: entering START_TAG\n");
1.1       veillard 5758: #endif
                   5759:                break;
                   5760:            case XML_PARSER_SYSTEM_LITERAL:
1.10    ! veillard 5761:                xmlGenericError(xmlGenericErrorContext,
        !          5762:                        "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
1.1       veillard 5763:                ctxt->instate = XML_PARSER_CONTENT;
                   5764:                ctxt->checkIndex = 0;
                   5765: #ifdef DEBUG_PUSH
1.10    ! veillard 5766:                xmlGenericError(xmlGenericErrorContext,
        !          5767:                        "HPP: entering CONTENT\n");
1.1       veillard 5768: #endif
                   5769:                break;
                   5770:        }
                   5771:     }
                   5772: done:    
                   5773:     if ((avail == 0) && (terminate)) {
                   5774:        sgmlAutoClose(ctxt, NULL);
                   5775:        if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 
                   5776:            /*
                   5777:             * SAX: end of the document processing.
                   5778:             */
                   5779:            ctxt->instate = XML_PARSER_EOF;
                   5780:            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5781:                ctxt->sax->endDocument(ctxt->userData);
                   5782:        }
                   5783:     }
                   5784:     if ((ctxt->myDoc != NULL) &&
                   5785:        ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
                   5786:         (ctxt->instate == XML_PARSER_EPILOG))) {
                   5787:        xmlDtdPtr dtd;
                   5788:        dtd = xmlGetIntSubset(ctxt->myDoc);
                   5789:        if (dtd == NULL)
                   5790:            ctxt->myDoc->intSubset = 
                   5791:                xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", 
                   5792:                    BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
                   5793:                    BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
                   5794:     }
                   5795: #ifdef DEBUG_PUSH
1.10    ! veillard 5796:     xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
1.1       veillard 5797: #endif
                   5798:     return(ret);
                   5799: }
                   5800: 
                   5801: /**
                   5802:  * sgmlParseTry:
                   5803:  * @ctxt:  an SGML parser context
                   5804:  *
                   5805:  * Try to progress on parsing
                   5806:  *
                   5807:  * Returns zero if no parsing was possible
                   5808:  */
                   5809: int
                   5810: sgmlParseTry(sgmlParserCtxtPtr ctxt) {
                   5811:     return(sgmlParseTryOrFinish(ctxt, 0));
                   5812: }
                   5813: 
                   5814: /**
                   5815:  * sgmlParseChunk:
                   5816:  * @ctxt:  an XML parser context
                   5817:  * @chunk:  an char array
                   5818:  * @size:  the size in byte of the chunk
                   5819:  * @terminate:  last chunk indicator
                   5820:  *
                   5821:  * Parse a Chunk of memory
                   5822:  *
                   5823:  * Returns zero if no error, the xmlParserErrors otherwise.
                   5824:  */
                   5825: int
                   5826: sgmlParseChunk(sgmlParserCtxtPtr ctxt, const char *chunk, int size,
                   5827:               int terminate) {
                   5828:     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
                   5829:         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
                   5830:        int base = ctxt->input->base - ctxt->input->buf->buffer->content;
                   5831:        int cur = ctxt->input->cur - ctxt->input->base;
                   5832:        
                   5833:        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);              
                   5834:        ctxt->input->base = ctxt->input->buf->buffer->content + base;
                   5835:        ctxt->input->cur = ctxt->input->base + cur;
                   5836: #ifdef DEBUG_PUSH
1.10    ! veillard 5837:        xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
1.1       veillard 5838: #endif
                   5839: 
                   5840:        if ((terminate) || (ctxt->input->buf->buffer->use > 80))
                   5841:            sgmlParseTryOrFinish(ctxt, terminate);
                   5842:     } else if (ctxt->instate != XML_PARSER_EOF) {
                   5843:        xmlParserInputBufferPush(ctxt->input->buf, 0, "");
                   5844:         sgmlParseTryOrFinish(ctxt, terminate);
                   5845:     }
                   5846:     if (terminate) {
                   5847:        if ((ctxt->instate != XML_PARSER_EOF) &&
                   5848:            (ctxt->instate != XML_PARSER_EPILOG) &&
                   5849:            (ctxt->instate != XML_PARSER_MISC)) {
1.6       veillard 5850:            ctxt->errNo = XML_ERR_DOCUMENT_END;
1.1       veillard 5851:            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                   5852:                ctxt->sax->error(ctxt->userData,
                   5853:                    "Extra content at the end of the document\n");
                   5854:            ctxt->wellFormed = 0;
                   5855:        } 
                   5856:        if (ctxt->instate != XML_PARSER_EOF) {
                   5857:            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                   5858:                ctxt->sax->endDocument(ctxt->userData);
                   5859:        }
                   5860:        ctxt->instate = XML_PARSER_EOF;
                   5861:     }
                   5862:     return((xmlParserErrors) ctxt->errNo);           
                   5863: }
                   5864: 
                   5865: /************************************************************************
                   5866:  *                                                                     *
                   5867:  *                     User entry points                               *
                   5868:  *                                                                     *
                   5869:  ************************************************************************/
                   5870: 
                   5871: /**
                   5872:  * sgmlCreatePushParserCtxt :
                   5873:  * @sax:  a SAX handler
                   5874:  * @user_data:  The user data returned on SAX callbacks
                   5875:  * @chunk:  a pointer to an array of chars
                   5876:  * @size:  number of chars in the array
                   5877:  * @filename:  an optional file name or URI
                   5878:  * @enc:  an optional encoding
                   5879:  *
                   5880:  * Create a parser context for using the SGML parser in push mode
                   5881:  * To allow content encoding detection, @size should be >= 4
                   5882:  * The value of @filename is used for fetching external entities
                   5883:  * and error/warning reports.
                   5884:  *
                   5885:  * Returns the new parser context or NULL
                   5886:  */
                   5887: sgmlParserCtxtPtr
                   5888: sgmlCreatePushParserCtxt(sgmlSAXHandlerPtr sax, void *user_data, 
                   5889:                          const char *chunk, int size, const char *filename,
                   5890:                         xmlCharEncoding enc) {
                   5891:     sgmlParserCtxtPtr ctxt;
                   5892:     sgmlParserInputPtr inputStream;
                   5893:     xmlParserInputBufferPtr buf;
                   5894: 
                   5895:     buf = xmlAllocParserInputBuffer(enc);
                   5896:     if (buf == NULL) return(NULL);
                   5897: 
                   5898:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   5899:     if (ctxt == NULL) {
                   5900:        xmlFree(buf);
                   5901:        return(NULL);
                   5902:     }
                   5903:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   5904:     sgmlInitParserCtxt(ctxt);
                   5905:     if (sax != NULL) {
                   5906:        if (ctxt->sax != &sgmlDefaultSAXHandler)
                   5907:            xmlFree(ctxt->sax);
                   5908:        ctxt->sax = (sgmlSAXHandlerPtr) xmlMalloc(sizeof(sgmlSAXHandler));
                   5909:        if (ctxt->sax == NULL) {
                   5910:            xmlFree(buf);
                   5911:            xmlFree(ctxt);
                   5912:            return(NULL);
                   5913:        }
                   5914:        memcpy(ctxt->sax, sax, sizeof(sgmlSAXHandler));
                   5915:        if (user_data != NULL)
                   5916:            ctxt->userData = user_data;
                   5917:     }  
                   5918:     if (filename == NULL) {
                   5919:        ctxt->directory = NULL;
                   5920:     } else {
                   5921:         ctxt->directory = xmlParserGetDirectory(filename);
                   5922:     }
                   5923: 
                   5924:     inputStream = sgmlNewInputStream(ctxt);
                   5925:     if (inputStream == NULL) {
                   5926:        xmlFreeParserCtxt(ctxt);
                   5927:        return(NULL);
                   5928:     }
                   5929: 
                   5930:     if (filename == NULL)
                   5931:        inputStream->filename = NULL;
                   5932:     else
                   5933:        inputStream->filename = xmlMemStrdup(filename);
                   5934:     inputStream->buf = buf;
                   5935:     inputStream->base = inputStream->buf->buffer->content;
                   5936:     inputStream->cur = inputStream->buf->buffer->content;
                   5937: 
                   5938:     inputPush(ctxt, inputStream);
                   5939: 
                   5940:     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
                   5941:         (ctxt->input->buf != NULL))  {       
                   5942:        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);              
                   5943: #ifdef DEBUG_PUSH
1.10    ! veillard 5944:        xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
1.1       veillard 5945: #endif
                   5946:     }
                   5947: 
                   5948:     return(ctxt);
                   5949: }
                   5950: 
                   5951: /**
                   5952:  * sgmlSAXParseDoc :
                   5953:  * @cur:  a pointer to an array of xmlChar
                   5954:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5955:  * @sax:  the SAX handler block
                   5956:  * @userData: if using SAX, this pointer will be provided on callbacks. 
                   5957:  *
                   5958:  * parse an SGML in-memory document and build a tree.
                   5959:  * It use the given SAX function block to handle the parsing callback.
                   5960:  * If sax is NULL, fallback to the default DOM tree building routines.
                   5961:  * 
                   5962:  * Returns the resulting document tree
                   5963:  */
                   5964: 
                   5965: sgmlDocPtr
                   5966: sgmlSAXParseDoc(xmlChar *cur, const char *encoding, sgmlSAXHandlerPtr sax, void *userData) {
                   5967:     sgmlDocPtr ret;
                   5968:     sgmlParserCtxtPtr ctxt;
                   5969: 
                   5970:     if (cur == NULL) return(NULL);
                   5971: 
                   5972: 
                   5973:     ctxt = sgmlCreateDocParserCtxt(cur, encoding);
                   5974:     if (ctxt == NULL) return(NULL);
                   5975:     if (sax != NULL) { 
                   5976:         ctxt->sax = sax;
                   5977:         ctxt->userData = userData;
                   5978:     }
                   5979: 
                   5980:     sgmlParseDocument(ctxt);
                   5981:     ret = ctxt->myDoc;
                   5982:     if (sax != NULL) {
                   5983:        ctxt->sax = NULL;
                   5984:        ctxt->userData = NULL;
                   5985:     }
                   5986:     sgmlFreeParserCtxt(ctxt);
                   5987:     
                   5988:     return(ret);
                   5989: }
                   5990: 
                   5991: /**
                   5992:  * sgmlParseDoc :
                   5993:  * @cur:  a pointer to an array of xmlChar
                   5994:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   5995:  *
                   5996:  * parse an SGML in-memory document and build a tree.
                   5997:  * 
                   5998:  * Returns the resulting document tree
                   5999:  */
                   6000: 
                   6001: sgmlDocPtr
                   6002: sgmlParseDoc(xmlChar *cur, const char *encoding) {
                   6003:     return(sgmlSAXParseDoc(cur, encoding, NULL, NULL));
                   6004: }
                   6005: 
                   6006: 
                   6007: /**
                   6008:  * sgmlCreateFileParserCtxt :
                   6009:  * @filename:  the filename
                   6010:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   6011:  *
                   6012:  * Create a parser context for a file content. 
                   6013:  * Automatic support for ZLIB/Compress compressed document is provided
                   6014:  * by default if found at compile-time.
                   6015:  *
                   6016:  * Returns the new parser context or NULL
                   6017:  */
                   6018: sgmlParserCtxtPtr
                   6019: sgmlCreateFileParserCtxt(const char *filename, const char *encoding)
                   6020: {
                   6021:     sgmlParserCtxtPtr ctxt;
                   6022:     sgmlParserInputPtr inputStream;
                   6023:     xmlParserInputBufferPtr buf;
                   6024:     /* sgmlCharEncoding enc; */
                   6025: 
                   6026:     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
                   6027:     if (buf == NULL) return(NULL);
                   6028: 
                   6029:     ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
                   6030:     if (ctxt == NULL) {
                   6031:         perror("malloc");
                   6032:        return(NULL);
                   6033:     }
                   6034:     memset(ctxt, 0, sizeof(sgmlParserCtxt));
                   6035:     sgmlInitParserCtxt(ctxt);
                   6036:     inputStream = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
                   6037:     if (inputStream == NULL) {
                   6038:         perror("malloc");
                   6039:        xmlFree(ctxt);
                   6040:        return(NULL);
                   6041:     }
                   6042:     memset(inputStream, 0, sizeof(sgmlParserInput));
                   6043: 
                   6044:     inputStream->filename = xmlMemStrdup(filename);
                   6045:     inputStream->line = 1;
                   6046:     inputStream->col = 1;
                   6047:     inputStream->buf = buf;
                   6048:     inputStream->directory = NULL;
                   6049: 
                   6050:     inputStream->base = inputStream->buf->buffer->content;
                   6051:     inputStream->cur = inputStream->buf->buffer->content;
                   6052:     inputStream->free = NULL;
                   6053: 
                   6054:     inputPush(ctxt, inputStream);
                   6055:     return(ctxt);
                   6056: }
                   6057: 
                   6058: /**
                   6059:  * sgmlSAXParseFile :
                   6060:  * @filename:  the filename
                   6061:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   6062:  * @sax:  the SAX handler block
                   6063:  * @userData: if using SAX, this pointer will be provided on callbacks. 
                   6064:  *
                   6065:  * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
                   6066:  * compressed document is provided by default if found at compile-time.
                   6067:  * It use the given SAX function block to handle the parsing callback.
                   6068:  * If sax is NULL, fallback to the default DOM tree building routines.
                   6069:  *
                   6070:  * Returns the resulting document tree
                   6071:  */
                   6072: 
                   6073: sgmlDocPtr
                   6074: sgmlSAXParseFile(const char *filename, const char *encoding, sgmlSAXHandlerPtr sax, 
                   6075:                  void *userData) {
                   6076:     sgmlDocPtr ret;
                   6077:     sgmlParserCtxtPtr ctxt;
                   6078:     sgmlSAXHandlerPtr oldsax = NULL;
                   6079: 
                   6080:     ctxt = sgmlCreateFileParserCtxt(filename, encoding);
                   6081:     if (ctxt == NULL) return(NULL);
                   6082:     if (sax != NULL) {
                   6083:        oldsax = ctxt->sax;
                   6084:         ctxt->sax = sax;
                   6085:         ctxt->userData = userData;
                   6086:     }
                   6087: 
                   6088:     sgmlParseDocument(ctxt);
                   6089: 
                   6090:     ret = ctxt->myDoc;
                   6091:     if (sax != NULL) {
                   6092:         ctxt->sax = oldsax;
                   6093:         ctxt->userData = NULL;
                   6094:     }
                   6095:     sgmlFreeParserCtxt(ctxt);
                   6096:     
                   6097:     return(ret);
                   6098: }
                   6099: 
                   6100: /**
                   6101:  * sgmlParseFile :
                   6102:  * @filename:  the filename
                   6103:  * @encoding:  a free form C string describing the SGML document encoding, or NULL
                   6104:  *
                   6105:  * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
                   6106:  * compressed document is provided by default if found at compile-time.
                   6107:  *
                   6108:  * Returns the resulting document tree
                   6109:  */
                   6110: 
                   6111: sgmlDocPtr
                   6112: sgmlParseFile(const char *filename, const char *encoding) {
                   6113:     return(sgmlSAXParseFile(filename, encoding, NULL, NULL));
                   6114: }
                   6115: 
                   6116: #endif /* LIBXML_SGML_ENABLED */

Webmaster