Annotation of XML/SGMLparser.c, revision 1.10
1.1 veillard 1: /*
2: * SGMLparser.c : an attempt to parse Docbook documents
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #ifdef WIN32
10: #include "win32config.h"
11: #else
12: #include "config.h"
13: #endif
14:
15: #include "xmlversion.h"
16: #ifdef LIBXML_SGML_ENABLED
17:
18: #include <stdio.h>
19: #include <string.h>
20: #ifdef HAVE_CTYPE_H
21: #include <ctype.h>
22: #endif
23: #ifdef HAVE_STDLIB_H
24: #include <stdlib.h>
25: #endif
26: #ifdef HAVE_SYS_STAT_H
27: #include <sys/stat.h>
28: #endif
29: #ifdef HAVE_FCNTL_H
30: #include <fcntl.h>
31: #endif
32: #ifdef HAVE_UNISTD_H
33: #include <unistd.h>
34: #endif
35: #ifdef HAVE_ZLIB_H
36: #include <zlib.h>
37: #endif
38:
39: #include <libxml/xmlmemory.h>
40: #include <libxml/tree.h>
41: #include <libxml/SGMLparser.h>
42: #include <libxml/entities.h>
43: #include <libxml/encoding.h>
44: #include <libxml/parser.h>
45: #include <libxml/valid.h>
46: #include <libxml/parserInternals.h>
47: #include <libxml/xmlIO.h>
48: #include <libxml/SAX.h>
1.3 veillard 49: #include <libxml/uri.h>
1.8 veillard 50: #include <libxml/xmlerror.h>
1.1 veillard 51:
52: #define SGML_MAX_NAMELEN 1000
53: #define SGML_PARSER_BIG_BUFFER_SIZE 1000
54: #define SGML_PARSER_BUFFER_SIZE 100
55:
56: /* #define DEBUG */
57: /* #define DEBUG_PUSH */
58:
59: /************************************************************************
60: * *
61: * Parser stacks related functions and macros *
62: * *
63: ************************************************************************/
64:
65: /*
66: * Generic function for accessing stacks in the Parser Context
67: */
68:
69: #define PUSH_AND_POP(scope, type, name) \
70: scope int sgml##name##Push(sgmlParserCtxtPtr ctxt, type value) { \
71: if (ctxt->name##Nr >= ctxt->name##Max) { \
72: ctxt->name##Max *= 2; \
73: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
74: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
75: if (ctxt->name##Tab == NULL) { \
1.10 ! veillard 76: xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); \
1.1 veillard 77: return(0); \
78: } \
79: } \
80: ctxt->name##Tab[ctxt->name##Nr] = value; \
81: ctxt->name = value; \
82: return(ctxt->name##Nr++); \
83: } \
84: scope type sgml##name##Pop(sgmlParserCtxtPtr ctxt) { \
85: type ret; \
86: if (ctxt->name##Nr < 0) return(0); \
87: ctxt->name##Nr--; \
88: if (ctxt->name##Nr < 0) return(0); \
89: if (ctxt->name##Nr > 0) \
90: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
91: else \
92: ctxt->name = NULL; \
93: ret = ctxt->name##Tab[ctxt->name##Nr]; \
94: ctxt->name##Tab[ctxt->name##Nr] = 0; \
95: return(ret); \
96: } \
97:
98: PUSH_AND_POP(extern, xmlNodePtr, node)
99: PUSH_AND_POP(extern, xmlChar*, name)
100:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a parser context named "ctxt" to be
 * in scope at the point of use.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *   CUR      the current xmlChar value converted to int. It should be used
 *            by the parser only to compare to ASCII values, otherwise it
 *            would break when running with UTF-8 encoding.
 *   RAW      -1 when a token is pending in ctxt->token, else the current
 *            xmlChar.
 *   NXT(n)   the n'th next xmlChar. Same as CUR, it should be used only
 *            to compare on ASCII based substrings.
 *   UPPER    the current xmlChar converted to uppercase.
 *   UPP(n)   the n'th next xmlChar converted to uppercase. Same as CUR,
 *            it should be used only to compare on ASCII based substrings.
 *   SKIP(n)  skip n xmlChar (also added to ctxt->nbChars); must be used
 *            only to skip ASCII defined strings within the parser.
 *   SHRINK / GROW  shrink or grow the current input buffer.
 *   SKIP_BLANKS    skip blank characters, see sgmlSkipBlankChars().
 *
 * Clean macros, not dependent on an ASCII context
 *
 *   CURRENT          the current char value as an int.
 *   NEXT             skip to the next character through xmlNextChar()
 *                    and count it in ctxt->nbChars.
 *   NEXTL(l)         advance the input by l bytes, updating the line and
 *                    column counters, clearing ctxt->token and counting
 *                    one character in ctxt->nbChars.
 *   CUR_CHAR(l)      the current character, its byte length stored in l.
 *   CUR_SCHAR(s, l)  same, but read from the string s.
 *   COPY_BUF(l,b,i,v) append character v (of encoded length l) to buffer
 *                    b at index i, advancing i.
 */

#define UPPER (toupper(*ctxt->input->cur))

#define SKIP(val) (ctxt->nbChars += (val), ctxt->input->cur += (val))

#define NXT(val) (ctxt->input->cur[(val)])

#define UPP(val) (toupper(ctxt->input->cur[(val)]))

#define CUR_PTR ctxt->input->cur

#define SHRINK xmlParserInputShrink(ctxt->input)

#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS sgmlSkipBlankChars(ctxt)

#if 0
#define CUR ((int) (*ctxt->input->cur))
#define NEXT sgmlNextChar(ctxt);
#else
/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT (xmlNextChar(ctxt), ctxt->nbChars++)

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))

/* NXT() and CUR_PTR are already defined above, identically, for both branches */

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l); ctxt->nbChars++;		\
  } while (0)

/************
    \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

#define CUR_CHAR(l) sgmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/* wrapped in do/while so that the macro behaves as a single statement */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l), &(b)[(i)], (v));			\
  } while (0)
#endif
182:
183: /**
184: * sgmlCurrentChar:
185: * @ctxt: the SGML parser context
186: * @len: pointer to the length of the char read
187: *
188: * The current char value, if using UTF-8 this may actaully span multiple
189: * bytes in the input buffer. Implement the end of line normalization:
190: * 2.11 End-of-Line Handling
191: * If the encoding is unspecified, in the case we find an ISO-Latin-1
192: * char, then the encoding converter is plugged in automatically.
193: *
194: * Returns the current char value and its lenght
195: */
196:
197: int
198: sgmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
199: if (ctxt->instate == XML_PARSER_EOF)
200: return(0);
201:
202: if (ctxt->token != 0) {
203: *len = 0;
204: return(ctxt->token);
205: }
206: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
207: /*
208: * We are supposed to handle UTF8, check it's valid
209: * From rfc2044: encoding of the Unicode values on UTF-8:
210: *
211: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
212: * 0000 0000-0000 007F 0xxxxxxx
213: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
214: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
215: *
216: * Check for the 0x110000 limit too
217: */
218: const unsigned char *cur = ctxt->input->cur;
219: unsigned char c;
220: unsigned int val;
221:
222: c = *cur;
223: if (c & 0x80) {
224: if (cur[1] == 0)
225: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
226: if ((cur[1] & 0xc0) != 0x80)
227: goto encoding_error;
228: if ((c & 0xe0) == 0xe0) {
229:
230: if (cur[2] == 0)
231: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
232: if ((cur[2] & 0xc0) != 0x80)
233: goto encoding_error;
234: if ((c & 0xf0) == 0xf0) {
235: if (cur[3] == 0)
236: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
237: if (((c & 0xf8) != 0xf0) ||
238: ((cur[3] & 0xc0) != 0x80))
239: goto encoding_error;
240: /* 4-byte code */
241: *len = 4;
242: val = (cur[0] & 0x7) << 18;
243: val |= (cur[1] & 0x3f) << 12;
244: val |= (cur[2] & 0x3f) << 6;
245: val |= cur[3] & 0x3f;
246: } else {
247: /* 3-byte code */
248: *len = 3;
249: val = (cur[0] & 0xf) << 12;
250: val |= (cur[1] & 0x3f) << 6;
251: val |= cur[2] & 0x3f;
252: }
253: } else {
254: /* 2-byte code */
255: *len = 2;
256: val = (cur[0] & 0x1f) << 6;
257: val |= cur[1] & 0x3f;
258: }
259: if (!IS_CHAR(val)) {
1.6 veillard 260: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1 veillard 261: if ((ctxt->sax != NULL) &&
262: (ctxt->sax->error != NULL))
263: ctxt->sax->error(ctxt->userData,
264: "Char 0x%X out of allowed range\n", val);
265: ctxt->wellFormed = 0;
266: ctxt->disableSAX = 1;
267: }
268: return(val);
269: } else {
270: /* 1-byte code */
271: *len = 1;
272: return((int) *ctxt->input->cur);
273: }
274: }
275: /*
276: * Assume it's a fixed lenght encoding (1) with
277: * a compatibke encoding for the ASCII set, since
278: * XML constructs only use < 128 chars
279: */
280: *len = 1;
281: if ((int) *ctxt->input->cur < 0x80)
282: return((int) *ctxt->input->cur);
283:
284: /*
285: * Humm this is bad, do an automatic flow conversion
286: */
287: xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
288: ctxt->charset = XML_CHAR_ENCODING_UTF8;
289: return(xmlCurrentChar(ctxt, len));
290:
291: encoding_error:
292: /*
293: * If we detect an UTF8 error that probably mean that the
294: * input encoding didn't get properly advertized in the
295: * declaration header. Report the error and switch the encoding
296: * to ISO-Latin-1 (if you don't like this policy, just declare the
297: * encoding !)
298: */
1.6 veillard 299: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1 veillard 300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
301: ctxt->sax->error(ctxt->userData,
302: "Input is not proper UTF-8, indicate encoding !\n");
303: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
304: ctxt->input->cur[0], ctxt->input->cur[1],
305: ctxt->input->cur[2], ctxt->input->cur[3]);
306: }
307:
308: ctxt->charset = XML_CHAR_ENCODING_8859_1;
309: *len = 1;
310: return((int) *ctxt->input->cur);
311: }
312:
313: /**
314: * sgmlNextChar:
315: * @ctxt: the SGML parser context
316: *
317: * Skip to the next char input char.
318: */
319:
320: void
321: sgmlNextChar(sgmlParserCtxtPtr ctxt) {
322: if (ctxt->instate == XML_PARSER_EOF)
323: return;
324: if ((*ctxt->input->cur == 0) &&
325: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
326: xmlPopInput(ctxt);
327: } else {
328: if (*(ctxt->input->cur) == '\n') {
329: ctxt->input->line++; ctxt->input->col = 1;
330: } else ctxt->input->col++;
331: ctxt->input->cur++;
332: ctxt->nbChars++;
333: if (*ctxt->input->cur == 0)
334: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
335: }
336: }
337:
338: /**
339: * sgmlSkipBlankChars:
340: * @ctxt: the SGML parser context
341: *
342: * skip all blanks character found at that point in the input streams.
343: *
344: * Returns the number of space chars skipped
345: */
346:
347: int
348: sgmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
349: int res = 0;
350:
351: while (IS_BLANK(*(ctxt->input->cur))) {
352: if ((*ctxt->input->cur == 0) &&
353: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
354: xmlPopInput(ctxt);
355: } else {
356: if (*(ctxt->input->cur) == '\n') {
357: ctxt->input->line++; ctxt->input->col = 1;
358: } else ctxt->input->col++;
359: ctxt->input->cur++;
360: ctxt->nbChars++;
361: if (*ctxt->input->cur == 0)
362: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
363: }
364: res++;
365: }
366: return(res);
367: }
368:
369:
370:
371: /************************************************************************
372: * *
373: * The list of SGML elements and their properties *
374: * *
375: ************************************************************************/
376:
377: /*
378: * Start Tag: 1 means the start tag can be ommited
379: * End Tag: 1 means the end tag can be ommited
380: * 2 means it's forbidden (empty elements)
381: * Depr: this element is deprecated
382: * DTD: 1 means that this element is valid only in the Loose DTD
383: * 2 means that this element is valid only in the Frameset DTD
384: *
385: * Name,Start Tag,End Tag, Empty, Depr., DTD, Description
386: */
387: sgmlElemDesc docbookElementTable[] = {
388: { "abbrev", 0, 0, 0, 3, 0, "" }, /* word */
389: { "abstract", 0, 0, 0, 9, 0, "" }, /* title */
390: { "accel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
391: { "ackno", 0, 0, 0, 4, 0, "" }, /* docinfo */
392: { "acronym", 0, 0, 0, 3, 0, "" }, /* word */
393: { "action", 0, 0, 0, 7, 0, "" }, /* smallcptr */
394: { "address", 0, 0, 0, 1, 0, "" },
395: { "affiliation",0, 0, 0, 9, 0, "" }, /* shortaffil */
396: { "alt", 0, 0, 0, 1, 0, "" },
397: { "anchor", 0, 2, 1, 0, 0, "" },
398: { "answer", 0, 0, 0, 9, 0, "" }, /* label */
399: { "appendix", 0, 0, 0, 9, 0, "" }, /* appendixinfo */
400: { "appendixinfo",0, 0, 0, 9, 0, "" }, /* graphic */
401: { "application",0, 0, 0, 2, 0, "" }, /* para */
402: { "area", 0, 2, 1, 0, 0, "" },
403: { "areaset", 0, 0, 0, 9, 0, "" }, /* area */
404: { "areaspec", 0, 0, 0, 9, 0, "" }, /* area */
405: { "arg", 0, 0, 0, 1, 0, "" },
406: { "article", 0, 0, 0, 9, 0, "" }, /* div.title.content */
407: { "articleinfo",0, 0, 0, 9, 0, "" }, /* graphic */
408: { "artpagenums",0, 0, 0, 4, 0, "" }, /* docinfo */
409: { "attribution",0, 0, 0, 2, 0, "" }, /* para */
410: { "audiodata", 0, 2, 1, 0, 0, "" },
411: { "audioobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
412: { "authorblurb",0, 0, 0, 9, 0, "" }, /* title */
413: { "authorgroup",0, 0, 0, 9, 0, "" }, /* author */
414: { "authorinitials",0, 0, 0, 4, 0, "" }, /* docinfo */
415: { "author", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
416: { "beginpage", 0, 2, 1, 0, 0, "" },
417: { "bibliodiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
418: { "biblioentry",0, 0, 0, 9, 0, "" }, /* articleinfo */
419: { "bibliography",0, 0, 0, 9, 0, "" }, /* bibliographyinfo */
420: { "bibliographyinfo",0, 0, 0, 9, 0, "" }, /* graphic */
421: { "bibliomisc", 0, 0, 0, 2, 0, "" }, /* para */
422: { "bibliomixed",0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix, bibliomset) */
423: { "bibliomset", 0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix; | bibliomset) */
424: { "biblioset", 0, 0, 0, 9, 0, "" }, /* bibliocomponent.mix */
425: { "blockquote", 0, 0, 0, 9, 0, "" }, /* title */
426: { "book", 0, 0, 0, 9, 0, "" }, /* div.title.content */
427: { "bookinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
428: { "bridgehead", 0, 0, 0, 8, 0, "" }, /* title */
429: { "callout", 0, 0, 0, 9, 0, "" }, /* component.mix */
430: { "calloutlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
431: { "caption", 0, 0, 0, 9, 0, "" }, /* textobject.mix */
432: { "caution", 0, 0, 0, 9, 0, "" }, /* title */
433: { "chapter", 0, 0, 0, 9, 0, "" }, /* chapterinfo */
434: { "chapterinfo",0, 0, 0, 9, 0, "" }, /* graphic */
435: { "citation", 0, 0, 0, 2, 0, "" }, /* para */
436: { "citerefentry",0, 0, 0, 9, 0, "" }, /* refentrytitle */
437: { "citetitle", 0, 0, 0, 2, 0, "" }, /* para */
438: { "city", 0, 0, 0, 4, 0, "" }, /* docinfo */
439: { "classname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
440: { "classsynopsisinfo",0,0, 0, 9, 0, "" }, /* cptr */
441: { "classsynopsis",0, 0, 0, 9, 0, "" }, /* ooclass */
442: { "cmdsynopsis",0, 0, 0, 9, 0, "" }, /* command */
443: { "co", 0, 2, 1, 0, 0, "" },
444: { "collab", 0, 0, 0, 9, 0, "" }, /* collabname */
445: { "collabname", 0, 0, 0, 4, 0, "" }, /* docinfo */
446: { "colophon", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
447: { "colspec", 0, 2, 1, 0, 0, "" },
448: { "colspec", 0, 2, 1, 0, 0, "" },
449: { "command", 0, 0, 0, 9, 0, "" }, /* cptr */
450: { "computeroutput",0, 0, 0, 9, 0, "" }, /* cptr */
451: { "confdates", 0, 0, 0, 4, 0, "" }, /* docinfo */
452: { "confgroup", 0, 0, 0, 9, 0, "" }, /* confdates */
453: { "confnum", 0, 0, 0, 4, 0, "" }, /* docinfo */
454: { "confsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
455: { "conftitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
456: { "constant", 0, 0, 0, 7, 0, "" }, /* smallcptr */
457: { "constructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
458: { "contractnum",0, 0, 0, 4, 0, "" }, /* docinfo */
459: { "contractsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */
460: { "contrib", 0, 0, 0, 4, 0, "" }, /* docinfo */
461: { "copyright", 0, 0, 0, 9, 0, "" }, /* year */
462: { "corpauthor", 0, 0, 0, 4, 0, "" }, /* docinfo */
463: { "corpname", 0, 0, 0, 4, 0, "" }, /* docinfo */
464: { "country", 0, 0, 0, 4, 0, "" }, /* docinfo */
465: { "database", 0, 0, 0, 7, 0, "" }, /* smallcptr */
466: { "date", 0, 0, 0, 4, 0, "" }, /* docinfo */
467: { "dedication", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
468: { "destructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */
469: { "edition", 0, 0, 0, 4, 0, "" }, /* docinfo */
470: { "editor", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */
471: { "email", 0, 0, 0, 4, 0, "" }, /* docinfo */
472: { "emphasis", 0, 0, 0, 2, 0, "" }, /* para */
473: { "entry", 0, 0, 0, 9, 0, "" }, /* tbl.entry.mdl */
474: { "entrytbl", 0, 0, 0, 9, 0, "" }, /* tbl.entrytbl.mdl */
475: { "envar", 0, 0, 0, 7, 0, "" }, /* smallcptr */
476: { "epigraph", 0, 0, 0, 9, 0, "" }, /* attribution */
477: { "equation", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
478: { "errorcode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
479: { "errorname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
480: { "errortype", 0, 0, 0, 7, 0, "" }, /* smallcptr */
481: { "example", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
482: { "exceptionname",0, 0, 0, 7, 0, "" }, /* smallcptr */
483: { "fax", 0, 0, 0, 4, 0, "" }, /* docinfo */
484: { "fieldsynopsis", 0, 0, 0, 9, 0, "" }, /* modifier */
485: { "figure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
486: { "filename", 0, 0, 0, 7, 0, "" }, /* smallcptr */
487: { "firstname", 0, 0, 0, 4, 0, "" }, /* docinfo */
488: { "firstterm", 0, 0, 0, 3, 0, "" }, /* word */
489: { "footnote", 0, 0, 0, 9, 0, "" }, /* footnote.mix */
490: { "footnoteref",0, 2, 1, 0, 0, "" },
491: { "foreignphrase",0, 0, 0, 2, 0, "" }, /* para */
492: { "formalpara", 0, 0, 0, 9, 0, "" }, /* title */
493: { "funcdef", 0, 0, 0, 1, 0, "" },
494: { "funcparams", 0, 0, 0, 9, 0, "" }, /* cptr */
495: { "funcprototype",0, 0, 0, 9, 0, "" }, /* funcdef */
496: { "funcsynopsis",0, 0, 0, 9, 0, "" }, /* funcsynopsisinfo */
497: { "funcsynopsisinfo", 0, 0, 0, 9, 0, "" }, /* cptr */
498: { "function", 0, 0, 0, 9, 0, "" }, /* cptr */
499: { "glossary", 0, 0, 0, 9, 0, "" }, /* glossaryinfo */
500: { "glossaryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
501: { "glossdef", 0, 0, 0, 9, 0, "" }, /* glossdef.mix */
502: { "glossdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
503: { "glossentry", 0, 0, 0, 9, 0, "" }, /* glossterm */
504: { "glosslist", 0, 0, 0, 9, 0, "" }, /* glossentry */
505: { "glossseealso",0, 0, 0, 2, 0, "" }, /* para */
506: { "glosssee", 0, 0, 0, 2, 0, "" }, /* para */
507: { "glossterm", 0, 0, 0, 2, 0, "" }, /* para */
508: { "graphic", 0, 2, 1, 0, 0, "" },
509: { "graphicco", 0, 0, 0, 9, 0, "" }, /* areaspec */
510: { "group", 0, 0, 0, 9, 0, "" }, /* arg */
511: { "guibutton", 0, 0, 0, 7, 0, "" }, /* smallcptr */
512: { "guiicon", 0, 0, 0, 7, 0, "" }, /* smallcptr */
513: { "guilabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
514: { "guimenuitem",0, 0, 0, 7, 0, "" }, /* smallcptr */
515: { "guimenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
516: { "guisubmenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */
517: { "hardware", 0, 0, 0, 7, 0, "" }, /* smallcptr */
518: { "highlights", 0, 0, 0, 9, 0, "" }, /* highlights.mix */
519: { "holder", 0, 0, 0, 4, 0, "" }, /* docinfo */
520: { "honorific", 0, 0, 0, 4, 0, "" }, /* docinfo */
521: { "imagedata", 0, 2, 1, 0, 0, "" },
522: { "imageobjectco",0, 0, 0, 9, 0, "" }, /* areaspec */
523: { "imageobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
524: { "important", 0, 0, 0, 9, 0, "" }, /* title */
525: { "indexdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
526: { "indexentry", 0, 0, 0, 9, 0, "" }, /* primaryie */
527: { "index", 0, 0, 0, 9, 0, "" }, /* indexinfo */
528: { "indexinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
529: { "indexterm", 0, 0, 0, 9, 0, "" }, /* primary */
530: { "informalequation",0, 0, 0, 9, 0, "" }, /* equation.content */
531: { "informalexample",0, 0, 0, 9, 0, "" }, /* example.mix */
532: { "informalfigure",0, 0, 0, 9, 0, "" }, /* figure.mix */
533: { "informaltable",0, 0, 0, 9, 0, "" }, /* graphic */
534: { "initializer",0, 0, 0, 7, 0, "" }, /* smallcptr */
535: { "inlineequation",0, 0, 0, 9, 0, "" }, /* inlineequation.content */
536: { "inlinegraphic",0, 2, 1, 0, 0, "" },
537: { "inlinemediaobject",0,0, 0, 9, 0, "" }, /* objectinfo */
538: { "interfacename",0, 0, 0, 7, 0, "" }, /* smallcptr */
539: { "interface", 0, 0, 0, 7, 0, "" }, /* smallcptr */
540: { "invpartnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
541: { "isbn", 0, 0, 0, 4, 0, "" }, /* docinfo */
542: { "issn", 0, 0, 0, 4, 0, "" }, /* docinfo */
543: { "issuenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
544: { "itemizedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
545: { "itermset", 0, 0, 0, 9, 0, "" }, /* indexterm */
546: { "jobtitle", 0, 0, 0, 4, 0, "" }, /* docinfo */
547: { "keycap", 0, 0, 0, 7, 0, "" }, /* smallcptr */
548: { "keycode", 0, 0, 0, 7, 0, "" }, /* smallcptr */
549: { "keycombo", 0, 0, 0, 9, 0, "" }, /* keycap */
550: { "keysym", 0, 0, 0, 7, 0, "" }, /* smallcptr */
551: { "keyword", 0, 0, 0, 1, 0, "" },
552: { "keywordset", 0, 0, 0, 9, 0, "" }, /* keyword */
553: { "label", 0, 0, 0, 3, 0, "" }, /* word */
554: { "legalnotice",0, 0, 0, 9, 0, "" }, /* title */
555: { "lineage", 0, 0, 0, 4, 0, "" }, /* docinfo */
556: { "lineannotation",0, 0, 0, 2, 0, "" }, /* para */
557: { "link", 0, 0, 0, 2, 0, "" }, /* para */
558: { "listitem", 0, 0, 0, 9, 0, "" }, /* component.mix */
559: { "literal", 0, 0, 0, 9, 0, "" }, /* cptr */
560: { "literallayout",0, 0, 0, 2, 0, "" }, /* para */
561: { "lot", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
562: { "lotentry", 0, 0, 0, 2, 0, "" }, /* para */
563: { "manvolnum", 0, 0, 0, 3, 0, "" }, /* word */
564: { "markup", 0, 0, 0, 7, 0, "" }, /* smallcptr */
565: { "medialabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
566: { "mediaobjectco",0, 0, 0, 9, 0, "" }, /* objectinfo */
567: { "mediaobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
568: { "member", 0, 0, 0, 2, 0, "" }, /* para */
569: { "menuchoice", 0, 0, 0, 9, 0, "" }, /* shortcut */
570: { "methodname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
571: { "methodparam",0, 0, 0, 9, 0, "" }, /* modifier */
572: { "methodsynopsis",0, 0, 0, 9, 0, "" }, /* modifier */
573: { "modespec", 0, 0, 0, 4, 0, "" }, /* docinfo */
574: { "modifier", 0, 0, 0, 7, 0, "" }, /* smallcptr */
575: { "mousebutton",0, 0, 0, 7, 0, "" }, /* smallcptr */
576: { "msgaud", 0, 0, 0, 2, 0, "" }, /* para */
577: { "msgentry", 0, 0, 0, 9, 0, "" }, /* msg */
578: { "msgexplan", 0, 0, 0, 9, 0, "" }, /* title */
579: { "msginfo", 0, 0, 0, 9, 0, "" }, /* msglevel */
580: { "msglevel", 0, 0, 0, 7, 0, "" }, /* smallcptr */
581: { "msgmain", 0, 0, 0, 9, 0, "" }, /* title */
582: { "msgorig", 0, 0, 0, 7, 0, "" }, /* smallcptr */
583: { "msgrel", 0, 0, 0, 9, 0, "" }, /* title */
584: { "msgset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
585: { "msgsub", 0, 0, 0, 9, 0, "" }, /* title */
586: { "msgtext", 0, 0, 0, 9, 0, "" }, /* component.mix */
587: { "msg", 0, 0, 0, 9, 0, "" }, /* title */
588: { "note", 0, 0, 0, 9, 0, "" }, /* title */
589: { "objectinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
590: { "olink", 0, 0, 0, 2, 0, "" }, /* para */
591: { "ooclass", 0, 0, 0, 9, 0, "" }, /* modifier */
592: { "ooexception",0, 0, 0, 9, 0, "" }, /* modifier */
593: { "oointerface",0, 0, 0, 9, 0, "" }, /* modifier */
594: { "optional", 0, 0, 0, 9, 0, "" }, /* cptr */
595: { "option", 0, 0, 0, 7, 0, "" }, /* smallcptr */
596: { "orderedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
597: { "orgdiv", 0, 0, 0, 4, 0, "" }, /* docinfo */
598: { "orgname", 0, 0, 0, 4, 0, "" }, /* docinfo */
599: { "otheraddr", 0, 0, 0, 4, 0, "" }, /* docinfo */
600: { "othercredit",0, 0, 0, 9, 0, "" }, /* person.ident.mix */
601: { "othername", 0, 0, 0, 4, 0, "" }, /* docinfo */
602: { "pagenums", 0, 0, 0, 4, 0, "" }, /* docinfo */
603: { "paramdef", 0, 0, 0, 1, 0, "" },
604: { "parameter", 0, 0, 0, 7, 0, "" }, /* smallcptr */
605: { "para", 0, 0, 0, 2, 0, "" }, /* para */
606: { "partinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
607: { "partintro", 0, 0, 0, 9, 0, "" }, /* div.title.content */
608: { "part", 0, 0, 0, 9, 0, "" }, /* partinfo */
609: { "phone", 0, 0, 0, 4, 0, "" }, /* docinfo */
610: { "phrase", 0, 0, 0, 2, 0, "" }, /* para */
611: { "pob", 0, 0, 0, 4, 0, "" }, /* docinfo */
612: { "postcode", 0, 0, 0, 4, 0, "" }, /* docinfo */
613: { "prefaceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
614: { "preface", 0, 0, 0, 9, 0, "" }, /* prefaceinfo */
615: { "primaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
616: { "primary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
617: { "printhistory",0, 0, 0, 9, 0, "" }, /* para.class */
618: { "procedure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
619: { "productname",0, 0, 0, 2, 0, "" }, /* para */
620: { "productnumber",0, 0, 0, 4, 0, "" }, /* docinfo */
621: { "programlistingco",0, 0, 0, 9, 0, "" }, /* areaspec */
622: { "programlisting",0, 0, 0, 2, 0, "" }, /* para */
623: { "prompt", 0, 0, 0, 7, 0, "" }, /* smallcptr */
624: { "property", 0, 0, 0, 7, 0, "" }, /* smallcptr */
625: { "pubdate", 0, 0, 0, 4, 0, "" }, /* docinfo */
626: { "publishername",0, 0, 0, 4, 0, "" }, /* docinfo */
627: { "publisher", 0, 0, 0, 9, 0, "" }, /* publishername */
628: { "pubsnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
629: { "qandadiv", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
630: { "qandaentry", 0, 0, 0, 9, 0, "" }, /* revhistory */
631: { "qandaset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
632: { "question", 0, 0, 0, 9, 0, "" }, /* label */
633: { "quote", 0, 0, 0, 2, 0, "" }, /* para */
634: { "refclass", 0, 0, 0, 9, 0, "" }, /* refclass.char.mix */
635: { "refdescriptor",0, 0, 0, 9, 0, "" }, /* refname.char.mix */
636: { "refentryinfo",0, 0, 0, 9, 0, "" }, /* graphic */
637: { "refentry", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
638: { "refentrytitle",0, 0, 0, 2, 0, "" }, /* para */
639: { "referenceinfo",0, 0, 0, 9, 0, "" }, /* graphic */
640: { "reference", 0, 0, 0, 9, 0, "" }, /* referenceinfo */
641: { "refmeta", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */
642: { "refmiscinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
643: { "refnamediv", 0, 0, 0, 9, 0, "" }, /* refdescriptor */
644: { "refname", 0, 0, 0, 9, 0, "" }, /* refname.char.mix */
645: { "refpurpose", 0, 0, 0, 9, 0, "" }, /* refinline.char.mix */
646: { "refsect1info",0, 0, 0, 9, 0, "" }, /* graphic */
647: { "refsect1", 0, 0, 0, 9, 0, "" }, /* refsect */
648: { "refsect2info",0, 0, 0, 9, 0, "" }, /* graphic */
649: { "refsect2", 0, 0, 0, 9, 0, "" }, /* refsect */
650: { "refsect3info",0, 0, 0, 9, 0, "" }, /* graphic */
651: { "refsect3", 0, 0, 0, 9, 0, "" }, /* refsect */
652: { "refsynopsisdivinfo",0,0, 0, 9, 0, "" }, /* graphic */
653: { "refsynopsisdiv",0, 0, 0, 9, 0, "" }, /* refsynopsisdivinfo */
654: { "releaseinfo",0, 0, 0, 4, 0, "" }, /* docinfo */
655: { "remark", 0, 0, 0, 2, 0, "" }, /* para */
656: { "replaceable",0, 0, 0, 1, 0, "" },
657: { "returnvalue",0, 0, 0, 7, 0, "" }, /* smallcptr */
658: { "revdescription",0, 0, 0, 9, 0, "" }, /* revdescription.mix */
659: { "revhistory", 0, 0, 0, 9, 0, "" }, /* revision */
660: { "revision", 0, 0, 0, 9, 0, "" }, /* revnumber */
661: { "revnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */
662: { "revremark", 0, 0, 0, 4, 0, "" }, /* docinfo */
663: { "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
664: { "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */
665: { "sbr", 0, 2, 1, 0, 0, "" },
666: { "screenco", 0, 0, 0, 9, 0, "" }, /* areaspec */
667: { "screeninfo", 0, 0, 0, 2, 0, "" }, /* para */
668: { "screen", 0, 0, 0, 2, 0, "" }, /* para */
669: { "screenshot", 0, 0, 0, 9, 0, "" }, /* screeninfo */
670: { "secondaryie",0, 0, 0, 4, 0, "" }, /* ndxterm */
671: { "secondary", 0, 0, 0, 4, 0, "" }, /* ndxterm */
672: { "sect1info", 0, 0, 0, 9, 0, "" }, /* graphic */
673: { "sect1", 0, 0, 0, 9, 0, "" }, /* sect */
674: { "sect2info", 0, 0, 0, 9, 0, "" }, /* graphic */
675: { "sect2", 0, 0, 0, 9, 0, "" }, /* sect */
676: { "sect3info", 0, 0, 0, 9, 0, "" }, /* graphic */
677: { "sect3", 0, 0, 0, 9, 0, "" }, /* sect */
678: { "sect4info", 0, 0, 0, 9, 0, "" }, /* graphic */
679: { "sect4", 0, 0, 0, 9, 0, "" }, /* sect */
680: { "sect5info", 0, 0, 0, 9, 0, "" }, /* graphic */
681: { "sect5", 0, 0, 0, 9, 0, "" }, /* sect */
682: { "sectioninfo",0, 0, 0, 9, 0, "" }, /* graphic */
683: { "section", 0, 0, 0, 9, 0, "" }, /* sectioninfo */
684: { "seealsoie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
685: { "seealso", 0, 0, 0, 4, 0, "" }, /* ndxterm */
686: { "seeie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
687: { "see", 0, 0, 0, 4, 0, "" }, /* ndxterm */
688: { "seglistitem",0, 0, 0, 9, 0, "" }, /* seg */
689: { "segmentedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
690: { "seg", 0, 0, 0, 2, 0, "" }, /* para */
691: { "segtitle", 0, 0, 0, 8, 0, "" }, /* title */
692: { "seriesvolnums", 0, 0, 0, 4, 0, "" }, /* docinfo */
693: { "set", 0, 0, 0, 9, 0, "" }, /* div.title.content */
694: { "setindexinfo",0, 0, 0, 9, 0, "" }, /* graphic */
695: { "setindex", 0, 0, 0, 9, 0, "" }, /* setindexinfo */
696: { "setinfo", 0, 0, 0, 9, 0, "" }, /* graphic */
697: { "sgmltag", 0, 0, 0, 7, 0, "" }, /* smallcptr */
698: { "shortaffil", 0, 0, 0, 4, 0, "" }, /* docinfo */
699: { "shortcut", 0, 0, 0, 9, 0, "" }, /* keycap */
700: { "sidebarinfo",0, 0, 0, 9, 0, "" }, /* graphic */
701: { "sidebar", 0, 0, 0, 9, 0, "" }, /* sidebarinfo */
702: { "simpara", 0, 0, 0, 2, 0, "" }, /* para */
703: { "simplelist", 0, 0, 0, 9, 0, "" }, /* member */
704: { "simplemsgentry", 0, 0, 0, 9, 0, "" }, /* msgtext */
705: { "simplesect", 0, 0, 0, 9, 0, "" }, /* sect.title.content */
706: { "spanspec", 0, 2, 1, 0, 0, "" },
707: { "state", 0, 0, 0, 4, 0, "" }, /* docinfo */
708: { "step", 0, 0, 0, 9, 0, "" }, /* title */
709: { "street", 0, 0, 0, 4, 0, "" }, /* docinfo */
710: { "structfield",0, 0, 0, 7, 0, "" }, /* smallcptr */
711: { "structname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
712: { "subjectset", 0, 0, 0, 9, 0, "" }, /* subject */
713: { "subject", 0, 0, 0, 9, 0, "" }, /* subjectterm */
714: { "subjectterm",0, 0, 0, 1, 0, "" },
715: { "subscript", 0, 0, 0, 1, 0, "" },
716: { "substeps", 0, 0, 0, 9, 0, "" }, /* step */
717: { "subtitle", 0, 0, 0, 8, 0, "" }, /* title */
718: { "superscript", 0, 0, 0, 1, 0, "" },
719: { "surname", 0, 0, 0, 4, 0, "" }, /* docinfo */
720: { "symbol", 0, 0, 0, 7, 0, "" }, /* smallcptr */
721: { "synopfragment", 0, 0, 0, 9, 0, "" }, /* arg */
722: { "synopfragmentref", 0, 0, 0, 1, 0, "" },
723: { "synopsis", 0, 0, 0, 2, 0, "" }, /* para */
724: { "systemitem", 0, 0, 0, 7, 0, "" }, /* smallcptr */
725: { "table", 0, 0, 0, 9, 0, "" }, /* tbl.table.mdl */
726: /* { "%tbl.table.name;", 0, 0, 0, 9, 0, "" },*/ /* tbl.table.mdl */
727: { "tbody", 0, 0, 0, 9, 0, "" }, /* row */
728: { "tbody", 0, 0, 0, 9, 0, "" }, /* row */
729: { "term", 0, 0, 0, 2, 0, "" }, /* para */
730: { "tertiaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */
731: { "tertiary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */
732: { "textobject", 0, 0, 0, 9, 0, "" }, /* objectinfo */
733: { "tfoot", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
734: { "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
735: { "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */
736: { "thead", 0, 0, 0, 9, 0, "" }, /* row */
737: { "thead", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */
738: { "tip", 0, 0, 0, 9, 0, "" }, /* title */
739: { "titleabbrev",0, 0, 0, 8, 0, "" }, /* title */
740: { "title", 0, 0, 0, 8, 0, "" }, /* title */
741: { "tocback", 0, 0, 0, 2, 0, "" }, /* para */
742: { "toc", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */
743: { "tocchap", 0, 0, 0, 9, 0, "" }, /* tocentry */
744: { "tocentry", 0, 0, 0, 2, 0, "" }, /* para */
745: { "tocfront", 0, 0, 0, 2, 0, "" }, /* para */
746: { "toclevel1", 0, 0, 0, 9, 0, "" }, /* tocentry */
747: { "toclevel2", 0, 0, 0, 9, 0, "" }, /* tocentry */
748: { "toclevel3", 0, 0, 0, 9, 0, "" }, /* tocentry */
749: { "toclevel4", 0, 0, 0, 9, 0, "" }, /* tocentry */
750: { "toclevel5", 0, 0, 0, 9, 0, "" }, /* tocentry */
751: { "tocpart", 0, 0, 0, 9, 0, "" }, /* tocentry */
752: { "token", 0, 0, 0, 7, 0, "" }, /* smallcptr */
753: { "trademark", 0, 0, 0, 1, 0, "" },
754: { "type", 0, 0, 0, 7, 0, "" }, /* smallcptr */
755: { "ulink", 0, 0, 0, 2, 0, "" }, /* para */
756: { "userinput", 0, 0, 0, 9, 0, "" }, /* cptr */
757: { "varargs", 0, 2, 1, 0, 0, "" },
758: { "variablelist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */
759: { "varlistentry",0, 0, 0, 9, 0, "" }, /* term */
760: { "varname", 0, 0, 0, 7, 0, "" }, /* smallcptr */
761: { "videodata", 0, 2, 1, 0, 0, "" },
762: { "videoobject",0, 0, 0, 9, 0, "" }, /* objectinfo */
763: { "void", 0, 2, 1, 0, 0, "" },
764: { "volumenum", 0, 0, 0, 4, 0, "" }, /* docinfo */
765: { "warning", 0, 0, 0, 9, 0, "" }, /* title */
766: { "wordasword", 0, 0, 0, 3, 0, "" }, /* word */
767: { "xref", 0, 2, 1, 0, 0, "" },
768: { "year", 0, 0, 0, 4, 0, "" }, /* docinfo */
769: };
770:
/*
 * Start tags that imply the end of a current element.
 *
 * The table is a sequence of groups, each closed by a NULL entry, with one
 * extra NULL closing the whole table. Any tag of a group implies the end of
 * the current element if the type of that element is in the same group.
 *
 * According to the SGML DTD, HR should be added to the 2nd group (the
 * headings), as it is not allowed within a H1, H2, H3, etc. But we should
 * tolerate that case because many documents contain rules in headings...
 */
char *sgmlEquEnd[] = {
    /* group 1 */
    "dt", "dd", "li", "option",
    NULL,
    /* group 2 */
    "h1", "h2", "h3", "h4", "h5", "h6",
    NULL,
    /* group 3 */
    "ol", "menu", "dir", "address", "pre", "listing", "xmp",
    NULL,
    /* end of table */
    NULL
};
787:
/*
 * Start tags that imply the end of the current element.
 *
 * The table is read as consecutive NULL-terminated groups: the first name
 * of a group is the new start tag, the names after it are the open elements
 * that this tag closes. A NULL in place of a group's first name ends the
 * table. It currently holds no group at all, only that final NULL.
 */
char *sgmlStartClose[] = {
    NULL    /* end of table */
};
794:
/*
 * The list of SGML elements which are supposed not to have
 * CDATA content and where a p element will be implied.
 * The list is NULL-terminated and currently empty.
 *
 * TODO: extend that list by reading the SGML DTD on
 *       implied paragraph
 */
static char *sgmlNoContentElements[] = {
    NULL    /* end of list */
};
805:
806:
/* One slot per group of sgmlStartClose, pointing at the group's first name */
static char **sgmlStartCloseIndex[100];
/* Non-zero once sgmlStartCloseIndex has been filled in */
static int sgmlStartCloseIndexinitialized = 0;
809:
810: /************************************************************************
811: * *
812: * functions to handle SGML specific data *
813: * *
814: ************************************************************************/
815:
816: /**
817: * sgmlInitAutoClose:
818: *
819: * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
820: *
821: */
822: void
823: sgmlInitAutoClose(void) {
824: int index, i = 0;
825:
826: if (sgmlStartCloseIndexinitialized) return;
827:
828: for (index = 0;index < 100;index ++) sgmlStartCloseIndex[index] = NULL;
829: index = 0;
830: while ((sgmlStartClose[i] != NULL) && (index < 100 - 1)) {
831: sgmlStartCloseIndex[index++] = &sgmlStartClose[i];
832: while (sgmlStartClose[i] != NULL) i++;
833: i++;
834: }
835: }
836:
837: /**
838: * sgmlTagLookup:
839: * @tag: The tag name
840: *
841: * Lookup the SGML tag in the ElementTable
842: *
843: * Returns the related sgmlElemDescPtr or NULL if not found.
844: */
845: sgmlElemDescPtr
846: sgmlTagLookup(const xmlChar *tag) {
847: int i;
848:
849: for (i = 0; i < (sizeof(docbookElementTable) /
850: sizeof(docbookElementTable[0]));i++) {
1.7 veillard 851: if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name))
1.1 veillard 852: return(&docbookElementTable[i]);
853: }
854: return(NULL);
855: }
856:
857: /**
858: * sgmlCheckAutoClose:
859: * @newtag: The new tag name
860: * @oldtag: The old tag name
861: *
862: * Checks wether the new tag is one of the registered valid tags for closing old.
863: * Initialize the sgmlStartCloseIndex for fast lookup of closing tags names.
864: *
865: * Returns 0 if no, 1 if yes.
866: */
867: int
868: sgmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) {
869: int i, index;
870: char **close;
871:
872: if (sgmlStartCloseIndexinitialized == 0) sgmlInitAutoClose();
873:
874: /* inefficient, but not a big deal */
875: for (index = 0; index < 100;index++) {
876: close = sgmlStartCloseIndex[index];
877: if (close == NULL) return(0);
1.7 veillard 878: if (xmlStrEqual(BAD_CAST *close, newtag)) break;
1.1 veillard 879: }
880:
881: i = close - sgmlStartClose;
882: i++;
883: while (sgmlStartClose[i] != NULL) {
1.7 veillard 884: if (xmlStrEqual(BAD_CAST sgmlStartClose[i], oldtag)) {
1.1 veillard 885: return(1);
886: }
887: i++;
888: }
889: return(0);
890: }
891:
892: /**
893: * sgmlAutoCloseOnClose:
894: * @ctxt: an SGML parser context
895: * @newtag: The new tag name
896: *
897: * The HTmL DtD allows an ending tag to implicitely close other tags.
898: */
899: void
900: sgmlAutoCloseOnClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
901: sgmlElemDescPtr info;
902: xmlChar *oldname;
903: int i;
904:
905: if ((newtag[0] == '/') && (newtag[1] == 0))
906: return;
907:
908: #ifdef DEBUG
1.10 ! veillard 909: xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
1.1 veillard 910: for (i = 0;i < ctxt->nameNr;i++)
1.10 ! veillard 911: xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
1.1 veillard 912: #endif
913:
914: for (i = (ctxt->nameNr - 1);i >= 0;i--) {
1.7 veillard 915: if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
1.1 veillard 916: }
917: if (i < 0) return;
918:
1.7 veillard 919: while (!xmlStrEqual(newtag, ctxt->name)) {
1.1 veillard 920: info = sgmlTagLookup(ctxt->name);
921: if ((info == NULL) || (info->endTag == 1)) {
922: #ifdef DEBUG
1.10 ! veillard 923: xmlGenericError(xmlGenericErrorContext,"sgmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
1.1 veillard 924: #endif
925: } else {
926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
927: ctxt->sax->error(ctxt->userData,
928: "Opening and ending tag mismatch: %s and %s\n",
929: newtag, ctxt->name);
930: ctxt->wellFormed = 0;
931: }
932: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
933: ctxt->sax->endElement(ctxt->userData, ctxt->name);
934: oldname = sgmlnamePop(ctxt);
935: if (oldname != NULL) {
936: #ifdef DEBUG
1.10 ! veillard 937: xmlGenericError(xmlGenericErrorContext,"sgmlAutoCloseOnClose: popped %s\n", oldname);
1.1 veillard 938: #endif
939: xmlFree(oldname);
940: }
941: }
942: }
943:
944: /**
945: * sgmlAutoClose:
946: * @ctxt: an SGML parser context
947: * @newtag: The new tag name or NULL
948: *
949: * The HTmL DtD allows a tag to implicitely close other tags.
950: * The list is kept in sgmlStartClose array. This function is
951: * called when a new tag has been detected and generates the
952: * appropriates closes if possible/needed.
953: * If newtag is NULL this mean we are at the end of the resource
954: * and we should check
955: */
956: void
957: sgmlAutoClose(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
958: xmlChar *oldname;
959: while ((newtag != NULL) && (ctxt->name != NULL) &&
960: (sgmlCheckAutoClose(newtag, ctxt->name))) {
961: #ifdef DEBUG
1.10 ! veillard 962: xmlGenericError(xmlGenericErrorContext,"sgmlAutoClose: %s closes %s\n", newtag, ctxt->name);
1.1 veillard 963: #endif
964: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
965: ctxt->sax->endElement(ctxt->userData, ctxt->name);
966: oldname = sgmlnamePop(ctxt);
967: if (oldname != NULL) {
968: #ifdef DEBUG
1.10 ! veillard 969: xmlGenericError(xmlGenericErrorContext,"sgmlAutoClose: popped %s\n", oldname);
1.1 veillard 970: #endif
971: xmlFree(oldname);
972: }
973: }
974: #if 0
975: if (newtag == NULL) {
976: sgmlAutoCloseOnClose(ctxt, BAD_CAST"head");
977: sgmlAutoCloseOnClose(ctxt, BAD_CAST"body");
978: sgmlAutoCloseOnClose(ctxt, BAD_CAST"sgml");
979: }
980: while ((newtag == NULL) && (ctxt->name != NULL) &&
1.7 veillard 981: ((xmlStrEqual(ctxt->name, BAD_CAST"head")) ||
982: (xmlStrEqual(ctxt->name, BAD_CAST"body")) ||
983: (xmlStrEqual(ctxt->name, BAD_CAST"sgml")))) {
1.1 veillard 984: #ifdef DEBUG
1.10 ! veillard 985: xmlGenericError(xmlGenericErrorContext,"sgmlAutoClose: EOF closes %s\n", ctxt->name);
1.1 veillard 986: #endif
987: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
988: ctxt->sax->endElement(ctxt->userData, ctxt->name);
989: oldname = sgmlnamePop(ctxt);
990: if (oldname != NULL) {
991: #ifdef DEBUG
1.10 ! veillard 992: xmlGenericError(xmlGenericErrorContext,"sgmlAutoClose: popped %s\n", oldname);
1.1 veillard 993: #endif
994: xmlFree(oldname);
995: }
996: }
997: #endif
998: }
999:
1000: /**
1001: * sgmlAutoCloseTag:
1002: * @doc: the SGML document
1003: * @name: The tag name
1004: * @elem: the SGML element
1005: *
1006: * The HTmL DtD allows a tag to implicitely close other tags.
1007: * The list is kept in sgmlStartClose array. This function checks
1008: * if the element or one of it's children would autoclose the
1009: * given tag.
1010: *
1011: * Returns 1 if autoclose, 0 otherwise
1012: */
1013: int
1014: sgmlAutoCloseTag(sgmlDocPtr doc, const xmlChar *name, sgmlNodePtr elem) {
1015: sgmlNodePtr child;
1016:
1017: if (elem == NULL) return(1);
1.7 veillard 1018: if (xmlStrEqual(name, elem->name)) return(0);
1.1 veillard 1019: if (sgmlCheckAutoClose(elem->name, name)) return(1);
1020: child = elem->children;
1021: while (child != NULL) {
1022: if (sgmlAutoCloseTag(doc, name, child)) return(1);
1023: child = child->next;
1024: }
1025: return(0);
1026: }
1027:
1028: /**
1029: * sgmlIsAutoClosed:
1030: * @doc: the SGML document
1031: * @elem: the SGML element
1032: *
1033: * The HTmL DtD allows a tag to implicitely close other tags.
1034: * The list is kept in sgmlStartClose array. This function checks
1035: * if a tag is autoclosed by one of it's child
1036: *
1037: * Returns 1 if autoclosed, 0 otherwise
1038: */
1039: int
1040: sgmlIsAutoClosed(sgmlDocPtr doc, sgmlNodePtr elem) {
1041: sgmlNodePtr child;
1042:
1043: if (elem == NULL) return(1);
1044: child = elem->children;
1045: while (child != NULL) {
1046: if (sgmlAutoCloseTag(doc, elem->name, child)) return(1);
1047: child = child->next;
1048: }
1049: return(0);
1050: }
1051:
1052: /**
1053: * sgmlCheckImplied:
1054: * @ctxt: an SGML parser context
1055: * @newtag: The new tag name
1056: *
1057: * The HTmL DtD allows a tag to exists only implicitely
1058: * called when a new tag has been detected and generates the
1059: * appropriates implicit tags if missing
1060: */
1061: void
1062: sgmlCheckImplied(sgmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1063: #if 0
1.7 veillard 1064: if (xmlStrEqual(newtag, BAD_CAST"sgml"))
1.1 veillard 1065: return;
1066: if (ctxt->nameNr <= 0) {
1067: #ifdef DEBUG
1.10 ! veillard 1068: xmlGenericError(xmlGenericErrorContext,"Implied element sgml: pushed sgml\n");
1.1 veillard 1069: #endif
1070: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"sgml"));
1071: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1072: ctxt->sax->startElement(ctxt->userData, BAD_CAST"sgml", NULL);
1073: }
1.7 veillard 1074: if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
1.1 veillard 1075: return;
1076: if (ctxt->nameNr <= 1) {
1.7 veillard 1077: if ((xmlStrEqual(newtag, BAD_CAST"script")) ||
1078: (xmlStrEqual(newtag, BAD_CAST"style")) ||
1079: (xmlStrEqual(newtag, BAD_CAST"meta")) ||
1080: (xmlStrEqual(newtag, BAD_CAST"link")) ||
1081: (xmlStrEqual(newtag, BAD_CAST"title")) ||
1082: (xmlStrEqual(newtag, BAD_CAST"base"))) {
1.1 veillard 1083: /*
1084: * dropped OBJECT ... i you put it first BODY will be
1085: * assumed !
1086: */
1087: #ifdef DEBUG
1.10 ! veillard 1088: xmlGenericError(xmlGenericErrorContext,"Implied element head: pushed head\n");
1.1 veillard 1089: #endif
1090: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"head"));
1091: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1092: ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1093: } else {
1094: #ifdef DEBUG
1.10 ! veillard 1095: xmlGenericError(xmlGenericErrorContext,"Implied element body: pushed body\n");
1.1 veillard 1096: #endif
1097: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"body"));
1098: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1099: ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1100: }
1101: }
1102: #endif
1103: }
1104:
1105: /**
1106: * sgmlCheckParagraph
1107: * @ctxt: an SGML parser context
1108: *
1109: * Check whether a p element need to be implied before inserting
1110: * characters in the current element.
1111: *
1112: * Returns 1 if a paragraph has been inserted, 0 if not and -1
1113: * in case of error.
1114: */
1115:
1116: int
1117: sgmlCheckParagraph(sgmlParserCtxtPtr ctxt) {
1118: const xmlChar *tag;
1119: int i;
1120:
1121: if (ctxt == NULL)
1122: return(-1);
1123: tag = ctxt->name;
1124: if (tag == NULL) {
1125: sgmlAutoClose(ctxt, BAD_CAST"p");
1126: sgmlCheckImplied(ctxt, BAD_CAST"p");
1127: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
1128: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1129: ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1130: return(1);
1131: }
1132: for (i = 0; sgmlNoContentElements[i] != NULL; i++) {
1.7 veillard 1133: if (xmlStrEqual(tag, BAD_CAST sgmlNoContentElements[i])) {
1.1 veillard 1134: #ifdef DEBUG
1.10 ! veillard 1135: xmlGenericError(xmlGenericErrorContext,"Implied element paragraph\n");
1.1 veillard 1136: #endif
1137: sgmlAutoClose(ctxt, BAD_CAST"p");
1138: sgmlCheckImplied(ctxt, BAD_CAST"p");
1139: sgmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
1140: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1141: ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1142: return(1);
1143: }
1144: }
1145: return(0);
1146: }
1147:
1148: /************************************************************************
1149: * *
1150: * The list of SGML predefined entities *
1151: * *
1152: ************************************************************************/
1153:
1154:
1155: sgmlEntityDesc docbookEntitiesTable[] = {
1156: /*
1157: * the 4 absolute ones, plus apostrophe.
1158: */
1159: { 0x0026, "amp", "AMPERSAND" },
1160: { 0x003C, "lt", "LESS-THAN SIGN" },
1161:
1162: /*
1163: * Converted with VI macros from docbook ent files
1164: */
1165: { 0x0021, "excl", "EXCLAMATION MARK" },
1166: { 0x0022, "quot", "QUOTATION MARK" },
1167: { 0x0023, "num", "NUMBER SIGN" },
1168: { 0x0024, "dollar", "DOLLAR SIGN" },
1169: { 0x0025, "percnt", "PERCENT SIGN" },
1170: { 0x0027, "apos", "APOSTROPHE" },
1171: { 0x0028, "lpar", "LEFT PARENTHESIS" },
1172: { 0x0029, "rpar", "RIGHT PARENTHESIS" },
1173: { 0x002A, "ast", "ASTERISK OPERATOR" },
1174: { 0x002B, "plus", "PLUS SIGN" },
1175: { 0x002C, "comma", "COMMA" },
1176: { 0x002D, "hyphen", "HYPHEN-MINUS" },
1177: { 0x002E, "period", "FULL STOP" },
1178: { 0x002F, "sol", "SOLIDUS" },
1179: { 0x003A, "colon", "COLON" },
1180: { 0x003B, "semi", "SEMICOLON" },
1181: { 0x003D, "equals", "EQUALS SIGN" },
1182: { 0x003E, "gt", "GREATER-THAN SIGN" },
1183: { 0x003F, "quest", "QUESTION MARK" },
1184: { 0x0040, "commat", "COMMERCIAL AT" },
1185: { 0x005B, "lsqb", "LEFT SQUARE BRACKET" },
1186: { 0x005C, "bsol", "REVERSE SOLIDUS" },
1187: { 0x005D, "rsqb", "RIGHT SQUARE BRACKET" },
1188: { 0x005E, "circ", "RING OPERATOR" },
1189: { 0x005F, "lowbar", "LOW LINE" },
1190: { 0x0060, "grave", "GRAVE ACCENT" },
1191: { 0x007B, "lcub", "LEFT CURLY BRACKET" },
1192: { 0x007C, "verbar", "VERTICAL LINE" },
1193: { 0x007D, "rcub", "RIGHT CURLY BRACKET" },
1194: { 0x00A0, "nbsp", "NO-BREAK SPACE" },
1195: { 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" },
1196: { 0x00A2, "cent", "CENT SIGN" },
1197: { 0x00A3, "pound", "POUND SIGN" },
1198: { 0x00A4, "curren", "CURRENCY SIGN" },
1199: { 0x00A5, "yen", "YEN SIGN" },
1200: { 0x00A6, "brvbar", "BROKEN BAR" },
1201: { 0x00A7, "sect", "SECTION SIGN" },
1202: { 0x00A8, "die", "" },
1203: { 0x00A8, "Dot", "" },
1204: { 0x00A8, "uml", "" },
1205: { 0x00A9, "copy", "COPYRIGHT SIGN" },
1206: { 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" },
1207: { 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" },
1208: { 0x00AC, "not", "NOT SIGN" },
1209: { 0x00AD, "shy", "SOFT HYPHEN" },
1210: { 0x00AE, "reg", "REG TRADE MARK SIGN" },
1211: { 0x00AF, "macr", "MACRON" },
1212: { 0x00B0, "deg", "DEGREE SIGN" },
1213: { 0x00B1, "plusmn", "PLUS-MINUS SIGN" },
1214: { 0x00B2, "sup2", "SUPERSCRIPT TWO" },
1215: { 0x00B3, "sup3", "SUPERSCRIPT THREE" },
1216: { 0x00B4, "acute", "ACUTE ACCENT" },
1217: { 0x00B5, "micro", "MICRO SIGN" },
1218: { 0x00B6, "para", "PILCROW SIGN" },
1219: { 0x00B7, "middot", "MIDDLE DOT" },
1220: { 0x00B8, "cedil", "CEDILLA" },
1221: { 0x00B9, "sup1", "SUPERSCRIPT ONE" },
1222: { 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" },
1223: { 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" },
1224: { 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" },
1225: { 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" },
1226: { 0x00BD, "half", "VULGAR FRACTION ONE HALF" },
1227: { 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" },
1228: { 0x00BF, "iquest", "INVERTED QUESTION MARK" },
1229: { 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" },
1230: { 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" },
1231: { 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" },
1232: { 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" },
1233: { 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" },
1234: { 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" },
1235: { 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" },
1236: { 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" },
1237: { 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" },
1238: { 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" },
1239: { 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" },
1240: { 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" },
1241: { 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" },
1242: { 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" },
1243: { 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" },
1244: { 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" },
1245: { 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" },
1246: { 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" },
1247: { 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" },
1248: { 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" },
1249: { 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" },
1250: { 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" },
1251: { 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" },
1252: { 0x00D7, "times", "MULTIPLICATION SIGN" },
1253: { 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" },
1254: { 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" },
1255: { 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" },
1256: { 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" },
1257: { 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" },
1258: { 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" },
1259: { 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" },
1260: { 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" },
1261: { 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" },
1262: { 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" },
1263: { 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" },
1264: { 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" },
1265: { 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" },
1266: { 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" },
1267: { 0x00E6, "aelig", "LATIN SMALL LETTER AE" },
1268: { 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" },
1269: { 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" },
1270: { 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" },
1271: { 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" },
1272: { 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" },
1273: { 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" },
1274: { 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" },
1275: { 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" },
1276: { 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" },
1277: { 0x00F0, "eth", "LATIN SMALL LETTER ETH" },
1278: { 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" },
1279: { 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" },
1280: { 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" },
1281: { 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" },
1282: { 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" },
1283: { 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" },
1284: { 0x00F7, "divide", "DIVISION SIGN" },
1285: { 0x00F8, "oslash", "CIRCLED DIVISION SLASH" },
1286: { 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" },
1287: { 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" },
1288: { 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" },
1289: { 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" },
1290: { 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" },
1291: { 0x00FE, "thorn", "LATIN SMALL LETTER THORN" },
1292: { 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" },
1293: { 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" },
1294: { 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" },
1295: { 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" },
1296: { 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" },
1297: { 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" },
1298: { 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" },
1299: { 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" },
1300: { 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" },
1301: { 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" },
1302: { 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" },
1303: { 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" },
1304: { 0x010B, "cdot", "DOT OPERATOR" },
1305: { 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" },
1306: { 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" },
1307: { 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" },
1308: { 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" },
1309: { 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" },
1310: { 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" },
1311: { 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" },
1312: { 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" },
1313: { 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" },
1314: { 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" },
1315: { 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" },
1316: { 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" },
1317: { 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" },
1318: { 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" },
1319: { 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" },
1320: { 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" },
1321: { 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" },
1322: { 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" },
1323: { 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" },
1324: { 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" },
1325: { 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" },
1326: { 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" },
1327: { 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" },
1328: { 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" },
1329: { 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" },
1330: { 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" },
1331: { 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" },
1332: { 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" },
1333: { 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" },
1334: { 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" },
1335: { 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" },
1336: { 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" },
1337: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
1338: { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" },
1339: { 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" },
1340: { 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" },
1341: { 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" },
1342: { 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" },
1343: { 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" },
1344: { 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" },
1345: { 0x0138, "kgreen", "LATIN SMALL LETTER KRA" },
1346: { 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" },
1347: { 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" },
1348: { 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" },
1349: { 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" },
1350: { 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" },
1351: { 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" },
1352: { 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" },
1353: { 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" },
1354: { 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" },
1355: { 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" },
1356: { 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" },
1357: { 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" },
1358: { 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" },
1359: { 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" },
1360: { 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" },
1361: { 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" },
1362: { 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" },
1363: { 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" },
1364: { 0x014B, "eng", "LATIN SMALL LETTER ENG" },
1365: { 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" },
1366: { 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" },
1367: { 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" },
1368: { 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" },
1369: { 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" },
1370: { 0x0153, "oelig", "LATIN SMALL LIGATURE OE" },
1371: { 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" },
1372: { 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" },
1373: { 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" },
1374: { 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" },
1375: { 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" },
1376: { 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" },
1377: { 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" },
1378: { 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" },
1379: { 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" },
1380: { 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" },
1381: { 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" },
1382: { 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" },
1383: { 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" },
1384: { 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" },
1385: { 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" },
1386: { 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" },
1387: { 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" },
1388: { 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" },
1389: { 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" },
1390: { 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" },
1391: { 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" },
1392: { 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" },
1393: { 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" },
1394: { 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" },
1395: { 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" },
1396: { 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" },
1397: { 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" },
1398: { 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" },
1399: { 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" },
1400: { 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" },
1401: { 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" },
1402: { 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" },
1403: { 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" },
1404: { 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" },
1405: { 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" },
1406: { 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" },
1407: { 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" },
1408: { 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" },
1409: { 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" },
1410: { 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" },
1411: { 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" },
1412: { 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" },
1413: { 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" },
1414: { 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" },
1415: { 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" },
1416: { 0x02C7, "caron", "CARON" },
1417: { 0x02D8, "breve", "BREVE" },
1418: { 0x02D9, "dot", "DOT ABOVE" },
1419: { 0x02DA, "ring", "RING ABOVE" },
1420: { 0x02DB, "ogon", "OGONEK" },
1421: { 0x02DC, "tilde", "TILDE" },
1422: { 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" },
1423: { 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" },
1424: { 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" },
1425: { 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" },
1426: { 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" },
1427: { 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" },
1428: { 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" },
1429: { 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" },
1430: { 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" },
1431: { 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" },
1432: { 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" },
1433: { 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" },
1434: { 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" },
1435: { 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" },
1436: { 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" },
1437: { 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" },
1438: { 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" },
1439: { 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" },
1440: { 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" },
1441: { 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" },
1442: { 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" },
1443: { 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" },
1444: { 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" },
1445: { 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" },
1446: { 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" },
1447: { 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" },
1448: { 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" },
1449: { 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" },
1450: { 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" },
1451: { 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" },
1452: { 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" },
1453: { 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" },
1454: { 0x039E, "Xi", "GREEK CAPITAL LETTER XI" },
1455: { 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" },
1456: { 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" },
1457: { 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" },
1458: { 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" },
1459: { 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" },
1460: { 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" },
1461: { 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" },
1462: { 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" },
1463: { 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" },
1464: { 0x03A5, "Ugr", "" },
1465: { 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" },
1466: { 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" },
1467: { 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" },
1468: { 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" },
1469: { 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" },
1470: { 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" },
1471: { 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" },
1472: { 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" },
1473: { 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" },
1474: { 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" },
1475: { 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" },
1476: { 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" },
1477: { 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" },
1478: { 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" },
1479: { 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" },
1480: { 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" },
1481: { 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" },
1482: { 0x03B1, "agr", "" },
1483: { 0x03B1, "alpha", "" },
1484: { 0x03B1, "b.alpha", "" },
1485: { 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" },
1486: { 0x03B2, "beta", "GREEK SMALL LETTER BETA" },
1487: { 0x03B2, "bgr", "GREEK SMALL LETTER BETA" },
1488: { 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" },
1489: { 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" },
1490: { 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" },
1491: { 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" },
1492: { 0x03B4, "delta", "GREEK SMALL LETTER DELTA" },
1493: { 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" },
1494: { 0x03B5, "b.epsi", "" },
1495: { 0x03B5, "b.epsis", "" },
1496: { 0x03B5, "b.epsiv", "" },
1497: { 0x03B5, "egr", "" },
1498: { 0x03B5, "epsiv", "" },
1499: { 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" },
1500: { 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" },
1501: { 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" },
1502: { 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" },
1503: { 0x03B7, "eegr", "GREEK SMALL LETTER ETA" },
1504: { 0x03B7, "eta", "GREEK SMALL LETTER ETA" },
1505: { 0x03B8, "b.thetas", "" },
1506: { 0x03B8, "thetas", "" },
1507: { 0x03B8, "thgr", "" },
1508: { 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" },
1509: { 0x03B9, "igr", "GREEK SMALL LETTER IOTA" },
1510: { 0x03B9, "iota", "GREEK SMALL LETTER IOTA" },
1511: { 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" },
1512: { 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" },
1513: { 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" },
1514: { 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" },
1515: { 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" },
1516: { 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" },
1517: { 0x03BC, "b.mu", "GREEK SMALL LETTER MU" },
1518: { 0x03BC, "mgr", "GREEK SMALL LETTER MU" },
1519: { 0x03BC, "mu", "GREEK SMALL LETTER MU" },
1520: { 0x03BD, "b.nu", "GREEK SMALL LETTER NU" },
1521: { 0x03BD, "ngr", "GREEK SMALL LETTER NU" },
1522: { 0x03BD, "nu", "GREEK SMALL LETTER NU" },
1523: { 0x03BE, "b.xi", "GREEK SMALL LETTER XI" },
1524: { 0x03BE, "xgr", "GREEK SMALL LETTER XI" },
1525: { 0x03BE, "xi", "GREEK SMALL LETTER XI" },
1526: { 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" },
1527: { 0x03C0, "b.pi", "GREEK SMALL LETTER PI" },
1528: { 0x03C0, "pgr", "GREEK SMALL LETTER PI" },
1529: { 0x03C0, "pi", "GREEK SMALL LETTER PI" },
1530: { 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" },
1531: { 0x03C1, "rgr", "GREEK SMALL LETTER RHO" },
1532: { 0x03C1, "rho", "GREEK SMALL LETTER RHO" },
1533: { 0x03C2, "b.sigmav", "" },
1534: { 0x03C2, "sfgr", "" },
1535: { 0x03C2, "sigmav", "" },
1536: { 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" },
1537: { 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" },
1538: { 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" },
1539: { 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" },
1540: { 0x03C4, "tau", "GREEK SMALL LETTER TAU" },
1541: { 0x03C4, "tgr", "GREEK SMALL LETTER TAU" },
1542: { 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" },
1543: { 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" },
1544: { 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" },
1545: { 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" },
1546: { 0x03C6, "phgr", "GREEK SMALL LETTER PHI" },
1547: { 0x03C6, "phis", "GREEK SMALL LETTER PHI" },
1548: { 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" },
1549: { 0x03C7, "chi", "GREEK SMALL LETTER CHI" },
1550: { 0x03C7, "khgr", "GREEK SMALL LETTER CHI" },
1551: { 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" },
1552: { 0x03C8, "psgr", "GREEK SMALL LETTER PSI" },
1553: { 0x03C8, "psi", "GREEK SMALL LETTER PSI" },
1554: { 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" },
1555: { 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" },
1556: { 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" },
1557: { 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" },
1558: { 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" },
1559: { 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" },
1560: { 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" },
1561: { 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" },
1562: { 0x03D1, "b.thetav", "" },
1563: { 0x03D1, "thetav", "" },
1564: { 0x03D2, "b.Upsi", "" },
1565: { 0x03D2, "Upsi", "" },
1566: { 0x03D5, "b.phiv", "GREEK PHI SYMBOL" },
1567: { 0x03D5, "phiv", "GREEK PHI SYMBOL" },
1568: { 0x03D6, "b.piv", "GREEK PI SYMBOL" },
1569: { 0x03D6, "piv", "GREEK PI SYMBOL" },
1570: { 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" },
1571: { 0x03DC, "gammad", "GREEK LETTER DIGAMMA" },
1572: { 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" },
1573: { 0x03F0, "kappav", "GREEK KAPPA SYMBOL" },
1574: { 0x03F1, "b.rhov", "GREEK RHO SYMBOL" },
1575: { 0x03F1, "rhov", "GREEK RHO SYMBOL" },
1576: { 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" },
1577: { 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" },
1578: { 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" },
1579: { 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" },
1580: { 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" },
1581: { 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" },
1582: { 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" },
1583: { 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" },
1584: { 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" },
1585: { 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" },
1586: { 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" },
1587: { 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" },
1588: { 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" },
1589: { 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" },
1590: { 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" },
1591: { 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" },
1592: { 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" },
1593: { 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" },
1594: { 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" },
1595: { 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" },
1596: { 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" },
1597: { 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" },
1598: { 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" },
1599: { 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" },
1600: { 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" },
1601: { 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" },
1602: { 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" },
1603: { 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" },
1604: { 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" },
1605: { 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" },
1606: { 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" },
1607: { 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" },
1608: { 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" },
1609: { 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" },
1610: { 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" },
1611: { 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" },
1612: { 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" },
1613: { 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" },
1614: { 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" },
1615: { 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" },
1616: { 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" },
1617: { 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" },
1618: { 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" },
1619: { 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" },
1620: { 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" },
1621: { 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" },
1622: { 0x0430, "acy", "CYRILLIC SMALL LETTER A" },
1623: { 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" },
1624: { 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" },
1625: { 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" },
1626: { 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" },
1627: { 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" },
1628: { 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" },
1629: { 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" },
1630: { 0x0438, "icy", "CYRILLIC SMALL LETTER I" },
1631: { 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" },
1632: { 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" },
1633: { 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" },
1634: { 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" },
1635: { 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" },
1636: { 0x043E, "ocy", "CYRILLIC SMALL LETTER O" },
1637: { 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" },
1638: { 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" },
1639: { 0x0441, "scy", "CYRILLIC SMALL LETTER ES" },
1640: { 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" },
1641: { 0x0443, "ucy", "CYRILLIC SMALL LETTER U" },
1642: { 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" },
1643: { 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" },
1644: { 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" },
1645: { 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" },
1646: { 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" },
1647: { 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" },
1648: { 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" },
1649: { 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" },
1650: { 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" },
1651: { 0x044D, "ecy", "CYRILLIC SMALL LETTER E" },
1652: { 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" },
1653: { 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" },
1654: { 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" },
1655: { 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" },
1656: { 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" },
1657: { 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" },
1658: { 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" },
1659: { 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" },
1660: { 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" },
1661: { 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" },
1662: { 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" },
1663: { 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" },
1664: { 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" },
1665: { 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" },
1666: { 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" },
1667: { 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" },
1668: { 0x2002, "ensp", "EN SPACE" },
1669: { 0x2003, "emsp", "EM SPACE" },
1670: { 0x2004, "emsp13", "THREE-PER-EM SPACE" },
1671: { 0x2005, "emsp14", "FOUR-PER-EM SPACE" },
1672: { 0x2007, "numsp", "FIGURE SPACE" },
1673: { 0x2008, "puncsp", "PUNCTUATION SPACE" },
1674: { 0x2009, "thinsp", "THIN SPACE" },
1675: { 0x200A, "hairsp", "HAIR SPACE" },
1676: { 0x2010, "dash", "HYPHEN" },
1677: { 0x2013, "ndash", "EN DASH" },
1678: { 0x2014, "mdash", "EM DASH" },
1679: { 0x2015, "horbar", "HORIZONTAL BAR" },
1680: { 0x2016, "Verbar", "DOUBLE VERTICAL LINE" },
1681: { 0x2018, "lsquo", "" },
1682: { 0x2018, "rsquor", "" },
1683: { 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" },
1684: { 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" },
1685: { 0x201C, "ldquo", "" },
1686: { 0x201C, "rdquor", "" },
1687: { 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" },
1688: { 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" },
1689: { 0x2020, "dagger", "DAGGER" },
1690: { 0x2021, "Dagger", "DOUBLE DAGGER" },
1691: { 0x2022, "bull", "BULLET" },
1692: { 0x2025, "nldr", "TWO DOT LEADER" },
1693: { 0x2026, "hellip", "HORIZONTAL ELLIPSIS" },
1694: { 0x2026, "mldr", "HORIZONTAL ELLIPSIS" },
1695: { 0x2030, "permil", "PER MILLE SIGN" },
1696: { 0x2032, "prime", "PRIME" },
1697: { 0x2032, "vprime", "PRIME" },
1698: { 0x2033, "Prime", "DOUBLE PRIME" },
1699: { 0x2034, "tprime", "TRIPLE PRIME" },
1700: { 0x2035, "bprime", "REVERSED PRIME" },
1701: { 0x2041, "caret", "CARET" },
1702: { 0x2043, "hybull", "HYPHEN BULLET" },
1703: { 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" },
1704: { 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" },
1705: { 0x2105, "incare", "CARE OF" },
1706: { 0x210B, "hamilt", "SCRIPT CAPITAL H" },
1707: { 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" },
1708: { 0x2111, "image", "BLACK-LETTER CAPITAL I" },
1709: { 0x2112, "lagran", "SCRIPT CAPITAL L" },
1710: { 0x2113, "ell", "SCRIPT SMALL L" },
1711: { 0x2116, "numero", "NUMERO SIGN" },
1712: { 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" },
1713: { 0x2118, "weierp", "SCRIPT CAPITAL P" },
1714: { 0x211C, "real", "BLACK-LETTER CAPITAL R" },
1715: { 0x211E, "rx", "PRESCRIPTION TAKE" },
1716: { 0x2122, "trade", "TRADE MARK SIGN" },
1717: { 0x2126, "ohm", "OHM SIGN" },
1718: { 0x212B, "angst", "ANGSTROM SIGN" },
1719: { 0x212C, "bernou", "SCRIPT CAPITAL B" },
1720: { 0x2133, "phmmat", "SCRIPT CAPITAL M" },
1721: { 0x2134, "order", "SCRIPT SMALL O" },
1722: { 0x2135, "aleph", "ALEF SYMBOL" },
1723: { 0x2136, "beth", "BET SYMBOL" },
1724: { 0x2137, "gimel", "GIMEL SYMBOL" },
1725: { 0x2138, "daleth", "DALET SYMBOL" },
1726: { 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" },
1727: { 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" },
1728: { 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" },
1729: { 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" },
1730: { 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" },
1731: { 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" },
1732: { 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" },
1733: { 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" },
1734: { 0x215B, "frac18", "" },
1735: { 0x215C, "frac38", "" },
1736: { 0x215D, "frac58", "" },
1737: { 0x215E, "frac78", "" },
1738: { 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" },
1739: { 0x2191, "uarr", "UPWARDS ARROW" },
1740: { 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" },
1741: { 0x2193, "darr", "DOWNWARDS ARROW" },
1742: { 0x2194, "harr", "LEFT RIGHT ARROW" },
1743: { 0x2194, "xhArr", "LEFT RIGHT ARROW" },
1744: { 0x2194, "xharr", "LEFT RIGHT ARROW" },
1745: { 0x2195, "varr", "UP DOWN ARROW" },
1746: { 0x2196, "nwarr", "NORTH WEST ARROW" },
1747: { 0x2197, "nearr", "NORTH EAST ARROW" },
1748: { 0x2198, "drarr", "SOUTH EAST ARROW" },
1749: { 0x2199, "dlarr", "SOUTH WEST ARROW" },
1750: { 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" },
1751: { 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" },
1752: { 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" },
1753: { 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" },
1754: { 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" },
1755: { 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" },
1756: { 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" },
1757: { 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" },
1758: { 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" },
1759: { 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" },
1760: { 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" },
1761: { 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" },
1762: { 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" },
1763: { 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" },
1764: { 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" },
1765: { 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" },
1766: { 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" },
1767: { 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" },
1768: { 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" },
1769: { 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" },
1770: { 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" },
1771: { 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" },
1772: { 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" },
1773: { 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" },
1774: { 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" },
1775: { 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" },
1776: { 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" },
1777: { 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" },
1778: { 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" },
1779: { 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" },
1780: { 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" },
1781: { 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" },
1782: { 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" },
1783: { 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" },
1784: { 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" },
1785: { 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" },
1786: { 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" },
1787: { 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" },
1788: { 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" },
1789: { 0x21D0, "lArr", "LEFTWARDS ARROW" },
1790: { 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" },
1791: { 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" },
1792: { 0x21D2, "rArr", "RIGHTWARDS ARROW" },
1793: { 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" },
1794: { 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" },
1795: { 0x21D4, "hArr", "" },
1796: { 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" },
1797: { 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" },
1798: { 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" },
1799: { 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" },
1800: { 0x2200, "forall", "" },
1801: { 0x2201, "comp", "COMPLEMENT" },
1802: { 0x2202, "part", "" },
1803: { 0x2203, "exist", "" },
1804: { 0x2204, "nexist", "THERE DOES NOT EXIST" },
1805: { 0x2205, "empty", "" },
1806: { 0x2207, "nabla", "NABLA" },
1807: { 0x2209, "notin", "" },
1808: { 0x220A, "epsi", "" },
1809: { 0x220A, "epsis", "" },
1810: { 0x220A, "isin", "" },
1811: { 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" },
1812: { 0x220D, "ni", "" },
1813: { 0x220F, "prod", "N-ARY PRODUCT" },
1814: { 0x2210, "amalg", "N-ARY COPRODUCT" },
1815: { 0x2210, "coprod", "N-ARY COPRODUCT" },
1816: { 0x2210, "samalg", "" },
1817: { 0x2211, "sum", "N-ARY SUMMATION" },
1818: { 0x2212, "minus", "MINUS SIGN" },
1819: { 0x2213, "mnplus", "" },
1820: { 0x2214, "plusdo", "DOT PLUS" },
1821: { 0x2216, "setmn", "SET MINUS" },
1822: { 0x2216, "ssetmn", "SET MINUS" },
1823: { 0x2217, "lowast", "ASTERISK OPERATOR" },
1824: { 0x2218, "compfn", "RING OPERATOR" },
1825: { 0x221A, "radic", "" },
1826: { 0x221D, "prop", "" },
1827: { 0x221D, "vprop", "" },
1828: { 0x221E, "infin", "" },
1829: { 0x221F, "ang90", "RIGHT ANGLE" },
1830: { 0x2220, "ang", "ANGLE" },
1831: { 0x2221, "angmsd", "MEASURED ANGLE" },
1832: { 0x2222, "angsph", "" },
1833: { 0x2223, "mid", "" },
1834: { 0x2224, "nmid", "DOES NOT DIVIDE" },
1835: { 0x2225, "par", "PARALLEL TO" },
1836: { 0x2225, "spar", "PARALLEL TO" },
1837: { 0x2226, "npar", "NOT PARALLEL TO" },
1838: { 0x2226, "nspar", "NOT PARALLEL TO" },
1839: { 0x2227, "and", "" },
1840: { 0x2228, "or", "" },
1841: { 0x2229, "cap", "" },
1842: { 0x222A, "cup", "" },
1843: { 0x222B, "int", "" },
1844: { 0x222E, "conint", "" },
1845: { 0x2234, "there4", "" },
1846: { 0x2235, "becaus", "BECAUSE" },
1847: { 0x223C, "sim", "" },
1848: { 0x223C, "thksim", "TILDE OPERATOR" },
1849: { 0x223D, "bsim", "" },
1850: { 0x2240, "wreath", "WREATH PRODUCT" },
1851: { 0x2241, "nsim", "" },
1852: { 0x2243, "sime", "" },
1853: { 0x2244, "nsime", "" },
1854: { 0x2245, "cong", "" },
1855: { 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" },
1856: { 0x2248, "ap", "" },
1857: { 0x2248, "thkap", "ALMOST EQUAL TO" },
1858: { 0x2249, "nap", "NOT ALMOST EQUAL TO" },
1859: { 0x224A, "ape", "" },
1860: { 0x224C, "bcong", "ALL EQUAL TO" },
1861: { 0x224D, "asymp", "EQUIVALENT TO" },
1862: { 0x224E, "bump", "" },
1863: { 0x224F, "bumpe", "" },
1864: { 0x2250, "esdot", "" },
1865: { 0x2251, "eDot", "" },
1866: { 0x2252, "efDot", "" },
1867: { 0x2253, "erDot", "" },
1868: { 0x2254, "colone", "" },
1869: { 0x2255, "ecolon", "" },
1870: { 0x2256, "ecir", "" },
1871: { 0x2257, "cire", "" },
1872: { 0x2259, "wedgeq", "ESTIMATES" },
1873: { 0x225C, "trie", "" },
1874: { 0x2260, "ne", "" },
1875: { 0x2261, "equiv", "" },
1876: { 0x2262, "nequiv", "NOT IDENTICAL TO" },
1877: { 0x2264, "le", "" },
1878: { 0x2264, "les", "LESS-THAN OR EQUAL TO" },
1879: { 0x2265, "ge", "GREATER-THAN OR EQUAL TO" },
1880: { 0x2265, "ges", "GREATER-THAN OR EQUAL TO" },
1881: { 0x2266, "lE", "" },
1882: { 0x2267, "gE", "" },
1883: { 0x2268, "lnE", "" },
1884: { 0x2268, "lne", "" },
1885: { 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" },
1886: { 0x2269, "gnE", "" },
1887: { 0x2269, "gne", "" },
1888: { 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" },
1889: { 0x226A, "Lt", "MUCH LESS-THAN" },
1890: { 0x226B, "Gt", "MUCH GREATER-THAN" },
1891: { 0x226C, "twixt", "BETWEEN" },
1892: { 0x226E, "nlt", "NOT LESS-THAN" },
1893: { 0x226F, "ngt", "NOT GREATER-THAN" },
1894: { 0x2270, "nlE", "" },
1895: { 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" },
1896: { 0x2270, "nles", "" },
1897: { 0x2271, "ngE", "" },
1898: { 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" },
1899: { 0x2271, "nges", "" },
1900: { 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" },
1901: { 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" },
1902: { 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" },
1903: { 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" },
1904: { 0x2276, "lg", "LESS-THAN OR GREATER-THAN" },
1905: { 0x2277, "gl", "" },
1906: { 0x227A, "pr", "" },
1907: { 0x227B, "sc", "" },
1908: { 0x227C, "cupre", "" },
1909: { 0x227C, "pre", "" },
1910: { 0x227D, "sccue", "" },
1911: { 0x227D, "sce", "" },
1912: { 0x227E, "prap", "" },
1913: { 0x227E, "prsim", "" },
1914: { 0x227F, "scap", "" },
1915: { 0x227F, "scsim", "" },
1916: { 0x2280, "npr", "DOES NOT PRECEDE" },
1917: { 0x2281, "nsc", "DOES NOT SUCCEED" },
1918: { 0x2282, "sub", "" },
1919: { 0x2283, "sup", "" },
1920: { 0x2284, "nsub", "NOT A SUBSET OF" },
1921: { 0x2285, "nsup", "NOT A SUPERSET OF" },
1922: { 0x2286, "subE", "" },
1923: { 0x2286, "sube", "" },
1924: { 0x2287, "supE", "" },
1925: { 0x2287, "supe", "" },
1926: { 0x2288, "nsubE", "" },
1927: { 0x2288, "nsube", "" },
1928: { 0x2289, "nsupE", "" },
1929: { 0x2289, "nsupe", "" },
1930: { 0x228A, "subne", "" },
1931: { 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" },
1932: { 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" },
1933: { 0x228B, "supnE", "" },
1934: { 0x228B, "supne", "" },
1935: { 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" },
1936: { 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" },
1937: { 0x228E, "uplus", "MULTISET UNION" },
1938: { 0x228F, "sqsub", "" },
1939: { 0x2290, "sqsup", "" },
1940: { 0x2291, "sqsube", "" },
1941: { 0x2292, "sqsupe", "" },
1942: { 0x2293, "sqcap", "SQUARE CAP" },
1943: { 0x2294, "sqcup", "SQUARE CUP" },
1944: { 0x2295, "oplus", "CIRCLED PLUS" },
1945: { 0x2296, "ominus", "CIRCLED MINUS" },
1946: { 0x2297, "otimes", "CIRCLED TIMES" },
1947: { 0x2298, "osol", "CIRCLED DIVISION SLASH" },
1948: { 0x2299, "odot", "CIRCLED DOT OPERATOR" },
1949: { 0x229A, "ocir", "CIRCLED RING OPERATOR" },
1950: { 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" },
1951: { 0x229D, "odash", "CIRCLED DASH" },
1952: { 0x229E, "plusb", "SQUARED PLUS" },
1953: { 0x229F, "minusb", "SQUARED MINUS" },
1954: { 0x22A0, "timesb", "SQUARED TIMES" },
1955: { 0x22A1, "sdotb", "SQUARED DOT OPERATOR" },
1956: { 0x22A2, "vdash", "" },
1957: { 0x22A3, "dashv", "" },
1958: { 0x22A4, "top", "DOWN TACK" },
1959: { 0x22A5, "bottom", "" },
1960: { 0x22A5, "perp", "" },
1961: { 0x22A7, "models", "MODELS" },
1962: { 0x22A8, "vDash", "" },
1963: { 0x22A9, "Vdash", "" },
1964: { 0x22AA, "Vvdash", "" },
1965: { 0x22AC, "nvdash", "DOES NOT PROVE" },
1966: { 0x22AD, "nvDash", "NOT TRUE" },
1967: { 0x22AE, "nVdash", "DOES NOT FORCE" },
1968: { 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" },
1969: { 0x22B2, "vltri", "" },
1970: { 0x22B3, "vrtri", "" },
1971: { 0x22B4, "ltrie", "" },
1972: { 0x22B5, "rtrie", "" },
1973: { 0x22B8, "mumap", "MULTIMAP" },
1974: { 0x22BA, "intcal", "INTERCALATE" },
1975: { 0x22BB, "veebar", "" },
1976: { 0x22BC, "barwed", "NAND" },
1977: { 0x22C4, "diam", "DIAMOND OPERATOR" },
1978: { 0x22C5, "sdot", "DOT OPERATOR" },
1979: { 0x22C6, "sstarf", "STAR OPERATOR" },
1980: { 0x22C6, "star", "STAR OPERATOR" },
1981: { 0x22C7, "divonx", "DIVISION TIMES" },
1982: { 0x22C8, "bowtie", "" },
1983: { 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" },
1984: { 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" },
1985: { 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" },
1986: { 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" },
1987: { 0x22CD, "bsime", "" },
1988: { 0x22CE, "cuvee", "CURLY LOGICAL OR" },
1989: { 0x22CF, "cuwed", "CURLY LOGICAL AND" },
1990: { 0x22D0, "Sub", "" },
1991: { 0x22D1, "Sup", "" },
1992: { 0x22D2, "Cap", "DOUBLE INTERSECTION" },
1993: { 0x22D3, "Cup", "DOUBLE UNION" },
1994: { 0x22D4, "fork", "" },
1995: { 0x22D6, "ldot", "" },
1996: { 0x22D7, "gsdot", "" },
1997: { 0x22D8, "Ll", "" },
1998: { 0x22D9, "Gg", "VERY MUCH GREATER-THAN" },
1999: { 0x22DA, "lEg", "" },
2000: { 0x22DA, "leg", "" },
2001: { 0x22DB, "gEl", "" },
2002: { 0x22DB, "gel", "" },
2003: { 0x22DC, "els", "" },
2004: { 0x22DD, "egs", "" },
2005: { 0x22DE, "cuepr", "" },
2006: { 0x22DF, "cuesc", "" },
2007: { 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" },
2008: { 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" },
2009: { 0x22E6, "lnsim", "" },
2010: { 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" },
2011: { 0x22E8, "prnap", "" },
2012: { 0x22E8, "prnsim", "" },
2013: { 0x22E9, "scnap", "" },
2014: { 0x22E9, "scnsim", "" },
2015: { 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" },
2016: { 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" },
2017: { 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" },
2018: { 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" },
2019: { 0x22EE, "vellip", "" },
2020: { 0x2306, "Barwed", "PERSPECTIVE" },
2021: { 0x2308, "lceil", "LEFT CEILING" },
2022: { 0x2309, "rceil", "RIGHT CEILING" },
2023: { 0x230A, "lfloor", "LEFT FLOOR" },
2024: { 0x230B, "rfloor", "RIGHT FLOOR" },
2025: { 0x230C, "drcrop", "BOTTOM RIGHT CROP" },
2026: { 0x230D, "dlcrop", "BOTTOM LEFT CROP" },
2027: { 0x230E, "urcrop", "TOP RIGHT CROP" },
2028: { 0x230F, "ulcrop", "TOP LEFT CROP" },
2029: { 0x2315, "telrec", "TELEPHONE RECORDER" },
2030: { 0x2316, "target", "POSITION INDICATOR" },
2031: { 0x231C, "ulcorn", "TOP LEFT CORNER" },
2032: { 0x231D, "urcorn", "TOP RIGHT CORNER" },
2033: { 0x231E, "dlcorn", "BOTTOM LEFT CORNER" },
2034: { 0x231F, "drcorn", "BOTTOM RIGHT CORNER" },
2035: { 0x2322, "frown", "" },
2036: { 0x2322, "sfrown", "FROWN" },
2037: { 0x2323, "smile", "" },
2038: { 0x2323, "ssmile", "SMILE" },
2039: { 0x2423, "blank", "OPEN BOX" },
2040: { 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" },
2041: { 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" },
2042: { 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" },
2043: { 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" },
2044: { 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" },
2045: { 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" },
2046: { 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" },
2047: { 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" },
2048: { 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" },
2049: { 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" },
2050: { 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" },
2051: { 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" },
2052: { 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" },
2053: { 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" },
2054: { 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" },
2055: { 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" },
2056: { 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" },
2057: { 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" },
2058: { 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" },
2059: { 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" },
2060: { 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" },
2061: { 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" },
2062: { 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" },
2063: { 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" },
2064: { 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" },
2065: { 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" },
2066: { 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" },
2067: { 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" },
2068: { 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" },
2069: { 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" },
2070: { 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" },
2071: { 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" },
2072: { 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" },
2073: { 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" },
2074: { 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" },
2075: { 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" },
2076: { 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" },
2077: { 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" },
2078: { 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" },
2079: { 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" },
2080: { 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" },
2081: { 0x2580, "uhblk", "UPPER HALF BLOCK" },
2082: { 0x2584, "lhblk", "LOWER HALF BLOCK" },
2083: { 0x2588, "block", "FULL BLOCK" },
2084: { 0x2591, "blk14", "LIGHT SHADE" },
2085: { 0x2592, "blk12", "MEDIUM SHADE" },
2086: { 0x2593, "blk34", "DARK SHADE" },
2087: { 0x25A1, "square", "WHITE SQUARE" },
2088: { 0x25A1, "squ", "WHITE SQUARE" },
2089: { 0x25AA, "squf", "" },
2090: { 0x25AD, "rect", "WHITE RECTANGLE" },
2091: { 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" },
2092: { 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" },
2093: { 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" },
2094: { 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" },
2095: { 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" },
2096: { 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" },
2097: { 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" },
2098: { 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" },
2099: { 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" },
2100: { 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" },
2101: { 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" },
2102: { 0x25CA, "loz", "LOZENGE" },
2103: { 0x25CB, "cir", "WHITE CIRCLE" },
2104: { 0x25CB, "xcirc", "WHITE CIRCLE" },
2105: { 0x2605, "starf", "BLACK STAR" },
2106: { 0x260E, "phone", "TELEPHONE SIGN" },
2107: { 0x2640, "female", "" },
2108: { 0x2642, "male", "MALE SIGN" },
2109: { 0x2660, "spades", "BLACK SPADE SUIT" },
2110: { 0x2663, "clubs", "BLACK CLUB SUIT" },
2111: { 0x2665, "hearts", "BLACK HEART SUIT" },
2112: { 0x2666, "diams", "BLACK DIAMOND SUIT" },
2113: { 0x2669, "sung", "" },
2114: { 0x266D, "flat", "MUSIC FLAT SIGN" },
2115: { 0x266E, "natur", "MUSIC NATURAL SIGN" },
2116: { 0x266F, "sharp", "MUSIC SHARP SIGN" },
2117: { 0x2713, "check", "CHECK MARK" },
2118: { 0x2717, "cross", "BALLOT X" },
2119: { 0x2720, "malt", "MALTESE CROSS" },
2120: { 0x2726, "lozf", "" },
2121: { 0x2736, "sext", "SIX POINTED BLACK STAR" },
2122: { 0x3008, "lang", "" },
2123: { 0x3009, "rang", "" },
2124: { 0xE291, "rpargt", "" },
2125: { 0xE2A2, "lnap", "" },
2126: { 0xE2AA, "nsmid", "" },
2127: { 0xE2B3, "prnE", "" },
2128: { 0xE2B5, "scnE", "" },
2129: { 0xE2B8, "vsubnE", "" },
2130: { 0xE301, "smid", "" },
2131: { 0xE411, "gnap", "" },
2132: { 0xFB00, "fflig", "" },
2133: { 0xFB01, "filig", "" },
2134: { 0xFB02, "fllig", "" },
2135: { 0xFB03, "ffilig", "" },
2136: { 0xFB04, "ffllig", "" },
2137: { 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" },
2138: };
2139:
2140: /************************************************************************
2141: * *
2142: * Commodity functions to handle entities *
2143: * *
2144: ************************************************************************/
2145:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer to the new size.
 * It can only be expanded inside a function returning a pointer: when
 * the reallocation fails the previous block is released (the original
 * pointer is kept in a temporary so it is not lost) and the enclosing
 * function returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *) xmlRealloc(buffer,                     \
                                 buffer##_size * sizeof(xmlChar));      \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
2157:
2158: /**
2159: * sgmlEntityLookup:
2160: * @name: the entity name
2161: *
2162: * Lookup the given entity in EntitiesTable
2163: *
2164: * TODO: the linear scan is really ugly, an hash table is really needed.
2165: *
2166: * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
2167: */
2168: sgmlEntityDescPtr
2169: sgmlEntityLookup(const xmlChar *name) {
2170: int i;
2171:
2172: for (i = 0;i < (sizeof(docbookEntitiesTable)/
2173: sizeof(docbookEntitiesTable[0]));i++) {
1.7 veillard 2174: if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) {
1.1 veillard 2175: #ifdef DEBUG
1.10 ! veillard 2176: xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name);
1.1 veillard 2177: #endif
2178: return(&docbookEntitiesTable[i]);
2179: }
2180: }
2181: return(NULL);
2182: }
2183:
2184: /**
2185: * sgmlEntityValueLookup:
2186: * @value: the entity's unicode value
2187: *
2188: * Lookup the given entity in EntitiesTable
2189: *
2190: * TODO: the linear scan is really ugly, an hash table is really needed.
2191: *
2192: * Returns the associated sgmlEntityDescPtr if found, NULL otherwise.
2193: */
2194: sgmlEntityDescPtr
2195: sgmlEntityValueLookup(int value) {
2196: int i;
2197: #ifdef DEBUG
2198: int lv = 0;
2199: #endif
2200:
2201: for (i = 0;i < (sizeof(docbookEntitiesTable)/
2202: sizeof(docbookEntitiesTable[0]));i++) {
2203: if (docbookEntitiesTable[i].value >= value) {
2204: if (docbookEntitiesTable[i].value > value)
2205: break;
2206: #ifdef DEBUG
1.10 ! veillard 2207: xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", docbookEntitiesTable[i].name);
1.1 veillard 2208: #endif
2209: return(&docbookEntitiesTable[i]);
2210: }
2211: #ifdef DEBUG
2212: if (lv > docbookEntitiesTable[i].value) {
1.10 ! veillard 2213: xmlGenericError(xmlGenericErrorContext,
! 2214: "docbookEntitiesTable[] is not sorted (%d > %d)!\n",
1.1 veillard 2215: lv, docbookEntitiesTable[i].value);
2216: }
2217: lv = docbookEntitiesTable[i].value;
2218: #endif
2219: }
2220: return(NULL);
2221: }
2222:
2223: /**
2224: * UTF8ToSgml:
2225: * @out: a pointer to an array of bytes to store the result
2226: * @outlen: the length of @out
2227: * @in: a pointer to an array of UTF-8 chars
2228: * @inlen: the length of @in
2229: *
2230: * Take a block of UTF-8 chars in and try to convert it to an ASCII
2231: * plus SGML entities block of chars out.
2232: *
2233: * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2234: * The value of @inlen after return is the number of octets consumed
2235: * as the return value is positive, else unpredictiable.
2236: * The value of @outlen after return is the number of octets consumed.
2237: */
2238: int
2239: UTF8ToSgml(unsigned char* out, int *outlen,
2240: const unsigned char* in, int *inlen) {
2241: const unsigned char* processed = in;
2242: const unsigned char* outend;
2243: const unsigned char* outstart = out;
2244: const unsigned char* instart = in;
2245: const unsigned char* inend;
2246: unsigned int c, d;
2247: int trailing;
2248:
2249: if (in == NULL) {
2250: /*
2251: * initialization nothing to do
2252: */
2253: *outlen = 0;
2254: *inlen = 0;
2255: return(0);
2256: }
2257: inend = in + (*inlen);
2258: outend = out + (*outlen);
2259: while (in < inend) {
2260: d = *in++;
2261: if (d < 0x80) { c= d; trailing= 0; }
2262: else if (d < 0xC0) {
2263: /* trailing byte in leading position */
2264: *outlen = out - outstart;
2265: *inlen = processed - instart;
2266: return(-2);
2267: } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
2268: else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
2269: else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
2270: else {
2271: /* no chance for this in Ascii */
2272: *outlen = out - outstart;
2273: *inlen = processed - instart;
2274: return(-2);
2275: }
2276:
2277: if (inend - in < trailing) {
2278: break;
2279: }
2280:
2281: for ( ; trailing; trailing--) {
2282: if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
2283: break;
2284: c <<= 6;
2285: c |= d & 0x3F;
2286: }
2287:
2288: /* assertion: c is a single UTF-4 value */
2289: if (c < 0x80) {
2290: if (out + 1 >= outend)
2291: break;
2292: *out++ = c;
2293: } else {
2294: int len;
2295: sgmlEntityDescPtr ent;
2296:
2297: /*
2298: * Try to lookup a predefined SGML entity for it
2299: */
2300:
2301: ent = sgmlEntityValueLookup(c);
2302: if (ent == NULL) {
2303: /* no chance for this in Ascii */
2304: *outlen = out - outstart;
2305: *inlen = processed - instart;
2306: return(-2);
2307: }
2308: len = strlen(ent->name);
2309: if (out + 2 + len >= outend)
2310: break;
2311: *out++ = '&';
2312: memcpy(out, ent->name, len);
2313: out += len;
2314: *out++ = ';';
2315: }
2316: processed = in;
2317: }
2318: *outlen = out - outstart;
2319: *inlen = processed - instart;
2320: return(0);
2321: }
2322:
2323: /**
2324: * sgmlEncodeEntities:
2325: * @out: a pointer to an array of bytes to store the result
2326: * @outlen: the length of @out
2327: * @in: a pointer to an array of UTF-8 chars
2328: * @inlen: the length of @in
2329: * @quoteChar: the quote character to escape (' or ") or zero.
2330: *
2331: * Take a block of UTF-8 chars in and try to convert it to an ASCII
2332: * plus SGML entities block of chars out.
2333: *
2334: * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2335: * The value of @inlen after return is the number of octets consumed
2336: * as the return value is positive, else unpredictiable.
2337: * The value of @outlen after return is the number of octets consumed.
2338: */
2339: int
2340: sgmlEncodeEntities(unsigned char* out, int *outlen,
2341: const unsigned char* in, int *inlen, int quoteChar) {
2342: const unsigned char* processed = in;
2343: const unsigned char* outend = out + (*outlen);
2344: const unsigned char* outstart = out;
2345: const unsigned char* instart = in;
2346: const unsigned char* inend = in + (*inlen);
2347: unsigned int c, d;
2348: int trailing;
2349:
2350: while (in < inend) {
2351: d = *in++;
2352: if (d < 0x80) { c= d; trailing= 0; }
2353: else if (d < 0xC0) {
2354: /* trailing byte in leading position */
2355: *outlen = out - outstart;
2356: *inlen = processed - instart;
2357: return(-2);
2358: } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
2359: else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
2360: else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
2361: else {
2362: /* no chance for this in Ascii */
2363: *outlen = out - outstart;
2364: *inlen = processed - instart;
2365: return(-2);
2366: }
2367:
2368: if (inend - in < trailing)
2369: break;
2370:
2371: while (trailing--) {
2372: if (((d= *in++) & 0xC0) != 0x80) {
2373: *outlen = out - outstart;
2374: *inlen = processed - instart;
2375: return(-2);
2376: }
2377: c <<= 6;
2378: c |= d & 0x3F;
2379: }
2380:
2381: /* assertion: c is a single UTF-4 value */
2382: if (c < 0x80 && c != quoteChar && c != '&' && c != '<' && c != '>') {
2383: if (out >= outend)
2384: break;
2385: *out++ = c;
2386: } else {
2387: sgmlEntityDescPtr ent;
2388: const char *cp;
2389: char nbuf[16];
2390: int len;
2391:
2392: /*
2393: * Try to lookup a predefined SGML entity for it
2394: */
2395: ent = sgmlEntityValueLookup(c);
2396: if (ent == NULL) {
2397: sprintf(nbuf, "#%u", c);
2398: cp = nbuf;
2399: }
2400: else
2401: cp = ent->name;
2402: len = strlen(cp);
2403: if (out + 2 + len > outend)
2404: break;
2405: *out++ = '&';
2406: memcpy(out, cp, len);
2407: out += len;
2408: *out++ = ';';
2409: }
2410: processed = in;
2411: }
2412: *outlen = out - outstart;
2413: *inlen = processed - instart;
2414: return(0);
2415: }
2416:
2417: /**
2418: * sgmlDecodeEntities:
2419: * @ctxt: the parser context
2420: * @len: the len to decode (in bytes !), -1 for no size limit
2421: * @end: an end marker xmlChar, 0 if none
2422: * @end2: an end marker xmlChar, 0 if none
2423: * @end3: an end marker xmlChar, 0 if none
2424: *
2425: * Subtitute the SGML entities by their value
2426: *
2427: * DEPRECATED !!!!
2428: *
2429: * Returns A newly allocated string with the substitution done. The caller
2430: * must deallocate it !
2431: */
2432: xmlChar *
2433: sgmlDecodeEntities(sgmlParserCtxtPtr ctxt, int len,
2434: xmlChar end, xmlChar end2, xmlChar end3) {
2435: xmlChar *name = NULL;
2436: xmlChar *buffer = NULL;
2437: unsigned int buffer_size = 0;
2438: unsigned int nbchars = 0;
2439: sgmlEntityDescPtr ent;
2440: unsigned int max = (unsigned int) len;
2441: int c,l;
2442:
2443: if (ctxt->depth > 40) {
1.6 veillard 2444: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.1 veillard 2445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2446: ctxt->sax->error(ctxt->userData,
2447: "Detected entity reference loop\n");
2448: ctxt->wellFormed = 0;
2449: ctxt->disableSAX = 1;
2450: return(NULL);
2451: }
2452:
2453: /*
2454: * allocate a translation buffer.
2455: */
2456: buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
2457: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2458: if (buffer == NULL) {
2459: perror("xmlDecodeEntities: malloc failed");
2460: return(NULL);
2461: }
2462:
2463: /*
2464: * Ok loop until we reach one of the ending char or a size limit.
2465: */
2466: c = CUR_CHAR(l);
2467: while ((nbchars < max) && (c != end) &&
2468: (c != end2) && (c != end3)) {
2469:
2470: if (c == 0) break;
2471: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2472: int val = sgmlParseCharRef(ctxt);
2473: COPY_BUF(0,buffer,nbchars,val);
2474: NEXTL(l);
2475: } else if ((c == '&') && (ctxt->token != '&')) {
2476: ent = sgmlParseEntityRef(ctxt, &name);
2477: if (name != NULL) {
2478: if (ent != NULL) {
2479: int val = ent->value;
2480: COPY_BUF(0,buffer,nbchars,val);
2481: NEXTL(l);
2482: } else {
2483: const xmlChar *cur = name;
2484:
2485: buffer[nbchars++] = '&';
2486: if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
2487: growBuffer(buffer);
2488: }
2489: while (*cur != 0) {
2490: buffer[nbchars++] = *cur++;
2491: }
2492: buffer[nbchars++] = ';';
2493: }
2494: }
2495: } else {
2496: COPY_BUF(l,buffer,nbchars,c);
2497: NEXTL(l);
2498: if (nbchars > buffer_size - SGML_PARSER_BUFFER_SIZE) {
2499: growBuffer(buffer);
2500: }
2501: }
2502: c = CUR_CHAR(l);
2503: }
2504: buffer[nbchars++] = 0;
2505: return(buffer);
2506: }
2507:
2508: /************************************************************************
2509: * *
2510: * Commodity functions to handle streams *
2511: * *
2512: ************************************************************************/
2513:
2514: /**
2515: * sgmlFreeInputStream:
2516: * @input: an sgmlParserInputPtr
2517: *
2518: * Free up an input stream.
2519: */
2520: void
2521: sgmlFreeInputStream(sgmlParserInputPtr input) {
2522: if (input == NULL) return;
2523:
2524: if (input->filename != NULL) xmlFree((char *) input->filename);
2525: if (input->directory != NULL) xmlFree((char *) input->directory);
2526: if ((input->free != NULL) && (input->base != NULL))
2527: input->free((xmlChar *) input->base);
2528: if (input->buf != NULL)
2529: xmlFreeParserInputBuffer(input->buf);
2530: memset(input, -1, sizeof(sgmlParserInput));
2531: xmlFree(input);
2532: }
2533:
2534: /**
2535: * sgmlNewInputStream:
2536: * @ctxt: an SGML parser context
2537: *
2538: * Create a new input stream structure
2539: * Returns the new input stream or NULL
2540: */
2541: sgmlParserInputPtr
2542: sgmlNewInputStream(sgmlParserCtxtPtr ctxt) {
2543: sgmlParserInputPtr input;
2544:
2545: input = (xmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
2546: if (input == NULL) {
2547: ctxt->errNo = XML_ERR_NO_MEMORY;
2548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2549: ctxt->sax->error(ctxt->userData,
2550: "malloc: couldn't allocate a new input stream\n");
2551: return(NULL);
2552: }
2553: memset(input, 0, sizeof(sgmlParserInput));
2554: input->filename = NULL;
2555: input->directory = NULL;
2556: input->base = NULL;
2557: input->cur = NULL;
2558: input->buf = NULL;
2559: input->line = 1;
2560: input->col = 1;
2561: input->buf = NULL;
2562: input->free = NULL;
2563: input->version = NULL;
2564: input->consumed = 0;
2565: input->length = 0;
2566: return(input);
2567: }
2568:
2569:
2570: /************************************************************************
2571: * *
2572: * Commodity functions, cleanup needed ? *
2573: * *
2574: ************************************************************************/
2575:
2576: /**
2577: * areBlanks:
2578: * @ctxt: an SGML parser context
2579: * @str: a xmlChar *
2580: * @len: the size of @str
2581: *
2582: * Is this a sequence of blank chars that one can ignore ?
2583: *
2584: * Returns 1 if ignorable 0 otherwise.
2585: */
2586:
2587: static int areBlanks(sgmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2588: int i;
2589: xmlNodePtr lastChild;
2590:
2591: for (i = 0;i < len;i++)
2592: if (!(IS_BLANK(str[i]))) return(0);
2593:
2594: if (CUR == 0) return(1);
2595: if (CUR != '<') return(0);
2596: if (ctxt->name == NULL)
2597: return(1);
2598: #if 0
1.7 veillard 2599: if (xmlStrEqual(ctxt->name, BAD_CAST"sgml"))
1.1 veillard 2600: return(1);
1.7 veillard 2601: if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
1.1 veillard 2602: return(1);
1.7 veillard 2603: if (xmlStrEqual(ctxt->name, BAD_CAST"body"))
1.1 veillard 2604: return(1);
2605: #endif
2606: if (ctxt->node == NULL) return(0);
2607: lastChild = xmlGetLastChild(ctxt->node);
2608: if (lastChild == NULL) {
2609: if (ctxt->node->content != NULL) return(0);
2610: } else if (xmlNodeIsText(lastChild))
2611: return(0);
2612: return(1);
2613: }
2614:
2615: /**
2616: * sgmlHandleEntity:
2617: * @ctxt: an SGML parser context
2618: * @entity: an XML entity pointer.
2619: *
2620: * Default handling of an SGML entity, call the parser with the
2621: * substitution string
2622: */
2623:
2624: void
2625: sgmlHandleEntity(sgmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2626: int len;
2627:
2628: if (entity->content == NULL) {
2629: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2630: ctxt->sax->error(ctxt->userData, "sgmlHandleEntity %s: content == NULL\n",
2631: entity->name);
2632: ctxt->wellFormed = 0;
2633: return;
2634: }
2635: len = xmlStrlen(entity->content);
2636:
2637: /*
2638: * Just handle the content as a set of chars.
2639: */
2640: sgmlCheckParagraph(ctxt);
2641: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
2642: ctxt->sax->characters(ctxt->userData, entity->content, len);
2643:
2644: }
2645:
2646: /**
2647: * sgmlNewDocNoDtD:
2648: * @URI: URI for the dtd, or NULL
2649: * @ExternalID: the external ID of the DTD, or NULL
2650: *
2651: * Returns a new document, do not intialize the DTD if not provided
2652: */
2653: sgmlDocPtr
2654: sgmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
2655: xmlDocPtr cur;
2656:
2657: /*
2658: * Allocate a new document and fill the fields.
2659: */
2660: cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
2661: if (cur == NULL) {
1.10 ! veillard 2662: xmlGenericError(xmlGenericErrorContext,
! 2663: "xmlNewDoc : malloc failed\n");
1.1 veillard 2664: return(NULL);
2665: }
2666: memset(cur, 0, sizeof(xmlDoc));
2667:
2668: cur->type = XML_SGML_DOCUMENT_NODE;
2669: cur->version = NULL;
2670: cur->intSubset = NULL;
2671: if ((ExternalID != NULL) ||
2672: (URI != NULL))
2673: xmlCreateIntSubset(cur, BAD_CAST "SGML", ExternalID, URI);
2674: cur->doc = cur;
2675: cur->name = NULL;
2676: cur->children = NULL;
2677: cur->extSubset = NULL;
2678: cur->oldNs = NULL;
2679: cur->encoding = NULL;
2680: cur->standalone = 1;
2681: cur->compression = 0;
2682: cur->ids = NULL;
2683: cur->refs = NULL;
2684: #ifndef XML_WITHOUT_CORBA
2685: cur->_private = NULL;
2686: #endif
2687: return(cur);
2688: }
2689:
2690: /**
2691: * sgmlNewDoc:
2692: * @URI: URI for the dtd, or NULL
2693: * @ExternalID: the external ID of the DTD, or NULL
2694: *
2695: * Returns a new document
2696: */
2697: sgmlDocPtr
2698: sgmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
2699: if ((URI == NULL) && (ExternalID == NULL))
2700: return(sgmlNewDocNoDtD(
2701: BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
2702: BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"));
2703:
2704: return(sgmlNewDocNoDtD(URI, ExternalID));
2705: }
2706:
2707:
2708: /************************************************************************
2709: * *
2710: * The parser itself *
2711: * Relates to http://www.w3.org/TR/docbook *
2712: * *
2713: ************************************************************************/
2714:
2715: /************************************************************************
2716: * *
2717: * The parser itself *
2718: * *
2719: ************************************************************************/
2720:
2721: /**
2722: * sgmlParseSGMLName:
2723: * @ctxt: an SGML parser context
2724: *
2725: * parse an SGML tag or attribute name, note that we convert it to lowercase
2726: * since SGML names are not case-sensitive.
2727: *
2728: * Returns the Tag Name parsed or NULL
2729: */
2730:
2731: xmlChar *
2732: sgmlParseSGMLName(sgmlParserCtxtPtr ctxt) {
2733: xmlChar *ret = NULL;
2734: int i = 0;
2735: xmlChar loc[SGML_PARSER_BUFFER_SIZE];
2736:
2737: if (!IS_LETTER(CUR) && (CUR != '_') &&
2738: (CUR != ':')) return(NULL);
2739:
2740: while ((i < SGML_PARSER_BUFFER_SIZE) &&
2741: ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2742: (CUR == ':') || (CUR == '_'))) {
2743: if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
2744: else loc[i] = CUR;
2745: i++;
2746:
2747: NEXT;
2748: }
2749:
2750: ret = xmlStrndup(loc, i);
2751:
2752: return(ret);
2753: }
2754:
2755: /**
2756: * sgmlParseName:
2757: * @ctxt: an SGML parser context
2758: *
2759: * parse an SGML name, this routine is case sensistive.
2760: *
2761: * Returns the Name parsed or NULL
2762: */
2763:
2764: xmlChar *
2765: sgmlParseName(sgmlParserCtxtPtr ctxt) {
2766: xmlChar buf[SGML_MAX_NAMELEN];
2767: int len = 0;
2768:
2769: GROW;
2770: if (!IS_LETTER(CUR) && (CUR != '_')) {
2771: return(NULL);
2772: }
2773:
2774: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2775: (CUR == '.') || (CUR == '-') ||
2776: (CUR == '_') || (CUR == ':') ||
2777: (IS_COMBINING(CUR)) ||
2778: (IS_EXTENDER(CUR))) {
2779: buf[len++] = CUR;
2780: NEXT;
2781: if (len >= SGML_MAX_NAMELEN) {
1.10 ! veillard 2782: xmlGenericError(xmlGenericErrorContext,
1.1 veillard 2783: "sgmlParseName: reached SGML_MAX_NAMELEN limit\n");
2784: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2785: (CUR == '.') || (CUR == '-') ||
2786: (CUR == '_') || (CUR == ':') ||
2787: (IS_COMBINING(CUR)) ||
2788: (IS_EXTENDER(CUR)))
2789: NEXT;
2790: break;
2791: }
2792: }
2793: return(xmlStrndup(buf, len));
2794: }
2795:
2796: /**
2797: * sgmlParseSGMLAttribute:
2798: * @ctxt: an SGML parser context
2799: * @stop: a char stop value
2800: *
2801: * parse an SGML attribute value till the stop (quote), if
2802: * stop is 0 then it stops at the first space
2803: *
2804: * Returns the attribute parsed or NULL
2805: */
2806:
2807: xmlChar *
2808: sgmlParseSGMLAttribute(sgmlParserCtxtPtr ctxt, const xmlChar stop) {
2809: #if 0
2810: xmlChar buf[SGML_MAX_NAMELEN];
2811: int len = 0;
2812:
2813: GROW;
2814: while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
2815: if ((stop == 0) && (IS_BLANK(CUR))) break;
2816: buf[len++] = CUR;
2817: NEXT;
2818: if (len >= SGML_MAX_NAMELEN) {
1.10 ! veillard 2819: xmlGenericError(xmlGenericErrorContext,
1.1 veillard 2820: "sgmlParseSGMLAttribute: reached SGML_MAX_NAMELEN limit\n");
2821: while ((!IS_BLANK(CUR)) && (CUR != '<') &&
2822: (CUR != '>') &&
2823: (CUR != '\'') && (CUR != '"'))
2824: NEXT;
2825: break;
2826: }
2827: }
2828: return(xmlStrndup(buf, len));
2829: #else
2830: xmlChar *buffer = NULL;
2831: int buffer_size = 0;
2832: xmlChar *out = NULL;
2833: xmlChar *name = NULL;
2834:
2835: xmlChar *cur = NULL;
2836: sgmlEntityDescPtr ent;
2837:
2838: /*
2839: * allocate a translation buffer.
2840: */
2841: buffer_size = SGML_PARSER_BIG_BUFFER_SIZE;
2842: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2843: if (buffer == NULL) {
2844: perror("sgmlParseSGMLAttribute: malloc failed");
2845: return(NULL);
2846: }
2847: out = buffer;
2848:
2849: /*
2850: * Ok loop until we reach one of the ending chars
2851: */
2852: while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
2853: if ((stop == 0) && (IS_BLANK(CUR))) break;
2854: if (CUR == '&') {
2855: if (NXT(1) == '#') {
2856: unsigned int c;
2857: int bits;
2858:
2859: c = sgmlParseCharRef(ctxt);
2860: if (c < 0x80)
2861: { *out++ = c; bits= -6; }
2862: else if (c < 0x800)
2863: { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
2864: else if (c < 0x10000)
2865: { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
2866: else
2867: { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
2868:
2869: for ( ; bits >= 0; bits-= 6) {
2870: *out++ = ((c >> bits) & 0x3F) | 0x80;
2871: }
2872: } else {
2873: ent = sgmlParseEntityRef(ctxt, &name);
2874: if (name == NULL) {
2875: *out++ = '&';
2876: if (out - buffer > buffer_size - 100) {
2877: int index = out - buffer;
2878:
2879: growBuffer(buffer);
2880: out = &buffer[index];
2881: }
2882: } else if (ent == NULL) {
2883: *out++ = '&';
2884: cur = name;
2885: while (*cur != 0) {
2886: if (out - buffer > buffer_size - 100) {
2887: int index = out - buffer;
2888:
2889: growBuffer(buffer);
2890: out = &buffer[index];
2891: }
2892: *out++ = *cur++;
2893: }
2894: xmlFree(name);
2895: } else {
2896: unsigned int c;
2897: int bits;
2898:
2899: if (out - buffer > buffer_size - 100) {
2900: int index = out - buffer;
2901:
2902: growBuffer(buffer);
2903: out = &buffer[index];
2904: }
2905: c = (xmlChar)ent->value;
2906: if (c < 0x80)
2907: { *out++ = c; bits= -6; }
2908: else if (c < 0x800)
2909: { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
2910: else if (c < 0x10000)
2911: { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
2912: else
2913: { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
2914:
2915: for ( ; bits >= 0; bits-= 6) {
2916: *out++ = ((c >> bits) & 0x3F) | 0x80;
2917: }
2918: xmlFree(name);
2919: }
2920: }
2921: } else {
2922: unsigned int c;
2923: int bits;
2924:
2925: if (out - buffer > buffer_size - 100) {
2926: int index = out - buffer;
2927:
2928: growBuffer(buffer);
2929: out = &buffer[index];
2930: }
2931: c = CUR;
2932: if (c < 0x80)
2933: { *out++ = c; bits= -6; }
2934: else if (c < 0x800)
2935: { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
2936: else if (c < 0x10000)
2937: { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
2938: else
2939: { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
2940:
2941: for ( ; bits >= 0; bits-= 6) {
2942: *out++ = ((c >> bits) & 0x3F) | 0x80;
2943: }
2944: NEXT;
2945: }
2946: }
2947: *out++ = 0;
2948: return(buffer);
2949: #endif
2950: }
2951:
2952: /**
2953: * sgmlParseNmtoken:
2954: * @ctxt: an SGML parser context
2955: *
2956: * parse an SGML Nmtoken.
2957: *
2958: * Returns the Nmtoken parsed or NULL
2959: */
2960:
2961: xmlChar *
2962: sgmlParseNmtoken(sgmlParserCtxtPtr ctxt) {
2963: xmlChar buf[SGML_MAX_NAMELEN];
2964: int len = 0;
2965:
2966: GROW;
2967: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2968: (CUR == '.') || (CUR == '-') ||
2969: (CUR == '_') || (CUR == ':') ||
2970: (IS_COMBINING(CUR)) ||
2971: (IS_EXTENDER(CUR))) {
2972: buf[len++] = CUR;
2973: NEXT;
2974: if (len >= SGML_MAX_NAMELEN) {
1.10 ! veillard 2975: xmlGenericError(xmlGenericErrorContext,
1.1 veillard 2976: "sgmlParseNmtoken: reached SGML_MAX_NAMELEN limit\n");
2977: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2978: (CUR == '.') || (CUR == '-') ||
2979: (CUR == '_') || (CUR == ':') ||
2980: (IS_COMBINING(CUR)) ||
2981: (IS_EXTENDER(CUR)))
2982: NEXT;
2983: break;
2984: }
2985: }
2986: return(xmlStrndup(buf, len));
2987: }
2988:
2989: /**
2990: * sgmlParseEntityRef:
2991: * @ctxt: an SGML parser context
2992: * @str: location to store the entity name
2993: *
2994: * parse an SGML ENTITY references
2995: *
2996: * [68] EntityRef ::= '&' Name ';'
2997: *
2998: * Returns the associated sgmlEntityDescPtr if found, or NULL otherwise,
2999: * if non-NULL *str will have to be freed by the caller.
3000: */
3001: sgmlEntityDescPtr
3002: sgmlParseEntityRef(sgmlParserCtxtPtr ctxt, xmlChar **str) {
3003: xmlChar *name;
3004: sgmlEntityDescPtr ent = NULL;
3005: *str = NULL;
3006:
3007: if (CUR == '&') {
3008: NEXT;
3009: name = sgmlParseName(ctxt);
3010: if (name == NULL) {
3011: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3012: ctxt->sax->error(ctxt->userData, "sgmlParseEntityRef: no name\n");
3013: ctxt->wellFormed = 0;
3014: } else {
3015: GROW;
3016: if (CUR == ';') {
3017: *str = name;
3018:
3019: /*
3020: * Lookup the entity in the table.
3021: */
3022: ent = sgmlEntityLookup(name);
3023: if (ent != NULL) /* OK that's ugly !!! */
3024: NEXT;
3025: } else {
3026: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3027: ctxt->sax->error(ctxt->userData,
3028: "sgmlParseEntityRef: expecting ';'\n");
3029: *str = name;
3030: }
3031: }
3032: }
3033: return(ent);
3034: }
3035:
3036: /**
3037: * sgmlParseAttValue:
3038: * @ctxt: an SGML parser context
3039: *
3040: * parse a value for an attribute
3041: * Note: the parser won't do substitution of entities here, this
3042: * will be handled later in xmlStringGetNodeList, unless it was
3043: * asked for ctxt->replaceEntities != 0
3044: *
3045: * Returns the AttValue parsed or NULL.
3046: */
3047:
3048: xmlChar *
3049: sgmlParseAttValue(sgmlParserCtxtPtr ctxt) {
3050: xmlChar *ret = NULL;
3051:
3052: if (CUR == '"') {
3053: NEXT;
3054: ret = sgmlParseSGMLAttribute(ctxt, '"');
3055: if (CUR != '"') {
3056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3057: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3058: ctxt->wellFormed = 0;
3059: } else
3060: NEXT;
3061: } else if (CUR == '\'') {
3062: NEXT;
3063: ret = sgmlParseSGMLAttribute(ctxt, '\'');
3064: if (CUR != '\'') {
3065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3066: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3067: ctxt->wellFormed = 0;
3068: } else
3069: NEXT;
3070: } else {
3071: /*
3072: * That's an SGMLism, the attribute value may not be quoted
3073: */
3074: ret = sgmlParseSGMLAttribute(ctxt, 0);
3075: if (ret == NULL) {
3076: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3077: ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
3078: ctxt->wellFormed = 0;
3079: }
3080: }
3081: return(ret);
3082: }
3083:
3084: /**
3085: * sgmlParseSystemLiteral:
3086: * @ctxt: an SGML parser context
3087: *
3088: * parse an SGML Literal
3089: *
3090: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3091: *
3092: * Returns the SystemLiteral parsed or NULL
3093: */
3094:
3095: xmlChar *
3096: sgmlParseSystemLiteral(sgmlParserCtxtPtr ctxt) {
3097: const xmlChar *q;
3098: xmlChar *ret = NULL;
3099:
3100: if (CUR == '"') {
3101: NEXT;
3102: q = CUR_PTR;
3103: while ((IS_CHAR(CUR)) && (CUR != '"'))
3104: NEXT;
3105: if (!IS_CHAR(CUR)) {
3106: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3107: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3108: ctxt->wellFormed = 0;
3109: } else {
3110: ret = xmlStrndup(q, CUR_PTR - q);
3111: NEXT;
3112: }
3113: } else if (CUR == '\'') {
3114: NEXT;
3115: q = CUR_PTR;
3116: while ((IS_CHAR(CUR)) && (CUR != '\''))
3117: NEXT;
3118: if (!IS_CHAR(CUR)) {
3119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3120: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3121: ctxt->wellFormed = 0;
3122: } else {
3123: ret = xmlStrndup(q, CUR_PTR - q);
3124: NEXT;
3125: }
3126: } else {
3127: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3128: ctxt->sax->error(ctxt->userData,
3129: "SystemLiteral \" or ' expected\n");
3130: ctxt->wellFormed = 0;
3131: }
3132:
3133: return(ret);
3134: }
3135:
3136: /**
3137: * sgmlParsePubidLiteral:
3138: * @ctxt: an SGML parser context
3139: *
3140: * parse an SGML public literal
3141: *
3142: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3143: *
3144: * Returns the PubidLiteral parsed or NULL.
3145: */
3146:
3147: xmlChar *
3148: sgmlParsePubidLiteral(sgmlParserCtxtPtr ctxt) {
3149: const xmlChar *q;
3150: xmlChar *ret = NULL;
3151: /*
3152: * Name ::= (Letter | '_') (NameChar)*
3153: */
3154: if (CUR == '"') {
3155: NEXT;
3156: q = CUR_PTR;
3157: while (IS_PUBIDCHAR(CUR)) NEXT;
3158: if (CUR != '"') {
3159: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3160: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3161: ctxt->wellFormed = 0;
3162: } else {
3163: ret = xmlStrndup(q, CUR_PTR - q);
3164: NEXT;
3165: }
3166: } else if (CUR == '\'') {
3167: NEXT;
3168: q = CUR_PTR;
3169: while ((IS_LETTER(CUR)) && (CUR != '\''))
3170: NEXT;
3171: if (!IS_LETTER(CUR)) {
3172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3173: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3174: ctxt->wellFormed = 0;
3175: } else {
3176: ret = xmlStrndup(q, CUR_PTR - q);
3177: NEXT;
3178: }
3179: } else {
3180: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3181: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
3182: ctxt->wellFormed = 0;
3183: }
3184:
3185: return(ret);
3186: }
3187:
3188: /**
3189: * sgmlParseCharData:
3190: * @ctxt: an SGML parser context
3191: * @cdata: int indicating whether we are within a CDATA section
3192: *
3193: * parse a CharData section.
3194: * if we are within a CDATA section ']]>' marks an end of section.
3195: *
3196: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3197: */
3198:
3199: void
3200: sgmlParseCharData(sgmlParserCtxtPtr ctxt, int cdata) {
3201: xmlChar buf[SGML_PARSER_BIG_BUFFER_SIZE + 5];
3202: int nbchar = 0;
3203: int cur, l;
3204:
3205: SHRINK;
3206: cur = CUR_CHAR(l);
3207: while (((cur != '<') || (ctxt->token == '<')) &&
3208: ((cur != '&') || (ctxt->token == '&')) &&
3209: (IS_CHAR(cur))) {
3210: COPY_BUF(l,buf,nbchar,cur);
3211: if (nbchar >= SGML_PARSER_BIG_BUFFER_SIZE) {
3212: /*
3213: * Ok the segment is to be consumed as chars.
3214: */
3215: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3216: if (areBlanks(ctxt, buf, nbchar)) {
3217: if (ctxt->sax->ignorableWhitespace != NULL)
3218: ctxt->sax->ignorableWhitespace(ctxt->userData,
3219: buf, nbchar);
3220: } else {
3221: sgmlCheckParagraph(ctxt);
3222: if (ctxt->sax->characters != NULL)
3223: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3224: }
3225: }
3226: nbchar = 0;
3227: }
3228: NEXTL(l);
3229: cur = CUR_CHAR(l);
3230: }
3231: if (nbchar != 0) {
3232: /*
3233: * Ok the segment is to be consumed as chars.
3234: */
3235: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3236: if (areBlanks(ctxt, buf, nbchar)) {
3237: if (ctxt->sax->ignorableWhitespace != NULL)
3238: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3239: } else {
3240: sgmlCheckParagraph(ctxt);
3241: if (ctxt->sax->characters != NULL)
3242: ctxt->sax->characters(ctxt->userData, buf, nbchar);
3243: }
3244: }
3245: }
3246: }
3247:
3248: /**
3249: * sgmlParseExternalID:
3250: * @ctxt: an SGML parser context
3251: * @publicID: a xmlChar** receiving PubidLiteral
3252: * @strict: indicate whether we should restrict parsing to only
3253: * production [75], see NOTE below
3254: *
3255: * Parse an External ID or a Public ID
3256: *
3257: * NOTE: Productions [75] and [83] interract badly since [75] can generate
3258: * 'PUBLIC' S PubidLiteral S SystemLiteral
3259: *
3260: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3261: * | 'PUBLIC' S PubidLiteral S SystemLiteral
3262: *
3263: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3264: *
3265: * Returns the function returns SystemLiteral and in the second
3266: * case publicID receives PubidLiteral, is strict is off
3267: * it is possible to return NULL and have publicID set.
3268: */
3269:
3270: xmlChar *
3271: sgmlParseExternalID(sgmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3272: xmlChar *URI = NULL;
3273:
3274: if ((UPPER == 'S') && (UPP(1) == 'Y') &&
3275: (UPP(2) == 'S') && (UPP(3) == 'T') &&
3276: (UPP(4) == 'E') && (UPP(5) == 'M')) {
3277: SKIP(6);
3278: if (!IS_BLANK(CUR)) {
3279: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3280: ctxt->sax->error(ctxt->userData,
3281: "Space required after 'SYSTEM'\n");
3282: ctxt->wellFormed = 0;
3283: }
3284: SKIP_BLANKS;
3285: URI = sgmlParseSystemLiteral(ctxt);
3286: if (URI == NULL) {
3287: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3288: ctxt->sax->error(ctxt->userData,
3289: "sgmlParseExternalID: SYSTEM, no URI\n");
3290: ctxt->wellFormed = 0;
3291: }
3292: } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
3293: (UPP(2) == 'B') && (UPP(3) == 'L') &&
3294: (UPP(4) == 'I') && (UPP(5) == 'C')) {
3295: SKIP(6);
3296: if (!IS_BLANK(CUR)) {
3297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3298: ctxt->sax->error(ctxt->userData,
3299: "Space required after 'PUBLIC'\n");
3300: ctxt->wellFormed = 0;
3301: }
3302: SKIP_BLANKS;
3303: *publicID = sgmlParsePubidLiteral(ctxt);
3304: if (*publicID == NULL) {
3305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3306: ctxt->sax->error(ctxt->userData,
3307: "sgmlParseExternalID: PUBLIC, no Public Identifier\n");
3308: ctxt->wellFormed = 0;
3309: }
3310: SKIP_BLANKS;
3311: if ((CUR == '"') || (CUR == '\'')) {
3312: URI = sgmlParseSystemLiteral(ctxt);
3313: }
3314: }
3315: return(URI);
3316: }
3317:
3318: /**
3319: * sgmlParseComment:
3320: * @ctxt: an SGML parser context
3321: *
3322: * Parse an XML (SGML) comment <!-- .... -->
3323: *
3324: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3325: */
3326: void
3327: sgmlParseComment(sgmlParserCtxtPtr ctxt) {
3328: xmlChar *buf = NULL;
3329: int len;
3330: int size = SGML_PARSER_BUFFER_SIZE;
3331: int q, ql;
3332: int r, rl;
3333: int cur, l;
3334: xmlParserInputState state;
3335:
3336: /*
3337: * Check that there is a comment right here.
3338: */
3339: if ((RAW != '<') || (NXT(1) != '!') ||
3340: (NXT(2) != '-') || (NXT(3) != '-')) return;
3341:
3342: state = ctxt->instate;
3343: ctxt->instate = XML_PARSER_COMMENT;
3344: SHRINK;
3345: SKIP(4);
3346: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3347: if (buf == NULL) {
1.10 ! veillard 3348: xmlGenericError(xmlGenericErrorContext,
! 3349: "malloc of %d byte failed\n", size);
1.1 veillard 3350: ctxt->instate = state;
3351: return;
3352: }
3353: q = CUR_CHAR(ql);
3354: NEXTL(ql);
3355: r = CUR_CHAR(rl);
3356: NEXTL(rl);
3357: cur = CUR_CHAR(l);
3358: len = 0;
3359: while (IS_CHAR(cur) &&
3360: ((cur != '>') ||
3361: (r != '-') || (q != '-'))) {
3362: if (len + 5 >= size) {
3363: size *= 2;
3364: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3365: if (buf == NULL) {
1.10 ! veillard 3366: xmlGenericError(xmlGenericErrorContext,
! 3367: "realloc of %d byte failed\n", size);
1.1 veillard 3368: ctxt->instate = state;
3369: return;
3370: }
3371: }
3372: COPY_BUF(ql,buf,len,q);
3373: q = r;
3374: ql = rl;
3375: r = cur;
3376: rl = l;
3377: NEXTL(l);
3378: cur = CUR_CHAR(l);
3379: if (cur == 0) {
3380: SHRINK;
3381: GROW;
3382: cur = CUR_CHAR(l);
3383: }
3384: }
3385: buf[len] = 0;
3386: if (!IS_CHAR(cur)) {
1.6 veillard 3387: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.1 veillard 3388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3389: ctxt->sax->error(ctxt->userData,
3390: "Comment not terminated \n<!--%.50s\n", buf);
3391: ctxt->wellFormed = 0;
3392: xmlFree(buf);
3393: } else {
3394: NEXT;
3395: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3396: (!ctxt->disableSAX))
3397: ctxt->sax->comment(ctxt->userData, buf);
3398: xmlFree(buf);
3399: }
3400: ctxt->instate = state;
3401: }
3402:
3403: /**
3404: * sgmlParseCharRef:
3405: * @ctxt: an SGML parser context
3406: *
3407: * parse Reference declarations
3408: *
3409: * [66] CharRef ::= '&#' [0-9]+ ';' |
3410: * '&#x' [0-9a-fA-F]+ ';'
3411: *
3412: * Returns the value parsed (as an int)
3413: */
3414: int
3415: sgmlParseCharRef(sgmlParserCtxtPtr ctxt) {
3416: int val = 0;
3417:
3418: if ((CUR == '&') && (NXT(1) == '#') &&
3419: (NXT(2) == 'x')) {
3420: SKIP(3);
3421: while (CUR != ';') {
3422: if ((CUR >= '0') && (CUR <= '9'))
3423: val = val * 16 + (CUR - '0');
3424: else if ((CUR >= 'a') && (CUR <= 'f'))
3425: val = val * 16 + (CUR - 'a') + 10;
3426: else if ((CUR >= 'A') && (CUR <= 'F'))
3427: val = val * 16 + (CUR - 'A') + 10;
3428: else {
3429: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3430: ctxt->sax->error(ctxt->userData,
3431: "sgmlParseCharRef: invalid hexadecimal value\n");
3432: ctxt->wellFormed = 0;
3433: val = 0;
3434: break;
3435: }
3436: NEXT;
3437: }
3438: if (CUR == ';')
3439: NEXT;
3440: } else if ((CUR == '&') && (NXT(1) == '#')) {
3441: SKIP(2);
3442: while (CUR != ';') {
3443: if ((CUR >= '0') && (CUR <= '9'))
3444: val = val * 10 + (CUR - '0');
3445: else {
3446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3447: ctxt->sax->error(ctxt->userData,
3448: "sgmlParseCharRef: invalid decimal value\n");
3449: ctxt->wellFormed = 0;
3450: val = 0;
3451: break;
3452: }
3453: NEXT;
3454: }
3455: if (CUR == ';')
3456: NEXT;
3457: } else {
3458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459: ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid value\n");
3460: ctxt->wellFormed = 0;
3461: }
3462: /*
3463: * Check the value IS_CHAR ...
3464: */
3465: if (IS_CHAR(val)) {
3466: return(val);
3467: } else {
3468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3469: ctxt->sax->error(ctxt->userData, "sgmlParseCharRef: invalid xmlChar value %d\n",
3470: val);
3471: ctxt->wellFormed = 0;
3472: }
3473: return(0);
3474: }
3475:
3476:
3477: /**
3478: * sgmlParseDocTypeDecl :
3479: * @ctxt: an SGML parser context
3480: *
3481: * parse a DOCTYPE declaration
3482: *
3483: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3484: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
3485: */
3486:
3487: void
3488: sgmlParseDocTypeDecl(sgmlParserCtxtPtr ctxt) {
3489: xmlChar *name;
3490: xmlChar *ExternalID = NULL;
3491: xmlChar *URI = NULL;
3492:
3493: /*
3494: * We know that '<!DOCTYPE' has been detected.
3495: */
3496: SKIP(9);
3497:
3498: SKIP_BLANKS;
3499:
3500: /*
3501: * Parse the DOCTYPE name.
3502: */
3503: name = sgmlParseName(ctxt);
3504: if (name == NULL) {
3505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3506: ctxt->sax->error(ctxt->userData, "sgmlParseDocTypeDecl : no DOCTYPE name !\n");
3507: ctxt->wellFormed = 0;
3508: }
3509: /*
3510: * Check that upper(name) == "SGML" !!!!!!!!!!!!!
3511: */
3512:
3513: SKIP_BLANKS;
3514:
3515: /*
3516: * Check for SystemID and ExternalID
3517: */
3518: URI = sgmlParseExternalID(ctxt, &ExternalID, 0);
3519: SKIP_BLANKS;
3520:
3521: /*
1.2 veillard 3522: * Create or update the document accordingly to the DOCTYPE
3523: */
3524: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3525: (!ctxt->disableSAX))
3526: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
3527:
3528: /*
3529: * Is there any internal subset declarations ?
3530: * they are handled separately in sgmlParseInternalSubset()
3531: */
3532: if (RAW == '[')
3533: return;
3534:
3535:
3536: /*
1.1 veillard 3537: * We should be at the end of the DOCTYPE declaration.
3538: */
3539: if (CUR != '>') {
3540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3541: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
3542: ctxt->wellFormed = 0;
3543: /* We shouldn't try to resynchronize ... */
3544: }
3545: NEXT;
3546:
3547: /*
3548: * Cleanup, since we don't use all those identifiers
3549: */
3550: if (URI != NULL) xmlFree(URI);
3551: if (ExternalID != NULL) xmlFree(ExternalID);
3552: if (name != NULL) xmlFree(name);
3553: }
3554:
3555: /**
3556: * sgmlParseAttribute:
3557: * @ctxt: an SGML parser context
3558: * @value: a xmlChar ** used to store the value of the attribute
3559: *
3560: * parse an attribute
3561: *
3562: * [41] Attribute ::= Name Eq AttValue
3563: *
3564: * [25] Eq ::= S? '=' S?
3565: *
3566: * With namespace:
3567: *
3568: * [NS 11] Attribute ::= QName Eq AttValue
3569: *
3570: * Also the case QName == xmlns:??? is handled independently as a namespace
3571: * definition.
3572: *
3573: * Returns the attribute name, and the value in *value.
3574: */
3575:
3576: xmlChar *
3577: sgmlParseAttribute(sgmlParserCtxtPtr ctxt, xmlChar **value) {
3578: xmlChar *name, *val = NULL;
3579:
3580: *value = NULL;
3581: name = sgmlParseName(ctxt);
3582: if (name == NULL) {
3583: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3584: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
3585: ctxt->wellFormed = 0;
3586: return(NULL);
3587: }
3588:
3589: /*
3590: * read the value
3591: */
3592: SKIP_BLANKS;
3593: if (CUR == '=') {
3594: NEXT;
3595: SKIP_BLANKS;
3596: val = sgmlParseAttValue(ctxt);
3597: /******
3598: } else {
3599: * TODO : some attribute must have values, some may not
3600: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3601: ctxt->sax->warning(ctxt->userData,
3602: "No value for attribute %s\n", name); */
3603: }
3604:
3605: *value = val;
3606: return(name);
3607: }
3608:
3609: /**
3610: * sgmlCheckEncoding:
3611: * @ctxt: an SGML parser context
3612: * @attvalue: the attribute value
3613: *
3614: * Checks an http-equiv attribute from a Meta tag to detect
3615: * the encoding
3616: * If a new encoding is detected the parser is switched to decode
3617: * it and pass UTF8
3618: */
3619: void
3620: sgmlCheckEncoding(sgmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3621: const xmlChar *encoding;
3622:
3623: if ((ctxt == NULL) || (attvalue == NULL))
3624: return;
3625:
3626: encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
3627: if (encoding == NULL)
3628: encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
3629: if (encoding == NULL)
3630: encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET=");
3631: if (encoding != NULL) {
3632: encoding += 8;
3633: } else {
3634: encoding = xmlStrstr(attvalue, BAD_CAST"charset =");
3635: if (encoding == NULL)
3636: encoding = xmlStrstr(attvalue, BAD_CAST"Charset =");
3637: if (encoding == NULL)
3638: encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET =");
3639: if (encoding != NULL)
3640: encoding += 9;
3641: }
3642: if (encoding != NULL) {
3643: xmlCharEncoding enc;
3644: xmlCharEncodingHandlerPtr handler;
3645:
3646: while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
3647:
3648: if (ctxt->input->encoding != NULL)
3649: xmlFree((xmlChar *) ctxt->input->encoding);
3650: ctxt->input->encoding = xmlStrdup(encoding);
3651:
3652: enc = xmlParseCharEncoding((const char *) encoding);
3653: /*
3654: * registered set of known encodings
3655: */
3656: if (enc != XML_CHAR_ENCODING_ERROR) {
3657: xmlSwitchEncoding(ctxt, enc);
3658: ctxt->charset = XML_CHAR_ENCODING_UTF8;
3659: } else {
3660: /*
3661: * fallback for unknown encodings
3662: */
3663: handler = xmlFindCharEncodingHandler((const char *) encoding);
3664: if (handler != NULL) {
3665: xmlSwitchToEncoding(ctxt, handler);
3666: ctxt->charset = XML_CHAR_ENCODING_UTF8;
3667: } else {
3668: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
3669: }
3670: }
3671:
3672: if ((ctxt->input->buf != NULL) &&
3673: (ctxt->input->buf->encoder != NULL) &&
3674: (ctxt->input->buf->raw != NULL) &&
3675: (ctxt->input->buf->buffer != NULL)) {
3676: int nbchars;
3677: int processed;
3678:
3679: /*
3680: * convert as much as possible to the parser reading buffer.
3681: */
3682: processed = ctxt->input->cur - ctxt->input->base;
3683: xmlBufferShrink(ctxt->input->buf->buffer, processed);
3684: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
3685: ctxt->input->buf->buffer,
3686: ctxt->input->buf->raw);
3687: if (nbchars < 0) {
1.6 veillard 3688: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1.1 veillard 3689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3690: ctxt->sax->error(ctxt->userData,
3691: "sgmlCheckEncoding: encoder error\n");
3692: }
3693: ctxt->input->base =
3694: ctxt->input->cur = ctxt->input->buf->buffer->content;
3695: }
3696: }
3697: }
3698:
3699: /**
3700: * sgmlCheckMeta:
3701: * @ctxt: an SGML parser context
3702: * @atts: the attributes values
3703: *
3704: * Checks an attributes from a Meta tag
3705: */
3706: void
3707: sgmlCheckMeta(sgmlParserCtxtPtr ctxt, const xmlChar **atts) {
3708: int i;
3709: const xmlChar *att, *value;
3710: int http = 0;
3711: const xmlChar *content = NULL;
3712:
3713: if ((ctxt == NULL) || (atts == NULL))
3714: return;
3715:
3716: i = 0;
3717: att = atts[i++];
3718: while (att != NULL) {
3719: value = atts[i++];
3720: if ((value != NULL) &&
1.7 veillard 3721: ((xmlStrEqual(att, BAD_CAST"http-equiv")) ||
3722: (xmlStrEqual(att, BAD_CAST"Http-Equiv")) ||
3723: (xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) &&
3724: ((xmlStrEqual(value, BAD_CAST"Content-Type")) ||
3725: (xmlStrEqual(value, BAD_CAST"content-type")) ||
3726: (xmlStrEqual(value, BAD_CAST"CONTENT-TYPE"))))
1.1 veillard 3727: http = 1;
3728: else if ((value != NULL) &&
1.7 veillard 3729: ((xmlStrEqual(att, BAD_CAST"content")) ||
3730: (xmlStrEqual(att, BAD_CAST"Content")) ||
3731: (xmlStrEqual(att, BAD_CAST"CONTENT"))))
1.1 veillard 3732: content = value;
3733: att = atts[i++];
3734: }
3735: if ((http) && (content != NULL))
3736: sgmlCheckEncoding(ctxt, content);
3737:
3738: }
3739:
3740: /**
3741: * sgmlParseStartTag:
3742: * @ctxt: an SGML parser context
3743: *
3744: * parse a start of tag either for rule element or
3745: * EmptyElement. In both case we don't parse the tag closing chars.
3746: *
3747: * [40] STag ::= '<' Name (S Attribute)* S? '>'
3748: *
3749: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3750: *
3751: * With namespace:
3752: *
3753: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3754: *
3755: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
3756: *
3757: */
3758:
3759: void
3760: sgmlParseStartTag(sgmlParserCtxtPtr ctxt) {
3761: xmlChar *name;
3762: xmlChar *attname;
3763: xmlChar *attvalue;
3764: const xmlChar **atts = NULL;
3765: int nbatts = 0;
3766: int maxatts = 0;
3767: int meta = 0;
3768: int i;
3769:
3770: if (CUR != '<') return;
3771: NEXT;
3772:
3773: GROW;
3774: name = sgmlParseSGMLName(ctxt);
3775: if (name == NULL) {
3776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777: ctxt->sax->error(ctxt->userData,
3778: "sgmlParseStartTag: invalid element name\n");
3779: ctxt->wellFormed = 0;
3780: return;
3781: }
1.7 veillard 3782: if (xmlStrEqual(name, BAD_CAST"meta"))
1.1 veillard 3783: meta = 1;
3784:
3785: /*
3786: * Check for auto-closure of SGML elements.
3787: */
3788: sgmlAutoClose(ctxt, name);
3789:
3790: /*
3791: * Check for implied SGML elements.
3792: */
3793: sgmlCheckImplied(ctxt, name);
3794:
3795: /*
3796: * Now parse the attributes, it ends up with the ending
3797: *
3798: * (S Attribute)* S?
3799: */
3800: SKIP_BLANKS;
3801: while ((IS_CHAR(CUR)) &&
3802: (CUR != '>') &&
3803: ((CUR != '/') || (NXT(1) != '>'))) {
3804: long cons = ctxt->nbChars;
3805:
3806: GROW;
3807: attname = sgmlParseAttribute(ctxt, &attvalue);
3808: if (attname != NULL) {
3809:
3810: /*
3811: * Well formedness requires at most one declaration of an attribute
3812: */
3813: for (i = 0; i < nbatts;i += 2) {
1.7 veillard 3814: if (xmlStrEqual(atts[i], attname)) {
1.1 veillard 3815: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3816: ctxt->sax->error(ctxt->userData,
3817: "Attribute %s redefined\n",
3818: attname);
3819: ctxt->wellFormed = 0;
3820: xmlFree(attname);
3821: if (attvalue != NULL)
3822: xmlFree(attvalue);
3823: goto failed;
3824: }
3825: }
3826:
3827: /*
3828: * Add the pair to atts
3829: */
3830: if (atts == NULL) {
3831: maxatts = 10;
3832: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
3833: if (atts == NULL) {
1.10 ! veillard 3834: xmlGenericError(xmlGenericErrorContext,
! 3835: "malloc of %ld byte failed\n",
1.1 veillard 3836: maxatts * (long)sizeof(xmlChar *));
3837: if (name != NULL) xmlFree(name);
3838: return;
3839: }
3840: } else if (nbatts + 4 > maxatts) {
3841: maxatts *= 2;
3842: atts = (const xmlChar **) xmlRealloc(atts, maxatts * sizeof(xmlChar *));
3843: if (atts == NULL) {
1.10 ! veillard 3844: xmlGenericError(xmlGenericErrorContext,
! 3845: "realloc of %ld byte failed\n",
1.1 veillard 3846: maxatts * (long)sizeof(xmlChar *));
3847: if (name != NULL) xmlFree(name);
3848: return;
3849: }
3850: }
3851: atts[nbatts++] = attname;
3852: atts[nbatts++] = attvalue;
3853: atts[nbatts] = NULL;
3854: atts[nbatts + 1] = NULL;
3855: }
3856:
3857: failed:
3858: SKIP_BLANKS;
3859: if (cons == ctxt->nbChars) {
3860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3861: ctxt->sax->error(ctxt->userData,
3862: "sgmlParseStartTag: problem parsing attributes\n");
3863: ctxt->wellFormed = 0;
3864: break;
3865: }
3866: }
3867:
3868: /*
3869: * Handle specific association to the META tag
3870: */
3871: if (meta)
3872: sgmlCheckMeta(ctxt, atts);
3873:
3874: /*
3875: * SAX: Start of Element !
3876: */
3877: sgmlnamePush(ctxt, xmlStrdup(name));
3878: #ifdef DEBUG
1.10 ! veillard 3879: xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name);
1.1 veillard 3880: #endif
3881: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
3882: ctxt->sax->startElement(ctxt->userData, name, atts);
3883:
3884: if (atts != NULL) {
3885: for (i = 0;i < nbatts;i++) {
3886: if (atts[i] != NULL)
3887: xmlFree((xmlChar *) atts[i]);
3888: }
3889: xmlFree((void *) atts);
3890: }
3891: if (name != NULL) xmlFree(name);
3892: }
3893:
3894: /**
3895: * sgmlParseEndTag:
3896: * @ctxt: an SGML parser context
3897: *
3898: * parse an end of tag
3899: *
3900: * [42] ETag ::= '</' Name S? '>'
3901: *
3902: * With namespace
3903: *
3904: * [NS 9] ETag ::= '</' QName S? '>'
3905: */
3906:
3907: void
3908: sgmlParseEndTag(sgmlParserCtxtPtr ctxt) {
3909: xmlChar *name;
3910: xmlChar *oldname;
3911: int i;
3912:
3913: if ((CUR != '<') || (NXT(1) != '/')) {
3914: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3915: ctxt->sax->error(ctxt->userData, "sgmlParseEndTag: '</' not found\n");
3916: ctxt->wellFormed = 0;
3917: return;
3918: }
3919: SKIP(2);
3920:
3921: name = sgmlParseSGMLName(ctxt);
3922: if (name == NULL) {
3923: if (CUR == '>') {
3924: NEXT;
3925: oldname = sgmlnamePop(ctxt);
3926: if (oldname != NULL) {
3927: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3928: ctxt->sax->endElement(ctxt->userData, name);
3929: #ifdef DEBUG
1.10 ! veillard 3930: xmlGenericError(xmlGenericErrorContext,"End of tag </>: popping out %s\n", oldname);
1.1 veillard 3931: #endif
3932: xmlFree(oldname);
3933: #ifdef DEBUG
3934: } else {
1.10 ! veillard 3935: xmlGenericError(xmlGenericErrorContext,"End of tag </>: stack empty !!!\n");
1.1 veillard 3936: #endif
3937: }
3938: return;
3939: } else
3940: return;
3941: }
3942:
3943: /*
3944: * We should definitely be at the ending "S? '>'" part
3945: */
3946: SKIP_BLANKS;
3947: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
3948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3949: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
3950: ctxt->wellFormed = 0;
3951: } else
3952: NEXT;
3953:
3954: /*
3955: * If the name read is not one of the element in the parsing stack
3956: * then return, it's just an error.
3957: */
3958: for (i = (ctxt->nameNr - 1);i >= 0;i--) {
1.7 veillard 3959: if (xmlStrEqual(name, ctxt->nameTab[i])) break;
1.1 veillard 3960: }
3961: if (i < 0) {
3962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3963: ctxt->sax->error(ctxt->userData,
3964: "Unexpected end tag : %s\n", name);
3965: xmlFree(name);
3966: ctxt->wellFormed = 0;
3967: return;
3968: }
3969:
3970:
3971: /*
3972: * Check for auto-closure of SGML elements.
3973: */
3974:
3975: sgmlAutoCloseOnClose(ctxt, name);
3976:
3977: /*
3978: * Well formedness constraints, opening and closing must match.
3979: * With the exception that the autoclose may have popped stuff out
3980: * of the stack.
3981: */
3982: if (((name[0] != '/') || (name[1] != 0)) &&
1.7 veillard 3983: (!xmlStrEqual(name, ctxt->name))) {
1.1 veillard 3984: #ifdef DEBUG
1.10 ! veillard 3985: xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name);
1.1 veillard 3986: #endif
3987: if ((ctxt->name != NULL) &&
1.7 veillard 3988: (!xmlStrEqual(ctxt->name, name))) {
1.1 veillard 3989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3990: ctxt->sax->error(ctxt->userData,
3991: "Opening and ending tag mismatch: %s and %s\n",
3992: name, ctxt->name);
3993: ctxt->wellFormed = 0;
3994: }
3995: }
3996:
3997: /*
3998: * SAX: End of Tag
3999: */
4000: oldname = ctxt->name;
4001: if (((name[0] == '/') && (name[1] == 0)) ||
1.7 veillard 4002: ((oldname != NULL) && (xmlStrEqual(oldname, name)))) {
1.1 veillard 4003: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4004: ctxt->sax->endElement(ctxt->userData, name);
4005: oldname = sgmlnamePop(ctxt);
4006: if (oldname != NULL) {
4007: #ifdef DEBUG
1.10 ! veillard 4008: xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname);
1.1 veillard 4009: #endif
4010: xmlFree(oldname);
4011: #ifdef DEBUG
4012: } else {
1.10 ! veillard 4013: xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name);
1.1 veillard 4014: #endif
4015: }
4016: }
4017:
4018: if (name != NULL)
4019: xmlFree(name);
4020:
4021: return;
4022: }
4023:
4024:
4025: /**
4026: * sgmlParseReference:
4027: * @ctxt: an SGML parser context
4028: *
4029: * parse and handle entity references in content,
4030: * this will end-up in a call to character() since this is either a
4031: * CharRef, or a predefined entity.
4032: */
4033: void
4034: sgmlParseReference(sgmlParserCtxtPtr ctxt) {
4035: sgmlEntityDescPtr ent;
4036: xmlChar out[6];
4037: xmlChar *name;
4038: if (CUR != '&') return;
4039:
4040: if (NXT(1) == '#') {
4041: unsigned int c;
4042: int bits, i = 0;
4043:
4044: c = sgmlParseCharRef(ctxt);
4045: if (c < 0x80) { out[i++]= c; bits= -6; }
4046: else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
4047: else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
4048: else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
4049:
4050: for ( ; bits >= 0; bits-= 6) {
4051: out[i++]= ((c >> bits) & 0x3F) | 0x80;
4052: }
4053: out[i] = 0;
4054:
4055: sgmlCheckParagraph(ctxt);
4056: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4057: ctxt->sax->characters(ctxt->userData, out, i);
4058: } else {
4059: ent = sgmlParseEntityRef(ctxt, &name);
4060: if (name == NULL) {
4061: sgmlCheckParagraph(ctxt);
4062: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4063: ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
4064: return;
4065: }
4066: if ((ent == NULL) || (ent->value <= 0)) {
4067: sgmlCheckParagraph(ctxt);
4068: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
4069: ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
4070: ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
4071: /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
4072: }
4073: } else {
4074: unsigned int c;
4075: int bits, i = 0;
4076:
4077: c = ent->value;
4078: if (c < 0x80)
4079: { out[i++]= c; bits= -6; }
4080: else if (c < 0x800)
4081: { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
4082: else if (c < 0x10000)
4083: { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
4084: else
4085: { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
4086:
4087: for ( ; bits >= 0; bits-= 6) {
4088: out[i++]= ((c >> bits) & 0x3F) | 0x80;
4089: }
4090: out[i] = 0;
4091:
4092: sgmlCheckParagraph(ctxt);
4093: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4094: ctxt->sax->characters(ctxt->userData, out, i);
4095: }
4096: xmlFree(name);
4097: }
4098: }
4099:
4100: /**
4101: * sgmlParseContent:
4102: * @ctxt: an SGML parser context
4103: * @name: the node name
4104: *
4105: * Parse a content: comment, sub-element, reference or text.
4106: *
4107: */
4108:
4109: void
4110: sgmlParseContent(sgmlParserCtxtPtr ctxt) {
4111: xmlChar *currentNode;
4112: int depth;
4113:
4114: currentNode = xmlStrdup(ctxt->name);
4115: depth = ctxt->nameNr;
4116: while (1) {
4117: long cons = ctxt->nbChars;
4118:
4119: GROW;
4120: /*
4121: * Our tag or one of it's parent or children is ending.
4122: */
4123: if ((CUR == '<') && (NXT(1) == '/')) {
4124: sgmlParseEndTag(ctxt);
4125: if (currentNode != NULL) xmlFree(currentNode);
4126: return;
4127: }
4128:
4129: /*
4130: * Has this node been popped out during parsing of
4131: * the next element
4132: */
1.7 veillard 4133: if ((!xmlStrEqual(currentNode, ctxt->name)) &&
1.1 veillard 4134: (depth >= ctxt->nameNr)) {
4135: if (currentNode != NULL) xmlFree(currentNode);
4136: return;
4137: }
4138:
4139: /*
4140: * Sometimes DOCTYPE arrives in the middle of the document
4141: */
4142: if ((CUR == '<') && (NXT(1) == '!') &&
4143: (UPP(2) == 'D') && (UPP(3) == 'O') &&
4144: (UPP(4) == 'C') && (UPP(5) == 'T') &&
4145: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
4146: (UPP(8) == 'E')) {
4147: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4148: ctxt->sax->error(ctxt->userData,
4149: "Misplaced DOCTYPE declaration\n");
4150: ctxt->wellFormed = 0;
4151: sgmlParseDocTypeDecl(ctxt);
4152: }
4153:
4154: /*
4155: * First case : a comment
4156: */
4157: if ((CUR == '<') && (NXT(1) == '!') &&
4158: (NXT(2) == '-') && (NXT(3) == '-')) {
4159: sgmlParseComment(ctxt);
4160: }
4161:
4162: /*
4163: * Second case : a sub-element.
4164: */
4165: else if (CUR == '<') {
4166: sgmlParseElement(ctxt);
4167: }
4168:
4169: /*
4170: * Third case : a reference. If if has not been resolved,
4171: * parsing returns it's Name, create the node
4172: */
4173: else if (CUR == '&') {
4174: sgmlParseReference(ctxt);
4175: }
4176:
4177: /*
4178: * Fourth : end of the resource
4179: */
4180: else if (CUR == 0) {
4181: sgmlAutoClose(ctxt, NULL);
4182: }
4183:
4184: /*
4185: * Last case, text. Note that References are handled directly.
4186: */
4187: else {
4188: sgmlParseCharData(ctxt, 0);
4189: }
4190:
4191: if (cons == ctxt->nbChars) {
4192: if (ctxt->node != NULL) {
4193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4194: ctxt->sax->error(ctxt->userData,
4195: "detected an error in element content\n");
4196: ctxt->wellFormed = 0;
4197: }
4198: break;
4199: }
4200:
4201: GROW;
4202: }
4203: if (currentNode != NULL) xmlFree(currentNode);
4204: }
4205:
4206: /**
4207: * sgmlParseElement:
4208: * @ctxt: an SGML parser context
4209: *
4210: * parse an SGML element, this is highly recursive
4211: *
4212: * [39] element ::= EmptyElemTag | STag content ETag
4213: *
4214: * [41] Attribute ::= Name Eq AttValue
4215: */
4216:
4217: void
4218: sgmlParseElement(sgmlParserCtxtPtr ctxt) {
4219: xmlChar *name;
4220: xmlChar *currentNode = NULL;
4221: sgmlElemDescPtr info;
4222: sgmlParserNodeInfo node_info;
4223: xmlChar *oldname;
4224: int depth = ctxt->nameNr;
4225:
4226: /* Capture start position */
4227: if (ctxt->record_info) {
4228: node_info.begin_pos = ctxt->input->consumed +
4229: (CUR_PTR - ctxt->input->base);
4230: node_info.begin_line = ctxt->input->line;
4231: }
4232:
4233: oldname = xmlStrdup(ctxt->name);
4234: sgmlParseStartTag(ctxt);
4235: name = ctxt->name;
4236: #ifdef DEBUG
4237: if (oldname == NULL)
1.10 ! veillard 4238: xmlGenericError(xmlGenericErrorContext,
! 4239: "Start of element %s\n", name);
1.1 veillard 4240: else if (name == NULL)
1.10 ! veillard 4241: xmlGenericError(xmlGenericErrorContext,
! 4242: "Start of element failed, was %s\n", oldname);
1.1 veillard 4243: else
1.10 ! veillard 4244: xmlGenericError(xmlGenericErrorContext,
! 4245: "Start of element %s, was %s\n", name, oldname);
1.1 veillard 4246: #endif
1.7 veillard 4247: if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) ||
1.1 veillard 4248: (name == NULL)) {
4249: if (CUR == '>')
4250: NEXT;
4251: if (oldname != NULL)
4252: xmlFree(oldname);
4253: return;
4254: }
4255: if (oldname != NULL)
4256: xmlFree(oldname);
4257:
4258: /*
4259: * Lookup the info for that element.
4260: */
4261: info = sgmlTagLookup(name);
4262: if (info == NULL) {
4263: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.4 veillard 4264: ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
1.1 veillard 4265: name);
4266: ctxt->wellFormed = 0;
4267: } else if (info->depr) {
4268: /***************************
4269: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4270: ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n",
4271: name);
4272: ***************************/
4273: }
4274:
4275: /*
4276: * Check for an Empty Element labelled the XML/SGML way
4277: */
4278: if ((CUR == '/') && (NXT(1) == '>')) {
4279: SKIP(2);
4280: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4281: ctxt->sax->endElement(ctxt->userData, name);
4282: oldname = sgmlnamePop(ctxt);
4283: #ifdef DEBUG
1.10 ! veillard 4284: xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname);
1.1 veillard 4285: #endif
4286: if (oldname != NULL)
4287: xmlFree(oldname);
4288: return;
4289: }
4290:
4291: if (CUR == '>') {
4292: NEXT;
4293: } else {
4294: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4295: ctxt->sax->error(ctxt->userData,
4296: "Couldn't find end of Start Tag %s\n",
4297: name);
4298: ctxt->wellFormed = 0;
4299:
4300: /*
4301: * end of parsing of this node.
4302: */
1.7 veillard 4303: if (xmlStrEqual(name, ctxt->name)) {
1.1 veillard 4304: nodePop(ctxt);
4305: oldname = sgmlnamePop(ctxt);
4306: #ifdef DEBUG
1.10 ! veillard 4307: xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname);
1.1 veillard 4308: #endif
4309: if (oldname != NULL)
4310: xmlFree(oldname);
4311: }
4312:
4313: /*
4314: * Capture end position and add node
4315: */
4316: if ( currentNode != NULL && ctxt->record_info ) {
4317: node_info.end_pos = ctxt->input->consumed +
4318: (CUR_PTR - ctxt->input->base);
4319: node_info.end_line = ctxt->input->line;
4320: node_info.node = ctxt->node;
4321: xmlParserAddNodeInfo(ctxt, &node_info);
4322: }
4323: return;
4324: }
4325:
4326: /*
4327: * Check for an Empty Element from DTD definition
4328: */
4329: if ((info != NULL) && (info->empty)) {
4330: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4331: ctxt->sax->endElement(ctxt->userData, name);
4332: oldname = sgmlnamePop(ctxt);
4333: #ifdef DEBUG
1.10 ! veillard 4334: xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
1.1 veillard 4335: #endif
4336: if (oldname != NULL)
4337: xmlFree(oldname);
4338: return;
4339: }
4340:
4341: /*
4342: * Parse the content of the element:
4343: */
4344: currentNode = xmlStrdup(ctxt->name);
4345: depth = ctxt->nameNr;
4346: while (IS_CHAR(CUR)) {
4347: sgmlParseContent(ctxt);
4348: if (ctxt->nameNr < depth) break;
4349: }
4350:
4351: if (!IS_CHAR(CUR)) {
4352: /************
4353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4354: ctxt->sax->error(ctxt->userData,
4355: "Premature end of data in tag %s\n", currentNode);
4356: ctxt->wellFormed = 0;
4357: *************/
4358:
4359: /*
4360: * end of parsing of this node.
4361: */
4362: nodePop(ctxt);
4363: oldname = sgmlnamePop(ctxt);
4364: #ifdef DEBUG
1.10 ! veillard 4365: xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname);
1.1 veillard 4366: #endif
4367: if (oldname != NULL)
4368: xmlFree(oldname);
4369: if (currentNode != NULL)
4370: xmlFree(currentNode);
4371: return;
4372: }
4373:
4374: /*
4375: * Capture end position and add node
4376: */
4377: if ( currentNode != NULL && ctxt->record_info ) {
4378: node_info.end_pos = ctxt->input->consumed +
4379: (CUR_PTR - ctxt->input->base);
4380: node_info.end_line = ctxt->input->line;
4381: node_info.node = ctxt->node;
4382: xmlParserAddNodeInfo(ctxt, &node_info);
4383: }
4384: if (currentNode != NULL)
4385: xmlFree(currentNode);
4386: }
4387:
4388: /**
1.3 veillard 4389: * sgmlParseEntityDecl:
4390: * @ctxt: an SGML parser context
4391: *
4392: * parse <!ENTITY declarations
4393: *
4394: */
4395:
4396: void
4397: sgmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4398: xmlChar *name = NULL;
4399: xmlChar *value = NULL;
4400: xmlChar *URI = NULL, *literal = NULL;
4401: xmlChar *ndata = NULL;
4402: int isParameter = 0;
4403: xmlChar *orig = NULL;
4404:
4405: GROW;
4406: if ((RAW == '<') && (NXT(1) == '!') &&
4407: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4408: (NXT(4) == 'T') && (NXT(5) == 'I') &&
4409: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
4410: xmlParserInputPtr input = ctxt->input;
4411: ctxt->instate = XML_PARSER_ENTITY_DECL;
4412: SHRINK;
4413: SKIP(8);
4414: if (!IS_BLANK(CUR)) {
1.6 veillard 4415: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3 veillard 4416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4417: ctxt->sax->error(ctxt->userData,
4418: "Space required after '<!ENTITY'\n");
4419: ctxt->wellFormed = 0;
4420: ctxt->disableSAX = 1;
4421: }
4422: SKIP_BLANKS;
4423:
4424: if (RAW == '%') {
4425: NEXT;
4426: if (!IS_BLANK(CUR)) {
1.6 veillard 4427: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3 veillard 4428: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4429: ctxt->sax->error(ctxt->userData,
4430: "Space required after '%'\n");
4431: ctxt->wellFormed = 0;
4432: ctxt->disableSAX = 1;
4433: }
4434: SKIP_BLANKS;
4435: isParameter = 1;
4436: }
4437:
4438: name = xmlParseName(ctxt);
4439: if (name == NULL) {
1.6 veillard 4440: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.3 veillard 4441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4442: ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n");
4443: ctxt->wellFormed = 0;
4444: ctxt->disableSAX = 1;
4445: return;
4446: }
4447: if (!IS_BLANK(CUR)) {
1.6 veillard 4448: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3 veillard 4449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4450: ctxt->sax->error(ctxt->userData,
4451: "Space required after the entity name\n");
4452: ctxt->wellFormed = 0;
4453: ctxt->disableSAX = 1;
4454: }
4455: SKIP_BLANKS;
4456:
4457: /*
4458: * handle the various case of definitions...
4459: */
4460: if (isParameter) {
4461: if ((RAW == '"') || (RAW == '\'')) {
4462: value = xmlParseEntityValue(ctxt, &orig);
4463: if (value) {
4464: if ((ctxt->sax != NULL) &&
4465: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4466: ctxt->sax->entityDecl(ctxt->userData, name,
4467: XML_INTERNAL_PARAMETER_ENTITY,
4468: NULL, NULL, value);
4469: }
4470: } else {
4471: URI = xmlParseExternalID(ctxt, &literal, 1);
4472: if ((URI == NULL) && (literal == NULL)) {
1.6 veillard 4473: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.3 veillard 4474: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4475: ctxt->sax->error(ctxt->userData,
4476: "Entity value required\n");
4477: ctxt->wellFormed = 0;
4478: ctxt->disableSAX = 1;
4479: }
4480: if (URI) {
4481: xmlURIPtr uri;
4482:
4483: uri = xmlParseURI((const char *) URI);
4484: if (uri == NULL) {
1.6 veillard 4485: ctxt->errNo = XML_ERR_INVALID_URI;
1.3 veillard 4486: if ((ctxt->sax != NULL) &&
4487: (!ctxt->disableSAX) &&
4488: (ctxt->sax->error != NULL))
4489: ctxt->sax->error(ctxt->userData,
4490: "Invalid URI: %s\n", URI);
4491: ctxt->wellFormed = 0;
4492: } else {
4493: if (uri->fragment != NULL) {
1.6 veillard 4494: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.3 veillard 4495: if ((ctxt->sax != NULL) &&
4496: (!ctxt->disableSAX) &&
4497: (ctxt->sax->error != NULL))
4498: ctxt->sax->error(ctxt->userData,
4499: "Fragment not allowed: %s\n", URI);
4500: ctxt->wellFormed = 0;
4501: } else {
4502: if ((ctxt->sax != NULL) &&
4503: (!ctxt->disableSAX) &&
4504: (ctxt->sax->entityDecl != NULL))
4505: ctxt->sax->entityDecl(ctxt->userData, name,
4506: XML_EXTERNAL_PARAMETER_ENTITY,
4507: literal, URI, NULL);
4508: }
4509: xmlFreeURI(uri);
4510: }
4511: }
4512: }
4513: } else {
4514: if ((RAW == '"') || (RAW == '\'')) {
4515: value = xmlParseEntityValue(ctxt, &orig);
4516: if ((ctxt->sax != NULL) &&
4517: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4518: ctxt->sax->entityDecl(ctxt->userData, name,
4519: XML_INTERNAL_GENERAL_ENTITY,
4520: NULL, NULL, value);
4521: } else {
4522: URI = xmlParseExternalID(ctxt, &literal, 1);
4523: if ((URI == NULL) && (literal == NULL)) {
1.6 veillard 4524: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.3 veillard 4525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4526: ctxt->sax->error(ctxt->userData,
4527: "Entity value required\n");
4528: ctxt->wellFormed = 0;
4529: ctxt->disableSAX = 1;
4530: }
4531: if (URI) {
4532: xmlURIPtr uri;
4533:
4534: uri = xmlParseURI((const char *)URI);
4535: if (uri == NULL) {
1.6 veillard 4536: ctxt->errNo = XML_ERR_INVALID_URI;
1.3 veillard 4537: if ((ctxt->sax != NULL) &&
4538: (!ctxt->disableSAX) &&
4539: (ctxt->sax->error != NULL))
4540: ctxt->sax->error(ctxt->userData,
4541: "Invalid URI: %s\n", URI);
4542: ctxt->wellFormed = 0;
4543: } else {
4544: if (uri->fragment != NULL) {
1.6 veillard 4545: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.3 veillard 4546: if ((ctxt->sax != NULL) &&
4547: (!ctxt->disableSAX) &&
4548: (ctxt->sax->error != NULL))
4549: ctxt->sax->error(ctxt->userData,
4550: "Fragment not allowed: %s\n", URI);
4551: ctxt->wellFormed = 0;
4552: }
4553: xmlFreeURI(uri);
4554: }
4555: }
4556: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.6 veillard 4557: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3 veillard 4558: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4559: ctxt->sax->error(ctxt->userData,
4560: "Space required before content model\n");
4561: ctxt->wellFormed = 0;
4562: ctxt->disableSAX = 1;
4563: }
4564: SKIP_BLANKS;
4565:
4566: /*
4567: * SGML specific: here we can get the content model
4568: */
4569: if (RAW != '>') {
4570: xmlChar *contmod;
4571:
4572: contmod = xmlParseName(ctxt);
4573:
4574: if (contmod == NULL) {
1.6 veillard 4575: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3 veillard 4576: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4577: ctxt->sax->error(ctxt->userData,
4578: "Could not parse entity content model\n");
4579: ctxt->wellFormed = 0;
4580: ctxt->disableSAX = 1;
4581: } else {
1.7 veillard 4582: if (xmlStrEqual(contmod, BAD_CAST"NDATA")) {
1.3 veillard 4583: if (!IS_BLANK(CUR)) {
1.6 veillard 4584: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.3 veillard 4585: if ((ctxt->sax != NULL) &&
4586: (ctxt->sax->error != NULL))
4587: ctxt->sax->error(ctxt->userData,
4588: "Space required after 'NDATA'\n");
4589: ctxt->wellFormed = 0;
4590: ctxt->disableSAX = 1;
4591: }
4592: SKIP_BLANKS;
4593: ndata = xmlParseName(ctxt);
4594: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4595: (ctxt->sax->unparsedEntityDecl != NULL)) {
4596: ctxt->sax->unparsedEntityDecl(ctxt->userData,
4597: name, literal, URI, ndata);
4598: }
1.7 veillard 4599: } else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) {
1.3 veillard 4600: if ((ctxt->sax != NULL) &&
4601: (ctxt->sax->warning != NULL))
4602: ctxt->sax->warning(ctxt->userData,
4603: "SUBDOC entities are not supported\n");
4604: SKIP_BLANKS;
4605: ndata = xmlParseName(ctxt);
4606: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4607: (ctxt->sax->unparsedEntityDecl != NULL)) {
4608: ctxt->sax->unparsedEntityDecl(ctxt->userData,
4609: name, literal, URI, ndata);
4610: }
1.7 veillard 4611: } else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) {
1.3 veillard 4612: if ((ctxt->sax != NULL) &&
4613: (ctxt->sax->warning != NULL))
4614: ctxt->sax->warning(ctxt->userData,
4615: "CDATA entities are not supported\n");
4616: SKIP_BLANKS;
4617: ndata = xmlParseName(ctxt);
4618: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4619: (ctxt->sax->unparsedEntityDecl != NULL)) {
4620: ctxt->sax->unparsedEntityDecl(ctxt->userData,
4621: name, literal, URI, ndata);
4622: }
4623: }
4624: xmlFree(contmod);
4625: }
4626: } else {
4627: if ((ctxt->sax != NULL) &&
4628: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4629: ctxt->sax->entityDecl(ctxt->userData, name,
4630: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4631: literal, URI, NULL);
4632: }
4633: }
4634: }
4635: SKIP_BLANKS;
4636: if (RAW != '>') {
1.6 veillard 4637: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.3 veillard 4638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4639: ctxt->sax->error(ctxt->userData,
4640: "sgmlParseEntityDecl: entity %s not terminated\n", name);
4641: ctxt->wellFormed = 0;
4642: ctxt->disableSAX = 1;
4643: } else {
4644: if (input != ctxt->input) {
1.6 veillard 4645: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.3 veillard 4646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4647: ctxt->sax->error(ctxt->userData,
4648: "Entity declaration doesn't start and stop in the same entity\n");
4649: ctxt->wellFormed = 0;
4650: ctxt->disableSAX = 1;
4651: }
4652: NEXT;
4653: }
4654: if (orig != NULL) {
4655: /*
4656: * Ugly mechanism to save the raw entity value.
4657: */
4658: xmlEntityPtr cur = NULL;
4659:
4660: if (isParameter) {
4661: if ((ctxt->sax != NULL) &&
4662: (ctxt->sax->getParameterEntity != NULL))
4663: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4664: } else {
4665: if ((ctxt->sax != NULL) &&
4666: (ctxt->sax->getEntity != NULL))
4667: cur = ctxt->sax->getEntity(ctxt->userData, name);
4668: }
4669: if (cur != NULL) {
4670: if (cur->orig != NULL)
4671: xmlFree(orig);
4672: else
4673: cur->orig = orig;
4674: } else
4675: xmlFree(orig);
4676: }
4677: if (name != NULL) xmlFree(name);
4678: if (value != NULL) xmlFree(value);
4679: if (URI != NULL) xmlFree(URI);
4680: if (literal != NULL) xmlFree(literal);
4681: if (ndata != NULL) xmlFree(ndata);
4682: }
4683: }
4684:
4685: /**
4686: * sgmlParseMarkupDecl:
4687: * @ctxt: an SGML parser context
4688: *
4689: * parse Markup declarations
4690: *
4691: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4692: * NotationDecl | PI | Comment
4693: */
4694: void
4695: sgmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4696: GROW;
4697: xmlParseElementDecl(ctxt);
4698: xmlParseAttributeListDecl(ctxt);
4699: sgmlParseEntityDecl(ctxt);
4700: xmlParseNotationDecl(ctxt);
4701: xmlParsePI(ctxt);
4702: xmlParseComment(ctxt);
4703: /*
4704: * This is only for internal subset. On external entities,
4705: * the replacement is done before parsing stage
4706: */
4707: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4708: xmlParsePEReference(ctxt);
4709: ctxt->instate = XML_PARSER_DTD;
4710: }
4711:
4712: /**
4713: * sgmlParseInternalsubset:
4714: * @ctxt: an SGML parser context
4715: *
4716: * parse the internal subset declaration
4717: *
4718: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
4719: */
4720:
4721: void
4722: sgmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
4723: /*
4724: * Is there any DTD definition ?
4725: */
4726: if (RAW == '[') {
4727: ctxt->instate = XML_PARSER_DTD;
4728: NEXT;
4729: /*
4730: * Parse the succession of Markup declarations and
4731: * PEReferences.
4732: * Subsequence (markupdecl | PEReference | S)*
4733: */
4734: while (RAW != ']') {
4735: const xmlChar *check = CUR_PTR;
4736: int cons = ctxt->input->consumed;
4737:
4738: SKIP_BLANKS;
4739: sgmlParseMarkupDecl(ctxt);
4740: xmlParsePEReference(ctxt);
4741:
4742: /*
4743: * Pop-up of finished entities.
4744: */
4745: while ((RAW == 0) && (ctxt->inputNr > 1))
4746: xmlPopInput(ctxt);
4747:
4748: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.6 veillard 4749: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.3 veillard 4750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4751: ctxt->sax->error(ctxt->userData,
4752: "sgmlParseInternalSubset: error detected in Markup declaration\n");
4753: ctxt->wellFormed = 0;
4754: ctxt->disableSAX = 1;
4755: break;
4756: }
4757: }
4758: if (RAW == ']') {
4759: NEXT;
4760: SKIP_BLANKS;
4761: }
4762: }
4763:
4764: /*
4765: * We should be at the end of the DOCTYPE declaration.
4766: */
4767: if (RAW != '>') {
1.6 veillard 4768: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.3 veillard 4769: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4770: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
4771: ctxt->wellFormed = 0;
4772: ctxt->disableSAX = 1;
4773: }
4774: NEXT;
4775: }
4776:
4777: /**
1.2 veillard 4778: * sgmlParseMisc:
4779: * @ctxt: an XML parser context
4780: *
4781: * parse an XML Misc* optionnal field.
4782: *
4783: * [27] Misc ::= Comment | PI | S
4784: */
4785:
4786: void
4787: sgmlParseMisc(xmlParserCtxtPtr ctxt) {
4788: while (((RAW == '<') && (NXT(1) == '?')) ||
4789: ((RAW == '<') && (NXT(1) == '!') &&
4790: (NXT(2) == '-') && (NXT(3) == '-')) ||
4791: IS_BLANK(CUR)) {
4792: if ((RAW == '<') && (NXT(1) == '?')) {
4793: xmlParsePI(ctxt); /* TODO: SGML PIs differs */
4794: } else if (IS_BLANK(CUR)) {
4795: NEXT;
4796: } else
4797: xmlParseComment(ctxt);
4798: }
4799: }
4800:
4801: /**
1.1 veillard 4802: * sgmlParseDocument :
4803: * @ctxt: an SGML parser context
4804: *
4805: * parse an SGML document (and build a tree if using the standard SAX
4806: * interface).
4807: *
4808: * Returns 0, -1 in case of error. the parser context is augmented
4809: * as a result of the parsing.
4810: */
4811:
4812: int
4813: sgmlParseDocument(sgmlParserCtxtPtr ctxt) {
1.2 veillard 4814: xmlChar start[4];
4815: xmlCharEncoding enc;
1.1 veillard 4816: xmlDtdPtr dtd;
4817:
4818: sgmlDefaultSAXHandlerInit();
4819: ctxt->html = 2;
4820:
4821: GROW;
4822: /*
4823: * SAX: beginning of the document processing.
4824: */
4825: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4826: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4827:
1.2 veillard 4828: /*
4829: * Get the 4 first bytes and decode the charset
4830: * if enc != XML_CHAR_ENCODING_NONE
4831: * plug some encoding conversion routines.
4832: */
4833: start[0] = RAW;
4834: start[1] = NXT(1);
4835: start[2] = NXT(2);
4836: start[3] = NXT(3);
4837: enc = xmlDetectCharEncoding(start, 4);
4838: if (enc != XML_CHAR_ENCODING_NONE) {
4839: xmlSwitchEncoding(ctxt, enc);
4840: }
4841:
1.1 veillard 4842: /*
4843: * Wipe out everything which is before the first '<'
4844: */
4845: SKIP_BLANKS;
4846: if (CUR == 0) {
4847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4848: ctxt->sax->error(ctxt->userData, "Document is empty\n");
4849: ctxt->wellFormed = 0;
4850: }
4851:
4852: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4853: ctxt->sax->startDocument(ctxt->userData);
4854:
4855:
4856: /*
1.2 veillard 4857: * The Misc part of the Prolog
1.1 veillard 4858: */
1.2 veillard 4859: GROW;
4860: sgmlParseMisc(ctxt);
1.1 veillard 4861:
4862: /*
4863: * Then possibly doc type declaration(s) and more Misc
4864: * (doctypedecl Misc*)?
4865: */
1.2 veillard 4866: GROW;
4867: if ((RAW == '<') && (NXT(1) == '!') &&
4868: (NXT(2) == 'D') && (NXT(3) == 'O') &&
4869: (NXT(4) == 'C') && (NXT(5) == 'T') &&
4870: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
4871: (NXT(8) == 'E')) {
4872:
4873: ctxt->inSubset = 1;
1.1 veillard 4874: sgmlParseDocTypeDecl(ctxt);
1.2 veillard 4875: if (RAW == '[') {
4876: ctxt->instate = XML_PARSER_DTD;
1.3 veillard 4877: sgmlParseInternalSubset(ctxt);
1.2 veillard 4878: }
4879:
4880: /*
4881: * Create and update the external subset.
4882: */
4883: ctxt->inSubset = 2;
4884: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
4885: (!ctxt->disableSAX))
4886: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
4887: ctxt->extSubSystem, ctxt->extSubURI);
4888: ctxt->inSubset = 0;
4889:
4890:
4891: ctxt->instate = XML_PARSER_PROLOG;
4892: sgmlParseMisc(ctxt);
1.1 veillard 4893: }
4894:
4895: /*
4896: * Time to start parsing the tree itself
4897: */
4898: sgmlParseContent(ctxt);
4899:
4900: /*
4901: * autoclose
4902: */
4903: if (CUR == 0)
4904: sgmlAutoClose(ctxt, NULL);
4905:
4906:
4907: /*
4908: * SAX: end of the document processing.
4909: */
4910: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4911: ctxt->sax->endDocument(ctxt->userData);
4912:
4913: if (ctxt->myDoc != NULL) {
4914: dtd = xmlGetIntSubset(ctxt->myDoc);
4915: if (dtd == NULL)
4916: ctxt->myDoc->intSubset =
4917: xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
4918: BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
4919: BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
4920: }
4921: if (! ctxt->wellFormed) return(-1);
4922: return(0);
4923: }
4924:
4925:
4926: /************************************************************************
4927: * *
4928: * Parser contexts handling *
4929: * *
4930: ************************************************************************/
4931:
4932: /**
4933: * xmlInitParserCtxt:
4934: * @ctxt: an SGML parser context
4935: *
4936: * Initialize a parser context
4937: */
4938:
4939: void
4940: sgmlInitParserCtxt(sgmlParserCtxtPtr ctxt)
4941: {
4942: sgmlSAXHandler *sax;
4943:
4944: if (ctxt == NULL) return;
4945: memset(ctxt, 0, sizeof(sgmlParserCtxt));
4946:
4947: sax = (sgmlSAXHandler *) xmlMalloc(sizeof(sgmlSAXHandler));
4948: if (sax == NULL) {
1.10 ! veillard 4949: xmlGenericError(xmlGenericErrorContext,
! 4950: "sgmlInitParserCtxt: out of memory\n");
1.1 veillard 4951: }
4952: memset(sax, 0, sizeof(sgmlSAXHandler));
4953:
4954: /* Allocate the Input stack */
4955: ctxt->inputTab = (sgmlParserInputPtr *)
4956: xmlMalloc(5 * sizeof(sgmlParserInputPtr));
4957: if (ctxt->inputTab == NULL) {
1.10 ! veillard 4958: xmlGenericError(xmlGenericErrorContext,
! 4959: "sgmlInitParserCtxt: out of memory\n");
1.1 veillard 4960: }
4961: ctxt->inputNr = 0;
4962: ctxt->inputMax = 5;
4963: ctxt->input = NULL;
4964: ctxt->version = NULL;
4965: ctxt->encoding = NULL;
4966: ctxt->standalone = -1;
4967: ctxt->instate = XML_PARSER_START;
4968:
4969: /* Allocate the Node stack */
4970: ctxt->nodeTab = (sgmlNodePtr *) xmlMalloc(10 * sizeof(sgmlNodePtr));
4971: ctxt->nodeNr = 0;
4972: ctxt->nodeMax = 10;
4973: ctxt->node = NULL;
4974:
4975: /* Allocate the Name stack */
4976: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
4977: ctxt->nameNr = 0;
4978: ctxt->nameMax = 10;
4979: ctxt->name = NULL;
4980:
4981: if (sax == NULL) ctxt->sax = &sgmlDefaultSAXHandler;
4982: else {
4983: ctxt->sax = sax;
4984: memcpy(sax, &sgmlDefaultSAXHandler, sizeof(sgmlSAXHandler));
4985: }
4986: ctxt->userData = ctxt;
4987: ctxt->myDoc = NULL;
4988: ctxt->wellFormed = 1;
4989: ctxt->replaceEntities = 0;
4990: ctxt->html = 2;
4991: ctxt->record_info = 0;
4992: ctxt->validate = 0;
4993: ctxt->nbChars = 0;
4994: ctxt->checkIndex = 0;
4995: xmlInitNodeInfoSeq(&ctxt->node_seq);
4996: }
4997:
4998: /**
4999: * sgmlFreeParserCtxt:
5000: * @ctxt: an SGML parser context
5001: *
5002: * Free all the memory used by a parser context. However the parsed
5003: * document in ctxt->myDoc is not freed.
5004: */
5005:
5006: void
5007: sgmlFreeParserCtxt(sgmlParserCtxtPtr ctxt)
5008: {
5009: xmlFreeParserCtxt(ctxt);
5010: }
5011:
5012: /**
5013: * sgmlCreateDocParserCtxt :
5014: * @cur: a pointer to an array of xmlChar
5015: * @encoding: a free form C string describing the SGML document encoding, or NULL
5016: *
5017: * Create a parser context for an SGML document.
5018: *
5019: * Returns the new parser context or NULL
5020: */
5021: sgmlParserCtxtPtr
5022: sgmlCreateDocParserCtxt(xmlChar *cur, const char *encoding) {
5023: sgmlParserCtxtPtr ctxt;
5024: sgmlParserInputPtr input;
5025: /* sgmlCharEncoding enc; */
5026:
5027: ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
5028: if (ctxt == NULL) {
5029: perror("malloc");
5030: return(NULL);
5031: }
5032: sgmlInitParserCtxt(ctxt);
5033: input = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
5034: if (input == NULL) {
5035: perror("malloc");
5036: xmlFree(ctxt);
5037: return(NULL);
5038: }
5039: memset(input, 0, sizeof(sgmlParserInput));
5040:
5041: input->line = 1;
5042: input->col = 1;
5043: input->base = cur;
5044: input->cur = cur;
5045:
5046: inputPush(ctxt, input);
5047: return(ctxt);
5048: }
5049:
5050: /************************************************************************
5051: * *
5052: * Progressive parsing interfaces *
5053: * *
5054: ************************************************************************/
5055:
5056: /**
5057: * sgmlParseLookupSequence:
5058: * @ctxt: an SGML parser context
5059: * @first: the first char to lookup
5060: * @next: the next char to lookup or zero
5061: * @third: the next char to lookup or zero
5062: *
5063: * Try to find if a sequence (first, next, third) or just (first next) or
5064: * (first) is available in the input stream.
5065: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
5066: * to avoid rescanning sequences of bytes, it DOES change the state of the
5067: * parser, do not use liberally.
5068: * This is basically similar to xmlParseLookupSequence()
5069: *
5070: * Returns the index to the current parsing point if the full sequence
5071: * is available, -1 otherwise.
5072: */
5073: int
5074: sgmlParseLookupSequence(sgmlParserCtxtPtr ctxt, xmlChar first,
5075: xmlChar next, xmlChar third) {
5076: int base, len;
5077: sgmlParserInputPtr in;
5078: const xmlChar *buf;
5079:
5080: in = ctxt->input;
5081: if (in == NULL) return(-1);
5082: base = in->cur - in->base;
5083: if (base < 0) return(-1);
5084: if (ctxt->checkIndex > base)
5085: base = ctxt->checkIndex;
5086: if (in->buf == NULL) {
5087: buf = in->base;
5088: len = in->length;
5089: } else {
5090: buf = in->buf->buffer->content;
5091: len = in->buf->buffer->use;
5092: }
5093: /* take into account the sequence length */
5094: if (third) len -= 2;
5095: else if (next) len --;
5096: for (;base < len;base++) {
5097: if (buf[base] == first) {
5098: if (third != 0) {
5099: if ((buf[base + 1] != next) ||
5100: (buf[base + 2] != third)) continue;
5101: } else if (next != 0) {
5102: if (buf[base + 1] != next) continue;
5103: }
5104: ctxt->checkIndex = 0;
5105: #ifdef DEBUG_PUSH
5106: if (next == 0)
1.10 ! veillard 5107: xmlGenericError(xmlGenericErrorContext,
! 5108: "HPP: lookup '%c' found at %d\n",
1.1 veillard 5109: first, base);
5110: else if (third == 0)
1.10 ! veillard 5111: xmlGenericError(xmlGenericErrorContext,
! 5112: "HPP: lookup '%c%c' found at %d\n",
1.1 veillard 5113: first, next, base);
5114: else
1.10 ! veillard 5115: xmlGenericError(xmlGenericErrorContext,
! 5116: "HPP: lookup '%c%c%c' found at %d\n",
1.1 veillard 5117: first, next, third, base);
5118: #endif
5119: return(base - (in->cur - in->base));
5120: }
5121: }
5122: ctxt->checkIndex = base;
5123: #ifdef DEBUG_PUSH
5124: if (next == 0)
1.10 ! veillard 5125: xmlGenericError(xmlGenericErrorContext,
! 5126: "HPP: lookup '%c' failed\n", first);
1.1 veillard 5127: else if (third == 0)
1.10 ! veillard 5128: xmlGenericError(xmlGenericErrorContext,
! 5129: "HPP: lookup '%c%c' failed\n", first, next);
1.1 veillard 5130: else
1.10 ! veillard 5131: xmlGenericError(xmlGenericErrorContext,
! 5132: "HPP: lookup '%c%c%c' failed\n", first, next, third);
1.1 veillard 5133: #endif
5134: return(-1);
5135: }
5136:
5137: /**
5138: * sgmlParseTryOrFinish:
5139: * @ctxt: an SGML parser context
5140: * @terminate: last chunk indicator
5141: *
5142: * Try to progress on parsing
5143: *
5144: * Returns zero if no parsing was possible
5145: */
5146: int
5147: sgmlParseTryOrFinish(sgmlParserCtxtPtr ctxt, int terminate) {
5148: int ret = 0;
5149: sgmlParserInputPtr in;
5150: int avail = 0;
5151: xmlChar cur, next;
5152:
5153: #ifdef DEBUG_PUSH
5154: switch (ctxt->instate) {
5155: case XML_PARSER_EOF:
1.10 ! veillard 5156: xmlGenericError(xmlGenericErrorContext,
! 5157: "HPP: try EOF\n"); break;
1.1 veillard 5158: case XML_PARSER_START:
1.10 ! veillard 5159: xmlGenericError(xmlGenericErrorContext,
! 5160: "HPP: try START\n"); break;
1.1 veillard 5161: case XML_PARSER_MISC:
1.10 ! veillard 5162: xmlGenericError(xmlGenericErrorContext,
! 5163: "HPP: try MISC\n");break;
1.1 veillard 5164: case XML_PARSER_COMMENT:
1.10 ! veillard 5165: xmlGenericError(xmlGenericErrorContext,
! 5166: "HPP: try COMMENT\n");break;
1.1 veillard 5167: case XML_PARSER_PROLOG:
1.10 ! veillard 5168: xmlGenericError(xmlGenericErrorContext,
! 5169: "HPP: try PROLOG\n");break;
1.1 veillard 5170: case XML_PARSER_START_TAG:
1.10 ! veillard 5171: xmlGenericError(xmlGenericErrorContext,
! 5172: "HPP: try START_TAG\n");break;
1.1 veillard 5173: case XML_PARSER_CONTENT:
1.10 ! veillard 5174: xmlGenericError(xmlGenericErrorContext,
! 5175: "HPP: try CONTENT\n");break;
1.1 veillard 5176: case XML_PARSER_CDATA_SECTION:
1.10 ! veillard 5177: xmlGenericError(xmlGenericErrorContext,
! 5178: "HPP: try CDATA_SECTION\n");break;
1.1 veillard 5179: case XML_PARSER_END_TAG:
1.10 ! veillard 5180: xmlGenericError(xmlGenericErrorContext,
! 5181: "HPP: try END_TAG\n");break;
1.1 veillard 5182: case XML_PARSER_ENTITY_DECL:
1.10 ! veillard 5183: xmlGenericError(xmlGenericErrorContext,
! 5184: "HPP: try ENTITY_DECL\n");break;
1.1 veillard 5185: case XML_PARSER_ENTITY_VALUE:
1.10 ! veillard 5186: xmlGenericError(xmlGenericErrorContext,
! 5187: "HPP: try ENTITY_VALUE\n");break;
1.1 veillard 5188: case XML_PARSER_ATTRIBUTE_VALUE:
1.10 ! veillard 5189: xmlGenericError(xmlGenericErrorContext,
! 5190: "HPP: try ATTRIBUTE_VALUE\n");break;
1.1 veillard 5191: case XML_PARSER_DTD:
1.10 ! veillard 5192: xmlGenericError(xmlGenericErrorContext,
! 5193: "HPP: try DTD\n");break;
1.1 veillard 5194: case XML_PARSER_EPILOG:
1.10 ! veillard 5195: xmlGenericError(xmlGenericErrorContext,
! 5196: "HPP: try EPILOG\n");break;
1.1 veillard 5197: case XML_PARSER_PI:
1.10 ! veillard 5198: xmlGenericError(xmlGenericErrorContext,
! 5199: "HPP: try PI\n");break;
1.1 veillard 5200: }
5201: #endif
5202:
5203: while (1) {
5204:
5205: in = ctxt->input;
5206: if (in == NULL) break;
5207: if (in->buf == NULL)
5208: avail = in->length - (in->cur - in->base);
5209: else
5210: avail = in->buf->buffer->use - (in->cur - in->base);
5211: if ((avail == 0) && (terminate)) {
5212: sgmlAutoClose(ctxt, NULL);
5213: if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5214: /*
5215: * SAX: end of the document processing.
5216: */
5217: ctxt->instate = XML_PARSER_EOF;
5218: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5219: ctxt->sax->endDocument(ctxt->userData);
5220: }
5221: }
5222: if (avail < 1)
5223: goto done;
5224: switch (ctxt->instate) {
5225: case XML_PARSER_EOF:
5226: /*
5227: * Document parsing is done !
5228: */
5229: goto done;
5230: case XML_PARSER_START:
5231: /*
5232: * Very first chars read from the document flow.
5233: */
5234: cur = in->cur[0];
5235: if (IS_BLANK(cur)) {
5236: SKIP_BLANKS;
5237: if (in->buf == NULL)
5238: avail = in->length - (in->cur - in->base);
5239: else
5240: avail = in->buf->buffer->use - (in->cur - in->base);
5241: }
5242: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5243: ctxt->sax->setDocumentLocator(ctxt->userData,
5244: &xmlDefaultSAXLocator);
5245: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5246: (!ctxt->disableSAX))
5247: ctxt->sax->startDocument(ctxt->userData);
5248:
5249: cur = in->cur[0];
5250: next = in->cur[1];
5251: if ((cur == '<') && (next == '!') &&
5252: (UPP(2) == 'D') && (UPP(3) == 'O') &&
5253: (UPP(4) == 'C') && (UPP(5) == 'T') &&
5254: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
5255: (UPP(8) == 'E')) {
5256: if ((!terminate) &&
5257: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
5258: goto done;
5259: #ifdef DEBUG_PUSH
1.10 ! veillard 5260: xmlGenericError(xmlGenericErrorContext,
! 5261: "HPP: Parsing internal subset\n");
1.1 veillard 5262: #endif
5263: sgmlParseDocTypeDecl(ctxt);
5264: ctxt->instate = XML_PARSER_PROLOG;
5265: #ifdef DEBUG_PUSH
1.10 ! veillard 5266: xmlGenericError(xmlGenericErrorContext,
! 5267: "HPP: entering PROLOG\n");
1.1 veillard 5268: #endif
5269: } else {
5270: ctxt->instate = XML_PARSER_MISC;
5271: }
5272: #ifdef DEBUG_PUSH
1.10 ! veillard 5273: xmlGenericError(xmlGenericErrorContext,
! 5274: "HPP: entering MISC\n");
1.1 veillard 5275: #endif
5276: break;
5277: case XML_PARSER_MISC:
5278: SKIP_BLANKS;
5279: if (in->buf == NULL)
5280: avail = in->length - (in->cur - in->base);
5281: else
5282: avail = in->buf->buffer->use - (in->cur - in->base);
5283: if (avail < 2)
5284: goto done;
5285: cur = in->cur[0];
5286: next = in->cur[1];
5287: if ((cur == '<') && (next == '!') &&
5288: (in->cur[2] == '-') && (in->cur[3] == '-')) {
5289: if ((!terminate) &&
5290: (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
5291: goto done;
5292: #ifdef DEBUG_PUSH
1.10 ! veillard 5293: xmlGenericError(xmlGenericErrorContext,
! 5294: "HPP: Parsing Comment\n");
1.1 veillard 5295: #endif
5296: sgmlParseComment(ctxt);
5297: ctxt->instate = XML_PARSER_MISC;
5298: } else if ((cur == '<') && (next == '!') &&
5299: (UPP(2) == 'D') && (UPP(3) == 'O') &&
5300: (UPP(4) == 'C') && (UPP(5) == 'T') &&
5301: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
5302: (UPP(8) == 'E')) {
5303: if ((!terminate) &&
5304: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
5305: goto done;
5306: #ifdef DEBUG_PUSH
1.10 ! veillard 5307: xmlGenericError(xmlGenericErrorContext,
! 5308: "HPP: Parsing internal subset\n");
1.1 veillard 5309: #endif
5310: sgmlParseDocTypeDecl(ctxt);
5311: ctxt->instate = XML_PARSER_PROLOG;
5312: #ifdef DEBUG_PUSH
1.10 ! veillard 5313: xmlGenericError(xmlGenericErrorContext,
! 5314: "HPP: entering PROLOG\n");
1.1 veillard 5315: #endif
5316: } else if ((cur == '<') && (next == '!') &&
5317: (avail < 9)) {
5318: goto done;
5319: } else {
5320: ctxt->instate = XML_PARSER_START_TAG;
5321: #ifdef DEBUG_PUSH
1.10 ! veillard 5322: xmlGenericError(xmlGenericErrorContext,
! 5323: "HPP: entering START_TAG\n");
1.1 veillard 5324: #endif
5325: }
5326: break;
5327: case XML_PARSER_PROLOG:
5328: SKIP_BLANKS;
5329: if (in->buf == NULL)
5330: avail = in->length - (in->cur - in->base);
5331: else
5332: avail = in->buf->buffer->use - (in->cur - in->base);
5333: if (avail < 2)
5334: goto done;
5335: cur = in->cur[0];
5336: next = in->cur[1];
5337: if ((cur == '<') && (next == '!') &&
5338: (in->cur[2] == '-') && (in->cur[3] == '-')) {
5339: if ((!terminate) &&
5340: (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
5341: goto done;
5342: #ifdef DEBUG_PUSH
1.10 ! veillard 5343: xmlGenericError(xmlGenericErrorContext,
! 5344: "HPP: Parsing Comment\n");
1.1 veillard 5345: #endif
5346: sgmlParseComment(ctxt);
5347: ctxt->instate = XML_PARSER_PROLOG;
5348: } else if ((cur == '<') && (next == '!') &&
5349: (avail < 4)) {
5350: goto done;
5351: } else {
5352: ctxt->instate = XML_PARSER_START_TAG;
5353: #ifdef DEBUG_PUSH
1.10 ! veillard 5354: xmlGenericError(xmlGenericErrorContext,
! 5355: "HPP: entering START_TAG\n");
1.1 veillard 5356: #endif
5357: }
5358: break;
5359: case XML_PARSER_EPILOG:
5360: if (in->buf == NULL)
5361: avail = in->length - (in->cur - in->base);
5362: else
5363: avail = in->buf->buffer->use - (in->cur - in->base);
5364: if (avail < 1)
5365: goto done;
5366: cur = in->cur[0];
5367: if (IS_BLANK(cur)) {
5368: sgmlParseCharData(ctxt, 0);
5369: goto done;
5370: }
5371: if (avail < 2)
5372: goto done;
5373: next = in->cur[1];
5374: if ((cur == '<') && (next == '!') &&
5375: (in->cur[2] == '-') && (in->cur[3] == '-')) {
5376: if ((!terminate) &&
5377: (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
5378: goto done;
5379: #ifdef DEBUG_PUSH
1.10 ! veillard 5380: xmlGenericError(xmlGenericErrorContext,
! 5381: "HPP: Parsing Comment\n");
1.1 veillard 5382: #endif
5383: sgmlParseComment(ctxt);
5384: ctxt->instate = XML_PARSER_EPILOG;
5385: } else if ((cur == '<') && (next == '!') &&
5386: (avail < 4)) {
5387: goto done;
5388: } else {
1.6 veillard 5389: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.1 veillard 5390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5391: ctxt->sax->error(ctxt->userData,
5392: "Extra content at the end of the document\n");
5393: ctxt->wellFormed = 0;
5394: ctxt->instate = XML_PARSER_EOF;
5395: #ifdef DEBUG_PUSH
1.10 ! veillard 5396: xmlGenericError(xmlGenericErrorContext,
! 5397: "HPP: entering EOF\n");
1.1 veillard 5398: #endif
5399: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5400: ctxt->sax->endDocument(ctxt->userData);
5401: goto done;
5402: }
5403: break;
5404: case XML_PARSER_START_TAG: {
5405: xmlChar *name, *oldname;
5406: int depth = ctxt->nameNr;
5407: sgmlElemDescPtr info;
5408:
5409: if (avail < 2)
5410: goto done;
5411: cur = in->cur[0];
5412: if (cur != '<') {
5413: ctxt->instate = XML_PARSER_CONTENT;
5414: #ifdef DEBUG_PUSH
1.10 ! veillard 5415: xmlGenericError(xmlGenericErrorContext,
! 5416: "HPP: entering CONTENT\n");
1.1 veillard 5417: #endif
5418: break;
5419: }
5420: if ((!terminate) &&
5421: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
5422: goto done;
5423:
5424: oldname = xmlStrdup(ctxt->name);
5425: sgmlParseStartTag(ctxt);
5426: name = ctxt->name;
5427: #ifdef DEBUG
5428: if (oldname == NULL)
1.10 ! veillard 5429: xmlGenericError(xmlGenericErrorContext,
! 5430: "Start of element %s\n", name);
1.1 veillard 5431: else if (name == NULL)
1.10 ! veillard 5432: xmlGenericError(xmlGenericErrorContext,
! 5433: "Start of element failed, was %s\n",
1.1 veillard 5434: oldname);
5435: else
1.10 ! veillard 5436: xmlGenericError(xmlGenericErrorContext,
! 5437: "Start of element %s, was %s\n",
1.1 veillard 5438: name, oldname);
5439: #endif
5440: if (((depth == ctxt->nameNr) &&
1.7 veillard 5441: (xmlStrEqual(oldname, ctxt->name))) ||
1.1 veillard 5442: (name == NULL)) {
5443: if (CUR == '>')
5444: NEXT;
5445: if (oldname != NULL)
5446: xmlFree(oldname);
5447: break;
5448: }
5449: if (oldname != NULL)
5450: xmlFree(oldname);
5451:
5452: /*
5453: * Lookup the info for that element.
5454: */
5455: info = sgmlTagLookup(name);
5456: if (info == NULL) {
5457: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.4 veillard 5458: ctxt->sax->error(ctxt->userData, "Tag %s unknown\n",
1.1 veillard 5459: name);
5460: ctxt->wellFormed = 0;
5461: } else if (info->depr) {
5462: /***************************
5463: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5464: ctxt->sax->warning(ctxt->userData,
5465: "Tag %s is deprecated\n",
5466: name);
5467: ***************************/
5468: }
5469:
5470: /*
5471: * Check for an Empty Element labelled the XML/SGML way
5472: */
5473: if ((CUR == '/') && (NXT(1) == '>')) {
5474: SKIP(2);
5475: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5476: ctxt->sax->endElement(ctxt->userData, name);
5477: oldname = sgmlnamePop(ctxt);
5478: #ifdef DEBUG
1.10 ! veillard 5479: xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n",
1.1 veillard 5480: oldname);
5481: #endif
5482: if (oldname != NULL)
5483: xmlFree(oldname);
5484: ctxt->instate = XML_PARSER_CONTENT;
5485: #ifdef DEBUG_PUSH
1.10 ! veillard 5486: xmlGenericError(xmlGenericErrorContext,
! 5487: "HPP: entering CONTENT\n");
1.1 veillard 5488: #endif
5489: break;
5490: }
5491:
5492: if (CUR == '>') {
5493: NEXT;
5494: } else {
5495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5496: ctxt->sax->error(ctxt->userData,
5497: "Couldn't find end of Start Tag %s\n",
5498: name);
5499: ctxt->wellFormed = 0;
5500:
5501: /*
5502: * end of parsing of this node.
5503: */
1.7 veillard 5504: if (xmlStrEqual(name, ctxt->name)) {
1.1 veillard 5505: nodePop(ctxt);
5506: oldname = sgmlnamePop(ctxt);
5507: #ifdef DEBUG
1.10 ! veillard 5508: xmlGenericError(xmlGenericErrorContext,
1.1 veillard 5509: "End of start tag problem: popping out %s\n", oldname);
5510: #endif
5511: if (oldname != NULL)
5512: xmlFree(oldname);
5513: }
5514:
5515: ctxt->instate = XML_PARSER_CONTENT;
5516: #ifdef DEBUG_PUSH
1.10 ! veillard 5517: xmlGenericError(xmlGenericErrorContext,
! 5518: "HPP: entering CONTENT\n");
1.1 veillard 5519: #endif
5520: break;
5521: }
5522:
5523: /*
5524: * Check for an Empty Element from DTD definition
5525: */
5526: if ((info != NULL) && (info->empty)) {
5527: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5528: ctxt->sax->endElement(ctxt->userData, name);
5529: oldname = sgmlnamePop(ctxt);
5530: #ifdef DEBUG
1.10 ! veillard 5531: xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname);
1.1 veillard 5532: #endif
5533: if (oldname != NULL)
5534: xmlFree(oldname);
5535: }
5536: ctxt->instate = XML_PARSER_CONTENT;
5537: #ifdef DEBUG_PUSH
1.10 ! veillard 5538: xmlGenericError(xmlGenericErrorContext,
! 5539: "HPP: entering CONTENT\n");
1.1 veillard 5540: #endif
5541: break;
5542: }
5543: case XML_PARSER_CONTENT: {
5544: long cons;
5545: /*
5546: * Handle preparsed entities and charRef
5547: */
5548: if (ctxt->token != 0) {
5549: xmlChar chr[2] = { 0 , 0 } ;
5550:
5551: chr[0] = (xmlChar) ctxt->token;
5552: sgmlCheckParagraph(ctxt);
5553: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5554: ctxt->sax->characters(ctxt->userData, chr, 1);
5555: ctxt->token = 0;
5556: ctxt->checkIndex = 0;
5557: }
5558: if ((avail == 1) && (terminate)) {
5559: cur = in->cur[0];
5560: if ((cur != '<') && (cur != '&')) {
5561: if (ctxt->sax != NULL) {
5562: if (IS_BLANK(cur)) {
5563: if (ctxt->sax->ignorableWhitespace != NULL)
5564: ctxt->sax->ignorableWhitespace(
5565: ctxt->userData, &cur, 1);
5566: } else {
5567: sgmlCheckParagraph(ctxt);
5568: if (ctxt->sax->characters != NULL)
5569: ctxt->sax->characters(
5570: ctxt->userData, &cur, 1);
5571: }
5572: }
5573: ctxt->token = 0;
5574: ctxt->checkIndex = 0;
5575: NEXT;
5576: }
5577: break;
5578: }
5579: if (avail < 2)
5580: goto done;
5581: cur = in->cur[0];
5582: next = in->cur[1];
5583: cons = ctxt->nbChars;
5584: /*
5585: * Sometimes DOCTYPE arrives in the middle of the document
5586: */
5587: if ((cur == '<') && (next == '!') &&
5588: (UPP(2) == 'D') && (UPP(3) == 'O') &&
5589: (UPP(4) == 'C') && (UPP(5) == 'T') &&
5590: (UPP(6) == 'Y') && (UPP(7) == 'P') &&
5591: (UPP(8) == 'E')) {
5592: if ((!terminate) &&
5593: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
5594: goto done;
5595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5596: ctxt->sax->error(ctxt->userData,
5597: "Misplaced DOCTYPE declaration\n");
5598: ctxt->wellFormed = 0;
5599: sgmlParseDocTypeDecl(ctxt);
5600: } else if ((cur == '<') && (next == '!') &&
5601: (in->cur[2] == '-') && (in->cur[3] == '-')) {
5602: if ((!terminate) &&
5603: (sgmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
5604: goto done;
5605: #ifdef DEBUG_PUSH
1.10 ! veillard 5606: xmlGenericError(xmlGenericErrorContext,
! 5607: "HPP: Parsing Comment\n");
1.1 veillard 5608: #endif
5609: sgmlParseComment(ctxt);
5610: ctxt->instate = XML_PARSER_CONTENT;
5611: } else if ((cur == '<') && (next == '!') && (avail < 4)) {
5612: goto done;
5613: } else if ((cur == '<') && (next == '/')) {
5614: ctxt->instate = XML_PARSER_END_TAG;
5615: ctxt->checkIndex = 0;
5616: #ifdef DEBUG_PUSH
1.10 ! veillard 5617: xmlGenericError(xmlGenericErrorContext,
! 5618: "HPP: entering END_TAG\n");
1.1 veillard 5619: #endif
5620: break;
5621: } else if (cur == '<') {
5622: ctxt->instate = XML_PARSER_START_TAG;
5623: ctxt->checkIndex = 0;
5624: #ifdef DEBUG_PUSH
1.10 ! veillard 5625: xmlGenericError(xmlGenericErrorContext,
! 5626: "HPP: entering START_TAG\n");
1.1 veillard 5627: #endif
5628: break;
5629: } else if (cur == '&') {
5630: if ((!terminate) &&
5631: (sgmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
5632: goto done;
5633: #ifdef DEBUG_PUSH
1.10 ! veillard 5634: xmlGenericError(xmlGenericErrorContext,
! 5635: "HPP: Parsing Reference\n");
1.1 veillard 5636: #endif
5637: /* TODO: check generation of subtrees if noent !!! */
5638: sgmlParseReference(ctxt);
5639: } else {
5640: /* TODO Avoid the extra copy, handle directly !!!!!! */
5641: /*
5642: * Goal of the following test is :
5643: * - minimize calls to the SAX 'character' callback
5644: * when they are mergeable
5645: */
5646: if ((ctxt->inputNr == 1) &&
5647: (avail < SGML_PARSER_BIG_BUFFER_SIZE)) {
5648: if ((!terminate) &&
5649: (sgmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
5650: goto done;
5651: }
5652: ctxt->checkIndex = 0;
5653: #ifdef DEBUG_PUSH
1.10 ! veillard 5654: xmlGenericError(xmlGenericErrorContext,
! 5655: "HPP: Parsing char data\n");
1.1 veillard 5656: #endif
5657: sgmlParseCharData(ctxt, 0);
5658: }
5659: if (cons == ctxt->nbChars) {
5660: if (ctxt->node != NULL) {
5661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5662: ctxt->sax->error(ctxt->userData,
5663: "detected an error in element content\n");
5664: ctxt->wellFormed = 0;
5665: NEXT;
5666: }
5667: break;
5668: }
5669:
5670: break;
5671: }
5672: case XML_PARSER_END_TAG:
5673: if (avail < 2)
5674: goto done;
5675: if ((!terminate) &&
5676: (sgmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
5677: goto done;
5678: sgmlParseEndTag(ctxt);
5679: if (ctxt->nameNr == 0) {
5680: ctxt->instate = XML_PARSER_EPILOG;
5681: } else {
5682: ctxt->instate = XML_PARSER_CONTENT;
5683: }
5684: ctxt->checkIndex = 0;
5685: #ifdef DEBUG_PUSH
1.10 ! veillard 5686: xmlGenericError(xmlGenericErrorContext,
! 5687: "HPP: entering CONTENT\n");
1.1 veillard 5688: #endif
5689: break;
5690: case XML_PARSER_CDATA_SECTION:
1.10 ! veillard 5691: xmlGenericError(xmlGenericErrorContext,
! 5692: "HPP: internal error, state == CDATA\n");
1.1 veillard 5693: ctxt->instate = XML_PARSER_CONTENT;
5694: ctxt->checkIndex = 0;
5695: #ifdef DEBUG_PUSH
1.10 ! veillard 5696: xmlGenericError(xmlGenericErrorContext,
! 5697: "HPP: entering CONTENT\n");
1.1 veillard 5698: #endif
5699: break;
5700: case XML_PARSER_DTD:
1.10 ! veillard 5701: xmlGenericError(xmlGenericErrorContext,
! 5702: "HPP: internal error, state == DTD\n");
1.1 veillard 5703: ctxt->instate = XML_PARSER_CONTENT;
5704: ctxt->checkIndex = 0;
5705: #ifdef DEBUG_PUSH
1.10 ! veillard 5706: xmlGenericError(xmlGenericErrorContext,
! 5707: "HPP: entering CONTENT\n");
1.1 veillard 5708: #endif
5709: break;
5710: case XML_PARSER_COMMENT:
1.10 ! veillard 5711: xmlGenericError(xmlGenericErrorContext,
! 5712: "HPP: internal error, state == COMMENT\n");
1.1 veillard 5713: ctxt->instate = XML_PARSER_CONTENT;
5714: ctxt->checkIndex = 0;
5715: #ifdef DEBUG_PUSH
1.10 ! veillard 5716: xmlGenericError(xmlGenericErrorContext,
! 5717: "HPP: entering CONTENT\n");
1.1 veillard 5718: #endif
5719: break;
5720: case XML_PARSER_PI:
1.10 ! veillard 5721: xmlGenericError(xmlGenericErrorContext,
! 5722: "HPP: internal error, state == PI\n");
1.1 veillard 5723: ctxt->instate = XML_PARSER_CONTENT;
5724: ctxt->checkIndex = 0;
5725: #ifdef DEBUG_PUSH
1.10 ! veillard 5726: xmlGenericError(xmlGenericErrorContext,
! 5727: "HPP: entering CONTENT\n");
1.1 veillard 5728: #endif
5729: break;
5730: case XML_PARSER_ENTITY_DECL:
1.10 ! veillard 5731: xmlGenericError(xmlGenericErrorContext,
! 5732: "HPP: internal error, state == ENTITY_DECL\n");
1.1 veillard 5733: ctxt->instate = XML_PARSER_CONTENT;
5734: ctxt->checkIndex = 0;
5735: #ifdef DEBUG_PUSH
1.10 ! veillard 5736: xmlGenericError(xmlGenericErrorContext,
! 5737: "HPP: entering CONTENT\n");
1.1 veillard 5738: #endif
5739: break;
5740: case XML_PARSER_ENTITY_VALUE:
1.10 ! veillard 5741: xmlGenericError(xmlGenericErrorContext,
! 5742: "HPP: internal error, state == ENTITY_VALUE\n");
1.1 veillard 5743: ctxt->instate = XML_PARSER_CONTENT;
5744: ctxt->checkIndex = 0;
5745: #ifdef DEBUG_PUSH
1.10 ! veillard 5746: xmlGenericError(xmlGenericErrorContext,
! 5747: "HPP: entering DTD\n");
1.1 veillard 5748: #endif
5749: break;
5750: case XML_PARSER_ATTRIBUTE_VALUE:
1.10 ! veillard 5751: xmlGenericError(xmlGenericErrorContext,
! 5752: "HPP: internal error, state == ATTRIBUTE_VALUE\n");
1.1 veillard 5753: ctxt->instate = XML_PARSER_START_TAG;
5754: ctxt->checkIndex = 0;
5755: #ifdef DEBUG_PUSH
1.10 ! veillard 5756: xmlGenericError(xmlGenericErrorContext,
! 5757: "HPP: entering START_TAG\n");
1.1 veillard 5758: #endif
5759: break;
5760: case XML_PARSER_SYSTEM_LITERAL:
1.10 ! veillard 5761: xmlGenericError(xmlGenericErrorContext,
! 5762: "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
1.1 veillard 5763: ctxt->instate = XML_PARSER_CONTENT;
5764: ctxt->checkIndex = 0;
5765: #ifdef DEBUG_PUSH
1.10 ! veillard 5766: xmlGenericError(xmlGenericErrorContext,
! 5767: "HPP: entering CONTENT\n");
1.1 veillard 5768: #endif
5769: break;
5770: }
5771: }
5772: done:
5773: if ((avail == 0) && (terminate)) {
5774: sgmlAutoClose(ctxt, NULL);
5775: if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5776: /*
5777: * SAX: end of the document processing.
5778: */
5779: ctxt->instate = XML_PARSER_EOF;
5780: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5781: ctxt->sax->endDocument(ctxt->userData);
5782: }
5783: }
5784: if ((ctxt->myDoc != NULL) &&
5785: ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5786: (ctxt->instate == XML_PARSER_EPILOG))) {
5787: xmlDtdPtr dtd;
5788: dtd = xmlGetIntSubset(ctxt->myDoc);
5789: if (dtd == NULL)
5790: ctxt->myDoc->intSubset =
5791: xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML",
5792: BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN",
5793: BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd");
5794: }
5795: #ifdef DEBUG_PUSH
1.10 ! veillard 5796: xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
1.1 veillard 5797: #endif
5798: return(ret);
5799: }
5800:
5801: /**
5802: * sgmlParseTry:
5803: * @ctxt: an SGML parser context
5804: *
5805: * Try to progress on parsing
5806: *
5807: * Returns zero if no parsing was possible
5808: */
5809: int
5810: sgmlParseTry(sgmlParserCtxtPtr ctxt) {
5811: return(sgmlParseTryOrFinish(ctxt, 0));
5812: }
5813:
5814: /**
5815: * sgmlParseChunk:
5816: * @ctxt: an XML parser context
5817: * @chunk: an char array
5818: * @size: the size in byte of the chunk
5819: * @terminate: last chunk indicator
5820: *
5821: * Parse a Chunk of memory
5822: *
5823: * Returns zero if no error, the xmlParserErrors otherwise.
5824: */
5825: int
5826: sgmlParseChunk(sgmlParserCtxtPtr ctxt, const char *chunk, int size,
5827: int terminate) {
5828: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5829: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
5830: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5831: int cur = ctxt->input->cur - ctxt->input->base;
5832:
5833: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5834: ctxt->input->base = ctxt->input->buf->buffer->content + base;
5835: ctxt->input->cur = ctxt->input->base + cur;
5836: #ifdef DEBUG_PUSH
1.10 ! veillard 5837: xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
1.1 veillard 5838: #endif
5839:
5840: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
5841: sgmlParseTryOrFinish(ctxt, terminate);
5842: } else if (ctxt->instate != XML_PARSER_EOF) {
5843: xmlParserInputBufferPush(ctxt->input->buf, 0, "");
5844: sgmlParseTryOrFinish(ctxt, terminate);
5845: }
5846: if (terminate) {
5847: if ((ctxt->instate != XML_PARSER_EOF) &&
5848: (ctxt->instate != XML_PARSER_EPILOG) &&
5849: (ctxt->instate != XML_PARSER_MISC)) {
1.6 veillard 5850: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.1 veillard 5851: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5852: ctxt->sax->error(ctxt->userData,
5853: "Extra content at the end of the document\n");
5854: ctxt->wellFormed = 0;
5855: }
5856: if (ctxt->instate != XML_PARSER_EOF) {
5857: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5858: ctxt->sax->endDocument(ctxt->userData);
5859: }
5860: ctxt->instate = XML_PARSER_EOF;
5861: }
5862: return((xmlParserErrors) ctxt->errNo);
5863: }
5864:
5865: /************************************************************************
5866: * *
5867: * User entry points *
5868: * *
5869: ************************************************************************/
5870:
5871: /**
5872: * sgmlCreatePushParserCtxt :
5873: * @sax: a SAX handler
5874: * @user_data: The user data returned on SAX callbacks
5875: * @chunk: a pointer to an array of chars
5876: * @size: number of chars in the array
5877: * @filename: an optional file name or URI
5878: * @enc: an optional encoding
5879: *
5880: * Create a parser context for using the SGML parser in push mode
5881: * To allow content encoding detection, @size should be >= 4
5882: * The value of @filename is used for fetching external entities
5883: * and error/warning reports.
5884: *
5885: * Returns the new parser context or NULL
5886: */
5887: sgmlParserCtxtPtr
5888: sgmlCreatePushParserCtxt(sgmlSAXHandlerPtr sax, void *user_data,
5889: const char *chunk, int size, const char *filename,
5890: xmlCharEncoding enc) {
5891: sgmlParserCtxtPtr ctxt;
5892: sgmlParserInputPtr inputStream;
5893: xmlParserInputBufferPtr buf;
5894:
5895: buf = xmlAllocParserInputBuffer(enc);
5896: if (buf == NULL) return(NULL);
5897:
5898: ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
5899: if (ctxt == NULL) {
5900: xmlFree(buf);
5901: return(NULL);
5902: }
5903: memset(ctxt, 0, sizeof(sgmlParserCtxt));
5904: sgmlInitParserCtxt(ctxt);
5905: if (sax != NULL) {
5906: if (ctxt->sax != &sgmlDefaultSAXHandler)
5907: xmlFree(ctxt->sax);
5908: ctxt->sax = (sgmlSAXHandlerPtr) xmlMalloc(sizeof(sgmlSAXHandler));
5909: if (ctxt->sax == NULL) {
5910: xmlFree(buf);
5911: xmlFree(ctxt);
5912: return(NULL);
5913: }
5914: memcpy(ctxt->sax, sax, sizeof(sgmlSAXHandler));
5915: if (user_data != NULL)
5916: ctxt->userData = user_data;
5917: }
5918: if (filename == NULL) {
5919: ctxt->directory = NULL;
5920: } else {
5921: ctxt->directory = xmlParserGetDirectory(filename);
5922: }
5923:
5924: inputStream = sgmlNewInputStream(ctxt);
5925: if (inputStream == NULL) {
5926: xmlFreeParserCtxt(ctxt);
5927: return(NULL);
5928: }
5929:
5930: if (filename == NULL)
5931: inputStream->filename = NULL;
5932: else
5933: inputStream->filename = xmlMemStrdup(filename);
5934: inputStream->buf = buf;
5935: inputStream->base = inputStream->buf->buffer->content;
5936: inputStream->cur = inputStream->buf->buffer->content;
5937:
5938: inputPush(ctxt, inputStream);
5939:
5940: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5941: (ctxt->input->buf != NULL)) {
5942: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5943: #ifdef DEBUG_PUSH
1.10 ! veillard 5944: xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
1.1 veillard 5945: #endif
5946: }
5947:
5948: return(ctxt);
5949: }
5950:
5951: /**
5952: * sgmlSAXParseDoc :
5953: * @cur: a pointer to an array of xmlChar
5954: * @encoding: a free form C string describing the SGML document encoding, or NULL
5955: * @sax: the SAX handler block
5956: * @userData: if using SAX, this pointer will be provided on callbacks.
5957: *
5958: * parse an SGML in-memory document and build a tree.
5959: * It use the given SAX function block to handle the parsing callback.
5960: * If sax is NULL, fallback to the default DOM tree building routines.
5961: *
5962: * Returns the resulting document tree
5963: */
5964:
5965: sgmlDocPtr
5966: sgmlSAXParseDoc(xmlChar *cur, const char *encoding, sgmlSAXHandlerPtr sax, void *userData) {
5967: sgmlDocPtr ret;
5968: sgmlParserCtxtPtr ctxt;
5969:
5970: if (cur == NULL) return(NULL);
5971:
5972:
5973: ctxt = sgmlCreateDocParserCtxt(cur, encoding);
5974: if (ctxt == NULL) return(NULL);
5975: if (sax != NULL) {
5976: ctxt->sax = sax;
5977: ctxt->userData = userData;
5978: }
5979:
5980: sgmlParseDocument(ctxt);
5981: ret = ctxt->myDoc;
5982: if (sax != NULL) {
5983: ctxt->sax = NULL;
5984: ctxt->userData = NULL;
5985: }
5986: sgmlFreeParserCtxt(ctxt);
5987:
5988: return(ret);
5989: }
5990:
5991: /**
5992: * sgmlParseDoc :
5993: * @cur: a pointer to an array of xmlChar
5994: * @encoding: a free form C string describing the SGML document encoding, or NULL
5995: *
5996: * parse an SGML in-memory document and build a tree.
5997: *
5998: * Returns the resulting document tree
5999: */
6000:
6001: sgmlDocPtr
6002: sgmlParseDoc(xmlChar *cur, const char *encoding) {
6003: return(sgmlSAXParseDoc(cur, encoding, NULL, NULL));
6004: }
6005:
6006:
6007: /**
6008: * sgmlCreateFileParserCtxt :
6009: * @filename: the filename
6010: * @encoding: a free form C string describing the SGML document encoding, or NULL
6011: *
6012: * Create a parser context for a file content.
6013: * Automatic support for ZLIB/Compress compressed document is provided
6014: * by default if found at compile-time.
6015: *
6016: * Returns the new parser context or NULL
6017: */
6018: sgmlParserCtxtPtr
6019: sgmlCreateFileParserCtxt(const char *filename, const char *encoding)
6020: {
6021: sgmlParserCtxtPtr ctxt;
6022: sgmlParserInputPtr inputStream;
6023: xmlParserInputBufferPtr buf;
6024: /* sgmlCharEncoding enc; */
6025:
6026: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
6027: if (buf == NULL) return(NULL);
6028:
6029: ctxt = (sgmlParserCtxtPtr) xmlMalloc(sizeof(sgmlParserCtxt));
6030: if (ctxt == NULL) {
6031: perror("malloc");
6032: return(NULL);
6033: }
6034: memset(ctxt, 0, sizeof(sgmlParserCtxt));
6035: sgmlInitParserCtxt(ctxt);
6036: inputStream = (sgmlParserInputPtr) xmlMalloc(sizeof(sgmlParserInput));
6037: if (inputStream == NULL) {
6038: perror("malloc");
6039: xmlFree(ctxt);
6040: return(NULL);
6041: }
6042: memset(inputStream, 0, sizeof(sgmlParserInput));
6043:
6044: inputStream->filename = xmlMemStrdup(filename);
6045: inputStream->line = 1;
6046: inputStream->col = 1;
6047: inputStream->buf = buf;
6048: inputStream->directory = NULL;
6049:
6050: inputStream->base = inputStream->buf->buffer->content;
6051: inputStream->cur = inputStream->buf->buffer->content;
6052: inputStream->free = NULL;
6053:
6054: inputPush(ctxt, inputStream);
6055: return(ctxt);
6056: }
6057:
6058: /**
6059: * sgmlSAXParseFile :
6060: * @filename: the filename
6061: * @encoding: a free form C string describing the SGML document encoding, or NULL
6062: * @sax: the SAX handler block
6063: * @userData: if using SAX, this pointer will be provided on callbacks.
6064: *
6065: * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
6066: * compressed document is provided by default if found at compile-time.
6067: * It use the given SAX function block to handle the parsing callback.
6068: * If sax is NULL, fallback to the default DOM tree building routines.
6069: *
6070: * Returns the resulting document tree
6071: */
6072:
6073: sgmlDocPtr
6074: sgmlSAXParseFile(const char *filename, const char *encoding, sgmlSAXHandlerPtr sax,
6075: void *userData) {
6076: sgmlDocPtr ret;
6077: sgmlParserCtxtPtr ctxt;
6078: sgmlSAXHandlerPtr oldsax = NULL;
6079:
6080: ctxt = sgmlCreateFileParserCtxt(filename, encoding);
6081: if (ctxt == NULL) return(NULL);
6082: if (sax != NULL) {
6083: oldsax = ctxt->sax;
6084: ctxt->sax = sax;
6085: ctxt->userData = userData;
6086: }
6087:
6088: sgmlParseDocument(ctxt);
6089:
6090: ret = ctxt->myDoc;
6091: if (sax != NULL) {
6092: ctxt->sax = oldsax;
6093: ctxt->userData = NULL;
6094: }
6095: sgmlFreeParserCtxt(ctxt);
6096:
6097: return(ret);
6098: }
6099:
6100: /**
6101: * sgmlParseFile :
6102: * @filename: the filename
6103: * @encoding: a free form C string describing the SGML document encoding, or NULL
6104: *
6105: * parse an SGML file and build a tree. Automatic support for ZLIB/Compress
6106: * compressed document is provided by default if found at compile-time.
6107: *
6108: * Returns the resulting document tree
6109: */
6110:
6111: sgmlDocPtr
6112: sgmlParseFile(const char *filename, const char *encoding) {
6113: return(sgmlSAXParseFile(filename, encoding, NULL, NULL));
6114: }
6115:
6116: #endif /* LIBXML_SGML_ENABLED */
Webmaster