Annotation of XML/parserInternals.c, revision 1.11
1.1 veillard 1: /*
2: * parserInternals.c : Internal routines (and obsolete ones) needed for the
3: * XML and HTML parsers.
4: *
5: * See Copyright for the status of this software.
6: *
7: * Daniel.Veillard@w3.org
8: */
9:
10: #ifdef WIN32
11: #include "win32config.h"
12: #define XML_DIR_SEP '\\'
13: #else
14: #include "config.h"
15: #define XML_DIR_SEP '/'
16: #endif
17:
18: #include <stdio.h>
19: #include <string.h>
20: #ifdef HAVE_CTYPE_H
21: #include <ctype.h>
22: #endif
23: #ifdef HAVE_STDLIB_H
24: #include <stdlib.h>
25: #endif
26: #ifdef HAVE_SYS_STAT_H
27: #include <sys/stat.h>
28: #endif
29: #ifdef HAVE_FCNTL_H
30: #include <fcntl.h>
31: #endif
32: #ifdef HAVE_UNISTD_H
33: #include <unistd.h>
34: #endif
35: #ifdef HAVE_ZLIB_H
36: #include <zlib.h>
37: #endif
38:
39: #include <libxml/xmlmemory.h>
40: #include <libxml/tree.h>
41: #include <libxml/parser.h>
1.5 veillard 42: #include <libxml/parserInternals.h>
43: #include <libxml/valid.h>
1.1 veillard 44: #include <libxml/entities.h>
1.5 veillard 45: #include <libxml/xmlerror.h>
1.1 veillard 46: #include <libxml/encoding.h>
47: #include <libxml/valid.h>
48: #include <libxml/xmlIO.h>
49: #include <libxml/uri.h>
50:
51:
52: /************************************************************************
53: * *
54: * Version and Features handling *
55: * *
56: ************************************************************************/
/* Public version string of this build, initialised from LIBXML_VERSION_STRING. */
57: const char *xmlParserVersion = LIBXML_VERSION_STRING;
58:
59: /*
60: * xmlCheckVersion:
61: * @version: the include version number
62: *
63: * check the compiled lib version against the include one.
64: * This can warn or immediately kill the application
65: */
66: void
67: xmlCheckVersion(int version) {
68: int myversion = (int) LIBXML_VERSION;
69:
70: if ((myversion / 10000) != (version / 10000)) {
1.7 veillard 71: xmlGenericError(xmlGenericErrorContext,
1.1 veillard 72: "Fatal: program compiled against libxml %d using libxml %d\n",
73: (version / 10000), (myversion / 10000));
74: exit(1);
75: }
76: if ((myversion / 100) < (version / 100)) {
1.7 veillard 77: xmlGenericError(xmlGenericErrorContext,
1.1 veillard 78: "Warning: program compiled against libxml %d using older %d\n",
79: (version / 100), (myversion / 100));
80: }
81: }
82:
83:
/*
 * Feature names reported by xmlGetFeaturesList(), in reporting order.
 * The order of the entries is part of the observable output and must
 * not be changed.
 */
const char *xmlFeaturesList[] = {
    /* parser flags and context values */
    "validate",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",

    /* the SAX handler block as a whole */
    "SAX block",

    /* individual SAX callbacks */
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset"
};
127:
128: /*
129: * xmlGetFeaturesList:
130: * @len: the length of the features name array (input/output)
131: * @result: an array of string to be filled with the features name.
132: *
133: * Copy at most *@len feature names into the @result array
134: *
135: * Returns -1 in case or error, or the total number of features,
136: * len is updated with the number of strings copied,
137: * strings must not be deallocated
138: */
139: int
140: xmlGetFeaturesList(int *len, const char **result) {
141: int ret, i;
142:
143: ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
144: if ((len == NULL) || (result == NULL))
145: return(ret);
146: if ((*len < 0) || (*len >= 1000))
147: return(-1);
148: if (*len > ret)
149: *len = ret;
150: for (i = 0;i < *len;i++)
151: result[i] = xmlFeaturesList[i];
152: return(ret);
153: }
154:
155: /*
156: * xmlGetFeature:
157: * @ctxt: an XML/HTML parser context
158: * @name: the feature name
159: * @result: location to store the result
160: *
161: * Read the current value of one feature of this parser instance
162: *
163: * Returns -1 in case or error, 0 otherwise
164: */
165: int
166: xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
167: if ((ctxt == NULL) || (name == NULL) || (result == NULL))
168: return(-1);
169:
170: if (!strcmp(name, "validate")) {
171: *((int *) result) = ctxt->validate;
172: } else if (!strcmp(name, "keep blanks")) {
173: *((int *) result) = ctxt->keepBlanks;
174: } else if (!strcmp(name, "disable SAX")) {
175: *((int *) result) = ctxt->disableSAX;
176: } else if (!strcmp(name, "fetch external entities")) {
177: *((int *) result) = ctxt->validate;
178: } else if (!strcmp(name, "substitute entities")) {
179: *((int *) result) = ctxt->replaceEntities;
180: } else if (!strcmp(name, "gather line info")) {
181: *((int *) result) = ctxt->record_info;
182: } else if (!strcmp(name, "user data")) {
183: *((void **)result) = ctxt->userData;
184: } else if (!strcmp(name, "is html")) {
185: *((int *) result) = ctxt->html;
186: } else if (!strcmp(name, "is standalone")) {
187: *((int *) result) = ctxt->standalone;
188: } else if (!strcmp(name, "document")) {
189: *((xmlDocPtr *) result) = ctxt->myDoc;
190: } else if (!strcmp(name, "is well formed")) {
191: *((int *) result) = ctxt->wellFormed;
192: } else if (!strcmp(name, "is valid")) {
193: *((int *) result) = ctxt->valid;
194: } else if (!strcmp(name, "SAX block")) {
195: *((xmlSAXHandlerPtr *) result) = ctxt->sax;
196: } else if (!strcmp(name, "SAX function internalSubset")) {
197: *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
198: } else if (!strcmp(name, "SAX function isStandalone")) {
199: *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
200: } else if (!strcmp(name, "SAX function hasInternalSubset")) {
201: *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
202: } else if (!strcmp(name, "SAX function hasExternalSubset")) {
203: *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
204: } else if (!strcmp(name, "SAX function resolveEntity")) {
205: *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
206: } else if (!strcmp(name, "SAX function getEntity")) {
207: *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
208: } else if (!strcmp(name, "SAX function entityDecl")) {
209: *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
210: } else if (!strcmp(name, "SAX function notationDecl")) {
211: *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
212: } else if (!strcmp(name, "SAX function attributeDecl")) {
213: *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
214: } else if (!strcmp(name, "SAX function elementDecl")) {
215: *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
216: } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
217: *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
218: } else if (!strcmp(name, "SAX function setDocumentLocator")) {
219: *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
220: } else if (!strcmp(name, "SAX function startDocument")) {
221: *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
222: } else if (!strcmp(name, "SAX function endDocument")) {
223: *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
224: } else if (!strcmp(name, "SAX function startElement")) {
225: *((startElementSAXFunc *) result) = ctxt->sax->startElement;
226: } else if (!strcmp(name, "SAX function endElement")) {
227: *((endElementSAXFunc *) result) = ctxt->sax->endElement;
228: } else if (!strcmp(name, "SAX function reference")) {
229: *((referenceSAXFunc *) result) = ctxt->sax->reference;
230: } else if (!strcmp(name, "SAX function characters")) {
231: *((charactersSAXFunc *) result) = ctxt->sax->characters;
232: } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
233: *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
234: } else if (!strcmp(name, "SAX function processingInstruction")) {
235: *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
236: } else if (!strcmp(name, "SAX function comment")) {
237: *((commentSAXFunc *) result) = ctxt->sax->comment;
238: } else if (!strcmp(name, "SAX function warning")) {
239: *((warningSAXFunc *) result) = ctxt->sax->warning;
240: } else if (!strcmp(name, "SAX function error")) {
241: *((errorSAXFunc *) result) = ctxt->sax->error;
242: } else if (!strcmp(name, "SAX function fatalError")) {
243: *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
244: } else if (!strcmp(name, "SAX function getParameterEntity")) {
245: *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
246: } else if (!strcmp(name, "SAX function cdataBlock")) {
247: *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
248: } else if (!strcmp(name, "SAX function externalSubset")) {
249: *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
250: } else {
251: return(-1);
252: }
253: return(0);
254: }
255:
256: /*
257: * xmlSetFeature:
258: * @ctxt: an XML/HTML parser context
259: * @name: the feature name
260: * @value: pointer to the location of the new value
261: *
262: * Change the current value of one feature of this parser instance
263: *
264: * Returns -1 in case or error, 0 otherwise
265: */
266: int
267: xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
268: if ((ctxt == NULL) || (name == NULL) || (value == NULL))
269: return(-1);
270:
271: if (!strcmp(name, "validate")) {
272: ctxt->validate = *((int *) value);
273: } else if (!strcmp(name, "keep blanks")) {
274: ctxt->keepBlanks = *((int *) value);
275: } else if (!strcmp(name, "disable SAX")) {
276: ctxt->disableSAX = *((int *) value);
277: } else if (!strcmp(name, "fetch external entities")) {
278: int newvalid = *((int *) value);
279: if ((!ctxt->validate) && (newvalid != 0)) {
280: if (ctxt->vctxt.warning == NULL)
281: ctxt->vctxt.warning = xmlParserValidityWarning;
282: if (ctxt->vctxt.error == NULL)
283: ctxt->vctxt.error = xmlParserValidityError;
284: /* Allocate the Node stack */
285: ctxt->vctxt.nodeTab = (xmlNodePtr *)
286: xmlMalloc(4 * sizeof(xmlNodePtr));
287: if (ctxt->vctxt.nodeTab == NULL) {
288: ctxt->vctxt.nodeMax = 0;
289: ctxt->validate = 0;
290: return(-1);
291: }
292: ctxt->vctxt.nodeNr = 0;
293: ctxt->vctxt.nodeMax = 4;
294: ctxt->vctxt.node = NULL;
295: }
296: ctxt->validate = newvalid;
297: } else if (!strcmp(name, "substitute entities")) {
298: ctxt->replaceEntities = *((int *) value);
299: } else if (!strcmp(name, "gather line info")) {
300: ctxt->record_info = *((int *) value);
301: } else if (!strcmp(name, "user data")) {
302: ctxt->userData = *((void **)value);
303: } else if (!strcmp(name, "is html")) {
304: ctxt->html = *((int *) value);
305: } else if (!strcmp(name, "is standalone")) {
306: ctxt->standalone = *((int *) value);
307: } else if (!strcmp(name, "document")) {
308: ctxt->myDoc = *((xmlDocPtr *) value);
309: } else if (!strcmp(name, "is well formed")) {
310: ctxt->wellFormed = *((int *) value);
311: } else if (!strcmp(name, "is valid")) {
312: ctxt->valid = *((int *) value);
313: } else if (!strcmp(name, "SAX block")) {
314: ctxt->sax = *((xmlSAXHandlerPtr *) value);
315: } else if (!strcmp(name, "SAX function internalSubset")) {
316: ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
317: } else if (!strcmp(name, "SAX function isStandalone")) {
318: ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
319: } else if (!strcmp(name, "SAX function hasInternalSubset")) {
320: ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
321: } else if (!strcmp(name, "SAX function hasExternalSubset")) {
322: ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
323: } else if (!strcmp(name, "SAX function resolveEntity")) {
324: ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
325: } else if (!strcmp(name, "SAX function getEntity")) {
326: ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
327: } else if (!strcmp(name, "SAX function entityDecl")) {
328: ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
329: } else if (!strcmp(name, "SAX function notationDecl")) {
330: ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
331: } else if (!strcmp(name, "SAX function attributeDecl")) {
332: ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
333: } else if (!strcmp(name, "SAX function elementDecl")) {
334: ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
335: } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
336: ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
337: } else if (!strcmp(name, "SAX function setDocumentLocator")) {
338: ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
339: } else if (!strcmp(name, "SAX function startDocument")) {
340: ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
341: } else if (!strcmp(name, "SAX function endDocument")) {
342: ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
343: } else if (!strcmp(name, "SAX function startElement")) {
344: ctxt->sax->startElement = *((startElementSAXFunc *) value);
345: } else if (!strcmp(name, "SAX function endElement")) {
346: ctxt->sax->endElement = *((endElementSAXFunc *) value);
347: } else if (!strcmp(name, "SAX function reference")) {
348: ctxt->sax->reference = *((referenceSAXFunc *) value);
349: } else if (!strcmp(name, "SAX function characters")) {
350: ctxt->sax->characters = *((charactersSAXFunc *) value);
351: } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
352: ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
353: } else if (!strcmp(name, "SAX function processingInstruction")) {
354: ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
355: } else if (!strcmp(name, "SAX function comment")) {
356: ctxt->sax->comment = *((commentSAXFunc *) value);
357: } else if (!strcmp(name, "SAX function warning")) {
358: ctxt->sax->warning = *((warningSAXFunc *) value);
359: } else if (!strcmp(name, "SAX function error")) {
360: ctxt->sax->error = *((errorSAXFunc *) value);
361: } else if (!strcmp(name, "SAX function fatalError")) {
362: ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
363: } else if (!strcmp(name, "SAX function getParameterEntity")) {
364: ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
365: } else if (!strcmp(name, "SAX function cdataBlock")) {
366: ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
367: } else if (!strcmp(name, "SAX function externalSubset")) {
368: ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
369: } else {
370: return(-1);
371: }
372: return(0);
373: }
374:
375: /************************************************************************
376: * *
377: * Some functions to avoid too large macros *
378: * *
379: ************************************************************************/
380:
/**
 * xmlIsChar:
 * @c: an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * i.e. any Unicode character except the surrogate blocks, FFFE and FFFF.
 * Also available as a macro IS_CHAR()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsChar(int c) {
    /* Below the space character only TAB, LF and CR are allowed. */
    if (c < 0x20)
        return((c == 0x09) || (c == 0x0A) || (c == 0x0D));
    if (c <= 0xD7FF)
        return(1);
    /* 0xD800 - 0xDFFF: surrogate blocks */
    if (c < 0xE000)
        return(0);
    if (c <= 0xFFFD)
        return(1);
    /* 0xFFFE and 0xFFFF fall through and fail the final range test. */
    return((c >= 0x10000) && (c <= 0x10FFFF));
}
401:
/**
 * xmlIsBlank:
 * @c: an unicode character (int)
 *
 * Check whether the character is one of the white space characters of
 * the production
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 * Also available as a macro IS_BLANK()
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBlank(int c) {
    switch (c) {
        case 0x20:
        case 0x09:
        case 0x0A:
        case 0x0D:
            return(1);
        default:
            return(0);
    }
}
416:
/*
 * Direct lookup table for the code points 0x0000 - 0x00FF: the entry is 1
 * when the code point is a BaseChar, 0 otherwise.
 */
static const int xmlBaseArray[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
};

/**
 * xmlIsBaseChar:
 * @c: an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Code points below 0x0100 are answered from the xmlBaseArray lookup
 * table. All others are searched in a table of inclusive ranges which is
 * sorted in increasing order, as required by the binary search.
 * Negative values are never a BaseChar.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsBaseChar(int c) {
    /* Inclusive [first, last] ranges, sorted and non overlapping. */
    static const struct {
        unsigned short first;
        unsigned short last;
    } ranges[] = {
        {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E},
        {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217},
        {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0386}, {0x0388, 0x038A},
        {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6},
        {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
        {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C},
        {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC},
        {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556},
        {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
        {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
        {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},

        {0x0905, 0x0939}, {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C},
        {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
        {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1},
        {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
        {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C},
        {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D},
        {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
        {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0}, {0x0B05, 0x0B0C},
        {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
        {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
        {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
        {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
        {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
        {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
        {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
        {0x0CB5, 0x0CB9}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C},
        {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61},
        {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45},
        {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
        {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
        {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE},
        {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
        {0x0F40, 0x0F47}, {0x0F49, 0x0F69},

        {0x10A0, 0x10C5}, {0x10D0, 0x10F6}, {0x1100, 0x1100}, {0x1102, 0x1103},
        {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110B, 0x110C}, {0x110E, 0x1112},
        {0x113C, 0x113C}, {0x113E, 0x113E}, {0x1140, 0x1140}, {0x114C, 0x114C},
        {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
        {0x115F, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
        {0x1169, 0x1169}, {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175},
        {0x119E, 0x119E}, {0x11A8, 0x11A8}, {0x11AB, 0x11AB}, {0x11AE, 0x11AF},
        {0x11B7, 0x11B8}, {0x11BA, 0x11BA}, {0x11BC, 0x11C2}, {0x11EB, 0x11EB},
        {0x11F0, 0x11F0}, {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
        {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
        {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D},
        {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE},
        {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
        {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x2126},
        {0x212A, 0x212B}, {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094},
        {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0xAC00, 0xD7A3}
    };
    int low, high, mid;

    /* A negative value must never be used as an index in xmlBaseArray. */
    if (c < 0)
        return(0);
    if (c < 0x0100)
        return(xmlBaseArray[c]);

    low = 0;
    high = (int) (sizeof(ranges) / sizeof(ranges[0])) - 1;
    while (low <= high) {
        mid = low + (high - low) / 2;
        if (c < ranges[mid].first)
            high = mid - 1;
        else if (c > ranges[mid].last)
            low = mid + 1;
        else
            return(1);
    }
    return(0);
}
655:
/**
 * xmlIsDigit:
 * @c: an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* Inclusive digit ranges above ASCII, in increasing order. */
    static const struct {
        unsigned short first;
        unsigned short last;
    } blocks[] = {
        {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF},
        {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF},
        {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59},
        {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
    };
    unsigned int idx;

    if ((c >= 0x0030) && (c <= 0x0039))
        return(1);

    for (idx = 0; idx < sizeof(blocks) / sizeof(blocks[0]); idx++) {
        /* The table is sorted: once past @c nothing further can match. */
        if (c < blocks[idx].first)
            return(0);
        if (c <= blocks[idx].last)
            return(1);
    }
    return(0);
}
685:
/**
 * xmlIsCombining:
 * @c: an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* Inclusive [first, last] ranges, in increasing order. */
    static const struct {
        unsigned short first;
        unsigned short last;
    } marks[] = {
        {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
        {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
        {0x05C4, 0x05C4}, {0x064B, 0x0652}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
        {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},

        {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
        {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC},
        {0x09BE, 0x09BE}, {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
        {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},

        {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F},
        {0x0A40, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
        {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
        {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43},
        {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83},
        {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
        {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
        {0x0C55, 0x0C56}, {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
        {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43},
        {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},

        {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
        {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
        {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E},
        {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
        {0x0F97, 0x0F97}, {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
        {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099},
        {0x309A, 0x309A}
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(marks) / sizeof(marks[0]); idx++) {
        /* The table is sorted: once past @c nothing further can match. */
        if (c < marks[idx].first)
            return(0);
        if (c <= marks[idx].last)
            return(1);
    }
    return(0);
}
798:
/**
 * xmlIsExtender:
 * @c: an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    switch (c) {
        case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
        case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
        /* [#x3031-#x3035] */
        case 0x3031: case 0x3032: case 0x3033: case 0x3034: case 0x3035:
        /* [#x309D-#x309E] */
        case 0x309D: case 0x309E:
        /* [#x30FC-#x30FE], every member of the range including 0x30FD */
        case 0x30FC: case 0x30FD: case 0x30FE:
            return 1;
        default:
            return 0;
    }
}
823:
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; values
 * outside of it (for example the #xF900-#xFA2D block) are rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    /* fast path: nothing below 0x100 belongs to the production */
    if (c < 0x0100)
        return(0);
    return((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
           (((c) >= 0x3021) && ((c) <= 0x3029)) ||
           ((c) == 0x3007));
}
841:
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsLetter(int c) {
    /* a Letter is either a BaseChar ... */
    if (IS_BASECHAR(c))
        return(1);
    /* ... or an Ideographic */
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
855:
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns 0 if not, 1 otherwise
 */
int
xmlIsPubidChar(int c) {
    /* [a-zA-Z0-9] */
    if ((c >= 'a') && (c <= 'z'))
        return(1);
    if ((c >= 'A') && (c <= 'Z'))
        return(1);
    if ((c >= '0') && (c <= '9'))
        return(1);

    switch (c) {
        /* the allowed white space characters */
        case 0x20: case 0x0D: case 0x0A:
        /* the allowed punctuation */
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
878:
879: /************************************************************************
880: * *
881: * Input handling functions for progressive parsing *
882: * *
883: ************************************************************************/
884:
/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */


/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports through xmlGenericError() when the base/cur pointers of the
 * input are not consistent with the underlying buffer, then dumps the
 * buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p: casting them to int truncates them
     * on platforms where pointers are wider than int. The other values
     * are explicitly converted to int to match their %d conversions.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
917:
918:
919: /**
920: * xmlParserInputRead:
921: * @in: an XML parser input
922: * @len: an indicative size for the lookahead
923: *
924: * This function refresh the input for the parser. It doesn't try to
925: * preserve pointers to the input buffer, and discard already read data
926: *
927: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
928: * end of this entity
929: */
930: int
931: xmlParserInputRead(xmlParserInputPtr in, int len) {
932: int ret;
933: int used;
934: int index;
935:
936: #ifdef DEBUG_INPUT
1.7 veillard 937: xmlGenericError(xmlGenericErrorContext, "Read\n");
1.1 veillard 938: #endif
939: if (in->buf == NULL) return(-1);
940: if (in->base == NULL) return(-1);
941: if (in->cur == NULL) return(-1);
942: if (in->buf->buffer == NULL) return(-1);
943: if (in->buf->readcallback == NULL) return(-1);
944:
945: CHECK_BUFFER(in);
946:
947: used = in->cur - in->buf->buffer->content;
948: ret = xmlBufferShrink(in->buf->buffer, used);
949: if (ret > 0) {
950: in->cur -= ret;
951: in->consumed += ret;
952: }
953: ret = xmlParserInputBufferRead(in->buf, len);
954: if (in->base != in->buf->buffer->content) {
955: /*
956: * the buffer has been realloced
957: */
958: index = in->cur - in->base;
959: in->base = in->buf->buffer->content;
960: in->cur = &in->buf->buffer->content[index];
961: }
962:
963: CHECK_BUFFER(in);
964:
965: return(ret);
966: }
967:
968: /**
969: * xmlParserInputGrow:
970: * @in: an XML parser input
971: * @len: an indicative size for the lookahead
972: *
973: * This function increase the input for the parser. It tries to
974: * preserve pointers to the input buffer, and keep already read data
975: *
976: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
977: * end of this entity
978: */
979: int
980: xmlParserInputGrow(xmlParserInputPtr in, int len) {
981: int ret;
982: int index;
983:
984: #ifdef DEBUG_INPUT
1.7 veillard 985: xmlGenericError(xmlGenericErrorContext, "Grow\n");
1.1 veillard 986: #endif
987: if (in->buf == NULL) return(-1);
988: if (in->base == NULL) return(-1);
989: if (in->cur == NULL) return(-1);
990: if (in->buf->buffer == NULL) return(-1);
991:
992: CHECK_BUFFER(in);
993:
994: index = in->cur - in->base;
995: if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
996:
997: CHECK_BUFFER(in);
998:
999: return(0);
1000: }
1001: if (in->buf->readcallback != NULL)
1002: ret = xmlParserInputBufferGrow(in->buf, len);
1003: else
1004: return(0);
1005:
1006: /*
1007: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
1008: * block, but we use it really as an integer to do some
1009: * pointer arithmetic. Insure will raise it as a bug but in
1010: * that specific case, that's not !
1011: */
1012: if (in->base != in->buf->buffer->content) {
1013: /*
1014: * the buffer has been realloced
1015: */
1016: index = in->cur - in->base;
1017: in->base = in->buf->buffer->content;
1018: in->cur = &in->buf->buffer->content[index];
1019: }
1020:
1021: CHECK_BUFFER(in);
1022:
1023: return(ret);
1024: }
1025:
1026: /**
1027: * xmlParserInputShrink:
1028: * @in: an XML parser input
1029: *
1030: * This function removes used input for the parser.
1031: */
1032: void
1033: xmlParserInputShrink(xmlParserInputPtr in) {
1034: int used;
1035: int ret;
1036: int index;
1037:
1038: #ifdef DEBUG_INPUT
1.7 veillard 1039: xmlGenericError(xmlGenericErrorContext, "Shrink\n");
1.1 veillard 1040: #endif
1041: if (in->buf == NULL) return;
1042: if (in->base == NULL) return;
1043: if (in->cur == NULL) return;
1044: if (in->buf->buffer == NULL) return;
1045:
1046: CHECK_BUFFER(in);
1047:
1048: used = in->cur - in->buf->buffer->content;
1.6 veillard 1049: /*
1050: * Do not shrink on large buffers whose only a tiny fraction
1051: * was consumned
1052: */
1053: if (in->buf->buffer->use > used + 2 * INPUT_CHUNK)
1054: return;
1.1 veillard 1055: if (used > INPUT_CHUNK) {
1056: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1057: if (ret > 0) {
1058: in->cur -= ret;
1059: in->consumed += ret;
1060: }
1061: }
1062:
1063: CHECK_BUFFER(in);
1064:
1065: if (in->buf->buffer->use > INPUT_CHUNK) {
1066: return;
1067: }
1068: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1069: if (in->base != in->buf->buffer->content) {
1070: /*
1071: * the buffer has been realloced
1072: */
1073: index = in->cur - in->base;
1074: in->base = in->buf->buffer->content;
1075: in->cur = &in->buf->buffer->content[index];
1076: }
1077:
1078: CHECK_BUFFER(in);
1079: }
1080:
1081: /************************************************************************
1082: * *
1083: * UTF8 character input and related functions *
1084: * *
1085: ************************************************************************/
1086:
1087: /**
1088: * xmlNextChar:
1089: * @ctxt: the XML parser context
1090: *
1091: * Skip to the next char input char.
1092: */
1093:
1094: void
1095: xmlNextChar(xmlParserCtxtPtr ctxt) {
1096: if (ctxt->instate == XML_PARSER_EOF)
1097: return;
1098:
1099: /*
1100: * 2.11 End-of-Line Handling
1101: * the literal two-character sequence "#xD#xA" or a standalone
1102: * literal #xD, an XML processor must pass to the application
1103: * the single character #xA.
1104: */
1105: if (ctxt->token != 0) ctxt->token = 0;
1106: else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1107: if ((*ctxt->input->cur == 0) &&
1108: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
1109: (ctxt->instate != XML_PARSER_COMMENT)) {
1110: /*
1111: * If we are at the end of the current entity and
1112: * the context allows it, we pop consumed entities
1113: * automatically.
1114: * the auto closing should be blocked in other cases
1115: */
1116: xmlPopInput(ctxt);
1117: } else {
1118: if (*(ctxt->input->cur) == '\n') {
1119: ctxt->input->line++; ctxt->input->col = 1;
1120: } else ctxt->input->col++;
1121: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1122: /*
1123: * We are supposed to handle UTF8, check it's valid
1124: * From rfc2044: encoding of the Unicode values on UTF-8:
1125: *
1126: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1127: * 0000 0000-0000 007F 0xxxxxxx
1128: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1129: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1130: *
1131: * Check for the 0x110000 limit too
1132: */
1133: const unsigned char *cur = ctxt->input->cur;
1134: unsigned char c;
1135:
1136: c = *cur;
1137: if (c & 0x80) {
1138: if (cur[1] == 0)
1139: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1140: if ((cur[1] & 0xc0) != 0x80)
1141: goto encoding_error;
1142: if ((c & 0xe0) == 0xe0) {
1143: unsigned int val;
1144:
1145: if (cur[2] == 0)
1146: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1147: if ((cur[2] & 0xc0) != 0x80)
1148: goto encoding_error;
1149: if ((c & 0xf0) == 0xf0) {
1150: if (cur[3] == 0)
1151: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1152: if (((c & 0xf8) != 0xf0) ||
1153: ((cur[3] & 0xc0) != 0x80))
1154: goto encoding_error;
1155: /* 4-byte code */
1156: ctxt->input->cur += 4;
1157: val = (cur[0] & 0x7) << 18;
1158: val |= (cur[1] & 0x3f) << 12;
1159: val |= (cur[2] & 0x3f) << 6;
1160: val |= cur[3] & 0x3f;
1161: } else {
1162: /* 3-byte code */
1163: ctxt->input->cur += 3;
1164: val = (cur[0] & 0xf) << 12;
1165: val |= (cur[1] & 0x3f) << 6;
1166: val |= cur[2] & 0x3f;
1167: }
1168: if (((val > 0xd7ff) && (val < 0xe000)) ||
1169: ((val > 0xfffd) && (val < 0x10000)) ||
1170: (val >= 0x110000)) {
1171: if ((ctxt->sax != NULL) &&
1172: (ctxt->sax->error != NULL))
1173: ctxt->sax->error(ctxt->userData,
1174: "Char 0x%X out of allowed range\n", val);
1175: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1176: ctxt->wellFormed = 0;
1177: ctxt->disableSAX = 1;
1178: }
1179: } else
1180: /* 2-byte code */
1181: ctxt->input->cur += 2;
1182: } else
1183: /* 1-byte code */
1184: ctxt->input->cur++;
1185: } else {
1186: /*
1187: * Assume it's a fixed lenght encoding (1) with
1188: * a compatibke encoding for the ASCII set, since
1189: * XML constructs only use < 128 chars
1190: */
1191: ctxt->input->cur++;
1192: }
1193: ctxt->nbChars++;
1194: if (*ctxt->input->cur == 0)
1195: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1196: }
1197: } else {
1198: ctxt->input->cur++;
1199: ctxt->nbChars++;
1200: if (*ctxt->input->cur == 0)
1201: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1202: }
1203: if ((*ctxt->input->cur == '%') && (!ctxt->html))
1204: xmlParserHandlePEReference(ctxt);
1205: if ((*ctxt->input->cur == 0) &&
1206: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1207: xmlPopInput(ctxt);
1208: return;
1209: encoding_error:
1210: /*
1211: * If we detect an UTF8 error that probably mean that the
1212: * input encoding didn't get properly advertized in the
1213: * declaration header. Report the error and switch the encoding
1214: * to ISO-Latin-1 (if you don't like this policy, just declare the
1215: * encoding !)
1216: */
1217: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1218: ctxt->sax->error(ctxt->userData,
1219: "Input is not proper UTF-8, indicate encoding !\n");
1220: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1221: ctxt->input->cur[0], ctxt->input->cur[1],
1222: ctxt->input->cur[2], ctxt->input->cur[3]);
1223: }
1224: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1225:
1226: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1227: ctxt->input->cur++;
1228: return;
1229: }
1230:
1231: /**
1232: * xmlCurrentChar:
1233: * @ctxt: the XML parser context
1234: * @len: pointer to the length of the char read
1235: *
1236: * The current char value, if using UTF-8 this may actaully span multiple
1237: * bytes in the input buffer. Implement the end of line normalization:
1238: * 2.11 End-of-Line Handling
1239: * Wherever an external parsed entity or the literal entity value
1240: * of an internal parsed entity contains either the literal two-character
1241: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
1242: * must pass to the application the single character #xA.
1243: * This behavior can conveniently be produced by normalizing all
1244: * line breaks to #xA on input, before parsing.)
1245: *
1246: * Returns the current char value and its lenght
1247: */
1248:
1249: int
1250: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1251: if (ctxt->instate == XML_PARSER_EOF)
1252: return(0);
1253:
1254: if (ctxt->token != 0) {
1255: *len = 0;
1256: return(ctxt->token);
1257: }
1258: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1259: /*
1260: * We are supposed to handle UTF8, check it's valid
1261: * From rfc2044: encoding of the Unicode values on UTF-8:
1262: *
1263: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1264: * 0000 0000-0000 007F 0xxxxxxx
1265: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1266: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1267: *
1268: * Check for the 0x110000 limit too
1269: */
1270: const unsigned char *cur = ctxt->input->cur;
1271: unsigned char c;
1272: unsigned int val;
1273:
1274: c = *cur;
1275: if (c & 0x80) {
1276: if (cur[1] == 0)
1277: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1278: if ((cur[1] & 0xc0) != 0x80)
1279: goto encoding_error;
1280: if ((c & 0xe0) == 0xe0) {
1281:
1282: if (cur[2] == 0)
1283: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1284: if ((cur[2] & 0xc0) != 0x80)
1285: goto encoding_error;
1286: if ((c & 0xf0) == 0xf0) {
1287: if (cur[3] == 0)
1288: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1289: if (((c & 0xf8) != 0xf0) ||
1290: ((cur[3] & 0xc0) != 0x80))
1291: goto encoding_error;
1292: /* 4-byte code */
1293: *len = 4;
1294: val = (cur[0] & 0x7) << 18;
1295: val |= (cur[1] & 0x3f) << 12;
1296: val |= (cur[2] & 0x3f) << 6;
1297: val |= cur[3] & 0x3f;
1298: } else {
1299: /* 3-byte code */
1300: *len = 3;
1301: val = (cur[0] & 0xf) << 12;
1302: val |= (cur[1] & 0x3f) << 6;
1303: val |= cur[2] & 0x3f;
1304: }
1305: } else {
1306: /* 2-byte code */
1307: *len = 2;
1308: val = (cur[0] & 0x1f) << 6;
1309: val |= cur[1] & 0x3f;
1310: }
1311: if (!IS_CHAR(val)) {
1312: if ((ctxt->sax != NULL) &&
1313: (ctxt->sax->error != NULL))
1314: ctxt->sax->error(ctxt->userData,
1315: "Char 0x%X out of allowed range\n", val);
1316: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1317: ctxt->wellFormed = 0;
1318: ctxt->disableSAX = 1;
1319: }
1320: return(val);
1321: } else {
1322: /* 1-byte code */
1323: *len = 1;
1324: if (*ctxt->input->cur == 0xD) {
1325: if (ctxt->input->cur[1] == 0xA) {
1326: ctxt->nbChars++;
1327: ctxt->input->cur++;
1328: }
1329: return(0xA);
1330: }
1331: return((int) *ctxt->input->cur);
1332: }
1333: }
1334: /*
1335: * Assume it's a fixed lenght encoding (1) with
1336: * a compatibke encoding for the ASCII set, since
1337: * XML constructs only use < 128 chars
1338: */
1339: *len = 1;
1340: if (*ctxt->input->cur == 0xD) {
1341: if (ctxt->input->cur[1] == 0xA) {
1342: ctxt->nbChars++;
1343: ctxt->input->cur++;
1344: }
1345: return(0xA);
1346: }
1347: return((int) *ctxt->input->cur);
1348: encoding_error:
1349: /*
1350: * If we detect an UTF8 error that probably mean that the
1351: * input encoding didn't get properly advertized in the
1352: * declaration header. Report the error and switch the encoding
1353: * to ISO-Latin-1 (if you don't like this policy, just declare the
1354: * encoding !)
1355: */
1356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1357: ctxt->sax->error(ctxt->userData,
1358: "Input is not proper UTF-8, indicate encoding !\n");
1359: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1360: ctxt->input->cur[0], ctxt->input->cur[1],
1361: ctxt->input->cur[2], ctxt->input->cur[3]);
1362: }
1363: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1364:
1365: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1366: *len = 1;
1367: return((int) *ctxt->input->cur);
1368: }
1369:
1370: /**
1371: * xmlStringCurrentChar:
1372: * @ctxt: the XML parser context
1373: * @cur: pointer to the beginning of the char
1374: * @len: pointer to the length of the char read
1375: *
1376: * The current char value, if using UTF-8 this may actaully span multiple
1377: * bytes in the input buffer.
1378: *
1379: * Returns the current char value and its lenght
1380: */
1381:
1382: int
1383: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1384: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1385: /*
1386: * We are supposed to handle UTF8, check it's valid
1387: * From rfc2044: encoding of the Unicode values on UTF-8:
1388: *
1389: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1390: * 0000 0000-0000 007F 0xxxxxxx
1391: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1392: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1393: *
1394: * Check for the 0x110000 limit too
1395: */
1396: unsigned char c;
1397: unsigned int val;
1398:
1399: c = *cur;
1400: if (c & 0x80) {
1401: if ((cur[1] & 0xc0) != 0x80)
1402: goto encoding_error;
1403: if ((c & 0xe0) == 0xe0) {
1404:
1405: if ((cur[2] & 0xc0) != 0x80)
1406: goto encoding_error;
1407: if ((c & 0xf0) == 0xf0) {
1408: if (((c & 0xf8) != 0xf0) ||
1409: ((cur[3] & 0xc0) != 0x80))
1410: goto encoding_error;
1411: /* 4-byte code */
1412: *len = 4;
1413: val = (cur[0] & 0x7) << 18;
1414: val |= (cur[1] & 0x3f) << 12;
1415: val |= (cur[2] & 0x3f) << 6;
1416: val |= cur[3] & 0x3f;
1417: } else {
1418: /* 3-byte code */
1419: *len = 3;
1420: val = (cur[0] & 0xf) << 12;
1421: val |= (cur[1] & 0x3f) << 6;
1422: val |= cur[2] & 0x3f;
1423: }
1424: } else {
1425: /* 2-byte code */
1426: *len = 2;
1427: val = (cur[0] & 0x1f) << 6;
1428: val |= cur[2] & 0x3f;
1429: }
1430: if (!IS_CHAR(val)) {
1431: if ((ctxt->sax != NULL) &&
1432: (ctxt->sax->error != NULL))
1433: ctxt->sax->error(ctxt->userData,
1434: "Char 0x%X out of allowed range\n", val);
1435: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1436: ctxt->wellFormed = 0;
1437: ctxt->disableSAX = 1;
1438: }
1439: return(val);
1440: } else {
1441: /* 1-byte code */
1442: *len = 1;
1443: return((int) *cur);
1444: }
1445: }
1446: /*
1447: * Assume it's a fixed lenght encoding (1) with
1448: * a compatibke encoding for the ASCII set, since
1449: * XML constructs only use < 128 chars
1450: */
1451: *len = 1;
1452: return((int) *cur);
1453: encoding_error:
1454: /*
1455: * If we detect an UTF8 error that probably mean that the
1456: * input encoding didn't get properly advertized in the
1457: * declaration header. Report the error and switch the encoding
1458: * to ISO-Latin-1 (if you don't like this policy, just declare the
1459: * encoding !)
1460: */
1461: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1462: ctxt->sax->error(ctxt->userData,
1463: "Input is not proper UTF-8, indicate encoding !\n");
1464: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1465: ctxt->input->cur[0], ctxt->input->cur[1],
1466: ctxt->input->cur[2], ctxt->input->cur[3]);
1467: }
1468: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1469:
1470: *len = 1;
1471: return((int) *cur);
1472: }
1473:
1474: /**
1475: * xmlCopyChar:
1476: * @len: pointer to the length of the char read (or zero)
1477: * @array: pointer to an arry of xmlChar
1478: * @val: the char value
1479: *
1480: * append the char value in the array
1481: *
1482: * Returns the number of xmlChar written
1483: */
1484:
1485: int
1486: xmlCopyChar(int len, xmlChar *out, int val) {
1487: /*
1488: * We are supposed to handle UTF8, check it's valid
1489: * From rfc2044: encoding of the Unicode values on UTF-8:
1490: *
1491: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1492: * 0000 0000-0000 007F 0xxxxxxx
1493: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1494: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1495: */
1496: if (len == 0) {
1497: if (val < 0) len = 0;
1498: else if (val < 0x80) len = 1;
1499: else if (val < 0x800) len = 2;
1500: else if (val < 0x10000) len = 3;
1501: else if (val < 0x110000) len = 4;
1502: if (len == 0) {
1.7 veillard 1503: xmlGenericError(xmlGenericErrorContext,
1504: "Internal error, xmlCopyChar 0x%X out of bound\n",
1.1 veillard 1505: val);
1506: return(0);
1507: }
1508: }
1509: if (len > 1) {
1510: int bits;
1511:
1512: if (val < 0x80) { *out++= val; bits= -6; }
1513: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1514: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1515: else { *out++= (val >> 18) | 0xF0; bits= 12; }
1516:
1517: for ( ; bits >= 0; bits-= 6)
1518: *out++= ((val >> bits) & 0x3F) | 0x80 ;
1519:
1520: return(len);
1521: }
1522: *out = (xmlChar) val;
1523: return(1);
1524: }
1525:
1526: /************************************************************************
1527: * *
1.3 veillard 1528: * Commodity functions to switch encodings *
1529: * *
1530: ************************************************************************/
1531:
1532: /**
1533: * xmlSwitchEncoding:
1534: * @ctxt: the parser context
1535: * @enc: the encoding value (number)
1536: *
1537: * change the input functions when discovering the character encoding
1538: * of a given entity.
1539: *
1540: * Returns 0 in case of success, -1 otherwise
1541: */
1542: int
1543: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1544: {
1545: xmlCharEncodingHandlerPtr handler;
1546:
1547: switch (enc) {
1548: case XML_CHAR_ENCODING_ERROR:
1549: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1550: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1551: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1552: ctxt->wellFormed = 0;
1553: ctxt->disableSAX = 1;
1554: break;
1555: case XML_CHAR_ENCODING_NONE:
1556: /* let's assume it's UTF-8 without the XML decl */
1557: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1558: return(0);
1559: case XML_CHAR_ENCODING_UTF8:
1560: /* default encoding, no conversion should be needed */
1561: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1562: return(0);
1563: default:
1564: break;
1565: }
1566: handler = xmlGetCharEncodingHandler(enc);
1567: if (handler == NULL) {
1568: /*
1569: * Default handlers.
1570: */
1571: switch (enc) {
1572: case XML_CHAR_ENCODING_ERROR:
1573: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1576: ctxt->wellFormed = 0;
1577: ctxt->disableSAX = 1;
1578: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1579: break;
1580: case XML_CHAR_ENCODING_NONE:
1581: /* let's assume it's UTF-8 without the XML decl */
1582: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1583: return(0);
1584: case XML_CHAR_ENCODING_UTF8:
1585: case XML_CHAR_ENCODING_ASCII:
1586: /* default encoding, no conversion should be needed */
1587: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1588: return(0);
1589: case XML_CHAR_ENCODING_UTF16LE:
1590: break;
1591: case XML_CHAR_ENCODING_UTF16BE:
1592: break;
1593: case XML_CHAR_ENCODING_UCS4LE:
1594: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1596: ctxt->sax->error(ctxt->userData,
1597: "char encoding USC4 little endian not supported\n");
1598: break;
1599: case XML_CHAR_ENCODING_UCS4BE:
1600: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1601: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1602: ctxt->sax->error(ctxt->userData,
1603: "char encoding USC4 big endian not supported\n");
1604: break;
1605: case XML_CHAR_ENCODING_EBCDIC:
1606: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608: ctxt->sax->error(ctxt->userData,
1609: "char encoding EBCDIC not supported\n");
1610: break;
1611: case XML_CHAR_ENCODING_UCS4_2143:
1612: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1613: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1614: ctxt->sax->error(ctxt->userData,
1615: "char encoding UCS4 2143 not supported\n");
1616: break;
1617: case XML_CHAR_ENCODING_UCS4_3412:
1618: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1619: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1620: ctxt->sax->error(ctxt->userData,
1621: "char encoding UCS4 3412 not supported\n");
1622: break;
1623: case XML_CHAR_ENCODING_UCS2:
1624: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626: ctxt->sax->error(ctxt->userData,
1627: "char encoding UCS2 not supported\n");
1628: break;
1629: case XML_CHAR_ENCODING_8859_1:
1630: case XML_CHAR_ENCODING_8859_2:
1631: case XML_CHAR_ENCODING_8859_3:
1632: case XML_CHAR_ENCODING_8859_4:
1633: case XML_CHAR_ENCODING_8859_5:
1634: case XML_CHAR_ENCODING_8859_6:
1635: case XML_CHAR_ENCODING_8859_7:
1636: case XML_CHAR_ENCODING_8859_8:
1637: case XML_CHAR_ENCODING_8859_9:
1638: /*
1639: * We used to keep the internal content in the
1640: * document encoding however this turns being unmaintainable
1641: * So xmlGetCharEncodingHandler() will return non-null
1642: * values for this now.
1643: */
1644: if ((ctxt->inputNr == 1) &&
1645: (ctxt->encoding == NULL) &&
1646: (ctxt->input->encoding != NULL)) {
1647: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1648: }
1649: ctxt->charset = enc;
1650: return(0);
1651: case XML_CHAR_ENCODING_2022_JP:
1652: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1653: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1654: ctxt->sax->error(ctxt->userData,
1655: "char encoding ISO-2022-JPnot supported\n");
1656: break;
1657: case XML_CHAR_ENCODING_SHIFT_JIS:
1658: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660: ctxt->sax->error(ctxt->userData,
1661: "char encoding Shift_JIS not supported\n");
1662: break;
1663: case XML_CHAR_ENCODING_EUC_JP:
1664: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1665: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1666: ctxt->sax->error(ctxt->userData,
1667: "char encoding EUC-JPnot supported\n");
1668: break;
1669: }
1670: }
1671: if (handler == NULL)
1672: return(-1);
1673: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1674: return(xmlSwitchToEncoding(ctxt, handler));
1675: }
1676:
1677: /**
1678: * xmlSwitchToEncoding:
1679: * @ctxt: the parser context
1680: * @handler: the encoding handler
1681: *
1682: * change the input functions when discovering the character encoding
1683: * of a given entity.
1684: *
1685: * Returns 0 in case of success, -1 otherwise
1686: */
1687: int
1688: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1689: {
1690: int nbchars;
1691:
1692: if (handler != NULL) {
1693: if (ctxt->input != NULL) {
1694: if (ctxt->input->buf != NULL) {
1695: if (ctxt->input->buf->encoder != NULL) {
1696: if (ctxt->input->buf->encoder == handler)
1697: return(0);
1698: /*
1699: * Note: this is a bit dangerous, but that's what it
1700: * takes to use nearly compatible signature for different
1701: * encodings.
1702: */
1703: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1704: ctxt->input->buf->encoder = handler;
1705: return(0);
1706: }
1707: ctxt->input->buf->encoder = handler;
1708:
1709: /*
1710: * Is there already some content down the pipe to convert ?
1711: */
1712: if ((ctxt->input->buf->buffer != NULL) &&
1713: (ctxt->input->buf->buffer->use > 0)) {
1714: int processed;
1715:
1716: /*
1717: * Specific handling of the Byte Order Mark for
1718: * UTF-16
1719: */
1720: if ((handler->name != NULL) &&
1721: (!strcmp(handler->name, "UTF-16LE")) &&
1722: (ctxt->input->cur[0] == 0xFF) &&
1723: (ctxt->input->cur[1] == 0xFE)) {
1724: ctxt->input->cur += 2;
1725: }
1726: if ((handler->name != NULL) &&
1727: (!strcmp(handler->name, "UTF-16BE")) &&
1728: (ctxt->input->cur[0] == 0xFE) &&
1729: (ctxt->input->cur[1] == 0xFF)) {
1730: ctxt->input->cur += 2;
1731: }
1732:
1733: /*
1734: * Shring the current input buffer.
1735: * Move it as the raw buffer and create a new input buffer
1736: */
1737: processed = ctxt->input->cur - ctxt->input->base;
1738: xmlBufferShrink(ctxt->input->buf->buffer, processed);
1739: ctxt->input->buf->raw = ctxt->input->buf->buffer;
1740: ctxt->input->buf->buffer = xmlBufferCreate();
1741:
1742: if (ctxt->html) {
1743: /*
1744: * converst as much as possbile of the buffer
1745: */
1746: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1747: ctxt->input->buf->buffer,
1748: ctxt->input->buf->raw);
1749: } else {
1750: /*
1751: * convert just enough to get
1752: * '<?xml version="1.0" encoding="xxx"?>'
1753: * parsed with the autodetected encoding
1754: * into the parser reading buffer.
1755: */
1756: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1757: ctxt->input->buf->buffer,
1758: ctxt->input->buf->raw);
1759: }
1760: if (nbchars < 0) {
1.7 veillard 1761: xmlGenericError(xmlGenericErrorContext,
1762: "xmlSwitchToEncoding: encoder error\n");
1.3 veillard 1763: return(-1);
1764: }
1765: ctxt->input->base =
1766: ctxt->input->cur = ctxt->input->buf->buffer->content;
1767:
1768: }
1769: return(0);
1770: } else {
1771: if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1772: /*
1773: * When parsing a static memory array one must know the
1774: * size to be able to convert the buffer.
1775: */
1776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1777: ctxt->sax->error(ctxt->userData,
1778: "xmlSwitchEncoding : no input\n");
1779: return(-1);
1780: } else {
1781: int processed;
1782:
1783: /*
1784: * Shring the current input buffer.
1785: * Move it as the raw buffer and create a new input buffer
1786: */
1787: processed = ctxt->input->cur - ctxt->input->base;
1788:
1789: ctxt->input->buf->raw = xmlBufferCreate();
1790: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1791: ctxt->input->length - processed);
1792: ctxt->input->buf->buffer = xmlBufferCreate();
1793:
1794: /*
1795: * convert as much as possible of the raw input
1796: * to the parser reading buffer.
1797: */
1798: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1799: ctxt->input->buf->buffer,
1800: ctxt->input->buf->raw);
1801: if (nbchars < 0) {
1.7 veillard 1802: xmlGenericError(xmlGenericErrorContext,
1803: "xmlSwitchToEncoding: encoder error\n");
1.3 veillard 1804: return(-1);
1805: }
1806:
1807: /*
1808: * Conversion succeeded, get rid of the old buffer
1809: */
1810: if ((ctxt->input->free != NULL) &&
1811: (ctxt->input->base != NULL))
1812: ctxt->input->free((xmlChar *) ctxt->input->base);
1813: ctxt->input->base =
1814: ctxt->input->cur = ctxt->input->buf->buffer->content;
1815: }
1816: }
1817: } else {
1818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1819: ctxt->sax->error(ctxt->userData,
1820: "xmlSwitchEncoding : no input\n");
1821: return(-1);
1822: }
1823: /*
1824: * The parsing is now done in UTF8 natively
1825: */
1826: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1827: } else
1828: return(-1);
1829: return(0);
1830:
1831: }
1832:
1833: /************************************************************************
1834: * *
1.1 veillard 1835: * Commodity functions to handle entities processing *
1836: * *
1837: ************************************************************************/
1838:
1839: /**
1840: * xmlFreeInputStream:
1841: * @input: an xmlParserInputPtr
1842: *
1843: * Free up an input stream.
1844: */
1845: void
1846: xmlFreeInputStream(xmlParserInputPtr input) {
1847: if (input == NULL) return;
1848:
1849: if (input->filename != NULL) xmlFree((char *) input->filename);
1850: if (input->directory != NULL) xmlFree((char *) input->directory);
1851: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1852: if (input->version != NULL) xmlFree((char *) input->version);
1853: if ((input->free != NULL) && (input->base != NULL))
1854: input->free((xmlChar *) input->base);
1855: if (input->buf != NULL)
1856: xmlFreeParserInputBuffer(input->buf);
1857: memset(input, -1, sizeof(xmlParserInput));
1858: xmlFree(input);
1859: }
1860:
1861: /**
1862: * xmlNewInputStream:
1863: * @ctxt: an XML parser context
1864: *
1865: * Create a new input stream structure
1866: * Returns the new input stream or NULL
1867: */
1868: xmlParserInputPtr
1869: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1870: xmlParserInputPtr input;
1871:
1872: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1873: if (input == NULL) {
1874: if (ctxt != NULL) {
1875: ctxt->errNo = XML_ERR_NO_MEMORY;
1876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877: ctxt->sax->error(ctxt->userData,
1878: "malloc: couldn't allocate a new input stream\n");
1879: ctxt->errNo = XML_ERR_NO_MEMORY;
1880: }
1881: return(NULL);
1882: }
1883: memset(input, 0, sizeof(xmlParserInput));
1884: input->line = 1;
1885: input->col = 1;
1886: input->standalone = -1;
1887: return(input);
1888: }
1889:
1890: /**
1891: * xmlNewIOInputStream:
1892: * @ctxt: an XML parser context
1893: * @input: an I/O Input
1894: * @enc: the charset encoding if known
1895: *
1896: * Create a new input stream structure encapsulating the @input into
1897: * a stream suitable for the parser.
1898: *
1899: * Returns the new input stream or NULL
1900: */
1901: xmlParserInputPtr
1902: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1903: xmlCharEncoding enc) {
1904: xmlParserInputPtr inputStream;
1905:
1906: if (xmlParserDebugEntities)
1.7 veillard 1907: xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1.1 veillard 1908: inputStream = xmlNewInputStream(ctxt);
1909: if (inputStream == NULL) {
1910: return(NULL);
1911: }
1912: inputStream->filename = NULL;
1913: inputStream->buf = input;
1914: inputStream->base = inputStream->buf->buffer->content;
1915: inputStream->cur = inputStream->buf->buffer->content;
1916: if (enc != XML_CHAR_ENCODING_NONE) {
1917: xmlSwitchEncoding(ctxt, enc);
1918: }
1919:
1920: return(inputStream);
1921: }
1922:
1923: /**
1924: * xmlNewEntityInputStream:
1925: * @ctxt: an XML parser context
1926: * @entity: an Entity pointer
1927: *
1928: * Create a new input stream based on an xmlEntityPtr
1929: *
1930: * Returns the new input stream or NULL
1931: */
1932: xmlParserInputPtr
1933: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1934: xmlParserInputPtr input;
1935:
1936: if (entity == NULL) {
1937: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1938: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1939: ctxt->sax->error(ctxt->userData,
1940: "internal: xmlNewEntityInputStream entity = NULL\n");
1941: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1942: return(NULL);
1943: }
1944: if (xmlParserDebugEntities)
1.7 veillard 1945: xmlGenericError(xmlGenericErrorContext,
1946: "new input from entity: %s\n", entity->name);
1.1 veillard 1947: if (entity->content == NULL) {
1948: switch (entity->etype) {
1949: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1950: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1951: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1952: ctxt->sax->error(ctxt->userData,
1953: "xmlNewEntityInputStream unparsed entity !\n");
1954: break;
1955: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1956: case XML_EXTERNAL_PARAMETER_ENTITY:
1957: return(xmlLoadExternalEntity((char *) entity->URI,
1958: (char *) entity->ExternalID, ctxt));
1959: case XML_INTERNAL_GENERAL_ENTITY:
1960: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1961: ctxt->sax->error(ctxt->userData,
1962: "Internal entity %s without content !\n", entity->name);
1963: break;
1964: case XML_INTERNAL_PARAMETER_ENTITY:
1965: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1966: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1967: ctxt->sax->error(ctxt->userData,
1968: "Internal parameter entity %s without content !\n", entity->name);
1969: break;
1970: case XML_INTERNAL_PREDEFINED_ENTITY:
1971: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1973: ctxt->sax->error(ctxt->userData,
1974: "Predefined entity %s without content !\n", entity->name);
1975: break;
1976: }
1977: return(NULL);
1978: }
1979: input = xmlNewInputStream(ctxt);
1980: if (input == NULL) {
1981: return(NULL);
1982: }
1983: input->filename = (char *) entity->URI;
1984: input->base = entity->content;
1985: input->cur = entity->content;
1986: input->length = entity->length;
1987: return(input);
1988: }
1989:
1990: /**
1991: * xmlNewStringInputStream:
1992: * @ctxt: an XML parser context
1993: * @buffer: an memory buffer
1994: *
1995: * Create a new input stream based on a memory buffer.
1996: * Returns the new input stream
1997: */
1998: xmlParserInputPtr
1999: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2000: xmlParserInputPtr input;
2001:
2002: if (buffer == NULL) {
2003: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
2004: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005: ctxt->sax->error(ctxt->userData,
2006: "internal: xmlNewStringInputStream string = NULL\n");
2007: return(NULL);
2008: }
2009: if (xmlParserDebugEntities)
1.7 veillard 2010: xmlGenericError(xmlGenericErrorContext,
2011: "new fixed input: %.30s\n", buffer);
1.1 veillard 2012: input = xmlNewInputStream(ctxt);
2013: if (input == NULL) {
2014: return(NULL);
2015: }
2016: input->base = buffer;
2017: input->cur = buffer;
2018: input->length = xmlStrlen(buffer);
2019: return(input);
2020: }
2021:
2022: /**
2023: * xmlNewInputFromFile:
2024: * @ctxt: an XML parser context
2025: * @filename: the filename to use as entity
2026: *
2027: * Create a new input stream based on a file.
2028: *
2029: * Returns the new input stream or NULL in case of error
2030: */
2031: xmlParserInputPtr
2032: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2033: xmlParserInputBufferPtr buf;
2034: xmlParserInputPtr inputStream;
2035: char *directory = NULL;
2036: xmlChar *URI = NULL;
2037:
2038: if (xmlParserDebugEntities)
1.7 veillard 2039: xmlGenericError(xmlGenericErrorContext,
2040: "new input from file: %s\n", filename);
1.1 veillard 2041: if (ctxt == NULL) return(NULL);
2042: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
2043: if (buf == NULL)
2044: return(NULL);
2045:
2046: URI = xmlStrdup((xmlChar *) filename);
1.3 veillard 2047: directory = xmlParserGetDirectory((const char *) URI);
1.1 veillard 2048:
2049: inputStream = xmlNewInputStream(ctxt);
2050: if (inputStream == NULL) {
2051: if (directory != NULL) xmlFree((char *) directory);
2052: if (URI != NULL) xmlFree((char *) URI);
2053: return(NULL);
2054: }
2055:
1.3 veillard 2056: inputStream->filename = (const char *) URI;
1.1 veillard 2057: inputStream->directory = directory;
2058: inputStream->buf = buf;
2059:
2060: inputStream->base = inputStream->buf->buffer->content;
2061: inputStream->cur = inputStream->buf->buffer->content;
2062: if ((ctxt->directory == NULL) && (directory != NULL))
2063: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
2064: return(inputStream);
2065: }
2066:
2067: /************************************************************************
2068: * *
2069: * Commodity functions to handle parser contexts *
2070: * *
2071: ************************************************************************/
2072:
2073: /**
2074: * xmlInitParserCtxt:
2075: * @ctxt: an XML parser context
2076: *
2077: * Initialize a parser context
2078: */
2079:
2080: void
2081: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2082: {
2083: xmlSAXHandler *sax;
2084:
2085: xmlDefaultSAXHandlerInit();
2086:
2087: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2088: if (sax == NULL) {
1.7 veillard 2089: xmlGenericError(xmlGenericErrorContext,
2090: "xmlInitParserCtxt: out of memory\n");
1.1 veillard 2091: }
1.4 veillard 2092: else
2093: memset(sax, 0, sizeof(xmlSAXHandler));
1.1 veillard 2094:
2095: /* Allocate the Input stack */
1.7 veillard 2096: ctxt->inputTab = (xmlParserInputPtr *)
2097: xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.1 veillard 2098: if (ctxt->inputTab == NULL) {
1.7 veillard 2099: xmlGenericError(xmlGenericErrorContext,
2100: "xmlInitParserCtxt: out of memory\n");
1.1 veillard 2101: ctxt->inputNr = 0;
2102: ctxt->inputMax = 0;
2103: ctxt->input = NULL;
2104: return;
2105: }
2106: ctxt->inputNr = 0;
2107: ctxt->inputMax = 5;
2108: ctxt->input = NULL;
2109:
2110: ctxt->version = NULL;
2111: ctxt->encoding = NULL;
2112: ctxt->standalone = -1;
2113: ctxt->hasExternalSubset = 0;
2114: ctxt->hasPErefs = 0;
2115: ctxt->html = 0;
2116: ctxt->external = 0;
2117: ctxt->instate = XML_PARSER_START;
2118: ctxt->token = 0;
2119: ctxt->directory = NULL;
2120:
2121: /* Allocate the Node stack */
2122: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2123: if (ctxt->nodeTab == NULL) {
1.7 veillard 2124: xmlGenericError(xmlGenericErrorContext,
2125: "xmlInitParserCtxt: out of memory\n");
1.1 veillard 2126: ctxt->nodeNr = 0;
2127: ctxt->nodeMax = 0;
2128: ctxt->node = NULL;
2129: ctxt->inputNr = 0;
2130: ctxt->inputMax = 0;
2131: ctxt->input = NULL;
2132: return;
2133: }
2134: ctxt->nodeNr = 0;
2135: ctxt->nodeMax = 10;
2136: ctxt->node = NULL;
2137:
2138: /* Allocate the Name stack */
2139: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2140: if (ctxt->nameTab == NULL) {
1.7 veillard 2141: xmlGenericError(xmlGenericErrorContext,
2142: "xmlInitParserCtxt: out of memory\n");
1.1 veillard 2143: ctxt->nodeNr = 0;
2144: ctxt->nodeMax = 0;
2145: ctxt->node = NULL;
2146: ctxt->inputNr = 0;
2147: ctxt->inputMax = 0;
2148: ctxt->input = NULL;
2149: ctxt->nameNr = 0;
2150: ctxt->nameMax = 0;
2151: ctxt->name = NULL;
2152: return;
2153: }
2154: ctxt->nameNr = 0;
2155: ctxt->nameMax = 10;
2156: ctxt->name = NULL;
2157:
2158: /* Allocate the space stack */
2159: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2160: if (ctxt->spaceTab == NULL) {
1.7 veillard 2161: xmlGenericError(xmlGenericErrorContext,
2162: "xmlInitParserCtxt: out of memory\n");
1.1 veillard 2163: ctxt->nodeNr = 0;
2164: ctxt->nodeMax = 0;
2165: ctxt->node = NULL;
2166: ctxt->inputNr = 0;
2167: ctxt->inputMax = 0;
2168: ctxt->input = NULL;
2169: ctxt->nameNr = 0;
2170: ctxt->nameMax = 0;
2171: ctxt->name = NULL;
2172: ctxt->spaceNr = 0;
2173: ctxt->spaceMax = 0;
2174: ctxt->space = NULL;
2175: return;
2176: }
2177: ctxt->spaceNr = 1;
2178: ctxt->spaceMax = 10;
2179: ctxt->spaceTab[0] = -1;
2180: ctxt->space = &ctxt->spaceTab[0];
2181:
2182: if (sax == NULL) {
2183: ctxt->sax = &xmlDefaultSAXHandler;
2184: } else {
2185: ctxt->sax = sax;
2186: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
2187: }
2188: ctxt->userData = ctxt;
2189: ctxt->myDoc = NULL;
2190: ctxt->wellFormed = 1;
2191: ctxt->valid = 1;
2192: ctxt->validate = xmlDoValidityCheckingDefaultValue;
2193: ctxt->pedantic = xmlPedanticParserDefaultValue;
2194: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2195: ctxt->vctxt.userData = ctxt;
2196: if (ctxt->validate) {
2197: ctxt->vctxt.error = xmlParserValidityError;
2198: if (xmlGetWarningsDefaultValue == 0)
2199: ctxt->vctxt.warning = NULL;
2200: else
2201: ctxt->vctxt.warning = xmlParserValidityWarning;
2202: /* Allocate the Node stack */
2203: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
2204: if (ctxt->vctxt.nodeTab == NULL) {
1.7 veillard 2205: xmlGenericError(xmlGenericErrorContext,
2206: "xmlInitParserCtxt: out of memory\n");
1.1 veillard 2207: ctxt->vctxt.nodeMax = 0;
2208: ctxt->validate = 0;
2209: ctxt->vctxt.error = NULL;
2210: ctxt->vctxt.warning = NULL;
2211: } else {
2212: ctxt->vctxt.nodeNr = 0;
2213: ctxt->vctxt.nodeMax = 4;
2214: ctxt->vctxt.node = NULL;
2215: }
2216: } else {
2217: ctxt->vctxt.error = NULL;
2218: ctxt->vctxt.warning = NULL;
2219: }
2220: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2221: ctxt->record_info = 0;
2222: ctxt->nbChars = 0;
2223: ctxt->checkIndex = 0;
2224: ctxt->inSubset = 0;
2225: ctxt->errNo = XML_ERR_OK;
2226: ctxt->depth = 0;
2227: ctxt->charset = XML_CHAR_ENCODING_UTF8;
2228: xmlInitNodeInfoSeq(&ctxt->node_seq);
2229: }
2230:
2231: /**
2232: * xmlFreeParserCtxt:
2233: * @ctxt: an XML parser context
2234: *
2235: * Free all the memory used by a parser context. However the parsed
2236: * document in ctxt->myDoc is not freed.
2237: */
2238:
2239: void
2240: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2241: {
2242: xmlParserInputPtr input;
2243: xmlChar *oldname;
2244:
2245: if (ctxt == NULL) return;
2246:
2247: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2248: xmlFreeInputStream(input);
2249: }
2250: while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
2251: xmlFree(oldname);
2252: }
2253: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2254: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
2255: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2256: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2257: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2258: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2259: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
2260: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2261: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2262: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2263: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
2264: xmlFree(ctxt->sax);
2265: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2266: xmlFree(ctxt);
2267: }
2268:
2269: /**
2270: * xmlNewParserCtxt:
2271: *
2272: * Allocate and initialize a new parser context.
2273: *
2274: * Returns the xmlParserCtxtPtr or NULL
2275: */
2276:
2277: xmlParserCtxtPtr
2278: xmlNewParserCtxt()
2279: {
2280: xmlParserCtxtPtr ctxt;
2281:
2282: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2283: if (ctxt == NULL) {
1.7 veillard 2284: xmlGenericError(xmlGenericErrorContext,
2285: "xmlNewParserCtxt : cannot allocate context\n");
1.1 veillard 2286: perror("malloc");
2287: return(NULL);
2288: }
2289: memset(ctxt, 0, sizeof(xmlParserCtxt));
2290: xmlInitParserCtxt(ctxt);
2291: return(ctxt);
2292: }
2293:
2294: /************************************************************************
2295: * *
2296: * Handling of node informations *
2297: * *
2298: ************************************************************************/
2299:
2300: /**
2301: * xmlClearParserCtxt:
2302: * @ctxt: an XML parser context
2303: *
2304: * Clear (release owned resources) and reinitialize a parser context
2305: */
2306:
2307: void
2308: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2309: {
2310: xmlClearNodeInfoSeq(&ctxt->node_seq);
2311: xmlInitParserCtxt(ctxt);
2312: }
2313:
2314: /**
2315: * xmlParserFindNodeInfo:
2316: * @ctxt: an XML parser context
2317: * @node: an XML node within the tree
2318: *
2319: * Find the parser node info struct for a given node
2320: *
2321: * Returns an xmlParserNodeInfo block pointer or NULL
2322: */
2323: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
2324: const xmlNode* node)
2325: {
2326: unsigned long pos;
2327:
2328: /* Find position where node should be at */
2329: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2330: if ( ctx->node_seq.buffer[pos].node == node )
2331: return &ctx->node_seq.buffer[pos];
2332: else
2333: return NULL;
2334: }
2335:
2336:
2337: /**
2338: * xmlInitNodeInfoSeq:
2339: * @seq: a node info sequence pointer
2340: *
2341: * -- Initialize (set to initial state) node info sequence
2342: */
2343: void
2344: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2345: {
2346: seq->length = 0;
2347: seq->maximum = 0;
2348: seq->buffer = NULL;
2349: }
2350:
2351: /**
2352: * xmlClearNodeInfoSeq:
2353: * @seq: a node info sequence pointer
2354: *
2355: * -- Clear (release memory and reinitialize) node
2356: * info sequence
2357: */
2358: void
2359: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2360: {
2361: if ( seq->buffer != NULL )
2362: xmlFree(seq->buffer);
2363: xmlInitNodeInfoSeq(seq);
2364: }
2365:
2366:
2367: /**
2368: * xmlParserFindNodeInfoIndex:
2369: * @seq: a node info sequence pointer
2370: * @node: an XML node pointer
2371: *
2372: *
2373: * xmlParserFindNodeInfoIndex : Find the index that the info record for
2374: * the given node is or should be at in a sorted sequence
2375: *
2376: * Returns a long indicating the position of the record
2377: */
2378: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
2379: const xmlNode* node)
2380: {
2381: unsigned long upper, lower, middle;
2382: int found = 0;
2383:
2384: /* Do a binary search for the key */
2385: lower = 1;
2386: upper = seq->length;
2387: middle = 0;
2388: while ( lower <= upper && !found) {
2389: middle = lower + (upper - lower) / 2;
2390: if ( node == seq->buffer[middle - 1].node )
2391: found = 1;
2392: else if ( node < seq->buffer[middle - 1].node )
2393: upper = middle - 1;
2394: else
2395: lower = middle + 1;
2396: }
2397:
2398: /* Return position */
2399: if ( middle == 0 || seq->buffer[middle - 1].node < node )
2400: return middle;
2401: else
2402: return middle - 1;
2403: }
2404:
2405:
2406: /**
2407: * xmlParserAddNodeInfo:
2408: * @ctxt: an XML parser context
2409: * @info: a node info sequence pointer
2410: *
2411: * Insert node info record into the sorted sequence
2412: */
2413: void
2414: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2415: const xmlParserNodeInfo* info)
2416: {
2417: unsigned long pos;
2418: static unsigned int block_size = 5;
2419:
2420: /* Find pos and check to see if node is already in the sequence */
2421: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
2422: if ( pos < ctxt->node_seq.length
2423: && ctxt->node_seq.buffer[pos].node == info->node ) {
2424: ctxt->node_seq.buffer[pos] = *info;
2425: }
2426:
2427: /* Otherwise, we need to add new node to buffer */
2428: else {
2429: /* Expand buffer by 5 if needed */
2430: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
2431: xmlParserNodeInfo* tmp_buffer;
2432: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
2433: *(ctxt->node_seq.maximum + block_size));
2434:
2435: if ( ctxt->node_seq.buffer == NULL )
2436: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
2437: else
2438: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
2439:
2440: if ( tmp_buffer == NULL ) {
2441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2442: ctxt->sax->error(ctxt->userData, "Out of memory\n");
2443: ctxt->errNo = XML_ERR_NO_MEMORY;
2444: return;
2445: }
2446: ctxt->node_seq.buffer = tmp_buffer;
2447: ctxt->node_seq.maximum += block_size;
2448: }
2449:
2450: /* If position is not at end, move elements out of the way */
2451: if ( pos != ctxt->node_seq.length ) {
2452: unsigned long i;
2453:
2454: for ( i = ctxt->node_seq.length; i > pos; i-- )
2455: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2456: }
2457:
2458: /* Copy element and increase length */
2459: ctxt->node_seq.buffer[pos] = *info;
2460: ctxt->node_seq.length++;
2461: }
2462: }
2463:
2464: /************************************************************************
2465: * *
2466: * Deprecated functions kept for compatibility *
2467: * *
2468: ************************************************************************/
2469:
2470: /*
2471: * xmlCheckLanguageID
2472: * @lang: pointer to the string value
2473: *
2474: * Checks that the value conforms to the LanguageID production:
2475: *
2476: * NOTE: this is somewhat deprecated, those productions were removed from
2477: * the XML Second edition.
2478: *
2479: * [33] LanguageID ::= Langcode ('-' Subcode)*
2480: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2481: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2482: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2483: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2484: * [38] Subcode ::= ([a-z] | [A-Z])+
2485: *
2486: * Returns 1 if correct 0 otherwise
2487: **/
2488: int
2489: xmlCheckLanguageID(const xmlChar *lang) {
2490: const xmlChar *cur = lang;
2491:
2492: if (cur == NULL)
2493: return(0);
2494: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2495: ((cur[0] == 'I') && (cur[1] == '-'))) {
2496: /*
2497: * IANA code
2498: */
2499: cur += 2;
2500: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2501: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2502: cur++;
2503: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2504: ((cur[0] == 'X') && (cur[1] == '-'))) {
2505: /*
2506: * User code
2507: */
2508: cur += 2;
2509: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2510: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2511: cur++;
2512: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2513: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2514: /*
2515: * ISO639
2516: */
2517: cur++;
2518: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2519: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2520: cur++;
2521: else
2522: return(0);
2523: } else
2524: return(0);
2525: while (cur[0] != 0) { /* non input consuming */
2526: if (cur[0] != '-')
2527: return(0);
2528: cur++;
2529: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2530: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2531: cur++;
2532: else
2533: return(0);
2534: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
2535: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2536: cur++;
2537: }
2538: return(1);
2539: }
2540:
2541: /**
2542: * xmlDecodeEntities:
2543: * @ctxt: the parser context
2544: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2545: * @len: the len to decode (in bytes !), -1 for no size limit
2546: * @end: an end marker xmlChar, 0 if none
2547: * @end2: an end marker xmlChar, 0 if none
2548: * @end3: an end marker xmlChar, 0 if none
2549: *
2550: * This function is deprecated, we now always process entities content
2551: * through xmlStringDecodeEntities
2552: *
2553: * TODO: remove it in next major release.
2554: *
2555: * [67] Reference ::= EntityRef | CharRef
2556: *
2557: * [69] PEReference ::= '%' Name ';'
2558: *
2559: * Returns A newly allocated string with the substitution done. The caller
2560: * must deallocate it !
2561: */
2562: xmlChar *
2563: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
2564: xmlChar end, xmlChar end2, xmlChar end3) {
2565: #if 0
2566: xmlChar *buffer = NULL;
2567: unsigned int buffer_size = 0;
2568: unsigned int nbchars = 0;
2569:
2570: xmlChar *current = NULL;
2571: xmlEntityPtr ent;
2572: unsigned int max = (unsigned int) len;
2573: int c,l;
2574: #endif
2575:
2576: static int deprecated = 0;
2577: if (!deprecated) {
1.7 veillard 2578: xmlGenericError(xmlGenericErrorContext,
2579: "xmlDecodeEntities() deprecated function reached\n");
1.1 veillard 2580: deprecated = 1;
2581: }
2582:
2583: #if 0
2584: if (ctxt->depth > 40) {
2585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586: ctxt->sax->error(ctxt->userData,
2587: "Detected entity reference loop\n");
2588: ctxt->wellFormed = 0;
2589: ctxt->disableSAX = 1;
2590: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2591: return(NULL);
2592: }
2593:
2594: /*
2595: * allocate a translation buffer.
2596: */
2597: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2598: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2599: if (buffer == NULL) {
2600: perror("xmlDecodeEntities: malloc failed");
2601: return(NULL);
2602: }
2603:
2604: /*
2605: * Ok loop until we reach one of the ending char or a size limit.
2606: */
2607: GROW;
2608: c = CUR_CHAR(l);
2609: while ((nbchars < max) && (c != end) && /* NOTUSED */
2610: (c != end2) && (c != end3)) {
2611: GROW;
2612: if (c == 0) break;
2613: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
2614: int val = xmlParseCharRef(ctxt);
2615: COPY_BUF(0,buffer,nbchars,val);
2616: NEXTL(l);
2617: } else if ((c == '&') && (ctxt->token != '&') &&
2618: (what & XML_SUBSTITUTE_REF)) {
2619: if (xmlParserDebugEntities)
1.7 veillard 2620: xmlGenericError(xmlGenericErrorContext,
2621: "decoding Entity Reference\n");
1.1 veillard 2622: ent = xmlParseEntityRef(ctxt);
2623: if ((ent != NULL) &&
2624: (ctxt->replaceEntities != 0)) {
2625: current = ent->content;
2626: while (*current != 0) { /* non input consuming loop */
2627: buffer[nbchars++] = *current++;
2628: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2629: growBuffer(buffer);
2630: }
2631: }
2632: } else if (ent != NULL) {
2633: const xmlChar *cur = ent->name;
2634:
2635: buffer[nbchars++] = '&';
2636: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2637: growBuffer(buffer);
2638: }
2639: while (*cur != 0) { /* non input consuming loop */
2640: buffer[nbchars++] = *cur++;
2641: }
2642: buffer[nbchars++] = ';';
2643: }
2644: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2645: /*
2646: * a PEReference induce to switch the entity flow,
2647: * we break here to flush the current set of chars
2648: * parsed if any. We will be called back later.
2649: */
2650: if (xmlParserDebugEntities)
1.7 veillard 2651: xmlGenericError(xmlGenericErrorContext,
2652: "decoding PE Reference\n");
1.1 veillard 2653: if (nbchars != 0) break;
2654:
2655: xmlParsePEReference(ctxt);
2656:
2657: /*
2658: * Pop-up of finished entities.
2659: */
2660: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2661: xmlPopInput(ctxt);
2662:
2663: break;
2664: } else {
2665: COPY_BUF(l,buffer,nbchars,c);
2666: NEXTL(l);
2667: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2668: growBuffer(buffer);
2669: }
2670: }
2671: c = CUR_CHAR(l);
2672: }
2673: buffer[nbchars++] = 0;
2674: return(buffer);
2675: #endif
2676: return(NULL);
2677: }
2678:
2679: /**
2680: * xmlNamespaceParseNCName:
2681: * @ctxt: an XML parser context
2682: *
2683: * parse an XML namespace name.
2684: *
2685: * TODO: this seems not in use anymore, the namespace handling is done on
2686: * top of the SAX interfaces, i.e. not on raw input.
2687: *
2688: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2689: *
2690: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2691: * CombiningChar | Extender
2692: *
2693: * Returns the namespace name or NULL
2694: */
2695:
2696: xmlChar *
2697: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
2698: #if 0
2699: xmlChar buf[XML_MAX_NAMELEN + 5];
2700: int len = 0, l;
2701: int cur = CUR_CHAR(l);
2702: #endif
2703:
2704: static int deprecated = 0;
2705: if (!deprecated) {
1.7 veillard 2706: xmlGenericError(xmlGenericErrorContext,
2707: "xmlNamespaceParseNCName() deprecated function reached\n");
1.1 veillard 2708: deprecated = 1;
2709: }
2710:
2711: #if 0
2712: /* load first the value of the char !!! */
2713: GROW;
2714: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
2715:
1.7 veillard 2716: xmlGenericError(xmlGenericErrorContext,
2717: "xmlNamespaceParseNCName: reached loop 3\n");
1.1 veillard 2718: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
2719: (cur == '.') || (cur == '-') ||
2720: (cur == '_') ||
2721: (IS_COMBINING(cur)) ||
2722: (IS_EXTENDER(cur))) {
2723: COPY_BUF(l,buf,len,cur);
2724: NEXTL(l);
2725: cur = CUR_CHAR(l);
2726: if (len >= XML_MAX_NAMELEN) {
1.7 veillard 2727: xmlGenericError(xmlGenericErrorContext,
1.1 veillard 2728: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2729: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
2730: (cur == '.') || (cur == '-') ||
2731: (cur == '_') ||
2732: (IS_COMBINING(cur)) ||
2733: (IS_EXTENDER(cur))) {
2734: NEXTL(l);
2735: cur = CUR_CHAR(l);
2736: }
2737: break;
2738: }
2739: }
2740: return(xmlStrndup(buf, len));
2741: #endif
2742: return(NULL);
2743: }
2744:
2745: /**
2746: * xmlNamespaceParseQName:
2747: * @ctxt: an XML parser context
2748: * @prefix: a xmlChar **
2749: *
2750: * TODO: this seems not in use anymore, the namespace handling is done on
2751: * top of the SAX interfaces, i.e. not on raw input.
2752: *
2753: * parse an XML qualified name
2754: *
2755: * [NS 5] QName ::= (Prefix ':')? LocalPart
2756: *
2757: * [NS 6] Prefix ::= NCName
2758: *
2759: * [NS 7] LocalPart ::= NCName
2760: *
2761: * Returns the local part, and prefix is updated
2762: * to get the Prefix if any.
2763: */
2764:
2765: xmlChar *
2766: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2767:
2768: static int deprecated = 0;
2769: if (!deprecated) {
1.7 veillard 2770: xmlGenericError(xmlGenericErrorContext,
2771: "xmlNamespaceParseQName() deprecated function reached\n");
1.1 veillard 2772: deprecated = 1;
2773: }
2774:
2775: #if 0
2776: xmlChar *ret = NULL;
2777:
2778: *prefix = NULL;
2779: ret = xmlNamespaceParseNCName(ctxt);
2780: if (RAW == ':') {
2781: *prefix = ret;
2782: NEXT;
2783: ret = xmlNamespaceParseNCName(ctxt);
2784: }
2785:
2786: return(ret);
2787: #endif
2788: return(NULL);
2789: }
2790:
2791: /**
2792: * xmlNamespaceParseNSDef:
2793: * @ctxt: an XML parser context
2794: *
2795: * parse a namespace prefix declaration
2796: *
2797: * TODO: this seems not in use anymore, the namespace handling is done on
2798: * top of the SAX interfaces, i.e. not on raw input.
2799: *
2800: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2801: *
2802: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
2803: *
2804: * Returns the namespace name
2805: */
2806:
2807: xmlChar *
2808: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
2809: static int deprecated = 0;
2810: if (!deprecated) {
1.7 veillard 2811: xmlGenericError(xmlGenericErrorContext,
2812: "xmlNamespaceParseNSDef() deprecated function reached\n");
1.1 veillard 2813: deprecated = 1;
2814: }
2815: return(NULL);
2816: #if 0
2817: xmlChar *name = NULL;
2818:
2819: if ((RAW == 'x') && (NXT(1) == 'm') &&
2820: (NXT(2) == 'l') && (NXT(3) == 'n') &&
2821: (NXT(4) == 's')) {
2822: SKIP(5);
2823: if (RAW == ':') {
2824: NEXT;
2825: name = xmlNamespaceParseNCName(ctxt);
2826: }
2827: }
2828: return(name);
2829: #endif
2830: }
2831:
2832: /**
2833: * xmlParseQuotedString:
2834: * @ctxt: an XML parser context
2835: *
2836: * Parse and return a string between quotes or doublequotes
2837: *
2838: * TODO: Deprecated, to be removed at next drop of binary compatibility
2839: *
2840: * Returns the string parser or NULL.
2841: */
2842: xmlChar *
2843: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
2844: static int deprecated = 0;
2845: if (!deprecated) {
1.7 veillard 2846: xmlGenericError(xmlGenericErrorContext,
2847: "xmlParseQuotedString() deprecated function reached\n");
1.1 veillard 2848: deprecated = 1;
2849: }
2850: return(NULL);
2851:
2852: #if 0
2853: xmlChar *buf = NULL;
2854: int len = 0,l;
2855: int size = XML_PARSER_BUFFER_SIZE;
2856: int c;
2857:
2858: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2859: if (buf == NULL) {
1.7 veillard 2860: xmlGenericError(xmlGenericErrorContext,
2861: "malloc of %d byte failed\n", size);
1.1 veillard 2862: return(NULL);
2863: }
1.7 veillard 2864: xmlGenericError(xmlGenericErrorContext,
2865: "xmlParseQuotedString: reached loop 4\n");
1.1 veillard 2866: if (RAW == '"') {
2867: NEXT;
2868: c = CUR_CHAR(l);
2869: while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
2870: if (len + 5 >= size) {
2871: size *= 2;
2872: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2873: if (buf == NULL) {
1.7 veillard 2874: xmlGenericError(xmlGenericErrorContext,
2875: "realloc of %d byte failed\n", size);
1.1 veillard 2876: return(NULL);
2877: }
2878: }
2879: COPY_BUF(l,buf,len,c);
2880: NEXTL(l);
2881: c = CUR_CHAR(l);
2882: }
2883: if (c != '"') {
2884: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2885: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2886: ctxt->sax->error(ctxt->userData,
2887: "String not closed \"%.50s\"\n", buf);
2888: ctxt->wellFormed = 0;
2889: ctxt->disableSAX = 1;
2890: } else {
2891: NEXT;
2892: }
2893: } else if (RAW == '\''){
2894: NEXT;
2895: c = CUR;
2896: while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
2897: if (len + 1 >= size) {
2898: size *= 2;
2899: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2900: if (buf == NULL) {
1.7 veillard 2901: xmlGenericError(xmlGenericErrorContext,
2902: "realloc of %d byte failed\n", size);
1.1 veillard 2903: return(NULL);
2904: }
2905: }
2906: buf[len++] = c;
2907: NEXT;
2908: c = CUR;
2909: }
2910: if (RAW != '\'') {
2911: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
2912: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2913: ctxt->sax->error(ctxt->userData,
2914: "String not closed \"%.50s\"\n", buf);
2915: ctxt->wellFormed = 0;
2916: ctxt->disableSAX = 1;
2917: } else {
2918: NEXT;
2919: }
2920: }
2921: return(buf);
2922: #endif
2923: }
2924:
2925: /**
2926: * xmlParseNamespace:
2927: * @ctxt: an XML parser context
2928: *
2929: * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2930: *
2931: * This is what the older xml-name Working Draft specified, a bunch of
2932: * other stuff may still rely on it, so support is still here as
2933: * if it was declared on the root of the Tree:-(
2934: *
2935: * TODO: remove from library
2936: *
2937: * To be removed at next drop of binary compatibility
2938: */
2939:
2940: void
2941: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
2942: static int deprecated = 0;
2943: if (!deprecated) {
1.7 veillard 2944: xmlGenericError(xmlGenericErrorContext,
2945: "xmlParseNamespace() deprecated function reached\n");
1.1 veillard 2946: deprecated = 1;
2947: }
2948:
2949: #if 0
2950: xmlChar *href = NULL;
2951: xmlChar *prefix = NULL;
2952: int garbage = 0;
2953:
2954: /*
2955: * We just skipped "namespace" or "xml:namespace"
2956: */
2957: SKIP_BLANKS;
2958:
1.7 veillard 2959: xmlGenericError(xmlGenericErrorContext,
2960: "xmlParseNamespace: reached loop 5\n");
1.1 veillard 2961: while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
2962: /*
2963: * We can have "ns" or "prefix" attributes
2964: * Old encoding as 'href' or 'AS' attributes is still supported
2965: */
2966: if ((RAW == 'n') && (NXT(1) == 's')) {
2967: garbage = 0;
2968: SKIP(2);
2969: SKIP_BLANKS;
2970:
2971: if (RAW != '=') continue;
2972: NEXT;
2973: SKIP_BLANKS;
2974:
2975: href = xmlParseQuotedString(ctxt);
2976: SKIP_BLANKS;
2977: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
2978: (NXT(2) == 'e') && (NXT(3) == 'f')) {
2979: garbage = 0;
2980: SKIP(4);
2981: SKIP_BLANKS;
2982:
2983: if (RAW != '=') continue;
2984: NEXT;
2985: SKIP_BLANKS;
2986:
2987: href = xmlParseQuotedString(ctxt);
2988: SKIP_BLANKS;
2989: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
2990: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2991: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2992: garbage = 0;
2993: SKIP(6);
2994: SKIP_BLANKS;
2995:
2996: if (RAW != '=') continue;
2997: NEXT;
2998: SKIP_BLANKS;
2999:
3000: prefix = xmlParseQuotedString(ctxt);
3001: SKIP_BLANKS;
3002: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
3003: garbage = 0;
3004: SKIP(2);
3005: SKIP_BLANKS;
3006:
3007: if (RAW != '=') continue;
3008: NEXT;
3009: SKIP_BLANKS;
3010:
3011: prefix = xmlParseQuotedString(ctxt);
3012: SKIP_BLANKS;
3013: } else if ((RAW == '?') && (NXT(1) == '>')) {
3014: garbage = 0;
3015: NEXT;
3016: } else {
3017: /*
3018: * Found garbage when parsing the namespace
3019: */
3020: if (!garbage) {
3021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3022: ctxt->sax->error(ctxt->userData,
3023: "xmlParseNamespace found garbage\n");
3024: }
3025: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
3026: ctxt->wellFormed = 0;
3027: ctxt->disableSAX = 1;
3028: NEXT;
3029: }
3030: }
3031:
3032: MOVETO_ENDTAG(CUR_PTR);
3033: NEXT;
3034:
3035: /*
3036: * Register the DTD.
3037: if (href != NULL)
3038: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
3039: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
3040: */
3041:
3042: if (prefix != NULL) xmlFree(prefix);
3043: if (href != NULL) xmlFree(href);
3044: #endif
3045: }
3046:
3047: /**
3048: * xmlScanName:
3049: * @ctxt: an XML parser context
3050: *
3051: * Trickery: parse an XML name but without consuming the input flow
3052: * Needed for rollback cases. Used only when parsing entities references.
3053: *
3054: * TODO: seems deprecated now, only used in the default part of
3055: * xmlParserHandleReference
3056: *
3057: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3058: * CombiningChar | Extender
3059: *
3060: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3061: *
3062: * [6] Names ::= Name (S Name)*
3063: *
3064: * Returns the Name parsed or NULL
3065: */
3066:
3067: xmlChar *
3068: xmlScanName(xmlParserCtxtPtr ctxt) {
3069: static int deprecated = 0;
3070: if (!deprecated) {
1.7 veillard 3071: xmlGenericError(xmlGenericErrorContext,
3072: "xmlScanName() deprecated function reached\n");
1.1 veillard 3073: deprecated = 1;
3074: }
3075: return(NULL);
3076:
3077: #if 0
3078: xmlChar buf[XML_MAX_NAMELEN];
3079: int len = 0;
3080:
3081: GROW;
3082: if (!IS_LETTER(RAW) && (RAW != '_') &&
3083: (RAW != ':')) {
3084: return(NULL);
3085: }
3086:
3087:
3088: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
3089: (NXT(len) == '.') || (NXT(len) == '-') ||
3090: (NXT(len) == '_') || (NXT(len) == ':') ||
3091: (IS_COMBINING(NXT(len))) ||
3092: (IS_EXTENDER(NXT(len)))) {
3093: GROW;
3094: buf[len] = NXT(len);
3095: len++;
3096: if (len >= XML_MAX_NAMELEN) {
1.7 veillard 3097: xmlGenericError(xmlGenericErrorContext,
1.1 veillard 3098: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3099: while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3100: (IS_DIGIT(NXT(len))) ||
3101: (NXT(len) == '.') || (NXT(len) == '-') ||
3102: (NXT(len) == '_') || (NXT(len) == ':') ||
3103: (IS_COMBINING(NXT(len))) ||
3104: (IS_EXTENDER(NXT(len))))
3105: len++;
3106: break;
3107: }
3108: }
3109: return(xmlStrndup(buf, len));
3110: #endif
3111: }
3112:
3113: /**
3114: * xmlParserHandleReference:
3115: * @ctxt: the parser context
3116: *
3117: * TODO: Remove, now deprecated ... the test is done directly in the
3118: * content parsing
3119: * routines.
3120: *
3121: * [67] Reference ::= EntityRef | CharRef
3122: *
3123: * [68] EntityRef ::= '&' Name ';'
3124: *
3125: * [ WFC: Entity Declared ]
3126: * the Name given in the entity reference must match that in an entity
3127: * declaration, except that well-formed documents need not declare any
3128: * of the following entities: amp, lt, gt, apos, quot.
3129: *
3130: * [ WFC: Parsed Entity ]
3131: * An entity reference must not contain the name of an unparsed entity
3132: *
3133: * [66] CharRef ::= '&#' [0-9]+ ';' |
3134: * '&#x' [0-9a-fA-F]+ ';'
3135: *
3136: * A PEReference may have been detectect in the current input stream
3137: * the handling is done accordingly to
3138: * http://www.w3.org/TR/REC-xml#entproc
3139: */
3140: void
3141: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
3142: static int deprecated = 0;
3143: if (!deprecated) {
1.7 veillard 3144: xmlGenericError(xmlGenericErrorContext,
3145: "xmlParserHandleReference() deprecated function reached\n");
1.1 veillard 3146: deprecated = 1;
3147: }
3148:
3149: #if 0
3150: xmlParserInputPtr input;
3151: xmlChar *name;
3152: xmlEntityPtr ent = NULL;
3153:
3154: if (ctxt->token != 0) {
3155: return;
3156: }
3157: if (RAW != '&') return;
3158: GROW;
3159: if ((RAW == '&') && (NXT(1) == '#')) {
3160: switch(ctxt->instate) {
3161: case XML_PARSER_ENTITY_DECL:
3162: case XML_PARSER_PI:
3163: case XML_PARSER_CDATA_SECTION:
3164: case XML_PARSER_COMMENT:
3165: case XML_PARSER_SYSTEM_LITERAL:
3166: /* we just ignore it there */
3167: return;
3168: case XML_PARSER_START_TAG:
3169: return;
3170: case XML_PARSER_END_TAG:
3171: return;
3172: case XML_PARSER_EOF:
3173: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
3174: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
3176: ctxt->wellFormed = 0;
3177: ctxt->disableSAX = 1;
3178: return;
3179: case XML_PARSER_PROLOG:
3180: case XML_PARSER_START:
3181: case XML_PARSER_MISC:
3182: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
3183: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3184: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
3185: ctxt->wellFormed = 0;
3186: ctxt->disableSAX = 1;
3187: return;
3188: case XML_PARSER_EPILOG:
3189: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
3190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3191: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
3192: ctxt->wellFormed = 0;
3193: ctxt->disableSAX = 1;
3194: return;
3195: case XML_PARSER_DTD:
3196: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
3197: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3198: ctxt->sax->error(ctxt->userData,
3199: "CharRef are forbiden in DTDs!\n");
3200: ctxt->wellFormed = 0;
3201: ctxt->disableSAX = 1;
3202: return;
3203: case XML_PARSER_ENTITY_VALUE:
3204: /*
3205: * NOTE: in the case of entity values, we don't do the
3206: * substitution here since we need the literal
3207: * entity value to be able to save the internal
3208: * subset of the document.
3209: * This will be handled by xmlStringDecodeEntities
3210: */
3211: return;
3212: case XML_PARSER_CONTENT:
3213: return;
3214: case XML_PARSER_ATTRIBUTE_VALUE:
3215: /* ctxt->token = xmlParseCharRef(ctxt); */
3216: return;
1.9 veillard 3217: case XML_PARSER_IGNORE:
3218: return;
1.1 veillard 3219: }
3220: return;
3221: }
3222:
3223: switch(ctxt->instate) {
3224: case XML_PARSER_CDATA_SECTION:
3225: return;
3226: case XML_PARSER_PI:
3227: case XML_PARSER_COMMENT:
3228: case XML_PARSER_SYSTEM_LITERAL:
3229: case XML_PARSER_CONTENT:
3230: return;
3231: case XML_PARSER_START_TAG:
3232: return;
3233: case XML_PARSER_END_TAG:
3234: return;
3235: case XML_PARSER_EOF:
3236: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
3237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
3239: ctxt->wellFormed = 0;
3240: ctxt->disableSAX = 1;
3241: return;
3242: case XML_PARSER_PROLOG:
3243: case XML_PARSER_START:
3244: case XML_PARSER_MISC:
3245: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
3246: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3247: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
3248: ctxt->wellFormed = 0;
3249: ctxt->disableSAX = 1;
3250: return;
3251: case XML_PARSER_EPILOG:
3252: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
3253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
3255: ctxt->wellFormed = 0;
3256: ctxt->disableSAX = 1;
3257: return;
3258: case XML_PARSER_ENTITY_VALUE:
3259: /*
3260: * NOTE: in the case of entity values, we don't do the
3261: * substitution here since we need the literal
3262: * entity value to be able to save the internal
3263: * subset of the document.
3264: * This will be handled by xmlStringDecodeEntities
3265: */
3266: return;
3267: case XML_PARSER_ATTRIBUTE_VALUE:
3268: /*
3269: * NOTE: in the case of attributes values, we don't do the
3270: * substitution here unless we are in a mode where
3271: * the parser is explicitely asked to substitute
3272: * entities. The SAX callback is called with values
3273: * without entity substitution.
3274: * This will then be handled by xmlStringDecodeEntities
3275: */
3276: return;
3277: case XML_PARSER_ENTITY_DECL:
3278: /*
3279: * we just ignore it there
3280: * the substitution will be done once the entity is referenced
3281: */
3282: return;
3283: case XML_PARSER_DTD:
3284: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
3285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3286: ctxt->sax->error(ctxt->userData,
3287: "Entity references are forbiden in DTDs!\n");
3288: ctxt->wellFormed = 0;
3289: ctxt->disableSAX = 1;
1.9 veillard 3290: return;
3291: case XML_PARSER_IGNORE:
1.1 veillard 3292: return;
3293: }
3294:
3295: /* TODO: this seems not reached anymore .... Verify ... */
1.7 veillard 3296: xmlGenericError(xmlGenericErrorContext,
3297: "Reached deprecated section in xmlParserHandleReference()\n");
3298: xmlGenericError(xmlGenericErrorContext,
3299: "Please forward the document to Daniel.Veillard@w3.org\n");
3300: xmlGenericError(xmlGenericErrorContext,
3301: "indicating the version: %s, thanks !\n", xmlParserVersion);
1.1 veillard 3302: NEXT;
3303: name = xmlScanName(ctxt);
3304: if (name == NULL) {
3305: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
3306: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3307: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
3308: ctxt->wellFormed = 0;
3309: ctxt->disableSAX = 1;
3310: ctxt->token = '&';
3311: return;
3312: }
3313: if (NXT(xmlStrlen(name)) != ';') {
3314: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
3315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3316: ctxt->sax->error(ctxt->userData,
3317: "Entity reference: ';' expected\n");
3318: ctxt->wellFormed = 0;
3319: ctxt->disableSAX = 1;
3320: ctxt->token = '&';
3321: xmlFree(name);
3322: return;
3323: }
3324: SKIP(xmlStrlen(name) + 1);
3325: if (ctxt->sax != NULL) {
3326: if (ctxt->sax->getEntity != NULL)
3327: ent = ctxt->sax->getEntity(ctxt->userData, name);
3328: }
3329:
3330: /*
3331: * [ WFC: Entity Declared ]
3332: * the Name given in the entity reference must match that in an entity
3333: * declaration, except that well-formed documents need not declare any
3334: * of the following entities: amp, lt, gt, apos, quot.
3335: */
3336: if (ent == NULL)
3337: ent = xmlGetPredefinedEntity(name);
3338: if (ent == NULL) {
3339: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
3340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3341: ctxt->sax->error(ctxt->userData,
3342: "Entity reference: entity %s not declared\n",
3343: name);
3344: ctxt->wellFormed = 0;
3345: ctxt->disableSAX = 1;
3346: xmlFree(name);
3347: return;
3348: }
3349:
3350: /*
3351: * [ WFC: Parsed Entity ]
3352: * An entity reference must not contain the name of an unparsed entity
3353: */
3354: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
3355: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
3356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3357: ctxt->sax->error(ctxt->userData,
3358: "Entity reference to unparsed entity %s\n", name);
3359: ctxt->wellFormed = 0;
3360: ctxt->disableSAX = 1;
3361: }
3362:
3363: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
3364: ctxt->token = ent->content[0];
3365: xmlFree(name);
3366: return;
3367: }
3368: input = xmlNewEntityInputStream(ctxt, ent);
3369: xmlPushInput(ctxt, input);
3370: xmlFree(name);
3371: #endif
3372: return;
3373: }
3374:
3375: /**
3376: * xmlHandleEntity:
3377: * @ctxt: an XML parser context
3378: * @entity: an XML entity pointer.
3379: *
3380: * Default handling of defined entities, when should we define a new input
3381: * stream ? When do we just handle that as a set of chars ?
3382: *
3383: * OBSOLETE: to be removed at some point.
3384: */
3385:
3386: void
3387: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
3388: static int deprecated = 0;
3389: if (!deprecated) {
1.7 veillard 3390: xmlGenericError(xmlGenericErrorContext,
3391: "xmlHandleEntity() deprecated function reached\n");
1.1 veillard 3392: deprecated = 1;
3393: }
3394:
3395: #if 0
3396: int len;
3397: xmlParserInputPtr input;
3398:
3399: if (entity->content == NULL) {
3400: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3401: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3402: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
3403: entity->name);
3404: ctxt->wellFormed = 0;
3405: ctxt->disableSAX = 1;
3406: return;
3407: }
3408: len = xmlStrlen(entity->content);
3409: if (len <= 2) goto handle_as_char;
3410:
3411: /*
3412: * Redefine its content as an input stream.
3413: */
3414: input = xmlNewEntityInputStream(ctxt, entity);
3415: xmlPushInput(ctxt, input);
3416: return;
3417:
3418: handle_as_char:
3419: /*
3420: * Just handle the content as a set of chars.
3421: */
3422: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3423: (ctxt->sax->characters != NULL))
3424: ctxt->sax->characters(ctxt->userData, entity->content, len);
3425: #endif
3426: }
3427:
1.8 veillard 3428: /**
3429: * xmlNewGlobalNs:
3430: * @doc: the document carrying the namespace
3431: * @href: the URI associated
3432: * @prefix: the prefix for the namespace
3433: *
3434: * Creation of a Namespace, the old way using PI and without scoping
3435: * DEPRECATED !!!
3436: * It now create a namespace on the root element of the document if found.
3437: * Returns NULL this functionnality had been removed
3438: */
3439: xmlNsPtr
3440: xmlNewGlobalNs(xmlDocPtr doc, const xmlChar *href, const xmlChar *prefix) {
3441: static int deprecated = 0;
3442: if (!deprecated) {
3443: xmlGenericError(xmlGenericErrorContext,
3444: "xmlNewGlobalNs() deprecated function reached\n");
3445: deprecated = 1;
3446: }
3447: return(NULL);
3448: #if 0
3449: xmlNodePtr root;
3450:
3451: xmlNsPtr cur;
3452:
3453: root = xmlDocGetRootElement(doc);
3454: if (root != NULL)
3455: return(xmlNewNs(root, href, prefix));
3456:
3457: /*
3458: * if there is no root element yet, create an old Namespace type
3459: * and it will be moved to the root at save time.
3460: */
3461: cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
3462: if (cur == NULL) {
3463: xmlGenericError(xmlGenericErrorContext,
3464: "xmlNewGlobalNs : malloc failed\n");
3465: return(NULL);
3466: }
3467: memset(cur, 0, sizeof(xmlNs));
3468: cur->type = XML_GLOBAL_NAMESPACE;
3469:
3470: if (href != NULL)
3471: cur->href = xmlStrdup(href);
3472: if (prefix != NULL)
3473: cur->prefix = xmlStrdup(prefix);
3474:
3475: /*
3476: * Add it at the end to preserve parsing order ...
3477: */
3478: if (doc != NULL) {
3479: if (doc->oldNs == NULL) {
3480: doc->oldNs = cur;
3481: } else {
3482: xmlNsPtr prev = doc->oldNs;
3483:
3484: while (prev->next != NULL) prev = prev->next;
3485: prev->next = cur;
3486: }
3487: }
3488:
3489: return(NULL);
3490: #endif
3491: }
3492:
3493: /**
3494: * xmlUpgradeOldNs:
3495: * @doc: a document pointer
3496: *
3497: * Upgrade old style Namespaces (PI) and move them to the root of the document.
3498: * DEPRECATED
3499: */
3500: void
3501: xmlUpgradeOldNs(xmlDocPtr doc) {
3502: static int deprecated = 0;
3503: if (!deprecated) {
3504: xmlGenericError(xmlGenericErrorContext,
3505: "xmlNewGlobalNs() deprecated function reached\n");
3506: deprecated = 1;
3507: }
3508: #if 0
3509: xmlNsPtr cur;
3510:
3511: if ((doc == NULL) || (doc->oldNs == NULL)) return;
3512: if (doc->children == NULL) {
3513: #ifdef DEBUG_TREE
3514: xmlGenericError(xmlGenericErrorContext,
3515: "xmlUpgradeOldNs: failed no root !\n");
3516: #endif
3517: return;
3518: }
3519:
3520: cur = doc->oldNs;
3521: while (cur->next != NULL) {
3522: cur->type = XML_LOCAL_NAMESPACE;
3523: cur = cur->next;
3524: }
3525: cur->type = XML_LOCAL_NAMESPACE;
3526: cur->next = doc->children->nsDef;
3527: doc->children->nsDef = doc->oldNs;
3528: doc->oldNs = NULL;
3529: #endif
3530: }
3531:
Webmaster