Annotation of XML/parser.c, revision 1.217
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.204 veillard 16: #include <string.h>
1.121 daniel 17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.193 daniel 44: #include <libxml/uri.h>
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
1.140 daniel 47: #define XML_PARSER_BIG_BUFFER_SIZE 1000
48: #define XML_PARSER_BUFFER_SIZE 100
49:
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.214 veillard 65: /************************************************************************
66: * *
67: * Version and Features handling *
68: * *
69: ************************************************************************/
1.200 daniel 70: const char *xmlParserVersion = LIBXML_VERSION_STRING;
71:
72: /*
73: * xmlCheckVersion:
74: * @version: the include version number
75: *
76: * check the compiled lib version against the include one.
77: * This can warn or immediately kill the application
78: */
79: void
80: xmlCheckVersion(int version) {
1.202 daniel 81: int myversion = (int) LIBXML_VERSION;
1.200 daniel 82:
83: if ((myversion / 10000) != (version / 10000)) {
84: fprintf(stderr,
85: "Fatal: program compiled against libxml %d using libxml %d\n",
86: (version / 10000), (myversion / 10000));
87: exit(1);
88: }
89: if ((myversion / 100) < (version / 100)) {
90: fprintf(stderr,
91: "Warning: program compiled against libxml %d using older %d\n",
92: (version / 100), (myversion / 100));
93: }
94: }
95:
96:
/*
 * Names of the features reported by xmlGetFeaturesList().
 */
const char *xmlFeaturesList[] = {
    "validate",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/*
 * xmlGetFeaturesList:
 * @len:  the length of the features name array (input/output)
 * @result:  an array of string to be filled with the features name.
 *
 * Copy at most *@len feature names into the @result array.
 *
 * Returns the total number of features; when @len or @result is NULL
 *         nothing is copied and the total is still returned, which lets
 *         a caller query the table size. Returns -1 if *@len is negative
 *         or >= 1000. On success *@len is updated with the number of
 *         strings copied; the strings must not be deallocated.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    /* Clamp the request to what the table actually holds. */
    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
167:
168: /*
169: * xmlGetFeature:
170: * @ctxt: an XML/HTML parser context
171: * @name: the feature name
172: * @result: location to store the result
173: *
174: * Read the current value of one feature of this parser instance
175: *
176: * Returns -1 in case or error, 0 otherwise
177: */
178: int
179: xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
180: if ((ctxt == NULL) || (name == NULL) || (result == NULL))
181: return(-1);
182:
183: if (!strcmp(name, "validate")) {
184: *((int *) result) = ctxt->validate;
185: } else if (!strcmp(name, "keep blanks")) {
186: *((int *) result) = ctxt->keepBlanks;
187: } else if (!strcmp(name, "disable SAX")) {
188: *((int *) result) = ctxt->disableSAX;
189: } else if (!strcmp(name, "fetch external entities")) {
190: *((int *) result) = ctxt->validate;
191: } else if (!strcmp(name, "substitute entities")) {
192: *((int *) result) = ctxt->replaceEntities;
193: } else if (!strcmp(name, "gather line info")) {
194: *((int *) result) = ctxt->record_info;
195: } else if (!strcmp(name, "user data")) {
196: *((void **)result) = ctxt->userData;
197: } else if (!strcmp(name, "is html")) {
198: *((int *) result) = ctxt->html;
199: } else if (!strcmp(name, "is standalone")) {
200: *((int *) result) = ctxt->standalone;
201: } else if (!strcmp(name, "document")) {
202: *((xmlDocPtr *) result) = ctxt->myDoc;
203: } else if (!strcmp(name, "is well formed")) {
204: *((int *) result) = ctxt->wellFormed;
205: } else if (!strcmp(name, "is valid")) {
206: *((int *) result) = ctxt->valid;
207: } else if (!strcmp(name, "SAX block")) {
208: *((xmlSAXHandlerPtr *) result) = ctxt->sax;
209: } else if (!strcmp(name, "SAX function internalSubset")) {
210: *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
211: } else if (!strcmp(name, "SAX function isStandalone")) {
212: *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
213: } else if (!strcmp(name, "SAX function hasInternalSubset")) {
214: *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
215: } else if (!strcmp(name, "SAX function hasExternalSubset")) {
216: *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
217: } else if (!strcmp(name, "SAX function resolveEntity")) {
218: *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
219: } else if (!strcmp(name, "SAX function getEntity")) {
220: *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
221: } else if (!strcmp(name, "SAX function entityDecl")) {
222: *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
223: } else if (!strcmp(name, "SAX function notationDecl")) {
224: *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
225: } else if (!strcmp(name, "SAX function attributeDecl")) {
226: *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
227: } else if (!strcmp(name, "SAX function elementDecl")) {
228: *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
229: } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
230: *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
231: } else if (!strcmp(name, "SAX function setDocumentLocator")) {
232: *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
233: } else if (!strcmp(name, "SAX function startDocument")) {
234: *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
235: } else if (!strcmp(name, "SAX function endDocument")) {
236: *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
237: } else if (!strcmp(name, "SAX function startElement")) {
238: *((startElementSAXFunc *) result) = ctxt->sax->startElement;
239: } else if (!strcmp(name, "SAX function endElement")) {
240: *((endElementSAXFunc *) result) = ctxt->sax->endElement;
241: } else if (!strcmp(name, "SAX function reference")) {
242: *((referenceSAXFunc *) result) = ctxt->sax->reference;
243: } else if (!strcmp(name, "SAX function characters")) {
244: *((charactersSAXFunc *) result) = ctxt->sax->characters;
245: } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
246: *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
247: } else if (!strcmp(name, "SAX function processingInstruction")) {
248: *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
249: } else if (!strcmp(name, "SAX function comment")) {
250: *((commentSAXFunc *) result) = ctxt->sax->comment;
251: } else if (!strcmp(name, "SAX function warning")) {
252: *((warningSAXFunc *) result) = ctxt->sax->warning;
253: } else if (!strcmp(name, "SAX function error")) {
254: *((errorSAXFunc *) result) = ctxt->sax->error;
255: } else if (!strcmp(name, "SAX function fatalError")) {
256: *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
257: } else if (!strcmp(name, "SAX function getParameterEntity")) {
258: *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
259: } else if (!strcmp(name, "SAX function cdataBlock")) {
260: *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
261: } else if (!strcmp(name, "SAX function externalSubset")) {
262: *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
263: } else {
264: return(-1);
265: }
266: return(0);
267: }
268:
269: /*
270: * xmlSetFeature:
271: * @ctxt: an XML/HTML parser context
272: * @name: the feature name
273: * @value: pointer to the location of the new value
274: *
275: * Change the current value of one feature of this parser instance
276: *
277: * Returns -1 in case or error, 0 otherwise
278: */
279: int
280: xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
281: if ((ctxt == NULL) || (name == NULL) || (value == NULL))
282: return(-1);
283:
284: if (!strcmp(name, "validate")) {
1.215 veillard 285: ctxt->validate = *((int *) value);
1.214 veillard 286: } else if (!strcmp(name, "keep blanks")) {
1.215 veillard 287: ctxt->keepBlanks = *((int *) value);
1.214 veillard 288: } else if (!strcmp(name, "disable SAX")) {
1.215 veillard 289: ctxt->disableSAX = *((int *) value);
1.214 veillard 290: } else if (!strcmp(name, "fetch external entities")) {
1.215 veillard 291: int newvalid = *((int *) value);
292: if ((!ctxt->validate) && (newvalid != 0)) {
293: if (ctxt->vctxt.warning == NULL)
294: ctxt->vctxt.warning = xmlParserValidityWarning;
295: if (ctxt->vctxt.error == NULL)
296: ctxt->vctxt.error = xmlParserValidityError;
297: /* Allocate the Node stack */
298: ctxt->vctxt.nodeTab = (xmlNodePtr *)
299: xmlMalloc(4 * sizeof(xmlNodePtr));
300: ctxt->vctxt.nodeNr = 0;
301: ctxt->vctxt.nodeMax = 4;
302: ctxt->vctxt.node = NULL;
303: }
304: ctxt->validate = newvalid;
1.214 veillard 305: } else if (!strcmp(name, "substitute entities")) {
1.215 veillard 306: ctxt->replaceEntities = *((int *) value);
1.214 veillard 307: } else if (!strcmp(name, "gather line info")) {
1.215 veillard 308: ctxt->record_info = *((int *) value);
1.214 veillard 309: } else if (!strcmp(name, "user data")) {
1.215 veillard 310: ctxt->userData = *((void **)value);
1.214 veillard 311: } else if (!strcmp(name, "is html")) {
1.215 veillard 312: ctxt->html = *((int *) value);
1.214 veillard 313: } else if (!strcmp(name, "is standalone")) {
1.215 veillard 314: ctxt->standalone = *((int *) value);
1.214 veillard 315: } else if (!strcmp(name, "document")) {
1.215 veillard 316: ctxt->myDoc = *((xmlDocPtr *) value);
1.214 veillard 317: } else if (!strcmp(name, "is well formed")) {
1.215 veillard 318: ctxt->wellFormed = *((int *) value);
1.214 veillard 319: } else if (!strcmp(name, "is valid")) {
1.215 veillard 320: ctxt->valid = *((int *) value);
1.214 veillard 321: } else if (!strcmp(name, "SAX block")) {
1.215 veillard 322: ctxt->sax = *((xmlSAXHandlerPtr *) value);
1.214 veillard 323: } else if (!strcmp(name, "SAX function internalSubset")) {
324: ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
325: } else if (!strcmp(name, "SAX function isStandalone")) {
326: ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
327: } else if (!strcmp(name, "SAX function hasInternalSubset")) {
328: ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
329: } else if (!strcmp(name, "SAX function hasExternalSubset")) {
330: ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
331: } else if (!strcmp(name, "SAX function resolveEntity")) {
332: ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
333: } else if (!strcmp(name, "SAX function getEntity")) {
334: ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
335: } else if (!strcmp(name, "SAX function entityDecl")) {
336: ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
337: } else if (!strcmp(name, "SAX function notationDecl")) {
338: ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
339: } else if (!strcmp(name, "SAX function attributeDecl")) {
340: ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
341: } else if (!strcmp(name, "SAX function elementDecl")) {
342: ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
343: } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
344: ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
345: } else if (!strcmp(name, "SAX function setDocumentLocator")) {
346: ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
347: } else if (!strcmp(name, "SAX function startDocument")) {
348: ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
349: } else if (!strcmp(name, "SAX function endDocument")) {
350: ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
351: } else if (!strcmp(name, "SAX function startElement")) {
352: ctxt->sax->startElement = *((startElementSAXFunc *) value);
353: } else if (!strcmp(name, "SAX function endElement")) {
354: ctxt->sax->endElement = *((endElementSAXFunc *) value);
355: } else if (!strcmp(name, "SAX function reference")) {
356: ctxt->sax->reference = *((referenceSAXFunc *) value);
357: } else if (!strcmp(name, "SAX function characters")) {
358: ctxt->sax->characters = *((charactersSAXFunc *) value);
359: } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
360: ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
361: } else if (!strcmp(name, "SAX function processingInstruction")) {
362: ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
363: } else if (!strcmp(name, "SAX function comment")) {
364: ctxt->sax->comment = *((commentSAXFunc *) value);
365: } else if (!strcmp(name, "SAX function warning")) {
366: ctxt->sax->warning = *((warningSAXFunc *) value);
367: } else if (!strcmp(name, "SAX function error")) {
368: ctxt->sax->error = *((errorSAXFunc *) value);
369: } else if (!strcmp(name, "SAX function fatalError")) {
370: ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
371: } else if (!strcmp(name, "SAX function getParameterEntity")) {
372: ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
373: } else if (!strcmp(name, "SAX function cdataBlock")) {
374: ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
375: } else if (!strcmp(name, "SAX function externalSubset")) {
376: ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
377: } else {
378: return(-1);
379: }
380: return(0);
381: }
382:
383:
1.91 daniel 384: /************************************************************************
385: * *
386: * Input handling functions for progressive parsing *
387: * *
388: ************************************************************************/
389:
/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */


#define INPUT_CHUNK	250
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug helper, only built when DEBUG_INPUT is defined. Reports on stderr
 * when the base/cur pointers of @in disagree with its underlying buffer,
 * then dumps the buffer's state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers go through %p and every integer is cast to the type its
     * conversion expects, so the output is valid where pointers are
     * wider than int.
     */
    fprintf(stderr,"buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in) 
#endif
420:
1.91 daniel 421:
422: /**
423: * xmlParserInputRead:
424: * @in: an XML parser input
425: * @len: an indicative size for the lookahead
426: *
427: * This function refresh the input for the parser. It doesn't try to
428: * preserve pointers to the input buffer, and discard already read data
429: *
1.123 daniel 430: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 431: * end of this entity
432: */
433: int
434: xmlParserInputRead(xmlParserInputPtr in, int len) {
435: int ret;
436: int used;
437: int index;
438:
439: #ifdef DEBUG_INPUT
440: fprintf(stderr, "Read\n");
441: #endif
442: if (in->buf == NULL) return(-1);
443: if (in->base == NULL) return(-1);
444: if (in->cur == NULL) return(-1);
445: if (in->buf->buffer == NULL) return(-1);
446:
447: CHECK_BUFFER(in);
448:
449: used = in->cur - in->buf->buffer->content;
450: ret = xmlBufferShrink(in->buf->buffer, used);
451: if (ret > 0) {
452: in->cur -= ret;
453: in->consumed += ret;
454: }
455: ret = xmlParserInputBufferRead(in->buf, len);
456: if (in->base != in->buf->buffer->content) {
457: /*
458: * the buffer has been realloced
459: */
460: index = in->cur - in->base;
461: in->base = in->buf->buffer->content;
462: in->cur = &in->buf->buffer->content[index];
463: }
464:
465: CHECK_BUFFER(in);
466:
467: return(ret);
468: }
469:
470: /**
471: * xmlParserInputGrow:
472: * @in: an XML parser input
473: * @len: an indicative size for the lookahead
474: *
475: * This function increase the input for the parser. It tries to
476: * preserve pointers to the input buffer, and keep already read data
477: *
1.123 daniel 478: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 479: * end of this entity
480: */
481: int
482: xmlParserInputGrow(xmlParserInputPtr in, int len) {
483: int ret;
484: int index;
485:
486: #ifdef DEBUG_INPUT
487: fprintf(stderr, "Grow\n");
488: #endif
489: if (in->buf == NULL) return(-1);
490: if (in->base == NULL) return(-1);
491: if (in->cur == NULL) return(-1);
492: if (in->buf->buffer == NULL) return(-1);
493:
494: CHECK_BUFFER(in);
495:
496: index = in->cur - in->base;
1.202 daniel 497: if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
1.91 daniel 498:
499: CHECK_BUFFER(in);
500:
501: return(0);
502: }
1.189 daniel 503: if (in->buf->readcallback != NULL)
1.140 daniel 504: ret = xmlParserInputBufferGrow(in->buf, len);
505: else
506: return(0);
1.135 daniel 507:
508: /*
509: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
510: * block, but we use it really as an integer to do some
511: * pointer arithmetic. Insure will raise it as a bug but in
512: * that specific case, that's not !
513: */
1.91 daniel 514: if (in->base != in->buf->buffer->content) {
515: /*
516: * the buffer has been realloced
517: */
518: index = in->cur - in->base;
519: in->base = in->buf->buffer->content;
520: in->cur = &in->buf->buffer->content[index];
521: }
522:
523: CHECK_BUFFER(in);
524:
525: return(ret);
526: }
527:
528: /**
529: * xmlParserInputShrink:
530: * @in: an XML parser input
531: *
532: * This function removes used input for the parser.
533: */
534: void
535: xmlParserInputShrink(xmlParserInputPtr in) {
536: int used;
537: int ret;
538: int index;
539:
540: #ifdef DEBUG_INPUT
541: fprintf(stderr, "Shrink\n");
542: #endif
543: if (in->buf == NULL) return;
544: if (in->base == NULL) return;
545: if (in->cur == NULL) return;
546: if (in->buf->buffer == NULL) return;
547:
548: CHECK_BUFFER(in);
549:
550: used = in->cur - in->buf->buffer->content;
551: if (used > INPUT_CHUNK) {
1.110 daniel 552: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 553: if (ret > 0) {
554: in->cur -= ret;
555: in->consumed += ret;
556: }
557: }
558:
559: CHECK_BUFFER(in);
560:
561: if (in->buf->buffer->use > INPUT_CHUNK) {
562: return;
563: }
564: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
565: if (in->base != in->buf->buffer->content) {
566: /*
567: * the buffer has been realloced
568: */
569: index = in->cur - in->base;
570: in->base = in->buf->buffer->content;
571: in->cur = &in->buf->buffer->content[index];
572: }
573:
574: CHECK_BUFFER(in);
575: }
576:
1.45 daniel 577: /************************************************************************
578: * *
579: * Parser stacks related functions and macros *
580: * *
581: ************************************************************************/
1.79 daniel 582:
583: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 584: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 585: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 586: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
587: const xmlChar ** str);
1.79 daniel 588:
1.1 veillard 589: /*
1.40 daniel 590: * Generic function for accessing stacks in the Parser Context
1.1 veillard 591: */
592:
1.140 daniel 593: #define PUSH_AND_POP(scope, type, name) \
594: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 595: if (ctxt->name##Nr >= ctxt->name##Max) { \
596: ctxt->name##Max *= 2; \
1.204 veillard 597: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 598: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
599: if (ctxt->name##Tab == NULL) { \
1.31 daniel 600: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 601: return(0); \
1.31 daniel 602: } \
603: } \
1.40 daniel 604: ctxt->name##Tab[ctxt->name##Nr] = value; \
605: ctxt->name = value; \
606: return(ctxt->name##Nr++); \
1.31 daniel 607: } \
1.140 daniel 608: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 609: type ret; \
1.40 daniel 610: if (ctxt->name##Nr <= 0) return(0); \
611: ctxt->name##Nr--; \
1.50 daniel 612: if (ctxt->name##Nr > 0) \
613: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
614: else \
615: ctxt->name = NULL; \
1.69 daniel 616: ret = ctxt->name##Tab[ctxt->name##Nr]; \
617: ctxt->name##Tab[ctxt->name##Nr] = 0; \
618: return(ret); \
1.31 daniel 619: } \
620:
1.140 daniel 621: PUSH_AND_POP(extern, xmlParserInputPtr, input)
622: PUSH_AND_POP(extern, xmlNodePtr, node)
623: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 624:
1.176 daniel 625: int spacePush(xmlParserCtxtPtr ctxt, int val) {
626: if (ctxt->spaceNr >= ctxt->spaceMax) {
627: ctxt->spaceMax *= 2;
1.204 veillard 628: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 629: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
630: if (ctxt->spaceTab == NULL) {
631: fprintf(stderr, "realloc failed !\n");
632: return(0);
633: }
634: }
635: ctxt->spaceTab[ctxt->spaceNr] = val;
636: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
637: return(ctxt->spaceNr++);
638: }
639:
640: int spacePop(xmlParserCtxtPtr ctxt) {
641: int ret;
642: if (ctxt->spaceNr <= 0) return(0);
643: ctxt->spaceNr--;
644: if (ctxt->spaceNr > 0)
645: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
646: else
647: ctxt->space = NULL;
648: ret = ctxt->spaceTab[ctxt->spaceNr];
649: ctxt->spaceTab[ctxt->spaceNr] = -1;
650: return(ret);
651: }
652:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named `ctxt` in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use
 * them:
 *
 *   CUR_PTR  returns the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   RAW      returns -1 when ctxt->token is set, otherwise the xmlChar
 *            at the current input position.
 *   CUR      returns ctxt->token when it is set, otherwise the current
 *            xmlChar value, i.e. a 8 bit value if compiled in ISO-Latin
 *            or UTF-8.
 *            This should be used internally by the parser
 *            only to compare to ASCII values otherwise it would break when
 *            running with UTF-8 encoding.
 *   NXT(n)   returns the n'th next xmlChar. Same as CUR it should be used
 *            only to compare on ASCII based substring.
 *   SKIP(n)  skips n xmlChar, and must also be used only to skip ASCII
 *            defined strings within the parser.
 *   SHRINK / GROW
 *            call xmlParserInputShrink() / xmlParserInputGrow() on the
 *            current input and pop it when it is exhausted.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT     skips to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) advances the input by l xmlChar, maintaining line/col.
 *   COPY_BUF(l,b,i,v)
 *            stores char v of length l into b at index i, advancing i.
 *   CUR_CHAR(l)
 *            returns the current char as an int as well as its length.
 *
 * NOTE: SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several
 *       statements, and SKIP_BLANKS, NEXT, CUR_CHAR and CUR_SCHAR carry
 *       their own trailing ';'. Neither kind is safe as the unbraced body
 *       of an if/else.
 */

#define RAW		(ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR		(ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val)	ctxt->input->cur[(val)]
#define CUR_PTR		ctxt->input->cur

#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

#define SHRINK xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 720:
721: /**
722: * xmlNextChar:
723: * @ctxt: the XML parser context
724: *
725: * Skip to the next char input char.
726: */
1.55 daniel 727:
1.151 daniel 728: void
729: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.201 daniel 730: if (ctxt->instate == XML_PARSER_EOF)
731: return;
732:
1.176 daniel 733: /*
734: * TODO: 2.11 End-of-Line Handling
735: * the literal two-character sequence "#xD#xA" or a standalone
736: * literal #xD, an XML processor must pass to the application
737: * the single character #xA.
738: */
1.151 daniel 739: if (ctxt->token != 0) ctxt->token = 0;
1.208 veillard 740: else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 741: if ((*ctxt->input->cur == 0) &&
742: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
743: (ctxt->instate != XML_PARSER_COMMENT)) {
744: /*
745: * If we are at the end of the current entity and
746: * the context allows it, we pop consumed entities
747: * automatically.
748: * TODO: the auto closing should be blocked in other cases
749: */
750: xmlPopInput(ctxt);
751: } else {
752: if (*(ctxt->input->cur) == '\n') {
753: ctxt->input->line++; ctxt->input->col = 1;
754: } else ctxt->input->col++;
1.198 daniel 755: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 756: /*
757: * We are supposed to handle UTF8, check it's valid
758: * From rfc2044: encoding of the Unicode values on UTF-8:
759: *
760: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
761: * 0000 0000-0000 007F 0xxxxxxx
762: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
763: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
764: *
1.160 daniel 765: * Check for the 0x110000 limit too
1.151 daniel 766: */
767: const unsigned char *cur = ctxt->input->cur;
768: unsigned char c;
1.91 daniel 769:
1.151 daniel 770: c = *cur;
771: if (c & 0x80) {
772: if (cur[1] == 0)
773: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
774: if ((cur[1] & 0xc0) != 0x80)
775: goto encoding_error;
776: if ((c & 0xe0) == 0xe0) {
777: unsigned int val;
778:
779: if (cur[2] == 0)
780: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
781: if ((cur[2] & 0xc0) != 0x80)
782: goto encoding_error;
783: if ((c & 0xf0) == 0xf0) {
784: if (cur[3] == 0)
785: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
786: if (((c & 0xf8) != 0xf0) ||
787: ((cur[3] & 0xc0) != 0x80))
788: goto encoding_error;
789: /* 4-byte code */
790: ctxt->input->cur += 4;
791: val = (cur[0] & 0x7) << 18;
792: val |= (cur[1] & 0x3f) << 12;
793: val |= (cur[2] & 0x3f) << 6;
794: val |= cur[3] & 0x3f;
795: } else {
796: /* 3-byte code */
797: ctxt->input->cur += 3;
798: val = (cur[0] & 0xf) << 12;
799: val |= (cur[1] & 0x3f) << 6;
800: val |= cur[2] & 0x3f;
801: }
802: if (((val > 0xd7ff) && (val < 0xe000)) ||
803: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 804: (val >= 0x110000)) {
1.151 daniel 805: if ((ctxt->sax != NULL) &&
806: (ctxt->sax->error != NULL))
807: ctxt->sax->error(ctxt->userData,
1.196 daniel 808: "Char 0x%X out of allowed range\n", val);
1.151 daniel 809: ctxt->errNo = XML_ERR_INVALID_ENCODING;
810: ctxt->wellFormed = 0;
1.180 daniel 811: ctxt->disableSAX = 1;
1.151 daniel 812: }
813: } else
814: /* 2-byte code */
815: ctxt->input->cur += 2;
816: } else
817: /* 1-byte code */
818: ctxt->input->cur++;
819: } else {
820: /*
821: * Assume it's a fixed lenght encoding (1) with
822: * a compatibke encoding for the ASCII set, since
823: * XML constructs only use < 128 chars
824: */
825: ctxt->input->cur++;
826: }
827: ctxt->nbChars++;
828: if (*ctxt->input->cur == 0)
829: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
830: }
1.208 veillard 831: } else {
832: ctxt->input->cur++;
833: ctxt->nbChars++;
834: if (*ctxt->input->cur == 0)
835: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1.151 daniel 836: }
1.207 veillard 837: if ((*ctxt->input->cur == '%') && (!ctxt->html))
838: xmlParserHandlePEReference(ctxt);
839: if ((*ctxt->input->cur == '&')&& (!ctxt->html))
840: xmlParserHandleReference(ctxt);
1.168 daniel 841: if ((*ctxt->input->cur == 0) &&
842: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
843: xmlPopInput(ctxt);
1.151 daniel 844: return;
845: encoding_error:
846: /*
847: * If we detect an UTF8 error that probably mean that the
848: * input encoding didn't get properly advertized in the
849: * declaration header. Report the error and switch the encoding
850: * to ISO-Latin-1 (if you don't like this policy, just declare the
851: * encoding !)
852: */
1.198 daniel 853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.151 daniel 854: ctxt->sax->error(ctxt->userData,
855: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 856: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
857: ctxt->input->cur[0], ctxt->input->cur[1],
858: ctxt->input->cur[2], ctxt->input->cur[3]);
859: }
1.151 daniel 860: ctxt->errNo = XML_ERR_INVALID_ENCODING;
861:
1.198 daniel 862: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.151 daniel 863: ctxt->input->cur++;
864: return;
865: }
1.42 daniel 866:
1.152 daniel 867: /**
868: * xmlCurrentChar:
869: * @ctxt: the XML parser context
870: * @len: pointer to the length of the char read
871: *
872: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 873: * bytes in the input buffer. Implement the end of line normalization:
874: * 2.11 End-of-Line Handling
875: * Wherever an external parsed entity or the literal entity value
876: * of an internal parsed entity contains either the literal two-character
877: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
878: * must pass to the application the single character #xA.
879: * This behavior can conveniently be produced by normalizing all
880: * line breaks to #xA on input, before parsing.)
1.152 daniel 881: *
882: * Returns the current char value and its lenght
883: */
884:
885: int
886: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1.201 daniel 887: if (ctxt->instate == XML_PARSER_EOF)
888: return(0);
889:
1.152 daniel 890: if (ctxt->token != 0) {
891: *len = 0;
892: return(ctxt->token);
893: }
1.198 daniel 894: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.152 daniel 895: /*
896: * We are supposed to handle UTF8, check it's valid
897: * From rfc2044: encoding of the Unicode values on UTF-8:
898: *
899: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
900: * 0000 0000-0000 007F 0xxxxxxx
901: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
902: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
903: *
1.160 daniel 904: * Check for the 0x110000 limit too
1.152 daniel 905: */
906: const unsigned char *cur = ctxt->input->cur;
907: unsigned char c;
908: unsigned int val;
909:
910: c = *cur;
911: if (c & 0x80) {
912: if (cur[1] == 0)
913: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
914: if ((cur[1] & 0xc0) != 0x80)
915: goto encoding_error;
916: if ((c & 0xe0) == 0xe0) {
917:
918: if (cur[2] == 0)
919: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
920: if ((cur[2] & 0xc0) != 0x80)
921: goto encoding_error;
922: if ((c & 0xf0) == 0xf0) {
923: if (cur[3] == 0)
924: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
925: if (((c & 0xf8) != 0xf0) ||
926: ((cur[3] & 0xc0) != 0x80))
927: goto encoding_error;
928: /* 4-byte code */
929: *len = 4;
930: val = (cur[0] & 0x7) << 18;
931: val |= (cur[1] & 0x3f) << 12;
932: val |= (cur[2] & 0x3f) << 6;
933: val |= cur[3] & 0x3f;
934: } else {
935: /* 3-byte code */
936: *len = 3;
937: val = (cur[0] & 0xf) << 12;
938: val |= (cur[1] & 0x3f) << 6;
939: val |= cur[2] & 0x3f;
940: }
941: } else {
942: /* 2-byte code */
943: *len = 2;
944: val = (cur[0] & 0x1f) << 6;
1.168 daniel 945: val |= cur[1] & 0x3f;
1.152 daniel 946: }
947: if (!IS_CHAR(val)) {
948: if ((ctxt->sax != NULL) &&
949: (ctxt->sax->error != NULL))
950: ctxt->sax->error(ctxt->userData,
1.196 daniel 951: "Char 0x%X out of allowed range\n", val);
1.152 daniel 952: ctxt->errNo = XML_ERR_INVALID_ENCODING;
953: ctxt->wellFormed = 0;
1.180 daniel 954: ctxt->disableSAX = 1;
1.152 daniel 955: }
956: return(val);
957: } else {
958: /* 1-byte code */
959: *len = 1;
1.180 daniel 960: if (*ctxt->input->cur == 0xD) {
961: if (ctxt->input->cur[1] == 0xA) {
962: ctxt->nbChars++;
963: ctxt->input->cur++;
964: }
965: return(0xA);
966: }
1.152 daniel 967: return((int) *ctxt->input->cur);
968: }
969: }
970: /*
971: * Assume it's a fixed lenght encoding (1) with
972: * a compatibke encoding for the ASCII set, since
973: * XML constructs only use < 128 chars
974: */
975: *len = 1;
1.180 daniel 976: if (*ctxt->input->cur == 0xD) {
977: if (ctxt->input->cur[1] == 0xA) {
978: ctxt->nbChars++;
979: ctxt->input->cur++;
980: }
981: return(0xA);
982: }
1.152 daniel 983: return((int) *ctxt->input->cur);
984: encoding_error:
985: /*
986: * If we detect an UTF8 error that probably mean that the
987: * input encoding didn't get properly advertized in the
988: * declaration header. Report the error and switch the encoding
989: * to ISO-Latin-1 (if you don't like this policy, just declare the
990: * encoding !)
991: */
1.198 daniel 992: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.152 daniel 993: ctxt->sax->error(ctxt->userData,
994: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 995: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
996: ctxt->input->cur[0], ctxt->input->cur[1],
997: ctxt->input->cur[2], ctxt->input->cur[3]);
998: }
1.152 daniel 999: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1000:
1.198 daniel 1001: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.152 daniel 1002: *len = 1;
1003: return((int) *ctxt->input->cur);
1004: }
1005:
1006: /**
1.162 daniel 1007: * xmlStringCurrentChar:
1008: * @ctxt: the XML parser context
1009: * @cur: pointer to the beginning of the char
1010: * @len: pointer to the length of the char read
1011: *
1012: * The current char value, if using UTF-8 this may actaully span multiple
1013: * bytes in the input buffer.
1014: *
1015: * Returns the current char value and its lenght
1016: */
1017:
1018: int
1019: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1.198 daniel 1020: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.162 daniel 1021: /*
1022: * We are supposed to handle UTF8, check it's valid
1023: * From rfc2044: encoding of the Unicode values on UTF-8:
1024: *
1025: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1026: * 0000 0000-0000 007F 0xxxxxxx
1027: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1028: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1029: *
1030: * Check for the 0x110000 limit too
1031: */
1032: unsigned char c;
1033: unsigned int val;
1034:
1035: c = *cur;
1036: if (c & 0x80) {
1037: if ((cur[1] & 0xc0) != 0x80)
1038: goto encoding_error;
1039: if ((c & 0xe0) == 0xe0) {
1040:
1041: if ((cur[2] & 0xc0) != 0x80)
1042: goto encoding_error;
1043: if ((c & 0xf0) == 0xf0) {
1044: if (((c & 0xf8) != 0xf0) ||
1045: ((cur[3] & 0xc0) != 0x80))
1046: goto encoding_error;
1047: /* 4-byte code */
1048: *len = 4;
1049: val = (cur[0] & 0x7) << 18;
1050: val |= (cur[1] & 0x3f) << 12;
1051: val |= (cur[2] & 0x3f) << 6;
1052: val |= cur[3] & 0x3f;
1053: } else {
1054: /* 3-byte code */
1055: *len = 3;
1056: val = (cur[0] & 0xf) << 12;
1057: val |= (cur[1] & 0x3f) << 6;
1058: val |= cur[2] & 0x3f;
1059: }
1060: } else {
1061: /* 2-byte code */
1062: *len = 2;
1063: val = (cur[0] & 0x1f) << 6;
1064: val |= cur[2] & 0x3f;
1065: }
1066: if (!IS_CHAR(val)) {
1067: if ((ctxt->sax != NULL) &&
1068: (ctxt->sax->error != NULL))
1069: ctxt->sax->error(ctxt->userData,
1.196 daniel 1070: "Char 0x%X out of allowed range\n", val);
1.162 daniel 1071: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1072: ctxt->wellFormed = 0;
1.180 daniel 1073: ctxt->disableSAX = 1;
1.162 daniel 1074: }
1075: return(val);
1076: } else {
1077: /* 1-byte code */
1078: *len = 1;
1079: return((int) *cur);
1080: }
1081: }
1082: /*
1083: * Assume it's a fixed lenght encoding (1) with
1084: * a compatibke encoding for the ASCII set, since
1085: * XML constructs only use < 128 chars
1086: */
1087: *len = 1;
1088: return((int) *cur);
1089: encoding_error:
1090: /*
1091: * If we detect an UTF8 error that probably mean that the
1092: * input encoding didn't get properly advertized in the
1093: * declaration header. Report the error and switch the encoding
1094: * to ISO-Latin-1 (if you don't like this policy, just declare the
1095: * encoding !)
1096: */
1.198 daniel 1097: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.162 daniel 1098: ctxt->sax->error(ctxt->userData,
1099: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 1100: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1101: ctxt->input->cur[0], ctxt->input->cur[1],
1102: ctxt->input->cur[2], ctxt->input->cur[3]);
1103: }
1.162 daniel 1104: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1105:
1106: *len = 1;
1107: return((int) *cur);
1108: }
1109:
/**
 * xmlCopyChar:
 * @len:  the encoded length of the char as read, or zero to have it
 *        computed from @val
 * @out:  pointer to an array of xmlChar, with room for up to 4 bytes
 * @val:  the char value
 *
 * Append the char value to the array. When @len is 1 (or negative) the
 * value is stored as one raw byte; when it is larger than 1 the value is
 * written as UTF-8. When @len is 0 the length is derived from @val and
 * values outside 0 .. 0x10FFFF are rejected with a message on stderr.
 *
 * Returns the number of xmlChar actually written, 0 in case of error
 */

int
xmlCopyChar(int len, xmlChar *out, int val) {
    xmlChar *start = out;

    /*
     * From rfc2044: encoding of the Unicode values on UTF-8:
     *
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
     * 0000 0000-0000 007F   0xxxxxxx
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
     */
    if (len == 0) {
        if (val < 0) len = 0;
        else if (val < 0x80) len = 1;
        else if (val < 0x800) len = 2;
        else if (val < 0x10000) len = 3;
        else if (val < 0x110000) len = 4;
        if (len == 0) {
            fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
                    val);
            return(0);
        }
    }
    if (len > 1) {
        int bits;

        /* the lead byte is chosen from the value, not from @len */
        if (val < 0x80) { *out++ = (xmlChar) val; bits = -6; }
        else if (val < 0x800) { *out++ = (xmlChar) ((val >> 6) | 0xC0); bits = 0; }
        else if (val < 0x10000) { *out++ = (xmlChar) ((val >> 12) | 0xE0); bits = 6; }
        else { *out++ = (xmlChar) ((val >> 18) | 0xF0); bits = 12; }

        for ( ; bits >= 0; bits -= 6)
            *out++ = (xmlChar) (((val >> bits) & 0x3F) | 0x80);

        /*
         * Report what was really stored: @len may disagree with the
         * shortest form of @val (e.g. an over-long input sequence), and
         * returning @len then would make the caller skip over bytes
         * that were never written.
         */
        return((int) (out - start));
    }
    *out = (xmlChar) val;
    return(1);
}
1160:
1161: /**
1162: * xmlSkipBlankChars:
1163: * @ctxt: the XML parser context
1164: *
1165: * skip all blanks character found at that point in the input streams.
1166: * It pops up finished entities in the process if allowable at that point.
1167: *
1168: * Returns the number of space chars skipped
1169: */
1170:
1171: int
1172: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1173: int cur, res = 0;
1174:
1175: do {
1176: cur = CUR;
1177: while (IS_BLANK(cur)) {
1178: NEXT;
1179: cur = CUR;
1180: res++;
1181: }
1182: while ((cur == 0) && (ctxt->inputNr > 1) &&
1183: (ctxt->instate != XML_PARSER_COMMENT)) {
1184: xmlPopInput(ctxt);
1185: cur = CUR;
1186: }
1187: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1188: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1189: } while (IS_BLANK(cur));
1190: return(res);
1.152 daniel 1191: }
1192:
1.97 daniel 1193: /************************************************************************
1194: * *
1195: * Commodity functions to handle entities processing *
1196: * *
1197: ************************************************************************/
1.40 daniel 1198:
1.50 daniel 1199: /**
1200: * xmlPopInput:
1201: * @ctxt: an XML parser context
1202: *
1.40 daniel 1203: * xmlPopInput: the current input pointed by ctxt->input came to an end
1204: * pop it and return the next char.
1.45 daniel 1205: *
1.123 daniel 1206: * Returns the current xmlChar in the parser context
1.40 daniel 1207: */
1.123 daniel 1208: xmlChar
1.55 daniel 1209: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 1210: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 1211: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 1212: if ((*ctxt->input->cur == 0) &&
1213: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214: return(xmlPopInput(ctxt));
1.40 daniel 1215: return(CUR);
1216: }
1217:
1.50 daniel 1218: /**
1219: * xmlPushInput:
1220: * @ctxt: an XML parser context
1221: * @input: an XML parser input fragment (entity, XML fragment ...).
1222: *
1.40 daniel 1223: * xmlPushInput: switch to a new input stream which is stacked on top
1224: * of the previous one(s).
1225: */
1.55 daniel 1226: void
1227: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 1228: if (input == NULL) return;
1229: inputPush(ctxt, input);
1.164 daniel 1230: GROW;
1.40 daniel 1231: }
1232:
1.50 daniel 1233: /**
1.69 daniel 1234: * xmlFreeInputStream:
1.127 daniel 1235: * @input: an xmlParserInputPtr
1.69 daniel 1236: *
1237: * Free up an input stream.
1238: */
1239: void
1240: xmlFreeInputStream(xmlParserInputPtr input) {
1241: if (input == NULL) return;
1242:
1.119 daniel 1243: if (input->filename != NULL) xmlFree((char *) input->filename);
1244: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 1245: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 1246: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 1247: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 1248: input->free((xmlChar *) input->base);
1.93 veillard 1249: if (input->buf != NULL)
1250: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 1251: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 1252: xmlFree(input);
1.69 daniel 1253: }
1254:
1255: /**
1.96 daniel 1256: * xmlNewInputStream:
1257: * @ctxt: an XML parser context
1258: *
1259: * Create a new input stream structure
1260: * Returns the new input stream or NULL
1261: */
1262: xmlParserInputPtr
1263: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1264: xmlParserInputPtr input;
1265:
1.119 daniel 1266: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 1267: if (input == NULL) {
1.190 daniel 1268: if (ctxt != NULL) {
1269: ctxt->errNo = XML_ERR_NO_MEMORY;
1270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1271: ctxt->sax->error(ctxt->userData,
1272: "malloc: couldn't allocate a new input stream\n");
1273: ctxt->errNo = XML_ERR_NO_MEMORY;
1274: }
1.96 daniel 1275: return(NULL);
1276: }
1.165 daniel 1277: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 1278: input->line = 1;
1279: input->col = 1;
1.167 daniel 1280: input->standalone = -1;
1.96 daniel 1281: return(input);
1282: }
1283:
1284: /**
1.190 daniel 1285: * xmlNewIOInputStream:
1286: * @ctxt: an XML parser context
1287: * @input: an I/O Input
1288: * @enc: the charset encoding if known
1289: *
1290: * Create a new input stream structure encapsulating the @input into
1291: * a stream suitable for the parser.
1292: *
1293: * Returns the new input stream or NULL
1294: */
1295: xmlParserInputPtr
1296: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1297: xmlCharEncoding enc) {
1298: xmlParserInputPtr inputStream;
1299:
1300: inputStream = xmlNewInputStream(ctxt);
1301: if (inputStream == NULL) {
1302: return(NULL);
1303: }
1304: inputStream->filename = NULL;
1305: inputStream->buf = input;
1306: inputStream->base = inputStream->buf->buffer->content;
1307: inputStream->cur = inputStream->buf->buffer->content;
1308: if (enc != XML_CHAR_ENCODING_NONE) {
1309: xmlSwitchEncoding(ctxt, enc);
1310: }
1311:
1312: return(inputStream);
1313: }
1314:
1315: /**
1.50 daniel 1316: * xmlNewEntityInputStream:
1317: * @ctxt: an XML parser context
1318: * @entity: an Entity pointer
1319: *
1.82 daniel 1320: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 1321: *
1322: * Returns the new input stream or NULL
1.45 daniel 1323: */
1.50 daniel 1324: xmlParserInputPtr
1325: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1326: xmlParserInputPtr input;
1327:
1328: if (entity == NULL) {
1.123 daniel 1329: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1330: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1331: ctxt->sax->error(ctxt->userData,
1.45 daniel 1332: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 1333: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 1334: return(NULL);
1.45 daniel 1335: }
1336: if (entity->content == NULL) {
1.159 daniel 1337: switch (entity->etype) {
1.113 daniel 1338: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 1339: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 1340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1341: ctxt->sax->error(ctxt->userData,
1342: "xmlNewEntityInputStream unparsed entity !\n");
1343: break;
1344: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1345: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1346: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1347: (char *) entity->ExternalID, ctxt));
1.113 daniel 1348: case XML_INTERNAL_GENERAL_ENTITY:
1349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1350: ctxt->sax->error(ctxt->userData,
1351: "Internal entity %s without content !\n", entity->name);
1352: break;
1353: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1354: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1356: ctxt->sax->error(ctxt->userData,
1357: "Internal parameter entity %s without content !\n", entity->name);
1358: break;
1359: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1360: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1361: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1362: ctxt->sax->error(ctxt->userData,
1363: "Predefined entity %s without content !\n", entity->name);
1364: break;
1365: }
1.50 daniel 1366: return(NULL);
1.45 daniel 1367: }
1.96 daniel 1368: input = xmlNewInputStream(ctxt);
1.45 daniel 1369: if (input == NULL) {
1.50 daniel 1370: return(NULL);
1.45 daniel 1371: }
1.156 daniel 1372: input->filename = (char *) entity->SystemID;
1.45 daniel 1373: input->base = entity->content;
1374: input->cur = entity->content;
1.140 daniel 1375: input->length = entity->length;
1.50 daniel 1376: return(input);
1.45 daniel 1377: }
1378:
1.59 daniel 1379: /**
1380: * xmlNewStringInputStream:
1381: * @ctxt: an XML parser context
1.96 daniel 1382: * @buffer: an memory buffer
1.59 daniel 1383: *
1384: * Create a new input stream based on a memory buffer.
1.68 daniel 1385: * Returns the new input stream
1.59 daniel 1386: */
1387: xmlParserInputPtr
1.123 daniel 1388: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1389: xmlParserInputPtr input;
1390:
1.96 daniel 1391: if (buffer == NULL) {
1.123 daniel 1392: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1394: ctxt->sax->error(ctxt->userData,
1.59 daniel 1395: "internal: xmlNewStringInputStream string = NULL\n");
1396: return(NULL);
1397: }
1.96 daniel 1398: input = xmlNewInputStream(ctxt);
1.59 daniel 1399: if (input == NULL) {
1400: return(NULL);
1401: }
1.96 daniel 1402: input->base = buffer;
1403: input->cur = buffer;
1.140 daniel 1404: input->length = xmlStrlen(buffer);
1.59 daniel 1405: return(input);
1406: }
1407:
1.76 daniel 1408: /**
1409: * xmlNewInputFromFile:
1410: * @ctxt: an XML parser context
1411: * @filename: the filename to use as entity
1412: *
1413: * Create a new input stream based on a file.
1414: *
1415: * Returns the new input stream or NULL in case of error
1416: */
1417: xmlParserInputPtr
1.79 daniel 1418: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1419: xmlParserInputBufferPtr buf;
1.76 daniel 1420: xmlParserInputPtr inputStream;
1.111 daniel 1421: char *directory = NULL;
1.76 daniel 1422:
1.96 daniel 1423: if (ctxt == NULL) return(NULL);
1.91 daniel 1424: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1425: if (buf == NULL) {
1.140 daniel 1426: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1427:
1.94 daniel 1428: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1429: #ifdef WIN32
1430: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1431: #else
1432: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1433: #endif
1434: buf = xmlParserInputBufferCreateFilename(name,
1435: XML_CHAR_ENCODING_NONE);
1.106 daniel 1436: if (buf != NULL)
1.142 daniel 1437: directory = xmlParserGetDirectory(name);
1.106 daniel 1438: }
1439: if ((buf == NULL) && (ctxt->directory != NULL)) {
1440: #ifdef WIN32
1441: sprintf(name, "%s\\%s", ctxt->directory, filename);
1442: #else
1443: sprintf(name, "%s/%s", ctxt->directory, filename);
1444: #endif
1445: buf = xmlParserInputBufferCreateFilename(name,
1446: XML_CHAR_ENCODING_NONE);
1447: if (buf != NULL)
1.142 daniel 1448: directory = xmlParserGetDirectory(name);
1.106 daniel 1449: }
1450: if (buf == NULL)
1.94 daniel 1451: return(NULL);
1452: }
1453: if (directory == NULL)
1454: directory = xmlParserGetDirectory(filename);
1.76 daniel 1455:
1.96 daniel 1456: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1457: if (inputStream == NULL) {
1.119 daniel 1458: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1459: return(NULL);
1460: }
1461:
1.119 daniel 1462: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1463: inputStream->directory = directory;
1.91 daniel 1464: inputStream->buf = buf;
1.76 daniel 1465:
1.91 daniel 1466: inputStream->base = inputStream->buf->buffer->content;
1467: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1468: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1469: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1470: return(inputStream);
1471: }
1472:
1.77 daniel 1473: /************************************************************************
1474: * *
1.97 daniel 1475: * Commodity functions to handle parser contexts *
1476: * *
1477: ************************************************************************/
1478:
1479: /**
1480: * xmlInitParserCtxt:
1481: * @ctxt: an XML parser context
1482: *
1483: * Initialize a parser context
1484: */
1485:
1486: void
1487: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1488: {
1489: xmlSAXHandler *sax;
1490:
1.168 daniel 1491: xmlDefaultSAXHandlerInit();
1492:
1.119 daniel 1493: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1494: if (sax == NULL) {
1495: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1496: }
1.180 daniel 1497: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1498:
1499: /* Allocate the Input stack */
1.119 daniel 1500: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1501: ctxt->inputNr = 0;
1502: ctxt->inputMax = 5;
1503: ctxt->input = NULL;
1.165 daniel 1504:
1.97 daniel 1505: ctxt->version = NULL;
1506: ctxt->encoding = NULL;
1507: ctxt->standalone = -1;
1.98 daniel 1508: ctxt->hasExternalSubset = 0;
1509: ctxt->hasPErefs = 0;
1.97 daniel 1510: ctxt->html = 0;
1.98 daniel 1511: ctxt->external = 0;
1.140 daniel 1512: ctxt->instate = XML_PARSER_START;
1.97 daniel 1513: ctxt->token = 0;
1.106 daniel 1514: ctxt->directory = NULL;
1.97 daniel 1515:
1516: /* Allocate the Node stack */
1.119 daniel 1517: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1518: ctxt->nodeNr = 0;
1519: ctxt->nodeMax = 10;
1520: ctxt->node = NULL;
1521:
1.140 daniel 1522: /* Allocate the Name stack */
1523: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1524: ctxt->nameNr = 0;
1525: ctxt->nameMax = 10;
1526: ctxt->name = NULL;
1527:
1.176 daniel 1528: /* Allocate the space stack */
1529: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1530: ctxt->spaceNr = 1;
1531: ctxt->spaceMax = 10;
1532: ctxt->spaceTab[0] = -1;
1533: ctxt->space = &ctxt->spaceTab[0];
1534:
1.160 daniel 1535: if (sax == NULL) {
1536: ctxt->sax = &xmlDefaultSAXHandler;
1537: } else {
1.97 daniel 1538: ctxt->sax = sax;
1539: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1540: }
1541: ctxt->userData = ctxt;
1542: ctxt->myDoc = NULL;
1543: ctxt->wellFormed = 1;
1.99 daniel 1544: ctxt->valid = 1;
1.100 daniel 1545: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1546: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1547: ctxt->vctxt.userData = ctxt;
1.149 daniel 1548: if (ctxt->validate) {
1549: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1550: if (xmlGetWarningsDefaultValue == 0)
1551: ctxt->vctxt.warning = NULL;
1552: else
1553: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1554: /* Allocate the Node stack */
1555: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1556: ctxt->vctxt.nodeNr = 0;
1557: ctxt->vctxt.nodeMax = 4;
1558: ctxt->vctxt.node = NULL;
1.149 daniel 1559: } else {
1560: ctxt->vctxt.error = NULL;
1561: ctxt->vctxt.warning = NULL;
1562: }
1.97 daniel 1563: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1564: ctxt->record_info = 0;
1.135 daniel 1565: ctxt->nbChars = 0;
1.140 daniel 1566: ctxt->checkIndex = 0;
1.180 daniel 1567: ctxt->inSubset = 0;
1.140 daniel 1568: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1569: ctxt->depth = 0;
1.198 daniel 1570: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.97 daniel 1571: xmlInitNodeInfoSeq(&ctxt->node_seq);
1572: }
1573:
1574: /**
1575: * xmlFreeParserCtxt:
1576: * @ctxt: an XML parser context
1577: *
1578: * Free all the memory used by a parser context. However the parsed
1579: * document in ctxt->myDoc is not freed.
1580: */
1581:
1582: void
1583: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1584: {
1585: xmlParserInputPtr input;
1.140 daniel 1586: xmlChar *oldname;
1.97 daniel 1587:
1588: if (ctxt == NULL) return;
1589:
1590: while ((input = inputPop(ctxt)) != NULL) {
1591: xmlFreeInputStream(input);
1592: }
1.140 daniel 1593: while ((oldname = namePop(ctxt)) != NULL) {
1594: xmlFree(oldname);
1595: }
1.176 daniel 1596: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1597: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1598: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1599: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1600: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1601: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1602: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1603: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1604: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1605: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1606: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1607: xmlFree(ctxt->sax);
1608: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1609: xmlFree(ctxt);
1.97 daniel 1610: }
1611:
1612: /**
1613: * xmlNewParserCtxt:
1614: *
1615: * Allocate and initialize a new parser context.
1616: *
1617: * Returns the xmlParserCtxtPtr or NULL
1618: */
1619:
1620: xmlParserCtxtPtr
1621: xmlNewParserCtxt()
1622: {
1623: xmlParserCtxtPtr ctxt;
1624:
1.119 daniel 1625: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1626: if (ctxt == NULL) {
1627: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1628: perror("malloc");
1629: return(NULL);
1630: }
1.165 daniel 1631: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1632: xmlInitParserCtxt(ctxt);
1633: return(ctxt);
1634: }
1635:
1636: /**
1637: * xmlClearParserCtxt:
1638: * @ctxt: an XML parser context
1639: *
1640: * Clear (release owned resources) and reinitialize a parser context
1641: */
1642:
1643: void
1644: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1645: {
1646: xmlClearNodeInfoSeq(&ctxt->node_seq);
1647: xmlInitParserCtxt(ctxt);
1648: }
1649:
1650: /************************************************************************
1651: * *
1.77 daniel 1652: * Commodity functions to handle entities *
1653: * *
1654: ************************************************************************/
1655:
1.174 daniel 1656: /**
1657: * xmlCheckEntity:
1658: * @ctxt: an XML parser context
1659: * @content: the entity content string
1660: *
1661: * Parse an entity content and checks the WF constraints
1662: *
1663: */
1664:
1665: void
1666: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1667: }
1.97 daniel 1668:
1669: /**
1670: * xmlParseCharRef:
1671: * @ctxt: an XML parser context
1672: *
1673: * parse Reference declarations
1674: *
1675: * [66] CharRef ::= '&#' [0-9]+ ';' |
1676: * '&#x' [0-9a-fA-F]+ ';'
1677: *
1.98 daniel 1678: * [ WFC: Legal Character ]
1679: * Characters referred to using character references must match the
1680: * production for Char.
1681: *
1.135 daniel 1682: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1683: */
1.97 daniel 1684: int
1685: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1686: int val = 0;
1687:
1.111 daniel 1688: if (ctxt->token != 0) {
1689: val = ctxt->token;
1690: ctxt->token = 0;
1691: return(val);
1692: }
1.152 daniel 1693: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1694: (NXT(2) == 'x')) {
1695: SKIP(3);
1.152 daniel 1696: while (RAW != ';') {
1697: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1698: val = val * 16 + (CUR - '0');
1.152 daniel 1699: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1700: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1701: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1702: val = val * 16 + (CUR - 'A') + 10;
1703: else {
1.123 daniel 1704: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706: ctxt->sax->error(ctxt->userData,
1707: "xmlParseCharRef: invalid hexadecimal value\n");
1708: ctxt->wellFormed = 0;
1.180 daniel 1709: ctxt->disableSAX = 1;
1.97 daniel 1710: val = 0;
1711: break;
1712: }
1713: NEXT;
1714: }
1.164 daniel 1715: if (RAW == ';') {
1716: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1717: ctxt->nbChars ++;
1718: ctxt->input->cur++;
1719: }
1.152 daniel 1720: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1721: SKIP(2);
1.152 daniel 1722: while (RAW != ';') {
1723: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1724: val = val * 10 + (CUR - '0');
1725: else {
1.123 daniel 1726: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1728: ctxt->sax->error(ctxt->userData,
1729: "xmlParseCharRef: invalid decimal value\n");
1730: ctxt->wellFormed = 0;
1.180 daniel 1731: ctxt->disableSAX = 1;
1.97 daniel 1732: val = 0;
1733: break;
1734: }
1735: NEXT;
1736: }
1.164 daniel 1737: if (RAW == ';') {
1738: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1739: ctxt->nbChars ++;
1740: ctxt->input->cur++;
1741: }
1.97 daniel 1742: } else {
1.123 daniel 1743: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1744: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1745: ctxt->sax->error(ctxt->userData,
1746: "xmlParseCharRef: invalid value\n");
1.97 daniel 1747: ctxt->wellFormed = 0;
1.180 daniel 1748: ctxt->disableSAX = 1;
1.97 daniel 1749: }
1.98 daniel 1750:
1.97 daniel 1751: /*
1.98 daniel 1752: * [ WFC: Legal Character ]
1753: * Characters referred to using character references must match the
1754: * production for Char.
1.97 daniel 1755: */
1756: if (IS_CHAR(val)) {
1757: return(val);
1758: } else {
1.123 daniel 1759: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1761: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1762: val);
1763: ctxt->wellFormed = 0;
1.180 daniel 1764: ctxt->disableSAX = 1;
1.97 daniel 1765: }
1766: return(0);
1.77 daniel 1767: }
1768:
1.96 daniel 1769: /**
1.135 daniel 1770: * xmlParseStringCharRef:
1771: * @ctxt: an XML parser context
1772: * @str: a pointer to an index in the string
1773: *
1774: * parse Reference declarations, variant parsing from a string rather
1775: * than an an input flow.
1776: *
1777: * [66] CharRef ::= '&#' [0-9]+ ';' |
1778: * '&#x' [0-9a-fA-F]+ ';'
1779: *
1780: * [ WFC: Legal Character ]
1781: * Characters referred to using character references must match the
1782: * production for Char.
1783: *
1784: * Returns the value parsed (as an int), 0 in case of error, str will be
1785: * updated to the current value of the index
1786: */
1787: int
1788: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1789: const xmlChar *ptr;
1790: xmlChar cur;
1791: int val = 0;
1792:
1793: if ((str == NULL) || (*str == NULL)) return(0);
1794: ptr = *str;
1795: cur = *ptr;
1.137 daniel 1796: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1797: ptr += 3;
1798: cur = *ptr;
1799: while (cur != ';') {
1800: if ((cur >= '0') && (cur <= '9'))
1801: val = val * 16 + (cur - '0');
1802: else if ((cur >= 'a') && (cur <= 'f'))
1803: val = val * 16 + (cur - 'a') + 10;
1804: else if ((cur >= 'A') && (cur <= 'F'))
1805: val = val * 16 + (cur - 'A') + 10;
1806: else {
1807: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1809: ctxt->sax->error(ctxt->userData,
1.198 daniel 1810: "xmlParseStringCharRef: invalid hexadecimal value\n");
1.135 daniel 1811: ctxt->wellFormed = 0;
1.180 daniel 1812: ctxt->disableSAX = 1;
1.135 daniel 1813: val = 0;
1814: break;
1815: }
1816: ptr++;
1817: cur = *ptr;
1818: }
1819: if (cur == ';')
1820: ptr++;
1.145 daniel 1821: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1822: ptr += 2;
1823: cur = *ptr;
1824: while (cur != ';') {
1825: if ((cur >= '0') && (cur <= '9'))
1826: val = val * 10 + (cur - '0');
1827: else {
1828: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1829: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1830: ctxt->sax->error(ctxt->userData,
1.198 daniel 1831: "xmlParseStringCharRef: invalid decimal value\n");
1.135 daniel 1832: ctxt->wellFormed = 0;
1.180 daniel 1833: ctxt->disableSAX = 1;
1.135 daniel 1834: val = 0;
1835: break;
1836: }
1837: ptr++;
1838: cur = *ptr;
1839: }
1840: if (cur == ';')
1841: ptr++;
1842: } else {
1843: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1845: ctxt->sax->error(ctxt->userData,
1846: "xmlParseCharRef: invalid value\n");
1847: ctxt->wellFormed = 0;
1.180 daniel 1848: ctxt->disableSAX = 1;
1.135 daniel 1849: return(0);
1850: }
1851: *str = ptr;
1852:
1853: /*
1854: * [ WFC: Legal Character ]
1855: * Characters referred to using character references must match the
1856: * production for Char.
1857: */
1858: if (IS_CHAR(val)) {
1859: return(val);
1860: } else {
1861: ctxt->errNo = XML_ERR_INVALID_CHAR;
1862: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1863: ctxt->sax->error(ctxt->userData,
1864: "CharRef: invalid xmlChar value %d\n", val);
1865: ctxt->wellFormed = 0;
1.180 daniel 1866: ctxt->disableSAX = 1;
1.135 daniel 1867: }
1868: return(0);
1869: }
1870:
1871: /**
1.96 daniel 1872: * xmlParserHandleReference:
1873: * @ctxt: the parser context
1874: *
1.97 daniel 1875: * [67] Reference ::= EntityRef | CharRef
1876: *
1.96 daniel 1877: * [68] EntityRef ::= '&' Name ';'
1878: *
1.98 daniel 1879: * [ WFC: Entity Declared ]
1880: * the Name given in the entity reference must match that in an entity
1881: * declaration, except that well-formed documents need not declare any
1882: * of the following entities: amp, lt, gt, apos, quot.
1883: *
1884: * [ WFC: Parsed Entity ]
1885: * An entity reference must not contain the name of an unparsed entity
1886: *
1.97 daniel 1887: * [66] CharRef ::= '&#' [0-9]+ ';' |
1888: * '&#x' [0-9a-fA-F]+ ';'
1889: *
1.96 daniel 1890: * A PEReference may have been detectect in the current input stream
1891: * the handling is done accordingly to
1892: * http://www.w3.org/TR/REC-xml#entproc
1893: */
1894: void
1895: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1896: xmlParserInputPtr input;
1.123 daniel 1897: xmlChar *name;
1.97 daniel 1898: xmlEntityPtr ent = NULL;
1899:
1.126 daniel 1900: if (ctxt->token != 0) {
1901: return;
1902: }
1.152 daniel 1903: if (RAW != '&') return;
1.97 daniel 1904: GROW;
1.152 daniel 1905: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1906: switch(ctxt->instate) {
1.140 daniel 1907: case XML_PARSER_ENTITY_DECL:
1908: case XML_PARSER_PI:
1.109 daniel 1909: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1910: case XML_PARSER_COMMENT:
1.168 daniel 1911: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1912: /* we just ignore it there */
1913: return;
1914: case XML_PARSER_START_TAG:
1.109 daniel 1915: return;
1.140 daniel 1916: case XML_PARSER_END_TAG:
1.97 daniel 1917: return;
1918: case XML_PARSER_EOF:
1.123 daniel 1919: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1921: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1922: ctxt->wellFormed = 0;
1.180 daniel 1923: ctxt->disableSAX = 1;
1.97 daniel 1924: return;
1925: case XML_PARSER_PROLOG:
1.140 daniel 1926: case XML_PARSER_START:
1927: case XML_PARSER_MISC:
1.123 daniel 1928: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1929: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1930: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1931: ctxt->wellFormed = 0;
1.180 daniel 1932: ctxt->disableSAX = 1;
1.97 daniel 1933: return;
1934: case XML_PARSER_EPILOG:
1.123 daniel 1935: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1937: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1938: ctxt->wellFormed = 0;
1.180 daniel 1939: ctxt->disableSAX = 1;
1.97 daniel 1940: return;
1941: case XML_PARSER_DTD:
1.123 daniel 1942: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1944: ctxt->sax->error(ctxt->userData,
1945: "CharRef are forbiden in DTDs!\n");
1946: ctxt->wellFormed = 0;
1.180 daniel 1947: ctxt->disableSAX = 1;
1.97 daniel 1948: return;
1949: case XML_PARSER_ENTITY_VALUE:
1950: /*
1951: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1952: * substitution here since we need the literal
1.97 daniel 1953: * entity value to be able to save the internal
1954: * subset of the document.
1955: * This will be handled by xmlDecodeEntities
1956: */
1957: return;
1958: case XML_PARSER_CONTENT:
1959: case XML_PARSER_ATTRIBUTE_VALUE:
1960: ctxt->token = xmlParseCharRef(ctxt);
1961: return;
1962: }
1963: return;
1964: }
1965:
1966: switch(ctxt->instate) {
1.109 daniel 1967: case XML_PARSER_CDATA_SECTION:
1968: return;
1.140 daniel 1969: case XML_PARSER_PI:
1.97 daniel 1970: case XML_PARSER_COMMENT:
1.168 daniel 1971: case XML_PARSER_SYSTEM_LITERAL:
1972: case XML_PARSER_CONTENT:
1.97 daniel 1973: return;
1.140 daniel 1974: case XML_PARSER_START_TAG:
1975: return;
1976: case XML_PARSER_END_TAG:
1977: return;
1.97 daniel 1978: case XML_PARSER_EOF:
1.123 daniel 1979: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1980: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1981: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1982: ctxt->wellFormed = 0;
1.180 daniel 1983: ctxt->disableSAX = 1;
1.97 daniel 1984: return;
1985: case XML_PARSER_PROLOG:
1.140 daniel 1986: case XML_PARSER_START:
1987: case XML_PARSER_MISC:
1.123 daniel 1988: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1990: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1991: ctxt->wellFormed = 0;
1.180 daniel 1992: ctxt->disableSAX = 1;
1.97 daniel 1993: return;
1994: case XML_PARSER_EPILOG:
1.123 daniel 1995: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1998: ctxt->wellFormed = 0;
1.180 daniel 1999: ctxt->disableSAX = 1;
1.97 daniel 2000: return;
2001: case XML_PARSER_ENTITY_VALUE:
2002: /*
2003: * NOTE: in the case of entity values, we don't do the
1.127 daniel 2004: * substitution here since we need the literal
1.97 daniel 2005: * entity value to be able to save the internal
2006: * subset of the document.
2007: * This will be handled by xmlDecodeEntities
2008: */
2009: return;
2010: case XML_PARSER_ATTRIBUTE_VALUE:
2011: /*
2012: * NOTE: in the case of attributes values, we don't do the
2013: * substitution here unless we are in a mode where
2014: * the parser is explicitely asked to substitute
2015: * entities. The SAX callback is called with values
2016: * without entity substitution.
2017: * This will then be handled by xmlDecodeEntities
2018: */
1.113 daniel 2019: return;
1.97 daniel 2020: case XML_PARSER_ENTITY_DECL:
2021: /*
2022: * we just ignore it there
2023: * the substitution will be done once the entity is referenced
2024: */
2025: return;
2026: case XML_PARSER_DTD:
1.123 daniel 2027: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 2028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2029: ctxt->sax->error(ctxt->userData,
2030: "Entity references are forbiden in DTDs!\n");
2031: ctxt->wellFormed = 0;
1.180 daniel 2032: ctxt->disableSAX = 1;
1.97 daniel 2033: return;
2034: }
2035:
2036: NEXT;
2037: name = xmlScanName(ctxt);
2038: if (name == NULL) {
1.123 daniel 2039: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 2040: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2041: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
2042: ctxt->wellFormed = 0;
1.180 daniel 2043: ctxt->disableSAX = 1;
1.97 daniel 2044: ctxt->token = '&';
2045: return;
2046: }
2047: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 2048: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 2049: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2050: ctxt->sax->error(ctxt->userData,
2051: "Entity reference: ';' expected\n");
2052: ctxt->wellFormed = 0;
1.180 daniel 2053: ctxt->disableSAX = 1;
1.97 daniel 2054: ctxt->token = '&';
1.119 daniel 2055: xmlFree(name);
1.97 daniel 2056: return;
2057: }
2058: SKIP(xmlStrlen(name) + 1);
2059: if (ctxt->sax != NULL) {
2060: if (ctxt->sax->getEntity != NULL)
2061: ent = ctxt->sax->getEntity(ctxt->userData, name);
2062: }
1.98 daniel 2063:
2064: /*
2065: * [ WFC: Entity Declared ]
2066: * the Name given in the entity reference must match that in an entity
2067: * declaration, except that well-formed documents need not declare any
2068: * of the following entities: amp, lt, gt, apos, quot.
2069: */
1.97 daniel 2070: if (ent == NULL)
2071: ent = xmlGetPredefinedEntity(name);
2072: if (ent == NULL) {
1.123 daniel 2073: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 2074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2075: ctxt->sax->error(ctxt->userData,
1.98 daniel 2076: "Entity reference: entity %s not declared\n",
2077: name);
1.97 daniel 2078: ctxt->wellFormed = 0;
1.180 daniel 2079: ctxt->disableSAX = 1;
1.119 daniel 2080: xmlFree(name);
1.97 daniel 2081: return;
2082: }
1.98 daniel 2083:
2084: /*
2085: * [ WFC: Parsed Entity ]
2086: * An entity reference must not contain the name of an unparsed entity
2087: */
1.159 daniel 2088: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 2089: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 2090: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2091: ctxt->sax->error(ctxt->userData,
2092: "Entity reference to unparsed entity %s\n", name);
2093: ctxt->wellFormed = 0;
1.180 daniel 2094: ctxt->disableSAX = 1;
1.98 daniel 2095: }
2096:
1.159 daniel 2097: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 2098: ctxt->token = ent->content[0];
1.119 daniel 2099: xmlFree(name);
1.97 daniel 2100: return;
2101: }
2102: input = xmlNewEntityInputStream(ctxt, ent);
2103: xmlPushInput(ctxt, input);
1.119 daniel 2104: xmlFree(name);
1.96 daniel 2105: return;
2106: }
2107:
2108: /**
2109: * xmlParserHandlePEReference:
2110: * @ctxt: the parser context
2111: *
2112: * [69] PEReference ::= '%' Name ';'
2113: *
1.98 daniel 2114: * [ WFC: No Recursion ]
2115: * TODO A parsed entity must not contain a recursive
2116: * reference to itself, either directly or indirectly.
2117: *
2118: * [ WFC: Entity Declared ]
2119: * In a document without any DTD, a document with only an internal DTD
2120: * subset which contains no parameter entity references, or a document
2121: * with "standalone='yes'", ... ... The declaration of a parameter
2122: * entity must precede any reference to it...
2123: *
2124: * [ VC: Entity Declared ]
2125: * In a document with an external subset or external parameter entities
2126: * with "standalone='no'", ... ... The declaration of a parameter entity
2127: * must precede any reference to it...
2128: *
2129: * [ WFC: In DTD ]
2130: * Parameter-entity references may only appear in the DTD.
2131: * NOTE: misleading but this is handled.
2132: *
2133: * A PEReference may have been detected in the current input stream
1.96 daniel 2134: * the handling is done accordingly to
2135: * http://www.w3.org/TR/REC-xml#entproc
2136: * i.e.
2137: * - Included in literal in entity values
2138: * - Included as Paraemeter Entity reference within DTDs
2139: */
2140: void
2141: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 2142: xmlChar *name;
1.96 daniel 2143: xmlEntityPtr entity = NULL;
2144: xmlParserInputPtr input;
2145:
1.126 daniel 2146: if (ctxt->token != 0) {
2147: return;
2148: }
1.152 daniel 2149: if (RAW != '%') return;
1.96 daniel 2150: switch(ctxt->instate) {
1.109 daniel 2151: case XML_PARSER_CDATA_SECTION:
2152: return;
1.97 daniel 2153: case XML_PARSER_COMMENT:
2154: return;
1.140 daniel 2155: case XML_PARSER_START_TAG:
2156: return;
2157: case XML_PARSER_END_TAG:
2158: return;
1.96 daniel 2159: case XML_PARSER_EOF:
1.123 daniel 2160: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 2161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2162: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
2163: ctxt->wellFormed = 0;
1.180 daniel 2164: ctxt->disableSAX = 1;
1.96 daniel 2165: return;
2166: case XML_PARSER_PROLOG:
1.140 daniel 2167: case XML_PARSER_START:
2168: case XML_PARSER_MISC:
1.123 daniel 2169: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 2170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2171: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
2172: ctxt->wellFormed = 0;
1.180 daniel 2173: ctxt->disableSAX = 1;
1.96 daniel 2174: return;
1.97 daniel 2175: case XML_PARSER_ENTITY_DECL:
1.96 daniel 2176: case XML_PARSER_CONTENT:
2177: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 2178: case XML_PARSER_PI:
1.168 daniel 2179: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 2180: /* we just ignore it there */
2181: return;
2182: case XML_PARSER_EPILOG:
1.123 daniel 2183: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 2184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 2185: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 2186: ctxt->wellFormed = 0;
1.180 daniel 2187: ctxt->disableSAX = 1;
1.96 daniel 2188: return;
1.97 daniel 2189: case XML_PARSER_ENTITY_VALUE:
2190: /*
2191: * NOTE: in the case of entity values, we don't do the
1.127 daniel 2192: * substitution here since we need the literal
1.97 daniel 2193: * entity value to be able to save the internal
2194: * subset of the document.
2195: * This will be handled by xmlDecodeEntities
2196: */
2197: return;
1.96 daniel 2198: case XML_PARSER_DTD:
1.98 daniel 2199: /*
2200: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2201: * In the internal DTD subset, parameter-entity references
2202: * can occur only where markup declarations can occur, not
2203: * within markup declarations.
2204: * In that case this is handled in xmlParseMarkupDecl
2205: */
2206: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2207: return;
1.96 daniel 2208: }
2209:
2210: NEXT;
2211: name = xmlParseName(ctxt);
2212: if (name == NULL) {
1.123 daniel 2213: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 2214: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2215: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
2216: ctxt->wellFormed = 0;
1.180 daniel 2217: ctxt->disableSAX = 1;
1.96 daniel 2218: } else {
1.152 daniel 2219: if (RAW == ';') {
1.96 daniel 2220: NEXT;
1.98 daniel 2221: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2222: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 2223: if (entity == NULL) {
1.98 daniel 2224:
2225: /*
2226: * [ WFC: Entity Declared ]
2227: * In a document without any DTD, a document with only an
2228: * internal DTD subset which contains no parameter entity
2229: * references, or a document with "standalone='yes'", ...
2230: * ... The declaration of a parameter entity must precede
2231: * any reference to it...
2232: */
2233: if ((ctxt->standalone == 1) ||
2234: ((ctxt->hasExternalSubset == 0) &&
2235: (ctxt->hasPErefs == 0))) {
2236: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2237: ctxt->sax->error(ctxt->userData,
2238: "PEReference: %%%s; not found\n", name);
2239: ctxt->wellFormed = 0;
1.180 daniel 2240: ctxt->disableSAX = 1;
1.98 daniel 2241: } else {
2242: /*
2243: * [ VC: Entity Declared ]
2244: * In a document with an external subset or external
2245: * parameter entities with "standalone='no'", ...
2246: * ... The declaration of a parameter entity must precede
2247: * any reference to it...
2248: */
1.212 veillard 2249: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2250: ctxt->vctxt.error(ctxt->vctxt.userData,
2251: "PEReference: %%%s; not found\n", name);
2252: } else
1.98 daniel 2253: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
2254: ctxt->sax->warning(ctxt->userData,
2255: "PEReference: %%%s; not found\n", name);
2256: ctxt->valid = 0;
2257: }
1.96 daniel 2258: } else {
1.159 daniel 2259: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2260: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 2261: /*
1.156 daniel 2262: * TODO !!! handle the extra spaces added before and after
1.96 daniel 2263: * c.f. http://www.w3.org/TR/REC-xml#as-PE
2264: */
2265: input = xmlNewEntityInputStream(ctxt, entity);
2266: xmlPushInput(ctxt, input);
1.164 daniel 2267: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2268: (RAW == '<') && (NXT(1) == '?') &&
2269: (NXT(2) == 'x') && (NXT(3) == 'm') &&
2270: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 2271: xmlParseTextDecl(ctxt);
1.164 daniel 2272: }
2273: if (ctxt->token == 0)
2274: ctxt->token = ' ';
1.96 daniel 2275: } else {
2276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277: ctxt->sax->error(ctxt->userData,
2278: "xmlHandlePEReference: %s is not a parameter entity\n",
2279: name);
2280: ctxt->wellFormed = 0;
1.180 daniel 2281: ctxt->disableSAX = 1;
1.96 daniel 2282: }
2283: }
2284: } else {
1.123 daniel 2285: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 2286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2287: ctxt->sax->error(ctxt->userData,
2288: "xmlHandlePEReference: expecting ';'\n");
2289: ctxt->wellFormed = 0;
1.180 daniel 2290: ctxt->disableSAX = 1;
1.96 daniel 2291: }
1.119 daniel 2292: xmlFree(name);
1.97 daniel 2293: }
2294: }
2295:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. It must be
 * expanded inside a function returning a pointer and having both @buffer
 * and a matching buffer##_size variable in scope.
 *
 * The result of xmlRealloc goes through a temporary so that the previous
 * allocation is still reachable on failure; in that case the old buffer
 * is released and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 2308:
2309: /**
2310: * xmlDecodeEntities:
2311: * @ctxt: the parser context
2312: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2313: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 2314: * @end: an end marker xmlChar, 0 if none
2315: * @end2: an end marker xmlChar, 0 if none
2316: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 2317: *
2318: * [67] Reference ::= EntityRef | CharRef
2319: *
2320: * [69] PEReference ::= '%' Name ';'
2321: *
2322: * Returns A newly allocated string with the substitution done. The caller
2323: * must deallocate it !
2324: */
1.123 daniel 2325: xmlChar *
1.77 daniel 2326: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 2327: xmlChar end, xmlChar end2, xmlChar end3) {
2328: xmlChar *buffer = NULL;
1.202 daniel 2329: unsigned int buffer_size = 0;
2330: unsigned int nbchars = 0;
1.78 daniel 2331:
1.123 daniel 2332: xmlChar *current = NULL;
1.77 daniel 2333: xmlEntityPtr ent;
2334: unsigned int max = (unsigned int) len;
1.161 daniel 2335: int c,l;
1.77 daniel 2336:
1.185 daniel 2337: if (ctxt->depth > 40) {
2338: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2339: ctxt->sax->error(ctxt->userData,
2340: "Detected entity reference loop\n");
2341: ctxt->wellFormed = 0;
2342: ctxt->disableSAX = 1;
2343: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2344: return(NULL);
2345: }
2346:
1.77 daniel 2347: /*
2348: * allocate a translation buffer.
2349: */
1.140 daniel 2350: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2351: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2352: if (buffer == NULL) {
2353: perror("xmlDecodeEntities: malloc failed");
2354: return(NULL);
2355: }
2356:
1.78 daniel 2357: /*
2358: * Ok loop until we reach one of the ending char or a size limit.
2359: */
1.161 daniel 2360: c = CUR_CHAR(l);
2361: while ((nbchars < max) && (c != end) &&
2362: (c != end2) && (c != end3)) {
1.77 daniel 2363:
1.161 daniel 2364: if (c == 0) break;
2365: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2366: int val = xmlParseCharRef(ctxt);
1.161 daniel 2367: COPY_BUF(0,buffer,nbchars,val);
2368: NEXTL(l);
2369: } else if ((c == '&') && (ctxt->token != '&') &&
2370: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2371: ent = xmlParseEntityRef(ctxt);
2372: if ((ent != NULL) &&
2373: (ctxt->replaceEntities != 0)) {
2374: current = ent->content;
2375: while (*current != 0) {
1.161 daniel 2376: buffer[nbchars++] = *current++;
2377: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2378: growBuffer(buffer);
1.77 daniel 2379: }
2380: }
1.98 daniel 2381: } else if (ent != NULL) {
1.123 daniel 2382: const xmlChar *cur = ent->name;
1.98 daniel 2383:
1.161 daniel 2384: buffer[nbchars++] = '&';
2385: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2386: growBuffer(buffer);
2387: }
1.161 daniel 2388: while (*cur != 0) {
2389: buffer[nbchars++] = *cur++;
2390: }
2391: buffer[nbchars++] = ';';
1.77 daniel 2392: }
1.161 daniel 2393: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2394: /*
1.77 daniel 2395: * a PEReference induce to switch the entity flow,
2396: * we break here to flush the current set of chars
2397: * parsed if any. We will be called back later.
1.97 daniel 2398: */
1.91 daniel 2399: if (nbchars != 0) break;
1.77 daniel 2400:
2401: xmlParsePEReference(ctxt);
1.79 daniel 2402:
1.97 daniel 2403: /*
1.79 daniel 2404: * Pop-up of finished entities.
1.97 daniel 2405: */
1.152 daniel 2406: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2407: xmlPopInput(ctxt);
2408:
1.98 daniel 2409: break;
1.77 daniel 2410: } else {
1.161 daniel 2411: COPY_BUF(l,buffer,nbchars,c);
2412: NEXTL(l);
2413: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2414: growBuffer(buffer);
2415: }
1.77 daniel 2416: }
1.161 daniel 2417: c = CUR_CHAR(l);
1.77 daniel 2418: }
1.161 daniel 2419: buffer[nbchars++] = 0;
1.77 daniel 2420: return(buffer);
2421: }
2422:
1.135 daniel 2423: /**
2424: * xmlStringDecodeEntities:
2425: * @ctxt: the parser context
2426: * @str: the input string
2427: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2428: * @end: an end marker xmlChar, 0 if none
2429: * @end2: an end marker xmlChar, 0 if none
2430: * @end3: an end marker xmlChar, 0 if none
2431: *
2432: * [67] Reference ::= EntityRef | CharRef
2433: *
2434: * [69] PEReference ::= '%' Name ';'
2435: *
2436: * Returns A newly allocated string with the substitution done. The caller
2437: * must deallocate it !
2438: */
2439: xmlChar *
2440: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2441: xmlChar end, xmlChar end2, xmlChar end3) {
2442: xmlChar *buffer = NULL;
2443: int buffer_size = 0;
2444:
2445: xmlChar *current = NULL;
2446: xmlEntityPtr ent;
1.176 daniel 2447: int c,l;
2448: int nbchars = 0;
1.135 daniel 2449:
1.211 veillard 2450: if (str == NULL)
2451: return(NULL);
2452:
1.185 daniel 2453: if (ctxt->depth > 40) {
2454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2455: ctxt->sax->error(ctxt->userData,
2456: "Detected entity reference loop\n");
2457: ctxt->wellFormed = 0;
2458: ctxt->disableSAX = 1;
2459: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2460: return(NULL);
2461: }
2462:
1.135 daniel 2463: /*
2464: * allocate a translation buffer.
2465: */
1.140 daniel 2466: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2467: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2468: if (buffer == NULL) {
2469: perror("xmlDecodeEntities: malloc failed");
2470: return(NULL);
2471: }
2472:
2473: /*
2474: * Ok loop until we reach one of the ending char or a size limit.
2475: */
1.176 daniel 2476: c = CUR_SCHAR(str, l);
2477: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2478:
1.176 daniel 2479: if (c == 0) break;
2480: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2481: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2482: if (val != 0) {
2483: COPY_BUF(0,buffer,nbchars,val);
2484: }
2485: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2486: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2487: if ((ent != NULL) && (ent->content != NULL)) {
2488: xmlChar *rep;
2489:
2490: ctxt->depth++;
2491: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2492: 0, 0, 0);
2493: ctxt->depth--;
2494: if (rep != NULL) {
2495: current = rep;
2496: while (*current != 0) {
2497: buffer[nbchars++] = *current++;
2498: if (nbchars >
2499: buffer_size - XML_PARSER_BUFFER_SIZE) {
2500: growBuffer(buffer);
2501: }
1.135 daniel 2502: }
1.185 daniel 2503: xmlFree(rep);
1.135 daniel 2504: }
2505: } else if (ent != NULL) {
2506: int i = xmlStrlen(ent->name);
2507: const xmlChar *cur = ent->name;
2508:
1.176 daniel 2509: buffer[nbchars++] = '&';
2510: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2511: growBuffer(buffer);
2512: }
2513: for (;i > 0;i--)
1.176 daniel 2514: buffer[nbchars++] = *cur++;
2515: buffer[nbchars++] = ';';
1.135 daniel 2516: }
1.176 daniel 2517: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2518: ent = xmlParseStringPEReference(ctxt, &str);
2519: if (ent != NULL) {
1.185 daniel 2520: xmlChar *rep;
2521:
2522: ctxt->depth++;
2523: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2524: 0, 0, 0);
2525: ctxt->depth--;
2526: if (rep != NULL) {
2527: current = rep;
2528: while (*current != 0) {
2529: buffer[nbchars++] = *current++;
2530: if (nbchars >
2531: buffer_size - XML_PARSER_BUFFER_SIZE) {
2532: growBuffer(buffer);
2533: }
1.135 daniel 2534: }
1.185 daniel 2535: xmlFree(rep);
1.135 daniel 2536: }
2537: }
2538: } else {
1.176 daniel 2539: COPY_BUF(l,buffer,nbchars,c);
2540: str += l;
2541: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2542: growBuffer(buffer);
2543: }
2544: }
1.176 daniel 2545: c = CUR_SCHAR(str, l);
1.135 daniel 2546: }
1.176 daniel 2547: buffer[nbchars++] = 0;
1.135 daniel 2548: return(buffer);
2549: }
2550:
1.1 veillard 2551:
1.28 daniel 2552: /************************************************************************
2553: * *
1.75 daniel 2554: * Commodity functions to handle encodings *
2555: * *
2556: ************************************************************************/
2557:
1.172 daniel 2558: /*
2559: * xmlCheckLanguageID
2560: * @lang: pointer to the string value
2561: *
2562: * Checks that the value conforms to the LanguageID production:
2563: *
2564: * [33] LanguageID ::= Langcode ('-' Subcode)*
2565: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2566: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2567: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2568: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2569: * [38] Subcode ::= ([a-z] | [A-Z])+
2570: *
2571: * Returns 1 if correct 0 otherwise
2572: **/
2573: int
2574: xmlCheckLanguageID(const xmlChar *lang) {
2575: const xmlChar *cur = lang;
2576:
2577: if (cur == NULL)
2578: return(0);
2579: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2580: ((cur[0] == 'I') && (cur[1] == '-'))) {
2581: /*
2582: * IANA code
2583: */
2584: cur += 2;
2585: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2586: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2587: cur++;
2588: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2589: ((cur[0] == 'X') && (cur[1] == '-'))) {
2590: /*
2591: * User code
2592: */
2593: cur += 2;
2594: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2595: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2596: cur++;
2597: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2598: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2599: /*
2600: * ISO639
2601: */
2602: cur++;
2603: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2604: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2605: cur++;
2606: else
2607: return(0);
2608: } else
2609: return(0);
2610: while (cur[0] != 0) {
2611: if (cur[0] != '-')
2612: return(0);
2613: cur++;
2614: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2615: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2616: cur++;
2617: else
2618: return(0);
2619: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2620: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2621: cur++;
2622: }
2623: return(1);
2624: }
2625:
1.75 daniel 2626: /**
2627: * xmlSwitchEncoding:
2628: * @ctxt: the parser context
1.124 daniel 2629: * @enc: the encoding value (number)
1.75 daniel 2630: *
2631: * change the input functions when discovering the character encoding
2632: * of a given entity.
1.193 daniel 2633: *
2634: * Returns 0 in case of success, -1 otherwise
1.75 daniel 2635: */
1.193 daniel 2636: int
1.75 daniel 2637: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2638: {
1.156 daniel 2639: xmlCharEncodingHandlerPtr handler;
2640:
1.193 daniel 2641: switch (enc) {
2642: case XML_CHAR_ENCODING_ERROR:
2643: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2645: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2646: ctxt->wellFormed = 0;
2647: ctxt->disableSAX = 1;
2648: break;
2649: case XML_CHAR_ENCODING_NONE:
2650: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2651: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2652: return(0);
2653: case XML_CHAR_ENCODING_UTF8:
2654: /* default encoding, no conversion should be needed */
1.198 daniel 2655: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2656: return(0);
2657: default:
2658: break;
2659: }
1.156 daniel 2660: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 2661: if (handler == NULL) {
2662: /*
2663: * Default handlers.
2664: */
2665: switch (enc) {
2666: case XML_CHAR_ENCODING_ERROR:
2667: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2670: ctxt->wellFormed = 0;
2671: ctxt->disableSAX = 1;
1.198 daniel 2672: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2673: break;
2674: case XML_CHAR_ENCODING_NONE:
2675: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2676: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2677: return(0);
2678: case XML_CHAR_ENCODING_UTF8:
1.211 veillard 2679: case XML_CHAR_ENCODING_ASCII:
1.193 daniel 2680: /* default encoding, no conversion should be needed */
1.198 daniel 2681: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2682: return(0);
2683: case XML_CHAR_ENCODING_UTF16LE:
2684: break;
2685: case XML_CHAR_ENCODING_UTF16BE:
2686: break;
2687: case XML_CHAR_ENCODING_UCS4LE:
2688: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2690: ctxt->sax->error(ctxt->userData,
2691: "char encoding USC4 little endian not supported\n");
2692: break;
2693: case XML_CHAR_ENCODING_UCS4BE:
2694: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2696: ctxt->sax->error(ctxt->userData,
2697: "char encoding USC4 big endian not supported\n");
2698: break;
2699: case XML_CHAR_ENCODING_EBCDIC:
2700: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2702: ctxt->sax->error(ctxt->userData,
2703: "char encoding EBCDIC not supported\n");
2704: break;
2705: case XML_CHAR_ENCODING_UCS4_2143:
2706: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2708: ctxt->sax->error(ctxt->userData,
2709: "char encoding UCS4 2143 not supported\n");
2710: break;
2711: case XML_CHAR_ENCODING_UCS4_3412:
2712: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2714: ctxt->sax->error(ctxt->userData,
2715: "char encoding UCS4 3412 not supported\n");
2716: break;
2717: case XML_CHAR_ENCODING_UCS2:
2718: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2720: ctxt->sax->error(ctxt->userData,
2721: "char encoding UCS2 not supported\n");
2722: break;
2723: case XML_CHAR_ENCODING_8859_1:
2724: case XML_CHAR_ENCODING_8859_2:
2725: case XML_CHAR_ENCODING_8859_3:
2726: case XML_CHAR_ENCODING_8859_4:
2727: case XML_CHAR_ENCODING_8859_5:
2728: case XML_CHAR_ENCODING_8859_6:
2729: case XML_CHAR_ENCODING_8859_7:
2730: case XML_CHAR_ENCODING_8859_8:
2731: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 2732: /*
1.203 veillard 2733: * We used to keep the internal content in the
2734: * document encoding however this turns being unmaintainable
2735: * So xmlGetCharEncodingHandler() will return non-null
2736: * values for this now.
1.195 daniel 2737: */
2738: if ((ctxt->inputNr == 1) &&
2739: (ctxt->encoding == NULL) &&
2740: (ctxt->input->encoding != NULL)) {
2741: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2742: }
1.198 daniel 2743: ctxt->charset = enc;
1.195 daniel 2744: return(0);
1.193 daniel 2745: case XML_CHAR_ENCODING_2022_JP:
2746: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2747: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2748: ctxt->sax->error(ctxt->userData,
2749: "char encoding ISO-2022-JPnot supported\n");
2750: break;
2751: case XML_CHAR_ENCODING_SHIFT_JIS:
2752: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2753: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754: ctxt->sax->error(ctxt->userData,
2755: "char encoding Shift_JIS not supported\n");
2756: break;
2757: case XML_CHAR_ENCODING_EUC_JP:
2758: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2760: ctxt->sax->error(ctxt->userData,
2761: "char encoding EUC-JPnot supported\n");
2762: break;
2763: }
2764: }
2765: if (handler == NULL)
2766: return(-1);
1.198 daniel 2767: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2768: return(xmlSwitchToEncoding(ctxt, handler));
2769: }
2770:
2771: /**
2772: * xmlSwitchToEncoding:
2773: * @ctxt: the parser context
2774: * @handler: the encoding handler
2775: *
2776: * change the input functions when discovering the character encoding
2777: * of a given entity.
2778: *
2779: * Returns 0 in case of success, -1 otherwise
2780: */
2781: int
2782: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2783: {
1.194 daniel 2784: int nbchars;
2785:
1.156 daniel 2786: if (handler != NULL) {
2787: if (ctxt->input != NULL) {
2788: if (ctxt->input->buf != NULL) {
2789: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 2790: if (ctxt->input->buf->encoder == handler)
2791: return(0);
1.197 daniel 2792: /*
2793: * Note: this is a bit dangerous, but that's what it
2794: * takes to use nearly compatible signature for different
2795: * encodings.
2796: */
2797: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2798: ctxt->input->buf->encoder = handler;
2799: return(0);
1.156 daniel 2800: }
2801: ctxt->input->buf->encoder = handler;
2802:
2803: /*
1.194 daniel 2804: * Is there already some content down the pipe to convert ?
1.156 daniel 2805: */
2806: if ((ctxt->input->buf->buffer != NULL) &&
2807: (ctxt->input->buf->buffer->use > 0)) {
2808: int processed;
2809:
2810: /*
2811: * Specific handling of the Byte Order Mark for
2812: * UTF-16
2813: */
1.195 daniel 2814: if ((handler->name != NULL) &&
2815: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 2816: (ctxt->input->cur[0] == 0xFF) &&
2817: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 2818: ctxt->input->cur += 2;
1.156 daniel 2819: }
1.195 daniel 2820: if ((handler->name != NULL) &&
2821: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 2822: (ctxt->input->cur[0] == 0xFE) &&
2823: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 2824: ctxt->input->cur += 2;
1.156 daniel 2825: }
2826:
2827: /*
1.194 daniel 2828: * Shring the current input buffer.
2829: * Move it as the raw buffer and create a new input buffer
1.156 daniel 2830: */
2831: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 2832: xmlBufferShrink(ctxt->input->buf->buffer, processed);
2833: ctxt->input->buf->raw = ctxt->input->buf->buffer;
2834: ctxt->input->buf->buffer = xmlBufferCreate();
2835:
2836: /*
1.197 daniel 2837: * convert just enough to get
2838: * '<?xml version="1.0" encoding="xxx"?>'
2839: * parsed with the autodetected encoding
2840: * into the parser reading buffer.
1.194 daniel 2841: */
1.197 daniel 2842: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2843: ctxt->input->buf->buffer,
2844: ctxt->input->buf->raw);
1.194 daniel 2845: if (nbchars < 0) {
2846: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2847: return(-1);
1.156 daniel 2848: }
1.194 daniel 2849: ctxt->input->base =
2850: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.217 ! veillard 2851:
1.156 daniel 2852: }
1.193 daniel 2853: return(0);
1.156 daniel 2854: } else {
1.209 veillard 2855: if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1.156 daniel 2856: /*
2857: * When parsing a static memory array one must know the
2858: * size to be able to convert the buffer.
2859: */
2860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2861: ctxt->sax->error(ctxt->userData,
2862: "xmlSwitchEncoding : no input\n");
1.193 daniel 2863: return(-1);
1.156 daniel 2864: } else {
1.194 daniel 2865: int processed;
2866:
2867: /*
2868: * Shring the current input buffer.
2869: * Move it as the raw buffer and create a new input buffer
2870: */
2871: processed = ctxt->input->cur - ctxt->input->base;
1.209 veillard 2872:
1.194 daniel 2873: ctxt->input->buf->raw = xmlBufferCreate();
2874: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1.209 veillard 2875: ctxt->input->length - processed);
1.194 daniel 2876: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 2877:
2878: /*
1.194 daniel 2879: * convert as much as possible of the raw input
2880: * to the parser reading buffer.
2881: */
2882: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2883: ctxt->input->buf->buffer,
2884: ctxt->input->buf->raw);
2885: if (nbchars < 0) {
2886: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2887: return(-1);
1.156 daniel 2888: }
1.194 daniel 2889:
1.156 daniel 2890: /*
2891: * Conversion succeeded, get rid of the old buffer
2892: */
2893: if ((ctxt->input->free != NULL) &&
2894: (ctxt->input->base != NULL))
2895: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 2896: ctxt->input->base =
2897: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2898: }
2899: }
2900: } else {
2901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2902: ctxt->sax->error(ctxt->userData,
2903: "xmlSwitchEncoding : no input\n");
1.193 daniel 2904: return(-1);
1.156 daniel 2905: }
1.195 daniel 2906: /*
2907: * The parsing is now done in UTF8 natively
2908: */
1.198 daniel 2909: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2910: } else
2911: return(-1);
2912: return(0);
1.156 daniel 2913:
1.75 daniel 2914: }
2915:
2916: /************************************************************************
2917: * *
1.123 daniel 2918: * Commodity functions to handle xmlChars *
1.28 daniel 2919: * *
2920: ************************************************************************/
2921:
1.50 daniel 2922: /**
2923: * xmlStrndup:
1.123 daniel 2924: * @cur: the input xmlChar *
1.50 daniel 2925: * @len: the len of @cur
2926: *
1.123 daniel 2927: * a strndup for array of xmlChar's
1.68 daniel 2928: *
1.123 daniel 2929: * Returns a new xmlChar * or NULL
1.1 veillard 2930: */
1.123 daniel 2931: xmlChar *
2932: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2933: xmlChar *ret;
2934:
2935: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 2936: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2937: if (ret == NULL) {
1.86 daniel 2938: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2939: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2940: return(NULL);
2941: }
1.123 daniel 2942: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2943: ret[len] = 0;
2944: return(ret);
2945: }
2946:
1.50 daniel 2947: /**
2948: * xmlStrdup:
1.123 daniel 2949: * @cur: the input xmlChar *
1.50 daniel 2950: *
1.152 daniel 2951: * a strdup for array of xmlChar's. Since they are supposed to be
2952: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2953: * a termination mark of '0'.
1.68 daniel 2954: *
1.123 daniel 2955: * Returns a new xmlChar * or NULL
1.1 veillard 2956: */
1.123 daniel 2957: xmlChar *
2958: xmlStrdup(const xmlChar *cur) {
2959: const xmlChar *p = cur;
1.1 veillard 2960:
1.135 daniel 2961: if (cur == NULL) return(NULL);
1.152 daniel 2962: while (*p != 0) p++;
1.1 veillard 2963: return(xmlStrndup(cur, p - cur));
2964: }
2965:
1.50 daniel 2966: /**
2967: * xmlCharStrndup:
2968: * @cur: the input char *
2969: * @len: the len of @cur
2970: *
1.123 daniel 2971: * a strndup for char's to xmlChar's
1.68 daniel 2972: *
1.123 daniel 2973: * Returns a new xmlChar * or NULL
1.45 daniel 2974: */
2975:
1.123 daniel 2976: xmlChar *
1.55 daniel 2977: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2978: int i;
1.135 daniel 2979: xmlChar *ret;
2980:
2981: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 2982: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2983: if (ret == NULL) {
1.86 daniel 2984: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2985: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2986: return(NULL);
2987: }
2988: for (i = 0;i < len;i++)
1.123 daniel 2989: ret[i] = (xmlChar) cur[i];
1.45 daniel 2990: ret[len] = 0;
2991: return(ret);
2992: }
2993:
1.50 daniel 2994: /**
2995: * xmlCharStrdup:
2996: * @cur: the input char *
2997: * @len: the len of @cur
2998: *
1.123 daniel 2999: * a strdup for char's to xmlChar's
1.68 daniel 3000: *
1.123 daniel 3001: * Returns a new xmlChar * or NULL
1.45 daniel 3002: */
3003:
1.123 daniel 3004: xmlChar *
1.55 daniel 3005: xmlCharStrdup(const char *cur) {
1.45 daniel 3006: const char *p = cur;
3007:
1.135 daniel 3008: if (cur == NULL) return(NULL);
1.45 daniel 3009: while (*p != '\0') p++;
3010: return(xmlCharStrndup(cur, p - cur));
3011: }
3012:
1.50 daniel 3013: /**
3014: * xmlStrcmp:
1.123 daniel 3015: * @str1: the first xmlChar *
3016: * @str2: the second xmlChar *
1.50 daniel 3017: *
1.123 daniel 3018: * a strcmp for xmlChar's
1.68 daniel 3019: *
3020: * Returns the integer result of the comparison
1.14 veillard 3021: */
3022:
1.55 daniel 3023: int
1.123 daniel 3024: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 3025: register int tmp;
3026:
1.135 daniel 3027: if ((str1 == NULL) && (str2 == NULL)) return(0);
3028: if (str1 == NULL) return(-1);
3029: if (str2 == NULL) return(1);
1.14 veillard 3030: do {
3031: tmp = *str1++ - *str2++;
3032: if (tmp != 0) return(tmp);
3033: } while ((*str1 != 0) && (*str2 != 0));
3034: return (*str1 - *str2);
3035: }
3036:
1.50 daniel 3037: /**
3038: * xmlStrncmp:
1.123 daniel 3039: * @str1: the first xmlChar *
3040: * @str2: the second xmlChar *
1.50 daniel 3041: * @len: the max comparison length
3042: *
1.123 daniel 3043: * a strncmp for xmlChar's
1.68 daniel 3044: *
3045: * Returns the integer result of the comparison
1.14 veillard 3046: */
3047:
1.55 daniel 3048: int
1.123 daniel 3049: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 3050: register int tmp;
3051:
3052: if (len <= 0) return(0);
1.135 daniel 3053: if ((str1 == NULL) && (str2 == NULL)) return(0);
3054: if (str1 == NULL) return(-1);
3055: if (str2 == NULL) return(1);
1.14 veillard 3056: do {
3057: tmp = *str1++ - *str2++;
3058: if (tmp != 0) return(tmp);
3059: len--;
3060: if (len <= 0) return(0);
3061: } while ((*str1 != 0) && (*str2 != 0));
3062: return (*str1 - *str2);
3063: }
3064:
1.50 daniel 3065: /**
3066: * xmlStrchr:
1.123 daniel 3067: * @str: the xmlChar * array
3068: * @val: the xmlChar to search
1.50 daniel 3069: *
1.123 daniel 3070: * a strchr for xmlChar's
1.68 daniel 3071: *
1.123 daniel 3072: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 3073: */
3074:
1.123 daniel 3075: const xmlChar *
3076: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 3077: if (str == NULL) return(NULL);
1.14 veillard 3078: while (*str != 0) {
1.123 daniel 3079: if (*str == val) return((xmlChar *) str);
1.14 veillard 3080: str++;
3081: }
3082: return(NULL);
1.89 daniel 3083: }
3084:
3085: /**
3086: * xmlStrstr:
1.123 daniel 3087: * @str: the xmlChar * array (haystack)
3088: * @val: the xmlChar to search (needle)
1.89 daniel 3089: *
1.123 daniel 3090: * a strstr for xmlChar's
1.89 daniel 3091: *
1.123 daniel 3092: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 3093: */
3094:
1.123 daniel 3095: const xmlChar *
3096: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 3097: int n;
3098:
3099: if (str == NULL) return(NULL);
3100: if (val == NULL) return(NULL);
3101: n = xmlStrlen(val);
3102:
3103: if (n == 0) return(str);
3104: while (*str != 0) {
3105: if (*str == *val) {
1.123 daniel 3106: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 3107: }
3108: str++;
3109: }
3110: return(NULL);
3111: }
3112:
3113: /**
3114: * xmlStrsub:
1.123 daniel 3115: * @str: the xmlChar * array (haystack)
1.89 daniel 3116: * @start: the index of the first char (zero based)
3117: * @len: the length of the substring
3118: *
3119: * Extract a substring of a given string
3120: *
1.123 daniel 3121: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 3122: */
3123:
1.123 daniel 3124: xmlChar *
3125: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 3126: int i;
3127:
3128: if (str == NULL) return(NULL);
3129: if (start < 0) return(NULL);
1.90 daniel 3130: if (len < 0) return(NULL);
1.89 daniel 3131:
3132: for (i = 0;i < start;i++) {
3133: if (*str == 0) return(NULL);
3134: str++;
3135: }
3136: if (*str == 0) return(NULL);
3137: return(xmlStrndup(str, len));
1.14 veillard 3138: }
1.28 daniel 3139:
1.50 daniel 3140: /**
3141: * xmlStrlen:
1.123 daniel 3142: * @str: the xmlChar * array
1.50 daniel 3143: *
1.127 daniel 3144: * length of a xmlChar's string
1.68 daniel 3145: *
1.123 daniel 3146: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 3147: */
3148:
1.55 daniel 3149: int
1.123 daniel 3150: xmlStrlen(const xmlChar *str) {
1.45 daniel 3151: int len = 0;
3152:
3153: if (str == NULL) return(0);
3154: while (*str != 0) {
3155: str++;
3156: len++;
3157: }
3158: return(len);
3159: }
3160:
1.50 daniel 3161: /**
3162: * xmlStrncat:
1.123 daniel 3163: * @cur: the original xmlChar * array
3164: * @add: the xmlChar * array added
1.50 daniel 3165: * @len: the length of @add
3166: *
1.123 daniel 3167: * a strncat for array of xmlChar's
1.68 daniel 3168: *
1.123 daniel 3169: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 3170: */
3171:
1.123 daniel 3172: xmlChar *
3173: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 3174: int size;
1.123 daniel 3175: xmlChar *ret;
1.45 daniel 3176:
3177: if ((add == NULL) || (len == 0))
3178: return(cur);
3179: if (cur == NULL)
3180: return(xmlStrndup(add, len));
3181:
3182: size = xmlStrlen(cur);
1.204 veillard 3183: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 3184: if (ret == NULL) {
1.86 daniel 3185: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 3186: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 3187: return(cur);
3188: }
1.123 daniel 3189: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 3190: ret[size + len] = 0;
3191: return(ret);
3192: }
3193:
1.50 daniel 3194: /**
3195: * xmlStrcat:
1.123 daniel 3196: * @cur: the original xmlChar * array
3197: * @add: the xmlChar * array added
1.50 daniel 3198: *
1.152 daniel 3199: * a strcat for array of xmlChar's. Since they are supposed to be
3200: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
3201: * a termination mark of '0'.
1.68 daniel 3202: *
1.123 daniel 3203: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 3204: */
1.123 daniel 3205: xmlChar *
3206: xmlStrcat(xmlChar *cur, const xmlChar *add) {
3207: const xmlChar *p = add;
1.45 daniel 3208:
3209: if (add == NULL) return(cur);
3210: if (cur == NULL)
3211: return(xmlStrdup(add));
3212:
1.152 daniel 3213: while (*p != 0) p++;
1.45 daniel 3214: return(xmlStrncat(cur, add, p - add));
3215: }
3216:
3217: /************************************************************************
3218: * *
3219: * Commodity functions, cleanup needed ? *
3220: * *
3221: ************************************************************************/
3222:
1.50 daniel 3223: /**
3224: * areBlanks:
3225: * @ctxt: an XML parser context
1.123 daniel 3226: * @str: a xmlChar *
1.50 daniel 3227: * @len: the size of @str
3228: *
1.45 daniel 3229: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 3230: *
1.68 daniel 3231: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 3232: */
3233:
1.123 daniel 3234: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 3235: int i, ret;
1.45 daniel 3236: xmlNodePtr lastChild;
3237:
1.176 daniel 3238: /*
3239: * Check for xml:space value.
3240: */
3241: if (*(ctxt->space) == 1)
3242: return(0);
3243:
3244: /*
3245: * Check that the string is made of blanks
3246: */
1.45 daniel 3247: for (i = 0;i < len;i++)
3248: if (!(IS_BLANK(str[i]))) return(0);
3249:
1.176 daniel 3250: /*
3251: * Look if the element is mixed content in the Dtd if available
3252: */
1.104 daniel 3253: if (ctxt->myDoc != NULL) {
3254: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3255: if (ret == 0) return(1);
3256: if (ret == 1) return(0);
3257: }
1.176 daniel 3258:
1.104 daniel 3259: /*
1.176 daniel 3260: * Otherwise, heuristic :-\
1.104 daniel 3261: */
1.179 daniel 3262: if (ctxt->keepBlanks)
3263: return(0);
3264: if (RAW != '<') return(0);
3265: if (ctxt->node == NULL) return(0);
3266: if ((ctxt->node->children == NULL) &&
3267: (RAW == '<') && (NXT(1) == '/')) return(0);
3268:
1.45 daniel 3269: lastChild = xmlGetLastChild(ctxt->node);
3270: if (lastChild == NULL) {
3271: if (ctxt->node->content != NULL) return(0);
3272: } else if (xmlNodeIsText(lastChild))
3273: return(0);
1.157 daniel 3274: else if ((ctxt->node->children != NULL) &&
3275: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 3276: return(0);
1.45 daniel 3277: return(1);
3278: }
3279:
1.50 daniel 3280: /**
3281: * xmlHandleEntity:
3282: * @ctxt: an XML parser context
3283: * @entity: an XML entity pointer.
3284: *
3285: * Default handling of defined entities, when should we define a new input
1.45 daniel 3286: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 3287: *
3288: * OBSOLETE: to be removed at some point.
1.45 daniel 3289: */
3290:
1.55 daniel 3291: void
3292: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 3293: int len;
1.50 daniel 3294: xmlParserInputPtr input;
1.45 daniel 3295:
3296: if (entity->content == NULL) {
1.123 daniel 3297: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3299: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 3300: entity->name);
1.59 daniel 3301: ctxt->wellFormed = 0;
1.180 daniel 3302: ctxt->disableSAX = 1;
1.45 daniel 3303: return;
3304: }
3305: len = xmlStrlen(entity->content);
3306: if (len <= 2) goto handle_as_char;
3307:
3308: /*
3309: * Redefine its content as an input stream.
3310: */
1.50 daniel 3311: input = xmlNewEntityInputStream(ctxt, entity);
3312: xmlPushInput(ctxt, input);
1.45 daniel 3313: return;
3314:
3315: handle_as_char:
3316: /*
3317: * Just handle the content as a set of chars.
3318: */
1.171 daniel 3319: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3320: (ctxt->sax->characters != NULL))
1.74 daniel 3321: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 3322:
3323: }
3324:
3325: /*
3326: * Forward declarations for recursive behaviour.
3327: */
1.77 daniel 3328: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3329: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 3330:
1.28 daniel 3331: /************************************************************************
3332: * *
3333: * Extra stuff for namespace support *
3334: * Relates to http://www.w3.org/TR/WD-xml-names *
3335: * *
3336: ************************************************************************/
3337:
1.50 daniel 3338: /**
3339: * xmlNamespaceParseNCName:
3340: * @ctxt: an XML parser context
3341: *
3342: * parse an XML namespace name.
1.28 daniel 3343: *
3344: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3345: *
3346: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3347: * CombiningChar | Extender
1.68 daniel 3348: *
3349: * Returns the namespace name or NULL
1.28 daniel 3350: */
3351:
1.123 daniel 3352: xmlChar *
1.55 daniel 3353: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 3354: xmlChar buf[XML_MAX_NAMELEN + 5];
3355: int len = 0, l;
3356: int cur = CUR_CHAR(l);
1.28 daniel 3357:
1.156 daniel 3358: /* load first the value of the char !!! */
1.152 daniel 3359: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 3360:
1.152 daniel 3361: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3362: (cur == '.') || (cur == '-') ||
3363: (cur == '_') ||
3364: (IS_COMBINING(cur)) ||
3365: (IS_EXTENDER(cur))) {
3366: COPY_BUF(l,buf,len,cur);
3367: NEXTL(l);
3368: cur = CUR_CHAR(l);
1.91 daniel 3369: if (len >= XML_MAX_NAMELEN) {
3370: fprintf(stderr,
3371: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3372: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3373: (cur == '.') || (cur == '-') ||
3374: (cur == '_') ||
3375: (IS_COMBINING(cur)) ||
3376: (IS_EXTENDER(cur))) {
3377: NEXTL(l);
3378: cur = CUR_CHAR(l);
3379: }
1.91 daniel 3380: break;
3381: }
3382: }
3383: return(xmlStrndup(buf, len));
1.28 daniel 3384: }
3385:
1.50 daniel 3386: /**
3387: * xmlNamespaceParseQName:
3388: * @ctxt: an XML parser context
1.123 daniel 3389: * @prefix: a xmlChar **
1.50 daniel 3390: *
3391: * parse an XML qualified name
1.28 daniel 3392: *
3393: * [NS 5] QName ::= (Prefix ':')? LocalPart
3394: *
3395: * [NS 6] Prefix ::= NCName
3396: *
3397: * [NS 7] LocalPart ::= NCName
1.68 daniel 3398: *
1.127 daniel 3399: * Returns the local part, and prefix is updated
1.50 daniel 3400: * to get the Prefix if any.
1.28 daniel 3401: */
3402:
1.123 daniel 3403: xmlChar *
3404: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3405: xmlChar *ret = NULL;
1.28 daniel 3406:
3407: *prefix = NULL;
3408: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3409: if (RAW == ':') {
1.28 daniel 3410: *prefix = ret;
1.40 daniel 3411: NEXT;
1.28 daniel 3412: ret = xmlNamespaceParseNCName(ctxt);
3413: }
3414:
3415: return(ret);
3416: }
3417:
1.50 daniel 3418: /**
1.72 daniel 3419: * xmlSplitQName:
1.162 daniel 3420: * @ctxt: an XML parser context
1.72 daniel 3421: * @name: an XML parser context
1.123 daniel 3422: * @prefix: a xmlChar **
1.72 daniel 3423: *
1.206 veillard 3424: * parse an UTF8 encoded XML qualified name string
1.72 daniel 3425: *
3426: * [NS 5] QName ::= (Prefix ':')? LocalPart
3427: *
3428: * [NS 6] Prefix ::= NCName
3429: *
3430: * [NS 7] LocalPart ::= NCName
3431: *
1.127 daniel 3432: * Returns the local part, and prefix is updated
1.72 daniel 3433: * to get the Prefix if any.
3434: */
3435:
1.123 daniel 3436: xmlChar *
1.162 daniel 3437: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3438: xmlChar buf[XML_MAX_NAMELEN + 5];
3439: int len = 0;
1.123 daniel 3440: xmlChar *ret = NULL;
3441: const xmlChar *cur = name;
1.206 veillard 3442: int c;
1.72 daniel 3443:
3444: *prefix = NULL;
1.113 daniel 3445:
3446: /* xml: prefix is not really a namespace */
3447: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3448: (cur[2] == 'l') && (cur[3] == ':'))
3449: return(xmlStrdup(name));
3450:
1.162 daniel 3451: /* nasty but valid */
3452: if (cur[0] == ':')
3453: return(xmlStrdup(name));
3454:
1.206 veillard 3455: c = *cur++;
3456: while ((c != 0) && (c != ':')) {
3457: buf[len++] = c;
3458: c = *cur++;
1.162 daniel 3459: }
1.72 daniel 3460:
1.162 daniel 3461: ret = xmlStrndup(buf, len);
1.72 daniel 3462:
1.162 daniel 3463: if (c == ':') {
1.206 veillard 3464: c = *cur++;
3465: if (c == 0) return(ret);
1.72 daniel 3466: *prefix = ret;
1.162 daniel 3467: len = 0;
1.72 daniel 3468:
1.206 veillard 3469: while (c != 0) {
3470: buf[len++] = c;
3471: c = *cur++;
1.162 daniel 3472: }
1.72 daniel 3473:
1.162 daniel 3474: ret = xmlStrndup(buf, len);
1.72 daniel 3475: }
3476:
3477: return(ret);
3478: }
1.206 veillard 3479:
1.72 daniel 3480: /**
1.50 daniel 3481: * xmlNamespaceParseNSDef:
3482: * @ctxt: an XML parser context
3483: *
3484: * parse a namespace prefix declaration
1.28 daniel 3485: *
3486: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3487: *
3488: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3489: *
3490: * Returns the namespace name
1.28 daniel 3491: */
3492:
1.123 daniel 3493: xmlChar *
1.55 daniel 3494: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3495: xmlChar *name = NULL;
1.28 daniel 3496:
1.152 daniel 3497: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3498: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3499: (NXT(4) == 's')) {
3500: SKIP(5);
1.152 daniel 3501: if (RAW == ':') {
1.40 daniel 3502: NEXT;
1.28 daniel 3503: name = xmlNamespaceParseNCName(ctxt);
3504: }
3505: }
1.39 daniel 3506: return(name);
1.28 daniel 3507: }
3508:
1.50 daniel 3509: /**
3510: * xmlParseQuotedString:
3511: * @ctxt: an XML parser context
3512: *
1.45 daniel 3513: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3514: * To be removed at next drop of binary compatibility
1.68 daniel 3515: *
3516: * Returns the string parser or NULL.
1.45 daniel 3517: */
1.123 daniel 3518: xmlChar *
1.55 daniel 3519: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3520: xmlChar *buf = NULL;
1.152 daniel 3521: int len = 0,l;
1.140 daniel 3522: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3523: int c;
1.45 daniel 3524:
1.135 daniel 3525: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3526: if (buf == NULL) {
3527: fprintf(stderr, "malloc of %d byte failed\n", size);
3528: return(NULL);
3529: }
1.152 daniel 3530: if (RAW == '"') {
1.45 daniel 3531: NEXT;
1.152 daniel 3532: c = CUR_CHAR(l);
1.135 daniel 3533: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3534: if (len + 5 >= size) {
1.135 daniel 3535: size *= 2;
1.204 veillard 3536: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3537: if (buf == NULL) {
3538: fprintf(stderr, "realloc of %d byte failed\n", size);
3539: return(NULL);
3540: }
3541: }
1.152 daniel 3542: COPY_BUF(l,buf,len,c);
3543: NEXTL(l);
3544: c = CUR_CHAR(l);
1.135 daniel 3545: }
3546: if (c != '"') {
1.123 daniel 3547: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3549: ctxt->sax->error(ctxt->userData,
3550: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3551: ctxt->wellFormed = 0;
1.180 daniel 3552: ctxt->disableSAX = 1;
1.55 daniel 3553: } else {
1.45 daniel 3554: NEXT;
3555: }
1.152 daniel 3556: } else if (RAW == '\''){
1.45 daniel 3557: NEXT;
1.135 daniel 3558: c = CUR;
3559: while (IS_CHAR(c) && (c != '\'')) {
3560: if (len + 1 >= size) {
3561: size *= 2;
1.204 veillard 3562: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3563: if (buf == NULL) {
3564: fprintf(stderr, "realloc of %d byte failed\n", size);
3565: return(NULL);
3566: }
3567: }
3568: buf[len++] = c;
3569: NEXT;
3570: c = CUR;
3571: }
1.152 daniel 3572: if (RAW != '\'') {
1.123 daniel 3573: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3575: ctxt->sax->error(ctxt->userData,
3576: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3577: ctxt->wellFormed = 0;
1.180 daniel 3578: ctxt->disableSAX = 1;
1.55 daniel 3579: } else {
1.45 daniel 3580: NEXT;
3581: }
3582: }
1.135 daniel 3583: return(buf);
1.45 daniel 3584: }
3585:
1.50 daniel 3586: /**
3587: * xmlParseNamespace:
3588: * @ctxt: an XML parser context
3589: *
1.45 daniel 3590: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3591: *
3592: * This is what the older xml-name Working Draft specified, a bunch of
3593: * other stuff may still rely on it, so support is still here as
1.127 daniel 3594: * if it was declared on the root of the Tree:-(
1.110 daniel 3595: *
3596: * To be removed at next drop of binary compatibility
1.45 daniel 3597: */
3598:
1.55 daniel 3599: void
3600: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3601: xmlChar *href = NULL;
3602: xmlChar *prefix = NULL;
1.45 daniel 3603: int garbage = 0;
3604:
3605: /*
3606: * We just skipped "namespace" or "xml:namespace"
3607: */
3608: SKIP_BLANKS;
3609:
1.153 daniel 3610: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3611: /*
3612: * We can have "ns" or "prefix" attributes
3613: * Old encoding as 'href' or 'AS' attributes is still supported
3614: */
1.152 daniel 3615: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3616: garbage = 0;
3617: SKIP(2);
3618: SKIP_BLANKS;
3619:
1.152 daniel 3620: if (RAW != '=') continue;
1.45 daniel 3621: NEXT;
3622: SKIP_BLANKS;
3623:
3624: href = xmlParseQuotedString(ctxt);
3625: SKIP_BLANKS;
1.152 daniel 3626: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3627: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3628: garbage = 0;
3629: SKIP(4);
3630: SKIP_BLANKS;
3631:
1.152 daniel 3632: if (RAW != '=') continue;
1.45 daniel 3633: NEXT;
3634: SKIP_BLANKS;
3635:
3636: href = xmlParseQuotedString(ctxt);
3637: SKIP_BLANKS;
1.152 daniel 3638: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3639: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3640: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3641: garbage = 0;
3642: SKIP(6);
3643: SKIP_BLANKS;
3644:
1.152 daniel 3645: if (RAW != '=') continue;
1.45 daniel 3646: NEXT;
3647: SKIP_BLANKS;
3648:
3649: prefix = xmlParseQuotedString(ctxt);
3650: SKIP_BLANKS;
1.152 daniel 3651: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3652: garbage = 0;
3653: SKIP(2);
3654: SKIP_BLANKS;
3655:
1.152 daniel 3656: if (RAW != '=') continue;
1.45 daniel 3657: NEXT;
3658: SKIP_BLANKS;
3659:
3660: prefix = xmlParseQuotedString(ctxt);
3661: SKIP_BLANKS;
1.152 daniel 3662: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3663: garbage = 0;
1.91 daniel 3664: NEXT;
1.45 daniel 3665: } else {
3666: /*
3667: * Found garbage when parsing the namespace
3668: */
1.122 daniel 3669: if (!garbage) {
1.55 daniel 3670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3671: ctxt->sax->error(ctxt->userData,
3672: "xmlParseNamespace found garbage\n");
3673: }
1.123 daniel 3674: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3675: ctxt->wellFormed = 0;
1.180 daniel 3676: ctxt->disableSAX = 1;
1.45 daniel 3677: NEXT;
3678: }
3679: }
3680:
3681: MOVETO_ENDTAG(CUR_PTR);
3682: NEXT;
3683:
3684: /*
3685: * Register the DTD.
1.72 daniel 3686: if (href != NULL)
3687: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3688: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3689: */
3690:
1.119 daniel 3691: if (prefix != NULL) xmlFree(prefix);
3692: if (href != NULL) xmlFree(href);
1.45 daniel 3693: }
3694:
1.28 daniel 3695: /************************************************************************
3696: * *
3697: * The parser itself *
3698: * Relates to http://www.w3.org/TR/REC-xml *
3699: * *
3700: ************************************************************************/
1.14 veillard 3701:
1.50 daniel 3702: /**
1.97 daniel 3703: * xmlScanName:
3704: * @ctxt: an XML parser context
3705: *
3706: * Trickery: parse an XML name but without consuming the input flow
3707: * Needed for rollback cases.
3708: *
3709: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3710: * CombiningChar | Extender
3711: *
3712: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3713: *
3714: * [6] Names ::= Name (S Name)*
3715: *
3716: * Returns the Name parsed or NULL
3717: */
3718:
1.123 daniel 3719: xmlChar *
1.97 daniel 3720: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3721: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3722: int len = 0;
3723:
3724: GROW;
1.152 daniel 3725: if (!IS_LETTER(RAW) && (RAW != '_') &&
3726: (RAW != ':')) {
1.97 daniel 3727: return(NULL);
3728: }
3729:
3730: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3731: (NXT(len) == '.') || (NXT(len) == '-') ||
3732: (NXT(len) == '_') || (NXT(len) == ':') ||
3733: (IS_COMBINING(NXT(len))) ||
3734: (IS_EXTENDER(NXT(len)))) {
3735: buf[len] = NXT(len);
3736: len++;
3737: if (len >= XML_MAX_NAMELEN) {
3738: fprintf(stderr,
3739: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3740: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3741: (NXT(len) == '.') || (NXT(len) == '-') ||
3742: (NXT(len) == '_') || (NXT(len) == ':') ||
3743: (IS_COMBINING(NXT(len))) ||
3744: (IS_EXTENDER(NXT(len))))
3745: len++;
3746: break;
3747: }
3748: }
3749: return(xmlStrndup(buf, len));
3750: }
3751:
3752: /**
1.50 daniel 3753: * xmlParseName:
3754: * @ctxt: an XML parser context
3755: *
3756: * parse an XML name.
1.22 daniel 3757: *
3758: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3759: * CombiningChar | Extender
3760: *
3761: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3762: *
3763: * [6] Names ::= Name (S Name)*
1.68 daniel 3764: *
3765: * Returns the Name parsed or NULL
1.1 veillard 3766: */
3767:
1.123 daniel 3768: xmlChar *
1.55 daniel 3769: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3770: xmlChar buf[XML_MAX_NAMELEN + 5];
3771: int len = 0, l;
3772: int c;
1.1 veillard 3773:
1.91 daniel 3774: GROW;
1.160 daniel 3775: c = CUR_CHAR(l);
1.190 daniel 3776: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3777: (!IS_LETTER(c) && (c != '_') &&
3778: (c != ':'))) {
1.91 daniel 3779: return(NULL);
3780: }
1.40 daniel 3781:
1.190 daniel 3782: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3783: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3784: (c == '.') || (c == '-') ||
3785: (c == '_') || (c == ':') ||
3786: (IS_COMBINING(c)) ||
3787: (IS_EXTENDER(c)))) {
1.160 daniel 3788: COPY_BUF(l,buf,len,c);
3789: NEXTL(l);
3790: c = CUR_CHAR(l);
1.91 daniel 3791: if (len >= XML_MAX_NAMELEN) {
3792: fprintf(stderr,
3793: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3794: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3795: (c == '.') || (c == '-') ||
3796: (c == '_') || (c == ':') ||
3797: (IS_COMBINING(c)) ||
3798: (IS_EXTENDER(c))) {
3799: NEXTL(l);
3800: c = CUR_CHAR(l);
1.97 daniel 3801: }
1.91 daniel 3802: break;
3803: }
3804: }
3805: return(xmlStrndup(buf, len));
1.22 daniel 3806: }
3807:
1.50 daniel 3808: /**
1.135 daniel 3809: * xmlParseStringName:
3810: * @ctxt: an XML parser context
3811: * @str: a pointer to an index in the string
3812: *
3813: * parse an XML name.
3814: *
3815: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3816: * CombiningChar | Extender
3817: *
3818: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3819: *
3820: * [6] Names ::= Name (S Name)*
3821: *
3822: * Returns the Name parsed or NULL. The str pointer
3823: * is updated to the current location in the string.
3824: */
3825:
3826: xmlChar *
3827: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3828: xmlChar buf[XML_MAX_NAMELEN + 5];
3829: const xmlChar *cur = *str;
3830: int len = 0, l;
3831: int c;
1.135 daniel 3832:
1.176 daniel 3833: c = CUR_SCHAR(cur, l);
3834: if (!IS_LETTER(c) && (c != '_') &&
3835: (c != ':')) {
1.135 daniel 3836: return(NULL);
3837: }
3838:
1.176 daniel 3839: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3840: (c == '.') || (c == '-') ||
3841: (c == '_') || (c == ':') ||
3842: (IS_COMBINING(c)) ||
3843: (IS_EXTENDER(c))) {
3844: COPY_BUF(l,buf,len,c);
3845: cur += l;
3846: c = CUR_SCHAR(cur, l);
3847: if (len >= XML_MAX_NAMELEN) {
3848: fprintf(stderr,
3849: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3850: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3851: (c == '.') || (c == '-') ||
3852: (c == '_') || (c == ':') ||
3853: (IS_COMBINING(c)) ||
3854: (IS_EXTENDER(c))) {
3855: cur += l;
3856: c = CUR_SCHAR(cur, l);
3857: }
3858: break;
3859: }
1.135 daniel 3860: }
1.176 daniel 3861: *str = cur;
3862: return(xmlStrndup(buf, len));
1.135 daniel 3863: }
3864:
3865: /**
1.50 daniel 3866: * xmlParseNmtoken:
3867: * @ctxt: an XML parser context
3868: *
3869: * parse an XML Nmtoken.
1.22 daniel 3870: *
3871: * [7] Nmtoken ::= (NameChar)+
3872: *
3873: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3874: *
3875: * Returns the Nmtoken parsed or NULL
1.22 daniel 3876: */
3877:
1.123 daniel 3878: xmlChar *
1.55 daniel 3879: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3880: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3881: int len = 0;
1.160 daniel 3882: int c,l;
1.22 daniel 3883:
1.91 daniel 3884: GROW;
1.160 daniel 3885: c = CUR_CHAR(l);
3886: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3887: (c == '.') || (c == '-') ||
3888: (c == '_') || (c == ':') ||
3889: (IS_COMBINING(c)) ||
3890: (IS_EXTENDER(c))) {
3891: COPY_BUF(l,buf,len,c);
3892: NEXTL(l);
3893: c = CUR_CHAR(l);
1.91 daniel 3894: if (len >= XML_MAX_NAMELEN) {
3895: fprintf(stderr,
3896: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3897: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3898: (c == '.') || (c == '-') ||
3899: (c == '_') || (c == ':') ||
3900: (IS_COMBINING(c)) ||
3901: (IS_EXTENDER(c))) {
3902: NEXTL(l);
3903: c = CUR_CHAR(l);
3904: }
1.91 daniel 3905: break;
3906: }
3907: }
1.168 daniel 3908: if (len == 0)
3909: return(NULL);
1.91 daniel 3910: return(xmlStrndup(buf, len));
1.1 veillard 3911: }
3912:
1.50 daniel 3913: /**
3914: * xmlParseEntityValue:
3915: * @ctxt: an XML parser context
1.78 daniel 3916: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3917: *
3918: * parse a value for ENTITY decl.
1.24 daniel 3919: *
3920: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3921: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3922: *
1.78 daniel 3923: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3924: */
3925:
1.123 daniel 3926: xmlChar *
3927: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3928: xmlChar *buf = NULL;
3929: int len = 0;
1.140 daniel 3930: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3931: int c, l;
1.135 daniel 3932: xmlChar stop;
1.123 daniel 3933: xmlChar *ret = NULL;
1.176 daniel 3934: const xmlChar *cur = NULL;
1.98 daniel 3935: xmlParserInputPtr input;
1.24 daniel 3936:
1.152 daniel 3937: if (RAW == '"') stop = '"';
3938: else if (RAW == '\'') stop = '\'';
1.135 daniel 3939: else {
3940: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3941: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3942: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3943: ctxt->wellFormed = 0;
1.180 daniel 3944: ctxt->disableSAX = 1;
1.135 daniel 3945: return(NULL);
3946: }
3947: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3948: if (buf == NULL) {
3949: fprintf(stderr, "malloc of %d byte failed\n", size);
3950: return(NULL);
3951: }
1.94 daniel 3952:
1.135 daniel 3953: /*
3954: * The content of the entity definition is copied in a buffer.
3955: */
1.94 daniel 3956:
1.135 daniel 3957: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3958: input = ctxt->input;
3959: GROW;
3960: NEXT;
1.152 daniel 3961: c = CUR_CHAR(l);
1.135 daniel 3962: /*
3963: * NOTE: 4.4.5 Included in Literal
3964: * When a parameter entity reference appears in a literal entity
3965: * value, ... a single or double quote character in the replacement
3966: * text is always treated as a normal data character and will not
3967: * terminate the literal.
3968: * In practice it means we stop the loop only when back at parsing
3969: * the initial entity and the quote is found
3970: */
3971: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3972: if (len + 5 >= size) {
1.135 daniel 3973: size *= 2;
1.204 veillard 3974: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3975: if (buf == NULL) {
3976: fprintf(stderr, "realloc of %d byte failed\n", size);
3977: return(NULL);
1.94 daniel 3978: }
1.79 daniel 3979: }
1.152 daniel 3980: COPY_BUF(l,buf,len,c);
3981: NEXTL(l);
1.98 daniel 3982: /*
1.135 daniel 3983: * Pop-up of finished entities.
1.98 daniel 3984: */
1.152 daniel 3985: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3986: xmlPopInput(ctxt);
1.152 daniel 3987:
3988: c = CUR_CHAR(l);
1.135 daniel 3989: if (c == 0) {
1.94 daniel 3990: GROW;
1.152 daniel 3991: c = CUR_CHAR(l);
1.79 daniel 3992: }
1.135 daniel 3993: }
3994: buf[len] = 0;
3995:
3996: /*
1.176 daniel 3997: * Raise problem w.r.t. '&' and '%' being used in non-entities
3998: * reference constructs. Note Charref will be handled in
3999: * xmlStringDecodeEntities()
4000: */
4001: cur = buf;
4002: while (*cur != 0) {
4003: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
4004: xmlChar *name;
4005: xmlChar tmp = *cur;
4006:
4007: cur++;
4008: name = xmlParseStringName(ctxt, &cur);
4009: if ((name == NULL) || (*cur != ';')) {
4010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4011: ctxt->sax->error(ctxt->userData,
4012: "EntityValue: '%c' forbidden except for entities references\n",
4013: tmp);
4014: ctxt->wellFormed = 0;
1.180 daniel 4015: ctxt->disableSAX = 1;
1.176 daniel 4016: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
4017: }
4018: if ((ctxt->inSubset == 1) && (tmp == '%')) {
4019: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4020: ctxt->sax->error(ctxt->userData,
4021: "EntityValue: PEReferences forbidden in internal subset\n",
4022: tmp);
4023: ctxt->wellFormed = 0;
1.180 daniel 4024: ctxt->disableSAX = 1;
1.176 daniel 4025: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
4026: }
4027: if (name != NULL)
4028: xmlFree(name);
4029: }
4030: cur++;
4031: }
4032:
4033: /*
1.135 daniel 4034: * Then PEReference entities are substituted.
4035: */
4036: if (c != stop) {
4037: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 4038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 4039: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 4040: ctxt->wellFormed = 0;
1.180 daniel 4041: ctxt->disableSAX = 1;
1.170 daniel 4042: xmlFree(buf);
1.135 daniel 4043: } else {
4044: NEXT;
4045: /*
4046: * NOTE: 4.4.7 Bypassed
4047: * When a general entity reference appears in the EntityValue in
4048: * an entity declaration, it is bypassed and left as is.
1.176 daniel 4049: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 4050: */
4051: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
4052: 0, 0, 0);
4053: if (orig != NULL)
4054: *orig = buf;
4055: else
4056: xmlFree(buf);
1.24 daniel 4057: }
4058:
4059: return(ret);
4060: }
4061:
1.50 daniel 4062: /**
4063: * xmlParseAttValue:
4064: * @ctxt: an XML parser context
4065: *
4066: * parse a value for an attribute
1.78 daniel 4067: * Note: the parser won't do substitution of entities here, this
1.113 daniel 4068: * will be handled later in xmlStringGetNodeList
1.29 daniel 4069: *
4070: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4071: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 4072: *
1.129 daniel 4073: * 3.3.3 Attribute-Value Normalization:
4074: * Before the value of an attribute is passed to the application or
4075: * checked for validity, the XML processor must normalize it as follows:
4076: * - a character reference is processed by appending the referenced
4077: * character to the attribute value
4078: * - an entity reference is processed by recursively processing the
4079: * replacement text of the entity
4080: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4081: * appending #x20 to the normalized value, except that only a single
4082: * #x20 is appended for a "#xD#xA" sequence that is part of an external
4083: * parsed entity or the literal entity value of an internal parsed entity
4084: * - other characters are processed by appending them to the normalized value
1.130 daniel 4085: * If the declared value is not CDATA, then the XML processor must further
4086: * process the normalized attribute value by discarding any leading and
4087: * trailing space (#x20) characters, and by replacing sequences of space
4088: * (#x20) characters by a single space (#x20) character.
4089: * All attributes for which no declaration has been read should be treated
4090: * by a non-validating parser as if declared CDATA.
1.129 daniel 4091: *
4092: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 4093: */
4094:
1.123 daniel 4095: xmlChar *
1.55 daniel 4096: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 4097: xmlChar limit = 0;
1.198 daniel 4098: xmlChar *buf = NULL;
4099: int len = 0;
4100: int buf_size = 0;
4101: int c, l;
1.129 daniel 4102: xmlChar *current = NULL;
4103: xmlEntityPtr ent;
4104:
1.29 daniel 4105:
1.91 daniel 4106: SHRINK;
1.151 daniel 4107: if (NXT(0) == '"') {
1.96 daniel 4108: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 4109: limit = '"';
1.40 daniel 4110: NEXT;
1.151 daniel 4111: } else if (NXT(0) == '\'') {
1.129 daniel 4112: limit = '\'';
1.96 daniel 4113: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 4114: NEXT;
1.29 daniel 4115: } else {
1.123 daniel 4116: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 4117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4118: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 4119: ctxt->wellFormed = 0;
1.180 daniel 4120: ctxt->disableSAX = 1;
1.129 daniel 4121: return(NULL);
1.29 daniel 4122: }
4123:
1.129 daniel 4124: /*
4125: * allocate a translation buffer.
4126: */
1.198 daniel 4127: buf_size = XML_PARSER_BUFFER_SIZE;
4128: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
4129: if (buf == NULL) {
1.129 daniel 4130: perror("xmlParseAttValue: malloc failed");
4131: return(NULL);
4132: }
4133:
4134: /*
4135: * Ok loop until we reach one of the ending char or a size limit.
4136: */
1.198 daniel 4137: c = CUR_CHAR(l);
4138: while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
4139: if (c == 0) break;
1.205 veillard 4140: if (ctxt->token == '&') {
4141: static xmlChar buffer[6] = "&";
4142:
4143: if (len > buf_size - 10) {
4144: growBuffer(buf);
4145: }
4146: current = &buffer[0];
4147: while (*current != 0) {
4148: buf[len++] = *current++;
4149: }
4150: ctxt->token = 0;
4151: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 4152: int val = xmlParseCharRef(ctxt);
1.198 daniel 4153: COPY_BUF(l,buf,len,val);
4154: NEXTL(l);
4155: } else if (c == '&') {
1.129 daniel 4156: ent = xmlParseEntityRef(ctxt);
4157: if ((ent != NULL) &&
4158: (ctxt->replaceEntities != 0)) {
1.185 daniel 4159: xmlChar *rep;
4160:
1.186 daniel 4161: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4162: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 4163: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 4164: if (rep != NULL) {
4165: current = rep;
4166: while (*current != 0) {
1.198 daniel 4167: buf[len++] = *current++;
4168: if (len > buf_size - 10) {
4169: growBuffer(buf);
1.186 daniel 4170: }
1.185 daniel 4171: }
1.186 daniel 4172: xmlFree(rep);
1.129 daniel 4173: }
1.186 daniel 4174: } else {
4175: if (ent->content != NULL)
1.198 daniel 4176: buf[len++] = ent->content[0];
1.129 daniel 4177: }
4178: } else if (ent != NULL) {
4179: int i = xmlStrlen(ent->name);
4180: const xmlChar *cur = ent->name;
4181:
1.186 daniel 4182: /*
4183: * This may look absurd but is needed to detect
4184: * entities problems
4185: */
1.211 veillard 4186: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4187: (ent->content != NULL)) {
1.186 daniel 4188: xmlChar *rep;
4189: rep = xmlStringDecodeEntities(ctxt, ent->content,
4190: XML_SUBSTITUTE_REF, 0, 0, 0);
4191: if (rep != NULL)
4192: xmlFree(rep);
4193: }
4194:
4195: /*
4196: * Just output the reference
4197: */
1.198 daniel 4198: buf[len++] = '&';
4199: if (len > buf_size - i - 10) {
4200: growBuffer(buf);
1.129 daniel 4201: }
4202: for (;i > 0;i--)
1.198 daniel 4203: buf[len++] = *cur++;
4204: buf[len++] = ';';
1.129 daniel 4205: }
4206: } else {
1.198 daniel 4207: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4208: COPY_BUF(l,buf,len,0x20);
4209: if (len > buf_size - 10) {
4210: growBuffer(buf);
1.129 daniel 4211: }
4212: } else {
1.198 daniel 4213: COPY_BUF(l,buf,len,c);
4214: if (len > buf_size - 10) {
4215: growBuffer(buf);
1.129 daniel 4216: }
4217: }
1.198 daniel 4218: NEXTL(l);
1.129 daniel 4219: }
1.198 daniel 4220: GROW;
4221: c = CUR_CHAR(l);
1.129 daniel 4222: }
1.198 daniel 4223: buf[len++] = 0;
1.152 daniel 4224: if (RAW == '<') {
1.129 daniel 4225: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4226: ctxt->sax->error(ctxt->userData,
4227: "Unescaped '<' not allowed in attributes values\n");
4228: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
4229: ctxt->wellFormed = 0;
1.180 daniel 4230: ctxt->disableSAX = 1;
1.152 daniel 4231: } else if (RAW != limit) {
1.129 daniel 4232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4233: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
4234: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
4235: ctxt->wellFormed = 0;
1.180 daniel 4236: ctxt->disableSAX = 1;
1.129 daniel 4237: } else
4238: NEXT;
1.198 daniel 4239: return(buf);
1.29 daniel 4240: }
4241:
1.50 daniel 4242: /**
4243: * xmlParseSystemLiteral:
4244: * @ctxt: an XML parser context
4245: *
4246: * parse an XML Literal
1.21 daniel 4247: *
1.22 daniel 4248: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 4249: *
4250: * Returns the SystemLiteral parsed or NULL
1.21 daniel 4251: */
4252:
1.123 daniel 4253: xmlChar *
1.55 daniel 4254: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4255: xmlChar *buf = NULL;
4256: int len = 0;
1.140 daniel 4257: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4258: int cur, l;
1.135 daniel 4259: xmlChar stop;
1.168 daniel 4260: int state = ctxt->instate;
1.21 daniel 4261:
1.91 daniel 4262: SHRINK;
1.152 daniel 4263: if (RAW == '"') {
1.40 daniel 4264: NEXT;
1.135 daniel 4265: stop = '"';
1.152 daniel 4266: } else if (RAW == '\'') {
1.40 daniel 4267: NEXT;
1.135 daniel 4268: stop = '\'';
1.21 daniel 4269: } else {
1.55 daniel 4270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4271: ctxt->sax->error(ctxt->userData,
4272: "SystemLiteral \" or ' expected\n");
1.123 daniel 4273: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4274: ctxt->wellFormed = 0;
1.180 daniel 4275: ctxt->disableSAX = 1;
1.135 daniel 4276: return(NULL);
1.21 daniel 4277: }
4278:
1.135 daniel 4279: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4280: if (buf == NULL) {
4281: fprintf(stderr, "malloc of %d byte failed\n", size);
4282: return(NULL);
4283: }
1.168 daniel 4284: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 4285: cur = CUR_CHAR(l);
1.135 daniel 4286: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 4287: if (len + 5 >= size) {
1.135 daniel 4288: size *= 2;
1.204 veillard 4289: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4290: if (buf == NULL) {
4291: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 veillard 4292: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4293: return(NULL);
4294: }
4295: }
1.152 daniel 4296: COPY_BUF(l,buf,len,cur);
4297: NEXTL(l);
4298: cur = CUR_CHAR(l);
1.135 daniel 4299: if (cur == 0) {
4300: GROW;
4301: SHRINK;
1.152 daniel 4302: cur = CUR_CHAR(l);
1.135 daniel 4303: }
4304: }
4305: buf[len] = 0;
1.204 veillard 4306: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4307: if (!IS_CHAR(cur)) {
4308: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4309: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
4310: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4311: ctxt->wellFormed = 0;
1.180 daniel 4312: ctxt->disableSAX = 1;
1.135 daniel 4313: } else {
4314: NEXT;
4315: }
4316: return(buf);
1.21 daniel 4317: }
4318:
1.50 daniel 4319: /**
4320: * xmlParsePubidLiteral:
4321: * @ctxt: an XML parser context
1.21 daniel 4322: *
1.50 daniel 4323: * parse an XML public literal
1.68 daniel 4324: *
4325: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4326: *
4327: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 4328: */
4329:
1.123 daniel 4330: xmlChar *
1.55 daniel 4331: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4332: xmlChar *buf = NULL;
4333: int len = 0;
1.140 daniel 4334: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 4335: xmlChar cur;
4336: xmlChar stop;
1.125 daniel 4337:
1.91 daniel 4338: SHRINK;
1.152 daniel 4339: if (RAW == '"') {
1.40 daniel 4340: NEXT;
1.135 daniel 4341: stop = '"';
1.152 daniel 4342: } else if (RAW == '\'') {
1.40 daniel 4343: NEXT;
1.135 daniel 4344: stop = '\'';
1.21 daniel 4345: } else {
1.55 daniel 4346: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4347: ctxt->sax->error(ctxt->userData,
4348: "SystemLiteral \" or ' expected\n");
1.123 daniel 4349: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4350: ctxt->wellFormed = 0;
1.180 daniel 4351: ctxt->disableSAX = 1;
1.135 daniel 4352: return(NULL);
4353: }
4354: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4355: if (buf == NULL) {
4356: fprintf(stderr, "malloc of %d byte failed\n", size);
4357: return(NULL);
4358: }
4359: cur = CUR;
4360: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4361: if (len + 1 >= size) {
4362: size *= 2;
1.204 veillard 4363: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4364: if (buf == NULL) {
4365: fprintf(stderr, "realloc of %d byte failed\n", size);
4366: return(NULL);
4367: }
4368: }
4369: buf[len++] = cur;
4370: NEXT;
4371: cur = CUR;
4372: if (cur == 0) {
4373: GROW;
4374: SHRINK;
4375: cur = CUR;
4376: }
4377: }
4378: buf[len] = 0;
4379: if (cur != stop) {
4380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4381: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4382: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4383: ctxt->wellFormed = 0;
1.180 daniel 4384: ctxt->disableSAX = 1;
1.135 daniel 4385: } else {
4386: NEXT;
1.21 daniel 4387: }
1.135 daniel 4388: return(buf);
1.21 daniel 4389: }
4390:
1.50 daniel 4391: /**
4392: * xmlParseCharData:
4393: * @ctxt: an XML parser context
4394: * @cdata: int indicating whether we are within a CDATA section
4395: *
4396: * parse a CharData section.
4397: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4398: *
1.151 daniel 4399: * The right angle bracket (>) may be represented using the string ">",
4400: * and must, for compatibility, be escaped using ">" or a character
4401: * reference when it appears in the string "]]>" in content, when that
4402: * string is not marking the end of a CDATA section.
4403: *
1.27 daniel 4404: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4405: */
4406:
1.55 daniel 4407: void
4408: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4409: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4410: int nbchar = 0;
1.152 daniel 4411: int cur, l;
1.27 daniel 4412:
1.91 daniel 4413: SHRINK;
1.152 daniel 4414: cur = CUR_CHAR(l);
1.190 daniel 4415: while (((cur != '<') || (ctxt->token == '<')) &&
4416: ((cur != '&') || (ctxt->token == '&')) &&
4417: (IS_CHAR(cur))) {
1.97 daniel 4418: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4419: (NXT(2) == '>')) {
4420: if (cdata) break;
4421: else {
4422: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4423: ctxt->sax->error(ctxt->userData,
1.59 daniel 4424: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4425: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4426: /* Should this be relaxed ??? I see a "must here */
4427: ctxt->wellFormed = 0;
1.180 daniel 4428: ctxt->disableSAX = 1;
1.59 daniel 4429: }
4430: }
1.152 daniel 4431: COPY_BUF(l,buf,nbchar,cur);
4432: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4433: /*
4434: * Ok the segment is to be consumed as chars.
4435: */
1.171 daniel 4436: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4437: if (areBlanks(ctxt, buf, nbchar)) {
4438: if (ctxt->sax->ignorableWhitespace != NULL)
4439: ctxt->sax->ignorableWhitespace(ctxt->userData,
4440: buf, nbchar);
4441: } else {
4442: if (ctxt->sax->characters != NULL)
4443: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4444: }
4445: }
4446: nbchar = 0;
4447: }
1.152 daniel 4448: NEXTL(l);
4449: cur = CUR_CHAR(l);
1.27 daniel 4450: }
1.91 daniel 4451: if (nbchar != 0) {
4452: /*
4453: * Ok the segment is to be consumed as chars.
4454: */
1.171 daniel 4455: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4456: if (areBlanks(ctxt, buf, nbchar)) {
4457: if (ctxt->sax->ignorableWhitespace != NULL)
4458: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4459: } else {
4460: if (ctxt->sax->characters != NULL)
4461: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4462: }
4463: }
1.45 daniel 4464: }
1.27 daniel 4465: }
4466:
1.50 daniel 4467: /**
4468: * xmlParseExternalID:
4469: * @ctxt: an XML parser context
1.123 daniel 4470: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4471: * @strict: indicate whether we should restrict parsing to only
4472: * production [75], see NOTE below
1.50 daniel 4473: *
1.67 daniel 4474: * Parse an External ID or a Public ID
4475: *
4476: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4477: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4478: *
4479: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4480: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4481: *
4482: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4483: *
1.68 daniel 4484: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4485: * case publicID receives PubidLiteral, is strict is off
4486: * it is possible to return NULL and have publicID set.
1.22 daniel 4487: */
4488:
1.123 daniel 4489: xmlChar *
4490: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4491: xmlChar *URI = NULL;
1.22 daniel 4492:
1.91 daniel 4493: SHRINK;
1.152 daniel 4494: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4495: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4496: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4497: SKIP(6);
1.59 daniel 4498: if (!IS_BLANK(CUR)) {
4499: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4500: ctxt->sax->error(ctxt->userData,
1.59 daniel 4501: "Space required after 'SYSTEM'\n");
1.123 daniel 4502: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4503: ctxt->wellFormed = 0;
1.180 daniel 4504: ctxt->disableSAX = 1;
1.59 daniel 4505: }
1.42 daniel 4506: SKIP_BLANKS;
1.39 daniel 4507: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4508: if (URI == NULL) {
1.55 daniel 4509: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4510: ctxt->sax->error(ctxt->userData,
1.39 daniel 4511: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4512: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4513: ctxt->wellFormed = 0;
1.180 daniel 4514: ctxt->disableSAX = 1;
1.59 daniel 4515: }
1.152 daniel 4516: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4517: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4518: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4519: SKIP(6);
1.59 daniel 4520: if (!IS_BLANK(CUR)) {
4521: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4522: ctxt->sax->error(ctxt->userData,
1.59 daniel 4523: "Space required after 'PUBLIC'\n");
1.123 daniel 4524: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4525: ctxt->wellFormed = 0;
1.180 daniel 4526: ctxt->disableSAX = 1;
1.59 daniel 4527: }
1.42 daniel 4528: SKIP_BLANKS;
1.39 daniel 4529: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4530: if (*publicID == NULL) {
1.55 daniel 4531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4532: ctxt->sax->error(ctxt->userData,
1.39 daniel 4533: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4534: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4535: ctxt->wellFormed = 0;
1.180 daniel 4536: ctxt->disableSAX = 1;
1.59 daniel 4537: }
1.67 daniel 4538: if (strict) {
4539: /*
4540: * We don't handle [83] so "S SystemLiteral" is required.
4541: */
4542: if (!IS_BLANK(CUR)) {
4543: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4544: ctxt->sax->error(ctxt->userData,
1.67 daniel 4545: "Space required after the Public Identifier\n");
1.123 daniel 4546: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4547: ctxt->wellFormed = 0;
1.180 daniel 4548: ctxt->disableSAX = 1;
1.67 daniel 4549: }
4550: } else {
4551: /*
4552: * We handle [83] so we return immediately, if
4553: * "S SystemLiteral" is not detected. From a purely parsing
4554: * point of view that's a nice mess.
4555: */
1.135 daniel 4556: const xmlChar *ptr;
4557: GROW;
4558:
4559: ptr = CUR_PTR;
1.67 daniel 4560: if (!IS_BLANK(*ptr)) return(NULL);
4561:
4562: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4563: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4564: }
1.42 daniel 4565: SKIP_BLANKS;
1.39 daniel 4566: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4567: if (URI == NULL) {
1.55 daniel 4568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4569: ctxt->sax->error(ctxt->userData,
1.39 daniel 4570: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4571: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4572: ctxt->wellFormed = 0;
1.180 daniel 4573: ctxt->disableSAX = 1;
1.59 daniel 4574: }
1.22 daniel 4575: }
1.39 daniel 4576: return(URI);
1.22 daniel 4577: }
4578:
1.50 daniel 4579: /**
4580: * xmlParseComment:
1.69 daniel 4581: * @ctxt: an XML parser context
1.50 daniel 4582: *
1.3 veillard 4583: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4584: * The spec says that "For compatibility, the string "--" (double-hyphen)
4585: * must not occur within comments. "
1.22 daniel 4586: *
4587: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4588: */
1.72 daniel 4589: void
1.114 daniel 4590: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4591: xmlChar *buf = NULL;
1.195 daniel 4592: int len;
1.140 daniel 4593: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4594: int q, ql;
4595: int r, rl;
4596: int cur, l;
1.140 daniel 4597: xmlParserInputState state;
1.187 daniel 4598: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4599:
4600: /*
1.22 daniel 4601: * Check that there is a comment right here.
1.3 veillard 4602: */
1.152 daniel 4603: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4604: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4605:
1.140 daniel 4606: state = ctxt->instate;
1.97 daniel 4607: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4608: SHRINK;
1.40 daniel 4609: SKIP(4);
1.135 daniel 4610: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4611: if (buf == NULL) {
4612: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4613: ctxt->instate = state;
1.135 daniel 4614: return;
4615: }
1.152 daniel 4616: q = CUR_CHAR(ql);
4617: NEXTL(ql);
4618: r = CUR_CHAR(rl);
4619: NEXTL(rl);
4620: cur = CUR_CHAR(l);
1.195 daniel 4621: len = 0;
1.135 daniel 4622: while (IS_CHAR(cur) &&
4623: ((cur != '>') ||
4624: (r != '-') || (q != '-'))) {
1.195 daniel 4625: if ((r == '-') && (q == '-') && (len > 1)) {
1.55 daniel 4626: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4627: ctxt->sax->error(ctxt->userData,
1.38 daniel 4628: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4629: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4630: ctxt->wellFormed = 0;
1.180 daniel 4631: ctxt->disableSAX = 1;
1.59 daniel 4632: }
1.152 daniel 4633: if (len + 5 >= size) {
1.135 daniel 4634: size *= 2;
1.204 veillard 4635: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4636: if (buf == NULL) {
4637: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4638: ctxt->instate = state;
1.135 daniel 4639: return;
4640: }
4641: }
1.152 daniel 4642: COPY_BUF(ql,buf,len,q);
1.135 daniel 4643: q = r;
1.152 daniel 4644: ql = rl;
1.135 daniel 4645: r = cur;
1.152 daniel 4646: rl = l;
4647: NEXTL(l);
4648: cur = CUR_CHAR(l);
1.135 daniel 4649: if (cur == 0) {
4650: SHRINK;
4651: GROW;
1.152 daniel 4652: cur = CUR_CHAR(l);
1.135 daniel 4653: }
1.3 veillard 4654: }
1.135 daniel 4655: buf[len] = 0;
4656: if (!IS_CHAR(cur)) {
1.55 daniel 4657: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4658: ctxt->sax->error(ctxt->userData,
1.135 daniel 4659: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4660: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4661: ctxt->wellFormed = 0;
1.180 daniel 4662: ctxt->disableSAX = 1;
1.178 daniel 4663: xmlFree(buf);
1.3 veillard 4664: } else {
1.187 daniel 4665: if (input != ctxt->input) {
4666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4667: ctxt->sax->error(ctxt->userData,
4668: "Comment doesn't start and stop in the same entity\n");
4669: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4670: ctxt->wellFormed = 0;
4671: ctxt->disableSAX = 1;
4672: }
1.40 daniel 4673: NEXT;
1.171 daniel 4674: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4675: (!ctxt->disableSAX))
1.135 daniel 4676: ctxt->sax->comment(ctxt->userData, buf);
4677: xmlFree(buf);
1.3 veillard 4678: }
1.140 daniel 4679: ctxt->instate = state;
1.3 veillard 4680: }
4681:
1.50 daniel 4682: /**
4683: * xmlParsePITarget:
4684: * @ctxt: an XML parser context
4685: *
4686: * parse the name of a PI
1.22 daniel 4687: *
4688: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4689: *
4690: * Returns the PITarget name or NULL
1.22 daniel 4691: */
4692:
1.123 daniel 4693: xmlChar *
1.55 daniel 4694: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4695: xmlChar *name;
1.22 daniel 4696:
4697: name = xmlParseName(ctxt);
1.139 daniel 4698: if ((name != NULL) &&
1.22 daniel 4699: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4700: ((name[1] == 'm') || (name[1] == 'M')) &&
4701: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4702: int i;
1.177 daniel 4703: if ((name[0] == 'x') && (name[1] == 'm') &&
4704: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4706: ctxt->sax->error(ctxt->userData,
4707: "XML declaration allowed only at the start of the document\n");
4708: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4709: ctxt->wellFormed = 0;
1.180 daniel 4710: ctxt->disableSAX = 1;
1.151 daniel 4711: return(name);
4712: } else if (name[3] == 0) {
4713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4714: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4715: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4716: ctxt->wellFormed = 0;
1.180 daniel 4717: ctxt->disableSAX = 1;
1.151 daniel 4718: return(name);
4719: }
1.139 daniel 4720: for (i = 0;;i++) {
4721: if (xmlW3CPIs[i] == NULL) break;
4722: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4723: return(name);
4724: }
4725: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4726: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4727: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4728: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4729: }
1.22 daniel 4730: }
4731: return(name);
4732: }
4733:
1.50 daniel 4734: /**
4735: * xmlParsePI:
4736: * @ctxt: an XML parser context
4737: *
4738: * parse an XML Processing Instruction.
1.22 daniel 4739: *
4740: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4741: *
1.69 daniel 4742: * The processing is transfered to SAX once parsed.
1.3 veillard 4743: */
4744:
1.55 daniel 4745: void
4746: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4747: xmlChar *buf = NULL;
4748: int len = 0;
1.140 daniel 4749: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4750: int cur, l;
1.123 daniel 4751: xmlChar *target;
1.140 daniel 4752: xmlParserInputState state;
1.22 daniel 4753:
1.152 daniel 4754: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4755: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4756: state = ctxt->instate;
4757: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4758: /*
4759: * this is a Processing Instruction.
4760: */
1.40 daniel 4761: SKIP(2);
1.91 daniel 4762: SHRINK;
1.3 veillard 4763:
4764: /*
1.22 daniel 4765: * Parse the target name and check for special support like
4766: * namespace.
1.3 veillard 4767: */
1.22 daniel 4768: target = xmlParsePITarget(ctxt);
4769: if (target != NULL) {
1.156 daniel 4770: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4771: if (input != ctxt->input) {
4772: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4773: ctxt->sax->error(ctxt->userData,
4774: "PI declaration doesn't start and stop in the same entity\n");
4775: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4776: ctxt->wellFormed = 0;
4777: ctxt->disableSAX = 1;
4778: }
1.156 daniel 4779: SKIP(2);
4780:
4781: /*
4782: * SAX: PI detected.
4783: */
1.171 daniel 4784: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4785: (ctxt->sax->processingInstruction != NULL))
4786: ctxt->sax->processingInstruction(ctxt->userData,
4787: target, NULL);
4788: ctxt->instate = state;
1.170 daniel 4789: xmlFree(target);
1.156 daniel 4790: return;
4791: }
1.135 daniel 4792: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4793: if (buf == NULL) {
4794: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4795: ctxt->instate = state;
1.135 daniel 4796: return;
4797: }
4798: cur = CUR;
4799: if (!IS_BLANK(cur)) {
1.114 daniel 4800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4801: ctxt->sax->error(ctxt->userData,
4802: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4803: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4804: ctxt->wellFormed = 0;
1.180 daniel 4805: ctxt->disableSAX = 1;
1.114 daniel 4806: }
4807: SKIP_BLANKS;
1.152 daniel 4808: cur = CUR_CHAR(l);
1.135 daniel 4809: while (IS_CHAR(cur) &&
4810: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4811: if (len + 5 >= size) {
1.135 daniel 4812: size *= 2;
1.204 veillard 4813: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4814: if (buf == NULL) {
4815: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4816: ctxt->instate = state;
1.135 daniel 4817: return;
4818: }
4819: }
1.152 daniel 4820: COPY_BUF(l,buf,len,cur);
4821: NEXTL(l);
4822: cur = CUR_CHAR(l);
1.135 daniel 4823: if (cur == 0) {
4824: SHRINK;
4825: GROW;
1.152 daniel 4826: cur = CUR_CHAR(l);
1.135 daniel 4827: }
4828: }
4829: buf[len] = 0;
1.152 daniel 4830: if (cur != '?') {
1.72 daniel 4831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4832: ctxt->sax->error(ctxt->userData,
1.72 daniel 4833: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4834: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4835: ctxt->wellFormed = 0;
1.180 daniel 4836: ctxt->disableSAX = 1;
1.22 daniel 4837: } else {
1.187 daniel 4838: if (input != ctxt->input) {
4839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4840: ctxt->sax->error(ctxt->userData,
4841: "PI declaration doesn't start and stop in the same entity\n");
4842: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4843: ctxt->wellFormed = 0;
4844: ctxt->disableSAX = 1;
4845: }
1.72 daniel 4846: SKIP(2);
1.44 daniel 4847:
1.72 daniel 4848: /*
4849: * SAX: PI detected.
4850: */
1.171 daniel 4851: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4852: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4853: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4854: target, buf);
1.22 daniel 4855: }
1.135 daniel 4856: xmlFree(buf);
1.119 daniel 4857: xmlFree(target);
1.3 veillard 4858: } else {
1.55 daniel 4859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4860: ctxt->sax->error(ctxt->userData,
4861: "xmlParsePI : no target name\n");
1.123 daniel 4862: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4863: ctxt->wellFormed = 0;
1.180 daniel 4864: ctxt->disableSAX = 1;
1.22 daniel 4865: }
1.140 daniel 4866: ctxt->instate = state;
1.22 daniel 4867: }
4868: }
4869:
1.50 daniel 4870: /**
4871: * xmlParseNotationDecl:
4872: * @ctxt: an XML parser context
4873: *
4874: * parse a notation declaration
1.22 daniel 4875: *
4876: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4877: *
4878: * Hence there is actually 3 choices:
4879: * 'PUBLIC' S PubidLiteral
4880: * 'PUBLIC' S PubidLiteral S SystemLiteral
4881: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4882: *
1.67 daniel 4883: * See the NOTE on xmlParseExternalID().
1.22 daniel 4884: */
4885:
1.55 daniel 4886: void
4887: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4888: xmlChar *name;
4889: xmlChar *Pubid;
4890: xmlChar *Systemid;
1.22 daniel 4891:
1.152 daniel 4892: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4893: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4894: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4895: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4896: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4897: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4898: SHRINK;
1.40 daniel 4899: SKIP(10);
1.67 daniel 4900: if (!IS_BLANK(CUR)) {
4901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4902: ctxt->sax->error(ctxt->userData,
4903: "Space required after '<!NOTATION'\n");
1.123 daniel 4904: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4905: ctxt->wellFormed = 0;
1.180 daniel 4906: ctxt->disableSAX = 1;
1.67 daniel 4907: return;
4908: }
4909: SKIP_BLANKS;
1.22 daniel 4910:
4911: name = xmlParseName(ctxt);
4912: if (name == NULL) {
1.55 daniel 4913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4914: ctxt->sax->error(ctxt->userData,
4915: "NOTATION: Name expected here\n");
1.123 daniel 4916: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4917: ctxt->wellFormed = 0;
1.180 daniel 4918: ctxt->disableSAX = 1;
1.67 daniel 4919: return;
4920: }
4921: if (!IS_BLANK(CUR)) {
4922: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4923: ctxt->sax->error(ctxt->userData,
1.67 daniel 4924: "Space required after the NOTATION name'\n");
1.123 daniel 4925: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4926: ctxt->wellFormed = 0;
1.180 daniel 4927: ctxt->disableSAX = 1;
1.22 daniel 4928: return;
4929: }
1.42 daniel 4930: SKIP_BLANKS;
1.67 daniel 4931:
1.22 daniel 4932: /*
1.67 daniel 4933: * Parse the IDs.
1.22 daniel 4934: */
1.160 daniel 4935: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4936: SKIP_BLANKS;
4937:
1.152 daniel 4938: if (RAW == '>') {
1.187 daniel 4939: if (input != ctxt->input) {
4940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4941: ctxt->sax->error(ctxt->userData,
4942: "Notation declaration doesn't start and stop in the same entity\n");
4943: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4944: ctxt->wellFormed = 0;
4945: ctxt->disableSAX = 1;
4946: }
1.40 daniel 4947: NEXT;
1.171 daniel 4948: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4949: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4950: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4951: } else {
4952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4953: ctxt->sax->error(ctxt->userData,
1.67 daniel 4954: "'>' required to close NOTATION declaration\n");
1.123 daniel 4955: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4956: ctxt->wellFormed = 0;
1.180 daniel 4957: ctxt->disableSAX = 1;
1.67 daniel 4958: }
1.119 daniel 4959: xmlFree(name);
4960: if (Systemid != NULL) xmlFree(Systemid);
4961: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4962: }
4963: }
4964:
1.50 daniel 4965: /**
4966: * xmlParseEntityDecl:
4967: * @ctxt: an XML parser context
4968: *
4969: * parse <!ENTITY declarations
1.22 daniel 4970: *
4971: * [70] EntityDecl ::= GEDecl | PEDecl
4972: *
4973: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4974: *
4975: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4976: *
4977: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4978: *
4979: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4980: *
4981: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4982: *
4983: * [ VC: Notation Declared ]
1.116 daniel 4984: * The Name must match the declared name of a notation.
1.22 daniel 4985: */
4986:
1.55 daniel 4987: void
4988: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4989: xmlChar *name = NULL;
4990: xmlChar *value = NULL;
4991: xmlChar *URI = NULL, *literal = NULL;
4992: xmlChar *ndata = NULL;
1.39 daniel 4993: int isParameter = 0;
1.123 daniel 4994: xmlChar *orig = NULL;
1.22 daniel 4995:
1.94 daniel 4996: GROW;
1.152 daniel 4997: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4998: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4999: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 5000: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 5001: xmlParserInputPtr input = ctxt->input;
1.96 daniel 5002: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 5003: SHRINK;
1.40 daniel 5004: SKIP(8);
1.59 daniel 5005: if (!IS_BLANK(CUR)) {
5006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5007: ctxt->sax->error(ctxt->userData,
5008: "Space required after '<!ENTITY'\n");
1.123 daniel 5009: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5010: ctxt->wellFormed = 0;
1.180 daniel 5011: ctxt->disableSAX = 1;
1.59 daniel 5012: }
5013: SKIP_BLANKS;
1.40 daniel 5014:
1.152 daniel 5015: if (RAW == '%') {
1.40 daniel 5016: NEXT;
1.59 daniel 5017: if (!IS_BLANK(CUR)) {
5018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5019: ctxt->sax->error(ctxt->userData,
5020: "Space required after '%'\n");
1.123 daniel 5021: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5022: ctxt->wellFormed = 0;
1.180 daniel 5023: ctxt->disableSAX = 1;
1.59 daniel 5024: }
1.42 daniel 5025: SKIP_BLANKS;
1.39 daniel 5026: isParameter = 1;
1.22 daniel 5027: }
5028:
5029: name = xmlParseName(ctxt);
1.24 daniel 5030: if (name == NULL) {
1.55 daniel 5031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5032: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 5033: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5034: ctxt->wellFormed = 0;
1.180 daniel 5035: ctxt->disableSAX = 1;
1.24 daniel 5036: return;
5037: }
1.59 daniel 5038: if (!IS_BLANK(CUR)) {
5039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5040: ctxt->sax->error(ctxt->userData,
1.59 daniel 5041: "Space required after the entity name\n");
1.123 daniel 5042: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5043: ctxt->wellFormed = 0;
1.180 daniel 5044: ctxt->disableSAX = 1;
1.59 daniel 5045: }
1.42 daniel 5046: SKIP_BLANKS;
1.24 daniel 5047:
1.22 daniel 5048: /*
1.68 daniel 5049: * handle the various case of definitions...
1.22 daniel 5050: */
1.39 daniel 5051: if (isParameter) {
1.152 daniel 5052: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 5053: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 5054: if (value) {
1.171 daniel 5055: if ((ctxt->sax != NULL) &&
5056: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 5057: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 5058: XML_INTERNAL_PARAMETER_ENTITY,
5059: NULL, NULL, value);
5060: }
1.24 daniel 5061: else {
1.67 daniel 5062: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 5063: if ((URI == NULL) && (literal == NULL)) {
5064: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5065: ctxt->sax->error(ctxt->userData,
5066: "Entity value required\n");
5067: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
5068: ctxt->wellFormed = 0;
1.180 daniel 5069: ctxt->disableSAX = 1;
1.169 daniel 5070: }
1.39 daniel 5071: if (URI) {
1.193 daniel 5072: xmlURIPtr uri;
5073:
5074: uri = xmlParseURI((const char *) URI);
5075: if (uri == NULL) {
5076: if ((ctxt->sax != NULL) &&
5077: (!ctxt->disableSAX) &&
5078: (ctxt->sax->error != NULL))
5079: ctxt->sax->error(ctxt->userData,
5080: "Invalid URI: %s\n", URI);
5081: ctxt->wellFormed = 0;
5082: ctxt->errNo = XML_ERR_INVALID_URI;
5083: } else {
5084: if (uri->fragment != NULL) {
5085: if ((ctxt->sax != NULL) &&
5086: (!ctxt->disableSAX) &&
5087: (ctxt->sax->error != NULL))
5088: ctxt->sax->error(ctxt->userData,
5089: "Fragment not allowed: %s\n", URI);
5090: ctxt->wellFormed = 0;
5091: ctxt->errNo = XML_ERR_URI_FRAGMENT;
5092: } else {
5093: if ((ctxt->sax != NULL) &&
5094: (!ctxt->disableSAX) &&
5095: (ctxt->sax->entityDecl != NULL))
5096: ctxt->sax->entityDecl(ctxt->userData, name,
5097: XML_EXTERNAL_PARAMETER_ENTITY,
5098: literal, URI, NULL);
5099: }
5100: xmlFreeURI(uri);
5101: }
1.39 daniel 5102: }
1.24 daniel 5103: }
5104: } else {
1.152 daniel 5105: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 5106: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 5107: if ((ctxt->sax != NULL) &&
5108: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 5109: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 5110: XML_INTERNAL_GENERAL_ENTITY,
5111: NULL, NULL, value);
5112: } else {
1.67 daniel 5113: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 5114: if ((URI == NULL) && (literal == NULL)) {
5115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5116: ctxt->sax->error(ctxt->userData,
5117: "Entity value required\n");
5118: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
5119: ctxt->wellFormed = 0;
1.180 daniel 5120: ctxt->disableSAX = 1;
1.169 daniel 5121: }
1.193 daniel 5122: if (URI) {
5123: xmlURIPtr uri;
5124:
5125: uri = xmlParseURI((const char *)URI);
5126: if (uri == NULL) {
5127: if ((ctxt->sax != NULL) &&
5128: (!ctxt->disableSAX) &&
5129: (ctxt->sax->error != NULL))
5130: ctxt->sax->error(ctxt->userData,
5131: "Invalid URI: %s\n", URI);
5132: ctxt->wellFormed = 0;
5133: ctxt->errNo = XML_ERR_INVALID_URI;
5134: } else {
5135: if (uri->fragment != NULL) {
5136: if ((ctxt->sax != NULL) &&
5137: (!ctxt->disableSAX) &&
5138: (ctxt->sax->error != NULL))
5139: ctxt->sax->error(ctxt->userData,
5140: "Fragment not allowed: %s\n", URI);
5141: ctxt->wellFormed = 0;
5142: ctxt->errNo = XML_ERR_URI_FRAGMENT;
5143: }
5144: xmlFreeURI(uri);
5145: }
5146: }
1.152 daniel 5147: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 5148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5149: ctxt->sax->error(ctxt->userData,
1.59 daniel 5150: "Space required before 'NDATA'\n");
1.123 daniel 5151: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5152: ctxt->wellFormed = 0;
1.180 daniel 5153: ctxt->disableSAX = 1;
1.59 daniel 5154: }
1.42 daniel 5155: SKIP_BLANKS;
1.152 daniel 5156: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 5157: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5158: (NXT(4) == 'A')) {
5159: SKIP(5);
1.59 daniel 5160: if (!IS_BLANK(CUR)) {
5161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5162: ctxt->sax->error(ctxt->userData,
1.59 daniel 5163: "Space required after 'NDATA'\n");
1.123 daniel 5164: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5165: ctxt->wellFormed = 0;
1.180 daniel 5166: ctxt->disableSAX = 1;
1.59 daniel 5167: }
1.42 daniel 5168: SKIP_BLANKS;
1.24 daniel 5169: ndata = xmlParseName(ctxt);
1.171 daniel 5170: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 5171: (ctxt->sax->unparsedEntityDecl != NULL))
5172: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 5173: literal, URI, ndata);
5174: } else {
1.171 daniel 5175: if ((ctxt->sax != NULL) &&
5176: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 5177: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 5178: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5179: literal, URI, NULL);
1.24 daniel 5180: }
5181: }
5182: }
1.42 daniel 5183: SKIP_BLANKS;
1.152 daniel 5184: if (RAW != '>') {
1.55 daniel 5185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5186: ctxt->sax->error(ctxt->userData,
1.31 daniel 5187: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 5188: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 5189: ctxt->wellFormed = 0;
1.180 daniel 5190: ctxt->disableSAX = 1;
1.187 daniel 5191: } else {
5192: if (input != ctxt->input) {
5193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5194: ctxt->sax->error(ctxt->userData,
5195: "Entity declaration doesn't start and stop in the same entity\n");
5196: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5197: ctxt->wellFormed = 0;
5198: ctxt->disableSAX = 1;
5199: }
1.40 daniel 5200: NEXT;
1.187 daniel 5201: }
1.78 daniel 5202: if (orig != NULL) {
5203: /*
1.98 daniel 5204: * Ugly mechanism to save the raw entity value.
1.78 daniel 5205: */
5206: xmlEntityPtr cur = NULL;
5207:
1.98 daniel 5208: if (isParameter) {
5209: if ((ctxt->sax != NULL) &&
5210: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 5211: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 5212: } else {
5213: if ((ctxt->sax != NULL) &&
5214: (ctxt->sax->getEntity != NULL))
1.120 daniel 5215: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 5216: }
5217: if (cur != NULL) {
5218: if (cur->orig != NULL)
1.119 daniel 5219: xmlFree(orig);
1.98 daniel 5220: else
5221: cur->orig = orig;
5222: } else
1.119 daniel 5223: xmlFree(orig);
1.78 daniel 5224: }
1.119 daniel 5225: if (name != NULL) xmlFree(name);
5226: if (value != NULL) xmlFree(value);
5227: if (URI != NULL) xmlFree(URI);
5228: if (literal != NULL) xmlFree(literal);
5229: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 5230: }
5231: }
5232:
1.50 daniel 5233: /**
1.59 daniel 5234: * xmlParseDefaultDecl:
5235: * @ctxt: an XML parser context
5236: * @value: Receive a possible fixed default value for the attribute
5237: *
5238: * Parse an attribute default declaration
5239: *
5240: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5241: *
1.99 daniel 5242: * [ VC: Required Attribute ]
1.117 daniel 5243: * if the default declaration is the keyword #REQUIRED, then the
5244: * attribute must be specified for all elements of the type in the
5245: * attribute-list declaration.
1.99 daniel 5246: *
5247: * [ VC: Attribute Default Legal ]
1.102 daniel 5248: * The declared default value must meet the lexical constraints of
5249: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 5250: *
5251: * [ VC: Fixed Attribute Default ]
1.117 daniel 5252: * if an attribute has a default value declared with the #FIXED
5253: * keyword, instances of that attribute must match the default value.
1.99 daniel 5254: *
5255: * [ WFC: No < in Attribute Values ]
5256: * handled in xmlParseAttValue()
5257: *
1.59 daniel 5258: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5259: * or XML_ATTRIBUTE_FIXED.
5260: */
5261:
5262: int
1.123 daniel 5263: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 5264: int val;
1.123 daniel 5265: xmlChar *ret;
1.59 daniel 5266:
5267: *value = NULL;
1.152 daniel 5268: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 5269: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
5270: (NXT(4) == 'U') && (NXT(5) == 'I') &&
5271: (NXT(6) == 'R') && (NXT(7) == 'E') &&
5272: (NXT(8) == 'D')) {
5273: SKIP(9);
5274: return(XML_ATTRIBUTE_REQUIRED);
5275: }
1.152 daniel 5276: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 5277: (NXT(2) == 'M') && (NXT(3) == 'P') &&
5278: (NXT(4) == 'L') && (NXT(5) == 'I') &&
5279: (NXT(6) == 'E') && (NXT(7) == 'D')) {
5280: SKIP(8);
5281: return(XML_ATTRIBUTE_IMPLIED);
5282: }
5283: val = XML_ATTRIBUTE_NONE;
1.152 daniel 5284: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 5285: (NXT(2) == 'I') && (NXT(3) == 'X') &&
5286: (NXT(4) == 'E') && (NXT(5) == 'D')) {
5287: SKIP(6);
5288: val = XML_ATTRIBUTE_FIXED;
5289: if (!IS_BLANK(CUR)) {
5290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5291: ctxt->sax->error(ctxt->userData,
5292: "Space required after '#FIXED'\n");
1.123 daniel 5293: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5294: ctxt->wellFormed = 0;
1.180 daniel 5295: ctxt->disableSAX = 1;
1.59 daniel 5296: }
5297: SKIP_BLANKS;
5298: }
5299: ret = xmlParseAttValue(ctxt);
1.96 daniel 5300: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 5301: if (ret == NULL) {
5302: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5303: ctxt->sax->error(ctxt->userData,
1.59 daniel 5304: "Attribute default value declaration error\n");
5305: ctxt->wellFormed = 0;
1.180 daniel 5306: ctxt->disableSAX = 1;
1.59 daniel 5307: } else
5308: *value = ret;
5309: return(val);
5310: }
5311:
5312: /**
1.66 daniel 5313: * xmlParseNotationType:
5314: * @ctxt: an XML parser context
5315: *
5316: * parse an Notation attribute type.
5317: *
1.99 daniel 5318: * Note: the leading 'NOTATION' S part has already being parsed...
5319: *
1.66 daniel 5320: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5321: *
1.99 daniel 5322: * [ VC: Notation Attributes ]
1.117 daniel 5323: * Values of this type must match one of the notation names included
1.99 daniel 5324: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 5325: *
5326: * Returns: the notation attribute tree built while parsing
5327: */
5328:
5329: xmlEnumerationPtr
5330: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5331: xmlChar *name;
1.66 daniel 5332: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5333:
1.152 daniel 5334: if (RAW != '(') {
1.66 daniel 5335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5336: ctxt->sax->error(ctxt->userData,
5337: "'(' required to start 'NOTATION'\n");
1.123 daniel 5338: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 5339: ctxt->wellFormed = 0;
1.180 daniel 5340: ctxt->disableSAX = 1;
1.66 daniel 5341: return(NULL);
5342: }
1.91 daniel 5343: SHRINK;
1.66 daniel 5344: do {
5345: NEXT;
5346: SKIP_BLANKS;
5347: name = xmlParseName(ctxt);
5348: if (name == NULL) {
5349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5350: ctxt->sax->error(ctxt->userData,
1.66 daniel 5351: "Name expected in NOTATION declaration\n");
1.123 daniel 5352: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 5353: ctxt->wellFormed = 0;
1.180 daniel 5354: ctxt->disableSAX = 1;
1.66 daniel 5355: return(ret);
5356: }
5357: cur = xmlCreateEnumeration(name);
1.119 daniel 5358: xmlFree(name);
1.66 daniel 5359: if (cur == NULL) return(ret);
5360: if (last == NULL) ret = last = cur;
5361: else {
5362: last->next = cur;
5363: last = cur;
5364: }
5365: SKIP_BLANKS;
1.152 daniel 5366: } while (RAW == '|');
5367: if (RAW != ')') {
1.66 daniel 5368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5369: ctxt->sax->error(ctxt->userData,
1.66 daniel 5370: "')' required to finish NOTATION declaration\n");
1.123 daniel 5371: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 5372: ctxt->wellFormed = 0;
1.180 daniel 5373: ctxt->disableSAX = 1;
1.170 daniel 5374: if ((last != NULL) && (last != ret))
5375: xmlFreeEnumeration(last);
1.66 daniel 5376: return(ret);
5377: }
5378: NEXT;
5379: return(ret);
5380: }
5381:
5382: /**
5383: * xmlParseEnumerationType:
5384: * @ctxt: an XML parser context
5385: *
5386: * parse an Enumeration attribute type.
5387: *
5388: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5389: *
1.99 daniel 5390: * [ VC: Enumeration ]
1.117 daniel 5391: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5392: * the declaration
5393: *
1.66 daniel 5394: * Returns: the enumeration attribute tree built while parsing
5395: */
5396:
5397: xmlEnumerationPtr
5398: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5399: xmlChar *name;
1.66 daniel 5400: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5401:
1.152 daniel 5402: if (RAW != '(') {
1.66 daniel 5403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5404: ctxt->sax->error(ctxt->userData,
1.66 daniel 5405: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5406: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5407: ctxt->wellFormed = 0;
1.180 daniel 5408: ctxt->disableSAX = 1;
1.66 daniel 5409: return(NULL);
5410: }
1.91 daniel 5411: SHRINK;
1.66 daniel 5412: do {
5413: NEXT;
5414: SKIP_BLANKS;
5415: name = xmlParseNmtoken(ctxt);
5416: if (name == NULL) {
5417: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5418: ctxt->sax->error(ctxt->userData,
1.66 daniel 5419: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5420: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5421: ctxt->wellFormed = 0;
1.180 daniel 5422: ctxt->disableSAX = 1;
1.66 daniel 5423: return(ret);
5424: }
5425: cur = xmlCreateEnumeration(name);
1.119 daniel 5426: xmlFree(name);
1.66 daniel 5427: if (cur == NULL) return(ret);
5428: if (last == NULL) ret = last = cur;
5429: else {
5430: last->next = cur;
5431: last = cur;
5432: }
5433: SKIP_BLANKS;
1.152 daniel 5434: } while (RAW == '|');
5435: if (RAW != ')') {
1.66 daniel 5436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5437: ctxt->sax->error(ctxt->userData,
1.66 daniel 5438: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5439: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5440: ctxt->wellFormed = 0;
1.180 daniel 5441: ctxt->disableSAX = 1;
1.66 daniel 5442: return(ret);
5443: }
5444: NEXT;
5445: return(ret);
5446: }
5447:
5448: /**
1.50 daniel 5449: * xmlParseEnumeratedType:
5450: * @ctxt: an XML parser context
1.66 daniel 5451: * @tree: the enumeration tree built while parsing
1.50 daniel 5452: *
1.66 daniel 5453: * parse an Enumerated attribute type.
1.22 daniel 5454: *
5455: * [57] EnumeratedType ::= NotationType | Enumeration
5456: *
5457: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5458: *
1.50 daniel 5459: *
1.66 daniel 5460: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5461: */
5462:
1.66 daniel 5463: int
5464: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5465: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5466: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5467: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5468: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5469: SKIP(8);
5470: if (!IS_BLANK(CUR)) {
5471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5472: ctxt->sax->error(ctxt->userData,
5473: "Space required after 'NOTATION'\n");
1.123 daniel 5474: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5475: ctxt->wellFormed = 0;
1.180 daniel 5476: ctxt->disableSAX = 1;
1.66 daniel 5477: return(0);
5478: }
5479: SKIP_BLANKS;
5480: *tree = xmlParseNotationType(ctxt);
5481: if (*tree == NULL) return(0);
5482: return(XML_ATTRIBUTE_NOTATION);
5483: }
5484: *tree = xmlParseEnumerationType(ctxt);
5485: if (*tree == NULL) return(0);
5486: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5487: }
5488:
1.50 daniel 5489: /**
5490: * xmlParseAttributeType:
5491: * @ctxt: an XML parser context
1.66 daniel 5492: * @tree: the enumeration tree built while parsing
1.50 daniel 5493: *
1.59 daniel 5494: * parse the Attribute list def for an element
1.22 daniel 5495: *
5496: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5497: *
5498: * [55] StringType ::= 'CDATA'
5499: *
5500: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5501: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5502: *
1.102 daniel 5503: * Validity constraints for attribute values syntax are checked in
5504: * xmlValidateAttributeValue()
5505: *
1.99 daniel 5506: * [ VC: ID ]
1.117 daniel 5507: * Values of type ID must match the Name production. A name must not
1.99 daniel 5508: * appear more than once in an XML document as a value of this type;
5509: * i.e., ID values must uniquely identify the elements which bear them.
5510: *
5511: * [ VC: One ID per Element Type ]
1.117 daniel 5512: * No element type may have more than one ID attribute specified.
1.99 daniel 5513: *
5514: * [ VC: ID Attribute Default ]
1.117 daniel 5515: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5516: *
5517: * [ VC: IDREF ]
1.102 daniel 5518: * Values of type IDREF must match the Name production, and values
1.140 daniel 5519: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5520: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5521: * values must match the value of some ID attribute.
5522: *
5523: * [ VC: Entity Name ]
1.102 daniel 5524: * Values of type ENTITY must match the Name production, values
1.140 daniel 5525: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5526: * name of an unparsed entity declared in the DTD.
1.99 daniel 5527: *
5528: * [ VC: Name Token ]
1.102 daniel 5529: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5530: * of type NMTOKENS must match Nmtokens.
5531: *
1.69 daniel 5532: * Returns the attribute type
1.22 daniel 5533: */
1.59 daniel 5534: int
1.66 daniel 5535: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5536: SHRINK;
1.152 daniel 5537: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5538: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5539: (NXT(4) == 'A')) {
5540: SKIP(5);
1.66 daniel 5541: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5542: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5543: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5544: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5545: SKIP(6);
5546: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5547: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5548: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5549: (NXT(4) == 'F')) {
5550: SKIP(5);
1.59 daniel 5551: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5552: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5553: SKIP(2);
5554: return(XML_ATTRIBUTE_ID);
1.152 daniel 5555: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5556: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5557: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5558: SKIP(6);
1.59 daniel 5559: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5560: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5561: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5562: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5563: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5564: SKIP(8);
1.59 daniel 5565: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5566: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5567: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5568: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5569: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5570: SKIP(8);
5571: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5572: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5573: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5574: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5575: (NXT(6) == 'N')) {
5576: SKIP(7);
1.59 daniel 5577: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5578: }
1.66 daniel 5579: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5580: }
5581:
1.50 daniel 5582: /**
5583: * xmlParseAttributeListDecl:
5584: * @ctxt: an XML parser context
5585: *
5586: * : parse the Attribute list def for an element
1.22 daniel 5587: *
5588: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5589: *
5590: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5591: *
1.22 daniel 5592: */
1.55 daniel 5593: void
5594: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5595: xmlChar *elemName;
5596: xmlChar *attrName;
1.103 daniel 5597: xmlEnumerationPtr tree;
1.22 daniel 5598:
1.152 daniel 5599: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5600: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5601: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5602: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5603: (NXT(8) == 'T')) {
1.187 daniel 5604: xmlParserInputPtr input = ctxt->input;
5605:
1.40 daniel 5606: SKIP(9);
1.59 daniel 5607: if (!IS_BLANK(CUR)) {
5608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5609: ctxt->sax->error(ctxt->userData,
5610: "Space required after '<!ATTLIST'\n");
1.123 daniel 5611: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5612: ctxt->wellFormed = 0;
1.180 daniel 5613: ctxt->disableSAX = 1;
1.59 daniel 5614: }
1.42 daniel 5615: SKIP_BLANKS;
1.59 daniel 5616: elemName = xmlParseName(ctxt);
5617: if (elemName == NULL) {
1.55 daniel 5618: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5619: ctxt->sax->error(ctxt->userData,
5620: "ATTLIST: no name for Element\n");
1.123 daniel 5621: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5622: ctxt->wellFormed = 0;
1.180 daniel 5623: ctxt->disableSAX = 1;
1.22 daniel 5624: return;
5625: }
1.42 daniel 5626: SKIP_BLANKS;
1.152 daniel 5627: while (RAW != '>') {
1.123 daniel 5628: const xmlChar *check = CUR_PTR;
1.59 daniel 5629: int type;
5630: int def;
1.123 daniel 5631: xmlChar *defaultValue = NULL;
1.59 daniel 5632:
1.103 daniel 5633: tree = NULL;
1.59 daniel 5634: attrName = xmlParseName(ctxt);
5635: if (attrName == NULL) {
5636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5637: ctxt->sax->error(ctxt->userData,
5638: "ATTLIST: no name for Attribute\n");
1.123 daniel 5639: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5640: ctxt->wellFormed = 0;
1.180 daniel 5641: ctxt->disableSAX = 1;
1.59 daniel 5642: break;
5643: }
1.97 daniel 5644: GROW;
1.59 daniel 5645: if (!IS_BLANK(CUR)) {
5646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5647: ctxt->sax->error(ctxt->userData,
1.59 daniel 5648: "Space required after the attribute name\n");
1.123 daniel 5649: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5650: ctxt->wellFormed = 0;
1.180 daniel 5651: ctxt->disableSAX = 1;
1.170 daniel 5652: if (attrName != NULL)
5653: xmlFree(attrName);
5654: if (defaultValue != NULL)
5655: xmlFree(defaultValue);
1.59 daniel 5656: break;
5657: }
5658: SKIP_BLANKS;
5659:
1.66 daniel 5660: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5661: if (type <= 0) {
5662: if (attrName != NULL)
5663: xmlFree(attrName);
5664: if (defaultValue != NULL)
5665: xmlFree(defaultValue);
5666: break;
5667: }
1.22 daniel 5668:
1.97 daniel 5669: GROW;
1.59 daniel 5670: if (!IS_BLANK(CUR)) {
5671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5672: ctxt->sax->error(ctxt->userData,
1.59 daniel 5673: "Space required after the attribute type\n");
1.123 daniel 5674: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5675: ctxt->wellFormed = 0;
1.180 daniel 5676: ctxt->disableSAX = 1;
1.170 daniel 5677: if (attrName != NULL)
5678: xmlFree(attrName);
5679: if (defaultValue != NULL)
5680: xmlFree(defaultValue);
5681: if (tree != NULL)
5682: xmlFreeEnumeration(tree);
1.59 daniel 5683: break;
5684: }
1.42 daniel 5685: SKIP_BLANKS;
1.59 daniel 5686:
5687: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5688: if (def <= 0) {
5689: if (attrName != NULL)
5690: xmlFree(attrName);
5691: if (defaultValue != NULL)
5692: xmlFree(defaultValue);
5693: if (tree != NULL)
5694: xmlFreeEnumeration(tree);
5695: break;
5696: }
1.59 daniel 5697:
1.97 daniel 5698: GROW;
1.152 daniel 5699: if (RAW != '>') {
1.59 daniel 5700: if (!IS_BLANK(CUR)) {
5701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5702: ctxt->sax->error(ctxt->userData,
1.59 daniel 5703: "Space required after the attribute default value\n");
1.123 daniel 5704: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5705: ctxt->wellFormed = 0;
1.180 daniel 5706: ctxt->disableSAX = 1;
1.170 daniel 5707: if (attrName != NULL)
5708: xmlFree(attrName);
5709: if (defaultValue != NULL)
5710: xmlFree(defaultValue);
5711: if (tree != NULL)
5712: xmlFreeEnumeration(tree);
1.59 daniel 5713: break;
5714: }
5715: SKIP_BLANKS;
5716: }
1.40 daniel 5717: if (check == CUR_PTR) {
1.55 daniel 5718: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5719: ctxt->sax->error(ctxt->userData,
1.59 daniel 5720: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5721: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5722: if (attrName != NULL)
5723: xmlFree(attrName);
5724: if (defaultValue != NULL)
5725: xmlFree(defaultValue);
5726: if (tree != NULL)
5727: xmlFreeEnumeration(tree);
1.22 daniel 5728: break;
5729: }
1.171 daniel 5730: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5731: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5732: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5733: type, def, defaultValue, tree);
1.59 daniel 5734: if (attrName != NULL)
1.119 daniel 5735: xmlFree(attrName);
1.59 daniel 5736: if (defaultValue != NULL)
1.119 daniel 5737: xmlFree(defaultValue);
1.97 daniel 5738: GROW;
1.22 daniel 5739: }
1.187 daniel 5740: if (RAW == '>') {
5741: if (input != ctxt->input) {
5742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5743: ctxt->sax->error(ctxt->userData,
5744: "Attribute list declaration doesn't start and stop in the same entity\n");
5745: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5746: ctxt->wellFormed = 0;
5747: ctxt->disableSAX = 1;
5748: }
1.40 daniel 5749: NEXT;
1.187 daniel 5750: }
1.22 daniel 5751:
1.119 daniel 5752: xmlFree(elemName);
1.22 daniel 5753: }
5754: }
5755:
1.50 daniel 5756: /**
1.61 daniel 5757: * xmlParseElementMixedContentDecl:
5758: * @ctxt: an XML parser context
5759: *
5760: * parse the declaration for a Mixed Element content
5761: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5762: *
5763: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5764: * '(' S? '#PCDATA' S? ')'
5765: *
1.99 daniel 5766: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5767: *
5768: * [ VC: No Duplicate Types ]
1.117 daniel 5769: * The same name must not appear more than once in a single
5770: * mixed-content declaration.
1.99 daniel 5771: *
1.61 daniel 5772: * returns: the list of the xmlElementContentPtr describing the element choices
5773: */
5774: xmlElementContentPtr
1.62 daniel 5775: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5776: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5777: xmlChar *elem = NULL;
1.61 daniel 5778:
1.97 daniel 5779: GROW;
1.152 daniel 5780: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5781: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5782: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5783: (NXT(6) == 'A')) {
5784: SKIP(7);
5785: SKIP_BLANKS;
1.91 daniel 5786: SHRINK;
1.152 daniel 5787: if (RAW == ')') {
1.187 daniel 5788: ctxt->entity = ctxt->input;
1.63 daniel 5789: NEXT;
5790: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5791: if (RAW == '*') {
1.136 daniel 5792: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5793: NEXT;
5794: }
1.63 daniel 5795: return(ret);
5796: }
1.152 daniel 5797: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5798: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5799: if (ret == NULL) return(NULL);
1.99 daniel 5800: }
1.152 daniel 5801: while (RAW == '|') {
1.64 daniel 5802: NEXT;
1.61 daniel 5803: if (elem == NULL) {
5804: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5805: if (ret == NULL) return(NULL);
5806: ret->c1 = cur;
1.64 daniel 5807: cur = ret;
1.61 daniel 5808: } else {
1.64 daniel 5809: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5810: if (n == NULL) return(NULL);
5811: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5812: cur->c2 = n;
5813: cur = n;
1.119 daniel 5814: xmlFree(elem);
1.61 daniel 5815: }
5816: SKIP_BLANKS;
5817: elem = xmlParseName(ctxt);
5818: if (elem == NULL) {
5819: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5820: ctxt->sax->error(ctxt->userData,
1.61 daniel 5821: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5822: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5823: ctxt->wellFormed = 0;
1.180 daniel 5824: ctxt->disableSAX = 1;
1.61 daniel 5825: xmlFreeElementContent(cur);
5826: return(NULL);
5827: }
5828: SKIP_BLANKS;
1.97 daniel 5829: GROW;
1.61 daniel 5830: }
1.152 daniel 5831: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5832: if (elem != NULL) {
1.61 daniel 5833: cur->c2 = xmlNewElementContent(elem,
5834: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5835: xmlFree(elem);
1.66 daniel 5836: }
1.65 daniel 5837: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5838: ctxt->entity = ctxt->input;
1.64 daniel 5839: SKIP(2);
1.61 daniel 5840: } else {
1.119 daniel 5841: if (elem != NULL) xmlFree(elem);
1.61 daniel 5842: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5843: ctxt->sax->error(ctxt->userData,
1.63 daniel 5844: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5845: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5846: ctxt->wellFormed = 0;
1.180 daniel 5847: ctxt->disableSAX = 1;
1.61 daniel 5848: xmlFreeElementContent(ret);
5849: return(NULL);
5850: }
5851:
5852: } else {
5853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5854: ctxt->sax->error(ctxt->userData,
1.61 daniel 5855: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5856: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5857: ctxt->wellFormed = 0;
1.180 daniel 5858: ctxt->disableSAX = 1;
1.61 daniel 5859: }
5860: return(ret);
5861: }
5862:
5863: /**
5864: * xmlParseElementChildrenContentDecl:
1.50 daniel 5865: * @ctxt: an XML parser context
5866: *
1.61 daniel 5867: * parse the declaration for a Mixed Element content
5868: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5869: *
1.61 daniel 5870: *
1.22 daniel 5871: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5872: *
5873: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5874: *
5875: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5876: *
5877: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5878: *
1.99 daniel 5879: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5880: * TODO Parameter-entity replacement text must be properly nested
5881: * with parenthetized groups. That is to say, if either of the
5882: * opening or closing parentheses in a choice, seq, or Mixed
5883: * construct is contained in the replacement text for a parameter
5884: * entity, both must be contained in the same replacement text. For
5885: * interoperability, if a parameter-entity reference appears in a
5886: * choice, seq, or Mixed construct, its replacement text should not
5887: * be empty, and neither the first nor last non-blank character of
5888: * the replacement text should be a connector (| or ,).
5889: *
1.62 daniel 5890: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5891: * hierarchy.
5892: */
5893: xmlElementContentPtr
1.62 daniel 5894: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5895: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5896: xmlChar *elem;
5897: xmlChar type = 0;
1.62 daniel 5898:
5899: SKIP_BLANKS;
1.94 daniel 5900: GROW;
1.152 daniel 5901: if (RAW == '(') {
1.63 daniel 5902: /* Recurse on first child */
1.62 daniel 5903: NEXT;
5904: SKIP_BLANKS;
5905: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5906: SKIP_BLANKS;
1.101 daniel 5907: GROW;
1.62 daniel 5908: } else {
5909: elem = xmlParseName(ctxt);
5910: if (elem == NULL) {
5911: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5912: ctxt->sax->error(ctxt->userData,
1.62 daniel 5913: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5914: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5915: ctxt->wellFormed = 0;
1.180 daniel 5916: ctxt->disableSAX = 1;
1.62 daniel 5917: return(NULL);
5918: }
5919: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5920: GROW;
1.152 daniel 5921: if (RAW == '?') {
1.104 daniel 5922: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5923: NEXT;
1.152 daniel 5924: } else if (RAW == '*') {
1.104 daniel 5925: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5926: NEXT;
1.152 daniel 5927: } else if (RAW == '+') {
1.104 daniel 5928: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5929: NEXT;
5930: } else {
1.104 daniel 5931: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5932: }
1.119 daniel 5933: xmlFree(elem);
1.101 daniel 5934: GROW;
1.62 daniel 5935: }
5936: SKIP_BLANKS;
1.91 daniel 5937: SHRINK;
1.152 daniel 5938: while (RAW != ')') {
1.63 daniel 5939: /*
5940: * Each loop we parse one separator and one element.
5941: */
1.152 daniel 5942: if (RAW == ',') {
1.62 daniel 5943: if (type == 0) type = CUR;
5944:
5945: /*
5946: * Detect "Name | Name , Name" error
5947: */
5948: else if (type != CUR) {
5949: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5950: ctxt->sax->error(ctxt->userData,
1.62 daniel 5951: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5952: type);
1.123 daniel 5953: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5954: ctxt->wellFormed = 0;
1.180 daniel 5955: ctxt->disableSAX = 1;
1.170 daniel 5956: if ((op != NULL) && (op != ret))
5957: xmlFreeElementContent(op);
1.211 veillard 5958: if ((last != NULL) && (last != ret) &&
5959: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5960: xmlFreeElementContent(last);
5961: if (ret != NULL)
5962: xmlFreeElementContent(ret);
1.62 daniel 5963: return(NULL);
5964: }
1.64 daniel 5965: NEXT;
1.62 daniel 5966:
1.63 daniel 5967: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5968: if (op == NULL) {
5969: xmlFreeElementContent(ret);
5970: return(NULL);
5971: }
5972: if (last == NULL) {
5973: op->c1 = ret;
1.65 daniel 5974: ret = cur = op;
1.63 daniel 5975: } else {
5976: cur->c2 = op;
5977: op->c1 = last;
5978: cur =op;
1.65 daniel 5979: last = NULL;
1.63 daniel 5980: }
1.152 daniel 5981: } else if (RAW == '|') {
1.62 daniel 5982: if (type == 0) type = CUR;
5983:
5984: /*
1.63 daniel 5985: * Detect "Name , Name | Name" error
1.62 daniel 5986: */
5987: else if (type != CUR) {
5988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5989: ctxt->sax->error(ctxt->userData,
1.62 daniel 5990: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5991: type);
1.123 daniel 5992: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5993: ctxt->wellFormed = 0;
1.180 daniel 5994: ctxt->disableSAX = 1;
1.211 veillard 5995: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 5996: xmlFreeElementContent(op);
1.211 veillard 5997: if ((last != NULL) && (last != ret) &&
5998: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5999: xmlFreeElementContent(last);
6000: if (ret != NULL)
6001: xmlFreeElementContent(ret);
1.62 daniel 6002: return(NULL);
6003: }
1.64 daniel 6004: NEXT;
1.62 daniel 6005:
1.63 daniel 6006: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
6007: if (op == NULL) {
1.170 daniel 6008: if ((op != NULL) && (op != ret))
6009: xmlFreeElementContent(op);
1.211 veillard 6010: if ((last != NULL) && (last != ret) &&
6011: (last != ret->c1) && (last != ret->c2))
1.170 daniel 6012: xmlFreeElementContent(last);
6013: if (ret != NULL)
6014: xmlFreeElementContent(ret);
1.63 daniel 6015: return(NULL);
6016: }
6017: if (last == NULL) {
6018: op->c1 = ret;
1.65 daniel 6019: ret = cur = op;
1.63 daniel 6020: } else {
6021: cur->c2 = op;
6022: op->c1 = last;
6023: cur =op;
1.65 daniel 6024: last = NULL;
1.63 daniel 6025: }
1.62 daniel 6026: } else {
6027: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6028: ctxt->sax->error(ctxt->userData,
1.62 daniel 6029: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
6030: ctxt->wellFormed = 0;
1.180 daniel 6031: ctxt->disableSAX = 1;
1.123 daniel 6032: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 6033: if ((op != NULL) && (op != ret))
6034: xmlFreeElementContent(op);
1.211 veillard 6035: if ((last != NULL) && (last != ret) &&
6036: (last != ret->c1) && (last != ret->c2))
1.170 daniel 6037: xmlFreeElementContent(last);
6038: if (ret != NULL)
6039: xmlFreeElementContent(ret);
1.62 daniel 6040: return(NULL);
6041: }
1.101 daniel 6042: GROW;
1.62 daniel 6043: SKIP_BLANKS;
1.101 daniel 6044: GROW;
1.152 daniel 6045: if (RAW == '(') {
1.63 daniel 6046: /* Recurse on second child */
1.62 daniel 6047: NEXT;
6048: SKIP_BLANKS;
1.65 daniel 6049: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 6050: SKIP_BLANKS;
6051: } else {
6052: elem = xmlParseName(ctxt);
6053: if (elem == NULL) {
6054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6055: ctxt->sax->error(ctxt->userData,
1.122 daniel 6056: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 6057: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 6058: ctxt->wellFormed = 0;
1.180 daniel 6059: ctxt->disableSAX = 1;
1.170 daniel 6060: if ((op != NULL) && (op != ret))
6061: xmlFreeElementContent(op);
1.211 veillard 6062: if ((last != NULL) && (last != ret) &&
6063: (last != ret->c1) && (last != ret->c2))
1.170 daniel 6064: xmlFreeElementContent(last);
6065: if (ret != NULL)
6066: xmlFreeElementContent(ret);
1.62 daniel 6067: return(NULL);
6068: }
1.65 daniel 6069: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 6070: xmlFree(elem);
1.152 daniel 6071: if (RAW == '?') {
1.105 daniel 6072: last->ocur = XML_ELEMENT_CONTENT_OPT;
6073: NEXT;
1.152 daniel 6074: } else if (RAW == '*') {
1.105 daniel 6075: last->ocur = XML_ELEMENT_CONTENT_MULT;
6076: NEXT;
1.152 daniel 6077: } else if (RAW == '+') {
1.105 daniel 6078: last->ocur = XML_ELEMENT_CONTENT_PLUS;
6079: NEXT;
6080: } else {
6081: last->ocur = XML_ELEMENT_CONTENT_ONCE;
6082: }
1.63 daniel 6083: }
6084: SKIP_BLANKS;
1.97 daniel 6085: GROW;
1.64 daniel 6086: }
1.65 daniel 6087: if ((cur != NULL) && (last != NULL)) {
6088: cur->c2 = last;
1.62 daniel 6089: }
1.187 daniel 6090: ctxt->entity = ctxt->input;
1.62 daniel 6091: NEXT;
1.152 daniel 6092: if (RAW == '?') {
1.62 daniel 6093: ret->ocur = XML_ELEMENT_CONTENT_OPT;
6094: NEXT;
1.152 daniel 6095: } else if (RAW == '*') {
1.62 daniel 6096: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6097: NEXT;
1.152 daniel 6098: } else if (RAW == '+') {
1.62 daniel 6099: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6100: NEXT;
6101: }
6102: return(ret);
1.61 daniel 6103: }
6104:
6105: /**
6106: * xmlParseElementContentDecl:
6107: * @ctxt: an XML parser context
6108: * @name: the name of the element being defined.
6109: * @result: the Element Content pointer will be stored here if any
1.22 daniel 6110: *
1.61 daniel 6111: * parse the declaration for an Element content either Mixed or Children,
6112: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6113: *
6114: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 6115: *
1.61 daniel 6116: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 6117: */
6118:
1.61 daniel 6119: int
1.123 daniel 6120: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 6121: xmlElementContentPtr *result) {
6122:
6123: xmlElementContentPtr tree = NULL;
1.187 daniel 6124: xmlParserInputPtr input = ctxt->input;
1.61 daniel 6125: int res;
6126:
6127: *result = NULL;
6128:
1.152 daniel 6129: if (RAW != '(') {
1.61 daniel 6130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6131: ctxt->sax->error(ctxt->userData,
1.61 daniel 6132: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 6133: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 6134: ctxt->wellFormed = 0;
1.180 daniel 6135: ctxt->disableSAX = 1;
1.61 daniel 6136: return(-1);
6137: }
6138: NEXT;
1.97 daniel 6139: GROW;
1.61 daniel 6140: SKIP_BLANKS;
1.152 daniel 6141: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 6142: (NXT(2) == 'C') && (NXT(3) == 'D') &&
6143: (NXT(4) == 'A') && (NXT(5) == 'T') &&
6144: (NXT(6) == 'A')) {
1.62 daniel 6145: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 6146: res = XML_ELEMENT_TYPE_MIXED;
6147: } else {
1.62 daniel 6148: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 6149: res = XML_ELEMENT_TYPE_ELEMENT;
6150: }
1.187 daniel 6151: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
6152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6153: ctxt->sax->error(ctxt->userData,
6154: "Element content declaration doesn't start and stop in the same entity\n");
6155: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6156: ctxt->wellFormed = 0;
6157: ctxt->disableSAX = 1;
6158: }
1.61 daniel 6159: SKIP_BLANKS;
1.63 daniel 6160: /****************************
1.152 daniel 6161: if (RAW != ')') {
1.61 daniel 6162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6163: ctxt->sax->error(ctxt->userData,
1.61 daniel 6164: "xmlParseElementContentDecl : ')' expected\n");
6165: ctxt->wellFormed = 0;
1.180 daniel 6166: ctxt->disableSAX = 1;
1.61 daniel 6167: return(-1);
6168: }
1.63 daniel 6169: ****************************/
6170: *result = tree;
1.61 daniel 6171: return(res);
1.22 daniel 6172: }
6173:
1.50 daniel 6174: /**
6175: * xmlParseElementDecl:
6176: * @ctxt: an XML parser context
6177: *
6178: * parse an Element declaration.
1.22 daniel 6179: *
6180: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6181: *
1.99 daniel 6182: * [ VC: Unique Element Type Declaration ]
1.117 daniel 6183: * No element type may be declared more than once
1.69 daniel 6184: *
6185: * Returns the type of the element, or -1 in case of error
1.22 daniel 6186: */
1.59 daniel 6187: int
1.55 daniel 6188: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6189: xmlChar *name;
1.59 daniel 6190: int ret = -1;
1.61 daniel 6191: xmlElementContentPtr content = NULL;
1.22 daniel 6192:
1.97 daniel 6193: GROW;
1.152 daniel 6194: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6195: (NXT(2) == 'E') && (NXT(3) == 'L') &&
6196: (NXT(4) == 'E') && (NXT(5) == 'M') &&
6197: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 6198: (NXT(8) == 'T')) {
1.187 daniel 6199: xmlParserInputPtr input = ctxt->input;
6200:
1.40 daniel 6201: SKIP(9);
1.59 daniel 6202: if (!IS_BLANK(CUR)) {
6203: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6204: ctxt->sax->error(ctxt->userData,
1.59 daniel 6205: "Space required after 'ELEMENT'\n");
1.123 daniel 6206: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6207: ctxt->wellFormed = 0;
1.180 daniel 6208: ctxt->disableSAX = 1;
1.59 daniel 6209: }
1.42 daniel 6210: SKIP_BLANKS;
1.22 daniel 6211: name = xmlParseName(ctxt);
6212: if (name == NULL) {
1.55 daniel 6213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6214: ctxt->sax->error(ctxt->userData,
1.59 daniel 6215: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 6216: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6217: ctxt->wellFormed = 0;
1.180 daniel 6218: ctxt->disableSAX = 1;
1.59 daniel 6219: return(-1);
6220: }
6221: if (!IS_BLANK(CUR)) {
6222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6223: ctxt->sax->error(ctxt->userData,
1.59 daniel 6224: "Space required after the element name\n");
1.123 daniel 6225: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6226: ctxt->wellFormed = 0;
1.180 daniel 6227: ctxt->disableSAX = 1;
1.22 daniel 6228: }
1.42 daniel 6229: SKIP_BLANKS;
1.152 daniel 6230: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 6231: (NXT(2) == 'P') && (NXT(3) == 'T') &&
6232: (NXT(4) == 'Y')) {
6233: SKIP(5);
1.22 daniel 6234: /*
6235: * Element must always be empty.
6236: */
1.59 daniel 6237: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 6238: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 6239: (NXT(2) == 'Y')) {
6240: SKIP(3);
1.22 daniel 6241: /*
6242: * Element is a generic container.
6243: */
1.59 daniel 6244: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 6245: } else if (RAW == '(') {
1.61 daniel 6246: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 6247: } else {
1.98 daniel 6248: /*
6249: * [ WFC: PEs in Internal Subset ] error handling.
6250: */
1.152 daniel 6251: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 6252: (ctxt->inputNr == 1)) {
6253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6254: ctxt->sax->error(ctxt->userData,
6255: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 6256: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 6257: } else {
6258: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6259: ctxt->sax->error(ctxt->userData,
6260: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 6261: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 6262: }
1.61 daniel 6263: ctxt->wellFormed = 0;
1.180 daniel 6264: ctxt->disableSAX = 1;
1.119 daniel 6265: if (name != NULL) xmlFree(name);
1.61 daniel 6266: return(-1);
1.22 daniel 6267: }
1.142 daniel 6268:
6269: SKIP_BLANKS;
6270: /*
6271: * Pop-up of finished entities.
6272: */
1.152 daniel 6273: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 6274: xmlPopInput(ctxt);
1.42 daniel 6275: SKIP_BLANKS;
1.142 daniel 6276:
1.152 daniel 6277: if (RAW != '>') {
1.55 daniel 6278: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6279: ctxt->sax->error(ctxt->userData,
1.31 daniel 6280: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 6281: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 6282: ctxt->wellFormed = 0;
1.180 daniel 6283: ctxt->disableSAX = 1;
1.61 daniel 6284: } else {
1.187 daniel 6285: if (input != ctxt->input) {
6286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6287: ctxt->sax->error(ctxt->userData,
6288: "Element declaration doesn't start and stop in the same entity\n");
6289: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6290: ctxt->wellFormed = 0;
6291: ctxt->disableSAX = 1;
6292: }
6293:
1.40 daniel 6294: NEXT;
1.171 daniel 6295: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6296: (ctxt->sax->elementDecl != NULL))
1.76 daniel 6297: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6298: content);
1.61 daniel 6299: }
1.84 daniel 6300: if (content != NULL) {
6301: xmlFreeElementContent(content);
6302: }
1.61 daniel 6303: if (name != NULL) {
1.119 daniel 6304: xmlFree(name);
1.61 daniel 6305: }
1.22 daniel 6306: }
1.59 daniel 6307: return(ret);
1.22 daniel 6308: }
6309:
1.50 daniel 6310: /**
6311: * xmlParseMarkupDecl:
6312: * @ctxt: an XML parser context
6313: *
6314: * parse Markup declarations
1.22 daniel 6315: *
6316: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6317: * NotationDecl | PI | Comment
6318: *
1.98 daniel 6319: * [ VC: Proper Declaration/PE Nesting ]
6320: * TODO Parameter-entity replacement text must be properly nested with
6321: * markup declarations. That is to say, if either the first character
6322: * or the last character of a markup declaration (markupdecl above) is
6323: * contained in the replacement text for a parameter-entity reference,
6324: * both must be contained in the same replacement text.
6325: *
6326: * [ WFC: PEs in Internal Subset ]
6327: * In the internal DTD subset, parameter-entity references can occur
6328: * only where markup declarations can occur, not within markup declarations.
6329: * (This does not apply to references that occur in external parameter
6330: * entities or to the external subset.)
1.22 daniel 6331: */
1.55 daniel 6332: void
6333: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 6334: GROW;
1.22 daniel 6335: xmlParseElementDecl(ctxt);
6336: xmlParseAttributeListDecl(ctxt);
6337: xmlParseEntityDecl(ctxt);
6338: xmlParseNotationDecl(ctxt);
6339: xmlParsePI(ctxt);
1.114 daniel 6340: xmlParseComment(ctxt);
1.98 daniel 6341: /*
6342: * This is only for internal subset. On external entities,
6343: * the replacement is done before parsing stage
6344: */
6345: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6346: xmlParsePEReference(ctxt);
1.97 daniel 6347: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 6348: }
6349:
1.50 daniel 6350: /**
1.76 daniel 6351: * xmlParseTextDecl:
6352: * @ctxt: an XML parser context
6353: *
6354: * parse an XML declaration header for external entities
6355: *
6356: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 6357: *
6358: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 6359: */
6360:
1.172 daniel 6361: void
1.76 daniel 6362: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6363: xmlChar *version;
1.76 daniel 6364:
6365: /*
6366: * We know that '<?xml' is here.
6367: */
1.193 daniel 6368: if ((RAW == '<') && (NXT(1) == '?') &&
6369: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6370: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6371: SKIP(5);
6372: } else {
6373: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6374: ctxt->sax->error(ctxt->userData,
6375: "Text declaration '<?xml' required\n");
6376: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6377: ctxt->wellFormed = 0;
6378: ctxt->disableSAX = 1;
6379:
6380: return;
6381: }
1.76 daniel 6382:
6383: if (!IS_BLANK(CUR)) {
6384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6385: ctxt->sax->error(ctxt->userData,
6386: "Space needed after '<?xml'\n");
1.123 daniel 6387: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6388: ctxt->wellFormed = 0;
1.180 daniel 6389: ctxt->disableSAX = 1;
1.76 daniel 6390: }
6391: SKIP_BLANKS;
6392:
6393: /*
6394: * We may have the VersionInfo here.
6395: */
6396: version = xmlParseVersionInfo(ctxt);
6397: if (version == NULL)
6398: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 6399: ctxt->input->version = version;
1.76 daniel 6400:
6401: /*
6402: * We must have the encoding declaration
6403: */
6404: if (!IS_BLANK(CUR)) {
6405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6406: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6407: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6408: ctxt->wellFormed = 0;
1.180 daniel 6409: ctxt->disableSAX = 1;
1.76 daniel 6410: }
1.195 daniel 6411: xmlParseEncodingDecl(ctxt);
1.193 daniel 6412: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6413: /*
6414: * The XML REC instructs us to stop parsing right here
6415: */
6416: return;
6417: }
1.76 daniel 6418:
6419: SKIP_BLANKS;
1.152 daniel 6420: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6421: SKIP(2);
1.152 daniel 6422: } else if (RAW == '>') {
1.76 daniel 6423: /* Deprecated old WD ... */
6424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6425: ctxt->sax->error(ctxt->userData,
6426: "XML declaration must end-up with '?>'\n");
1.123 daniel 6427: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6428: ctxt->wellFormed = 0;
1.180 daniel 6429: ctxt->disableSAX = 1;
1.76 daniel 6430: NEXT;
6431: } else {
6432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6433: ctxt->sax->error(ctxt->userData,
6434: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6435: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6436: ctxt->wellFormed = 0;
1.180 daniel 6437: ctxt->disableSAX = 1;
1.76 daniel 6438: MOVETO_ENDTAG(CUR_PTR);
6439: NEXT;
6440: }
6441: }
6442:
6443: /*
6444: * xmlParseConditionalSections
6445: * @ctxt: an XML parser context
6446: *
6447: * TODO : Conditionnal section are not yet supported !
6448: *
6449: * [61] conditionalSect ::= includeSect | ignoreSect
6450: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6451: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6452: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6453: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6454: */
6455:
6456: void
6457: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6458: SKIP(3);
6459: SKIP_BLANKS;
1.168 daniel 6460: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6461: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6462: (NXT(6) == 'E')) {
1.165 daniel 6463: SKIP(7);
1.168 daniel 6464: SKIP_BLANKS;
6465: if (RAW != '[') {
6466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6467: ctxt->sax->error(ctxt->userData,
6468: "XML conditional section '[' expected\n");
6469: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6470: ctxt->wellFormed = 0;
1.180 daniel 6471: ctxt->disableSAX = 1;
1.168 daniel 6472: } else {
6473: NEXT;
6474: }
1.165 daniel 6475: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6476: (NXT(2) != '>'))) {
6477: const xmlChar *check = CUR_PTR;
6478: int cons = ctxt->input->consumed;
6479: int tok = ctxt->token;
6480:
6481: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6482: xmlParseConditionalSections(ctxt);
6483: } else if (IS_BLANK(CUR)) {
6484: NEXT;
6485: } else if (RAW == '%') {
6486: xmlParsePEReference(ctxt);
6487: } else
6488: xmlParseMarkupDecl(ctxt);
6489:
6490: /*
6491: * Pop-up of finished entities.
6492: */
6493: while ((RAW == 0) && (ctxt->inputNr > 1))
6494: xmlPopInput(ctxt);
6495:
6496: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6497: (tok == ctxt->token)) {
6498: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6499: ctxt->sax->error(ctxt->userData,
6500: "Content error in the external subset\n");
6501: ctxt->wellFormed = 0;
1.180 daniel 6502: ctxt->disableSAX = 1;
1.165 daniel 6503: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6504: break;
6505: }
6506: }
1.168 daniel 6507: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6508: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6509: int state;
6510:
1.168 daniel 6511: SKIP(6);
6512: SKIP_BLANKS;
6513: if (RAW != '[') {
6514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6515: ctxt->sax->error(ctxt->userData,
6516: "XML conditional section '[' expected\n");
6517: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6518: ctxt->wellFormed = 0;
1.180 daniel 6519: ctxt->disableSAX = 1;
1.168 daniel 6520: } else {
6521: NEXT;
6522: }
1.171 daniel 6523:
1.143 daniel 6524: /*
1.171 daniel 6525: * Parse up to the end of the conditionnal section
6526: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6527: */
1.171 daniel 6528: state = ctxt->disableSAX;
1.165 daniel 6529: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6530: (NXT(2) != '>'))) {
1.171 daniel 6531: const xmlChar *check = CUR_PTR;
6532: int cons = ctxt->input->consumed;
6533: int tok = ctxt->token;
6534:
6535: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6536: xmlParseConditionalSections(ctxt);
6537: } else if (IS_BLANK(CUR)) {
6538: NEXT;
6539: } else if (RAW == '%') {
6540: xmlParsePEReference(ctxt);
6541: } else
6542: xmlParseMarkupDecl(ctxt);
6543:
1.165 daniel 6544: /*
6545: * Pop-up of finished entities.
6546: */
6547: while ((RAW == 0) && (ctxt->inputNr > 1))
6548: xmlPopInput(ctxt);
1.143 daniel 6549:
1.171 daniel 6550: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6551: (tok == ctxt->token)) {
6552: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6553: ctxt->sax->error(ctxt->userData,
6554: "Content error in the external subset\n");
6555: ctxt->wellFormed = 0;
1.180 daniel 6556: ctxt->disableSAX = 1;
1.171 daniel 6557: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6558: break;
6559: }
1.165 daniel 6560: }
1.171 daniel 6561: ctxt->disableSAX = state;
1.168 daniel 6562: } else {
6563: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6564: ctxt->sax->error(ctxt->userData,
6565: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6566: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6567: ctxt->wellFormed = 0;
1.180 daniel 6568: ctxt->disableSAX = 1;
1.143 daniel 6569: }
6570:
1.152 daniel 6571: if (RAW == 0)
1.143 daniel 6572: SHRINK;
6573:
1.152 daniel 6574: if (RAW == 0) {
1.76 daniel 6575: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6576: ctxt->sax->error(ctxt->userData,
6577: "XML conditional section not closed\n");
1.123 daniel 6578: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6579: ctxt->wellFormed = 0;
1.180 daniel 6580: ctxt->disableSAX = 1;
1.143 daniel 6581: } else {
6582: SKIP(3);
1.76 daniel 6583: }
6584: }
6585:
6586: /**
1.124 daniel 6587: * xmlParseExternalSubset:
1.76 daniel 6588: * @ctxt: an XML parser context
1.124 daniel 6589: * @ExternalID: the external identifier
6590: * @SystemID: the system identifier (or URL)
1.76 daniel 6591: *
6592: * parse Markup declarations from an external subset
6593: *
6594: * [30] extSubset ::= textDecl? extSubsetDecl
6595: *
6596: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6597: */
6598: void
1.123 daniel 6599: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6600: const xmlChar *SystemID) {
1.132 daniel 6601: GROW;
1.152 daniel 6602: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6603: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6604: (NXT(4) == 'l')) {
1.172 daniel 6605: xmlParseTextDecl(ctxt);
1.193 daniel 6606: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6607: /*
6608: * The XML REC instructs us to stop parsing right here
6609: */
6610: ctxt->instate = XML_PARSER_EOF;
6611: return;
6612: }
1.76 daniel 6613: }
1.79 daniel 6614: if (ctxt->myDoc == NULL) {
1.116 daniel 6615: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6616: }
6617: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6618: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6619:
1.96 daniel 6620: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6621: ctxt->external = 1;
1.152 daniel 6622: while (((RAW == '<') && (NXT(1) == '?')) ||
6623: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6624: IS_BLANK(CUR)) {
1.123 daniel 6625: const xmlChar *check = CUR_PTR;
1.115 daniel 6626: int cons = ctxt->input->consumed;
1.164 daniel 6627: int tok = ctxt->token;
1.115 daniel 6628:
1.152 daniel 6629: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6630: xmlParseConditionalSections(ctxt);
6631: } else if (IS_BLANK(CUR)) {
6632: NEXT;
1.152 daniel 6633: } else if (RAW == '%') {
1.76 daniel 6634: xmlParsePEReference(ctxt);
6635: } else
6636: xmlParseMarkupDecl(ctxt);
1.77 daniel 6637:
6638: /*
6639: * Pop-up of finished entities.
6640: */
1.166 daniel 6641: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6642: xmlPopInput(ctxt);
6643:
1.164 daniel 6644: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6645: (tok == ctxt->token)) {
1.115 daniel 6646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6647: ctxt->sax->error(ctxt->userData,
6648: "Content error in the external subset\n");
6649: ctxt->wellFormed = 0;
1.180 daniel 6650: ctxt->disableSAX = 1;
1.123 daniel 6651: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6652: break;
6653: }
1.76 daniel 6654: }
6655:
1.152 daniel 6656: if (RAW != 0) {
1.76 daniel 6657: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6658: ctxt->sax->error(ctxt->userData,
6659: "Extra content at the end of the document\n");
1.123 daniel 6660: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6661: ctxt->wellFormed = 0;
1.180 daniel 6662: ctxt->disableSAX = 1;
1.76 daniel 6663: }
6664:
6665: }
6666:
6667: /**
1.77 daniel 6668: * xmlParseReference:
6669: * @ctxt: an XML parser context
6670: *
6671: * parse and handle entity references in content, depending on the SAX
6672: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6673: * CharRef, a predefined entity, if there is no reference() callback.
6674: * or if the parser was asked to switch to that mode.
1.77 daniel 6675: *
6676: * [67] Reference ::= EntityRef | CharRef
6677: */
6678: void
6679: xmlParseReference(xmlParserCtxtPtr ctxt) {
6680: xmlEntityPtr ent;
1.123 daniel 6681: xmlChar *val;
1.152 daniel 6682: if (RAW != '&') return;
1.77 daniel 6683:
1.113 daniel 6684: if (ctxt->inputNr > 1) {
1.123 daniel 6685: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6686:
1.171 daniel 6687: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6688: (!ctxt->disableSAX))
1.113 daniel 6689: ctxt->sax->characters(ctxt->userData, cur, 1);
6690: if (ctxt->token == '&')
6691: ctxt->token = 0;
6692: else {
6693: SKIP(1);
6694: }
6695: return;
6696: }
1.77 daniel 6697: if (NXT(1) == '#') {
1.152 daniel 6698: int i = 0;
1.153 daniel 6699: xmlChar out[10];
6700: int hex = NXT(2);
1.77 daniel 6701: int val = xmlParseCharRef(ctxt);
1.152 daniel 6702:
1.198 daniel 6703: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 6704: /*
6705: * So we are using non-UTF-8 buffers
6706: * Check that the char fit on 8bits, if not
6707: * generate a CharRef.
6708: */
6709: if (val <= 0xFF) {
6710: out[0] = val;
6711: out[1] = 0;
1.171 daniel 6712: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6713: (!ctxt->disableSAX))
1.153 daniel 6714: ctxt->sax->characters(ctxt->userData, out, 1);
6715: } else {
6716: if ((hex == 'x') || (hex == 'X'))
6717: sprintf((char *)out, "#x%X", val);
6718: else
6719: sprintf((char *)out, "#%d", val);
1.171 daniel 6720: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6721: (!ctxt->disableSAX))
1.153 daniel 6722: ctxt->sax->reference(ctxt->userData, out);
6723: }
6724: } else {
6725: /*
6726: * Just encode the value in UTF-8
6727: */
6728: COPY_BUF(0 ,out, i, val);
6729: out[i] = 0;
1.171 daniel 6730: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6731: (!ctxt->disableSAX))
1.153 daniel 6732: ctxt->sax->characters(ctxt->userData, out, i);
6733: }
1.77 daniel 6734: } else {
6735: ent = xmlParseEntityRef(ctxt);
6736: if (ent == NULL) return;
6737: if ((ent->name != NULL) &&
1.159 daniel 6738: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6739: xmlNodePtr list = NULL;
6740: int ret;
6741:
6742:
6743: /*
6744: * The first reference to the entity trigger a parsing phase
6745: * where the ent->children is filled with the result from
6746: * the parsing.
6747: */
6748: if (ent->children == NULL) {
6749: xmlChar *value;
6750: value = ent->content;
6751:
6752: /*
6753: * Check that this entity is well formed
6754: */
6755: if ((value != NULL) &&
6756: (value[1] == 0) && (value[0] == '<') &&
6757: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6758: /*
6759: * TODO: get definite answer on this !!!
6760: * Lots of entity decls are used to declare a single
6761: * char
6762: * <!ENTITY lt "<">
6763: * Which seems to be valid since
6764: * 2.4: The ampersand character (&) and the left angle
6765: * bracket (<) may appear in their literal form only
6766: * when used ... They are also legal within the literal
6767: * entity value of an internal entity declaration;i
6768: * see "4.3.2 Well-Formed Parsed Entities".
6769: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6770: * Looking at the OASIS test suite and James Clark
6771: * tests, this is broken. However the XML REC uses
6772: * it. Is the XML REC not well-formed ????
6773: * This is a hack to avoid this problem
6774: */
6775: list = xmlNewDocText(ctxt->myDoc, value);
6776: if (list != NULL) {
6777: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6778: (ent->children == NULL)) {
6779: ent->children = list;
6780: ent->last = list;
6781: list->parent = (xmlNodePtr) ent;
6782: } else {
6783: xmlFreeNodeList(list);
6784: }
6785: } else if (list != NULL) {
6786: xmlFreeNodeList(list);
6787: }
1.181 daniel 6788: } else {
1.180 daniel 6789: /*
6790: * 4.3.2: An internal general parsed entity is well-formed
6791: * if its replacement text matches the production labeled
6792: * content.
6793: */
1.185 daniel 6794: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6795: ctxt->depth++;
1.180 daniel 6796: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6797: ctxt->sax, NULL, ctxt->depth,
6798: value, &list);
6799: ctxt->depth--;
6800: } else if (ent->etype ==
6801: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6802: ctxt->depth++;
1.180 daniel 6803: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6804: ctxt->sax, NULL, ctxt->depth,
6805: ent->SystemID, ent->ExternalID, &list);
6806: ctxt->depth--;
6807: } else {
1.180 daniel 6808: ret = -1;
6809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6810: ctxt->sax->error(ctxt->userData,
6811: "Internal: invalid entity type\n");
6812: }
1.185 daniel 6813: if (ret == XML_ERR_ENTITY_LOOP) {
6814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6815: ctxt->sax->error(ctxt->userData,
6816: "Detected entity reference loop\n");
6817: ctxt->wellFormed = 0;
6818: ctxt->disableSAX = 1;
6819: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6820: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6821: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6822: (ent->children == NULL)) {
6823: ent->children = list;
6824: while (list != NULL) {
6825: list->parent = (xmlNodePtr) ent;
6826: if (list->next == NULL)
6827: ent->last = list;
6828: list = list->next;
6829: }
6830: } else {
6831: xmlFreeNodeList(list);
6832: }
6833: } else if (ret > 0) {
6834: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6835: ctxt->sax->error(ctxt->userData,
6836: "Entity value required\n");
6837: ctxt->errNo = ret;
6838: ctxt->wellFormed = 0;
6839: ctxt->disableSAX = 1;
6840: } else if (list != NULL) {
6841: xmlFreeNodeList(list);
6842: }
6843: }
6844: }
1.113 daniel 6845: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6846: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6847: /*
6848: * Create a node.
6849: */
6850: ctxt->sax->reference(ctxt->userData, ent->name);
6851: return;
6852: } else if (ctxt->replaceEntities) {
6853: xmlParserInputPtr input;
1.79 daniel 6854:
1.113 daniel 6855: input = xmlNewEntityInputStream(ctxt, ent);
6856: xmlPushInput(ctxt, input);
1.167 daniel 6857: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6858: (RAW == '<') && (NXT(1) == '?') &&
6859: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6860: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6861: xmlParseTextDecl(ctxt);
1.193 daniel 6862: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6863: /*
6864: * The XML REC instructs us to stop parsing right here
6865: */
6866: ctxt->instate = XML_PARSER_EOF;
6867: return;
6868: }
1.199 daniel 6869: if (input->standalone == 1) {
1.167 daniel 6870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6871: ctxt->sax->error(ctxt->userData,
6872: "external parsed entities cannot be standalone\n");
6873: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6874: ctxt->wellFormed = 0;
1.180 daniel 6875: ctxt->disableSAX = 1;
1.167 daniel 6876: }
6877: }
1.179 daniel 6878: /*
6879: * !!! TODO: build the tree under the entity first
6880: * 1234
6881: */
1.113 daniel 6882: return;
6883: }
1.77 daniel 6884: }
6885: val = ent->content;
6886: if (val == NULL) return;
6887: /*
6888: * inline the entity.
6889: */
1.171 daniel 6890: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6891: (!ctxt->disableSAX))
1.77 daniel 6892: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6893: }
1.24 daniel 6894: }
6895:
1.50 daniel 6896: /**
6897: * xmlParseEntityRef:
6898: * @ctxt: an XML parser context
6899: *
6900: * parse ENTITY references declarations
1.24 daniel 6901: *
6902: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6903: *
1.98 daniel 6904: * [ WFC: Entity Declared ]
6905: * In a document without any DTD, a document with only an internal DTD
6906: * subset which contains no parameter entity references, or a document
6907: * with "standalone='yes'", the Name given in the entity reference
6908: * must match that in an entity declaration, except that well-formed
6909: * documents need not declare any of the following entities: amp, lt,
6910: * gt, apos, quot. The declaration of a parameter entity must precede
6911: * any reference to it. Similarly, the declaration of a general entity
6912: * must precede any reference to it which appears in a default value in an
6913: * attribute-list declaration. Note that if entities are declared in the
6914: * external subset or in external parameter entities, a non-validating
6915: * processor is not obligated to read and process their declarations;
6916: * for such documents, the rule that an entity must be declared is a
6917: * well-formedness constraint only if standalone='yes'.
6918: *
6919: * [ WFC: Parsed Entity ]
6920: * An entity reference must not contain the name of an unparsed entity
6921: *
1.77 daniel 6922: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6923: */
1.77 daniel 6924: xmlEntityPtr
1.55 daniel 6925: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6926: xmlChar *name;
1.72 daniel 6927: xmlEntityPtr ent = NULL;
1.24 daniel 6928:
1.91 daniel 6929: GROW;
1.111 daniel 6930:
1.152 daniel 6931: if (RAW == '&') {
1.40 daniel 6932: NEXT;
1.24 daniel 6933: name = xmlParseName(ctxt);
6934: if (name == NULL) {
1.55 daniel 6935: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6936: ctxt->sax->error(ctxt->userData,
6937: "xmlParseEntityRef: no name\n");
1.123 daniel 6938: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6939: ctxt->wellFormed = 0;
1.180 daniel 6940: ctxt->disableSAX = 1;
1.24 daniel 6941: } else {
1.152 daniel 6942: if (RAW == ';') {
1.40 daniel 6943: NEXT;
1.24 daniel 6944: /*
1.77 daniel 6945: * Ask first SAX for entity resolution, otherwise try the
6946: * predefined set.
6947: */
6948: if (ctxt->sax != NULL) {
6949: if (ctxt->sax->getEntity != NULL)
6950: ent = ctxt->sax->getEntity(ctxt->userData, name);
6951: if (ent == NULL)
6952: ent = xmlGetPredefinedEntity(name);
6953: }
6954: /*
1.98 daniel 6955: * [ WFC: Entity Declared ]
6956: * In a document without any DTD, a document with only an
6957: * internal DTD subset which contains no parameter entity
6958: * references, or a document with "standalone='yes'", the
6959: * Name given in the entity reference must match that in an
6960: * entity declaration, except that well-formed documents
6961: * need not declare any of the following entities: amp, lt,
6962: * gt, apos, quot.
6963: * The declaration of a parameter entity must precede any
6964: * reference to it.
6965: * Similarly, the declaration of a general entity must
6966: * precede any reference to it which appears in a default
6967: * value in an attribute-list declaration. Note that if
6968: * entities are declared in the external subset or in
6969: * external parameter entities, a non-validating processor
6970: * is not obligated to read and process their declarations;
6971: * for such documents, the rule that an entity must be
6972: * declared is a well-formedness constraint only if
6973: * standalone='yes'.
1.59 daniel 6974: */
1.77 daniel 6975: if (ent == NULL) {
1.98 daniel 6976: if ((ctxt->standalone == 1) ||
6977: ((ctxt->hasExternalSubset == 0) &&
6978: (ctxt->hasPErefs == 0))) {
6979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6980: ctxt->sax->error(ctxt->userData,
6981: "Entity '%s' not defined\n", name);
1.123 daniel 6982: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6983: ctxt->wellFormed = 0;
1.180 daniel 6984: ctxt->disableSAX = 1;
1.77 daniel 6985: } else {
1.98 daniel 6986: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6987: ctxt->sax->warning(ctxt->userData,
6988: "Entity '%s' not defined\n", name);
1.123 daniel 6989: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6990: }
1.77 daniel 6991: }
1.59 daniel 6992:
6993: /*
1.98 daniel 6994: * [ WFC: Parsed Entity ]
6995: * An entity reference must not contain the name of an
6996: * unparsed entity
6997: */
1.159 daniel 6998: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6999: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7000: ctxt->sax->error(ctxt->userData,
7001: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 7002: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 7003: ctxt->wellFormed = 0;
1.180 daniel 7004: ctxt->disableSAX = 1;
1.98 daniel 7005: }
7006:
7007: /*
7008: * [ WFC: No External Entity References ]
7009: * Attribute values cannot contain direct or indirect
7010: * entity references to external entities.
7011: */
7012: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 7013: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 7014: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7015: ctxt->sax->error(ctxt->userData,
7016: "Attribute references external entity '%s'\n", name);
1.123 daniel 7017: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 7018: ctxt->wellFormed = 0;
1.180 daniel 7019: ctxt->disableSAX = 1;
1.98 daniel 7020: }
7021: /*
7022: * [ WFC: No < in Attribute Values ]
7023: * The replacement text of any entity referred to directly or
7024: * indirectly in an attribute value (other than "<") must
7025: * not contain a <.
1.59 daniel 7026: */
1.98 daniel 7027: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 7028: (ent != NULL) &&
7029: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 7030: (ent->content != NULL) &&
7031: (xmlStrchr(ent->content, '<'))) {
7032: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7033: ctxt->sax->error(ctxt->userData,
7034: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 7035: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 7036: ctxt->wellFormed = 0;
1.180 daniel 7037: ctxt->disableSAX = 1;
1.98 daniel 7038: }
7039:
7040: /*
7041: * Internal check, no parameter entities here ...
7042: */
7043: else {
1.159 daniel 7044: switch (ent->etype) {
1.59 daniel 7045: case XML_INTERNAL_PARAMETER_ENTITY:
7046: case XML_EXTERNAL_PARAMETER_ENTITY:
7047: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7048: ctxt->sax->error(ctxt->userData,
1.59 daniel 7049: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 7050: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 7051: ctxt->wellFormed = 0;
1.180 daniel 7052: ctxt->disableSAX = 1;
7053: break;
7054: default:
1.59 daniel 7055: break;
7056: }
7057: }
7058:
7059: /*
1.98 daniel 7060: * [ WFC: No Recursion ]
1.117 daniel 7061: * TODO A parsed entity must not contain a recursive reference
7062: * to itself, either directly or indirectly.
1.59 daniel 7063: */
1.77 daniel 7064:
1.24 daniel 7065: } else {
1.55 daniel 7066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7067: ctxt->sax->error(ctxt->userData,
1.59 daniel 7068: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 7069: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7070: ctxt->wellFormed = 0;
1.180 daniel 7071: ctxt->disableSAX = 1;
1.24 daniel 7072: }
1.119 daniel 7073: xmlFree(name);
1.24 daniel 7074: }
7075: }
1.77 daniel 7076: return(ent);
1.24 daniel 7077: }
1.135 daniel 7078: /**
7079: * xmlParseStringEntityRef:
7080: * @ctxt: an XML parser context
7081: * @str: a pointer to an index in the string
7082: *
7083: * parse ENTITY references declarations, but this version parses it from
7084: * a string value.
7085: *
7086: * [68] EntityRef ::= '&' Name ';'
7087: *
7088: * [ WFC: Entity Declared ]
7089: * In a document without any DTD, a document with only an internal DTD
7090: * subset which contains no parameter entity references, or a document
7091: * with "standalone='yes'", the Name given in the entity reference
7092: * must match that in an entity declaration, except that well-formed
7093: * documents need not declare any of the following entities: amp, lt,
7094: * gt, apos, quot. The declaration of a parameter entity must precede
7095: * any reference to it. Similarly, the declaration of a general entity
7096: * must precede any reference to it which appears in a default value in an
7097: * attribute-list declaration. Note that if entities are declared in the
7098: * external subset or in external parameter entities, a non-validating
7099: * processor is not obligated to read and process their declarations;
7100: * for such documents, the rule that an entity must be declared is a
7101: * well-formedness constraint only if standalone='yes'.
7102: *
7103: * [ WFC: Parsed Entity ]
7104: * An entity reference must not contain the name of an unparsed entity
7105: *
7106: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7107: * is updated to the current location in the string.
7108: */
7109: xmlEntityPtr
7110: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7111: xmlChar *name;
7112: const xmlChar *ptr;
7113: xmlChar cur;
7114: xmlEntityPtr ent = NULL;
7115:
1.156 daniel 7116: if ((str == NULL) || (*str == NULL))
7117: return(NULL);
1.135 daniel 7118: ptr = *str;
7119: cur = *ptr;
7120: if (cur == '&') {
7121: ptr++;
7122: cur = *ptr;
7123: name = xmlParseStringName(ctxt, &ptr);
7124: if (name == NULL) {
7125: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7126: ctxt->sax->error(ctxt->userData,
7127: "xmlParseEntityRef: no name\n");
7128: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7129: ctxt->wellFormed = 0;
1.180 daniel 7130: ctxt->disableSAX = 1;
1.135 daniel 7131: } else {
1.185 daniel 7132: if (*ptr == ';') {
7133: ptr++;
1.135 daniel 7134: /*
7135: * Ask first SAX for entity resolution, otherwise try the
7136: * predefined set.
7137: */
7138: if (ctxt->sax != NULL) {
7139: if (ctxt->sax->getEntity != NULL)
7140: ent = ctxt->sax->getEntity(ctxt->userData, name);
7141: if (ent == NULL)
7142: ent = xmlGetPredefinedEntity(name);
7143: }
7144: /*
7145: * [ WFC: Entity Declared ]
7146: * In a document without any DTD, a document with only an
7147: * internal DTD subset which contains no parameter entity
7148: * references, or a document with "standalone='yes'", the
7149: * Name given in the entity reference must match that in an
7150: * entity declaration, except that well-formed documents
7151: * need not declare any of the following entities: amp, lt,
7152: * gt, apos, quot.
7153: * The declaration of a parameter entity must precede any
7154: * reference to it.
7155: * Similarly, the declaration of a general entity must
7156: * precede any reference to it which appears in a default
7157: * value in an attribute-list declaration. Note that if
7158: * entities are declared in the external subset or in
7159: * external parameter entities, a non-validating processor
7160: * is not obligated to read and process their declarations;
7161: * for such documents, the rule that an entity must be
7162: * declared is a well-formedness constraint only if
7163: * standalone='yes'.
7164: */
7165: if (ent == NULL) {
7166: if ((ctxt->standalone == 1) ||
7167: ((ctxt->hasExternalSubset == 0) &&
7168: (ctxt->hasPErefs == 0))) {
7169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7170: ctxt->sax->error(ctxt->userData,
7171: "Entity '%s' not defined\n", name);
7172: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7173: ctxt->wellFormed = 0;
1.180 daniel 7174: ctxt->disableSAX = 1;
1.135 daniel 7175: } else {
7176: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7177: ctxt->sax->warning(ctxt->userData,
7178: "Entity '%s' not defined\n", name);
7179: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
7180: }
7181: }
7182:
7183: /*
7184: * [ WFC: Parsed Entity ]
7185: * An entity reference must not contain the name of an
7186: * unparsed entity
7187: */
1.159 daniel 7188: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 7189: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7190: ctxt->sax->error(ctxt->userData,
7191: "Entity reference to unparsed entity %s\n", name);
7192: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
7193: ctxt->wellFormed = 0;
1.180 daniel 7194: ctxt->disableSAX = 1;
1.135 daniel 7195: }
7196:
7197: /*
7198: * [ WFC: No External Entity References ]
7199: * Attribute values cannot contain direct or indirect
7200: * entity references to external entities.
7201: */
7202: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 7203: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 7204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7205: ctxt->sax->error(ctxt->userData,
7206: "Attribute references external entity '%s'\n", name);
7207: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
7208: ctxt->wellFormed = 0;
1.180 daniel 7209: ctxt->disableSAX = 1;
1.135 daniel 7210: }
7211: /*
7212: * [ WFC: No < in Attribute Values ]
7213: * The replacement text of any entity referred to directly or
7214: * indirectly in an attribute value (other than "<") must
7215: * not contain a <.
7216: */
7217: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7218: (ent != NULL) &&
7219: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
7220: (ent->content != NULL) &&
7221: (xmlStrchr(ent->content, '<'))) {
7222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7223: ctxt->sax->error(ctxt->userData,
7224: "'<' in entity '%s' is not allowed in attributes values\n", name);
7225: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
7226: ctxt->wellFormed = 0;
1.180 daniel 7227: ctxt->disableSAX = 1;
1.135 daniel 7228: }
7229:
7230: /*
7231: * Internal check, no parameter entities here ...
7232: */
7233: else {
1.159 daniel 7234: switch (ent->etype) {
1.135 daniel 7235: case XML_INTERNAL_PARAMETER_ENTITY:
7236: case XML_EXTERNAL_PARAMETER_ENTITY:
7237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7238: ctxt->sax->error(ctxt->userData,
7239: "Attempt to reference the parameter entity '%s'\n", name);
7240: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
7241: ctxt->wellFormed = 0;
1.180 daniel 7242: ctxt->disableSAX = 1;
7243: break;
7244: default:
1.135 daniel 7245: break;
7246: }
7247: }
7248:
7249: /*
7250: * [ WFC: No Recursion ]
7251: * TODO A parsed entity must not contain a recursive reference
7252: * to itself, either directly or indirectly.
7253: */
7254:
7255: } else {
7256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7257: ctxt->sax->error(ctxt->userData,
7258: "xmlParseEntityRef: expecting ';'\n");
7259: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7260: ctxt->wellFormed = 0;
1.180 daniel 7261: ctxt->disableSAX = 1;
1.135 daniel 7262: }
7263: xmlFree(name);
7264: }
7265: }
1.185 daniel 7266: *str = ptr;
1.135 daniel 7267: return(ent);
7268: }
1.24 daniel 7269:
1.50 daniel 7270: /**
7271: * xmlParsePEReference:
7272: * @ctxt: an XML parser context
7273: *
7274: * parse PEReference declarations
1.77 daniel 7275: * The entity content is handled directly by pushing it's content as
7276: * a new input stream.
1.22 daniel 7277: *
7278: * [69] PEReference ::= '%' Name ';'
1.68 daniel 7279: *
1.98 daniel 7280: * [ WFC: No Recursion ]
7281: * TODO A parsed entity must not contain a recursive
7282: * reference to itself, either directly or indirectly.
7283: *
7284: * [ WFC: Entity Declared ]
7285: * In a document without any DTD, a document with only an internal DTD
7286: * subset which contains no parameter entity references, or a document
7287: * with "standalone='yes'", ... ... The declaration of a parameter
7288: * entity must precede any reference to it...
7289: *
7290: * [ VC: Entity Declared ]
7291: * In a document with an external subset or external parameter entities
7292: * with "standalone='no'", ... ... The declaration of a parameter entity
7293: * must precede any reference to it...
7294: *
7295: * [ WFC: In DTD ]
7296: * Parameter-entity references may only appear in the DTD.
7297: * NOTE: misleading but this is handled.
1.22 daniel 7298: */
1.77 daniel 7299: void
1.55 daniel 7300: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 7301: xmlChar *name;
1.72 daniel 7302: xmlEntityPtr entity = NULL;
1.50 daniel 7303: xmlParserInputPtr input;
1.22 daniel 7304:
1.152 daniel 7305: if (RAW == '%') {
1.40 daniel 7306: NEXT;
1.22 daniel 7307: name = xmlParseName(ctxt);
7308: if (name == NULL) {
1.55 daniel 7309: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7310: ctxt->sax->error(ctxt->userData,
7311: "xmlParsePEReference: no name\n");
1.123 daniel 7312: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7313: ctxt->wellFormed = 0;
1.180 daniel 7314: ctxt->disableSAX = 1;
1.22 daniel 7315: } else {
1.152 daniel 7316: if (RAW == ';') {
1.40 daniel 7317: NEXT;
1.98 daniel 7318: if ((ctxt->sax != NULL) &&
7319: (ctxt->sax->getParameterEntity != NULL))
7320: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7321: name);
1.45 daniel 7322: if (entity == NULL) {
1.98 daniel 7323: /*
7324: * [ WFC: Entity Declared ]
7325: * In a document without any DTD, a document with only an
7326: * internal DTD subset which contains no parameter entity
7327: * references, or a document with "standalone='yes'", ...
7328: * ... The declaration of a parameter entity must precede
7329: * any reference to it...
7330: */
7331: if ((ctxt->standalone == 1) ||
7332: ((ctxt->hasExternalSubset == 0) &&
7333: (ctxt->hasPErefs == 0))) {
7334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7335: ctxt->sax->error(ctxt->userData,
7336: "PEReference: %%%s; not found\n", name);
1.123 daniel 7337: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 7338: ctxt->wellFormed = 0;
1.180 daniel 7339: ctxt->disableSAX = 1;
1.98 daniel 7340: } else {
7341: /*
7342: * [ VC: Entity Declared ]
7343: * In a document with an external subset or external
7344: * parameter entities with "standalone='no'", ...
7345: * ... The declaration of a parameter entity must precede
7346: * any reference to it...
7347: */
7348: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7349: ctxt->sax->warning(ctxt->userData,
7350: "PEReference: %%%s; not found\n", name);
7351: ctxt->valid = 0;
7352: }
1.50 daniel 7353: } else {
1.98 daniel 7354: /*
7355: * Internal checking in case the entity quest barfed
7356: */
1.159 daniel 7357: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7358: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 7359: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7360: ctxt->sax->warning(ctxt->userData,
7361: "Internal: %%%s; is not a parameter entity\n", name);
7362: } else {
1.164 daniel 7363: /*
7364: * TODO !!!
7365: * handle the extra spaces added before and after
7366: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7367: */
1.98 daniel 7368: input = xmlNewEntityInputStream(ctxt, entity);
7369: xmlPushInput(ctxt, input);
1.164 daniel 7370: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7371: (RAW == '<') && (NXT(1) == '?') &&
7372: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7373: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 7374: xmlParseTextDecl(ctxt);
1.193 daniel 7375: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7376: /*
7377: * The XML REC instructs us to stop parsing
7378: * right here
7379: */
7380: ctxt->instate = XML_PARSER_EOF;
7381: xmlFree(name);
7382: return;
7383: }
1.164 daniel 7384: }
7385: if (ctxt->token == 0)
7386: ctxt->token = ' ';
1.98 daniel 7387: }
1.45 daniel 7388: }
1.98 daniel 7389: ctxt->hasPErefs = 1;
1.22 daniel 7390: } else {
1.55 daniel 7391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7392: ctxt->sax->error(ctxt->userData,
1.59 daniel 7393: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 7394: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7395: ctxt->wellFormed = 0;
1.180 daniel 7396: ctxt->disableSAX = 1;
1.22 daniel 7397: }
1.119 daniel 7398: xmlFree(name);
1.3 veillard 7399: }
7400: }
7401: }
7402:
1.50 daniel 7403: /**
1.135 daniel 7404: * xmlParseStringPEReference:
7405: * @ctxt: an XML parser context
7406: * @str: a pointer to an index in the string
7407: *
7408: * parse PEReference declarations
7409: *
7410: * [69] PEReference ::= '%' Name ';'
7411: *
7412: * [ WFC: No Recursion ]
7413: * TODO A parsed entity must not contain a recursive
7414: * reference to itself, either directly or indirectly.
7415: *
7416: * [ WFC: Entity Declared ]
7417: * In a document without any DTD, a document with only an internal DTD
7418: * subset which contains no parameter entity references, or a document
7419: * with "standalone='yes'", ... ... The declaration of a parameter
7420: * entity must precede any reference to it...
7421: *
7422: * [ VC: Entity Declared ]
7423: * In a document with an external subset or external parameter entities
7424: * with "standalone='no'", ... ... The declaration of a parameter entity
7425: * must precede any reference to it...
7426: *
7427: * [ WFC: In DTD ]
7428: * Parameter-entity references may only appear in the DTD.
7429: * NOTE: misleading but this is handled.
7430: *
7431: * Returns the string of the entity content.
7432: * str is updated to the current value of the index
7433: */
7434: xmlEntityPtr
7435: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7436: const xmlChar *ptr;
7437: xmlChar cur;
7438: xmlChar *name;
7439: xmlEntityPtr entity = NULL;
7440:
7441: if ((str == NULL) || (*str == NULL)) return(NULL);
7442: ptr = *str;
7443: cur = *ptr;
7444: if (cur == '%') {
7445: ptr++;
7446: cur = *ptr;
7447: name = xmlParseStringName(ctxt, &ptr);
7448: if (name == NULL) {
7449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7450: ctxt->sax->error(ctxt->userData,
7451: "xmlParseStringPEReference: no name\n");
7452: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7453: ctxt->wellFormed = 0;
1.180 daniel 7454: ctxt->disableSAX = 1;
1.135 daniel 7455: } else {
7456: cur = *ptr;
7457: if (cur == ';') {
7458: ptr++;
7459: cur = *ptr;
7460: if ((ctxt->sax != NULL) &&
7461: (ctxt->sax->getParameterEntity != NULL))
7462: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7463: name);
7464: if (entity == NULL) {
7465: /*
7466: * [ WFC: Entity Declared ]
7467: * In a document without any DTD, a document with only an
7468: * internal DTD subset which contains no parameter entity
7469: * references, or a document with "standalone='yes'", ...
7470: * ... The declaration of a parameter entity must precede
7471: * any reference to it...
7472: */
7473: if ((ctxt->standalone == 1) ||
7474: ((ctxt->hasExternalSubset == 0) &&
7475: (ctxt->hasPErefs == 0))) {
7476: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7477: ctxt->sax->error(ctxt->userData,
7478: "PEReference: %%%s; not found\n", name);
7479: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7480: ctxt->wellFormed = 0;
1.180 daniel 7481: ctxt->disableSAX = 1;
1.135 daniel 7482: } else {
7483: /*
7484: * [ VC: Entity Declared ]
7485: * In a document with an external subset or external
7486: * parameter entities with "standalone='no'", ...
7487: * ... The declaration of a parameter entity must
7488: * precede any reference to it...
7489: */
7490: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7491: ctxt->sax->warning(ctxt->userData,
7492: "PEReference: %%%s; not found\n", name);
7493: ctxt->valid = 0;
7494: }
7495: } else {
7496: /*
7497: * Internal checking in case the entity quest barfed
7498: */
1.159 daniel 7499: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7500: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7501: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7502: ctxt->sax->warning(ctxt->userData,
7503: "Internal: %%%s; is not a parameter entity\n", name);
7504: }
7505: }
7506: ctxt->hasPErefs = 1;
7507: } else {
7508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7509: ctxt->sax->error(ctxt->userData,
7510: "xmlParseStringPEReference: expecting ';'\n");
7511: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7512: ctxt->wellFormed = 0;
1.180 daniel 7513: ctxt->disableSAX = 1;
1.135 daniel 7514: }
7515: xmlFree(name);
7516: }
7517: }
7518: *str = ptr;
7519: return(entity);
7520: }
7521:
7522: /**
1.181 daniel 7523: * xmlParseDocTypeDecl:
1.50 daniel 7524: * @ctxt: an XML parser context
7525: *
7526: * parse a DOCTYPE declaration
1.21 daniel 7527: *
1.22 daniel 7528: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7529: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7530: *
7531: * [ VC: Root Element Type ]
1.99 daniel 7532: * The Name in the document type declaration must match the element
1.98 daniel 7533: * type of the root element.
1.21 daniel 7534: */
7535:
1.55 daniel 7536: void
7537: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7538: xmlChar *name = NULL;
1.123 daniel 7539: xmlChar *ExternalID = NULL;
7540: xmlChar *URI = NULL;
1.21 daniel 7541:
7542: /*
7543: * We know that '<!DOCTYPE' has been detected.
7544: */
1.40 daniel 7545: SKIP(9);
1.21 daniel 7546:
1.42 daniel 7547: SKIP_BLANKS;
1.21 daniel 7548:
7549: /*
7550: * Parse the DOCTYPE name.
7551: */
7552: name = xmlParseName(ctxt);
7553: if (name == NULL) {
1.55 daniel 7554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7555: ctxt->sax->error(ctxt->userData,
7556: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7557: ctxt->wellFormed = 0;
1.180 daniel 7558: ctxt->disableSAX = 1;
1.123 daniel 7559: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7560: }
1.165 daniel 7561: ctxt->intSubName = name;
1.21 daniel 7562:
1.42 daniel 7563: SKIP_BLANKS;
1.21 daniel 7564:
7565: /*
1.22 daniel 7566: * Check for SystemID and ExternalID
7567: */
1.67 daniel 7568: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7569:
7570: if ((URI != NULL) || (ExternalID != NULL)) {
7571: ctxt->hasExternalSubset = 1;
7572: }
1.165 daniel 7573: ctxt->extSubURI = URI;
7574: ctxt->extSubSystem = ExternalID;
1.98 daniel 7575:
1.42 daniel 7576: SKIP_BLANKS;
1.36 daniel 7577:
1.76 daniel 7578: /*
1.165 daniel 7579: * Create and update the internal subset.
1.76 daniel 7580: */
1.171 daniel 7581: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7582: (!ctxt->disableSAX))
1.74 daniel 7583: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7584:
7585: /*
1.140 daniel 7586: * Is there any internal subset declarations ?
7587: * they are handled separately in xmlParseInternalSubset()
7588: */
1.152 daniel 7589: if (RAW == '[')
1.140 daniel 7590: return;
7591:
7592: /*
7593: * We should be at the end of the DOCTYPE declaration.
7594: */
1.152 daniel 7595: if (RAW != '>') {
1.140 daniel 7596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7597: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7598: ctxt->wellFormed = 0;
1.180 daniel 7599: ctxt->disableSAX = 1;
1.140 daniel 7600: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7601: }
7602: NEXT;
7603: }
7604:
7605: /**
1.181 daniel 7606: * xmlParseInternalsubset:
1.140 daniel 7607: * @ctxt: an XML parser context
7608: *
7609: * parse the internal subset declaration
7610: *
7611: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7612: */
7613:
7614: void
7615: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7616: /*
1.22 daniel 7617: * Is there any DTD definition ?
7618: */
1.152 daniel 7619: if (RAW == '[') {
1.96 daniel 7620: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7621: NEXT;
1.22 daniel 7622: /*
7623: * Parse the succession of Markup declarations and
7624: * PEReferences.
7625: * Subsequence (markupdecl | PEReference | S)*
7626: */
1.152 daniel 7627: while (RAW != ']') {
1.123 daniel 7628: const xmlChar *check = CUR_PTR;
1.115 daniel 7629: int cons = ctxt->input->consumed;
1.22 daniel 7630:
1.42 daniel 7631: SKIP_BLANKS;
1.22 daniel 7632: xmlParseMarkupDecl(ctxt);
1.50 daniel 7633: xmlParsePEReference(ctxt);
1.22 daniel 7634:
1.115 daniel 7635: /*
7636: * Pop-up of finished entities.
7637: */
1.152 daniel 7638: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7639: xmlPopInput(ctxt);
7640:
1.118 daniel 7641: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7642: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7643: ctxt->sax->error(ctxt->userData,
1.140 daniel 7644: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7645: ctxt->wellFormed = 0;
1.180 daniel 7646: ctxt->disableSAX = 1;
1.123 daniel 7647: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7648: break;
7649: }
7650: }
1.209 veillard 7651: if (RAW == ']') {
7652: NEXT;
7653: SKIP_BLANKS;
7654: }
1.22 daniel 7655: }
7656:
7657: /*
7658: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7659: */
1.152 daniel 7660: if (RAW != '>') {
1.55 daniel 7661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7662: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7663: ctxt->wellFormed = 0;
1.180 daniel 7664: ctxt->disableSAX = 1;
1.123 daniel 7665: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7666: }
1.40 daniel 7667: NEXT;
1.21 daniel 7668: }
7669:
1.50 daniel 7670: /**
7671: * xmlParseAttribute:
7672: * @ctxt: an XML parser context
1.123 daniel 7673: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7674: *
7675: * parse an attribute
1.3 veillard 7676: *
1.22 daniel 7677: * [41] Attribute ::= Name Eq AttValue
7678: *
1.98 daniel 7679: * [ WFC: No External Entity References ]
7680: * Attribute values cannot contain direct or indirect entity references
7681: * to external entities.
7682: *
7683: * [ WFC: No < in Attribute Values ]
7684: * The replacement text of any entity referred to directly or indirectly in
7685: * an attribute value (other than "<") must not contain a <.
7686: *
7687: * [ VC: Attribute Value Type ]
1.117 daniel 7688: * The attribute must have been declared; the value must be of the type
1.99 daniel 7689: * declared for it.
1.98 daniel 7690: *
1.22 daniel 7691: * [25] Eq ::= S? '=' S?
7692: *
1.29 daniel 7693: * With namespace:
7694: *
7695: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7696: *
7697: * Also the case QName == xmlns:??? is handled independently as a namespace
7698: * definition.
1.69 daniel 7699: *
1.72 daniel 7700: * Returns the attribute name, and the value in *value.
1.3 veillard 7701: */
7702:
1.123 daniel 7703: xmlChar *
7704: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7705: xmlChar *name, *val;
1.3 veillard 7706:
1.72 daniel 7707: *value = NULL;
7708: name = xmlParseName(ctxt);
1.22 daniel 7709: if (name == NULL) {
1.55 daniel 7710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7711: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7712: ctxt->wellFormed = 0;
1.180 daniel 7713: ctxt->disableSAX = 1;
1.123 daniel 7714: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7715: return(NULL);
1.3 veillard 7716: }
7717:
7718: /*
1.29 daniel 7719: * read the value
1.3 veillard 7720: */
1.42 daniel 7721: SKIP_BLANKS;
1.152 daniel 7722: if (RAW == '=') {
1.40 daniel 7723: NEXT;
1.42 daniel 7724: SKIP_BLANKS;
1.72 daniel 7725: val = xmlParseAttValue(ctxt);
1.96 daniel 7726: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7727: } else {
1.55 daniel 7728: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7729: ctxt->sax->error(ctxt->userData,
1.59 daniel 7730: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7731: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7732: ctxt->wellFormed = 0;
1.180 daniel 7733: ctxt->disableSAX = 1;
1.170 daniel 7734: xmlFree(name);
1.52 daniel 7735: return(NULL);
1.43 daniel 7736: }
7737:
1.172 daniel 7738: /*
7739: * Check that xml:lang conforms to the specification
7740: */
7741: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7742: if (!xmlCheckLanguageID(val)) {
7743: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7744: ctxt->sax->error(ctxt->userData,
7745: "Invalid value for xml:lang : %s\n", val);
7746: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7747: ctxt->wellFormed = 0;
1.180 daniel 7748: ctxt->disableSAX = 1;
1.172 daniel 7749: }
7750: }
7751:
1.176 daniel 7752: /*
7753: * Check that xml:space conforms to the specification
7754: */
7755: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7756: if (!xmlStrcmp(val, BAD_CAST "default"))
7757: *(ctxt->space) = 0;
7758: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7759: *(ctxt->space) = 1;
7760: else {
7761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7762: ctxt->sax->error(ctxt->userData,
7763: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7764: val);
7765: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7766: ctxt->wellFormed = 0;
1.180 daniel 7767: ctxt->disableSAX = 1;
1.176 daniel 7768: }
7769: }
7770:
1.72 daniel 7771: *value = val;
7772: return(name);
1.3 veillard 7773: }
7774:
1.50 daniel 7775: /**
7776: * xmlParseStartTag:
7777: * @ctxt: an XML parser context
7778: *
7779: * parse a start of tag either for rule element or
7780: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7781: *
7782: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7783: *
1.98 daniel 7784: * [ WFC: Unique Att Spec ]
7785: * No attribute name may appear more than once in the same start-tag or
7786: * empty-element tag.
7787: *
1.29 daniel 7788: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7789: *
1.98 daniel 7790: * [ WFC: Unique Att Spec ]
7791: * No attribute name may appear more than once in the same start-tag or
7792: * empty-element tag.
7793: *
1.29 daniel 7794: * With namespace:
7795: *
7796: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7797: *
7798: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7799: *
1.192 daniel 7800: * Returns the element name parsed
1.2 veillard 7801: */
7802:
1.123 daniel 7803: xmlChar *
1.69 daniel 7804: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7805: xmlChar *name;
7806: xmlChar *attname;
7807: xmlChar *attvalue;
7808: const xmlChar **atts = NULL;
1.72 daniel 7809: int nbatts = 0;
7810: int maxatts = 0;
7811: int i;
1.2 veillard 7812:
1.152 daniel 7813: if (RAW != '<') return(NULL);
1.40 daniel 7814: NEXT;
1.3 veillard 7815:
1.72 daniel 7816: name = xmlParseName(ctxt);
1.59 daniel 7817: if (name == NULL) {
7818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7819: ctxt->sax->error(ctxt->userData,
1.59 daniel 7820: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7821: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7822: ctxt->wellFormed = 0;
1.180 daniel 7823: ctxt->disableSAX = 1;
1.83 daniel 7824: return(NULL);
1.50 daniel 7825: }
7826:
7827: /*
1.3 veillard 7828: * Now parse the attributes, it ends up with the ending
7829: *
7830: * (S Attribute)* S?
7831: */
1.42 daniel 7832: SKIP_BLANKS;
1.91 daniel 7833: GROW;
1.168 daniel 7834:
1.153 daniel 7835: while ((IS_CHAR(RAW)) &&
1.152 daniel 7836: (RAW != '>') &&
7837: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7838: const xmlChar *q = CUR_PTR;
1.91 daniel 7839: int cons = ctxt->input->consumed;
1.29 daniel 7840:
1.72 daniel 7841: attname = xmlParseAttribute(ctxt, &attvalue);
7842: if ((attname != NULL) && (attvalue != NULL)) {
7843: /*
1.98 daniel 7844: * [ WFC: Unique Att Spec ]
7845: * No attribute name may appear more than once in the same
7846: * start-tag or empty-element tag.
1.72 daniel 7847: */
7848: for (i = 0; i < nbatts;i += 2) {
7849: if (!xmlStrcmp(atts[i], attname)) {
7850: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7851: ctxt->sax->error(ctxt->userData,
7852: "Attribute %s redefined\n",
7853: attname);
1.72 daniel 7854: ctxt->wellFormed = 0;
1.180 daniel 7855: ctxt->disableSAX = 1;
1.123 daniel 7856: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7857: xmlFree(attname);
7858: xmlFree(attvalue);
1.98 daniel 7859: goto failed;
1.72 daniel 7860: }
7861: }
7862:
7863: /*
7864: * Add the pair to atts
7865: */
7866: if (atts == NULL) {
7867: maxatts = 10;
1.123 daniel 7868: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7869: if (atts == NULL) {
1.86 daniel 7870: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7871: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7872: return(NULL);
1.72 daniel 7873: }
1.127 daniel 7874: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7875: maxatts *= 2;
1.123 daniel 7876: atts = (const xmlChar **) xmlRealloc(atts,
7877: maxatts * sizeof(xmlChar *));
1.72 daniel 7878: if (atts == NULL) {
1.86 daniel 7879: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7880: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7881: return(NULL);
1.72 daniel 7882: }
7883: }
7884: atts[nbatts++] = attname;
7885: atts[nbatts++] = attvalue;
7886: atts[nbatts] = NULL;
7887: atts[nbatts + 1] = NULL;
1.176 daniel 7888: } else {
7889: if (attname != NULL)
7890: xmlFree(attname);
7891: if (attvalue != NULL)
7892: xmlFree(attvalue);
1.72 daniel 7893: }
7894:
1.116 daniel 7895: failed:
1.168 daniel 7896:
7897: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7898: break;
7899: if (!IS_BLANK(RAW)) {
7900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7901: ctxt->sax->error(ctxt->userData,
7902: "attributes construct error\n");
7903: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7904: ctxt->wellFormed = 0;
1.180 daniel 7905: ctxt->disableSAX = 1;
1.168 daniel 7906: }
1.42 daniel 7907: SKIP_BLANKS;
1.91 daniel 7908: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7910: ctxt->sax->error(ctxt->userData,
1.31 daniel 7911: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7912: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7913: ctxt->wellFormed = 0;
1.180 daniel 7914: ctxt->disableSAX = 1;
1.29 daniel 7915: break;
1.3 veillard 7916: }
1.91 daniel 7917: GROW;
1.3 veillard 7918: }
7919:
1.43 daniel 7920: /*
1.72 daniel 7921: * SAX: Start of Element !
1.43 daniel 7922: */
1.171 daniel 7923: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7924: (!ctxt->disableSAX))
1.74 daniel 7925: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7926:
1.72 daniel 7927: if (atts != NULL) {
1.123 daniel 7928: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7929: xmlFree(atts);
1.72 daniel 7930: }
1.83 daniel 7931: return(name);
1.3 veillard 7932: }
7933:
1.50 daniel 7934: /**
7935: * xmlParseEndTag:
7936: * @ctxt: an XML parser context
7937: *
7938: * parse an end of tag
1.27 daniel 7939: *
7940: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7941: *
7942: * With namespace
7943: *
1.72 daniel 7944: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7945: */
7946:
1.55 daniel 7947: void
1.140 daniel 7948: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7949: xmlChar *name;
1.140 daniel 7950: xmlChar *oldname;
1.7 veillard 7951:
1.91 daniel 7952: GROW;
1.152 daniel 7953: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7954: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7955: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7956: ctxt->wellFormed = 0;
1.180 daniel 7957: ctxt->disableSAX = 1;
1.123 daniel 7958: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7959: return;
7960: }
1.40 daniel 7961: SKIP(2);
1.7 veillard 7962:
1.72 daniel 7963: name = xmlParseName(ctxt);
1.7 veillard 7964:
7965: /*
7966: * We should definitely be at the ending "S? '>'" part
7967: */
1.91 daniel 7968: GROW;
1.42 daniel 7969: SKIP_BLANKS;
1.153 daniel 7970: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7972: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7973: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7974: ctxt->wellFormed = 0;
1.180 daniel 7975: ctxt->disableSAX = 1;
1.7 veillard 7976: } else
1.40 daniel 7977: NEXT;
1.7 veillard 7978:
1.72 daniel 7979: /*
1.98 daniel 7980: * [ WFC: Element Type Match ]
7981: * The Name in an element's end-tag must match the element type in the
7982: * start-tag.
7983: *
1.83 daniel 7984: */
1.147 daniel 7985: if ((name == NULL) || (ctxt->name == NULL) ||
7986: (xmlStrcmp(name, ctxt->name))) {
7987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7988: if ((name != NULL) && (ctxt->name != NULL)) {
7989: ctxt->sax->error(ctxt->userData,
7990: "Opening and ending tag mismatch: %s and %s\n",
7991: ctxt->name, name);
7992: } else if (ctxt->name != NULL) {
7993: ctxt->sax->error(ctxt->userData,
7994: "Ending tag eror for: %s\n", ctxt->name);
7995: } else {
7996: ctxt->sax->error(ctxt->userData,
7997: "Ending tag error: internal error ???\n");
7998: }
1.122 daniel 7999:
1.147 daniel 8000: }
1.123 daniel 8001: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 8002: ctxt->wellFormed = 0;
1.180 daniel 8003: ctxt->disableSAX = 1;
1.83 daniel 8004: }
8005:
8006: /*
1.72 daniel 8007: * SAX: End of Tag
8008: */
1.171 daniel 8009: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8010: (!ctxt->disableSAX))
1.74 daniel 8011: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 8012:
8013: if (name != NULL)
1.119 daniel 8014: xmlFree(name);
1.140 daniel 8015: oldname = namePop(ctxt);
1.176 daniel 8016: spacePop(ctxt);
1.140 daniel 8017: if (oldname != NULL) {
8018: #ifdef DEBUG_STACK
8019: fprintf(stderr,"Close: popped %s\n", oldname);
8020: #endif
8021: xmlFree(oldname);
8022: }
1.7 veillard 8023: return;
8024: }
8025:
1.50 daniel 8026: /**
8027: * xmlParseCDSect:
8028: * @ctxt: an XML parser context
8029: *
8030: * Parse escaped pure raw content.
1.29 daniel 8031: *
8032: * [18] CDSect ::= CDStart CData CDEnd
8033: *
8034: * [19] CDStart ::= '<![CDATA['
8035: *
8036: * [20] Data ::= (Char* - (Char* ']]>' Char*))
8037: *
8038: * [21] CDEnd ::= ']]>'
1.3 veillard 8039: */
1.55 daniel 8040: void
8041: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 8042: xmlChar *buf = NULL;
8043: int len = 0;
1.140 daniel 8044: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 8045: int r, rl;
8046: int s, sl;
8047: int cur, l;
1.3 veillard 8048:
1.106 daniel 8049: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 8050: (NXT(2) == '[') && (NXT(3) == 'C') &&
8051: (NXT(4) == 'D') && (NXT(5) == 'A') &&
8052: (NXT(6) == 'T') && (NXT(7) == 'A') &&
8053: (NXT(8) == '[')) {
8054: SKIP(9);
1.29 daniel 8055: } else
1.45 daniel 8056: return;
1.109 daniel 8057:
8058: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 8059: r = CUR_CHAR(rl);
8060: if (!IS_CHAR(r)) {
1.55 daniel 8061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8062: ctxt->sax->error(ctxt->userData,
1.135 daniel 8063: "CData section not finished\n");
1.59 daniel 8064: ctxt->wellFormed = 0;
1.180 daniel 8065: ctxt->disableSAX = 1;
1.123 daniel 8066: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 8067: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 8068: return;
1.3 veillard 8069: }
1.152 daniel 8070: NEXTL(rl);
8071: s = CUR_CHAR(sl);
8072: if (!IS_CHAR(s)) {
1.55 daniel 8073: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8074: ctxt->sax->error(ctxt->userData,
1.135 daniel 8075: "CData section not finished\n");
1.123 daniel 8076: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 8077: ctxt->wellFormed = 0;
1.180 daniel 8078: ctxt->disableSAX = 1;
1.109 daniel 8079: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 8080: return;
1.3 veillard 8081: }
1.152 daniel 8082: NEXTL(sl);
8083: cur = CUR_CHAR(l);
1.135 daniel 8084: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8085: if (buf == NULL) {
8086: fprintf(stderr, "malloc of %d byte failed\n", size);
8087: return;
8088: }
1.108 veillard 8089: while (IS_CHAR(cur) &&
1.110 daniel 8090: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 8091: if (len + 5 >= size) {
1.135 daniel 8092: size *= 2;
1.204 veillard 8093: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8094: if (buf == NULL) {
8095: fprintf(stderr, "realloc of %d byte failed\n", size);
8096: return;
8097: }
8098: }
1.152 daniel 8099: COPY_BUF(rl,buf,len,r);
1.110 daniel 8100: r = s;
1.152 daniel 8101: rl = sl;
1.110 daniel 8102: s = cur;
1.152 daniel 8103: sl = l;
8104: NEXTL(l);
8105: cur = CUR_CHAR(l);
1.3 veillard 8106: }
1.135 daniel 8107: buf[len] = 0;
1.109 daniel 8108: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 8109: if (cur != '>') {
1.55 daniel 8110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8111: ctxt->sax->error(ctxt->userData,
1.135 daniel 8112: "CData section not finished\n%.50s\n", buf);
1.123 daniel 8113: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 8114: ctxt->wellFormed = 0;
1.180 daniel 8115: ctxt->disableSAX = 1;
1.135 daniel 8116: xmlFree(buf);
1.45 daniel 8117: return;
1.3 veillard 8118: }
1.152 daniel 8119: NEXTL(l);
1.16 daniel 8120:
1.45 daniel 8121: /*
1.135 daniel 8122: * Ok the buffer is to be consumed as cdata.
1.45 daniel 8123: */
1.171 daniel 8124: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 8125: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 8126: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 8127: }
1.135 daniel 8128: xmlFree(buf);
1.2 veillard 8129: }
8130:
1.50 daniel 8131: /**
8132: * xmlParseContent:
8133: * @ctxt: an XML parser context
8134: *
8135: * Parse a content:
1.2 veillard 8136: *
1.27 daniel 8137: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 8138: */
8139:
1.55 daniel 8140: void
8141: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 8142: GROW;
1.176 daniel 8143: while (((RAW != 0) || (ctxt->token != 0)) &&
8144: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 8145: const xmlChar *test = CUR_PTR;
1.91 daniel 8146: int cons = ctxt->input->consumed;
1.123 daniel 8147: xmlChar tok = ctxt->token;
1.27 daniel 8148:
8149: /*
1.152 daniel 8150: * Handle possible processed charrefs.
8151: */
8152: if (ctxt->token != 0) {
8153: xmlParseCharData(ctxt, 0);
8154: }
8155: /*
1.27 daniel 8156: * First case : a Processing Instruction.
8157: */
1.152 daniel 8158: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 8159: xmlParsePI(ctxt);
8160: }
1.72 daniel 8161:
1.27 daniel 8162: /*
8163: * Second case : a CDSection
8164: */
1.152 daniel 8165: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8166: (NXT(2) == '[') && (NXT(3) == 'C') &&
8167: (NXT(4) == 'D') && (NXT(5) == 'A') &&
8168: (NXT(6) == 'T') && (NXT(7) == 'A') &&
8169: (NXT(8) == '[')) {
1.45 daniel 8170: xmlParseCDSect(ctxt);
1.27 daniel 8171: }
1.72 daniel 8172:
1.27 daniel 8173: /*
8174: * Third case : a comment
8175: */
1.152 daniel 8176: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8177: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 8178: xmlParseComment(ctxt);
1.97 daniel 8179: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 8180: }
1.72 daniel 8181:
1.27 daniel 8182: /*
8183: * Fourth case : a sub-element.
8184: */
1.152 daniel 8185: else if (RAW == '<') {
1.72 daniel 8186: xmlParseElement(ctxt);
1.45 daniel 8187: }
1.72 daniel 8188:
1.45 daniel 8189: /*
1.50 daniel 8190: * Fifth case : a reference. If if has not been resolved,
8191: * parsing returns it's Name, create the node
1.45 daniel 8192: */
1.97 daniel 8193:
1.152 daniel 8194: else if (RAW == '&') {
1.77 daniel 8195: xmlParseReference(ctxt);
1.27 daniel 8196: }
1.72 daniel 8197:
1.27 daniel 8198: /*
8199: * Last case, text. Note that References are handled directly.
8200: */
8201: else {
1.45 daniel 8202: xmlParseCharData(ctxt, 0);
1.3 veillard 8203: }
1.14 veillard 8204:
1.91 daniel 8205: GROW;
1.14 veillard 8206: /*
1.45 daniel 8207: * Pop-up of finished entities.
1.14 veillard 8208: */
1.152 daniel 8209: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 8210: xmlPopInput(ctxt);
1.135 daniel 8211: SHRINK;
1.45 daniel 8212:
1.113 daniel 8213: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8214: (tok == ctxt->token)) {
1.55 daniel 8215: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8216: ctxt->sax->error(ctxt->userData,
1.59 daniel 8217: "detected an error in element content\n");
1.123 daniel 8218: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 8219: ctxt->wellFormed = 0;
1.180 daniel 8220: ctxt->disableSAX = 1;
1.29 daniel 8221: break;
8222: }
1.3 veillard 8223: }
1.2 veillard 8224: }
8225:
1.50 daniel 8226: /**
8227: * xmlParseElement:
8228: * @ctxt: an XML parser context
8229: *
8230: * parse an XML element, this is highly recursive
1.26 daniel 8231: *
8232: * [39] element ::= EmptyElemTag | STag content ETag
8233: *
1.98 daniel 8234: * [ WFC: Element Type Match ]
8235: * The Name in an element's end-tag must match the element type in the
8236: * start-tag.
8237: *
8238: * [ VC: Element Valid ]
1.117 daniel 8239: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 8240: * where the Name matches the element type and one of the following holds:
8241: * - The declaration matches EMPTY and the element has no content.
8242: * - The declaration matches children and the sequence of child elements
8243: * belongs to the language generated by the regular expression in the
8244: * content model, with optional white space (characters matching the
8245: * nonterminal S) between each pair of child elements.
8246: * - The declaration matches Mixed and the content consists of character
8247: * data and child elements whose types match names in the content model.
8248: * - The declaration matches ANY, and the types of any child elements have
8249: * been declared.
1.2 veillard 8250: */
1.26 daniel 8251:
1.72 daniel 8252: void
1.69 daniel 8253: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 8254: const xmlChar *openTag = CUR_PTR;
8255: xmlChar *name;
1.140 daniel 8256: xmlChar *oldname;
1.32 daniel 8257: xmlParserNodeInfo node_info;
1.118 daniel 8258: xmlNodePtr ret;
1.2 veillard 8259:
1.32 daniel 8260: /* Capture start position */
1.118 daniel 8261: if (ctxt->record_info) {
8262: node_info.begin_pos = ctxt->input->consumed +
8263: (CUR_PTR - ctxt->input->base);
8264: node_info.begin_line = ctxt->input->line;
8265: }
1.32 daniel 8266:
1.176 daniel 8267: if (ctxt->spaceNr == 0)
8268: spacePush(ctxt, -1);
8269: else
8270: spacePush(ctxt, *ctxt->space);
8271:
1.83 daniel 8272: name = xmlParseStartTag(ctxt);
8273: if (name == NULL) {
1.176 daniel 8274: spacePop(ctxt);
1.83 daniel 8275: return;
8276: }
1.140 daniel 8277: namePush(ctxt, name);
1.118 daniel 8278: ret = ctxt->node;
1.2 veillard 8279:
8280: /*
1.99 daniel 8281: * [ VC: Root Element Type ]
8282: * The Name in the document type declaration must match the element
8283: * type of the root element.
8284: */
1.105 daniel 8285: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 8286: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 8287: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 8288:
8289: /*
1.2 veillard 8290: * Check for an Empty Element.
8291: */
1.152 daniel 8292: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 8293: SKIP(2);
1.171 daniel 8294: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8295: (!ctxt->disableSAX))
1.83 daniel 8296: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 8297: oldname = namePop(ctxt);
1.176 daniel 8298: spacePop(ctxt);
1.140 daniel 8299: if (oldname != NULL) {
8300: #ifdef DEBUG_STACK
8301: fprintf(stderr,"Close: popped %s\n", oldname);
8302: #endif
8303: xmlFree(oldname);
1.211 veillard 8304: }
8305: if ( ret != NULL && ctxt->record_info ) {
8306: node_info.end_pos = ctxt->input->consumed +
8307: (CUR_PTR - ctxt->input->base);
8308: node_info.end_line = ctxt->input->line;
8309: node_info.node = ret;
8310: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 8311: }
1.72 daniel 8312: return;
1.2 veillard 8313: }
1.152 daniel 8314: if (RAW == '>') {
1.91 daniel 8315: NEXT;
8316: } else {
1.55 daniel 8317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8318: ctxt->sax->error(ctxt->userData,
8319: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 8320: openTag);
1.59 daniel 8321: ctxt->wellFormed = 0;
1.180 daniel 8322: ctxt->disableSAX = 1;
1.123 daniel 8323: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 8324:
8325: /*
8326: * end of parsing of this node.
8327: */
8328: nodePop(ctxt);
1.140 daniel 8329: oldname = namePop(ctxt);
1.176 daniel 8330: spacePop(ctxt);
1.140 daniel 8331: if (oldname != NULL) {
8332: #ifdef DEBUG_STACK
8333: fprintf(stderr,"Close: popped %s\n", oldname);
8334: #endif
8335: xmlFree(oldname);
8336: }
1.118 daniel 8337:
8338: /*
8339: * Capture end position and add node
8340: */
8341: if ( ret != NULL && ctxt->record_info ) {
8342: node_info.end_pos = ctxt->input->consumed +
8343: (CUR_PTR - ctxt->input->base);
8344: node_info.end_line = ctxt->input->line;
8345: node_info.node = ret;
8346: xmlParserAddNodeInfo(ctxt, &node_info);
8347: }
1.72 daniel 8348: return;
1.2 veillard 8349: }
8350:
8351: /*
8352: * Parse the content of the element:
8353: */
1.45 daniel 8354: xmlParseContent(ctxt);
1.153 daniel 8355: if (!IS_CHAR(RAW)) {
1.55 daniel 8356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8357: ctxt->sax->error(ctxt->userData,
1.57 daniel 8358: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 8359: ctxt->wellFormed = 0;
1.180 daniel 8360: ctxt->disableSAX = 1;
1.123 daniel 8361: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 8362:
8363: /*
8364: * end of parsing of this node.
8365: */
8366: nodePop(ctxt);
1.140 daniel 8367: oldname = namePop(ctxt);
1.176 daniel 8368: spacePop(ctxt);
1.140 daniel 8369: if (oldname != NULL) {
8370: #ifdef DEBUG_STACK
8371: fprintf(stderr,"Close: popped %s\n", oldname);
8372: #endif
8373: xmlFree(oldname);
8374: }
1.72 daniel 8375: return;
1.2 veillard 8376: }
8377:
8378: /*
1.27 daniel 8379: * parse the end of tag: '</' should be here.
1.2 veillard 8380: */
1.140 daniel 8381: xmlParseEndTag(ctxt);
1.118 daniel 8382:
8383: /*
8384: * Capture end position and add node
8385: */
8386: if ( ret != NULL && ctxt->record_info ) {
8387: node_info.end_pos = ctxt->input->consumed +
8388: (CUR_PTR - ctxt->input->base);
8389: node_info.end_line = ctxt->input->line;
8390: node_info.node = ret;
8391: xmlParserAddNodeInfo(ctxt, &node_info);
8392: }
1.2 veillard 8393: }
8394:
1.50 daniel 8395: /**
8396: * xmlParseVersionNum:
8397: * @ctxt: an XML parser context
8398: *
8399: * parse the XML version value.
1.29 daniel 8400: *
8401: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 8402: *
8403: * Returns the string giving the XML version number, or NULL
1.29 daniel 8404: */
1.123 daniel 8405: xmlChar *
1.55 daniel 8406: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 8407: xmlChar *buf = NULL;
8408: int len = 0;
8409: int size = 10;
8410: xmlChar cur;
1.29 daniel 8411:
1.135 daniel 8412: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8413: if (buf == NULL) {
8414: fprintf(stderr, "malloc of %d byte failed\n", size);
8415: return(NULL);
8416: }
8417: cur = CUR;
1.152 daniel 8418: while (((cur >= 'a') && (cur <= 'z')) ||
8419: ((cur >= 'A') && (cur <= 'Z')) ||
8420: ((cur >= '0') && (cur <= '9')) ||
8421: (cur == '_') || (cur == '.') ||
8422: (cur == ':') || (cur == '-')) {
1.135 daniel 8423: if (len + 1 >= size) {
8424: size *= 2;
1.204 veillard 8425: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8426: if (buf == NULL) {
8427: fprintf(stderr, "realloc of %d byte failed\n", size);
8428: return(NULL);
8429: }
8430: }
8431: buf[len++] = cur;
8432: NEXT;
8433: cur=CUR;
8434: }
8435: buf[len] = 0;
8436: return(buf);
1.29 daniel 8437: }
8438:
1.50 daniel 8439: /**
8440: * xmlParseVersionInfo:
8441: * @ctxt: an XML parser context
8442: *
8443: * parse the XML version.
1.29 daniel 8444: *
8445: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8446: *
8447: * [25] Eq ::= S? '=' S?
1.50 daniel 8448: *
1.68 daniel 8449: * Returns the version string, e.g. "1.0"
1.29 daniel 8450: */
8451:
1.123 daniel 8452: xmlChar *
1.55 daniel 8453: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8454: xmlChar *version = NULL;
8455: const xmlChar *q;
1.29 daniel 8456:
1.152 daniel 8457: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8458: (NXT(2) == 'r') && (NXT(3) == 's') &&
8459: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8460: (NXT(6) == 'n')) {
8461: SKIP(7);
1.42 daniel 8462: SKIP_BLANKS;
1.152 daniel 8463: if (RAW != '=') {
1.55 daniel 8464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8465: ctxt->sax->error(ctxt->userData,
8466: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8467: ctxt->wellFormed = 0;
1.180 daniel 8468: ctxt->disableSAX = 1;
1.123 daniel 8469: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8470: return(NULL);
8471: }
1.40 daniel 8472: NEXT;
1.42 daniel 8473: SKIP_BLANKS;
1.152 daniel 8474: if (RAW == '"') {
1.40 daniel 8475: NEXT;
8476: q = CUR_PTR;
1.29 daniel 8477: version = xmlParseVersionNum(ctxt);
1.152 daniel 8478: if (RAW != '"') {
1.55 daniel 8479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8480: ctxt->sax->error(ctxt->userData,
8481: "String not closed\n%.50s\n", q);
1.59 daniel 8482: ctxt->wellFormed = 0;
1.180 daniel 8483: ctxt->disableSAX = 1;
1.123 daniel 8484: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8485: } else
1.40 daniel 8486: NEXT;
1.152 daniel 8487: } else if (RAW == '\''){
1.40 daniel 8488: NEXT;
8489: q = CUR_PTR;
1.29 daniel 8490: version = xmlParseVersionNum(ctxt);
1.152 daniel 8491: if (RAW != '\'') {
1.55 daniel 8492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8493: ctxt->sax->error(ctxt->userData,
8494: "String not closed\n%.50s\n", q);
1.123 daniel 8495: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8496: ctxt->wellFormed = 0;
1.180 daniel 8497: ctxt->disableSAX = 1;
1.55 daniel 8498: } else
1.40 daniel 8499: NEXT;
1.31 daniel 8500: } else {
1.55 daniel 8501: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8502: ctxt->sax->error(ctxt->userData,
1.59 daniel 8503: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8504: ctxt->wellFormed = 0;
1.180 daniel 8505: ctxt->disableSAX = 1;
1.123 daniel 8506: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8507: }
8508: }
8509: return(version);
8510: }
8511:
1.50 daniel 8512: /**
8513: * xmlParseEncName:
8514: * @ctxt: an XML parser context
8515: *
8516: * parse the XML encoding name
1.29 daniel 8517: *
8518: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8519: *
1.68 daniel 8520: * Returns the encoding name value or NULL
1.29 daniel 8521: */
1.123 daniel 8522: xmlChar *
1.55 daniel 8523: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8524: xmlChar *buf = NULL;
8525: int len = 0;
8526: int size = 10;
8527: xmlChar cur;
1.29 daniel 8528:
1.135 daniel 8529: cur = CUR;
8530: if (((cur >= 'a') && (cur <= 'z')) ||
8531: ((cur >= 'A') && (cur <= 'Z'))) {
8532: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8533: if (buf == NULL) {
8534: fprintf(stderr, "malloc of %d byte failed\n", size);
8535: return(NULL);
8536: }
8537:
8538: buf[len++] = cur;
1.40 daniel 8539: NEXT;
1.135 daniel 8540: cur = CUR;
1.152 daniel 8541: while (((cur >= 'a') && (cur <= 'z')) ||
8542: ((cur >= 'A') && (cur <= 'Z')) ||
8543: ((cur >= '0') && (cur <= '9')) ||
8544: (cur == '.') || (cur == '_') ||
8545: (cur == '-')) {
1.135 daniel 8546: if (len + 1 >= size) {
8547: size *= 2;
1.204 veillard 8548: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8549: if (buf == NULL) {
8550: fprintf(stderr, "realloc of %d byte failed\n", size);
8551: return(NULL);
8552: }
8553: }
8554: buf[len++] = cur;
8555: NEXT;
8556: cur = CUR;
8557: if (cur == 0) {
8558: SHRINK;
8559: GROW;
8560: cur = CUR;
8561: }
8562: }
8563: buf[len] = 0;
1.29 daniel 8564: } else {
1.55 daniel 8565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8566: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8567: ctxt->wellFormed = 0;
1.180 daniel 8568: ctxt->disableSAX = 1;
1.123 daniel 8569: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8570: }
1.135 daniel 8571: return(buf);
1.29 daniel 8572: }
8573:
1.50 daniel 8574: /**
8575: * xmlParseEncodingDecl:
8576: * @ctxt: an XML parser context
8577: *
8578: * parse the XML encoding declaration
1.29 daniel 8579: *
8580: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8581: *
8582: * TODO: this should setup the conversion filters.
8583: *
1.68 daniel 8584: * Returns the encoding value or NULL
1.29 daniel 8585: */
8586:
1.123 daniel 8587: xmlChar *
1.55 daniel 8588: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8589: xmlChar *encoding = NULL;
8590: const xmlChar *q;
1.29 daniel 8591:
1.42 daniel 8592: SKIP_BLANKS;
1.152 daniel 8593: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8594: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8595: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8596: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8597: SKIP(8);
1.42 daniel 8598: SKIP_BLANKS;
1.152 daniel 8599: if (RAW != '=') {
1.55 daniel 8600: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8601: ctxt->sax->error(ctxt->userData,
8602: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8603: ctxt->wellFormed = 0;
1.180 daniel 8604: ctxt->disableSAX = 1;
1.123 daniel 8605: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8606: return(NULL);
8607: }
1.40 daniel 8608: NEXT;
1.42 daniel 8609: SKIP_BLANKS;
1.152 daniel 8610: if (RAW == '"') {
1.40 daniel 8611: NEXT;
8612: q = CUR_PTR;
1.29 daniel 8613: encoding = xmlParseEncName(ctxt);
1.152 daniel 8614: if (RAW != '"') {
1.55 daniel 8615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8616: ctxt->sax->error(ctxt->userData,
8617: "String not closed\n%.50s\n", q);
1.59 daniel 8618: ctxt->wellFormed = 0;
1.180 daniel 8619: ctxt->disableSAX = 1;
1.123 daniel 8620: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8621: } else
1.40 daniel 8622: NEXT;
1.152 daniel 8623: } else if (RAW == '\''){
1.40 daniel 8624: NEXT;
8625: q = CUR_PTR;
1.29 daniel 8626: encoding = xmlParseEncName(ctxt);
1.152 daniel 8627: if (RAW != '\'') {
1.55 daniel 8628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8629: ctxt->sax->error(ctxt->userData,
8630: "String not closed\n%.50s\n", q);
1.59 daniel 8631: ctxt->wellFormed = 0;
1.180 daniel 8632: ctxt->disableSAX = 1;
1.123 daniel 8633: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8634: } else
1.40 daniel 8635: NEXT;
1.152 daniel 8636: } else if (RAW == '"'){
1.55 daniel 8637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8638: ctxt->sax->error(ctxt->userData,
1.59 daniel 8639: "xmlParseEncodingDecl : expected ' or \"\n");
8640: ctxt->wellFormed = 0;
1.180 daniel 8641: ctxt->disableSAX = 1;
1.123 daniel 8642: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8643: }
1.193 daniel 8644: if (encoding != NULL) {
8645: xmlCharEncoding enc;
8646: xmlCharEncodingHandlerPtr handler;
8647:
1.195 daniel 8648: if (ctxt->input->encoding != NULL)
8649: xmlFree((xmlChar *) ctxt->input->encoding);
8650: ctxt->input->encoding = encoding;
8651:
1.193 daniel 8652: enc = xmlParseCharEncoding((const char *) encoding);
8653: /*
8654: * registered set of known encodings
8655: */
8656: if (enc != XML_CHAR_ENCODING_ERROR) {
8657: xmlSwitchEncoding(ctxt, enc);
8658: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8659: xmlFree(encoding);
8660: return(NULL);
8661: }
8662: } else {
8663: /*
8664: * fallback for unknown encodings
8665: */
8666: handler = xmlFindCharEncodingHandler((const char *) encoding);
8667: if (handler != NULL) {
8668: xmlSwitchToEncoding(ctxt, handler);
8669: } else {
8670: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 8671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8672: ctxt->sax->error(ctxt->userData,
8673: "Unsupported encoding %s\n", encoding);
1.193 daniel 8674: return(NULL);
8675: }
8676: }
8677: }
1.29 daniel 8678: }
8679: return(encoding);
8680: }
8681:
1.50 daniel 8682: /**
8683: * xmlParseSDDecl:
8684: * @ctxt: an XML parser context
8685: *
8686: * parse the XML standalone declaration
1.29 daniel 8687: *
8688: * [32] SDDecl ::= S 'standalone' Eq
8689: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8690: *
8691: * [ VC: Standalone Document Declaration ]
8692: * TODO The standalone document declaration must have the value "no"
8693: * if any external markup declarations contain declarations of:
8694: * - attributes with default values, if elements to which these
8695: * attributes apply appear in the document without specifications
8696: * of values for these attributes, or
8697: * - entities (other than amp, lt, gt, apos, quot), if references
8698: * to those entities appear in the document, or
8699: * - attributes with values subject to normalization, where the
8700: * attribute appears in the document with a value which will change
8701: * as a result of normalization, or
8702: * - element types with element content, if white space occurs directly
8703: * within any instance of those types.
1.68 daniel 8704: *
8705: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8706: */
8707:
1.55 daniel 8708: int
8709: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8710: int standalone = -1;
8711:
1.42 daniel 8712: SKIP_BLANKS;
1.152 daniel 8713: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8714: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8715: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8716: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8717: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8718: SKIP(10);
1.81 daniel 8719: SKIP_BLANKS;
1.152 daniel 8720: if (RAW != '=') {
1.55 daniel 8721: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8722: ctxt->sax->error(ctxt->userData,
1.59 daniel 8723: "XML standalone declaration : expected '='\n");
1.123 daniel 8724: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8725: ctxt->wellFormed = 0;
1.180 daniel 8726: ctxt->disableSAX = 1;
1.32 daniel 8727: return(standalone);
8728: }
1.40 daniel 8729: NEXT;
1.42 daniel 8730: SKIP_BLANKS;
1.152 daniel 8731: if (RAW == '\''){
1.40 daniel 8732: NEXT;
1.152 daniel 8733: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8734: standalone = 0;
1.40 daniel 8735: SKIP(2);
1.152 daniel 8736: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8737: (NXT(2) == 's')) {
1.29 daniel 8738: standalone = 1;
1.40 daniel 8739: SKIP(3);
1.29 daniel 8740: } else {
1.55 daniel 8741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8742: ctxt->sax->error(ctxt->userData,
8743: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8744: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8745: ctxt->wellFormed = 0;
1.180 daniel 8746: ctxt->disableSAX = 1;
1.29 daniel 8747: }
1.152 daniel 8748: if (RAW != '\'') {
1.55 daniel 8749: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8750: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8751: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8752: ctxt->wellFormed = 0;
1.180 daniel 8753: ctxt->disableSAX = 1;
1.55 daniel 8754: } else
1.40 daniel 8755: NEXT;
1.152 daniel 8756: } else if (RAW == '"'){
1.40 daniel 8757: NEXT;
1.152 daniel 8758: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8759: standalone = 0;
1.40 daniel 8760: SKIP(2);
1.152 daniel 8761: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8762: (NXT(2) == 's')) {
1.29 daniel 8763: standalone = 1;
1.40 daniel 8764: SKIP(3);
1.29 daniel 8765: } else {
1.55 daniel 8766: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8767: ctxt->sax->error(ctxt->userData,
1.59 daniel 8768: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8769: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8770: ctxt->wellFormed = 0;
1.180 daniel 8771: ctxt->disableSAX = 1;
1.29 daniel 8772: }
1.152 daniel 8773: if (RAW != '"') {
1.55 daniel 8774: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8775: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8776: ctxt->wellFormed = 0;
1.180 daniel 8777: ctxt->disableSAX = 1;
1.123 daniel 8778: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8779: } else
1.40 daniel 8780: NEXT;
1.37 daniel 8781: } else {
1.55 daniel 8782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8783: ctxt->sax->error(ctxt->userData,
8784: "Standalone value not found\n");
1.59 daniel 8785: ctxt->wellFormed = 0;
1.180 daniel 8786: ctxt->disableSAX = 1;
1.123 daniel 8787: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8788: }
1.29 daniel 8789: }
8790: return(standalone);
8791: }
8792:
1.50 daniel 8793: /**
8794: * xmlParseXMLDecl:
8795: * @ctxt: an XML parser context
8796: *
8797: * parse an XML declaration header
1.29 daniel 8798: *
8799: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8800: */
8801:
1.55 daniel 8802: void
8803: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8804: xmlChar *version;
1.1 veillard 8805:
8806: /*
1.19 daniel 8807: * We know that '<?xml' is here.
1.1 veillard 8808: */
1.40 daniel 8809: SKIP(5);
1.1 veillard 8810:
1.153 daniel 8811: if (!IS_BLANK(RAW)) {
1.59 daniel 8812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8813: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8814: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8815: ctxt->wellFormed = 0;
1.180 daniel 8816: ctxt->disableSAX = 1;
1.59 daniel 8817: }
1.42 daniel 8818: SKIP_BLANKS;
1.1 veillard 8819:
8820: /*
1.29 daniel 8821: * We should have the VersionInfo here.
1.1 veillard 8822: */
1.29 daniel 8823: version = xmlParseVersionInfo(ctxt);
8824: if (version == NULL)
1.45 daniel 8825: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8826: ctxt->version = xmlStrdup(version);
1.119 daniel 8827: xmlFree(version);
1.29 daniel 8828:
8829: /*
8830: * We may have the encoding declaration
8831: */
1.153 daniel 8832: if (!IS_BLANK(RAW)) {
1.152 daniel 8833: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8834: SKIP(2);
8835: return;
8836: }
8837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8838: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8839: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8840: ctxt->wellFormed = 0;
1.180 daniel 8841: ctxt->disableSAX = 1;
1.59 daniel 8842: }
1.195 daniel 8843: xmlParseEncodingDecl(ctxt);
1.193 daniel 8844: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8845: /*
8846: * The XML REC instructs us to stop parsing right here
8847: */
8848: return;
8849: }
1.1 veillard 8850:
8851: /*
1.29 daniel 8852: * We may have the standalone status.
1.1 veillard 8853: */
1.164 daniel 8854: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8855: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8856: SKIP(2);
8857: return;
8858: }
8859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8860: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8861: ctxt->wellFormed = 0;
1.180 daniel 8862: ctxt->disableSAX = 1;
1.123 daniel 8863: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8864: }
8865: SKIP_BLANKS;
1.167 daniel 8866: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8867:
1.42 daniel 8868: SKIP_BLANKS;
1.152 daniel 8869: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8870: SKIP(2);
1.152 daniel 8871: } else if (RAW == '>') {
1.31 daniel 8872: /* Deprecated old WD ... */
1.55 daniel 8873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8874: ctxt->sax->error(ctxt->userData,
8875: "XML declaration must end-up with '?>'\n");
1.59 daniel 8876: ctxt->wellFormed = 0;
1.180 daniel 8877: ctxt->disableSAX = 1;
1.123 daniel 8878: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8879: NEXT;
1.29 daniel 8880: } else {
1.55 daniel 8881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8882: ctxt->sax->error(ctxt->userData,
8883: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8884: ctxt->wellFormed = 0;
1.180 daniel 8885: ctxt->disableSAX = 1;
1.123 daniel 8886: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8887: MOVETO_ENDTAG(CUR_PTR);
8888: NEXT;
1.29 daniel 8889: }
1.1 veillard 8890: }
8891:
1.50 daniel 8892: /**
8893: * xmlParseMisc:
8894: * @ctxt: an XML parser context
8895: *
8896: * parse an XML Misc* optionnal field.
1.21 daniel 8897: *
1.22 daniel 8898: * [27] Misc ::= Comment | PI | S
1.1 veillard 8899: */
8900:
1.55 daniel 8901: void
8902: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8903: while (((RAW == '<') && (NXT(1) == '?')) ||
8904: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8905: (NXT(2) == '-') && (NXT(3) == '-')) ||
8906: IS_BLANK(CUR)) {
1.152 daniel 8907: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8908: xmlParsePI(ctxt);
1.40 daniel 8909: } else if (IS_BLANK(CUR)) {
8910: NEXT;
1.1 veillard 8911: } else
1.114 daniel 8912: xmlParseComment(ctxt);
1.1 veillard 8913: }
8914: }
8915:
1.50 daniel 8916: /**
1.181 daniel 8917: * xmlParseDocument:
1.50 daniel 8918: * @ctxt: an XML parser context
8919: *
8920: * parse an XML document (and build a tree if using the standard SAX
8921: * interface).
1.21 daniel 8922: *
1.22 daniel 8923: * [1] document ::= prolog element Misc*
1.29 daniel 8924: *
8925: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8926: *
1.68 daniel 8927: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8928: * as a result of the parsing.
1.1 veillard 8929: */
8930:
1.55 daniel 8931: int
8932: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8933: xmlChar start[4];
8934: xmlCharEncoding enc;
8935:
1.45 daniel 8936: xmlDefaultSAXHandlerInit();
8937:
1.91 daniel 8938: GROW;
8939:
1.14 veillard 8940: /*
1.44 daniel 8941: * SAX: beginning of the document processing.
8942: */
1.72 daniel 8943: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8944: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8945:
1.156 daniel 8946: /*
8947: * Get the 4 first bytes and decode the charset
8948: * if enc != XML_CHAR_ENCODING_NONE
8949: * plug some encoding conversion routines.
8950: */
8951: start[0] = RAW;
8952: start[1] = NXT(1);
8953: start[2] = NXT(2);
8954: start[3] = NXT(3);
8955: enc = xmlDetectCharEncoding(start, 4);
8956: if (enc != XML_CHAR_ENCODING_NONE) {
8957: xmlSwitchEncoding(ctxt, enc);
8958: }
8959:
1.1 veillard 8960:
1.59 daniel 8961: if (CUR == 0) {
8962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8963: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8964: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8965: ctxt->wellFormed = 0;
1.180 daniel 8966: ctxt->disableSAX = 1;
1.59 daniel 8967: }
1.1 veillard 8968:
8969: /*
8970: * Check for the XMLDecl in the Prolog.
8971: */
1.91 daniel 8972: GROW;
1.152 daniel 8973: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8974: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8975: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 8976:
8977: /*
8978: * Note that we will switch encoding on the fly.
8979: */
1.19 daniel 8980: xmlParseXMLDecl(ctxt);
1.193 daniel 8981: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8982: /*
8983: * The XML REC instructs us to stop parsing right here
8984: */
8985: return(-1);
8986: }
1.167 daniel 8987: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8988: SKIP_BLANKS;
1.1 veillard 8989: } else {
1.72 daniel 8990: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8991: }
1.171 daniel 8992: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8993: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8994:
8995: /*
8996: * The Misc part of the Prolog
8997: */
1.91 daniel 8998: GROW;
1.16 daniel 8999: xmlParseMisc(ctxt);
1.1 veillard 9000:
9001: /*
1.29 daniel 9002: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 9003: * (doctypedecl Misc*)?
9004: */
1.91 daniel 9005: GROW;
1.152 daniel 9006: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 9007: (NXT(2) == 'D') && (NXT(3) == 'O') &&
9008: (NXT(4) == 'C') && (NXT(5) == 'T') &&
9009: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
9010: (NXT(8) == 'E')) {
1.165 daniel 9011:
1.166 daniel 9012: ctxt->inSubset = 1;
1.22 daniel 9013: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9014: if (RAW == '[') {
1.140 daniel 9015: ctxt->instate = XML_PARSER_DTD;
9016: xmlParseInternalSubset(ctxt);
9017: }
1.165 daniel 9018:
9019: /*
9020: * Create and update the external subset.
9021: */
1.166 daniel 9022: ctxt->inSubset = 2;
1.171 daniel 9023: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9024: (!ctxt->disableSAX))
1.165 daniel 9025: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9026: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 9027: ctxt->inSubset = 0;
1.165 daniel 9028:
9029:
1.96 daniel 9030: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 9031: xmlParseMisc(ctxt);
1.21 daniel 9032: }
9033:
9034: /*
9035: * Time to start parsing the tree itself
1.1 veillard 9036: */
1.91 daniel 9037: GROW;
1.152 daniel 9038: if (RAW != '<') {
1.59 daniel 9039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 9040: ctxt->sax->error(ctxt->userData,
1.151 daniel 9041: "Start tag expected, '<' not found\n");
1.140 daniel 9042: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 9043: ctxt->wellFormed = 0;
1.180 daniel 9044: ctxt->disableSAX = 1;
1.140 daniel 9045: ctxt->instate = XML_PARSER_EOF;
9046: } else {
9047: ctxt->instate = XML_PARSER_CONTENT;
9048: xmlParseElement(ctxt);
9049: ctxt->instate = XML_PARSER_EPILOG;
9050:
9051:
9052: /*
9053: * The Misc part at the end
9054: */
9055: xmlParseMisc(ctxt);
9056:
1.152 daniel 9057: if (RAW != 0) {
1.140 daniel 9058: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9059: ctxt->sax->error(ctxt->userData,
9060: "Extra content at the end of the document\n");
9061: ctxt->wellFormed = 0;
1.180 daniel 9062: ctxt->disableSAX = 1;
1.140 daniel 9063: ctxt->errNo = XML_ERR_DOCUMENT_END;
9064: }
9065: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 9066: }
9067:
1.44 daniel 9068: /*
9069: * SAX: end of the document processing.
9070: */
1.171 daniel 9071: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9072: (!ctxt->disableSAX))
1.74 daniel 9073: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 9074:
1.59 daniel 9075: if (! ctxt->wellFormed) return(-1);
1.16 daniel 9076: return(0);
9077: }
9078:
1.98 daniel 9079: /************************************************************************
9080: * *
1.128 daniel 9081: * Progressive parsing interfaces *
9082: * *
9083: ************************************************************************/
9084:
9085: /**
9086: * xmlParseLookupSequence:
9087: * @ctxt: an XML parser context
9088: * @first: the first char to lookup
1.140 daniel 9089: * @next: the next char to lookup or zero
9090: * @third: the next char to lookup or zero
1.128 daniel 9091: *
1.140 daniel 9092: * Try to find if a sequence (first, next, third) or just (first next) or
9093: * (first) is available in the input stream.
9094: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9095: * to avoid rescanning sequences of bytes, it DOES change the state of the
9096: * parser, do not use liberally.
1.128 daniel 9097: *
1.140 daniel 9098: * Returns the index to the current parsing point if the full sequence
9099: * is available, -1 otherwise.
1.128 daniel 9100: */
9101: int
1.140 daniel 9102: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9103: xmlChar next, xmlChar third) {
9104: int base, len;
9105: xmlParserInputPtr in;
9106: const xmlChar *buf;
9107:
9108: in = ctxt->input;
9109: if (in == NULL) return(-1);
9110: base = in->cur - in->base;
9111: if (base < 0) return(-1);
9112: if (ctxt->checkIndex > base)
9113: base = ctxt->checkIndex;
9114: if (in->buf == NULL) {
9115: buf = in->base;
9116: len = in->length;
9117: } else {
9118: buf = in->buf->buffer->content;
9119: len = in->buf->buffer->use;
9120: }
9121: /* take into account the sequence length */
9122: if (third) len -= 2;
9123: else if (next) len --;
9124: for (;base < len;base++) {
9125: if (buf[base] == first) {
9126: if (third != 0) {
9127: if ((buf[base + 1] != next) ||
9128: (buf[base + 2] != third)) continue;
9129: } else if (next != 0) {
9130: if (buf[base + 1] != next) continue;
9131: }
9132: ctxt->checkIndex = 0;
9133: #ifdef DEBUG_PUSH
9134: if (next == 0)
9135: fprintf(stderr, "PP: lookup '%c' found at %d\n",
9136: first, base);
9137: else if (third == 0)
9138: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
9139: first, next, base);
9140: else
9141: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
9142: first, next, third, base);
9143: #endif
9144: return(base - (in->cur - in->base));
9145: }
9146: }
9147: ctxt->checkIndex = base;
9148: #ifdef DEBUG_PUSH
9149: if (next == 0)
9150: fprintf(stderr, "PP: lookup '%c' failed\n", first);
9151: else if (third == 0)
9152: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
9153: else
9154: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
9155: #endif
9156: return(-1);
1.128 daniel 9157: }
9158:
9159: /**
1.143 daniel 9160: * xmlParseTryOrFinish:
1.128 daniel 9161: * @ctxt: an XML parser context
1.143 daniel 9162: * @terminate: last chunk indicator
1.128 daniel 9163: *
9164: * Try to progress on parsing
9165: *
9166: * Returns zero if no parsing was possible
9167: */
9168: int
1.143 daniel 9169: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 9170: int ret = 0;
1.140 daniel 9171: int avail;
9172: xmlChar cur, next;
9173:
9174: #ifdef DEBUG_PUSH
9175: switch (ctxt->instate) {
9176: case XML_PARSER_EOF:
9177: fprintf(stderr, "PP: try EOF\n"); break;
9178: case XML_PARSER_START:
9179: fprintf(stderr, "PP: try START\n"); break;
9180: case XML_PARSER_MISC:
9181: fprintf(stderr, "PP: try MISC\n");break;
9182: case XML_PARSER_COMMENT:
9183: fprintf(stderr, "PP: try COMMENT\n");break;
9184: case XML_PARSER_PROLOG:
9185: fprintf(stderr, "PP: try PROLOG\n");break;
9186: case XML_PARSER_START_TAG:
9187: fprintf(stderr, "PP: try START_TAG\n");break;
9188: case XML_PARSER_CONTENT:
9189: fprintf(stderr, "PP: try CONTENT\n");break;
9190: case XML_PARSER_CDATA_SECTION:
9191: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
9192: case XML_PARSER_END_TAG:
9193: fprintf(stderr, "PP: try END_TAG\n");break;
9194: case XML_PARSER_ENTITY_DECL:
9195: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
9196: case XML_PARSER_ENTITY_VALUE:
9197: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
9198: case XML_PARSER_ATTRIBUTE_VALUE:
9199: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
9200: case XML_PARSER_DTD:
9201: fprintf(stderr, "PP: try DTD\n");break;
9202: case XML_PARSER_EPILOG:
9203: fprintf(stderr, "PP: try EPILOG\n");break;
9204: case XML_PARSER_PI:
9205: fprintf(stderr, "PP: try PI\n");break;
9206: }
9207: #endif
1.128 daniel 9208:
9209: while (1) {
1.140 daniel 9210: /*
9211: * Pop-up of finished entities.
9212: */
1.152 daniel 9213: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9214: xmlPopInput(ctxt);
9215:
1.184 daniel 9216: if (ctxt->input ==NULL) break;
9217: if (ctxt->input->buf == NULL)
9218: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9219: else
1.184 daniel 9220: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9221: if (avail < 1)
9222: goto done;
1.128 daniel 9223: switch (ctxt->instate) {
9224: case XML_PARSER_EOF:
1.140 daniel 9225: /*
9226: * Document parsing is done !
9227: */
9228: goto done;
9229: case XML_PARSER_START:
9230: /*
9231: * Very first chars read from the document flow.
9232: */
1.184 daniel 9233: cur = ctxt->input->cur[0];
1.140 daniel 9234: if (IS_BLANK(cur)) {
9235: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9236: ctxt->sax->setDocumentLocator(ctxt->userData,
9237: &xmlDefaultSAXLocator);
9238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9239: ctxt->sax->error(ctxt->userData,
9240: "Extra spaces at the beginning of the document are not allowed\n");
9241: ctxt->errNo = XML_ERR_DOCUMENT_START;
9242: ctxt->wellFormed = 0;
1.180 daniel 9243: ctxt->disableSAX = 1;
1.140 daniel 9244: SKIP_BLANKS;
9245: ret++;
1.184 daniel 9246: if (ctxt->input->buf == NULL)
9247: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9248: else
1.184 daniel 9249: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9250: }
9251: if (avail < 2)
9252: goto done;
9253:
1.184 daniel 9254: cur = ctxt->input->cur[0];
9255: next = ctxt->input->cur[1];
1.140 daniel 9256: if (cur == 0) {
9257: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9258: ctxt->sax->setDocumentLocator(ctxt->userData,
9259: &xmlDefaultSAXLocator);
9260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9261: ctxt->sax->error(ctxt->userData, "Document is empty\n");
9262: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9263: ctxt->wellFormed = 0;
1.180 daniel 9264: ctxt->disableSAX = 1;
1.140 daniel 9265: ctxt->instate = XML_PARSER_EOF;
9266: #ifdef DEBUG_PUSH
9267: fprintf(stderr, "PP: entering EOF\n");
9268: #endif
9269: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9270: ctxt->sax->endDocument(ctxt->userData);
9271: goto done;
9272: }
9273: if ((cur == '<') && (next == '?')) {
9274: /* PI or XML decl */
9275: if (avail < 5) return(ret);
1.143 daniel 9276: if ((!terminate) &&
9277: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9278: return(ret);
9279: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9280: ctxt->sax->setDocumentLocator(ctxt->userData,
9281: &xmlDefaultSAXLocator);
1.184 daniel 9282: if ((ctxt->input->cur[2] == 'x') &&
9283: (ctxt->input->cur[3] == 'm') &&
9284: (ctxt->input->cur[4] == 'l') &&
9285: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 9286: ret += 5;
9287: #ifdef DEBUG_PUSH
9288: fprintf(stderr, "PP: Parsing XML Decl\n");
9289: #endif
9290: xmlParseXMLDecl(ctxt);
1.193 daniel 9291: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9292: /*
9293: * The XML REC instructs us to stop parsing right
9294: * here
9295: */
9296: ctxt->instate = XML_PARSER_EOF;
9297: return(0);
9298: }
1.167 daniel 9299: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 9300: if ((ctxt->encoding == NULL) &&
9301: (ctxt->input->encoding != NULL))
9302: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 9303: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9304: (!ctxt->disableSAX))
1.140 daniel 9305: ctxt->sax->startDocument(ctxt->userData);
9306: ctxt->instate = XML_PARSER_MISC;
9307: #ifdef DEBUG_PUSH
9308: fprintf(stderr, "PP: entering MISC\n");
9309: #endif
9310: } else {
9311: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9312: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9313: (!ctxt->disableSAX))
1.140 daniel 9314: ctxt->sax->startDocument(ctxt->userData);
9315: ctxt->instate = XML_PARSER_MISC;
9316: #ifdef DEBUG_PUSH
9317: fprintf(stderr, "PP: entering MISC\n");
9318: #endif
9319: }
9320: } else {
9321: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9322: ctxt->sax->setDocumentLocator(ctxt->userData,
9323: &xmlDefaultSAXLocator);
9324: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9325: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9326: (!ctxt->disableSAX))
1.140 daniel 9327: ctxt->sax->startDocument(ctxt->userData);
9328: ctxt->instate = XML_PARSER_MISC;
9329: #ifdef DEBUG_PUSH
9330: fprintf(stderr, "PP: entering MISC\n");
9331: #endif
9332: }
9333: break;
9334: case XML_PARSER_MISC:
9335: SKIP_BLANKS;
1.184 daniel 9336: if (ctxt->input->buf == NULL)
9337: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9338: else
1.184 daniel 9339: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9340: if (avail < 2)
9341: goto done;
1.184 daniel 9342: cur = ctxt->input->cur[0];
9343: next = ctxt->input->cur[1];
1.140 daniel 9344: if ((cur == '<') && (next == '?')) {
1.143 daniel 9345: if ((!terminate) &&
9346: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9347: goto done;
9348: #ifdef DEBUG_PUSH
9349: fprintf(stderr, "PP: Parsing PI\n");
9350: #endif
9351: xmlParsePI(ctxt);
9352: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9353: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9354: if ((!terminate) &&
9355: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9356: goto done;
9357: #ifdef DEBUG_PUSH
9358: fprintf(stderr, "PP: Parsing Comment\n");
9359: #endif
9360: xmlParseComment(ctxt);
9361: ctxt->instate = XML_PARSER_MISC;
9362: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9363: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9364: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9365: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9366: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 9367: if ((!terminate) &&
9368: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9369: goto done;
9370: #ifdef DEBUG_PUSH
9371: fprintf(stderr, "PP: Parsing internal subset\n");
9372: #endif
1.166 daniel 9373: ctxt->inSubset = 1;
1.140 daniel 9374: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9375: if (RAW == '[') {
1.140 daniel 9376: ctxt->instate = XML_PARSER_DTD;
9377: #ifdef DEBUG_PUSH
9378: fprintf(stderr, "PP: entering DTD\n");
9379: #endif
9380: } else {
1.166 daniel 9381: /*
9382: * Create and update the external subset.
9383: */
9384: ctxt->inSubset = 2;
1.171 daniel 9385: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9386: (ctxt->sax->externalSubset != NULL))
9387: ctxt->sax->externalSubset(ctxt->userData,
9388: ctxt->intSubName, ctxt->extSubSystem,
9389: ctxt->extSubURI);
9390: ctxt->inSubset = 0;
1.140 daniel 9391: ctxt->instate = XML_PARSER_PROLOG;
9392: #ifdef DEBUG_PUSH
9393: fprintf(stderr, "PP: entering PROLOG\n");
9394: #endif
9395: }
9396: } else if ((cur == '<') && (next == '!') &&
9397: (avail < 9)) {
9398: goto done;
9399: } else {
9400: ctxt->instate = XML_PARSER_START_TAG;
9401: #ifdef DEBUG_PUSH
9402: fprintf(stderr, "PP: entering START_TAG\n");
9403: #endif
9404: }
9405: break;
1.128 daniel 9406: case XML_PARSER_PROLOG:
1.140 daniel 9407: SKIP_BLANKS;
1.184 daniel 9408: if (ctxt->input->buf == NULL)
9409: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9410: else
1.184 daniel 9411: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9412: if (avail < 2)
9413: goto done;
1.184 daniel 9414: cur = ctxt->input->cur[0];
9415: next = ctxt->input->cur[1];
1.140 daniel 9416: if ((cur == '<') && (next == '?')) {
1.143 daniel 9417: if ((!terminate) &&
9418: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9419: goto done;
9420: #ifdef DEBUG_PUSH
9421: fprintf(stderr, "PP: Parsing PI\n");
9422: #endif
9423: xmlParsePI(ctxt);
9424: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9425: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9426: if ((!terminate) &&
9427: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9428: goto done;
9429: #ifdef DEBUG_PUSH
9430: fprintf(stderr, "PP: Parsing Comment\n");
9431: #endif
9432: xmlParseComment(ctxt);
9433: ctxt->instate = XML_PARSER_PROLOG;
9434: } else if ((cur == '<') && (next == '!') &&
9435: (avail < 4)) {
9436: goto done;
9437: } else {
9438: ctxt->instate = XML_PARSER_START_TAG;
9439: #ifdef DEBUG_PUSH
9440: fprintf(stderr, "PP: entering START_TAG\n");
9441: #endif
9442: }
9443: break;
9444: case XML_PARSER_EPILOG:
9445: SKIP_BLANKS;
1.184 daniel 9446: if (ctxt->input->buf == NULL)
9447: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9448: else
1.184 daniel 9449: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9450: if (avail < 2)
9451: goto done;
1.184 daniel 9452: cur = ctxt->input->cur[0];
9453: next = ctxt->input->cur[1];
1.140 daniel 9454: if ((cur == '<') && (next == '?')) {
1.143 daniel 9455: if ((!terminate) &&
9456: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9457: goto done;
9458: #ifdef DEBUG_PUSH
9459: fprintf(stderr, "PP: Parsing PI\n");
9460: #endif
9461: xmlParsePI(ctxt);
9462: ctxt->instate = XML_PARSER_EPILOG;
9463: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9464: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9465: if ((!terminate) &&
9466: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9467: goto done;
9468: #ifdef DEBUG_PUSH
9469: fprintf(stderr, "PP: Parsing Comment\n");
9470: #endif
9471: xmlParseComment(ctxt);
9472: ctxt->instate = XML_PARSER_EPILOG;
9473: } else if ((cur == '<') && (next == '!') &&
9474: (avail < 4)) {
9475: goto done;
9476: } else {
9477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9478: ctxt->sax->error(ctxt->userData,
9479: "Extra content at the end of the document\n");
9480: ctxt->wellFormed = 0;
1.180 daniel 9481: ctxt->disableSAX = 1;
1.140 daniel 9482: ctxt->errNo = XML_ERR_DOCUMENT_END;
9483: ctxt->instate = XML_PARSER_EOF;
9484: #ifdef DEBUG_PUSH
9485: fprintf(stderr, "PP: entering EOF\n");
9486: #endif
1.171 daniel 9487: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9488: (!ctxt->disableSAX))
1.140 daniel 9489: ctxt->sax->endDocument(ctxt->userData);
9490: goto done;
9491: }
9492: break;
9493: case XML_PARSER_START_TAG: {
9494: xmlChar *name, *oldname;
9495:
1.184 daniel 9496: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9497: goto done;
1.184 daniel 9498: cur = ctxt->input->cur[0];
1.140 daniel 9499: if (cur != '<') {
9500: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9501: ctxt->sax->error(ctxt->userData,
9502: "Start tag expect, '<' not found\n");
9503: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9504: ctxt->wellFormed = 0;
1.180 daniel 9505: ctxt->disableSAX = 1;
1.140 daniel 9506: ctxt->instate = XML_PARSER_EOF;
9507: #ifdef DEBUG_PUSH
9508: fprintf(stderr, "PP: entering EOF\n");
9509: #endif
1.171 daniel 9510: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9511: (!ctxt->disableSAX))
1.140 daniel 9512: ctxt->sax->endDocument(ctxt->userData);
9513: goto done;
9514: }
1.143 daniel 9515: if ((!terminate) &&
9516: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9517: goto done;
1.176 daniel 9518: if (ctxt->spaceNr == 0)
9519: spacePush(ctxt, -1);
9520: else
9521: spacePush(ctxt, *ctxt->space);
1.140 daniel 9522: name = xmlParseStartTag(ctxt);
9523: if (name == NULL) {
1.176 daniel 9524: spacePop(ctxt);
1.140 daniel 9525: ctxt->instate = XML_PARSER_EOF;
9526: #ifdef DEBUG_PUSH
9527: fprintf(stderr, "PP: entering EOF\n");
9528: #endif
1.171 daniel 9529: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9530: (!ctxt->disableSAX))
1.140 daniel 9531: ctxt->sax->endDocument(ctxt->userData);
9532: goto done;
9533: }
9534: namePush(ctxt, xmlStrdup(name));
9535:
9536: /*
9537: * [ VC: Root Element Type ]
9538: * The Name in the document type declaration must match
9539: * the element type of the root element.
9540: */
9541: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9542: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9543: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9544:
9545: /*
9546: * Check for an Empty Element.
9547: */
1.152 daniel 9548: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9549: SKIP(2);
1.171 daniel 9550: if ((ctxt->sax != NULL) &&
9551: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9552: ctxt->sax->endElement(ctxt->userData, name);
9553: xmlFree(name);
9554: oldname = namePop(ctxt);
1.176 daniel 9555: spacePop(ctxt);
1.140 daniel 9556: if (oldname != NULL) {
9557: #ifdef DEBUG_STACK
9558: fprintf(stderr,"Close: popped %s\n", oldname);
9559: #endif
9560: xmlFree(oldname);
9561: }
9562: if (ctxt->name == NULL) {
9563: ctxt->instate = XML_PARSER_EPILOG;
9564: #ifdef DEBUG_PUSH
9565: fprintf(stderr, "PP: entering EPILOG\n");
9566: #endif
9567: } else {
9568: ctxt->instate = XML_PARSER_CONTENT;
9569: #ifdef DEBUG_PUSH
9570: fprintf(stderr, "PP: entering CONTENT\n");
9571: #endif
9572: }
9573: break;
9574: }
1.152 daniel 9575: if (RAW == '>') {
1.140 daniel 9576: NEXT;
9577: } else {
9578: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9579: ctxt->sax->error(ctxt->userData,
9580: "Couldn't find end of Start Tag %s\n",
9581: name);
9582: ctxt->wellFormed = 0;
1.180 daniel 9583: ctxt->disableSAX = 1;
1.140 daniel 9584: ctxt->errNo = XML_ERR_GT_REQUIRED;
9585:
9586: /*
9587: * end of parsing of this node.
9588: */
9589: nodePop(ctxt);
9590: oldname = namePop(ctxt);
1.176 daniel 9591: spacePop(ctxt);
1.140 daniel 9592: if (oldname != NULL) {
9593: #ifdef DEBUG_STACK
9594: fprintf(stderr,"Close: popped %s\n", oldname);
9595: #endif
9596: xmlFree(oldname);
9597: }
9598: }
9599: xmlFree(name);
9600: ctxt->instate = XML_PARSER_CONTENT;
9601: #ifdef DEBUG_PUSH
9602: fprintf(stderr, "PP: entering CONTENT\n");
9603: #endif
9604: break;
9605: }
1.128 daniel 9606: case XML_PARSER_CONTENT:
1.140 daniel 9607: /*
9608: * Handle preparsed entities and charRef
9609: */
9610: if (ctxt->token != 0) {
9611: xmlChar cur[2] = { 0 , 0 } ;
9612:
9613: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9614: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9615: (ctxt->sax->characters != NULL))
1.140 daniel 9616: ctxt->sax->characters(ctxt->userData, cur, 1);
9617: ctxt->token = 0;
9618: }
1.184 daniel 9619: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9620: goto done;
1.184 daniel 9621: cur = ctxt->input->cur[0];
9622: next = ctxt->input->cur[1];
1.140 daniel 9623: if ((cur == '<') && (next == '?')) {
1.143 daniel 9624: if ((!terminate) &&
9625: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9626: goto done;
9627: #ifdef DEBUG_PUSH
9628: fprintf(stderr, "PP: Parsing PI\n");
9629: #endif
9630: xmlParsePI(ctxt);
9631: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9632: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9633: if ((!terminate) &&
9634: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9635: goto done;
9636: #ifdef DEBUG_PUSH
9637: fprintf(stderr, "PP: Parsing Comment\n");
9638: #endif
9639: xmlParseComment(ctxt);
9640: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9641: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9642: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9643: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9644: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9645: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9646: SKIP(9);
9647: ctxt->instate = XML_PARSER_CDATA_SECTION;
9648: #ifdef DEBUG_PUSH
9649: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9650: #endif
9651: break;
9652: } else if ((cur == '<') && (next == '!') &&
9653: (avail < 9)) {
9654: goto done;
9655: } else if ((cur == '<') && (next == '/')) {
9656: ctxt->instate = XML_PARSER_END_TAG;
9657: #ifdef DEBUG_PUSH
9658: fprintf(stderr, "PP: entering END_TAG\n");
9659: #endif
9660: break;
9661: } else if (cur == '<') {
9662: ctxt->instate = XML_PARSER_START_TAG;
9663: #ifdef DEBUG_PUSH
9664: fprintf(stderr, "PP: entering START_TAG\n");
9665: #endif
9666: break;
9667: } else if (cur == '&') {
1.143 daniel 9668: if ((!terminate) &&
9669: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9670: goto done;
9671: #ifdef DEBUG_PUSH
9672: fprintf(stderr, "PP: Parsing Reference\n");
9673: #endif
9674: /* TODO: check generation of subtrees if noent !!! */
9675: xmlParseReference(ctxt);
9676: } else {
1.156 daniel 9677: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9678: /*
1.181 daniel 9679: * Goal of the following test is:
1.140 daniel 9680: * - minimize calls to the SAX 'character' callback
9681: * when they are mergeable
9682: * - handle an problem for isBlank when we only parse
9683: * a sequence of blank chars and the next one is
9684: * not available to check against '<' presence.
9685: * - tries to homogenize the differences in SAX
9686: * callbacks beween the push and pull versions
9687: * of the parser.
9688: */
9689: if ((ctxt->inputNr == 1) &&
9690: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9691: if ((!terminate) &&
9692: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9693: goto done;
9694: }
9695: ctxt->checkIndex = 0;
9696: #ifdef DEBUG_PUSH
9697: fprintf(stderr, "PP: Parsing char data\n");
9698: #endif
9699: xmlParseCharData(ctxt, 0);
9700: }
9701: /*
9702: * Pop-up of finished entities.
9703: */
1.152 daniel 9704: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9705: xmlPopInput(ctxt);
9706: break;
9707: case XML_PARSER_CDATA_SECTION: {
9708: /*
9709: * The Push mode need to have the SAX callback for
9710: * cdataBlock merge back contiguous callbacks.
9711: */
9712: int base;
9713:
9714: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9715: if (base < 0) {
9716: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9717: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9718: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9719: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9720: XML_PARSER_BIG_BUFFER_SIZE);
9721: }
9722: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9723: ctxt->checkIndex = 0;
9724: }
9725: goto done;
9726: } else {
1.171 daniel 9727: if ((ctxt->sax != NULL) && (base > 0) &&
9728: (!ctxt->disableSAX)) {
1.140 daniel 9729: if (ctxt->sax->cdataBlock != NULL)
9730: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9731: ctxt->input->cur, base);
1.140 daniel 9732: }
9733: SKIP(base + 3);
9734: ctxt->checkIndex = 0;
9735: ctxt->instate = XML_PARSER_CONTENT;
9736: #ifdef DEBUG_PUSH
9737: fprintf(stderr, "PP: entering CONTENT\n");
9738: #endif
9739: }
9740: break;
9741: }
1.141 daniel 9742: case XML_PARSER_END_TAG:
1.140 daniel 9743: if (avail < 2)
9744: goto done;
1.143 daniel 9745: if ((!terminate) &&
9746: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9747: goto done;
9748: xmlParseEndTag(ctxt);
9749: if (ctxt->name == NULL) {
9750: ctxt->instate = XML_PARSER_EPILOG;
9751: #ifdef DEBUG_PUSH
9752: fprintf(stderr, "PP: entering EPILOG\n");
9753: #endif
9754: } else {
9755: ctxt->instate = XML_PARSER_CONTENT;
9756: #ifdef DEBUG_PUSH
9757: fprintf(stderr, "PP: entering CONTENT\n");
9758: #endif
9759: }
9760: break;
9761: case XML_PARSER_DTD: {
9762: /*
9763: * Sorry but progressive parsing of the internal subset
9764: * is not expected to be supported. We first check that
9765: * the full content of the internal subset is available and
9766: * the parsing is launched only at that point.
9767: * Internal subset ends up with "']' S? '>'" in an unescaped
9768: * section and not in a ']]>' sequence which are conditional
9769: * sections (whoever argued to keep that crap in XML deserve
9770: * a place in hell !).
9771: */
9772: int base, i;
9773: xmlChar *buf;
9774: xmlChar quote = 0;
9775:
1.184 daniel 9776: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9777: if (base < 0) return(0);
9778: if (ctxt->checkIndex > base)
9779: base = ctxt->checkIndex;
1.184 daniel 9780: buf = ctxt->input->buf->buffer->content;
1.202 daniel 9781: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9782: base++) {
1.140 daniel 9783: if (quote != 0) {
9784: if (buf[base] == quote)
9785: quote = 0;
9786: continue;
9787: }
9788: if (buf[base] == '"') {
9789: quote = '"';
9790: continue;
9791: }
9792: if (buf[base] == '\'') {
9793: quote = '\'';
9794: continue;
9795: }
9796: if (buf[base] == ']') {
1.202 daniel 9797: if ((unsigned int) base +1 >=
9798: ctxt->input->buf->buffer->use)
1.140 daniel 9799: break;
9800: if (buf[base + 1] == ']') {
9801: /* conditional crap, skip both ']' ! */
9802: base++;
9803: continue;
9804: }
1.202 daniel 9805: for (i = 0;
9806: (unsigned int) base + i < ctxt->input->buf->buffer->use;
9807: i++) {
1.140 daniel 9808: if (buf[base + i] == '>')
9809: goto found_end_int_subset;
9810: }
9811: break;
9812: }
9813: }
9814: /*
9815: * We didn't found the end of the Internal subset
9816: */
9817: if (quote == 0)
9818: ctxt->checkIndex = base;
9819: #ifdef DEBUG_PUSH
9820: if (next == 0)
9821: fprintf(stderr, "PP: lookup of int subset end filed\n");
9822: #endif
9823: goto done;
9824:
9825: found_end_int_subset:
9826: xmlParseInternalSubset(ctxt);
1.166 daniel 9827: ctxt->inSubset = 2;
1.171 daniel 9828: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9829: (ctxt->sax->externalSubset != NULL))
9830: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9831: ctxt->extSubSystem, ctxt->extSubURI);
9832: ctxt->inSubset = 0;
1.140 daniel 9833: ctxt->instate = XML_PARSER_PROLOG;
9834: ctxt->checkIndex = 0;
9835: #ifdef DEBUG_PUSH
9836: fprintf(stderr, "PP: entering PROLOG\n");
9837: #endif
9838: break;
9839: }
9840: case XML_PARSER_COMMENT:
9841: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9842: ctxt->instate = XML_PARSER_CONTENT;
9843: #ifdef DEBUG_PUSH
9844: fprintf(stderr, "PP: entering CONTENT\n");
9845: #endif
9846: break;
9847: case XML_PARSER_PI:
9848: fprintf(stderr, "PP: internal error, state == PI\n");
9849: ctxt->instate = XML_PARSER_CONTENT;
9850: #ifdef DEBUG_PUSH
9851: fprintf(stderr, "PP: entering CONTENT\n");
9852: #endif
9853: break;
1.128 daniel 9854: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9855: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9856: ctxt->instate = XML_PARSER_DTD;
9857: #ifdef DEBUG_PUSH
9858: fprintf(stderr, "PP: entering DTD\n");
9859: #endif
9860: break;
1.128 daniel 9861: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9862: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9863: ctxt->instate = XML_PARSER_CONTENT;
9864: #ifdef DEBUG_PUSH
9865: fprintf(stderr, "PP: entering DTD\n");
9866: #endif
9867: break;
1.128 daniel 9868: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9869: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9870: ctxt->instate = XML_PARSER_START_TAG;
9871: #ifdef DEBUG_PUSH
9872: fprintf(stderr, "PP: entering START_TAG\n");
9873: #endif
9874: break;
9875: case XML_PARSER_SYSTEM_LITERAL:
9876: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9877: ctxt->instate = XML_PARSER_START_TAG;
9878: #ifdef DEBUG_PUSH
9879: fprintf(stderr, "PP: entering START_TAG\n");
9880: #endif
9881: break;
1.128 daniel 9882: }
9883: }
1.140 daniel 9884: done:
9885: #ifdef DEBUG_PUSH
9886: fprintf(stderr, "PP: done %d\n", ret);
9887: #endif
1.128 daniel 9888: return(ret);
9889: }
9890:
9891: /**
1.143 daniel 9892: * xmlParseTry:
9893: * @ctxt: an XML parser context
9894: *
9895: * Try to progress on parsing
9896: *
9897: * Returns zero if no parsing was possible
9898: */
9899: int
9900: xmlParseTry(xmlParserCtxtPtr ctxt) {
9901: return(xmlParseTryOrFinish(ctxt, 0));
9902: }
9903:
9904: /**
1.128 daniel 9905: * xmlParseChunk:
9906: * @ctxt: an XML parser context
9907: * @chunk: an char array
9908: * @size: the size in byte of the chunk
9909: * @terminate: last chunk indicator
9910: *
9911: * Parse a Chunk of memory
9912: *
9913: * Returns zero if no error, the xmlParserErrors otherwise.
9914: */
1.140 daniel 9915: int
1.128 daniel 9916: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9917: int terminate) {
1.132 daniel 9918: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9919: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9920: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9921: int cur = ctxt->input->cur - ctxt->input->base;
9922:
1.132 daniel 9923: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9924: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9925: ctxt->input->cur = ctxt->input->base + cur;
9926: #ifdef DEBUG_PUSH
9927: fprintf(stderr, "PP: pushed %d\n", size);
9928: #endif
9929:
1.150 daniel 9930: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9931: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9932: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9933: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9934: if (terminate) {
1.151 daniel 9935: /*
9936: * Check for termination
9937: */
1.140 daniel 9938: if ((ctxt->instate != XML_PARSER_EOF) &&
9939: (ctxt->instate != XML_PARSER_EPILOG)) {
9940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9941: ctxt->sax->error(ctxt->userData,
9942: "Extra content at the end of the document\n");
9943: ctxt->wellFormed = 0;
1.180 daniel 9944: ctxt->disableSAX = 1;
1.140 daniel 9945: ctxt->errNo = XML_ERR_DOCUMENT_END;
9946: }
9947: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9948: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9949: (!ctxt->disableSAX))
1.140 daniel 9950: ctxt->sax->endDocument(ctxt->userData);
9951: }
9952: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9953: }
9954: return((xmlParserErrors) ctxt->errNo);
9955: }
9956:
9957: /************************************************************************
9958: * *
1.98 daniel 9959: * I/O front end functions to the parser *
9960: * *
9961: ************************************************************************/
1.201 daniel 9962:
9963: /**
9964: * xmlCreatePushParserCtxt:
9965: * @ctxt: an XML parser context
9966: *
9967: * Blocks further parser processing
9968: */
9969: void
9970: xmlStopParser(xmlParserCtxtPtr ctxt) {
9971: ctxt->instate = XML_PARSER_EOF;
9972: if (ctxt->input != NULL)
9973: ctxt->input->cur = BAD_CAST"";
9974: }
1.98 daniel 9975:
1.50 daniel 9976: /**
1.181 daniel 9977: * xmlCreatePushParserCtxt:
1.140 daniel 9978: * @sax: a SAX handler
9979: * @user_data: The user data returned on SAX callbacks
9980: * @chunk: a pointer to an array of chars
9981: * @size: number of chars in the array
9982: * @filename: an optional file name or URI
9983: *
9984: * Create a parser context for using the XML parser in push mode
9985: * To allow content encoding detection, @size should be >= 4
9986: * The value of @filename is used for fetching external entities
9987: * and error/warning reports.
9988: *
9989: * Returns the new parser context or NULL
9990: */
9991: xmlParserCtxtPtr
9992: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9993: const char *chunk, int size, const char *filename) {
9994: xmlParserCtxtPtr ctxt;
9995: xmlParserInputPtr inputStream;
9996: xmlParserInputBufferPtr buf;
9997: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9998:
9999: /*
1.156 daniel 10000: * plug some encoding conversion routines
1.140 daniel 10001: */
10002: if ((chunk != NULL) && (size >= 4))
1.156 daniel 10003: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 10004:
10005: buf = xmlAllocParserInputBuffer(enc);
10006: if (buf == NULL) return(NULL);
10007:
10008: ctxt = xmlNewParserCtxt();
10009: if (ctxt == NULL) {
10010: xmlFree(buf);
10011: return(NULL);
10012: }
10013: if (sax != NULL) {
10014: if (ctxt->sax != &xmlDefaultSAXHandler)
10015: xmlFree(ctxt->sax);
10016: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10017: if (ctxt->sax == NULL) {
10018: xmlFree(buf);
10019: xmlFree(ctxt);
10020: return(NULL);
10021: }
10022: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10023: if (user_data != NULL)
10024: ctxt->userData = user_data;
10025: }
10026: if (filename == NULL) {
10027: ctxt->directory = NULL;
10028: } else {
10029: ctxt->directory = xmlParserGetDirectory(filename);
10030: }
10031:
10032: inputStream = xmlNewInputStream(ctxt);
10033: if (inputStream == NULL) {
10034: xmlFreeParserCtxt(ctxt);
10035: return(NULL);
10036: }
10037:
10038: if (filename == NULL)
10039: inputStream->filename = NULL;
10040: else
10041: inputStream->filename = xmlMemStrdup(filename);
10042: inputStream->buf = buf;
10043: inputStream->base = inputStream->buf->buffer->content;
10044: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 10045: if (enc != XML_CHAR_ENCODING_NONE) {
10046: xmlSwitchEncoding(ctxt, enc);
10047: }
1.140 daniel 10048:
10049: inputPush(ctxt, inputStream);
10050:
10051: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10052: (ctxt->input->buf != NULL)) {
10053: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10054: #ifdef DEBUG_PUSH
10055: fprintf(stderr, "PP: pushed %d\n", size);
10056: #endif
10057: }
1.190 daniel 10058:
10059: return(ctxt);
10060: }
10061:
10062: /**
10063: * xmlCreateIOParserCtxt:
10064: * @sax: a SAX handler
10065: * @user_data: The user data returned on SAX callbacks
10066: * @ioread: an I/O read function
10067: * @ioclose: an I/O close function
10068: * @ioctx: an I/O handler
10069: * @enc: the charset encoding if known
10070: *
10071: * Create a parser context for using the XML parser with an existing
10072: * I/O stream
10073: *
10074: * Returns the new parser context or NULL
10075: */
10076: xmlParserCtxtPtr
10077: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10078: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10079: void *ioctx, xmlCharEncoding enc) {
10080: xmlParserCtxtPtr ctxt;
10081: xmlParserInputPtr inputStream;
10082: xmlParserInputBufferPtr buf;
10083:
10084: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10085: if (buf == NULL) return(NULL);
10086:
10087: ctxt = xmlNewParserCtxt();
10088: if (ctxt == NULL) {
10089: xmlFree(buf);
10090: return(NULL);
10091: }
10092: if (sax != NULL) {
10093: if (ctxt->sax != &xmlDefaultSAXHandler)
10094: xmlFree(ctxt->sax);
10095: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10096: if (ctxt->sax == NULL) {
10097: xmlFree(buf);
10098: xmlFree(ctxt);
10099: return(NULL);
10100: }
10101: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10102: if (user_data != NULL)
10103: ctxt->userData = user_data;
10104: }
10105:
10106: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10107: if (inputStream == NULL) {
10108: xmlFreeParserCtxt(ctxt);
10109: return(NULL);
10110: }
10111: inputPush(ctxt, inputStream);
1.140 daniel 10112:
10113: return(ctxt);
10114: }
10115:
10116: /**
1.181 daniel 10117: * xmlCreateDocParserCtxt:
1.123 daniel 10118: * @cur: a pointer to an array of xmlChar
1.50 daniel 10119: *
1.192 daniel 10120: * Creates a parser context for an XML in-memory document.
1.69 daniel 10121: *
10122: * Returns the new parser context or NULL
1.16 daniel 10123: */
1.69 daniel 10124: xmlParserCtxtPtr
1.123 daniel 10125: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 10126: xmlParserCtxtPtr ctxt;
1.40 daniel 10127: xmlParserInputPtr input;
1.16 daniel 10128:
1.97 daniel 10129: ctxt = xmlNewParserCtxt();
1.16 daniel 10130: if (ctxt == NULL) {
10131: return(NULL);
10132: }
1.96 daniel 10133: input = xmlNewInputStream(ctxt);
1.40 daniel 10134: if (input == NULL) {
1.97 daniel 10135: xmlFreeParserCtxt(ctxt);
1.40 daniel 10136: return(NULL);
10137: }
10138:
10139: input->base = cur;
10140: input->cur = cur;
10141:
10142: inputPush(ctxt, input);
1.69 daniel 10143: return(ctxt);
10144: }
10145:
10146: /**
1.181 daniel 10147: * xmlSAXParseDoc:
1.69 daniel 10148: * @sax: the SAX handler block
1.123 daniel 10149: * @cur: a pointer to an array of xmlChar
1.69 daniel 10150: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10151: * documents
10152: *
10153: * parse an XML in-memory document and build a tree.
10154: * It use the given SAX function block to handle the parsing callback.
10155: * If sax is NULL, fallback to the default DOM tree building routines.
10156: *
10157: * Returns the resulting document tree
10158: */
10159:
10160: xmlDocPtr
1.123 daniel 10161: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 10162: xmlDocPtr ret;
10163: xmlParserCtxtPtr ctxt;
10164:
10165: if (cur == NULL) return(NULL);
1.16 daniel 10166:
10167:
1.69 daniel 10168: ctxt = xmlCreateDocParserCtxt(cur);
10169: if (ctxt == NULL) return(NULL);
1.74 daniel 10170: if (sax != NULL) {
10171: ctxt->sax = sax;
10172: ctxt->userData = NULL;
10173: }
1.69 daniel 10174:
1.16 daniel 10175: xmlParseDocument(ctxt);
1.72 daniel 10176: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10177: else {
10178: ret = NULL;
1.72 daniel 10179: xmlFreeDoc(ctxt->myDoc);
10180: ctxt->myDoc = NULL;
1.59 daniel 10181: }
1.86 daniel 10182: if (sax != NULL)
10183: ctxt->sax = NULL;
1.69 daniel 10184: xmlFreeParserCtxt(ctxt);
1.16 daniel 10185:
1.1 veillard 10186: return(ret);
10187: }
10188:
1.50 daniel 10189: /**
1.181 daniel 10190: * xmlParseDoc:
1.123 daniel 10191: * @cur: a pointer to an array of xmlChar
1.55 daniel 10192: *
10193: * parse an XML in-memory document and build a tree.
10194: *
1.68 daniel 10195: * Returns the resulting document tree
1.55 daniel 10196: */
10197:
1.69 daniel 10198: xmlDocPtr
1.123 daniel 10199: xmlParseDoc(xmlChar *cur) {
1.59 daniel 10200: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 10201: }
10202:
10203: /**
1.181 daniel 10204: * xmlSAXParseDTD:
1.76 daniel 10205: * @sax: the SAX handler block
10206: * @ExternalID: a NAME* containing the External ID of the DTD
10207: * @SystemID: a NAME* containing the URL to the DTD
10208: *
10209: * Load and parse an external subset.
10210: *
10211: * Returns the resulting xmlDtdPtr or NULL in case of error.
10212: */
10213:
10214: xmlDtdPtr
1.123 daniel 10215: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10216: const xmlChar *SystemID) {
1.76 daniel 10217: xmlDtdPtr ret = NULL;
10218: xmlParserCtxtPtr ctxt;
1.83 daniel 10219: xmlParserInputPtr input = NULL;
1.76 daniel 10220: xmlCharEncoding enc;
10221:
10222: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10223:
1.97 daniel 10224: ctxt = xmlNewParserCtxt();
1.76 daniel 10225: if (ctxt == NULL) {
10226: return(NULL);
10227: }
10228:
10229: /*
10230: * Set-up the SAX context
10231: */
10232: if (sax != NULL) {
1.93 veillard 10233: if (ctxt->sax != NULL)
1.119 daniel 10234: xmlFree(ctxt->sax);
1.76 daniel 10235: ctxt->sax = sax;
10236: ctxt->userData = NULL;
10237: }
10238:
10239: /*
10240: * Ask the Entity resolver to load the damn thing
10241: */
10242:
10243: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
10244: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
10245: if (input == NULL) {
1.86 daniel 10246: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 10247: xmlFreeParserCtxt(ctxt);
10248: return(NULL);
10249: }
10250:
10251: /*
1.156 daniel 10252: * plug some encoding conversion routines here.
1.76 daniel 10253: */
10254: xmlPushInput(ctxt, input);
1.156 daniel 10255: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 10256: xmlSwitchEncoding(ctxt, enc);
10257:
1.95 veillard 10258: if (input->filename == NULL)
1.156 daniel 10259: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 10260: input->line = 1;
10261: input->col = 1;
10262: input->base = ctxt->input->cur;
10263: input->cur = ctxt->input->cur;
10264: input->free = NULL;
10265:
10266: /*
10267: * let's parse that entity knowing it's an external subset.
10268: */
1.191 daniel 10269: ctxt->inSubset = 2;
10270: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10271: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10272: ExternalID, SystemID);
1.79 daniel 10273: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 10274:
10275: if (ctxt->myDoc != NULL) {
10276: if (ctxt->wellFormed) {
1.191 daniel 10277: ret = ctxt->myDoc->extSubset;
10278: ctxt->myDoc->extSubset = NULL;
1.76 daniel 10279: } else {
10280: ret = NULL;
10281: }
10282: xmlFreeDoc(ctxt->myDoc);
10283: ctxt->myDoc = NULL;
10284: }
1.86 daniel 10285: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 10286: xmlFreeParserCtxt(ctxt);
10287:
10288: return(ret);
10289: }
10290:
10291: /**
1.181 daniel 10292: * xmlParseDTD:
1.76 daniel 10293: * @ExternalID: a NAME* containing the External ID of the DTD
10294: * @SystemID: a NAME* containing the URL to the DTD
10295: *
10296: * Load and parse an external subset.
10297: *
10298: * Returns the resulting xmlDtdPtr or NULL in case of error.
10299: */
10300:
10301: xmlDtdPtr
1.123 daniel 10302: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 10303: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 10304: }
10305:
10306: /**
1.181 daniel 10307: * xmlSAXParseBalancedChunk:
1.144 daniel 10308: * @ctx: an XML parser context (possibly NULL)
10309: * @sax: the SAX handler bloc (possibly NULL)
10310: * @user_data: The user data returned on SAX callbacks (possibly NULL)
10311: * @input: a parser input stream
10312: * @enc: the encoding
10313: *
10314: * Parse a well-balanced chunk of an XML document
10315: * The user has to provide SAX callback block whose routines will be
10316: * called by the parser
10317: * The allowed sequence for the Well Balanced Chunk is the one defined by
10318: * the content production in the XML grammar:
10319: *
10320: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10321: *
1.176 daniel 10322: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 10323: * the error code otherwise
10324: */
10325:
10326: int
10327: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
10328: void *user_data, xmlParserInputPtr input,
10329: xmlCharEncoding enc) {
10330: xmlParserCtxtPtr ctxt;
10331: int ret;
10332:
10333: if (input == NULL) return(-1);
10334:
10335: if (ctx != NULL)
10336: ctxt = ctx;
10337: else {
10338: ctxt = xmlNewParserCtxt();
10339: if (ctxt == NULL)
10340: return(-1);
10341: if (sax == NULL)
10342: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10343: }
10344:
10345: /*
10346: * Set-up the SAX context
10347: */
10348: if (sax != NULL) {
10349: if (ctxt->sax != NULL)
10350: xmlFree(ctxt->sax);
10351: ctxt->sax = sax;
10352: ctxt->userData = user_data;
10353: }
10354:
10355: /*
10356: * plug some encoding conversion routines here.
10357: */
10358: xmlPushInput(ctxt, input);
10359: if (enc != XML_CHAR_ENCODING_NONE)
10360: xmlSwitchEncoding(ctxt, enc);
10361:
10362: /*
10363: * let's parse that entity knowing it's an external subset.
10364: */
10365: xmlParseContent(ctxt);
10366: ret = ctxt->errNo;
10367:
10368: if (ctx == NULL) {
10369: if (sax != NULL)
10370: ctxt->sax = NULL;
10371: else
10372: xmlFreeDoc(ctxt->myDoc);
10373: xmlFreeParserCtxt(ctxt);
10374: }
10375: return(ret);
10376: }
10377:
10378: /**
1.213 veillard 10379: * xmlParseCtxtExternalEntity:
10380: * @ctx: the existing parsing context
10381: * @URL: the URL for the entity to load
10382: * @ID: the System ID for the entity to load
10383: * @list: the return value for the set of parsed nodes
10384: *
10385: * Parse an external general entity within an existing parsing context
10386: * An external general parsed entity is well-formed if it matches the
10387: * production labeled extParsedEnt.
10388: *
10389: * [78] extParsedEnt ::= TextDecl? content
10390: *
10391: * Returns 0 if the entity is well formed, -1 in case of args problem and
10392: * the parser error code otherwise
10393: */
10394:
10395: int
10396: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
10397: const xmlChar *ID, xmlNodePtr *list) {
10398: xmlParserCtxtPtr ctxt;
10399: xmlDocPtr newDoc;
10400: xmlSAXHandlerPtr oldsax = NULL;
10401: int ret = 0;
10402:
10403: if (ctx->depth > 40) {
10404: return(XML_ERR_ENTITY_LOOP);
10405: }
10406:
10407: if (list != NULL)
10408: *list = NULL;
10409: if ((URL == NULL) && (ID == NULL))
10410: return(-1);
10411: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10412: return(-1);
10413:
10414:
10415: ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
10416: if (ctxt == NULL) return(-1);
10417: ctxt->userData = ctxt;
10418: oldsax = ctxt->sax;
10419: ctxt->sax = ctx->sax;
10420: newDoc = xmlNewDoc(BAD_CAST "1.0");
10421: if (newDoc == NULL) {
10422: xmlFreeParserCtxt(ctxt);
10423: return(-1);
10424: }
10425: if (ctx->myDoc != NULL) {
10426: newDoc->intSubset = ctx->myDoc->intSubset;
10427: newDoc->extSubset = ctx->myDoc->extSubset;
10428: }
10429: if (ctx->myDoc->URL != NULL) {
10430: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10431: }
10432: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10433: if (newDoc->children == NULL) {
10434: ctxt->sax = oldsax;
10435: xmlFreeParserCtxt(ctxt);
10436: newDoc->intSubset = NULL;
10437: newDoc->extSubset = NULL;
10438: xmlFreeDoc(newDoc);
10439: return(-1);
10440: }
10441: nodePush(ctxt, newDoc->children);
10442: if (ctx->myDoc == NULL) {
10443: ctxt->myDoc = newDoc;
10444: } else {
10445: ctxt->myDoc = ctx->myDoc;
10446: newDoc->children->doc = ctx->myDoc;
10447: }
10448:
10449: /*
10450: * Parse a possible text declaration first
10451: */
10452: GROW;
10453: if ((RAW == '<') && (NXT(1) == '?') &&
10454: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10455: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10456: xmlParseTextDecl(ctxt);
10457: }
10458:
10459: /*
10460: * Doing validity checking on chunk doesn't make sense
10461: */
10462: ctxt->instate = XML_PARSER_CONTENT;
10463: ctxt->validate = ctx->validate;
10464: ctxt->depth = ctx->depth + 1;
10465: ctxt->replaceEntities = ctx->replaceEntities;
10466: if (ctxt->validate) {
10467: ctxt->vctxt.error = ctx->vctxt.error;
10468: ctxt->vctxt.warning = ctx->vctxt.warning;
10469: /* Allocate the Node stack */
10470: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
10471: ctxt->vctxt.nodeNr = 0;
10472: ctxt->vctxt.nodeMax = 4;
10473: ctxt->vctxt.node = NULL;
10474: } else {
10475: ctxt->vctxt.error = NULL;
10476: ctxt->vctxt.warning = NULL;
10477: }
10478:
10479: xmlParseContent(ctxt);
10480:
10481: if ((RAW == '<') && (NXT(1) == '/')) {
10482: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10483: ctxt->sax->error(ctxt->userData,
10484: "chunk is not well balanced\n");
10485: ctxt->wellFormed = 0;
10486: ctxt->disableSAX = 1;
10487: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10488: } else if (RAW != 0) {
10489: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10490: ctxt->sax->error(ctxt->userData,
10491: "extra content at the end of well balanced chunk\n");
10492: ctxt->wellFormed = 0;
10493: ctxt->disableSAX = 1;
10494: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10495: }
10496: if (ctxt->node != newDoc->children) {
10497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10498: ctxt->sax->error(ctxt->userData,
10499: "chunk is not well balanced\n");
10500: ctxt->wellFormed = 0;
10501: ctxt->disableSAX = 1;
10502: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10503: }
10504:
10505: if (!ctxt->wellFormed) {
10506: if (ctxt->errNo == 0)
10507: ret = 1;
10508: else
10509: ret = ctxt->errNo;
10510: } else {
10511: if (list != NULL) {
10512: xmlNodePtr cur;
10513:
10514: /*
10515: * Return the newly created nodeset after unlinking it from
10516: * they pseudo parent.
10517: */
10518: cur = newDoc->children->children;
10519: *list = cur;
10520: while (cur != NULL) {
10521: cur->parent = NULL;
10522: cur = cur->next;
10523: }
10524: newDoc->children->children = NULL;
10525: }
10526: ret = 0;
10527: }
10528: ctxt->sax = oldsax;
10529: xmlFreeParserCtxt(ctxt);
10530: newDoc->intSubset = NULL;
10531: newDoc->extSubset = NULL;
10532: xmlFreeDoc(newDoc);
10533:
10534: return(ret);
10535: }
10536:
10537: /**
1.181 daniel 10538: * xmlParseExternalEntity:
10539: * @doc: the document the chunk pertains to
10540: * @sax: the SAX handler bloc (possibly NULL)
10541: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10542: * @depth: Used for loop detection, use 0
1.181 daniel 10543: * @URL: the URL for the entity to load
10544: * @ID: the System ID for the entity to load
10545: * @list: the return value for the set of parsed nodes
10546: *
10547: * Parse an external general entity
10548: * An external general parsed entity is well-formed if it matches the
10549: * production labeled extParsedEnt.
10550: *
10551: * [78] extParsedEnt ::= TextDecl? content
10552: *
10553: * Returns 0 if the entity is well formed, -1 in case of args problem and
10554: * the parser error code otherwise
10555: */
10556:
10557: int
10558: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 10559: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 10560: xmlParserCtxtPtr ctxt;
10561: xmlDocPtr newDoc;
10562: xmlSAXHandlerPtr oldsax = NULL;
10563: int ret = 0;
10564:
1.185 daniel 10565: if (depth > 40) {
10566: return(XML_ERR_ENTITY_LOOP);
10567: }
10568:
10569:
1.181 daniel 10570:
10571: if (list != NULL)
10572: *list = NULL;
10573: if ((URL == NULL) && (ID == NULL))
1.213 veillard 10574: return(-1);
10575: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 10576: return(-1);
10577:
10578:
10579: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10580: if (ctxt == NULL) return(-1);
10581: ctxt->userData = ctxt;
10582: if (sax != NULL) {
10583: oldsax = ctxt->sax;
10584: ctxt->sax = sax;
10585: if (user_data != NULL)
10586: ctxt->userData = user_data;
10587: }
10588: newDoc = xmlNewDoc(BAD_CAST "1.0");
10589: if (newDoc == NULL) {
10590: xmlFreeParserCtxt(ctxt);
10591: return(-1);
10592: }
10593: if (doc != NULL) {
10594: newDoc->intSubset = doc->intSubset;
10595: newDoc->extSubset = doc->extSubset;
10596: }
10597: if (doc->URL != NULL) {
10598: newDoc->URL = xmlStrdup(doc->URL);
10599: }
10600: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10601: if (newDoc->children == NULL) {
10602: if (sax != NULL)
10603: ctxt->sax = oldsax;
10604: xmlFreeParserCtxt(ctxt);
10605: newDoc->intSubset = NULL;
10606: newDoc->extSubset = NULL;
10607: xmlFreeDoc(newDoc);
10608: return(-1);
10609: }
10610: nodePush(ctxt, newDoc->children);
10611: if (doc == NULL) {
10612: ctxt->myDoc = newDoc;
10613: } else {
10614: ctxt->myDoc = doc;
10615: newDoc->children->doc = doc;
10616: }
10617:
10618: /*
10619: * Parse a possible text declaration first
10620: */
10621: GROW;
10622: if ((RAW == '<') && (NXT(1) == '?') &&
10623: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10624: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10625: xmlParseTextDecl(ctxt);
10626: }
10627:
10628: /*
10629: * Doing validity checking on chunk doesn't make sense
10630: */
10631: ctxt->instate = XML_PARSER_CONTENT;
10632: ctxt->validate = 0;
1.185 daniel 10633: ctxt->depth = depth;
1.181 daniel 10634:
10635: xmlParseContent(ctxt);
10636:
10637: if ((RAW == '<') && (NXT(1) == '/')) {
10638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10639: ctxt->sax->error(ctxt->userData,
10640: "chunk is not well balanced\n");
10641: ctxt->wellFormed = 0;
10642: ctxt->disableSAX = 1;
10643: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10644: } else if (RAW != 0) {
10645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10646: ctxt->sax->error(ctxt->userData,
10647: "extra content at the end of well balanced chunk\n");
10648: ctxt->wellFormed = 0;
10649: ctxt->disableSAX = 1;
10650: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10651: }
10652: if (ctxt->node != newDoc->children) {
10653: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10654: ctxt->sax->error(ctxt->userData,
10655: "chunk is not well balanced\n");
10656: ctxt->wellFormed = 0;
10657: ctxt->disableSAX = 1;
10658: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10659: }
10660:
10661: if (!ctxt->wellFormed) {
10662: if (ctxt->errNo == 0)
10663: ret = 1;
10664: else
10665: ret = ctxt->errNo;
10666: } else {
10667: if (list != NULL) {
10668: xmlNodePtr cur;
10669:
10670: /*
10671: * Return the newly created nodeset after unlinking it from
10672: * they pseudo parent.
10673: */
10674: cur = newDoc->children->children;
10675: *list = cur;
10676: while (cur != NULL) {
10677: cur->parent = NULL;
10678: cur = cur->next;
10679: }
10680: newDoc->children->children = NULL;
10681: }
10682: ret = 0;
10683: }
10684: if (sax != NULL)
10685: ctxt->sax = oldsax;
10686: xmlFreeParserCtxt(ctxt);
10687: newDoc->intSubset = NULL;
10688: newDoc->extSubset = NULL;
10689: xmlFreeDoc(newDoc);
10690:
10691: return(ret);
10692: }
10693:
10694: /**
10695: * xmlParseBalancedChunk:
1.176 daniel 10696: * @doc: the document the chunk pertains to
10697: * @sax: the SAX handler bloc (possibly NULL)
10698: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10699: * @depth: Used for loop detection, use 0
1.176 daniel 10700: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10701: * @list: the return value for the set of parsed nodes
10702: *
10703: * Parse a well-balanced chunk of an XML document
10704: * called by the parser
10705: * The allowed sequence for the Well Balanced Chunk is the one defined by
10706: * the content production in the XML grammar:
1.144 daniel 10707: *
1.175 daniel 10708: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10709: *
1.176 daniel 10710: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10711: * the parser error code otherwise
1.144 daniel 10712: */
10713:
1.175 daniel 10714: int
10715: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10716: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10717: xmlParserCtxtPtr ctxt;
1.175 daniel 10718: xmlDocPtr newDoc;
1.181 daniel 10719: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10720: int size;
1.176 daniel 10721: int ret = 0;
1.175 daniel 10722:
1.185 daniel 10723: if (depth > 40) {
10724: return(XML_ERR_ENTITY_LOOP);
10725: }
10726:
1.175 daniel 10727:
1.176 daniel 10728: if (list != NULL)
10729: *list = NULL;
10730: if (string == NULL)
10731: return(-1);
10732:
10733: size = xmlStrlen(string);
10734:
1.183 daniel 10735: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10736: if (ctxt == NULL) return(-1);
10737: ctxt->userData = ctxt;
1.175 daniel 10738: if (sax != NULL) {
1.176 daniel 10739: oldsax = ctxt->sax;
10740: ctxt->sax = sax;
10741: if (user_data != NULL)
10742: ctxt->userData = user_data;
1.175 daniel 10743: }
10744: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10745: if (newDoc == NULL) {
10746: xmlFreeParserCtxt(ctxt);
10747: return(-1);
10748: }
1.175 daniel 10749: if (doc != NULL) {
10750: newDoc->intSubset = doc->intSubset;
10751: newDoc->extSubset = doc->extSubset;
10752: }
1.176 daniel 10753: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10754: if (newDoc->children == NULL) {
10755: if (sax != NULL)
10756: ctxt->sax = oldsax;
10757: xmlFreeParserCtxt(ctxt);
10758: newDoc->intSubset = NULL;
10759: newDoc->extSubset = NULL;
10760: xmlFreeDoc(newDoc);
10761: return(-1);
10762: }
10763: nodePush(ctxt, newDoc->children);
10764: if (doc == NULL) {
10765: ctxt->myDoc = newDoc;
10766: } else {
10767: ctxt->myDoc = doc;
10768: newDoc->children->doc = doc;
10769: }
10770: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10771: ctxt->depth = depth;
1.176 daniel 10772:
10773: /*
10774: * Doing validity checking on chunk doesn't make sense
10775: */
10776: ctxt->validate = 0;
10777:
1.175 daniel 10778: xmlParseContent(ctxt);
1.176 daniel 10779:
10780: if ((RAW == '<') && (NXT(1) == '/')) {
10781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10782: ctxt->sax->error(ctxt->userData,
10783: "chunk is not well balanced\n");
10784: ctxt->wellFormed = 0;
1.180 daniel 10785: ctxt->disableSAX = 1;
1.176 daniel 10786: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10787: } else if (RAW != 0) {
10788: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10789: ctxt->sax->error(ctxt->userData,
10790: "extra content at the end of well balanced chunk\n");
10791: ctxt->wellFormed = 0;
1.180 daniel 10792: ctxt->disableSAX = 1;
1.176 daniel 10793: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10794: }
10795: if (ctxt->node != newDoc->children) {
10796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10797: ctxt->sax->error(ctxt->userData,
10798: "chunk is not well balanced\n");
10799: ctxt->wellFormed = 0;
1.180 daniel 10800: ctxt->disableSAX = 1;
1.176 daniel 10801: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10802: }
1.175 daniel 10803:
1.176 daniel 10804: if (!ctxt->wellFormed) {
10805: if (ctxt->errNo == 0)
10806: ret = 1;
10807: else
10808: ret = ctxt->errNo;
10809: } else {
10810: if (list != NULL) {
10811: xmlNodePtr cur;
1.175 daniel 10812:
1.176 daniel 10813: /*
10814: * Return the newly created nodeset after unlinking it from
10815: * they pseudo parent.
10816: */
10817: cur = newDoc->children->children;
10818: *list = cur;
10819: while (cur != NULL) {
10820: cur->parent = NULL;
10821: cur = cur->next;
10822: }
10823: newDoc->children->children = NULL;
10824: }
10825: ret = 0;
1.175 daniel 10826: }
1.176 daniel 10827: if (sax != NULL)
10828: ctxt->sax = oldsax;
1.175 daniel 10829: xmlFreeParserCtxt(ctxt);
10830: newDoc->intSubset = NULL;
10831: newDoc->extSubset = NULL;
1.176 daniel 10832: xmlFreeDoc(newDoc);
1.175 daniel 10833:
1.176 daniel 10834: return(ret);
1.144 daniel 10835: }
10836:
10837: /**
1.181 daniel 10838: * xmlParseBalancedChunkFile:
1.144 daniel 10839: * @doc: the document the chunk pertains to
10840: *
10841: * Parse a well-balanced chunk of an XML document contained in a file
10842: *
10843: * Returns the resulting list of nodes resulting from the parsing,
10844: * they are not added to @node
10845: */
10846:
10847: xmlNodePtr
10848: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10849: /* TODO !!! */
10850: return(NULL);
1.144 daniel 10851: }
10852:
10853: /**
1.181 daniel 10854: * xmlRecoverDoc:
1.123 daniel 10855: * @cur: a pointer to an array of xmlChar
1.59 daniel 10856: *
10857: * parse an XML in-memory document and build a tree.
10858: * In the case the document is not Well Formed, a tree is built anyway
10859: *
1.68 daniel 10860: * Returns the resulting document tree
1.59 daniel 10861: */
10862:
1.69 daniel 10863: xmlDocPtr
1.123 daniel 10864: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10865: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10866: }
10867:
10868: /**
1.181 daniel 10869: * xmlCreateEntityParserCtxt:
10870: * @URL: the entity URL
10871: * @ID: the entity PUBLIC ID
10872: * @base: a posible base for the target URI
10873: *
10874: * Create a parser context for an external entity
10875: * Automatic support for ZLIB/Compress compressed document is provided
10876: * by default if found at compile-time.
10877: *
10878: * Returns the new parser context or NULL
10879: */
10880: xmlParserCtxtPtr
10881: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10882: const xmlChar *base) {
10883: xmlParserCtxtPtr ctxt;
10884: xmlParserInputPtr inputStream;
10885: char *directory = NULL;
1.210 veillard 10886: xmlChar *uri;
10887:
1.181 daniel 10888: ctxt = xmlNewParserCtxt();
10889: if (ctxt == NULL) {
10890: return(NULL);
10891: }
10892:
1.210 veillard 10893: uri = xmlBuildURI(URL, base);
10894:
10895: if (uri == NULL) {
10896: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10897: if (inputStream == NULL) {
10898: xmlFreeParserCtxt(ctxt);
10899: return(NULL);
10900: }
10901:
10902: inputPush(ctxt, inputStream);
10903:
10904: if ((ctxt->directory == NULL) && (directory == NULL))
10905: directory = xmlParserGetDirectory((char *)URL);
10906: if ((ctxt->directory == NULL) && (directory != NULL))
10907: ctxt->directory = directory;
10908: } else {
10909: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10910: if (inputStream == NULL) {
10911: xmlFreeParserCtxt(ctxt);
10912: return(NULL);
10913: }
1.181 daniel 10914:
1.210 veillard 10915: inputPush(ctxt, inputStream);
1.181 daniel 10916:
1.210 veillard 10917: if ((ctxt->directory == NULL) && (directory == NULL))
10918: directory = xmlParserGetDirectory((char *)uri);
10919: if ((ctxt->directory == NULL) && (directory != NULL))
10920: ctxt->directory = directory;
10921: xmlFree(uri);
10922: }
1.181 daniel 10923:
10924: return(ctxt);
10925: }
10926:
10927: /**
10928: * xmlCreateFileParserCtxt:
1.50 daniel 10929: * @filename: the filename
10930: *
1.69 daniel 10931: * Create a parser context for a file content.
10932: * Automatic support for ZLIB/Compress compressed document is provided
10933: * by default if found at compile-time.
1.50 daniel 10934: *
1.69 daniel 10935: * Returns the new parser context or NULL
1.9 httpng 10936: */
1.69 daniel 10937: xmlParserCtxtPtr
10938: xmlCreateFileParserCtxt(const char *filename)
10939: {
10940: xmlParserCtxtPtr ctxt;
1.40 daniel 10941: xmlParserInputPtr inputStream;
1.91 daniel 10942: xmlParserInputBufferPtr buf;
1.111 daniel 10943: char *directory = NULL;
1.9 httpng 10944:
1.91 daniel 10945: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10946: if (buf == NULL) return(NULL);
1.9 httpng 10947:
1.97 daniel 10948: ctxt = xmlNewParserCtxt();
1.16 daniel 10949: if (ctxt == NULL) {
10950: return(NULL);
10951: }
1.97 daniel 10952:
1.96 daniel 10953: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10954: if (inputStream == NULL) {
1.97 daniel 10955: xmlFreeParserCtxt(ctxt);
1.40 daniel 10956: return(NULL);
10957: }
10958:
1.119 daniel 10959: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10960: inputStream->buf = buf;
10961: inputStream->base = inputStream->buf->buffer->content;
10962: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10963:
1.40 daniel 10964: inputPush(ctxt, inputStream);
1.110 daniel 10965: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10966: directory = xmlParserGetDirectory(filename);
10967: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10968: ctxt->directory = directory;
1.106 daniel 10969:
1.69 daniel 10970: return(ctxt);
10971: }
10972:
10973: /**
1.181 daniel 10974: * xmlSAXParseFile:
1.69 daniel 10975: * @sax: the SAX handler block
10976: * @filename: the filename
10977: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10978: * documents
10979: *
10980: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10981: * compressed document is provided by default if found at compile-time.
10982: * It use the given SAX function block to handle the parsing callback.
10983: * If sax is NULL, fallback to the default DOM tree building routines.
10984: *
10985: * Returns the resulting document tree
10986: */
10987:
1.79 daniel 10988: xmlDocPtr
10989: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10990: int recovery) {
10991: xmlDocPtr ret;
10992: xmlParserCtxtPtr ctxt;
1.111 daniel 10993: char *directory = NULL;
1.69 daniel 10994:
10995: ctxt = xmlCreateFileParserCtxt(filename);
10996: if (ctxt == NULL) return(NULL);
1.74 daniel 10997: if (sax != NULL) {
1.93 veillard 10998: if (ctxt->sax != NULL)
1.119 daniel 10999: xmlFree(ctxt->sax);
1.74 daniel 11000: ctxt->sax = sax;
11001: ctxt->userData = NULL;
11002: }
1.106 daniel 11003:
1.110 daniel 11004: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 11005: directory = xmlParserGetDirectory(filename);
11006: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 11007: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 11008:
11009: xmlParseDocument(ctxt);
1.40 daniel 11010:
1.72 daniel 11011: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 11012: else {
11013: ret = NULL;
1.72 daniel 11014: xmlFreeDoc(ctxt->myDoc);
11015: ctxt->myDoc = NULL;
1.59 daniel 11016: }
1.86 daniel 11017: if (sax != NULL)
11018: ctxt->sax = NULL;
1.69 daniel 11019: xmlFreeParserCtxt(ctxt);
1.20 daniel 11020:
11021: return(ret);
11022: }
11023:
1.55 daniel 11024: /**
1.181 daniel 11025: * xmlParseFile:
1.55 daniel 11026: * @filename: the filename
11027: *
11028: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11029: * compressed document is provided by default if found at compile-time.
11030: *
1.68 daniel 11031: * Returns the resulting document tree
1.55 daniel 11032: */
11033:
1.79 daniel 11034: xmlDocPtr
11035: xmlParseFile(const char *filename) {
1.59 daniel 11036: return(xmlSAXParseFile(NULL, filename, 0));
11037: }
11038:
11039: /**
1.181 daniel 11040: * xmlRecoverFile:
1.59 daniel 11041: * @filename: the filename
11042: *
11043: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11044: * compressed document is provided by default if found at compile-time.
11045: * In the case the document is not Well Formed, a tree is built anyway
11046: *
1.68 daniel 11047: * Returns the resulting document tree
1.59 daniel 11048: */
11049:
1.79 daniel 11050: xmlDocPtr
11051: xmlRecoverFile(const char *filename) {
1.59 daniel 11052: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 11053: }
1.32 daniel 11054:
1.50 daniel 11055: /**
1.181 daniel 11056: * xmlCreateMemoryParserCtxt:
11057: * @buffer: a pointer to a zero terminated char array
11058: * @size: the size of the array (without the trailing 0)
1.50 daniel 11059: *
1.69 daniel 11060: * Create a parser context for an XML in-memory document.
1.50 daniel 11061: *
1.69 daniel 11062: * Returns the new parser context or NULL
1.20 daniel 11063: */
1.69 daniel 11064: xmlParserCtxtPtr
11065: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 11066: xmlParserCtxtPtr ctxt;
1.40 daniel 11067: xmlParserInputPtr input;
1.209 veillard 11068: xmlParserInputBufferPtr buf;
1.40 daniel 11069:
1.179 daniel 11070: if (buffer[size] != 0)
1.181 daniel 11071: return(NULL);
1.40 daniel 11072:
1.97 daniel 11073: ctxt = xmlNewParserCtxt();
1.181 daniel 11074: if (ctxt == NULL)
1.20 daniel 11075: return(NULL);
1.97 daniel 11076:
1.209 veillard 11077: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
11078: if (buf == NULL) return(NULL);
11079:
1.96 daniel 11080: input = xmlNewInputStream(ctxt);
1.40 daniel 11081: if (input == NULL) {
1.97 daniel 11082: xmlFreeParserCtxt(ctxt);
1.40 daniel 11083: return(NULL);
11084: }
1.20 daniel 11085:
1.40 daniel 11086: input->filename = NULL;
1.209 veillard 11087: input->buf = buf;
11088: input->base = input->buf->buffer->content;
11089: input->cur = input->buf->buffer->content;
1.20 daniel 11090:
1.40 daniel 11091: inputPush(ctxt, input);
1.69 daniel 11092: return(ctxt);
11093: }
11094:
11095: /**
1.181 daniel 11096: * xmlSAXParseMemory:
1.69 daniel 11097: * @sax: the SAX handler block
11098: * @buffer: an pointer to a char array
1.127 daniel 11099: * @size: the size of the array
11100: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 11101: * documents
11102: *
11103: * parse an XML in-memory block and use the given SAX function block
11104: * to handle the parsing callback. If sax is NULL, fallback to the default
11105: * DOM tree building routines.
11106: *
11107: * Returns the resulting document tree
11108: */
11109: xmlDocPtr
11110: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
11111: xmlDocPtr ret;
11112: xmlParserCtxtPtr ctxt;
11113:
11114: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11115: if (ctxt == NULL) return(NULL);
1.74 daniel 11116: if (sax != NULL) {
11117: ctxt->sax = sax;
11118: ctxt->userData = NULL;
11119: }
1.20 daniel 11120:
11121: xmlParseDocument(ctxt);
1.40 daniel 11122:
1.72 daniel 11123: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 11124: else {
11125: ret = NULL;
1.72 daniel 11126: xmlFreeDoc(ctxt->myDoc);
11127: ctxt->myDoc = NULL;
1.59 daniel 11128: }
1.86 daniel 11129: if (sax != NULL)
11130: ctxt->sax = NULL;
1.69 daniel 11131: xmlFreeParserCtxt(ctxt);
1.16 daniel 11132:
1.9 httpng 11133: return(ret);
1.17 daniel 11134: }
11135:
1.55 daniel 11136: /**
1.181 daniel 11137: * xmlParseMemory:
1.68 daniel 11138: * @buffer: an pointer to a char array
1.55 daniel 11139: * @size: the size of the array
11140: *
11141: * parse an XML in-memory block and build a tree.
11142: *
1.68 daniel 11143: * Returns the resulting document tree
1.55 daniel 11144: */
11145:
11146: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 11147: return(xmlSAXParseMemory(NULL, buffer, size, 0));
11148: }
11149:
11150: /**
1.181 daniel 11151: * xmlRecoverMemory:
1.68 daniel 11152: * @buffer: an pointer to a char array
1.59 daniel 11153: * @size: the size of the array
11154: *
11155: * parse an XML in-memory block and build a tree.
11156: * In the case the document is not Well Formed, a tree is built anyway
11157: *
1.68 daniel 11158: * Returns the resulting document tree
1.59 daniel 11159: */
11160:
11161: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
11162: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 11163: }
11164:
11165:
1.50 daniel 11166: /**
11167: * xmlSetupParserForBuffer:
11168: * @ctxt: an XML parser context
1.123 daniel 11169: * @buffer: a xmlChar * buffer
1.50 daniel 11170: * @filename: a file name
11171: *
1.19 daniel 11172: * Setup the parser context to parse a new buffer; Clears any prior
11173: * contents from the parser context. The buffer parameter must not be
11174: * NULL, but the filename parameter can be
11175: */
1.55 daniel 11176: void
1.123 daniel 11177: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 11178: const char* filename)
11179: {
1.96 daniel 11180: xmlParserInputPtr input;
1.40 daniel 11181:
1.96 daniel 11182: input = xmlNewInputStream(ctxt);
11183: if (input == NULL) {
11184: perror("malloc");
1.119 daniel 11185: xmlFree(ctxt);
1.145 daniel 11186: return;
1.96 daniel 11187: }
11188:
11189: xmlClearParserCtxt(ctxt);
11190: if (filename != NULL)
1.119 daniel 11191: input->filename = xmlMemStrdup(filename);
1.96 daniel 11192: input->base = buffer;
11193: input->cur = buffer;
11194: inputPush(ctxt, input);
1.17 daniel 11195: }
11196:
1.123 daniel 11197: /**
11198: * xmlSAXUserParseFile:
11199: * @sax: a SAX handler
11200: * @user_data: The user data returned on SAX callbacks
11201: * @filename: a file name
11202: *
11203: * parse an XML file and call the given SAX handler routines.
11204: * Automatic support for ZLIB/Compress compressed document is provided
11205: *
11206: * Returns 0 in case of success or a error number otherwise
11207: */
1.131 daniel 11208: int
11209: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11210: const char *filename) {
1.123 daniel 11211: int ret = 0;
11212: xmlParserCtxtPtr ctxt;
11213:
11214: ctxt = xmlCreateFileParserCtxt(filename);
11215: if (ctxt == NULL) return -1;
1.134 daniel 11216: if (ctxt->sax != &xmlDefaultSAXHandler)
11217: xmlFree(ctxt->sax);
1.123 daniel 11218: ctxt->sax = sax;
1.140 daniel 11219: if (user_data != NULL)
11220: ctxt->userData = user_data;
1.123 daniel 11221:
11222: xmlParseDocument(ctxt);
11223:
11224: if (ctxt->wellFormed)
11225: ret = 0;
11226: else {
11227: if (ctxt->errNo != 0)
11228: ret = ctxt->errNo;
11229: else
11230: ret = -1;
11231: }
11232: if (sax != NULL)
11233: ctxt->sax = NULL;
11234: xmlFreeParserCtxt(ctxt);
11235:
11236: return ret;
11237: }
11238:
11239: /**
11240: * xmlSAXUserParseMemory:
11241: * @sax: a SAX handler
11242: * @user_data: The user data returned on SAX callbacks
11243: * @buffer: an in-memory XML document input
1.127 daniel 11244: * @size: the length of the XML document in bytes
1.123 daniel 11245: *
11246: * A better SAX parsing routine.
11247: * parse an XML in-memory buffer and call the given SAX handler routines.
11248: *
11249: * Returns 0 in case of success or a error number otherwise
11250: */
11251: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
11252: char *buffer, int size) {
11253: int ret = 0;
11254: xmlParserCtxtPtr ctxt;
1.216 veillard 11255: xmlSAXHandlerPtr oldsax;
1.123 daniel 11256:
11257: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11258: if (ctxt == NULL) return -1;
1.216 veillard 11259: if (sax != NULL) {
11260: oldsax = ctxt->sax;
11261: ctxt->sax = sax;
11262: }
1.123 daniel 11263: ctxt->userData = user_data;
11264:
11265: xmlParseDocument(ctxt);
11266:
11267: if (ctxt->wellFormed)
11268: ret = 0;
11269: else {
11270: if (ctxt->errNo != 0)
11271: ret = ctxt->errNo;
11272: else
11273: ret = -1;
11274: }
1.216 veillard 11275: if (sax != NULL) {
11276: ctxt->sax = oldsax;
11277: }
1.123 daniel 11278: xmlFreeParserCtxt(ctxt);
11279:
11280: return ret;
11281: }
11282:
1.32 daniel 11283:
1.98 daniel 11284: /************************************************************************
11285: * *
1.127 daniel 11286: * Miscellaneous *
1.98 daniel 11287: * *
11288: ************************************************************************/
11289:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * it calls the cleanup routines of the character encoding handlers and
 * of the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 11304:
1.50 daniel 11305: /**
11306: * xmlParserFindNodeInfo:
11307: * @ctxt: an XML parser context
11308: * @node: an XML node within the tree
11309: *
11310: * Find the parser node info struct for a given node
11311: *
1.68 daniel 11312: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 11313: */
11314: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
11315: const xmlNode* node)
11316: {
11317: unsigned long pos;
11318:
11319: /* Find position where node should be at */
11320: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
11321: if ( ctx->node_seq.buffer[pos].node == node )
11322: return &ctx->node_seq.buffer[pos];
11323: else
11324: return NULL;
11325: }
11326:
11327:
1.50 daniel 11328: /**
1.181 daniel 11329: * xmlInitNodeInfoSeq:
1.50 daniel 11330: * @seq: a node info sequence pointer
11331: *
11332: * -- Initialize (set to initial state) node info sequence
1.32 daniel 11333: */
1.55 daniel 11334: void
11335: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 11336: {
11337: seq->length = 0;
11338: seq->maximum = 0;
11339: seq->buffer = NULL;
11340: }
11341:
1.50 daniel 11342: /**
1.181 daniel 11343: * xmlClearNodeInfoSeq:
1.50 daniel 11344: * @seq: a node info sequence pointer
11345: *
11346: * -- Clear (release memory and reinitialize) node
1.32 daniel 11347: * info sequence
11348: */
1.55 daniel 11349: void
11350: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 11351: {
11352: if ( seq->buffer != NULL )
1.119 daniel 11353: xmlFree(seq->buffer);
1.32 daniel 11354: xmlInitNodeInfoSeq(seq);
11355: }
11356:
11357:
1.50 daniel 11358: /**
11359: * xmlParserFindNodeInfoIndex:
11360: * @seq: a node info sequence pointer
11361: * @node: an XML node pointer
11362: *
11363: *
1.32 daniel 11364: * xmlParserFindNodeInfoIndex : Find the index that the info record for
11365: * the given node is or should be at in a sorted sequence
1.68 daniel 11366: *
11367: * Returns a long indicating the position of the record
1.32 daniel 11368: */
11369: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
11370: const xmlNode* node)
11371: {
11372: unsigned long upper, lower, middle;
11373: int found = 0;
11374:
11375: /* Do a binary search for the key */
11376: lower = 1;
11377: upper = seq->length;
11378: middle = 0;
11379: while ( lower <= upper && !found) {
11380: middle = lower + (upper - lower) / 2;
11381: if ( node == seq->buffer[middle - 1].node )
11382: found = 1;
11383: else if ( node < seq->buffer[middle - 1].node )
11384: upper = middle - 1;
11385: else
11386: lower = middle + 1;
11387: }
11388:
11389: /* Return position */
11390: if ( middle == 0 || seq->buffer[middle - 1].node < node )
11391: return middle;
11392: else
11393: return middle - 1;
11394: }
11395:
11396:
1.50 daniel 11397: /**
11398: * xmlParserAddNodeInfo:
11399: * @ctxt: an XML parser context
1.68 daniel 11400: * @info: a node info sequence pointer
1.50 daniel 11401: *
11402: * Insert node info record into the sorted sequence
1.32 daniel 11403: */
1.55 daniel 11404: void
11405: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 11406: const xmlParserNodeInfo* info)
1.32 daniel 11407: {
11408: unsigned long pos;
11409: static unsigned int block_size = 5;
11410:
11411: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 11412: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
11413: if ( pos < ctxt->node_seq.length
11414: && ctxt->node_seq.buffer[pos].node == info->node ) {
11415: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 11416: }
11417:
11418: /* Otherwise, we need to add new node to buffer */
11419: else {
11420: /* Expand buffer by 5 if needed */
1.55 daniel 11421: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 11422: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 11423: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
11424: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 11425:
1.55 daniel 11426: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 11427: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 11428: else
1.119 daniel 11429: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 11430:
11431: if ( tmp_buffer == NULL ) {
1.55 daniel 11432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 11433: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 11434: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 11435: return;
11436: }
1.55 daniel 11437: ctxt->node_seq.buffer = tmp_buffer;
11438: ctxt->node_seq.maximum += block_size;
1.32 daniel 11439: }
11440:
11441: /* If position is not at end, move elements out of the way */
1.55 daniel 11442: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 11443: unsigned long i;
11444:
1.55 daniel 11445: for ( i = ctxt->node_seq.length; i > pos; i-- )
11446: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 11447: }
11448:
11449: /* Copy element and increase length */
1.55 daniel 11450: ctxt->node_seq.buffer[pos] = *info;
11451: ctxt->node_seq.length++;
1.32 daniel 11452: }
11453: }
1.77 daniel 11454:
1.98 daniel 11455:
11456: /**
1.181 daniel 11457: * xmlSubstituteEntitiesDefault:
1.98 daniel 11458: * @val: int 0 or 1
11459: *
11460: * Set and return the previous value for default entity support.
11461: * Initially the parser always keep entity references instead of substituting
11462: * entity values in the output. This function has to be used to change the
11463: * default parser behaviour
11464: * SAX::subtituteEntities() has to be used for changing that on a file by
11465: * file basis.
11466: *
11467: * Returns the last value for 0 for no substitution, 1 for substitution.
11468: */
11469:
11470: int
11471: xmlSubstituteEntitiesDefault(int val) {
11472: int old = xmlSubstituteEntitiesDefaultValue;
11473:
11474: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 11475: return(old);
11476: }
11477:
11478: /**
11479: * xmlKeepBlanksDefault:
11480: * @val: int 0 or 1
11481: *
11482: * Set and return the previous value for default blanks text nodes support.
11483: * The 1.x version of the parser used an heuristic to try to detect
11484: * ignorable white spaces. As a result the SAX callback was generating
11485: * ignorableWhitespace() callbacks instead of characters() one, and when
11486: * using the DOM output text nodes containing those blanks were not generated.
11487: * The 2.x and later version will switch to the XML standard way and
11488: * ignorableWhitespace() are only generated when running the parser in
11489: * validating mode and when the current element doesn't allow CDATA or
11490: * mixed content.
11491: * This function is provided as a way to force the standard behaviour
11492: * on 1.X libs and to switch back to the old mode for compatibility when
11493: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
11494: * by using xmlIsBlankNode() commodity function to detect the "empty"
11495: * nodes generated.
11496: * This value also affect autogeneration of indentation when saving code
11497: * if blanks sections are kept, indentation is not generated.
11498: *
11499: * Returns the last value for 0 for no substitution, 1 for substitution.
11500: */
11501:
11502: int
11503: xmlKeepBlanksDefault(int val) {
11504: int old = xmlKeepBlanksDefaultValue;
11505:
11506: xmlKeepBlanksDefaultValue = val;
11507: xmlIndentTreeOutput = !val;
1.98 daniel 11508: return(old);
11509: }
1.77 daniel 11510:
Webmaster