Annotation of XML/parser.c, revision 1.216
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.204 veillard 16: #include <string.h>
1.121 daniel 17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.193 daniel 44: #include <libxml/uri.h>
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
1.140 daniel 47: #define XML_PARSER_BIG_BUFFER_SIZE 1000
48: #define XML_PARSER_BUFFER_SIZE 100
49:
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.214 veillard 65: /************************************************************************
66: * *
67: * Version and Features handling *
68: * *
69: ************************************************************************/
1.200 daniel 70: const char *xmlParserVersion = LIBXML_VERSION_STRING;
71:
72: /*
73: * xmlCheckVersion:
74: * @version: the include version number
75: *
76: * check the compiled lib version against the include one.
77: * This can warn or immediately kill the application
78: */
79: void
80: xmlCheckVersion(int version) {
1.202 daniel 81: int myversion = (int) LIBXML_VERSION;
1.200 daniel 82:
83: if ((myversion / 10000) != (version / 10000)) {
84: fprintf(stderr,
85: "Fatal: program compiled against libxml %d using libxml %d\n",
86: (version / 10000), (myversion / 10000));
87: exit(1);
88: }
89: if ((myversion / 100) < (version / 100)) {
90: fprintf(stderr,
91: "Warning: program compiled against libxml %d using older %d\n",
92: (version / 100), (myversion / 100));
93: }
94: }
95:
96:
/*
 * Names of the parser features understood by the feature API.
 * xmlGetFeaturesList() hands out pointers into this table.
 */
const char *xmlFeaturesList[] = {
    "validate",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",
    "SAX block",
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};

/*
 * xmlGetFeaturesList:
 * @len:  the capacity of the @result array (input), the number of
 *        names actually stored (output)
 * @result:  an array of strings to be filled with the feature names
 *
 * Copy at most *@len feature names into the @result array.  The stored
 * pointers refer to the static table above and must not be deallocated.
 *
 * When @len or @result is NULL nothing is copied and the total number
 * of features is returned, which lets a caller query the size first.
 *
 * Returns -1 if *@len is negative or >= 1000, otherwise the total number
 * of features; *@len is lowered to that total when it was larger.
 */
int
xmlGetFeaturesList(int *len, const char **result) {
    const int total =
        (int) (sizeof(xmlFeaturesList) / sizeof(xmlFeaturesList[0]));
    int wanted;
    int idx;

    if ((len == NULL) || (result == NULL))
        return(total);

    wanted = *len;
    if ((wanted < 0) || (wanted >= 1000))
        return(-1);

    if (wanted > total) {
        wanted = total;
        *len = total;
    }

    for (idx = 0; idx < wanted; idx++)
        result[idx] = xmlFeaturesList[idx];

    return(total);
}
167:
168: /*
169: * xmlGetFeature:
170: * @ctxt: an XML/HTML parser context
171: * @name: the feature name
172: * @result: location to store the result
173: *
174: * Read the current value of one feature of this parser instance
175: *
176: * Returns -1 in case or error, 0 otherwise
177: */
178: int
179: xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
180: if ((ctxt == NULL) || (name == NULL) || (result == NULL))
181: return(-1);
182:
183: if (!strcmp(name, "validate")) {
184: *((int *) result) = ctxt->validate;
185: } else if (!strcmp(name, "keep blanks")) {
186: *((int *) result) = ctxt->keepBlanks;
187: } else if (!strcmp(name, "disable SAX")) {
188: *((int *) result) = ctxt->disableSAX;
189: } else if (!strcmp(name, "fetch external entities")) {
190: *((int *) result) = ctxt->validate;
191: } else if (!strcmp(name, "substitute entities")) {
192: *((int *) result) = ctxt->replaceEntities;
193: } else if (!strcmp(name, "gather line info")) {
194: *((int *) result) = ctxt->record_info;
195: } else if (!strcmp(name, "user data")) {
196: *((void **)result) = ctxt->userData;
197: } else if (!strcmp(name, "is html")) {
198: *((int *) result) = ctxt->html;
199: } else if (!strcmp(name, "is standalone")) {
200: *((int *) result) = ctxt->standalone;
201: } else if (!strcmp(name, "document")) {
202: *((xmlDocPtr *) result) = ctxt->myDoc;
203: } else if (!strcmp(name, "is well formed")) {
204: *((int *) result) = ctxt->wellFormed;
205: } else if (!strcmp(name, "is valid")) {
206: *((int *) result) = ctxt->valid;
207: } else if (!strcmp(name, "SAX block")) {
208: *((xmlSAXHandlerPtr *) result) = ctxt->sax;
209: } else if (!strcmp(name, "SAX function internalSubset")) {
210: *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
211: } else if (!strcmp(name, "SAX function isStandalone")) {
212: *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
213: } else if (!strcmp(name, "SAX function hasInternalSubset")) {
214: *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
215: } else if (!strcmp(name, "SAX function hasExternalSubset")) {
216: *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
217: } else if (!strcmp(name, "SAX function resolveEntity")) {
218: *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
219: } else if (!strcmp(name, "SAX function getEntity")) {
220: *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
221: } else if (!strcmp(name, "SAX function entityDecl")) {
222: *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
223: } else if (!strcmp(name, "SAX function notationDecl")) {
224: *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
225: } else if (!strcmp(name, "SAX function attributeDecl")) {
226: *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
227: } else if (!strcmp(name, "SAX function elementDecl")) {
228: *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
229: } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
230: *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
231: } else if (!strcmp(name, "SAX function setDocumentLocator")) {
232: *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
233: } else if (!strcmp(name, "SAX function startDocument")) {
234: *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
235: } else if (!strcmp(name, "SAX function endDocument")) {
236: *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
237: } else if (!strcmp(name, "SAX function startElement")) {
238: *((startElementSAXFunc *) result) = ctxt->sax->startElement;
239: } else if (!strcmp(name, "SAX function endElement")) {
240: *((endElementSAXFunc *) result) = ctxt->sax->endElement;
241: } else if (!strcmp(name, "SAX function reference")) {
242: *((referenceSAXFunc *) result) = ctxt->sax->reference;
243: } else if (!strcmp(name, "SAX function characters")) {
244: *((charactersSAXFunc *) result) = ctxt->sax->characters;
245: } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
246: *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
247: } else if (!strcmp(name, "SAX function processingInstruction")) {
248: *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
249: } else if (!strcmp(name, "SAX function comment")) {
250: *((commentSAXFunc *) result) = ctxt->sax->comment;
251: } else if (!strcmp(name, "SAX function warning")) {
252: *((warningSAXFunc *) result) = ctxt->sax->warning;
253: } else if (!strcmp(name, "SAX function error")) {
254: *((errorSAXFunc *) result) = ctxt->sax->error;
255: } else if (!strcmp(name, "SAX function fatalError")) {
256: *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
257: } else if (!strcmp(name, "SAX function getParameterEntity")) {
258: *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
259: } else if (!strcmp(name, "SAX function cdataBlock")) {
260: *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
261: } else if (!strcmp(name, "SAX function externalSubset")) {
262: *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
263: } else {
264: return(-1);
265: }
266: return(0);
267: }
268:
269: /*
270: * xmlSetFeature:
271: * @ctxt: an XML/HTML parser context
272: * @name: the feature name
273: * @value: pointer to the location of the new value
274: *
275: * Change the current value of one feature of this parser instance
276: *
277: * Returns -1 in case or error, 0 otherwise
278: */
279: int
280: xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
281: if ((ctxt == NULL) || (name == NULL) || (value == NULL))
282: return(-1);
283:
284: if (!strcmp(name, "validate")) {
1.215 veillard 285: ctxt->validate = *((int *) value);
1.214 veillard 286: } else if (!strcmp(name, "keep blanks")) {
1.215 veillard 287: ctxt->keepBlanks = *((int *) value);
1.214 veillard 288: } else if (!strcmp(name, "disable SAX")) {
1.215 veillard 289: ctxt->disableSAX = *((int *) value);
1.214 veillard 290: } else if (!strcmp(name, "fetch external entities")) {
1.215 veillard 291: int newvalid = *((int *) value);
292: if ((!ctxt->validate) && (newvalid != 0)) {
293: if (ctxt->vctxt.warning == NULL)
294: ctxt->vctxt.warning = xmlParserValidityWarning;
295: if (ctxt->vctxt.error == NULL)
296: ctxt->vctxt.error = xmlParserValidityError;
297: /* Allocate the Node stack */
298: ctxt->vctxt.nodeTab = (xmlNodePtr *)
299: xmlMalloc(4 * sizeof(xmlNodePtr));
300: ctxt->vctxt.nodeNr = 0;
301: ctxt->vctxt.nodeMax = 4;
302: ctxt->vctxt.node = NULL;
303: }
304: ctxt->validate = newvalid;
1.214 veillard 305: } else if (!strcmp(name, "substitute entities")) {
1.215 veillard 306: ctxt->replaceEntities = *((int *) value);
1.214 veillard 307: } else if (!strcmp(name, "gather line info")) {
1.215 veillard 308: ctxt->record_info = *((int *) value);
1.214 veillard 309: } else if (!strcmp(name, "user data")) {
1.215 veillard 310: ctxt->userData = *((void **)value);
1.214 veillard 311: } else if (!strcmp(name, "is html")) {
1.215 veillard 312: ctxt->html = *((int *) value);
1.214 veillard 313: } else if (!strcmp(name, "is standalone")) {
1.215 veillard 314: ctxt->standalone = *((int *) value);
1.214 veillard 315: } else if (!strcmp(name, "document")) {
1.215 veillard 316: ctxt->myDoc = *((xmlDocPtr *) value);
1.214 veillard 317: } else if (!strcmp(name, "is well formed")) {
1.215 veillard 318: ctxt->wellFormed = *((int *) value);
1.214 veillard 319: } else if (!strcmp(name, "is valid")) {
1.215 veillard 320: ctxt->valid = *((int *) value);
1.214 veillard 321: } else if (!strcmp(name, "SAX block")) {
1.215 veillard 322: ctxt->sax = *((xmlSAXHandlerPtr *) value);
1.214 veillard 323: } else if (!strcmp(name, "SAX function internalSubset")) {
324: ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
325: } else if (!strcmp(name, "SAX function isStandalone")) {
326: ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
327: } else if (!strcmp(name, "SAX function hasInternalSubset")) {
328: ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
329: } else if (!strcmp(name, "SAX function hasExternalSubset")) {
330: ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
331: } else if (!strcmp(name, "SAX function resolveEntity")) {
332: ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
333: } else if (!strcmp(name, "SAX function getEntity")) {
334: ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
335: } else if (!strcmp(name, "SAX function entityDecl")) {
336: ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
337: } else if (!strcmp(name, "SAX function notationDecl")) {
338: ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
339: } else if (!strcmp(name, "SAX function attributeDecl")) {
340: ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
341: } else if (!strcmp(name, "SAX function elementDecl")) {
342: ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
343: } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
344: ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
345: } else if (!strcmp(name, "SAX function setDocumentLocator")) {
346: ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
347: } else if (!strcmp(name, "SAX function startDocument")) {
348: ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
349: } else if (!strcmp(name, "SAX function endDocument")) {
350: ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
351: } else if (!strcmp(name, "SAX function startElement")) {
352: ctxt->sax->startElement = *((startElementSAXFunc *) value);
353: } else if (!strcmp(name, "SAX function endElement")) {
354: ctxt->sax->endElement = *((endElementSAXFunc *) value);
355: } else if (!strcmp(name, "SAX function reference")) {
356: ctxt->sax->reference = *((referenceSAXFunc *) value);
357: } else if (!strcmp(name, "SAX function characters")) {
358: ctxt->sax->characters = *((charactersSAXFunc *) value);
359: } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
360: ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
361: } else if (!strcmp(name, "SAX function processingInstruction")) {
362: ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
363: } else if (!strcmp(name, "SAX function comment")) {
364: ctxt->sax->comment = *((commentSAXFunc *) value);
365: } else if (!strcmp(name, "SAX function warning")) {
366: ctxt->sax->warning = *((warningSAXFunc *) value);
367: } else if (!strcmp(name, "SAX function error")) {
368: ctxt->sax->error = *((errorSAXFunc *) value);
369: } else if (!strcmp(name, "SAX function fatalError")) {
370: ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
371: } else if (!strcmp(name, "SAX function getParameterEntity")) {
372: ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
373: } else if (!strcmp(name, "SAX function cdataBlock")) {
374: ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
375: } else if (!strcmp(name, "SAX function externalSubset")) {
376: ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
377: } else {
378: return(-1);
379: }
380: return(0);
381: }
382:
383:
1.91 daniel 384: /************************************************************************
385: * *
386: * Input handling functions for progressive parsing *
387: * *
388: ************************************************************************/
389:
390: /* #define DEBUG_INPUT */
1.140 daniel 391: /* #define DEBUG_STACK */
392: /* #define DEBUG_PUSH */
393:
1.91 daniel 394:
1.110 daniel 395: #define INPUT_CHUNK 250
396: /* we need to keep enough input to show errors in context */
397: #define LINE_LEN 80
1.91 daniel 398:
399: #ifdef DEBUG_INPUT
400: #define CHECK_BUFFER(in) check_buffer(in)
401:
402: void check_buffer(xmlParserInputPtr in) {
403: if (in->base != in->buf->buffer->content) {
404: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
405: }
406: if (in->cur < in->base) {
407: fprintf(stderr, "xmlParserInput: cur < base problem\n");
408: }
409: if (in->cur > in->base + in->buf->buffer->use) {
410: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
411: }
412: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
413: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
414: in->buf->buffer->use, in->buf->buffer->size);
415: }
416:
1.110 daniel 417: #else
418: #define CHECK_BUFFER(in)
419: #endif
420:
1.91 daniel 421:
422: /**
423: * xmlParserInputRead:
424: * @in: an XML parser input
425: * @len: an indicative size for the lookahead
426: *
427: * This function refresh the input for the parser. It doesn't try to
428: * preserve pointers to the input buffer, and discard already read data
429: *
1.123 daniel 430: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 431: * end of this entity
432: */
433: int
434: xmlParserInputRead(xmlParserInputPtr in, int len) {
435: int ret;
436: int used;
437: int index;
438:
439: #ifdef DEBUG_INPUT
440: fprintf(stderr, "Read\n");
441: #endif
442: if (in->buf == NULL) return(-1);
443: if (in->base == NULL) return(-1);
444: if (in->cur == NULL) return(-1);
445: if (in->buf->buffer == NULL) return(-1);
446:
447: CHECK_BUFFER(in);
448:
449: used = in->cur - in->buf->buffer->content;
450: ret = xmlBufferShrink(in->buf->buffer, used);
451: if (ret > 0) {
452: in->cur -= ret;
453: in->consumed += ret;
454: }
455: ret = xmlParserInputBufferRead(in->buf, len);
456: if (in->base != in->buf->buffer->content) {
457: /*
458: * the buffer has been realloced
459: */
460: index = in->cur - in->base;
461: in->base = in->buf->buffer->content;
462: in->cur = &in->buf->buffer->content[index];
463: }
464:
465: CHECK_BUFFER(in);
466:
467: return(ret);
468: }
469:
470: /**
471: * xmlParserInputGrow:
472: * @in: an XML parser input
473: * @len: an indicative size for the lookahead
474: *
475: * This function increase the input for the parser. It tries to
476: * preserve pointers to the input buffer, and keep already read data
477: *
1.123 daniel 478: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 479: * end of this entity
480: */
481: int
482: xmlParserInputGrow(xmlParserInputPtr in, int len) {
483: int ret;
484: int index;
485:
486: #ifdef DEBUG_INPUT
487: fprintf(stderr, "Grow\n");
488: #endif
489: if (in->buf == NULL) return(-1);
490: if (in->base == NULL) return(-1);
491: if (in->cur == NULL) return(-1);
492: if (in->buf->buffer == NULL) return(-1);
493:
494: CHECK_BUFFER(in);
495:
496: index = in->cur - in->base;
1.202 daniel 497: if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
1.91 daniel 498:
499: CHECK_BUFFER(in);
500:
501: return(0);
502: }
1.189 daniel 503: if (in->buf->readcallback != NULL)
1.140 daniel 504: ret = xmlParserInputBufferGrow(in->buf, len);
505: else
506: return(0);
1.135 daniel 507:
508: /*
509: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
510: * block, but we use it really as an integer to do some
511: * pointer arithmetic. Insure will raise it as a bug but in
512: * that specific case, that's not !
513: */
1.91 daniel 514: if (in->base != in->buf->buffer->content) {
515: /*
516: * the buffer has been realloced
517: */
518: index = in->cur - in->base;
519: in->base = in->buf->buffer->content;
520: in->cur = &in->buf->buffer->content[index];
521: }
522:
523: CHECK_BUFFER(in);
524:
525: return(ret);
526: }
527:
528: /**
529: * xmlParserInputShrink:
530: * @in: an XML parser input
531: *
532: * This function removes used input for the parser.
533: */
534: void
535: xmlParserInputShrink(xmlParserInputPtr in) {
536: int used;
537: int ret;
538: int index;
539:
540: #ifdef DEBUG_INPUT
541: fprintf(stderr, "Shrink\n");
542: #endif
543: if (in->buf == NULL) return;
544: if (in->base == NULL) return;
545: if (in->cur == NULL) return;
546: if (in->buf->buffer == NULL) return;
547:
548: CHECK_BUFFER(in);
549:
550: used = in->cur - in->buf->buffer->content;
551: if (used > INPUT_CHUNK) {
1.110 daniel 552: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 553: if (ret > 0) {
554: in->cur -= ret;
555: in->consumed += ret;
556: }
557: }
558:
559: CHECK_BUFFER(in);
560:
561: if (in->buf->buffer->use > INPUT_CHUNK) {
562: return;
563: }
564: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
565: if (in->base != in->buf->buffer->content) {
566: /*
567: * the buffer has been realloced
568: */
569: index = in->cur - in->base;
570: in->base = in->buf->buffer->content;
571: in->cur = &in->buf->buffer->content[index];
572: }
573:
574: CHECK_BUFFER(in);
575: }
576:
1.45 daniel 577: /************************************************************************
578: * *
579: * Parser stacks related functions and macros *
580: * *
581: ************************************************************************/
1.79 daniel 582:
583: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 584: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 585: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 586: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
587: const xmlChar ** str);
1.79 daniel 588:
1.1 veillard 589: /*
1.40 daniel 590: * Generic function for accessing stacks in the Parser Context
1.1 veillard 591: */
592:
1.140 daniel 593: #define PUSH_AND_POP(scope, type, name) \
594: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 595: if (ctxt->name##Nr >= ctxt->name##Max) { \
596: ctxt->name##Max *= 2; \
1.204 veillard 597: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 598: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
599: if (ctxt->name##Tab == NULL) { \
1.31 daniel 600: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 601: return(0); \
1.31 daniel 602: } \
603: } \
1.40 daniel 604: ctxt->name##Tab[ctxt->name##Nr] = value; \
605: ctxt->name = value; \
606: return(ctxt->name##Nr++); \
1.31 daniel 607: } \
1.140 daniel 608: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 609: type ret; \
1.40 daniel 610: if (ctxt->name##Nr <= 0) return(0); \
611: ctxt->name##Nr--; \
1.50 daniel 612: if (ctxt->name##Nr > 0) \
613: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
614: else \
615: ctxt->name = NULL; \
1.69 daniel 616: ret = ctxt->name##Tab[ctxt->name##Nr]; \
617: ctxt->name##Tab[ctxt->name##Nr] = 0; \
618: return(ret); \
1.31 daniel 619: } \
620:
1.140 daniel 621: PUSH_AND_POP(extern, xmlParserInputPtr, input)
622: PUSH_AND_POP(extern, xmlNodePtr, node)
623: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 624:
1.176 daniel 625: int spacePush(xmlParserCtxtPtr ctxt, int val) {
626: if (ctxt->spaceNr >= ctxt->spaceMax) {
627: ctxt->spaceMax *= 2;
1.204 veillard 628: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 629: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
630: if (ctxt->spaceTab == NULL) {
631: fprintf(stderr, "realloc failed !\n");
632: return(0);
633: }
634: }
635: ctxt->spaceTab[ctxt->spaceNr] = val;
636: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
637: return(ctxt->spaceNr++);
638: }
639:
640: int spacePop(xmlParserCtxtPtr ctxt) {
641: int ret;
642: if (ctxt->spaceNr <= 0) return(0);
643: ctxt->spaceNr--;
644: if (ctxt->spaceNr > 0)
645: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
646: else
647: ctxt->space = NULL;
648: ret = ctxt->spaceTab[ctxt->spaceNr];
649: ctxt->spaceTab[ctxt->spaceNr] = -1;
650: return(ret);
651: }
652:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt (the parser context) to be
 * in scope at the point of use.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   RAW     returns -1 when a token is pending in the context, the
 *           current xmlChar otherwise.
 *   CUR     returns the pending token if there is one, otherwise the
 *           current xmlChar value, i.e. an 8 bit value if compiled
 *           in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its length.
 *
 * SKIP, SHRINK and GROW expand to several statements; they are wrapped
 * in do { } while (0) so that each behaves as a single statement, e.g.
 * as the body of an unbraced if.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * The definitions below already end with a ';'.  They are kept as they
 * are because call sites may rely on it.
 * NOTE(review): NEXTL and COPY_BUF are multi-statement as well and are
 * therefore unsafe as the body of an unbraced if/else.
 */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 720:
721: /**
722: * xmlNextChar:
723: * @ctxt: the XML parser context
724: *
725: * Skip to the next char input char.
726: */
1.55 daniel 727:
1.151 daniel 728: void
729: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.201 daniel 730: if (ctxt->instate == XML_PARSER_EOF)
731: return;
732:
1.176 daniel 733: /*
734: * TODO: 2.11 End-of-Line Handling
735: * the literal two-character sequence "#xD#xA" or a standalone
736: * literal #xD, an XML processor must pass to the application
737: * the single character #xA.
738: */
1.151 daniel 739: if (ctxt->token != 0) ctxt->token = 0;
1.208 veillard 740: else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 741: if ((*ctxt->input->cur == 0) &&
742: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
743: (ctxt->instate != XML_PARSER_COMMENT)) {
744: /*
745: * If we are at the end of the current entity and
746: * the context allows it, we pop consumed entities
747: * automatically.
748: * TODO: the auto closing should be blocked in other cases
749: */
750: xmlPopInput(ctxt);
751: } else {
752: if (*(ctxt->input->cur) == '\n') {
753: ctxt->input->line++; ctxt->input->col = 1;
754: } else ctxt->input->col++;
1.198 daniel 755: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 756: /*
757: * We are supposed to handle UTF8, check it's valid
758: * From rfc2044: encoding of the Unicode values on UTF-8:
759: *
760: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
761: * 0000 0000-0000 007F 0xxxxxxx
762: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
763: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
764: *
1.160 daniel 765: * Check for the 0x110000 limit too
1.151 daniel 766: */
767: const unsigned char *cur = ctxt->input->cur;
768: unsigned char c;
1.91 daniel 769:
1.151 daniel 770: c = *cur;
771: if (c & 0x80) {
772: if (cur[1] == 0)
773: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
774: if ((cur[1] & 0xc0) != 0x80)
775: goto encoding_error;
776: if ((c & 0xe0) == 0xe0) {
777: unsigned int val;
778:
779: if (cur[2] == 0)
780: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
781: if ((cur[2] & 0xc0) != 0x80)
782: goto encoding_error;
783: if ((c & 0xf0) == 0xf0) {
784: if (cur[3] == 0)
785: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
786: if (((c & 0xf8) != 0xf0) ||
787: ((cur[3] & 0xc0) != 0x80))
788: goto encoding_error;
789: /* 4-byte code */
790: ctxt->input->cur += 4;
791: val = (cur[0] & 0x7) << 18;
792: val |= (cur[1] & 0x3f) << 12;
793: val |= (cur[2] & 0x3f) << 6;
794: val |= cur[3] & 0x3f;
795: } else {
796: /* 3-byte code */
797: ctxt->input->cur += 3;
798: val = (cur[0] & 0xf) << 12;
799: val |= (cur[1] & 0x3f) << 6;
800: val |= cur[2] & 0x3f;
801: }
802: if (((val > 0xd7ff) && (val < 0xe000)) ||
803: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 804: (val >= 0x110000)) {
1.151 daniel 805: if ((ctxt->sax != NULL) &&
806: (ctxt->sax->error != NULL))
807: ctxt->sax->error(ctxt->userData,
1.196 daniel 808: "Char 0x%X out of allowed range\n", val);
1.151 daniel 809: ctxt->errNo = XML_ERR_INVALID_ENCODING;
810: ctxt->wellFormed = 0;
1.180 daniel 811: ctxt->disableSAX = 1;
1.151 daniel 812: }
813: } else
814: /* 2-byte code */
815: ctxt->input->cur += 2;
816: } else
817: /* 1-byte code */
818: ctxt->input->cur++;
819: } else {
820: /*
821: * Assume it's a fixed lenght encoding (1) with
822: * a compatibke encoding for the ASCII set, since
823: * XML constructs only use < 128 chars
824: */
825: ctxt->input->cur++;
826: }
827: ctxt->nbChars++;
828: if (*ctxt->input->cur == 0)
829: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
830: }
1.208 veillard 831: } else {
832: ctxt->input->cur++;
833: ctxt->nbChars++;
834: if (*ctxt->input->cur == 0)
835: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1.151 daniel 836: }
1.207 veillard 837: if ((*ctxt->input->cur == '%') && (!ctxt->html))
838: xmlParserHandlePEReference(ctxt);
839: if ((*ctxt->input->cur == '&')&& (!ctxt->html))
840: xmlParserHandleReference(ctxt);
1.168 daniel 841: if ((*ctxt->input->cur == 0) &&
842: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
843: xmlPopInput(ctxt);
1.151 daniel 844: return;
845: encoding_error:
846: /*
847: * If we detect an UTF8 error that probably mean that the
848: * input encoding didn't get properly advertized in the
849: * declaration header. Report the error and switch the encoding
850: * to ISO-Latin-1 (if you don't like this policy, just declare the
851: * encoding !)
852: */
1.198 daniel 853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.151 daniel 854: ctxt->sax->error(ctxt->userData,
855: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 856: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
857: ctxt->input->cur[0], ctxt->input->cur[1],
858: ctxt->input->cur[2], ctxt->input->cur[3]);
859: }
1.151 daniel 860: ctxt->errNo = XML_ERR_INVALID_ENCODING;
861:
1.198 daniel 862: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.151 daniel 863: ctxt->input->cur++;
864: return;
865: }
1.42 daniel 866:
1.152 daniel 867: /**
868: * xmlCurrentChar:
869: * @ctxt: the XML parser context
870: * @len: pointer to the length of the char read
871: *
872: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 873: * bytes in the input buffer. Implement the end of line normalization:
874: * 2.11 End-of-Line Handling
875: * Wherever an external parsed entity or the literal entity value
876: * of an internal parsed entity contains either the literal two-character
877: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
878: * must pass to the application the single character #xA.
879: * This behavior can conveniently be produced by normalizing all
880: * line breaks to #xA on input, before parsing.)
1.152 daniel 881: *
882: * Returns the current char value and its lenght
883: */
884:
885: int
886: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1.201 daniel 887: if (ctxt->instate == XML_PARSER_EOF)
888: return(0);
889:
1.152 daniel 890: if (ctxt->token != 0) {
891: *len = 0;
892: return(ctxt->token);
893: }
1.198 daniel 894: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.152 daniel 895: /*
896: * We are supposed to handle UTF8, check it's valid
897: * From rfc2044: encoding of the Unicode values on UTF-8:
898: *
899: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
900: * 0000 0000-0000 007F 0xxxxxxx
901: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
902: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
903: *
1.160 daniel 904: * Check for the 0x110000 limit too
1.152 daniel 905: */
906: const unsigned char *cur = ctxt->input->cur;
907: unsigned char c;
908: unsigned int val;
909:
910: c = *cur;
911: if (c & 0x80) {
912: if (cur[1] == 0)
913: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
914: if ((cur[1] & 0xc0) != 0x80)
915: goto encoding_error;
916: if ((c & 0xe0) == 0xe0) {
917:
918: if (cur[2] == 0)
919: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
920: if ((cur[2] & 0xc0) != 0x80)
921: goto encoding_error;
922: if ((c & 0xf0) == 0xf0) {
923: if (cur[3] == 0)
924: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
925: if (((c & 0xf8) != 0xf0) ||
926: ((cur[3] & 0xc0) != 0x80))
927: goto encoding_error;
928: /* 4-byte code */
929: *len = 4;
930: val = (cur[0] & 0x7) << 18;
931: val |= (cur[1] & 0x3f) << 12;
932: val |= (cur[2] & 0x3f) << 6;
933: val |= cur[3] & 0x3f;
934: } else {
935: /* 3-byte code */
936: *len = 3;
937: val = (cur[0] & 0xf) << 12;
938: val |= (cur[1] & 0x3f) << 6;
939: val |= cur[2] & 0x3f;
940: }
941: } else {
942: /* 2-byte code */
943: *len = 2;
944: val = (cur[0] & 0x1f) << 6;
1.168 daniel 945: val |= cur[1] & 0x3f;
1.152 daniel 946: }
947: if (!IS_CHAR(val)) {
948: if ((ctxt->sax != NULL) &&
949: (ctxt->sax->error != NULL))
950: ctxt->sax->error(ctxt->userData,
1.196 daniel 951: "Char 0x%X out of allowed range\n", val);
1.152 daniel 952: ctxt->errNo = XML_ERR_INVALID_ENCODING;
953: ctxt->wellFormed = 0;
1.180 daniel 954: ctxt->disableSAX = 1;
1.152 daniel 955: }
956: return(val);
957: } else {
958: /* 1-byte code */
959: *len = 1;
1.180 daniel 960: if (*ctxt->input->cur == 0xD) {
961: if (ctxt->input->cur[1] == 0xA) {
962: ctxt->nbChars++;
963: ctxt->input->cur++;
964: }
965: return(0xA);
966: }
1.152 daniel 967: return((int) *ctxt->input->cur);
968: }
969: }
970: /*
971: * Assume it's a fixed lenght encoding (1) with
972: * a compatibke encoding for the ASCII set, since
973: * XML constructs only use < 128 chars
974: */
975: *len = 1;
1.180 daniel 976: if (*ctxt->input->cur == 0xD) {
977: if (ctxt->input->cur[1] == 0xA) {
978: ctxt->nbChars++;
979: ctxt->input->cur++;
980: }
981: return(0xA);
982: }
1.152 daniel 983: return((int) *ctxt->input->cur);
984: encoding_error:
985: /*
986: * If we detect an UTF8 error that probably mean that the
987: * input encoding didn't get properly advertized in the
988: * declaration header. Report the error and switch the encoding
989: * to ISO-Latin-1 (if you don't like this policy, just declare the
990: * encoding !)
991: */
1.198 daniel 992: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.152 daniel 993: ctxt->sax->error(ctxt->userData,
994: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 995: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
996: ctxt->input->cur[0], ctxt->input->cur[1],
997: ctxt->input->cur[2], ctxt->input->cur[3]);
998: }
1.152 daniel 999: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1000:
1.198 daniel 1001: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.152 daniel 1002: *len = 1;
1003: return((int) *ctxt->input->cur);
1004: }
1005:
1006: /**
1.162 daniel 1007: * xmlStringCurrentChar:
1008: * @ctxt: the XML parser context
1009: * @cur: pointer to the beginning of the char
1010: * @len: pointer to the length of the char read
1011: *
1012: * The current char value, if using UTF-8 this may actaully span multiple
1013: * bytes in the input buffer.
1014: *
1015: * Returns the current char value and its lenght
1016: */
1017:
1018: int
1019: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1.198 daniel 1020: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.162 daniel 1021: /*
1022: * We are supposed to handle UTF8, check it's valid
1023: * From rfc2044: encoding of the Unicode values on UTF-8:
1024: *
1025: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1026: * 0000 0000-0000 007F 0xxxxxxx
1027: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1028: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1029: *
1030: * Check for the 0x110000 limit too
1031: */
1032: unsigned char c;
1033: unsigned int val;
1034:
1035: c = *cur;
1036: if (c & 0x80) {
1037: if ((cur[1] & 0xc0) != 0x80)
1038: goto encoding_error;
1039: if ((c & 0xe0) == 0xe0) {
1040:
1041: if ((cur[2] & 0xc0) != 0x80)
1042: goto encoding_error;
1043: if ((c & 0xf0) == 0xf0) {
1044: if (((c & 0xf8) != 0xf0) ||
1045: ((cur[3] & 0xc0) != 0x80))
1046: goto encoding_error;
1047: /* 4-byte code */
1048: *len = 4;
1049: val = (cur[0] & 0x7) << 18;
1050: val |= (cur[1] & 0x3f) << 12;
1051: val |= (cur[2] & 0x3f) << 6;
1052: val |= cur[3] & 0x3f;
1053: } else {
1054: /* 3-byte code */
1055: *len = 3;
1056: val = (cur[0] & 0xf) << 12;
1057: val |= (cur[1] & 0x3f) << 6;
1058: val |= cur[2] & 0x3f;
1059: }
1060: } else {
1061: /* 2-byte code */
1062: *len = 2;
1063: val = (cur[0] & 0x1f) << 6;
1064: val |= cur[2] & 0x3f;
1065: }
1066: if (!IS_CHAR(val)) {
1067: if ((ctxt->sax != NULL) &&
1068: (ctxt->sax->error != NULL))
1069: ctxt->sax->error(ctxt->userData,
1.196 daniel 1070: "Char 0x%X out of allowed range\n", val);
1.162 daniel 1071: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1072: ctxt->wellFormed = 0;
1.180 daniel 1073: ctxt->disableSAX = 1;
1.162 daniel 1074: }
1075: return(val);
1076: } else {
1077: /* 1-byte code */
1078: *len = 1;
1079: return((int) *cur);
1080: }
1081: }
1082: /*
1083: * Assume it's a fixed lenght encoding (1) with
1084: * a compatibke encoding for the ASCII set, since
1085: * XML constructs only use < 128 chars
1086: */
1087: *len = 1;
1088: return((int) *cur);
1089: encoding_error:
1090: /*
1091: * If we detect an UTF8 error that probably mean that the
1092: * input encoding didn't get properly advertized in the
1093: * declaration header. Report the error and switch the encoding
1094: * to ISO-Latin-1 (if you don't like this policy, just declare the
1095: * encoding !)
1096: */
1.198 daniel 1097: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.162 daniel 1098: ctxt->sax->error(ctxt->userData,
1099: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 1100: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1101: ctxt->input->cur[0], ctxt->input->cur[1],
1102: ctxt->input->cur[2], ctxt->input->cur[3]);
1103: }
1.162 daniel 1104: ctxt->errNo = XML_ERR_INVALID_ENCODING;
1105:
1106: *len = 1;
1107: return((int) *cur);
1108: }
1109:
1110: /**
1.152 daniel 1111: * xmlCopyChar:
1112: * @len: pointer to the length of the char read (or zero)
1113: * @array: pointer to an arry of xmlChar
1114: * @val: the char value
1115: *
1116: * append the char value in the array
1117: *
1118: * Returns the number of xmlChar written
1119: */
1120:
1121: int
1122: xmlCopyChar(int len, xmlChar *out, int val) {
1123: /*
1124: * We are supposed to handle UTF8, check it's valid
1125: * From rfc2044: encoding of the Unicode values on UTF-8:
1126: *
1127: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1128: * 0000 0000-0000 007F 0xxxxxxx
1129: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1130: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1131: */
1132: if (len == 0) {
1133: if (val < 0) len = 0;
1.160 daniel 1134: else if (val < 0x80) len = 1;
1135: else if (val < 0x800) len = 2;
1136: else if (val < 0x10000) len = 3;
1137: else if (val < 0x110000) len = 4;
1.152 daniel 1138: if (len == 0) {
1139: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
1140: val);
1141: return(0);
1142: }
1143: }
1144: if (len > 1) {
1145: int bits;
1146:
1147: if (val < 0x80) { *out++= val; bits= -6; }
1148: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1149: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1150: else { *out++= (val >> 18) | 0xF0; bits= 12; }
1151:
1152: for ( ; bits >= 0; bits-= 6)
1153: *out++= ((val >> bits) & 0x3F) | 0x80 ;
1154:
1155: return(len);
1156: }
1157: *out = (xmlChar) val;
1158: return(1);
1.155 daniel 1159: }
1160:
1161: /**
1162: * xmlSkipBlankChars:
1163: * @ctxt: the XML parser context
1164: *
1165: * skip all blanks character found at that point in the input streams.
1166: * It pops up finished entities in the process if allowable at that point.
1167: *
1168: * Returns the number of space chars skipped
1169: */
1170:
1171: int
1172: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1173: int cur, res = 0;
1174:
1175: do {
1176: cur = CUR;
1177: while (IS_BLANK(cur)) {
1178: NEXT;
1179: cur = CUR;
1180: res++;
1181: }
1182: while ((cur == 0) && (ctxt->inputNr > 1) &&
1183: (ctxt->instate != XML_PARSER_COMMENT)) {
1184: xmlPopInput(ctxt);
1185: cur = CUR;
1186: }
1187: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1188: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1189: } while (IS_BLANK(cur));
1190: return(res);
1.152 daniel 1191: }
1192:
1.97 daniel 1193: /************************************************************************
1194: * *
1195: * Commodity functions to handle entities processing *
1196: * *
1197: ************************************************************************/
1.40 daniel 1198:
1.50 daniel 1199: /**
1200: * xmlPopInput:
1201: * @ctxt: an XML parser context
1202: *
1.40 daniel 1203: * xmlPopInput: the current input pointed by ctxt->input came to an end
1204: * pop it and return the next char.
1.45 daniel 1205: *
1.123 daniel 1206: * Returns the current xmlChar in the parser context
1.40 daniel 1207: */
1.123 daniel 1208: xmlChar
1.55 daniel 1209: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 1210: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 1211: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 1212: if ((*ctxt->input->cur == 0) &&
1213: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214: return(xmlPopInput(ctxt));
1.40 daniel 1215: return(CUR);
1216: }
1217:
1.50 daniel 1218: /**
1219: * xmlPushInput:
1220: * @ctxt: an XML parser context
1221: * @input: an XML parser input fragment (entity, XML fragment ...).
1222: *
1.40 daniel 1223: * xmlPushInput: switch to a new input stream which is stacked on top
1224: * of the previous one(s).
1225: */
1.55 daniel 1226: void
1227: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 1228: if (input == NULL) return;
1229: inputPush(ctxt, input);
1.164 daniel 1230: GROW;
1.40 daniel 1231: }
1232:
1.50 daniel 1233: /**
1.69 daniel 1234: * xmlFreeInputStream:
1.127 daniel 1235: * @input: an xmlParserInputPtr
1.69 daniel 1236: *
1237: * Free up an input stream.
1238: */
1239: void
1240: xmlFreeInputStream(xmlParserInputPtr input) {
1241: if (input == NULL) return;
1242:
1.119 daniel 1243: if (input->filename != NULL) xmlFree((char *) input->filename);
1244: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 1245: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 1246: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 1247: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 1248: input->free((xmlChar *) input->base);
1.93 veillard 1249: if (input->buf != NULL)
1250: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 1251: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 1252: xmlFree(input);
1.69 daniel 1253: }
1254:
1255: /**
1.96 daniel 1256: * xmlNewInputStream:
1257: * @ctxt: an XML parser context
1258: *
1259: * Create a new input stream structure
1260: * Returns the new input stream or NULL
1261: */
1262: xmlParserInputPtr
1263: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1264: xmlParserInputPtr input;
1265:
1.119 daniel 1266: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 1267: if (input == NULL) {
1.190 daniel 1268: if (ctxt != NULL) {
1269: ctxt->errNo = XML_ERR_NO_MEMORY;
1270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1271: ctxt->sax->error(ctxt->userData,
1272: "malloc: couldn't allocate a new input stream\n");
1273: ctxt->errNo = XML_ERR_NO_MEMORY;
1274: }
1.96 daniel 1275: return(NULL);
1276: }
1.165 daniel 1277: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 1278: input->line = 1;
1279: input->col = 1;
1.167 daniel 1280: input->standalone = -1;
1.96 daniel 1281: return(input);
1282: }
1283:
1284: /**
1.190 daniel 1285: * xmlNewIOInputStream:
1286: * @ctxt: an XML parser context
1287: * @input: an I/O Input
1288: * @enc: the charset encoding if known
1289: *
1290: * Create a new input stream structure encapsulating the @input into
1291: * a stream suitable for the parser.
1292: *
1293: * Returns the new input stream or NULL
1294: */
1295: xmlParserInputPtr
1296: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1297: xmlCharEncoding enc) {
1298: xmlParserInputPtr inputStream;
1299:
1300: inputStream = xmlNewInputStream(ctxt);
1301: if (inputStream == NULL) {
1302: return(NULL);
1303: }
1304: inputStream->filename = NULL;
1305: inputStream->buf = input;
1306: inputStream->base = inputStream->buf->buffer->content;
1307: inputStream->cur = inputStream->buf->buffer->content;
1308: if (enc != XML_CHAR_ENCODING_NONE) {
1309: xmlSwitchEncoding(ctxt, enc);
1310: }
1311:
1312: return(inputStream);
1313: }
1314:
1315: /**
1.50 daniel 1316: * xmlNewEntityInputStream:
1317: * @ctxt: an XML parser context
1318: * @entity: an Entity pointer
1319: *
1.82 daniel 1320: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 1321: *
1322: * Returns the new input stream or NULL
1.45 daniel 1323: */
1.50 daniel 1324: xmlParserInputPtr
1325: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1326: xmlParserInputPtr input;
1327:
1328: if (entity == NULL) {
1.123 daniel 1329: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1330: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1331: ctxt->sax->error(ctxt->userData,
1.45 daniel 1332: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 1333: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 1334: return(NULL);
1.45 daniel 1335: }
1336: if (entity->content == NULL) {
1.159 daniel 1337: switch (entity->etype) {
1.113 daniel 1338: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 1339: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 1340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1341: ctxt->sax->error(ctxt->userData,
1342: "xmlNewEntityInputStream unparsed entity !\n");
1343: break;
1344: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1345: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1346: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1347: (char *) entity->ExternalID, ctxt));
1.113 daniel 1348: case XML_INTERNAL_GENERAL_ENTITY:
1349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1350: ctxt->sax->error(ctxt->userData,
1351: "Internal entity %s without content !\n", entity->name);
1352: break;
1353: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1354: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1356: ctxt->sax->error(ctxt->userData,
1357: "Internal parameter entity %s without content !\n", entity->name);
1358: break;
1359: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1360: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1361: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1362: ctxt->sax->error(ctxt->userData,
1363: "Predefined entity %s without content !\n", entity->name);
1364: break;
1365: }
1.50 daniel 1366: return(NULL);
1.45 daniel 1367: }
1.96 daniel 1368: input = xmlNewInputStream(ctxt);
1.45 daniel 1369: if (input == NULL) {
1.50 daniel 1370: return(NULL);
1.45 daniel 1371: }
1.156 daniel 1372: input->filename = (char *) entity->SystemID;
1.45 daniel 1373: input->base = entity->content;
1374: input->cur = entity->content;
1.140 daniel 1375: input->length = entity->length;
1.50 daniel 1376: return(input);
1.45 daniel 1377: }
1378:
1.59 daniel 1379: /**
1380: * xmlNewStringInputStream:
1381: * @ctxt: an XML parser context
1.96 daniel 1382: * @buffer: an memory buffer
1.59 daniel 1383: *
1384: * Create a new input stream based on a memory buffer.
1.68 daniel 1385: * Returns the new input stream
1.59 daniel 1386: */
1387: xmlParserInputPtr
1.123 daniel 1388: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1389: xmlParserInputPtr input;
1390:
1.96 daniel 1391: if (buffer == NULL) {
1.123 daniel 1392: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1394: ctxt->sax->error(ctxt->userData,
1.59 daniel 1395: "internal: xmlNewStringInputStream string = NULL\n");
1396: return(NULL);
1397: }
1.96 daniel 1398: input = xmlNewInputStream(ctxt);
1.59 daniel 1399: if (input == NULL) {
1400: return(NULL);
1401: }
1.96 daniel 1402: input->base = buffer;
1403: input->cur = buffer;
1.140 daniel 1404: input->length = xmlStrlen(buffer);
1.59 daniel 1405: return(input);
1406: }
1407:
1.76 daniel 1408: /**
1409: * xmlNewInputFromFile:
1410: * @ctxt: an XML parser context
1411: * @filename: the filename to use as entity
1412: *
1413: * Create a new input stream based on a file.
1414: *
1415: * Returns the new input stream or NULL in case of error
1416: */
1417: xmlParserInputPtr
1.79 daniel 1418: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1419: xmlParserInputBufferPtr buf;
1.76 daniel 1420: xmlParserInputPtr inputStream;
1.111 daniel 1421: char *directory = NULL;
1.76 daniel 1422:
1.96 daniel 1423: if (ctxt == NULL) return(NULL);
1.91 daniel 1424: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1425: if (buf == NULL) {
1.140 daniel 1426: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1427:
1.94 daniel 1428: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1429: #ifdef WIN32
1430: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1431: #else
1432: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1433: #endif
1434: buf = xmlParserInputBufferCreateFilename(name,
1435: XML_CHAR_ENCODING_NONE);
1.106 daniel 1436: if (buf != NULL)
1.142 daniel 1437: directory = xmlParserGetDirectory(name);
1.106 daniel 1438: }
1439: if ((buf == NULL) && (ctxt->directory != NULL)) {
1440: #ifdef WIN32
1441: sprintf(name, "%s\\%s", ctxt->directory, filename);
1442: #else
1443: sprintf(name, "%s/%s", ctxt->directory, filename);
1444: #endif
1445: buf = xmlParserInputBufferCreateFilename(name,
1446: XML_CHAR_ENCODING_NONE);
1447: if (buf != NULL)
1.142 daniel 1448: directory = xmlParserGetDirectory(name);
1.106 daniel 1449: }
1450: if (buf == NULL)
1.94 daniel 1451: return(NULL);
1452: }
1453: if (directory == NULL)
1454: directory = xmlParserGetDirectory(filename);
1.76 daniel 1455:
1.96 daniel 1456: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1457: if (inputStream == NULL) {
1.119 daniel 1458: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1459: return(NULL);
1460: }
1461:
1.119 daniel 1462: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1463: inputStream->directory = directory;
1.91 daniel 1464: inputStream->buf = buf;
1.76 daniel 1465:
1.91 daniel 1466: inputStream->base = inputStream->buf->buffer->content;
1467: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1468: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1469: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1470: return(inputStream);
1471: }
1472:
1.77 daniel 1473: /************************************************************************
1474: * *
1.97 daniel 1475: * Commodity functions to handle parser contexts *
1476: * *
1477: ************************************************************************/
1478:
1479: /**
1480: * xmlInitParserCtxt:
1481: * @ctxt: an XML parser context
1482: *
1483: * Initialize a parser context
1484: */
1485:
1486: void
1487: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1488: {
1489: xmlSAXHandler *sax;
1490:
1.168 daniel 1491: xmlDefaultSAXHandlerInit();
1492:
1.119 daniel 1493: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1494: if (sax == NULL) {
1495: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1496: }
1.180 daniel 1497: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1498:
1499: /* Allocate the Input stack */
1.119 daniel 1500: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1501: ctxt->inputNr = 0;
1502: ctxt->inputMax = 5;
1503: ctxt->input = NULL;
1.165 daniel 1504:
1.97 daniel 1505: ctxt->version = NULL;
1506: ctxt->encoding = NULL;
1507: ctxt->standalone = -1;
1.98 daniel 1508: ctxt->hasExternalSubset = 0;
1509: ctxt->hasPErefs = 0;
1.97 daniel 1510: ctxt->html = 0;
1.98 daniel 1511: ctxt->external = 0;
1.140 daniel 1512: ctxt->instate = XML_PARSER_START;
1.97 daniel 1513: ctxt->token = 0;
1.106 daniel 1514: ctxt->directory = NULL;
1.97 daniel 1515:
1516: /* Allocate the Node stack */
1.119 daniel 1517: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1518: ctxt->nodeNr = 0;
1519: ctxt->nodeMax = 10;
1520: ctxt->node = NULL;
1521:
1.140 daniel 1522: /* Allocate the Name stack */
1523: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1524: ctxt->nameNr = 0;
1525: ctxt->nameMax = 10;
1526: ctxt->name = NULL;
1527:
1.176 daniel 1528: /* Allocate the space stack */
1529: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1530: ctxt->spaceNr = 1;
1531: ctxt->spaceMax = 10;
1532: ctxt->spaceTab[0] = -1;
1533: ctxt->space = &ctxt->spaceTab[0];
1534:
1.160 daniel 1535: if (sax == NULL) {
1536: ctxt->sax = &xmlDefaultSAXHandler;
1537: } else {
1.97 daniel 1538: ctxt->sax = sax;
1539: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1540: }
1541: ctxt->userData = ctxt;
1542: ctxt->myDoc = NULL;
1543: ctxt->wellFormed = 1;
1.99 daniel 1544: ctxt->valid = 1;
1.100 daniel 1545: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1546: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1547: ctxt->vctxt.userData = ctxt;
1.149 daniel 1548: if (ctxt->validate) {
1549: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1550: if (xmlGetWarningsDefaultValue == 0)
1551: ctxt->vctxt.warning = NULL;
1552: else
1553: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1554: /* Allocate the Node stack */
1555: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1556: ctxt->vctxt.nodeNr = 0;
1557: ctxt->vctxt.nodeMax = 4;
1558: ctxt->vctxt.node = NULL;
1.149 daniel 1559: } else {
1560: ctxt->vctxt.error = NULL;
1561: ctxt->vctxt.warning = NULL;
1562: }
1.97 daniel 1563: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1564: ctxt->record_info = 0;
1.135 daniel 1565: ctxt->nbChars = 0;
1.140 daniel 1566: ctxt->checkIndex = 0;
1.180 daniel 1567: ctxt->inSubset = 0;
1.140 daniel 1568: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1569: ctxt->depth = 0;
1.198 daniel 1570: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.97 daniel 1571: xmlInitNodeInfoSeq(&ctxt->node_seq);
1572: }
1573:
1574: /**
1575: * xmlFreeParserCtxt:
1576: * @ctxt: an XML parser context
1577: *
1578: * Free all the memory used by a parser context. However the parsed
1579: * document in ctxt->myDoc is not freed.
1580: */
1581:
1582: void
1583: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1584: {
1585: xmlParserInputPtr input;
1.140 daniel 1586: xmlChar *oldname;
1.97 daniel 1587:
1588: if (ctxt == NULL) return;
1589:
1590: while ((input = inputPop(ctxt)) != NULL) {
1591: xmlFreeInputStream(input);
1592: }
1.140 daniel 1593: while ((oldname = namePop(ctxt)) != NULL) {
1594: xmlFree(oldname);
1595: }
1.176 daniel 1596: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1597: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1598: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1599: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1600: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1601: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1602: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1603: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1604: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1605: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1606: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1607: xmlFree(ctxt->sax);
1608: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1609: xmlFree(ctxt);
1.97 daniel 1610: }
1611:
1612: /**
1613: * xmlNewParserCtxt:
1614: *
1615: * Allocate and initialize a new parser context.
1616: *
1617: * Returns the xmlParserCtxtPtr or NULL
1618: */
1619:
1620: xmlParserCtxtPtr
1621: xmlNewParserCtxt()
1622: {
1623: xmlParserCtxtPtr ctxt;
1624:
1.119 daniel 1625: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1626: if (ctxt == NULL) {
1627: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1628: perror("malloc");
1629: return(NULL);
1630: }
1.165 daniel 1631: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1632: xmlInitParserCtxt(ctxt);
1633: return(ctxt);
1634: }
1635:
1636: /**
1637: * xmlClearParserCtxt:
1638: * @ctxt: an XML parser context
1639: *
1640: * Clear (release owned resources) and reinitialize a parser context
1641: */
1642:
1643: void
1644: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1645: {
1646: xmlClearNodeInfoSeq(&ctxt->node_seq);
1647: xmlInitParserCtxt(ctxt);
1648: }
1649:
1650: /************************************************************************
1651: * *
1.77 daniel 1652: * Commodity functions to handle entities *
1653: * *
1654: ************************************************************************/
1655:
1.174 daniel 1656: /**
1657: * xmlCheckEntity:
1658: * @ctxt: an XML parser context
1659: * @content: the entity content string
1660: *
1661: * Parse an entity content and checks the WF constraints
1662: *
1663: */
1664:
1665: void
1666: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1667: }
1.97 daniel 1668:
1669: /**
1670: * xmlParseCharRef:
1671: * @ctxt: an XML parser context
1672: *
1673: * parse Reference declarations
1674: *
1675: * [66] CharRef ::= '&#' [0-9]+ ';' |
1676: * '&#x' [0-9a-fA-F]+ ';'
1677: *
1.98 daniel 1678: * [ WFC: Legal Character ]
1679: * Characters referred to using character references must match the
1680: * production for Char.
1681: *
1.135 daniel 1682: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1683: */
1.97 daniel 1684: int
1685: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1686: int val = 0;
1687:
1.111 daniel 1688: if (ctxt->token != 0) {
1689: val = ctxt->token;
1690: ctxt->token = 0;
1691: return(val);
1692: }
1.152 daniel 1693: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1694: (NXT(2) == 'x')) {
1695: SKIP(3);
1.152 daniel 1696: while (RAW != ';') {
1697: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1698: val = val * 16 + (CUR - '0');
1.152 daniel 1699: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1700: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1701: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1702: val = val * 16 + (CUR - 'A') + 10;
1703: else {
1.123 daniel 1704: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706: ctxt->sax->error(ctxt->userData,
1707: "xmlParseCharRef: invalid hexadecimal value\n");
1708: ctxt->wellFormed = 0;
1.180 daniel 1709: ctxt->disableSAX = 1;
1.97 daniel 1710: val = 0;
1711: break;
1712: }
1713: NEXT;
1714: }
1.164 daniel 1715: if (RAW == ';') {
1716: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1717: ctxt->nbChars ++;
1718: ctxt->input->cur++;
1719: }
1.152 daniel 1720: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1721: SKIP(2);
1.152 daniel 1722: while (RAW != ';') {
1723: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1724: val = val * 10 + (CUR - '0');
1725: else {
1.123 daniel 1726: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1728: ctxt->sax->error(ctxt->userData,
1729: "xmlParseCharRef: invalid decimal value\n");
1730: ctxt->wellFormed = 0;
1.180 daniel 1731: ctxt->disableSAX = 1;
1.97 daniel 1732: val = 0;
1733: break;
1734: }
1735: NEXT;
1736: }
1.164 daniel 1737: if (RAW == ';') {
1738: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1739: ctxt->nbChars ++;
1740: ctxt->input->cur++;
1741: }
1.97 daniel 1742: } else {
1.123 daniel 1743: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1744: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1745: ctxt->sax->error(ctxt->userData,
1746: "xmlParseCharRef: invalid value\n");
1.97 daniel 1747: ctxt->wellFormed = 0;
1.180 daniel 1748: ctxt->disableSAX = 1;
1.97 daniel 1749: }
1.98 daniel 1750:
1.97 daniel 1751: /*
1.98 daniel 1752: * [ WFC: Legal Character ]
1753: * Characters referred to using character references must match the
1754: * production for Char.
1.97 daniel 1755: */
1756: if (IS_CHAR(val)) {
1757: return(val);
1758: } else {
1.123 daniel 1759: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1761: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1762: val);
1763: ctxt->wellFormed = 0;
1.180 daniel 1764: ctxt->disableSAX = 1;
1.97 daniel 1765: }
1766: return(0);
1.77 daniel 1767: }
1768:
1.96 daniel 1769: /**
1.135 daniel 1770: * xmlParseStringCharRef:
1771: * @ctxt: an XML parser context
1772: * @str: a pointer to an index in the string
1773: *
1774: * parse Reference declarations, variant parsing from a string rather
1775: * than an an input flow.
1776: *
1777: * [66] CharRef ::= '&#' [0-9]+ ';' |
1778: * '&#x' [0-9a-fA-F]+ ';'
1779: *
1780: * [ WFC: Legal Character ]
1781: * Characters referred to using character references must match the
1782: * production for Char.
1783: *
1784: * Returns the value parsed (as an int), 0 in case of error, str will be
1785: * updated to the current value of the index
1786: */
1787: int
1788: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1789: const xmlChar *ptr;
1790: xmlChar cur;
1791: int val = 0;
1792:
1793: if ((str == NULL) || (*str == NULL)) return(0);
1794: ptr = *str;
1795: cur = *ptr;
1.137 daniel 1796: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1797: ptr += 3;
1798: cur = *ptr;
1799: while (cur != ';') {
1800: if ((cur >= '0') && (cur <= '9'))
1801: val = val * 16 + (cur - '0');
1802: else if ((cur >= 'a') && (cur <= 'f'))
1803: val = val * 16 + (cur - 'a') + 10;
1804: else if ((cur >= 'A') && (cur <= 'F'))
1805: val = val * 16 + (cur - 'A') + 10;
1806: else {
1807: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1809: ctxt->sax->error(ctxt->userData,
1.198 daniel 1810: "xmlParseStringCharRef: invalid hexadecimal value\n");
1.135 daniel 1811: ctxt->wellFormed = 0;
1.180 daniel 1812: ctxt->disableSAX = 1;
1.135 daniel 1813: val = 0;
1814: break;
1815: }
1816: ptr++;
1817: cur = *ptr;
1818: }
1819: if (cur == ';')
1820: ptr++;
1.145 daniel 1821: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1822: ptr += 2;
1823: cur = *ptr;
1824: while (cur != ';') {
1825: if ((cur >= '0') && (cur <= '9'))
1826: val = val * 10 + (cur - '0');
1827: else {
1828: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1829: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1830: ctxt->sax->error(ctxt->userData,
1.198 daniel 1831: "xmlParseStringCharRef: invalid decimal value\n");
1.135 daniel 1832: ctxt->wellFormed = 0;
1.180 daniel 1833: ctxt->disableSAX = 1;
1.135 daniel 1834: val = 0;
1835: break;
1836: }
1837: ptr++;
1838: cur = *ptr;
1839: }
1840: if (cur == ';')
1841: ptr++;
1842: } else {
1843: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1845: ctxt->sax->error(ctxt->userData,
1846: "xmlParseCharRef: invalid value\n");
1847: ctxt->wellFormed = 0;
1.180 daniel 1848: ctxt->disableSAX = 1;
1.135 daniel 1849: return(0);
1850: }
1851: *str = ptr;
1852:
1853: /*
1854: * [ WFC: Legal Character ]
1855: * Characters referred to using character references must match the
1856: * production for Char.
1857: */
1858: if (IS_CHAR(val)) {
1859: return(val);
1860: } else {
1861: ctxt->errNo = XML_ERR_INVALID_CHAR;
1862: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1863: ctxt->sax->error(ctxt->userData,
1864: "CharRef: invalid xmlChar value %d\n", val);
1865: ctxt->wellFormed = 0;
1.180 daniel 1866: ctxt->disableSAX = 1;
1.135 daniel 1867: }
1868: return(0);
1869: }
1870:
1871: /**
1.96 daniel 1872: * xmlParserHandleReference:
1873: * @ctxt: the parser context
1874: *
1.97 daniel 1875: * [67] Reference ::= EntityRef | CharRef
1876: *
1.96 daniel 1877: * [68] EntityRef ::= '&' Name ';'
1878: *
1.98 daniel 1879: * [ WFC: Entity Declared ]
1880: * the Name given in the entity reference must match that in an entity
1881: * declaration, except that well-formed documents need not declare any
1882: * of the following entities: amp, lt, gt, apos, quot.
1883: *
1884: * [ WFC: Parsed Entity ]
1885: * An entity reference must not contain the name of an unparsed entity
1886: *
1.97 daniel 1887: * [66] CharRef ::= '&#' [0-9]+ ';' |
1888: * '&#x' [0-9a-fA-F]+ ';'
1889: *
1.96 daniel 1890: * A PEReference may have been detectect in the current input stream
1891: * the handling is done accordingly to
1892: * http://www.w3.org/TR/REC-xml#entproc
1893: */
1894: void
1895: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1896: xmlParserInputPtr input;
1.123 daniel 1897: xmlChar *name;
1.97 daniel 1898: xmlEntityPtr ent = NULL;
1899:
1.126 daniel 1900: if (ctxt->token != 0) {
1901: return;
1902: }
1.152 daniel 1903: if (RAW != '&') return;
1.97 daniel 1904: GROW;
1.152 daniel 1905: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1906: switch(ctxt->instate) {
1.140 daniel 1907: case XML_PARSER_ENTITY_DECL:
1908: case XML_PARSER_PI:
1.109 daniel 1909: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1910: case XML_PARSER_COMMENT:
1.168 daniel 1911: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1912: /* we just ignore it there */
1913: return;
1914: case XML_PARSER_START_TAG:
1.109 daniel 1915: return;
1.140 daniel 1916: case XML_PARSER_END_TAG:
1.97 daniel 1917: return;
1918: case XML_PARSER_EOF:
1.123 daniel 1919: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1921: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1922: ctxt->wellFormed = 0;
1.180 daniel 1923: ctxt->disableSAX = 1;
1.97 daniel 1924: return;
1925: case XML_PARSER_PROLOG:
1.140 daniel 1926: case XML_PARSER_START:
1927: case XML_PARSER_MISC:
1.123 daniel 1928: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1929: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1930: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1931: ctxt->wellFormed = 0;
1.180 daniel 1932: ctxt->disableSAX = 1;
1.97 daniel 1933: return;
1934: case XML_PARSER_EPILOG:
1.123 daniel 1935: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1937: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1938: ctxt->wellFormed = 0;
1.180 daniel 1939: ctxt->disableSAX = 1;
1.97 daniel 1940: return;
1941: case XML_PARSER_DTD:
1.123 daniel 1942: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1944: ctxt->sax->error(ctxt->userData,
1945: "CharRef are forbiden in DTDs!\n");
1946: ctxt->wellFormed = 0;
1.180 daniel 1947: ctxt->disableSAX = 1;
1.97 daniel 1948: return;
1949: case XML_PARSER_ENTITY_VALUE:
1950: /*
1951: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1952: * substitution here since we need the literal
1.97 daniel 1953: * entity value to be able to save the internal
1954: * subset of the document.
1955: * This will be handled by xmlDecodeEntities
1956: */
1957: return;
1958: case XML_PARSER_CONTENT:
1959: case XML_PARSER_ATTRIBUTE_VALUE:
1960: ctxt->token = xmlParseCharRef(ctxt);
1961: return;
1962: }
1963: return;
1964: }
1965:
1966: switch(ctxt->instate) {
1.109 daniel 1967: case XML_PARSER_CDATA_SECTION:
1968: return;
1.140 daniel 1969: case XML_PARSER_PI:
1.97 daniel 1970: case XML_PARSER_COMMENT:
1.168 daniel 1971: case XML_PARSER_SYSTEM_LITERAL:
1972: case XML_PARSER_CONTENT:
1.97 daniel 1973: return;
1.140 daniel 1974: case XML_PARSER_START_TAG:
1975: return;
1976: case XML_PARSER_END_TAG:
1977: return;
1.97 daniel 1978: case XML_PARSER_EOF:
1.123 daniel 1979: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1980: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1981: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1982: ctxt->wellFormed = 0;
1.180 daniel 1983: ctxt->disableSAX = 1;
1.97 daniel 1984: return;
1985: case XML_PARSER_PROLOG:
1.140 daniel 1986: case XML_PARSER_START:
1987: case XML_PARSER_MISC:
1.123 daniel 1988: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1990: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1991: ctxt->wellFormed = 0;
1.180 daniel 1992: ctxt->disableSAX = 1;
1.97 daniel 1993: return;
1994: case XML_PARSER_EPILOG:
1.123 daniel 1995: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1998: ctxt->wellFormed = 0;
1.180 daniel 1999: ctxt->disableSAX = 1;
1.97 daniel 2000: return;
2001: case XML_PARSER_ENTITY_VALUE:
2002: /*
2003: * NOTE: in the case of entity values, we don't do the
1.127 daniel 2004: * substitution here since we need the literal
1.97 daniel 2005: * entity value to be able to save the internal
2006: * subset of the document.
2007: * This will be handled by xmlDecodeEntities
2008: */
2009: return;
2010: case XML_PARSER_ATTRIBUTE_VALUE:
2011: /*
2012: * NOTE: in the case of attributes values, we don't do the
2013: * substitution here unless we are in a mode where
2014: * the parser is explicitely asked to substitute
2015: * entities. The SAX callback is called with values
2016: * without entity substitution.
2017: * This will then be handled by xmlDecodeEntities
2018: */
1.113 daniel 2019: return;
1.97 daniel 2020: case XML_PARSER_ENTITY_DECL:
2021: /*
2022: * we just ignore it there
2023: * the substitution will be done once the entity is referenced
2024: */
2025: return;
2026: case XML_PARSER_DTD:
1.123 daniel 2027: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 2028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2029: ctxt->sax->error(ctxt->userData,
2030: "Entity references are forbiden in DTDs!\n");
2031: ctxt->wellFormed = 0;
1.180 daniel 2032: ctxt->disableSAX = 1;
1.97 daniel 2033: return;
2034: }
2035:
2036: NEXT;
2037: name = xmlScanName(ctxt);
2038: if (name == NULL) {
1.123 daniel 2039: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 2040: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2041: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
2042: ctxt->wellFormed = 0;
1.180 daniel 2043: ctxt->disableSAX = 1;
1.97 daniel 2044: ctxt->token = '&';
2045: return;
2046: }
2047: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 2048: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 2049: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2050: ctxt->sax->error(ctxt->userData,
2051: "Entity reference: ';' expected\n");
2052: ctxt->wellFormed = 0;
1.180 daniel 2053: ctxt->disableSAX = 1;
1.97 daniel 2054: ctxt->token = '&';
1.119 daniel 2055: xmlFree(name);
1.97 daniel 2056: return;
2057: }
2058: SKIP(xmlStrlen(name) + 1);
2059: if (ctxt->sax != NULL) {
2060: if (ctxt->sax->getEntity != NULL)
2061: ent = ctxt->sax->getEntity(ctxt->userData, name);
2062: }
1.98 daniel 2063:
2064: /*
2065: * [ WFC: Entity Declared ]
2066: * the Name given in the entity reference must match that in an entity
2067: * declaration, except that well-formed documents need not declare any
2068: * of the following entities: amp, lt, gt, apos, quot.
2069: */
1.97 daniel 2070: if (ent == NULL)
2071: ent = xmlGetPredefinedEntity(name);
2072: if (ent == NULL) {
1.123 daniel 2073: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 2074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2075: ctxt->sax->error(ctxt->userData,
1.98 daniel 2076: "Entity reference: entity %s not declared\n",
2077: name);
1.97 daniel 2078: ctxt->wellFormed = 0;
1.180 daniel 2079: ctxt->disableSAX = 1;
1.119 daniel 2080: xmlFree(name);
1.97 daniel 2081: return;
2082: }
1.98 daniel 2083:
2084: /*
2085: * [ WFC: Parsed Entity ]
2086: * An entity reference must not contain the name of an unparsed entity
2087: */
1.159 daniel 2088: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 2089: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 2090: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2091: ctxt->sax->error(ctxt->userData,
2092: "Entity reference to unparsed entity %s\n", name);
2093: ctxt->wellFormed = 0;
1.180 daniel 2094: ctxt->disableSAX = 1;
1.98 daniel 2095: }
2096:
1.159 daniel 2097: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 2098: ctxt->token = ent->content[0];
1.119 daniel 2099: xmlFree(name);
1.97 daniel 2100: return;
2101: }
2102: input = xmlNewEntityInputStream(ctxt, ent);
2103: xmlPushInput(ctxt, input);
1.119 daniel 2104: xmlFree(name);
1.96 daniel 2105: return;
2106: }
2107:
2108: /**
2109: * xmlParserHandlePEReference:
2110: * @ctxt: the parser context
2111: *
2112: * [69] PEReference ::= '%' Name ';'
2113: *
1.98 daniel 2114: * [ WFC: No Recursion ]
2115: * TODO A parsed entity must not contain a recursive
2116: * reference to itself, either directly or indirectly.
2117: *
2118: * [ WFC: Entity Declared ]
2119: * In a document without any DTD, a document with only an internal DTD
2120: * subset which contains no parameter entity references, or a document
2121: * with "standalone='yes'", ... ... The declaration of a parameter
2122: * entity must precede any reference to it...
2123: *
2124: * [ VC: Entity Declared ]
2125: * In a document with an external subset or external parameter entities
2126: * with "standalone='no'", ... ... The declaration of a parameter entity
2127: * must precede any reference to it...
2128: *
2129: * [ WFC: In DTD ]
2130: * Parameter-entity references may only appear in the DTD.
2131: * NOTE: misleading but this is handled.
2132: *
2133: * A PEReference may have been detected in the current input stream
1.96 daniel 2134: * the handling is done accordingly to
2135: * http://www.w3.org/TR/REC-xml#entproc
2136: * i.e.
2137: * - Included in literal in entity values
2138: * - Included as Paraemeter Entity reference within DTDs
2139: */
2140: void
2141: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 2142: xmlChar *name;
1.96 daniel 2143: xmlEntityPtr entity = NULL;
2144: xmlParserInputPtr input;
2145:
1.126 daniel 2146: if (ctxt->token != 0) {
2147: return;
2148: }
1.152 daniel 2149: if (RAW != '%') return;
1.96 daniel 2150: switch(ctxt->instate) {
1.109 daniel 2151: case XML_PARSER_CDATA_SECTION:
2152: return;
1.97 daniel 2153: case XML_PARSER_COMMENT:
2154: return;
1.140 daniel 2155: case XML_PARSER_START_TAG:
2156: return;
2157: case XML_PARSER_END_TAG:
2158: return;
1.96 daniel 2159: case XML_PARSER_EOF:
1.123 daniel 2160: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 2161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2162: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
2163: ctxt->wellFormed = 0;
1.180 daniel 2164: ctxt->disableSAX = 1;
1.96 daniel 2165: return;
2166: case XML_PARSER_PROLOG:
1.140 daniel 2167: case XML_PARSER_START:
2168: case XML_PARSER_MISC:
1.123 daniel 2169: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 2170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2171: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
2172: ctxt->wellFormed = 0;
1.180 daniel 2173: ctxt->disableSAX = 1;
1.96 daniel 2174: return;
1.97 daniel 2175: case XML_PARSER_ENTITY_DECL:
1.96 daniel 2176: case XML_PARSER_CONTENT:
2177: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 2178: case XML_PARSER_PI:
1.168 daniel 2179: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 2180: /* we just ignore it there */
2181: return;
2182: case XML_PARSER_EPILOG:
1.123 daniel 2183: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 2184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 2185: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 2186: ctxt->wellFormed = 0;
1.180 daniel 2187: ctxt->disableSAX = 1;
1.96 daniel 2188: return;
1.97 daniel 2189: case XML_PARSER_ENTITY_VALUE:
2190: /*
2191: * NOTE: in the case of entity values, we don't do the
1.127 daniel 2192: * substitution here since we need the literal
1.97 daniel 2193: * entity value to be able to save the internal
2194: * subset of the document.
2195: * This will be handled by xmlDecodeEntities
2196: */
2197: return;
1.96 daniel 2198: case XML_PARSER_DTD:
1.98 daniel 2199: /*
2200: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2201: * In the internal DTD subset, parameter-entity references
2202: * can occur only where markup declarations can occur, not
2203: * within markup declarations.
2204: * In that case this is handled in xmlParseMarkupDecl
2205: */
2206: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2207: return;
1.96 daniel 2208: }
2209:
2210: NEXT;
2211: name = xmlParseName(ctxt);
2212: if (name == NULL) {
1.123 daniel 2213: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 2214: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2215: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
2216: ctxt->wellFormed = 0;
1.180 daniel 2217: ctxt->disableSAX = 1;
1.96 daniel 2218: } else {
1.152 daniel 2219: if (RAW == ';') {
1.96 daniel 2220: NEXT;
1.98 daniel 2221: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2222: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 2223: if (entity == NULL) {
1.98 daniel 2224:
2225: /*
2226: * [ WFC: Entity Declared ]
2227: * In a document without any DTD, a document with only an
2228: * internal DTD subset which contains no parameter entity
2229: * references, or a document with "standalone='yes'", ...
2230: * ... The declaration of a parameter entity must precede
2231: * any reference to it...
2232: */
2233: if ((ctxt->standalone == 1) ||
2234: ((ctxt->hasExternalSubset == 0) &&
2235: (ctxt->hasPErefs == 0))) {
2236: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2237: ctxt->sax->error(ctxt->userData,
2238: "PEReference: %%%s; not found\n", name);
2239: ctxt->wellFormed = 0;
1.180 daniel 2240: ctxt->disableSAX = 1;
1.98 daniel 2241: } else {
2242: /*
2243: * [ VC: Entity Declared ]
2244: * In a document with an external subset or external
2245: * parameter entities with "standalone='no'", ...
2246: * ... The declaration of a parameter entity must precede
2247: * any reference to it...
2248: */
1.212 veillard 2249: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2250: ctxt->vctxt.error(ctxt->vctxt.userData,
2251: "PEReference: %%%s; not found\n", name);
2252: } else
1.98 daniel 2253: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
2254: ctxt->sax->warning(ctxt->userData,
2255: "PEReference: %%%s; not found\n", name);
2256: ctxt->valid = 0;
2257: }
1.96 daniel 2258: } else {
1.159 daniel 2259: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2260: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 2261: /*
1.156 daniel 2262: * TODO !!! handle the extra spaces added before and after
1.96 daniel 2263: * c.f. http://www.w3.org/TR/REC-xml#as-PE
2264: */
2265: input = xmlNewEntityInputStream(ctxt, entity);
2266: xmlPushInput(ctxt, input);
1.164 daniel 2267: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2268: (RAW == '<') && (NXT(1) == '?') &&
2269: (NXT(2) == 'x') && (NXT(3) == 'm') &&
2270: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 2271: xmlParseTextDecl(ctxt);
1.164 daniel 2272: }
2273: if (ctxt->token == 0)
2274: ctxt->token = ' ';
1.96 daniel 2275: } else {
2276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277: ctxt->sax->error(ctxt->userData,
2278: "xmlHandlePEReference: %s is not a parameter entity\n",
2279: name);
2280: ctxt->wellFormed = 0;
1.180 daniel 2281: ctxt->disableSAX = 1;
1.96 daniel 2282: }
2283: }
2284: } else {
1.123 daniel 2285: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 2286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2287: ctxt->sax->error(ctxt->userData,
2288: "xmlHandlePEReference: expecting ';'\n");
2289: ctxt->wellFormed = 0;
1.180 daniel 2290: ctxt->disableSAX = 1;
1.96 daniel 2291: }
1.119 daniel 2292: xmlFree(name);
1.97 daniel 2293: }
2294: }
2295:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. It must be
 * used inside a function returning a pointer: when the reallocation
 * fails the previous block is released (the realloc result is kept in a
 * temporary so the old pointer is not lost) and the enclosing function
 * returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 2308:
2309: /**
2310: * xmlDecodeEntities:
2311: * @ctxt: the parser context
2312: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2313: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 2314: * @end: an end marker xmlChar, 0 if none
2315: * @end2: an end marker xmlChar, 0 if none
2316: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 2317: *
2318: * [67] Reference ::= EntityRef | CharRef
2319: *
2320: * [69] PEReference ::= '%' Name ';'
2321: *
2322: * Returns A newly allocated string with the substitution done. The caller
2323: * must deallocate it !
2324: */
1.123 daniel 2325: xmlChar *
1.77 daniel 2326: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 2327: xmlChar end, xmlChar end2, xmlChar end3) {
2328: xmlChar *buffer = NULL;
1.202 daniel 2329: unsigned int buffer_size = 0;
2330: unsigned int nbchars = 0;
1.78 daniel 2331:
1.123 daniel 2332: xmlChar *current = NULL;
1.77 daniel 2333: xmlEntityPtr ent;
2334: unsigned int max = (unsigned int) len;
1.161 daniel 2335: int c,l;
1.77 daniel 2336:
1.185 daniel 2337: if (ctxt->depth > 40) {
2338: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2339: ctxt->sax->error(ctxt->userData,
2340: "Detected entity reference loop\n");
2341: ctxt->wellFormed = 0;
2342: ctxt->disableSAX = 1;
2343: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2344: return(NULL);
2345: }
2346:
1.77 daniel 2347: /*
2348: * allocate a translation buffer.
2349: */
1.140 daniel 2350: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2351: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2352: if (buffer == NULL) {
2353: perror("xmlDecodeEntities: malloc failed");
2354: return(NULL);
2355: }
2356:
1.78 daniel 2357: /*
2358: * Ok loop until we reach one of the ending char or a size limit.
2359: */
1.161 daniel 2360: c = CUR_CHAR(l);
2361: while ((nbchars < max) && (c != end) &&
2362: (c != end2) && (c != end3)) {
1.77 daniel 2363:
1.161 daniel 2364: if (c == 0) break;
2365: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2366: int val = xmlParseCharRef(ctxt);
1.161 daniel 2367: COPY_BUF(0,buffer,nbchars,val);
2368: NEXTL(l);
2369: } else if ((c == '&') && (ctxt->token != '&') &&
2370: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2371: ent = xmlParseEntityRef(ctxt);
2372: if ((ent != NULL) &&
2373: (ctxt->replaceEntities != 0)) {
2374: current = ent->content;
2375: while (*current != 0) {
1.161 daniel 2376: buffer[nbchars++] = *current++;
2377: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2378: growBuffer(buffer);
1.77 daniel 2379: }
2380: }
1.98 daniel 2381: } else if (ent != NULL) {
1.123 daniel 2382: const xmlChar *cur = ent->name;
1.98 daniel 2383:
1.161 daniel 2384: buffer[nbchars++] = '&';
2385: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2386: growBuffer(buffer);
2387: }
1.161 daniel 2388: while (*cur != 0) {
2389: buffer[nbchars++] = *cur++;
2390: }
2391: buffer[nbchars++] = ';';
1.77 daniel 2392: }
1.161 daniel 2393: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2394: /*
1.77 daniel 2395: * a PEReference induce to switch the entity flow,
2396: * we break here to flush the current set of chars
2397: * parsed if any. We will be called back later.
1.97 daniel 2398: */
1.91 daniel 2399: if (nbchars != 0) break;
1.77 daniel 2400:
2401: xmlParsePEReference(ctxt);
1.79 daniel 2402:
1.97 daniel 2403: /*
1.79 daniel 2404: * Pop-up of finished entities.
1.97 daniel 2405: */
1.152 daniel 2406: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2407: xmlPopInput(ctxt);
2408:
1.98 daniel 2409: break;
1.77 daniel 2410: } else {
1.161 daniel 2411: COPY_BUF(l,buffer,nbchars,c);
2412: NEXTL(l);
2413: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2414: growBuffer(buffer);
2415: }
1.77 daniel 2416: }
1.161 daniel 2417: c = CUR_CHAR(l);
1.77 daniel 2418: }
1.161 daniel 2419: buffer[nbchars++] = 0;
1.77 daniel 2420: return(buffer);
2421: }
2422:
1.135 daniel 2423: /**
2424: * xmlStringDecodeEntities:
2425: * @ctxt: the parser context
2426: * @str: the input string
2427: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2428: * @end: an end marker xmlChar, 0 if none
2429: * @end2: an end marker xmlChar, 0 if none
2430: * @end3: an end marker xmlChar, 0 if none
2431: *
2432: * [67] Reference ::= EntityRef | CharRef
2433: *
2434: * [69] PEReference ::= '%' Name ';'
2435: *
2436: * Returns A newly allocated string with the substitution done. The caller
2437: * must deallocate it !
2438: */
2439: xmlChar *
2440: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2441: xmlChar end, xmlChar end2, xmlChar end3) {
2442: xmlChar *buffer = NULL;
2443: int buffer_size = 0;
2444:
2445: xmlChar *current = NULL;
2446: xmlEntityPtr ent;
1.176 daniel 2447: int c,l;
2448: int nbchars = 0;
1.135 daniel 2449:
1.211 veillard 2450: if (str == NULL)
2451: return(NULL);
2452:
1.185 daniel 2453: if (ctxt->depth > 40) {
2454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2455: ctxt->sax->error(ctxt->userData,
2456: "Detected entity reference loop\n");
2457: ctxt->wellFormed = 0;
2458: ctxt->disableSAX = 1;
2459: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2460: return(NULL);
2461: }
2462:
1.135 daniel 2463: /*
2464: * allocate a translation buffer.
2465: */
1.140 daniel 2466: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2467: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2468: if (buffer == NULL) {
2469: perror("xmlDecodeEntities: malloc failed");
2470: return(NULL);
2471: }
2472:
2473: /*
2474: * Ok loop until we reach one of the ending char or a size limit.
2475: */
1.176 daniel 2476: c = CUR_SCHAR(str, l);
2477: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2478:
1.176 daniel 2479: if (c == 0) break;
2480: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2481: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2482: if (val != 0) {
2483: COPY_BUF(0,buffer,nbchars,val);
2484: }
2485: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2486: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2487: if ((ent != NULL) && (ent->content != NULL)) {
2488: xmlChar *rep;
2489:
2490: ctxt->depth++;
2491: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2492: 0, 0, 0);
2493: ctxt->depth--;
2494: if (rep != NULL) {
2495: current = rep;
2496: while (*current != 0) {
2497: buffer[nbchars++] = *current++;
2498: if (nbchars >
2499: buffer_size - XML_PARSER_BUFFER_SIZE) {
2500: growBuffer(buffer);
2501: }
1.135 daniel 2502: }
1.185 daniel 2503: xmlFree(rep);
1.135 daniel 2504: }
2505: } else if (ent != NULL) {
2506: int i = xmlStrlen(ent->name);
2507: const xmlChar *cur = ent->name;
2508:
1.176 daniel 2509: buffer[nbchars++] = '&';
2510: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2511: growBuffer(buffer);
2512: }
2513: for (;i > 0;i--)
1.176 daniel 2514: buffer[nbchars++] = *cur++;
2515: buffer[nbchars++] = ';';
1.135 daniel 2516: }
1.176 daniel 2517: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2518: ent = xmlParseStringPEReference(ctxt, &str);
2519: if (ent != NULL) {
1.185 daniel 2520: xmlChar *rep;
2521:
2522: ctxt->depth++;
2523: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2524: 0, 0, 0);
2525: ctxt->depth--;
2526: if (rep != NULL) {
2527: current = rep;
2528: while (*current != 0) {
2529: buffer[nbchars++] = *current++;
2530: if (nbchars >
2531: buffer_size - XML_PARSER_BUFFER_SIZE) {
2532: growBuffer(buffer);
2533: }
1.135 daniel 2534: }
1.185 daniel 2535: xmlFree(rep);
1.135 daniel 2536: }
2537: }
2538: } else {
1.176 daniel 2539: COPY_BUF(l,buffer,nbchars,c);
2540: str += l;
2541: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2542: growBuffer(buffer);
2543: }
2544: }
1.176 daniel 2545: c = CUR_SCHAR(str, l);
1.135 daniel 2546: }
1.176 daniel 2547: buffer[nbchars++] = 0;
1.135 daniel 2548: return(buffer);
2549: }
2550:
1.1 veillard 2551:
1.28 daniel 2552: /************************************************************************
2553: * *
1.75 daniel 2554: * Commodity functions to handle encodings *
2555: * *
2556: ************************************************************************/
2557:
1.172 daniel 2558: /*
2559: * xmlCheckLanguageID
2560: * @lang: pointer to the string value
2561: *
2562: * Checks that the value conforms to the LanguageID production:
2563: *
2564: * [33] LanguageID ::= Langcode ('-' Subcode)*
2565: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2566: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2567: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2568: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2569: * [38] Subcode ::= ([a-z] | [A-Z])+
2570: *
2571: * Returns 1 if correct 0 otherwise
2572: **/
2573: int
2574: xmlCheckLanguageID(const xmlChar *lang) {
2575: const xmlChar *cur = lang;
2576:
2577: if (cur == NULL)
2578: return(0);
2579: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2580: ((cur[0] == 'I') && (cur[1] == '-'))) {
2581: /*
2582: * IANA code
2583: */
2584: cur += 2;
2585: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2586: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2587: cur++;
2588: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2589: ((cur[0] == 'X') && (cur[1] == '-'))) {
2590: /*
2591: * User code
2592: */
2593: cur += 2;
2594: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2595: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2596: cur++;
2597: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2598: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2599: /*
2600: * ISO639
2601: */
2602: cur++;
2603: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2604: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2605: cur++;
2606: else
2607: return(0);
2608: } else
2609: return(0);
2610: while (cur[0] != 0) {
2611: if (cur[0] != '-')
2612: return(0);
2613: cur++;
2614: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2615: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2616: cur++;
2617: else
2618: return(0);
2619: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2620: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2621: cur++;
2622: }
2623: return(1);
2624: }
2625:
1.75 daniel 2626: /**
2627: * xmlSwitchEncoding:
2628: * @ctxt: the parser context
1.124 daniel 2629: * @enc: the encoding value (number)
1.75 daniel 2630: *
2631: * change the input functions when discovering the character encoding
2632: * of a given entity.
1.193 daniel 2633: *
2634: * Returns 0 in case of success, -1 otherwise
1.75 daniel 2635: */
1.193 daniel 2636: int
1.75 daniel 2637: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2638: {
1.156 daniel 2639: xmlCharEncodingHandlerPtr handler;
2640:
1.193 daniel 2641: switch (enc) {
2642: case XML_CHAR_ENCODING_ERROR:
2643: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2645: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2646: ctxt->wellFormed = 0;
2647: ctxt->disableSAX = 1;
2648: break;
2649: case XML_CHAR_ENCODING_NONE:
2650: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2651: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2652: return(0);
2653: case XML_CHAR_ENCODING_UTF8:
2654: /* default encoding, no conversion should be needed */
1.198 daniel 2655: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2656: return(0);
2657: default:
2658: break;
2659: }
1.156 daniel 2660: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 2661: if (handler == NULL) {
2662: /*
2663: * Default handlers.
2664: */
2665: switch (enc) {
2666: case XML_CHAR_ENCODING_ERROR:
2667: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2670: ctxt->wellFormed = 0;
2671: ctxt->disableSAX = 1;
1.198 daniel 2672: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2673: break;
2674: case XML_CHAR_ENCODING_NONE:
2675: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2676: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2677: return(0);
2678: case XML_CHAR_ENCODING_UTF8:
1.211 veillard 2679: case XML_CHAR_ENCODING_ASCII:
1.193 daniel 2680: /* default encoding, no conversion should be needed */
1.198 daniel 2681: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2682: return(0);
2683: case XML_CHAR_ENCODING_UTF16LE:
2684: break;
2685: case XML_CHAR_ENCODING_UTF16BE:
2686: break;
2687: case XML_CHAR_ENCODING_UCS4LE:
2688: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2690: ctxt->sax->error(ctxt->userData,
2691: "char encoding USC4 little endian not supported\n");
2692: break;
2693: case XML_CHAR_ENCODING_UCS4BE:
2694: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2696: ctxt->sax->error(ctxt->userData,
2697: "char encoding USC4 big endian not supported\n");
2698: break;
2699: case XML_CHAR_ENCODING_EBCDIC:
2700: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2702: ctxt->sax->error(ctxt->userData,
2703: "char encoding EBCDIC not supported\n");
2704: break;
2705: case XML_CHAR_ENCODING_UCS4_2143:
2706: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2708: ctxt->sax->error(ctxt->userData,
2709: "char encoding UCS4 2143 not supported\n");
2710: break;
2711: case XML_CHAR_ENCODING_UCS4_3412:
2712: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2714: ctxt->sax->error(ctxt->userData,
2715: "char encoding UCS4 3412 not supported\n");
2716: break;
2717: case XML_CHAR_ENCODING_UCS2:
2718: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2720: ctxt->sax->error(ctxt->userData,
2721: "char encoding UCS2 not supported\n");
2722: break;
2723: case XML_CHAR_ENCODING_8859_1:
2724: case XML_CHAR_ENCODING_8859_2:
2725: case XML_CHAR_ENCODING_8859_3:
2726: case XML_CHAR_ENCODING_8859_4:
2727: case XML_CHAR_ENCODING_8859_5:
2728: case XML_CHAR_ENCODING_8859_6:
2729: case XML_CHAR_ENCODING_8859_7:
2730: case XML_CHAR_ENCODING_8859_8:
2731: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 2732: /*
1.203 veillard 2733: * We used to keep the internal content in the
2734: * document encoding however this turns being unmaintainable
2735: * So xmlGetCharEncodingHandler() will return non-null
2736: * values for this now.
1.195 daniel 2737: */
2738: if ((ctxt->inputNr == 1) &&
2739: (ctxt->encoding == NULL) &&
2740: (ctxt->input->encoding != NULL)) {
2741: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2742: }
1.198 daniel 2743: ctxt->charset = enc;
1.195 daniel 2744: return(0);
1.193 daniel 2745: case XML_CHAR_ENCODING_2022_JP:
2746: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2747: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2748: ctxt->sax->error(ctxt->userData,
2749: "char encoding ISO-2022-JPnot supported\n");
2750: break;
2751: case XML_CHAR_ENCODING_SHIFT_JIS:
2752: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2753: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754: ctxt->sax->error(ctxt->userData,
2755: "char encoding Shift_JIS not supported\n");
2756: break;
2757: case XML_CHAR_ENCODING_EUC_JP:
2758: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2760: ctxt->sax->error(ctxt->userData,
2761: "char encoding EUC-JPnot supported\n");
2762: break;
2763: }
2764: }
2765: if (handler == NULL)
2766: return(-1);
1.198 daniel 2767: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2768: return(xmlSwitchToEncoding(ctxt, handler));
2769: }
2770:
2771: /**
2772: * xmlSwitchToEncoding:
2773: * @ctxt: the parser context
2774: * @handler: the encoding handler
2775: *
2776: * change the input functions when discovering the character encoding
2777: * of a given entity.
2778: *
2779: * Returns 0 in case of success, -1 otherwise
2780: */
2781: int
2782: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2783: {
1.194 daniel 2784: int nbchars;
2785:
1.156 daniel 2786: if (handler != NULL) {
2787: if (ctxt->input != NULL) {
2788: if (ctxt->input->buf != NULL) {
2789: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 2790: if (ctxt->input->buf->encoder == handler)
2791: return(0);
1.197 daniel 2792: /*
2793: * Note: this is a bit dangerous, but that's what it
2794: * takes to use nearly compatible signature for different
2795: * encodings.
2796: */
2797: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2798: ctxt->input->buf->encoder = handler;
2799: return(0);
1.156 daniel 2800: }
2801: ctxt->input->buf->encoder = handler;
2802:
2803: /*
1.194 daniel 2804: * Is there already some content down the pipe to convert ?
1.156 daniel 2805: */
2806: if ((ctxt->input->buf->buffer != NULL) &&
2807: (ctxt->input->buf->buffer->use > 0)) {
2808: int processed;
2809:
2810: /*
2811: * Specific handling of the Byte Order Mark for
2812: * UTF-16
2813: */
1.195 daniel 2814: if ((handler->name != NULL) &&
2815: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 2816: (ctxt->input->cur[0] == 0xFF) &&
2817: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 2818: ctxt->input->cur += 2;
1.156 daniel 2819: }
1.195 daniel 2820: if ((handler->name != NULL) &&
2821: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 2822: (ctxt->input->cur[0] == 0xFE) &&
2823: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 2824: ctxt->input->cur += 2;
1.156 daniel 2825: }
2826:
2827: /*
1.194 daniel 2828: * Shring the current input buffer.
2829: * Move it as the raw buffer and create a new input buffer
1.156 daniel 2830: */
2831: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 2832: xmlBufferShrink(ctxt->input->buf->buffer, processed);
2833: ctxt->input->buf->raw = ctxt->input->buf->buffer;
2834: ctxt->input->buf->buffer = xmlBufferCreate();
2835:
2836: /*
1.197 daniel 2837: * convert just enough to get
2838: * '<?xml version="1.0" encoding="xxx"?>'
2839: * parsed with the autodetected encoding
2840: * into the parser reading buffer.
1.194 daniel 2841: */
1.197 daniel 2842: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2843: ctxt->input->buf->buffer,
2844: ctxt->input->buf->raw);
1.194 daniel 2845: if (nbchars < 0) {
2846: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2847: return(-1);
1.156 daniel 2848: }
1.194 daniel 2849: ctxt->input->base =
2850: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2851: }
1.193 daniel 2852: return(0);
1.156 daniel 2853: } else {
1.209 veillard 2854: if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1.156 daniel 2855: /*
2856: * When parsing a static memory array one must know the
2857: * size to be able to convert the buffer.
2858: */
2859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2860: ctxt->sax->error(ctxt->userData,
2861: "xmlSwitchEncoding : no input\n");
1.193 daniel 2862: return(-1);
1.156 daniel 2863: } else {
1.194 daniel 2864: int processed;
2865:
2866: /*
2867: * Shring the current input buffer.
2868: * Move it as the raw buffer and create a new input buffer
2869: */
2870: processed = ctxt->input->cur - ctxt->input->base;
1.209 veillard 2871:
1.194 daniel 2872: ctxt->input->buf->raw = xmlBufferCreate();
2873: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1.209 veillard 2874: ctxt->input->length - processed);
1.194 daniel 2875: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 2876:
2877: /*
1.194 daniel 2878: * convert as much as possible of the raw input
2879: * to the parser reading buffer.
2880: */
2881: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2882: ctxt->input->buf->buffer,
2883: ctxt->input->buf->raw);
2884: if (nbchars < 0) {
2885: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2886: return(-1);
1.156 daniel 2887: }
1.194 daniel 2888:
1.156 daniel 2889: /*
2890: * Conversion succeeded, get rid of the old buffer
2891: */
2892: if ((ctxt->input->free != NULL) &&
2893: (ctxt->input->base != NULL))
2894: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 2895: ctxt->input->base =
2896: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2897: }
2898: }
2899: } else {
2900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2901: ctxt->sax->error(ctxt->userData,
2902: "xmlSwitchEncoding : no input\n");
1.193 daniel 2903: return(-1);
1.156 daniel 2904: }
1.195 daniel 2905: /*
2906: * The parsing is now done in UTF8 natively
2907: */
1.198 daniel 2908: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2909: } else
2910: return(-1);
2911: return(0);
1.156 daniel 2912:
1.75 daniel 2913: }
2914:
2915: /************************************************************************
2916: * *
1.123 daniel 2917: * Commodity functions to handle xmlChars *
1.28 daniel 2918: * *
2919: ************************************************************************/
2920:
1.50 daniel 2921: /**
2922: * xmlStrndup:
1.123 daniel 2923: * @cur: the input xmlChar *
1.50 daniel 2924: * @len: the len of @cur
2925: *
1.123 daniel 2926: * a strndup for array of xmlChar's
1.68 daniel 2927: *
1.123 daniel 2928: * Returns a new xmlChar * or NULL
1.1 veillard 2929: */
1.123 daniel 2930: xmlChar *
2931: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2932: xmlChar *ret;
2933:
2934: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 2935: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2936: if (ret == NULL) {
1.86 daniel 2937: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2938: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2939: return(NULL);
2940: }
1.123 daniel 2941: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2942: ret[len] = 0;
2943: return(ret);
2944: }
2945:
1.50 daniel 2946: /**
2947: * xmlStrdup:
1.123 daniel 2948: * @cur: the input xmlChar *
1.50 daniel 2949: *
1.152 daniel 2950: * a strdup for array of xmlChar's. Since they are supposed to be
2951: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2952: * a termination mark of '0'.
1.68 daniel 2953: *
1.123 daniel 2954: * Returns a new xmlChar * or NULL
1.1 veillard 2955: */
1.123 daniel 2956: xmlChar *
2957: xmlStrdup(const xmlChar *cur) {
2958: const xmlChar *p = cur;
1.1 veillard 2959:
1.135 daniel 2960: if (cur == NULL) return(NULL);
1.152 daniel 2961: while (*p != 0) p++;
1.1 veillard 2962: return(xmlStrndup(cur, p - cur));
2963: }
2964:
1.50 daniel 2965: /**
2966: * xmlCharStrndup:
2967: * @cur: the input char *
2968: * @len: the len of @cur
2969: *
1.123 daniel 2970: * a strndup for char's to xmlChar's
1.68 daniel 2971: *
1.123 daniel 2972: * Returns a new xmlChar * or NULL
1.45 daniel 2973: */
2974:
1.123 daniel 2975: xmlChar *
1.55 daniel 2976: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2977: int i;
1.135 daniel 2978: xmlChar *ret;
2979:
2980: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 2981: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2982: if (ret == NULL) {
1.86 daniel 2983: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2984: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2985: return(NULL);
2986: }
2987: for (i = 0;i < len;i++)
1.123 daniel 2988: ret[i] = (xmlChar) cur[i];
1.45 daniel 2989: ret[len] = 0;
2990: return(ret);
2991: }
2992:
1.50 daniel 2993: /**
2994: * xmlCharStrdup:
2995: * @cur: the input char *
2996: * @len: the len of @cur
2997: *
1.123 daniel 2998: * a strdup for char's to xmlChar's
1.68 daniel 2999: *
1.123 daniel 3000: * Returns a new xmlChar * or NULL
1.45 daniel 3001: */
3002:
1.123 daniel 3003: xmlChar *
1.55 daniel 3004: xmlCharStrdup(const char *cur) {
1.45 daniel 3005: const char *p = cur;
3006:
1.135 daniel 3007: if (cur == NULL) return(NULL);
1.45 daniel 3008: while (*p != '\0') p++;
3009: return(xmlCharStrndup(cur, p - cur));
3010: }
3011:
1.50 daniel 3012: /**
3013: * xmlStrcmp:
1.123 daniel 3014: * @str1: the first xmlChar *
3015: * @str2: the second xmlChar *
1.50 daniel 3016: *
1.123 daniel 3017: * a strcmp for xmlChar's
1.68 daniel 3018: *
3019: * Returns the integer result of the comparison
1.14 veillard 3020: */
3021:
1.55 daniel 3022: int
1.123 daniel 3023: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 3024: register int tmp;
3025:
1.135 daniel 3026: if ((str1 == NULL) && (str2 == NULL)) return(0);
3027: if (str1 == NULL) return(-1);
3028: if (str2 == NULL) return(1);
1.14 veillard 3029: do {
3030: tmp = *str1++ - *str2++;
3031: if (tmp != 0) return(tmp);
3032: } while ((*str1 != 0) && (*str2 != 0));
3033: return (*str1 - *str2);
3034: }
3035:
1.50 daniel 3036: /**
3037: * xmlStrncmp:
1.123 daniel 3038: * @str1: the first xmlChar *
3039: * @str2: the second xmlChar *
1.50 daniel 3040: * @len: the max comparison length
3041: *
1.123 daniel 3042: * a strncmp for xmlChar's
1.68 daniel 3043: *
3044: * Returns the integer result of the comparison
1.14 veillard 3045: */
3046:
1.55 daniel 3047: int
1.123 daniel 3048: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 3049: register int tmp;
3050:
3051: if (len <= 0) return(0);
1.135 daniel 3052: if ((str1 == NULL) && (str2 == NULL)) return(0);
3053: if (str1 == NULL) return(-1);
3054: if (str2 == NULL) return(1);
1.14 veillard 3055: do {
3056: tmp = *str1++ - *str2++;
3057: if (tmp != 0) return(tmp);
3058: len--;
3059: if (len <= 0) return(0);
3060: } while ((*str1 != 0) && (*str2 != 0));
3061: return (*str1 - *str2);
3062: }
3063:
1.50 daniel 3064: /**
3065: * xmlStrchr:
1.123 daniel 3066: * @str: the xmlChar * array
3067: * @val: the xmlChar to search
1.50 daniel 3068: *
1.123 daniel 3069: * a strchr for xmlChar's
1.68 daniel 3070: *
1.123 daniel 3071: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 3072: */
3073:
1.123 daniel 3074: const xmlChar *
3075: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 3076: if (str == NULL) return(NULL);
1.14 veillard 3077: while (*str != 0) {
1.123 daniel 3078: if (*str == val) return((xmlChar *) str);
1.14 veillard 3079: str++;
3080: }
3081: return(NULL);
1.89 daniel 3082: }
3083:
3084: /**
3085: * xmlStrstr:
1.123 daniel 3086: * @str: the xmlChar * array (haystack)
3087: * @val: the xmlChar to search (needle)
1.89 daniel 3088: *
1.123 daniel 3089: * a strstr for xmlChar's
1.89 daniel 3090: *
1.123 daniel 3091: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 3092: */
3093:
1.123 daniel 3094: const xmlChar *
3095: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 3096: int n;
3097:
3098: if (str == NULL) return(NULL);
3099: if (val == NULL) return(NULL);
3100: n = xmlStrlen(val);
3101:
3102: if (n == 0) return(str);
3103: while (*str != 0) {
3104: if (*str == *val) {
1.123 daniel 3105: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 3106: }
3107: str++;
3108: }
3109: return(NULL);
3110: }
3111:
3112: /**
3113: * xmlStrsub:
1.123 daniel 3114: * @str: the xmlChar * array (haystack)
1.89 daniel 3115: * @start: the index of the first char (zero based)
3116: * @len: the length of the substring
3117: *
3118: * Extract a substring of a given string
3119: *
1.123 daniel 3120: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 3121: */
3122:
1.123 daniel 3123: xmlChar *
3124: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 3125: int i;
3126:
3127: if (str == NULL) return(NULL);
3128: if (start < 0) return(NULL);
1.90 daniel 3129: if (len < 0) return(NULL);
1.89 daniel 3130:
3131: for (i = 0;i < start;i++) {
3132: if (*str == 0) return(NULL);
3133: str++;
3134: }
3135: if (*str == 0) return(NULL);
3136: return(xmlStrndup(str, len));
1.14 veillard 3137: }
1.28 daniel 3138:
1.50 daniel 3139: /**
3140: * xmlStrlen:
1.123 daniel 3141: * @str: the xmlChar * array
1.50 daniel 3142: *
1.127 daniel 3143: * length of a xmlChar's string
1.68 daniel 3144: *
1.123 daniel 3145: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 3146: */
3147:
1.55 daniel 3148: int
1.123 daniel 3149: xmlStrlen(const xmlChar *str) {
1.45 daniel 3150: int len = 0;
3151:
3152: if (str == NULL) return(0);
3153: while (*str != 0) {
3154: str++;
3155: len++;
3156: }
3157: return(len);
3158: }
3159:
1.50 daniel 3160: /**
3161: * xmlStrncat:
1.123 daniel 3162: * @cur: the original xmlChar * array
3163: * @add: the xmlChar * array added
1.50 daniel 3164: * @len: the length of @add
3165: *
1.123 daniel 3166: * a strncat for array of xmlChar's
1.68 daniel 3167: *
1.123 daniel 3168: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 3169: */
3170:
1.123 daniel 3171: xmlChar *
3172: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 3173: int size;
1.123 daniel 3174: xmlChar *ret;
1.45 daniel 3175:
3176: if ((add == NULL) || (len == 0))
3177: return(cur);
3178: if (cur == NULL)
3179: return(xmlStrndup(add, len));
3180:
3181: size = xmlStrlen(cur);
1.204 veillard 3182: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 3183: if (ret == NULL) {
1.86 daniel 3184: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 3185: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 3186: return(cur);
3187: }
1.123 daniel 3188: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 3189: ret[size + len] = 0;
3190: return(ret);
3191: }
3192:
1.50 daniel 3193: /**
3194: * xmlStrcat:
1.123 daniel 3195: * @cur: the original xmlChar * array
3196: * @add: the xmlChar * array added
1.50 daniel 3197: *
1.152 daniel 3198: * a strcat for array of xmlChar's. Since they are supposed to be
3199: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
3200: * a termination mark of '0'.
1.68 daniel 3201: *
1.123 daniel 3202: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 3203: */
1.123 daniel 3204: xmlChar *
3205: xmlStrcat(xmlChar *cur, const xmlChar *add) {
3206: const xmlChar *p = add;
1.45 daniel 3207:
3208: if (add == NULL) return(cur);
3209: if (cur == NULL)
3210: return(xmlStrdup(add));
3211:
1.152 daniel 3212: while (*p != 0) p++;
1.45 daniel 3213: return(xmlStrncat(cur, add, p - add));
3214: }
3215:
3216: /************************************************************************
3217: * *
3218: * Commodity functions, cleanup needed ? *
3219: * *
3220: ************************************************************************/
3221:
1.50 daniel 3222: /**
3223: * areBlanks:
3224: * @ctxt: an XML parser context
1.123 daniel 3225: * @str: a xmlChar *
1.50 daniel 3226: * @len: the size of @str
3227: *
1.45 daniel 3228: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 3229: *
1.68 daniel 3230: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 3231: */
3232:
1.123 daniel 3233: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 3234: int i, ret;
1.45 daniel 3235: xmlNodePtr lastChild;
3236:
1.176 daniel 3237: /*
3238: * Check for xml:space value.
3239: */
3240: if (*(ctxt->space) == 1)
3241: return(0);
3242:
3243: /*
3244: * Check that the string is made of blanks
3245: */
1.45 daniel 3246: for (i = 0;i < len;i++)
3247: if (!(IS_BLANK(str[i]))) return(0);
3248:
1.176 daniel 3249: /*
3250: * Look if the element is mixed content in the Dtd if available
3251: */
1.104 daniel 3252: if (ctxt->myDoc != NULL) {
3253: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3254: if (ret == 0) return(1);
3255: if (ret == 1) return(0);
3256: }
1.176 daniel 3257:
1.104 daniel 3258: /*
1.176 daniel 3259: * Otherwise, heuristic :-\
1.104 daniel 3260: */
1.179 daniel 3261: if (ctxt->keepBlanks)
3262: return(0);
3263: if (RAW != '<') return(0);
3264: if (ctxt->node == NULL) return(0);
3265: if ((ctxt->node->children == NULL) &&
3266: (RAW == '<') && (NXT(1) == '/')) return(0);
3267:
1.45 daniel 3268: lastChild = xmlGetLastChild(ctxt->node);
3269: if (lastChild == NULL) {
3270: if (ctxt->node->content != NULL) return(0);
3271: } else if (xmlNodeIsText(lastChild))
3272: return(0);
1.157 daniel 3273: else if ((ctxt->node->children != NULL) &&
3274: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 3275: return(0);
1.45 daniel 3276: return(1);
3277: }
3278:
1.50 daniel 3279: /**
3280: * xmlHandleEntity:
3281: * @ctxt: an XML parser context
3282: * @entity: an XML entity pointer.
3283: *
3284: * Default handling of defined entities, when should we define a new input
1.45 daniel 3285: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 3286: *
3287: * OBSOLETE: to be removed at some point.
1.45 daniel 3288: */
3289:
1.55 daniel 3290: void
3291: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 3292: int len;
1.50 daniel 3293: xmlParserInputPtr input;
1.45 daniel 3294:
3295: if (entity->content == NULL) {
1.123 daniel 3296: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3298: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 3299: entity->name);
1.59 daniel 3300: ctxt->wellFormed = 0;
1.180 daniel 3301: ctxt->disableSAX = 1;
1.45 daniel 3302: return;
3303: }
3304: len = xmlStrlen(entity->content);
3305: if (len <= 2) goto handle_as_char;
3306:
3307: /*
3308: * Redefine its content as an input stream.
3309: */
1.50 daniel 3310: input = xmlNewEntityInputStream(ctxt, entity);
3311: xmlPushInput(ctxt, input);
1.45 daniel 3312: return;
3313:
3314: handle_as_char:
3315: /*
3316: * Just handle the content as a set of chars.
3317: */
1.171 daniel 3318: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3319: (ctxt->sax->characters != NULL))
1.74 daniel 3320: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 3321:
3322: }
3323:
3324: /*
3325: * Forward definition for recusive behaviour.
3326: */
1.77 daniel 3327: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3328: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 3329:
1.28 daniel 3330: /************************************************************************
3331: * *
3332: * Extra stuff for namespace support *
3333: * Relates to http://www.w3.org/TR/WD-xml-names *
3334: * *
3335: ************************************************************************/
3336:
1.50 daniel 3337: /**
3338: * xmlNamespaceParseNCName:
3339: * @ctxt: an XML parser context
3340: *
3341: * parse an XML namespace name.
1.28 daniel 3342: *
3343: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3344: *
3345: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3346: * CombiningChar | Extender
1.68 daniel 3347: *
3348: * Returns the namespace name or NULL
1.28 daniel 3349: */
3350:
1.123 daniel 3351: xmlChar *
1.55 daniel 3352: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 3353: xmlChar buf[XML_MAX_NAMELEN + 5];
3354: int len = 0, l;
3355: int cur = CUR_CHAR(l);
1.28 daniel 3356:
1.156 daniel 3357: /* load first the value of the char !!! */
1.152 daniel 3358: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 3359:
1.152 daniel 3360: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3361: (cur == '.') || (cur == '-') ||
3362: (cur == '_') ||
3363: (IS_COMBINING(cur)) ||
3364: (IS_EXTENDER(cur))) {
3365: COPY_BUF(l,buf,len,cur);
3366: NEXTL(l);
3367: cur = CUR_CHAR(l);
1.91 daniel 3368: if (len >= XML_MAX_NAMELEN) {
3369: fprintf(stderr,
3370: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3371: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3372: (cur == '.') || (cur == '-') ||
3373: (cur == '_') ||
3374: (IS_COMBINING(cur)) ||
3375: (IS_EXTENDER(cur))) {
3376: NEXTL(l);
3377: cur = CUR_CHAR(l);
3378: }
1.91 daniel 3379: break;
3380: }
3381: }
3382: return(xmlStrndup(buf, len));
1.28 daniel 3383: }
3384:
1.50 daniel 3385: /**
3386: * xmlNamespaceParseQName:
3387: * @ctxt: an XML parser context
1.123 daniel 3388: * @prefix: a xmlChar **
1.50 daniel 3389: *
3390: * parse an XML qualified name
1.28 daniel 3391: *
3392: * [NS 5] QName ::= (Prefix ':')? LocalPart
3393: *
3394: * [NS 6] Prefix ::= NCName
3395: *
3396: * [NS 7] LocalPart ::= NCName
1.68 daniel 3397: *
1.127 daniel 3398: * Returns the local part, and prefix is updated
1.50 daniel 3399: * to get the Prefix if any.
1.28 daniel 3400: */
3401:
1.123 daniel 3402: xmlChar *
3403: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3404: xmlChar *ret = NULL;
1.28 daniel 3405:
3406: *prefix = NULL;
3407: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3408: if (RAW == ':') {
1.28 daniel 3409: *prefix = ret;
1.40 daniel 3410: NEXT;
1.28 daniel 3411: ret = xmlNamespaceParseNCName(ctxt);
3412: }
3413:
3414: return(ret);
3415: }
3416:
1.50 daniel 3417: /**
1.72 daniel 3418: * xmlSplitQName:
1.162 daniel 3419: * @ctxt: an XML parser context
1.72 daniel 3420: * @name: an XML parser context
1.123 daniel 3421: * @prefix: a xmlChar **
1.72 daniel 3422: *
1.206 veillard 3423: * parse an UTF8 encoded XML qualified name string
1.72 daniel 3424: *
3425: * [NS 5] QName ::= (Prefix ':')? LocalPart
3426: *
3427: * [NS 6] Prefix ::= NCName
3428: *
3429: * [NS 7] LocalPart ::= NCName
3430: *
1.127 daniel 3431: * Returns the local part, and prefix is updated
1.72 daniel 3432: * to get the Prefix if any.
3433: */
3434:
1.123 daniel 3435: xmlChar *
1.162 daniel 3436: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3437: xmlChar buf[XML_MAX_NAMELEN + 5];
3438: int len = 0;
1.123 daniel 3439: xmlChar *ret = NULL;
3440: const xmlChar *cur = name;
1.206 veillard 3441: int c;
1.72 daniel 3442:
3443: *prefix = NULL;
1.113 daniel 3444:
3445: /* xml: prefix is not really a namespace */
3446: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3447: (cur[2] == 'l') && (cur[3] == ':'))
3448: return(xmlStrdup(name));
3449:
1.162 daniel 3450: /* nasty but valid */
3451: if (cur[0] == ':')
3452: return(xmlStrdup(name));
3453:
1.206 veillard 3454: c = *cur++;
3455: while ((c != 0) && (c != ':')) {
3456: buf[len++] = c;
3457: c = *cur++;
1.162 daniel 3458: }
1.72 daniel 3459:
1.162 daniel 3460: ret = xmlStrndup(buf, len);
1.72 daniel 3461:
1.162 daniel 3462: if (c == ':') {
1.206 veillard 3463: c = *cur++;
3464: if (c == 0) return(ret);
1.72 daniel 3465: *prefix = ret;
1.162 daniel 3466: len = 0;
1.72 daniel 3467:
1.206 veillard 3468: while (c != 0) {
3469: buf[len++] = c;
3470: c = *cur++;
1.162 daniel 3471: }
1.72 daniel 3472:
1.162 daniel 3473: ret = xmlStrndup(buf, len);
1.72 daniel 3474: }
3475:
3476: return(ret);
3477: }
1.206 veillard 3478:
1.72 daniel 3479: /**
1.50 daniel 3480: * xmlNamespaceParseNSDef:
3481: * @ctxt: an XML parser context
3482: *
3483: * parse a namespace prefix declaration
1.28 daniel 3484: *
3485: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3486: *
3487: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3488: *
3489: * Returns the namespace name
1.28 daniel 3490: */
3491:
1.123 daniel 3492: xmlChar *
1.55 daniel 3493: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3494: xmlChar *name = NULL;
1.28 daniel 3495:
1.152 daniel 3496: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3497: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3498: (NXT(4) == 's')) {
3499: SKIP(5);
1.152 daniel 3500: if (RAW == ':') {
1.40 daniel 3501: NEXT;
1.28 daniel 3502: name = xmlNamespaceParseNCName(ctxt);
3503: }
3504: }
1.39 daniel 3505: return(name);
1.28 daniel 3506: }
3507:
1.50 daniel 3508: /**
3509: * xmlParseQuotedString:
3510: * @ctxt: an XML parser context
3511: *
1.45 daniel 3512: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3513: * To be removed at next drop of binary compatibility
1.68 daniel 3514: *
3515: * Returns the string parser or NULL.
1.45 daniel 3516: */
1.123 daniel 3517: xmlChar *
1.55 daniel 3518: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3519: xmlChar *buf = NULL;
1.152 daniel 3520: int len = 0,l;
1.140 daniel 3521: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3522: int c;
1.45 daniel 3523:
1.135 daniel 3524: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3525: if (buf == NULL) {
3526: fprintf(stderr, "malloc of %d byte failed\n", size);
3527: return(NULL);
3528: }
1.152 daniel 3529: if (RAW == '"') {
1.45 daniel 3530: NEXT;
1.152 daniel 3531: c = CUR_CHAR(l);
1.135 daniel 3532: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3533: if (len + 5 >= size) {
1.135 daniel 3534: size *= 2;
1.204 veillard 3535: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3536: if (buf == NULL) {
3537: fprintf(stderr, "realloc of %d byte failed\n", size);
3538: return(NULL);
3539: }
3540: }
1.152 daniel 3541: COPY_BUF(l,buf,len,c);
3542: NEXTL(l);
3543: c = CUR_CHAR(l);
1.135 daniel 3544: }
3545: if (c != '"') {
1.123 daniel 3546: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3548: ctxt->sax->error(ctxt->userData,
3549: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3550: ctxt->wellFormed = 0;
1.180 daniel 3551: ctxt->disableSAX = 1;
1.55 daniel 3552: } else {
1.45 daniel 3553: NEXT;
3554: }
1.152 daniel 3555: } else if (RAW == '\''){
1.45 daniel 3556: NEXT;
1.135 daniel 3557: c = CUR;
3558: while (IS_CHAR(c) && (c != '\'')) {
3559: if (len + 1 >= size) {
3560: size *= 2;
1.204 veillard 3561: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3562: if (buf == NULL) {
3563: fprintf(stderr, "realloc of %d byte failed\n", size);
3564: return(NULL);
3565: }
3566: }
3567: buf[len++] = c;
3568: NEXT;
3569: c = CUR;
3570: }
1.152 daniel 3571: if (RAW != '\'') {
1.123 daniel 3572: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3573: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3574: ctxt->sax->error(ctxt->userData,
3575: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3576: ctxt->wellFormed = 0;
1.180 daniel 3577: ctxt->disableSAX = 1;
1.55 daniel 3578: } else {
1.45 daniel 3579: NEXT;
3580: }
3581: }
1.135 daniel 3582: return(buf);
1.45 daniel 3583: }
3584:
1.50 daniel 3585: /**
3586: * xmlParseNamespace:
3587: * @ctxt: an XML parser context
3588: *
1.45 daniel 3589: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3590: *
3591: * This is what the older xml-name Working Draft specified, a bunch of
3592: * other stuff may still rely on it, so support is still here as
1.127 daniel 3593: * if it was declared on the root of the Tree:-(
1.110 daniel 3594: *
3595: * To be removed at next drop of binary compatibility
1.45 daniel 3596: */
3597:
1.55 daniel 3598: void
3599: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3600: xmlChar *href = NULL;
3601: xmlChar *prefix = NULL;
1.45 daniel 3602: int garbage = 0;
3603:
3604: /*
3605: * We just skipped "namespace" or "xml:namespace"
3606: */
3607: SKIP_BLANKS;
3608:
1.153 daniel 3609: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3610: /*
3611: * We can have "ns" or "prefix" attributes
3612: * Old encoding as 'href' or 'AS' attributes is still supported
3613: */
1.152 daniel 3614: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3615: garbage = 0;
3616: SKIP(2);
3617: SKIP_BLANKS;
3618:
1.152 daniel 3619: if (RAW != '=') continue;
1.45 daniel 3620: NEXT;
3621: SKIP_BLANKS;
3622:
3623: href = xmlParseQuotedString(ctxt);
3624: SKIP_BLANKS;
1.152 daniel 3625: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3626: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3627: garbage = 0;
3628: SKIP(4);
3629: SKIP_BLANKS;
3630:
1.152 daniel 3631: if (RAW != '=') continue;
1.45 daniel 3632: NEXT;
3633: SKIP_BLANKS;
3634:
3635: href = xmlParseQuotedString(ctxt);
3636: SKIP_BLANKS;
1.152 daniel 3637: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3638: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3639: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3640: garbage = 0;
3641: SKIP(6);
3642: SKIP_BLANKS;
3643:
1.152 daniel 3644: if (RAW != '=') continue;
1.45 daniel 3645: NEXT;
3646: SKIP_BLANKS;
3647:
3648: prefix = xmlParseQuotedString(ctxt);
3649: SKIP_BLANKS;
1.152 daniel 3650: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3651: garbage = 0;
3652: SKIP(2);
3653: SKIP_BLANKS;
3654:
1.152 daniel 3655: if (RAW != '=') continue;
1.45 daniel 3656: NEXT;
3657: SKIP_BLANKS;
3658:
3659: prefix = xmlParseQuotedString(ctxt);
3660: SKIP_BLANKS;
1.152 daniel 3661: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3662: garbage = 0;
1.91 daniel 3663: NEXT;
1.45 daniel 3664: } else {
3665: /*
3666: * Found garbage when parsing the namespace
3667: */
1.122 daniel 3668: if (!garbage) {
1.55 daniel 3669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3670: ctxt->sax->error(ctxt->userData,
3671: "xmlParseNamespace found garbage\n");
3672: }
1.123 daniel 3673: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3674: ctxt->wellFormed = 0;
1.180 daniel 3675: ctxt->disableSAX = 1;
1.45 daniel 3676: NEXT;
3677: }
3678: }
3679:
3680: MOVETO_ENDTAG(CUR_PTR);
3681: NEXT;
3682:
3683: /*
3684: * Register the DTD.
1.72 daniel 3685: if (href != NULL)
3686: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3687: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3688: */
3689:
1.119 daniel 3690: if (prefix != NULL) xmlFree(prefix);
3691: if (href != NULL) xmlFree(href);
1.45 daniel 3692: }
3693:
1.28 daniel 3694: /************************************************************************
3695: * *
3696: * The parser itself *
3697: * Relates to http://www.w3.org/TR/REC-xml *
3698: * *
3699: ************************************************************************/
1.14 veillard 3700:
1.50 daniel 3701: /**
1.97 daniel 3702: * xmlScanName:
3703: * @ctxt: an XML parser context
3704: *
3705: * Trickery: parse an XML name but without consuming the input flow
3706: * Needed for rollback cases.
3707: *
3708: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3709: * CombiningChar | Extender
3710: *
3711: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3712: *
3713: * [6] Names ::= Name (S Name)*
3714: *
3715: * Returns the Name parsed or NULL
3716: */
3717:
1.123 daniel 3718: xmlChar *
1.97 daniel 3719: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3720: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3721: int len = 0;
3722:
3723: GROW;
1.152 daniel 3724: if (!IS_LETTER(RAW) && (RAW != '_') &&
3725: (RAW != ':')) {
1.97 daniel 3726: return(NULL);
3727: }
3728:
3729: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3730: (NXT(len) == '.') || (NXT(len) == '-') ||
3731: (NXT(len) == '_') || (NXT(len) == ':') ||
3732: (IS_COMBINING(NXT(len))) ||
3733: (IS_EXTENDER(NXT(len)))) {
3734: buf[len] = NXT(len);
3735: len++;
3736: if (len >= XML_MAX_NAMELEN) {
3737: fprintf(stderr,
3738: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3739: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3740: (NXT(len) == '.') || (NXT(len) == '-') ||
3741: (NXT(len) == '_') || (NXT(len) == ':') ||
3742: (IS_COMBINING(NXT(len))) ||
3743: (IS_EXTENDER(NXT(len))))
3744: len++;
3745: break;
3746: }
3747: }
3748: return(xmlStrndup(buf, len));
3749: }
3750:
3751: /**
1.50 daniel 3752: * xmlParseName:
3753: * @ctxt: an XML parser context
3754: *
3755: * parse an XML name.
1.22 daniel 3756: *
3757: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3758: * CombiningChar | Extender
3759: *
3760: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3761: *
3762: * [6] Names ::= Name (S Name)*
1.68 daniel 3763: *
3764: * Returns the Name parsed or NULL
1.1 veillard 3765: */
3766:
1.123 daniel 3767: xmlChar *
1.55 daniel 3768: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3769: xmlChar buf[XML_MAX_NAMELEN + 5];
3770: int len = 0, l;
3771: int c;
1.1 veillard 3772:
1.91 daniel 3773: GROW;
1.160 daniel 3774: c = CUR_CHAR(l);
1.190 daniel 3775: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3776: (!IS_LETTER(c) && (c != '_') &&
3777: (c != ':'))) {
1.91 daniel 3778: return(NULL);
3779: }
1.40 daniel 3780:
1.190 daniel 3781: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3782: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3783: (c == '.') || (c == '-') ||
3784: (c == '_') || (c == ':') ||
3785: (IS_COMBINING(c)) ||
3786: (IS_EXTENDER(c)))) {
1.160 daniel 3787: COPY_BUF(l,buf,len,c);
3788: NEXTL(l);
3789: c = CUR_CHAR(l);
1.91 daniel 3790: if (len >= XML_MAX_NAMELEN) {
3791: fprintf(stderr,
3792: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3793: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3794: (c == '.') || (c == '-') ||
3795: (c == '_') || (c == ':') ||
3796: (IS_COMBINING(c)) ||
3797: (IS_EXTENDER(c))) {
3798: NEXTL(l);
3799: c = CUR_CHAR(l);
1.97 daniel 3800: }
1.91 daniel 3801: break;
3802: }
3803: }
3804: return(xmlStrndup(buf, len));
1.22 daniel 3805: }
3806:
1.50 daniel 3807: /**
1.135 daniel 3808: * xmlParseStringName:
3809: * @ctxt: an XML parser context
3810: * @str: a pointer to an index in the string
3811: *
3812: * parse an XML name.
3813: *
3814: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3815: * CombiningChar | Extender
3816: *
3817: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3818: *
3819: * [6] Names ::= Name (S Name)*
3820: *
3821: * Returns the Name parsed or NULL. The str pointer
3822: * is updated to the current location in the string.
3823: */
3824:
3825: xmlChar *
3826: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3827: xmlChar buf[XML_MAX_NAMELEN + 5];
3828: const xmlChar *cur = *str;
3829: int len = 0, l;
3830: int c;
1.135 daniel 3831:
1.176 daniel 3832: c = CUR_SCHAR(cur, l);
3833: if (!IS_LETTER(c) && (c != '_') &&
3834: (c != ':')) {
1.135 daniel 3835: return(NULL);
3836: }
3837:
1.176 daniel 3838: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3839: (c == '.') || (c == '-') ||
3840: (c == '_') || (c == ':') ||
3841: (IS_COMBINING(c)) ||
3842: (IS_EXTENDER(c))) {
3843: COPY_BUF(l,buf,len,c);
3844: cur += l;
3845: c = CUR_SCHAR(cur, l);
3846: if (len >= XML_MAX_NAMELEN) {
3847: fprintf(stderr,
3848: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3849: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3850: (c == '.') || (c == '-') ||
3851: (c == '_') || (c == ':') ||
3852: (IS_COMBINING(c)) ||
3853: (IS_EXTENDER(c))) {
3854: cur += l;
3855: c = CUR_SCHAR(cur, l);
3856: }
3857: break;
3858: }
1.135 daniel 3859: }
1.176 daniel 3860: *str = cur;
3861: return(xmlStrndup(buf, len));
1.135 daniel 3862: }
3863:
3864: /**
1.50 daniel 3865: * xmlParseNmtoken:
3866: * @ctxt: an XML parser context
3867: *
3868: * parse an XML Nmtoken.
1.22 daniel 3869: *
3870: * [7] Nmtoken ::= (NameChar)+
3871: *
3872: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3873: *
3874: * Returns the Nmtoken parsed or NULL
1.22 daniel 3875: */
3876:
1.123 daniel 3877: xmlChar *
1.55 daniel 3878: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3879: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3880: int len = 0;
1.160 daniel 3881: int c,l;
1.22 daniel 3882:
1.91 daniel 3883: GROW;
1.160 daniel 3884: c = CUR_CHAR(l);
3885: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3886: (c == '.') || (c == '-') ||
3887: (c == '_') || (c == ':') ||
3888: (IS_COMBINING(c)) ||
3889: (IS_EXTENDER(c))) {
3890: COPY_BUF(l,buf,len,c);
3891: NEXTL(l);
3892: c = CUR_CHAR(l);
1.91 daniel 3893: if (len >= XML_MAX_NAMELEN) {
3894: fprintf(stderr,
3895: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3896: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3897: (c == '.') || (c == '-') ||
3898: (c == '_') || (c == ':') ||
3899: (IS_COMBINING(c)) ||
3900: (IS_EXTENDER(c))) {
3901: NEXTL(l);
3902: c = CUR_CHAR(l);
3903: }
1.91 daniel 3904: break;
3905: }
3906: }
1.168 daniel 3907: if (len == 0)
3908: return(NULL);
1.91 daniel 3909: return(xmlStrndup(buf, len));
1.1 veillard 3910: }
3911:
1.50 daniel 3912: /**
3913: * xmlParseEntityValue:
3914: * @ctxt: an XML parser context
1.78 daniel 3915: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3916: *
3917: * parse a value for ENTITY decl.
1.24 daniel 3918: *
3919: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3920: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3921: *
1.78 daniel 3922: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3923: */
3924:
1.123 daniel 3925: xmlChar *
3926: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3927: xmlChar *buf = NULL;
3928: int len = 0;
1.140 daniel 3929: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3930: int c, l;
1.135 daniel 3931: xmlChar stop;
1.123 daniel 3932: xmlChar *ret = NULL;
1.176 daniel 3933: const xmlChar *cur = NULL;
1.98 daniel 3934: xmlParserInputPtr input;
1.24 daniel 3935:
1.152 daniel 3936: if (RAW == '"') stop = '"';
3937: else if (RAW == '\'') stop = '\'';
1.135 daniel 3938: else {
3939: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3941: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3942: ctxt->wellFormed = 0;
1.180 daniel 3943: ctxt->disableSAX = 1;
1.135 daniel 3944: return(NULL);
3945: }
3946: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3947: if (buf == NULL) {
3948: fprintf(stderr, "malloc of %d byte failed\n", size);
3949: return(NULL);
3950: }
1.94 daniel 3951:
1.135 daniel 3952: /*
3953: * The content of the entity definition is copied in a buffer.
3954: */
1.94 daniel 3955:
1.135 daniel 3956: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3957: input = ctxt->input;
3958: GROW;
3959: NEXT;
1.152 daniel 3960: c = CUR_CHAR(l);
1.135 daniel 3961: /*
3962: * NOTE: 4.4.5 Included in Literal
3963: * When a parameter entity reference appears in a literal entity
3964: * value, ... a single or double quote character in the replacement
3965: * text is always treated as a normal data character and will not
3966: * terminate the literal.
3967: * In practice it means we stop the loop only when back at parsing
3968: * the initial entity and the quote is found
3969: */
3970: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3971: if (len + 5 >= size) {
1.135 daniel 3972: size *= 2;
1.204 veillard 3973: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3974: if (buf == NULL) {
3975: fprintf(stderr, "realloc of %d byte failed\n", size);
3976: return(NULL);
1.94 daniel 3977: }
1.79 daniel 3978: }
1.152 daniel 3979: COPY_BUF(l,buf,len,c);
3980: NEXTL(l);
1.98 daniel 3981: /*
1.135 daniel 3982: * Pop-up of finished entities.
1.98 daniel 3983: */
1.152 daniel 3984: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3985: xmlPopInput(ctxt);
1.152 daniel 3986:
3987: c = CUR_CHAR(l);
1.135 daniel 3988: if (c == 0) {
1.94 daniel 3989: GROW;
1.152 daniel 3990: c = CUR_CHAR(l);
1.79 daniel 3991: }
1.135 daniel 3992: }
3993: buf[len] = 0;
3994:
3995: /*
1.176 daniel 3996: * Raise problem w.r.t. '&' and '%' being used in non-entities
3997: * reference constructs. Note Charref will be handled in
3998: * xmlStringDecodeEntities()
3999: */
4000: cur = buf;
4001: while (*cur != 0) {
4002: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
4003: xmlChar *name;
4004: xmlChar tmp = *cur;
4005:
4006: cur++;
4007: name = xmlParseStringName(ctxt, &cur);
4008: if ((name == NULL) || (*cur != ';')) {
4009: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4010: ctxt->sax->error(ctxt->userData,
4011: "EntityValue: '%c' forbidden except for entities references\n",
4012: tmp);
4013: ctxt->wellFormed = 0;
1.180 daniel 4014: ctxt->disableSAX = 1;
1.176 daniel 4015: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
4016: }
4017: if ((ctxt->inSubset == 1) && (tmp == '%')) {
4018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4019: ctxt->sax->error(ctxt->userData,
4020: "EntityValue: PEReferences forbidden in internal subset\n",
4021: tmp);
4022: ctxt->wellFormed = 0;
1.180 daniel 4023: ctxt->disableSAX = 1;
1.176 daniel 4024: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
4025: }
4026: if (name != NULL)
4027: xmlFree(name);
4028: }
4029: cur++;
4030: }
4031:
4032: /*
1.135 daniel 4033: * Then PEReference entities are substituted.
4034: */
4035: if (c != stop) {
4036: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 4037: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 4038: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 4039: ctxt->wellFormed = 0;
1.180 daniel 4040: ctxt->disableSAX = 1;
1.170 daniel 4041: xmlFree(buf);
1.135 daniel 4042: } else {
4043: NEXT;
4044: /*
4045: * NOTE: 4.4.7 Bypassed
4046: * When a general entity reference appears in the EntityValue in
4047: * an entity declaration, it is bypassed and left as is.
1.176 daniel 4048: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 4049: */
4050: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
4051: 0, 0, 0);
4052: if (orig != NULL)
4053: *orig = buf;
4054: else
4055: xmlFree(buf);
1.24 daniel 4056: }
4057:
4058: return(ret);
4059: }
4060:
1.50 daniel 4061: /**
4062: * xmlParseAttValue:
4063: * @ctxt: an XML parser context
4064: *
4065: * parse a value for an attribute
1.78 daniel 4066: * Note: the parser won't do substitution of entities here, this
1.113 daniel 4067: * will be handled later in xmlStringGetNodeList
1.29 daniel 4068: *
4069: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4070: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 4071: *
1.129 daniel 4072: * 3.3.3 Attribute-Value Normalization:
4073: * Before the value of an attribute is passed to the application or
4074: * checked for validity, the XML processor must normalize it as follows:
4075: * - a character reference is processed by appending the referenced
4076: * character to the attribute value
4077: * - an entity reference is processed by recursively processing the
4078: * replacement text of the entity
4079: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4080: * appending #x20 to the normalized value, except that only a single
4081: * #x20 is appended for a "#xD#xA" sequence that is part of an external
4082: * parsed entity or the literal entity value of an internal parsed entity
4083: * - other characters are processed by appending them to the normalized value
1.130 daniel 4084: * If the declared value is not CDATA, then the XML processor must further
4085: * process the normalized attribute value by discarding any leading and
4086: * trailing space (#x20) characters, and by replacing sequences of space
4087: * (#x20) characters by a single space (#x20) character.
4088: * All attributes for which no declaration has been read should be treated
4089: * by a non-validating parser as if declared CDATA.
1.129 daniel 4090: *
4091: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 4092: */
4093:
1.123 daniel 4094: xmlChar *
1.55 daniel 4095: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 4096: xmlChar limit = 0;
1.198 daniel 4097: xmlChar *buf = NULL;
4098: int len = 0;
4099: int buf_size = 0;
4100: int c, l;
1.129 daniel 4101: xmlChar *current = NULL;
4102: xmlEntityPtr ent;
4103:
1.29 daniel 4104:
1.91 daniel 4105: SHRINK;
1.151 daniel 4106: if (NXT(0) == '"') {
1.96 daniel 4107: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 4108: limit = '"';
1.40 daniel 4109: NEXT;
1.151 daniel 4110: } else if (NXT(0) == '\'') {
1.129 daniel 4111: limit = '\'';
1.96 daniel 4112: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 4113: NEXT;
1.29 daniel 4114: } else {
1.123 daniel 4115: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 4116: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4117: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 4118: ctxt->wellFormed = 0;
1.180 daniel 4119: ctxt->disableSAX = 1;
1.129 daniel 4120: return(NULL);
1.29 daniel 4121: }
4122:
1.129 daniel 4123: /*
4124: * allocate a translation buffer.
4125: */
1.198 daniel 4126: buf_size = XML_PARSER_BUFFER_SIZE;
4127: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
4128: if (buf == NULL) {
1.129 daniel 4129: perror("xmlParseAttValue: malloc failed");
4130: return(NULL);
4131: }
4132:
4133: /*
4134: * Ok loop until we reach one of the ending char or a size limit.
4135: */
1.198 daniel 4136: c = CUR_CHAR(l);
4137: while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
4138: if (c == 0) break;
1.205 veillard 4139: if (ctxt->token == '&') {
4140: static xmlChar buffer[6] = "&";
4141:
4142: if (len > buf_size - 10) {
4143: growBuffer(buf);
4144: }
4145: current = &buffer[0];
4146: while (*current != 0) {
4147: buf[len++] = *current++;
4148: }
4149: ctxt->token = 0;
4150: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 4151: int val = xmlParseCharRef(ctxt);
1.198 daniel 4152: COPY_BUF(l,buf,len,val);
4153: NEXTL(l);
4154: } else if (c == '&') {
1.129 daniel 4155: ent = xmlParseEntityRef(ctxt);
4156: if ((ent != NULL) &&
4157: (ctxt->replaceEntities != 0)) {
1.185 daniel 4158: xmlChar *rep;
4159:
1.186 daniel 4160: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4161: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 4162: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 4163: if (rep != NULL) {
4164: current = rep;
4165: while (*current != 0) {
1.198 daniel 4166: buf[len++] = *current++;
4167: if (len > buf_size - 10) {
4168: growBuffer(buf);
1.186 daniel 4169: }
1.185 daniel 4170: }
1.186 daniel 4171: xmlFree(rep);
1.129 daniel 4172: }
1.186 daniel 4173: } else {
4174: if (ent->content != NULL)
1.198 daniel 4175: buf[len++] = ent->content[0];
1.129 daniel 4176: }
4177: } else if (ent != NULL) {
4178: int i = xmlStrlen(ent->name);
4179: const xmlChar *cur = ent->name;
4180:
1.186 daniel 4181: /*
4182: * This may look absurd but is needed to detect
4183: * entities problems
4184: */
1.211 veillard 4185: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4186: (ent->content != NULL)) {
1.186 daniel 4187: xmlChar *rep;
4188: rep = xmlStringDecodeEntities(ctxt, ent->content,
4189: XML_SUBSTITUTE_REF, 0, 0, 0);
4190: if (rep != NULL)
4191: xmlFree(rep);
4192: }
4193:
4194: /*
4195: * Just output the reference
4196: */
1.198 daniel 4197: buf[len++] = '&';
4198: if (len > buf_size - i - 10) {
4199: growBuffer(buf);
1.129 daniel 4200: }
4201: for (;i > 0;i--)
1.198 daniel 4202: buf[len++] = *cur++;
4203: buf[len++] = ';';
1.129 daniel 4204: }
4205: } else {
1.198 daniel 4206: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4207: COPY_BUF(l,buf,len,0x20);
4208: if (len > buf_size - 10) {
4209: growBuffer(buf);
1.129 daniel 4210: }
4211: } else {
1.198 daniel 4212: COPY_BUF(l,buf,len,c);
4213: if (len > buf_size - 10) {
4214: growBuffer(buf);
1.129 daniel 4215: }
4216: }
1.198 daniel 4217: NEXTL(l);
1.129 daniel 4218: }
1.198 daniel 4219: GROW;
4220: c = CUR_CHAR(l);
1.129 daniel 4221: }
1.198 daniel 4222: buf[len++] = 0;
1.152 daniel 4223: if (RAW == '<') {
1.129 daniel 4224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4225: ctxt->sax->error(ctxt->userData,
4226: "Unescaped '<' not allowed in attributes values\n");
4227: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
4228: ctxt->wellFormed = 0;
1.180 daniel 4229: ctxt->disableSAX = 1;
1.152 daniel 4230: } else if (RAW != limit) {
1.129 daniel 4231: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4232: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
4233: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
4234: ctxt->wellFormed = 0;
1.180 daniel 4235: ctxt->disableSAX = 1;
1.129 daniel 4236: } else
4237: NEXT;
1.198 daniel 4238: return(buf);
1.29 daniel 4239: }
4240:
1.50 daniel 4241: /**
4242: * xmlParseSystemLiteral:
4243: * @ctxt: an XML parser context
4244: *
4245: * parse an XML Literal
1.21 daniel 4246: *
1.22 daniel 4247: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 4248: *
4249: * Returns the SystemLiteral parsed or NULL
1.21 daniel 4250: */
4251:
1.123 daniel 4252: xmlChar *
1.55 daniel 4253: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4254: xmlChar *buf = NULL;
4255: int len = 0;
1.140 daniel 4256: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4257: int cur, l;
1.135 daniel 4258: xmlChar stop;
1.168 daniel 4259: int state = ctxt->instate;
1.21 daniel 4260:
1.91 daniel 4261: SHRINK;
1.152 daniel 4262: if (RAW == '"') {
1.40 daniel 4263: NEXT;
1.135 daniel 4264: stop = '"';
1.152 daniel 4265: } else if (RAW == '\'') {
1.40 daniel 4266: NEXT;
1.135 daniel 4267: stop = '\'';
1.21 daniel 4268: } else {
1.55 daniel 4269: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4270: ctxt->sax->error(ctxt->userData,
4271: "SystemLiteral \" or ' expected\n");
1.123 daniel 4272: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4273: ctxt->wellFormed = 0;
1.180 daniel 4274: ctxt->disableSAX = 1;
1.135 daniel 4275: return(NULL);
1.21 daniel 4276: }
4277:
1.135 daniel 4278: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4279: if (buf == NULL) {
4280: fprintf(stderr, "malloc of %d byte failed\n", size);
4281: return(NULL);
4282: }
1.168 daniel 4283: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 4284: cur = CUR_CHAR(l);
1.135 daniel 4285: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 4286: if (len + 5 >= size) {
1.135 daniel 4287: size *= 2;
1.204 veillard 4288: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4289: if (buf == NULL) {
4290: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 veillard 4291: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4292: return(NULL);
4293: }
4294: }
1.152 daniel 4295: COPY_BUF(l,buf,len,cur);
4296: NEXTL(l);
4297: cur = CUR_CHAR(l);
1.135 daniel 4298: if (cur == 0) {
4299: GROW;
4300: SHRINK;
1.152 daniel 4301: cur = CUR_CHAR(l);
1.135 daniel 4302: }
4303: }
4304: buf[len] = 0;
1.204 veillard 4305: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4306: if (!IS_CHAR(cur)) {
4307: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4308: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
4309: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4310: ctxt->wellFormed = 0;
1.180 daniel 4311: ctxt->disableSAX = 1;
1.135 daniel 4312: } else {
4313: NEXT;
4314: }
4315: return(buf);
1.21 daniel 4316: }
4317:
1.50 daniel 4318: /**
4319: * xmlParsePubidLiteral:
4320: * @ctxt: an XML parser context
1.21 daniel 4321: *
1.50 daniel 4322: * parse an XML public literal
1.68 daniel 4323: *
4324: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4325: *
4326: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 4327: */
4328:
1.123 daniel 4329: xmlChar *
1.55 daniel 4330: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4331: xmlChar *buf = NULL;
4332: int len = 0;
1.140 daniel 4333: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 4334: xmlChar cur;
4335: xmlChar stop;
1.125 daniel 4336:
1.91 daniel 4337: SHRINK;
1.152 daniel 4338: if (RAW == '"') {
1.40 daniel 4339: NEXT;
1.135 daniel 4340: stop = '"';
1.152 daniel 4341: } else if (RAW == '\'') {
1.40 daniel 4342: NEXT;
1.135 daniel 4343: stop = '\'';
1.21 daniel 4344: } else {
1.55 daniel 4345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4346: ctxt->sax->error(ctxt->userData,
4347: "SystemLiteral \" or ' expected\n");
1.123 daniel 4348: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4349: ctxt->wellFormed = 0;
1.180 daniel 4350: ctxt->disableSAX = 1;
1.135 daniel 4351: return(NULL);
4352: }
4353: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4354: if (buf == NULL) {
4355: fprintf(stderr, "malloc of %d byte failed\n", size);
4356: return(NULL);
4357: }
4358: cur = CUR;
4359: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4360: if (len + 1 >= size) {
4361: size *= 2;
1.204 veillard 4362: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4363: if (buf == NULL) {
4364: fprintf(stderr, "realloc of %d byte failed\n", size);
4365: return(NULL);
4366: }
4367: }
4368: buf[len++] = cur;
4369: NEXT;
4370: cur = CUR;
4371: if (cur == 0) {
4372: GROW;
4373: SHRINK;
4374: cur = CUR;
4375: }
4376: }
4377: buf[len] = 0;
4378: if (cur != stop) {
4379: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4380: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4381: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4382: ctxt->wellFormed = 0;
1.180 daniel 4383: ctxt->disableSAX = 1;
1.135 daniel 4384: } else {
4385: NEXT;
1.21 daniel 4386: }
1.135 daniel 4387: return(buf);
1.21 daniel 4388: }
4389:
1.50 daniel 4390: /**
4391: * xmlParseCharData:
4392: * @ctxt: an XML parser context
4393: * @cdata: int indicating whether we are within a CDATA section
4394: *
4395: * parse a CharData section.
4396: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4397: *
1.151 daniel 4398: * The right angle bracket (>) may be represented using the string ">",
4399: * and must, for compatibility, be escaped using ">" or a character
4400: * reference when it appears in the string "]]>" in content, when that
4401: * string is not marking the end of a CDATA section.
4402: *
1.27 daniel 4403: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4404: */
4405:
1.55 daniel 4406: void
4407: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4408: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4409: int nbchar = 0;
1.152 daniel 4410: int cur, l;
1.27 daniel 4411:
1.91 daniel 4412: SHRINK;
1.152 daniel 4413: cur = CUR_CHAR(l);
1.190 daniel 4414: while (((cur != '<') || (ctxt->token == '<')) &&
4415: ((cur != '&') || (ctxt->token == '&')) &&
4416: (IS_CHAR(cur))) {
1.97 daniel 4417: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4418: (NXT(2) == '>')) {
4419: if (cdata) break;
4420: else {
4421: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4422: ctxt->sax->error(ctxt->userData,
1.59 daniel 4423: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4424: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4425: /* Should this be relaxed ??? I see a "must here */
4426: ctxt->wellFormed = 0;
1.180 daniel 4427: ctxt->disableSAX = 1;
1.59 daniel 4428: }
4429: }
1.152 daniel 4430: COPY_BUF(l,buf,nbchar,cur);
4431: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4432: /*
4433: * Ok the segment is to be consumed as chars.
4434: */
1.171 daniel 4435: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4436: if (areBlanks(ctxt, buf, nbchar)) {
4437: if (ctxt->sax->ignorableWhitespace != NULL)
4438: ctxt->sax->ignorableWhitespace(ctxt->userData,
4439: buf, nbchar);
4440: } else {
4441: if (ctxt->sax->characters != NULL)
4442: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4443: }
4444: }
4445: nbchar = 0;
4446: }
1.152 daniel 4447: NEXTL(l);
4448: cur = CUR_CHAR(l);
1.27 daniel 4449: }
1.91 daniel 4450: if (nbchar != 0) {
4451: /*
4452: * Ok the segment is to be consumed as chars.
4453: */
1.171 daniel 4454: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4455: if (areBlanks(ctxt, buf, nbchar)) {
4456: if (ctxt->sax->ignorableWhitespace != NULL)
4457: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4458: } else {
4459: if (ctxt->sax->characters != NULL)
4460: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4461: }
4462: }
1.45 daniel 4463: }
1.27 daniel 4464: }
4465:
1.50 daniel 4466: /**
4467: * xmlParseExternalID:
4468: * @ctxt: an XML parser context
1.123 daniel 4469: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4470: * @strict: indicate whether we should restrict parsing to only
4471: * production [75], see NOTE below
1.50 daniel 4472: *
1.67 daniel 4473: * Parse an External ID or a Public ID
4474: *
4475: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4476: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4477: *
4478: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4479: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4480: *
4481: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4482: *
1.68 daniel 4483: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4484: * case publicID receives PubidLiteral, is strict is off
4485: * it is possible to return NULL and have publicID set.
1.22 daniel 4486: */
4487:
1.123 daniel 4488: xmlChar *
4489: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4490: xmlChar *URI = NULL;
1.22 daniel 4491:
1.91 daniel 4492: SHRINK;
1.152 daniel 4493: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4494: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4495: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4496: SKIP(6);
1.59 daniel 4497: if (!IS_BLANK(CUR)) {
4498: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4499: ctxt->sax->error(ctxt->userData,
1.59 daniel 4500: "Space required after 'SYSTEM'\n");
1.123 daniel 4501: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4502: ctxt->wellFormed = 0;
1.180 daniel 4503: ctxt->disableSAX = 1;
1.59 daniel 4504: }
1.42 daniel 4505: SKIP_BLANKS;
1.39 daniel 4506: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4507: if (URI == NULL) {
1.55 daniel 4508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4509: ctxt->sax->error(ctxt->userData,
1.39 daniel 4510: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4511: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4512: ctxt->wellFormed = 0;
1.180 daniel 4513: ctxt->disableSAX = 1;
1.59 daniel 4514: }
1.152 daniel 4515: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4516: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4517: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4518: SKIP(6);
1.59 daniel 4519: if (!IS_BLANK(CUR)) {
4520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4521: ctxt->sax->error(ctxt->userData,
1.59 daniel 4522: "Space required after 'PUBLIC'\n");
1.123 daniel 4523: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4524: ctxt->wellFormed = 0;
1.180 daniel 4525: ctxt->disableSAX = 1;
1.59 daniel 4526: }
1.42 daniel 4527: SKIP_BLANKS;
1.39 daniel 4528: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4529: if (*publicID == NULL) {
1.55 daniel 4530: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4531: ctxt->sax->error(ctxt->userData,
1.39 daniel 4532: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4533: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4534: ctxt->wellFormed = 0;
1.180 daniel 4535: ctxt->disableSAX = 1;
1.59 daniel 4536: }
1.67 daniel 4537: if (strict) {
4538: /*
4539: * We don't handle [83] so "S SystemLiteral" is required.
4540: */
4541: if (!IS_BLANK(CUR)) {
4542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4543: ctxt->sax->error(ctxt->userData,
1.67 daniel 4544: "Space required after the Public Identifier\n");
1.123 daniel 4545: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4546: ctxt->wellFormed = 0;
1.180 daniel 4547: ctxt->disableSAX = 1;
1.67 daniel 4548: }
4549: } else {
4550: /*
4551: * We handle [83] so we return immediately, if
4552: * "S SystemLiteral" is not detected. From a purely parsing
4553: * point of view that's a nice mess.
4554: */
1.135 daniel 4555: const xmlChar *ptr;
4556: GROW;
4557:
4558: ptr = CUR_PTR;
1.67 daniel 4559: if (!IS_BLANK(*ptr)) return(NULL);
4560:
4561: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4562: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4563: }
1.42 daniel 4564: SKIP_BLANKS;
1.39 daniel 4565: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4566: if (URI == NULL) {
1.55 daniel 4567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4568: ctxt->sax->error(ctxt->userData,
1.39 daniel 4569: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4570: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4571: ctxt->wellFormed = 0;
1.180 daniel 4572: ctxt->disableSAX = 1;
1.59 daniel 4573: }
1.22 daniel 4574: }
1.39 daniel 4575: return(URI);
1.22 daniel 4576: }
4577:
1.50 daniel 4578: /**
4579: * xmlParseComment:
1.69 daniel 4580: * @ctxt: an XML parser context
1.50 daniel 4581: *
1.3 veillard 4582: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4583: * The spec says that "For compatibility, the string "--" (double-hyphen)
4584: * must not occur within comments. "
1.22 daniel 4585: *
4586: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4587: */
1.72 daniel 4588: void
1.114 daniel 4589: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4590: xmlChar *buf = NULL;
1.195 daniel 4591: int len;
1.140 daniel 4592: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4593: int q, ql;
4594: int r, rl;
4595: int cur, l;
1.140 daniel 4596: xmlParserInputState state;
1.187 daniel 4597: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4598:
4599: /*
1.22 daniel 4600: * Check that there is a comment right here.
1.3 veillard 4601: */
1.152 daniel 4602: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4603: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4604:
1.140 daniel 4605: state = ctxt->instate;
1.97 daniel 4606: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4607: SHRINK;
1.40 daniel 4608: SKIP(4);
1.135 daniel 4609: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4610: if (buf == NULL) {
4611: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4612: ctxt->instate = state;
1.135 daniel 4613: return;
4614: }
1.152 daniel 4615: q = CUR_CHAR(ql);
4616: NEXTL(ql);
4617: r = CUR_CHAR(rl);
4618: NEXTL(rl);
4619: cur = CUR_CHAR(l);
1.195 daniel 4620: len = 0;
1.135 daniel 4621: while (IS_CHAR(cur) &&
4622: ((cur != '>') ||
4623: (r != '-') || (q != '-'))) {
1.195 daniel 4624: if ((r == '-') && (q == '-') && (len > 1)) {
1.55 daniel 4625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4626: ctxt->sax->error(ctxt->userData,
1.38 daniel 4627: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4628: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4629: ctxt->wellFormed = 0;
1.180 daniel 4630: ctxt->disableSAX = 1;
1.59 daniel 4631: }
1.152 daniel 4632: if (len + 5 >= size) {
1.135 daniel 4633: size *= 2;
1.204 veillard 4634: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4635: if (buf == NULL) {
4636: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4637: ctxt->instate = state;
1.135 daniel 4638: return;
4639: }
4640: }
1.152 daniel 4641: COPY_BUF(ql,buf,len,q);
1.135 daniel 4642: q = r;
1.152 daniel 4643: ql = rl;
1.135 daniel 4644: r = cur;
1.152 daniel 4645: rl = l;
4646: NEXTL(l);
4647: cur = CUR_CHAR(l);
1.135 daniel 4648: if (cur == 0) {
4649: SHRINK;
4650: GROW;
1.152 daniel 4651: cur = CUR_CHAR(l);
1.135 daniel 4652: }
1.3 veillard 4653: }
1.135 daniel 4654: buf[len] = 0;
4655: if (!IS_CHAR(cur)) {
1.55 daniel 4656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4657: ctxt->sax->error(ctxt->userData,
1.135 daniel 4658: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4659: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4660: ctxt->wellFormed = 0;
1.180 daniel 4661: ctxt->disableSAX = 1;
1.178 daniel 4662: xmlFree(buf);
1.3 veillard 4663: } else {
1.187 daniel 4664: if (input != ctxt->input) {
4665: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4666: ctxt->sax->error(ctxt->userData,
4667: "Comment doesn't start and stop in the same entity\n");
4668: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4669: ctxt->wellFormed = 0;
4670: ctxt->disableSAX = 1;
4671: }
1.40 daniel 4672: NEXT;
1.171 daniel 4673: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4674: (!ctxt->disableSAX))
1.135 daniel 4675: ctxt->sax->comment(ctxt->userData, buf);
4676: xmlFree(buf);
1.3 veillard 4677: }
1.140 daniel 4678: ctxt->instate = state;
1.3 veillard 4679: }
4680:
1.50 daniel 4681: /**
4682: * xmlParsePITarget:
4683: * @ctxt: an XML parser context
4684: *
4685: * parse the name of a PI
1.22 daniel 4686: *
4687: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4688: *
4689: * Returns the PITarget name or NULL
1.22 daniel 4690: */
4691:
1.123 daniel 4692: xmlChar *
1.55 daniel 4693: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4694: xmlChar *name;
1.22 daniel 4695:
4696: name = xmlParseName(ctxt);
1.139 daniel 4697: if ((name != NULL) &&
1.22 daniel 4698: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4699: ((name[1] == 'm') || (name[1] == 'M')) &&
4700: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4701: int i;
1.177 daniel 4702: if ((name[0] == 'x') && (name[1] == 'm') &&
4703: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4705: ctxt->sax->error(ctxt->userData,
4706: "XML declaration allowed only at the start of the document\n");
4707: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4708: ctxt->wellFormed = 0;
1.180 daniel 4709: ctxt->disableSAX = 1;
1.151 daniel 4710: return(name);
4711: } else if (name[3] == 0) {
4712: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4714: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4715: ctxt->wellFormed = 0;
1.180 daniel 4716: ctxt->disableSAX = 1;
1.151 daniel 4717: return(name);
4718: }
1.139 daniel 4719: for (i = 0;;i++) {
4720: if (xmlW3CPIs[i] == NULL) break;
4721: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4722: return(name);
4723: }
4724: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4725: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4726: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4727: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4728: }
1.22 daniel 4729: }
4730: return(name);
4731: }
4732:
1.50 daniel 4733: /**
4734: * xmlParsePI:
4735: * @ctxt: an XML parser context
4736: *
4737: * parse an XML Processing Instruction.
1.22 daniel 4738: *
4739: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4740: *
1.69 daniel 4741: * The processing is transfered to SAX once parsed.
1.3 veillard 4742: */
4743:
1.55 daniel 4744: void
4745: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4746: xmlChar *buf = NULL;
4747: int len = 0;
1.140 daniel 4748: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4749: int cur, l;
1.123 daniel 4750: xmlChar *target;
1.140 daniel 4751: xmlParserInputState state;
1.22 daniel 4752:
1.152 daniel 4753: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4754: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4755: state = ctxt->instate;
4756: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4757: /*
4758: * this is a Processing Instruction.
4759: */
1.40 daniel 4760: SKIP(2);
1.91 daniel 4761: SHRINK;
1.3 veillard 4762:
4763: /*
1.22 daniel 4764: * Parse the target name and check for special support like
4765: * namespace.
1.3 veillard 4766: */
1.22 daniel 4767: target = xmlParsePITarget(ctxt);
4768: if (target != NULL) {
1.156 daniel 4769: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4770: if (input != ctxt->input) {
4771: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4772: ctxt->sax->error(ctxt->userData,
4773: "PI declaration doesn't start and stop in the same entity\n");
4774: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4775: ctxt->wellFormed = 0;
4776: ctxt->disableSAX = 1;
4777: }
1.156 daniel 4778: SKIP(2);
4779:
4780: /*
4781: * SAX: PI detected.
4782: */
1.171 daniel 4783: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4784: (ctxt->sax->processingInstruction != NULL))
4785: ctxt->sax->processingInstruction(ctxt->userData,
4786: target, NULL);
4787: ctxt->instate = state;
1.170 daniel 4788: xmlFree(target);
1.156 daniel 4789: return;
4790: }
1.135 daniel 4791: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4792: if (buf == NULL) {
4793: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4794: ctxt->instate = state;
1.135 daniel 4795: return;
4796: }
4797: cur = CUR;
4798: if (!IS_BLANK(cur)) {
1.114 daniel 4799: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4800: ctxt->sax->error(ctxt->userData,
4801: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4802: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4803: ctxt->wellFormed = 0;
1.180 daniel 4804: ctxt->disableSAX = 1;
1.114 daniel 4805: }
4806: SKIP_BLANKS;
1.152 daniel 4807: cur = CUR_CHAR(l);
1.135 daniel 4808: while (IS_CHAR(cur) &&
4809: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4810: if (len + 5 >= size) {
1.135 daniel 4811: size *= 2;
1.204 veillard 4812: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4813: if (buf == NULL) {
4814: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4815: ctxt->instate = state;
1.135 daniel 4816: return;
4817: }
4818: }
1.152 daniel 4819: COPY_BUF(l,buf,len,cur);
4820: NEXTL(l);
4821: cur = CUR_CHAR(l);
1.135 daniel 4822: if (cur == 0) {
4823: SHRINK;
4824: GROW;
1.152 daniel 4825: cur = CUR_CHAR(l);
1.135 daniel 4826: }
4827: }
4828: buf[len] = 0;
1.152 daniel 4829: if (cur != '?') {
1.72 daniel 4830: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4831: ctxt->sax->error(ctxt->userData,
1.72 daniel 4832: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4833: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4834: ctxt->wellFormed = 0;
1.180 daniel 4835: ctxt->disableSAX = 1;
1.22 daniel 4836: } else {
1.187 daniel 4837: if (input != ctxt->input) {
4838: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4839: ctxt->sax->error(ctxt->userData,
4840: "PI declaration doesn't start and stop in the same entity\n");
4841: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4842: ctxt->wellFormed = 0;
4843: ctxt->disableSAX = 1;
4844: }
1.72 daniel 4845: SKIP(2);
1.44 daniel 4846:
1.72 daniel 4847: /*
4848: * SAX: PI detected.
4849: */
1.171 daniel 4850: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4851: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4852: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4853: target, buf);
1.22 daniel 4854: }
1.135 daniel 4855: xmlFree(buf);
1.119 daniel 4856: xmlFree(target);
1.3 veillard 4857: } else {
1.55 daniel 4858: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4859: ctxt->sax->error(ctxt->userData,
4860: "xmlParsePI : no target name\n");
1.123 daniel 4861: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4862: ctxt->wellFormed = 0;
1.180 daniel 4863: ctxt->disableSAX = 1;
1.22 daniel 4864: }
1.140 daniel 4865: ctxt->instate = state;
1.22 daniel 4866: }
4867: }
4868:
1.50 daniel 4869: /**
4870: * xmlParseNotationDecl:
4871: * @ctxt: an XML parser context
4872: *
4873: * parse a notation declaration
1.22 daniel 4874: *
4875: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4876: *
4877: * Hence there is actually 3 choices:
4878: * 'PUBLIC' S PubidLiteral
4879: * 'PUBLIC' S PubidLiteral S SystemLiteral
4880: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4881: *
1.67 daniel 4882: * See the NOTE on xmlParseExternalID().
1.22 daniel 4883: */
4884:
1.55 daniel 4885: void
4886: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4887: xmlChar *name;
4888: xmlChar *Pubid;
4889: xmlChar *Systemid;
1.22 daniel 4890:
1.152 daniel 4891: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4892: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4893: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4894: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4895: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4896: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4897: SHRINK;
1.40 daniel 4898: SKIP(10);
1.67 daniel 4899: if (!IS_BLANK(CUR)) {
4900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4901: ctxt->sax->error(ctxt->userData,
4902: "Space required after '<!NOTATION'\n");
1.123 daniel 4903: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4904: ctxt->wellFormed = 0;
1.180 daniel 4905: ctxt->disableSAX = 1;
1.67 daniel 4906: return;
4907: }
4908: SKIP_BLANKS;
1.22 daniel 4909:
4910: name = xmlParseName(ctxt);
4911: if (name == NULL) {
1.55 daniel 4912: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4913: ctxt->sax->error(ctxt->userData,
4914: "NOTATION: Name expected here\n");
1.123 daniel 4915: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4916: ctxt->wellFormed = 0;
1.180 daniel 4917: ctxt->disableSAX = 1;
1.67 daniel 4918: return;
4919: }
4920: if (!IS_BLANK(CUR)) {
4921: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4922: ctxt->sax->error(ctxt->userData,
1.67 daniel 4923: "Space required after the NOTATION name'\n");
1.123 daniel 4924: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4925: ctxt->wellFormed = 0;
1.180 daniel 4926: ctxt->disableSAX = 1;
1.22 daniel 4927: return;
4928: }
1.42 daniel 4929: SKIP_BLANKS;
1.67 daniel 4930:
1.22 daniel 4931: /*
1.67 daniel 4932: * Parse the IDs.
1.22 daniel 4933: */
1.160 daniel 4934: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4935: SKIP_BLANKS;
4936:
1.152 daniel 4937: if (RAW == '>') {
1.187 daniel 4938: if (input != ctxt->input) {
4939: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4940: ctxt->sax->error(ctxt->userData,
4941: "Notation declaration doesn't start and stop in the same entity\n");
4942: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4943: ctxt->wellFormed = 0;
4944: ctxt->disableSAX = 1;
4945: }
1.40 daniel 4946: NEXT;
1.171 daniel 4947: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4948: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4949: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4950: } else {
4951: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4952: ctxt->sax->error(ctxt->userData,
1.67 daniel 4953: "'>' required to close NOTATION declaration\n");
1.123 daniel 4954: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4955: ctxt->wellFormed = 0;
1.180 daniel 4956: ctxt->disableSAX = 1;
1.67 daniel 4957: }
1.119 daniel 4958: xmlFree(name);
4959: if (Systemid != NULL) xmlFree(Systemid);
4960: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4961: }
4962: }
4963:
1.50 daniel 4964: /**
4965: * xmlParseEntityDecl:
4966: * @ctxt: an XML parser context
4967: *
4968: * parse <!ENTITY declarations
1.22 daniel 4969: *
4970: * [70] EntityDecl ::= GEDecl | PEDecl
4971: *
4972: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4973: *
4974: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4975: *
4976: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4977: *
4978: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4979: *
4980: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4981: *
4982: * [ VC: Notation Declared ]
1.116 daniel 4983: * The Name must match the declared name of a notation.
1.22 daniel 4984: */
4985:
1.55 daniel 4986: void
4987: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4988: xmlChar *name = NULL;
4989: xmlChar *value = NULL;
4990: xmlChar *URI = NULL, *literal = NULL;
4991: xmlChar *ndata = NULL;
1.39 daniel 4992: int isParameter = 0;
1.123 daniel 4993: xmlChar *orig = NULL;
1.22 daniel 4994:
1.94 daniel 4995: GROW;
1.152 daniel 4996: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4997: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4998: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4999: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 5000: xmlParserInputPtr input = ctxt->input;
1.96 daniel 5001: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 5002: SHRINK;
1.40 daniel 5003: SKIP(8);
1.59 daniel 5004: if (!IS_BLANK(CUR)) {
5005: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5006: ctxt->sax->error(ctxt->userData,
5007: "Space required after '<!ENTITY'\n");
1.123 daniel 5008: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5009: ctxt->wellFormed = 0;
1.180 daniel 5010: ctxt->disableSAX = 1;
1.59 daniel 5011: }
5012: SKIP_BLANKS;
1.40 daniel 5013:
1.152 daniel 5014: if (RAW == '%') {
1.40 daniel 5015: NEXT;
1.59 daniel 5016: if (!IS_BLANK(CUR)) {
5017: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5018: ctxt->sax->error(ctxt->userData,
5019: "Space required after '%'\n");
1.123 daniel 5020: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5021: ctxt->wellFormed = 0;
1.180 daniel 5022: ctxt->disableSAX = 1;
1.59 daniel 5023: }
1.42 daniel 5024: SKIP_BLANKS;
1.39 daniel 5025: isParameter = 1;
1.22 daniel 5026: }
5027:
5028: name = xmlParseName(ctxt);
1.24 daniel 5029: if (name == NULL) {
1.55 daniel 5030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5031: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 5032: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5033: ctxt->wellFormed = 0;
1.180 daniel 5034: ctxt->disableSAX = 1;
1.24 daniel 5035: return;
5036: }
1.59 daniel 5037: if (!IS_BLANK(CUR)) {
5038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5039: ctxt->sax->error(ctxt->userData,
1.59 daniel 5040: "Space required after the entity name\n");
1.123 daniel 5041: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5042: ctxt->wellFormed = 0;
1.180 daniel 5043: ctxt->disableSAX = 1;
1.59 daniel 5044: }
1.42 daniel 5045: SKIP_BLANKS;
1.24 daniel 5046:
1.22 daniel 5047: /*
1.68 daniel 5048: * handle the various case of definitions...
1.22 daniel 5049: */
1.39 daniel 5050: if (isParameter) {
1.152 daniel 5051: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 5052: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 5053: if (value) {
1.171 daniel 5054: if ((ctxt->sax != NULL) &&
5055: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 5056: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 5057: XML_INTERNAL_PARAMETER_ENTITY,
5058: NULL, NULL, value);
5059: }
1.24 daniel 5060: else {
1.67 daniel 5061: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 5062: if ((URI == NULL) && (literal == NULL)) {
5063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5064: ctxt->sax->error(ctxt->userData,
5065: "Entity value required\n");
5066: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
5067: ctxt->wellFormed = 0;
1.180 daniel 5068: ctxt->disableSAX = 1;
1.169 daniel 5069: }
1.39 daniel 5070: if (URI) {
1.193 daniel 5071: xmlURIPtr uri;
5072:
5073: uri = xmlParseURI((const char *) URI);
5074: if (uri == NULL) {
5075: if ((ctxt->sax != NULL) &&
5076: (!ctxt->disableSAX) &&
5077: (ctxt->sax->error != NULL))
5078: ctxt->sax->error(ctxt->userData,
5079: "Invalid URI: %s\n", URI);
5080: ctxt->wellFormed = 0;
5081: ctxt->errNo = XML_ERR_INVALID_URI;
5082: } else {
5083: if (uri->fragment != NULL) {
5084: if ((ctxt->sax != NULL) &&
5085: (!ctxt->disableSAX) &&
5086: (ctxt->sax->error != NULL))
5087: ctxt->sax->error(ctxt->userData,
5088: "Fragment not allowed: %s\n", URI);
5089: ctxt->wellFormed = 0;
5090: ctxt->errNo = XML_ERR_URI_FRAGMENT;
5091: } else {
5092: if ((ctxt->sax != NULL) &&
5093: (!ctxt->disableSAX) &&
5094: (ctxt->sax->entityDecl != NULL))
5095: ctxt->sax->entityDecl(ctxt->userData, name,
5096: XML_EXTERNAL_PARAMETER_ENTITY,
5097: literal, URI, NULL);
5098: }
5099: xmlFreeURI(uri);
5100: }
1.39 daniel 5101: }
1.24 daniel 5102: }
5103: } else {
1.152 daniel 5104: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 5105: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 5106: if ((ctxt->sax != NULL) &&
5107: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 5108: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 5109: XML_INTERNAL_GENERAL_ENTITY,
5110: NULL, NULL, value);
5111: } else {
1.67 daniel 5112: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 5113: if ((URI == NULL) && (literal == NULL)) {
5114: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5115: ctxt->sax->error(ctxt->userData,
5116: "Entity value required\n");
5117: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
5118: ctxt->wellFormed = 0;
1.180 daniel 5119: ctxt->disableSAX = 1;
1.169 daniel 5120: }
1.193 daniel 5121: if (URI) {
5122: xmlURIPtr uri;
5123:
5124: uri = xmlParseURI((const char *)URI);
5125: if (uri == NULL) {
5126: if ((ctxt->sax != NULL) &&
5127: (!ctxt->disableSAX) &&
5128: (ctxt->sax->error != NULL))
5129: ctxt->sax->error(ctxt->userData,
5130: "Invalid URI: %s\n", URI);
5131: ctxt->wellFormed = 0;
5132: ctxt->errNo = XML_ERR_INVALID_URI;
5133: } else {
5134: if (uri->fragment != NULL) {
5135: if ((ctxt->sax != NULL) &&
5136: (!ctxt->disableSAX) &&
5137: (ctxt->sax->error != NULL))
5138: ctxt->sax->error(ctxt->userData,
5139: "Fragment not allowed: %s\n", URI);
5140: ctxt->wellFormed = 0;
5141: ctxt->errNo = XML_ERR_URI_FRAGMENT;
5142: }
5143: xmlFreeURI(uri);
5144: }
5145: }
1.152 daniel 5146: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 5147: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5148: ctxt->sax->error(ctxt->userData,
1.59 daniel 5149: "Space required before 'NDATA'\n");
1.123 daniel 5150: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5151: ctxt->wellFormed = 0;
1.180 daniel 5152: ctxt->disableSAX = 1;
1.59 daniel 5153: }
1.42 daniel 5154: SKIP_BLANKS;
1.152 daniel 5155: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 5156: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5157: (NXT(4) == 'A')) {
5158: SKIP(5);
1.59 daniel 5159: if (!IS_BLANK(CUR)) {
5160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5161: ctxt->sax->error(ctxt->userData,
1.59 daniel 5162: "Space required after 'NDATA'\n");
1.123 daniel 5163: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5164: ctxt->wellFormed = 0;
1.180 daniel 5165: ctxt->disableSAX = 1;
1.59 daniel 5166: }
1.42 daniel 5167: SKIP_BLANKS;
1.24 daniel 5168: ndata = xmlParseName(ctxt);
1.171 daniel 5169: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 5170: (ctxt->sax->unparsedEntityDecl != NULL))
5171: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 5172: literal, URI, ndata);
5173: } else {
1.171 daniel 5174: if ((ctxt->sax != NULL) &&
5175: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 5176: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 5177: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5178: literal, URI, NULL);
1.24 daniel 5179: }
5180: }
5181: }
1.42 daniel 5182: SKIP_BLANKS;
1.152 daniel 5183: if (RAW != '>') {
1.55 daniel 5184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5185: ctxt->sax->error(ctxt->userData,
1.31 daniel 5186: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 5187: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 5188: ctxt->wellFormed = 0;
1.180 daniel 5189: ctxt->disableSAX = 1;
1.187 daniel 5190: } else {
5191: if (input != ctxt->input) {
5192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5193: ctxt->sax->error(ctxt->userData,
5194: "Entity declaration doesn't start and stop in the same entity\n");
5195: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5196: ctxt->wellFormed = 0;
5197: ctxt->disableSAX = 1;
5198: }
1.40 daniel 5199: NEXT;
1.187 daniel 5200: }
1.78 daniel 5201: if (orig != NULL) {
5202: /*
1.98 daniel 5203: * Ugly mechanism to save the raw entity value.
1.78 daniel 5204: */
5205: xmlEntityPtr cur = NULL;
5206:
1.98 daniel 5207: if (isParameter) {
5208: if ((ctxt->sax != NULL) &&
5209: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 5210: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 5211: } else {
5212: if ((ctxt->sax != NULL) &&
5213: (ctxt->sax->getEntity != NULL))
1.120 daniel 5214: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 5215: }
5216: if (cur != NULL) {
5217: if (cur->orig != NULL)
1.119 daniel 5218: xmlFree(orig);
1.98 daniel 5219: else
5220: cur->orig = orig;
5221: } else
1.119 daniel 5222: xmlFree(orig);
1.78 daniel 5223: }
1.119 daniel 5224: if (name != NULL) xmlFree(name);
5225: if (value != NULL) xmlFree(value);
5226: if (URI != NULL) xmlFree(URI);
5227: if (literal != NULL) xmlFree(literal);
5228: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 5229: }
5230: }
5231:
1.50 daniel 5232: /**
1.59 daniel 5233: * xmlParseDefaultDecl:
5234: * @ctxt: an XML parser context
5235: * @value: Receive a possible fixed default value for the attribute
5236: *
5237: * Parse an attribute default declaration
5238: *
5239: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5240: *
1.99 daniel 5241: * [ VC: Required Attribute ]
1.117 daniel 5242: * if the default declaration is the keyword #REQUIRED, then the
5243: * attribute must be specified for all elements of the type in the
5244: * attribute-list declaration.
1.99 daniel 5245: *
5246: * [ VC: Attribute Default Legal ]
1.102 daniel 5247: * The declared default value must meet the lexical constraints of
5248: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 5249: *
5250: * [ VC: Fixed Attribute Default ]
1.117 daniel 5251: * if an attribute has a default value declared with the #FIXED
5252: * keyword, instances of that attribute must match the default value.
1.99 daniel 5253: *
5254: * [ WFC: No < in Attribute Values ]
5255: * handled in xmlParseAttValue()
5256: *
1.59 daniel 5257: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5258: * or XML_ATTRIBUTE_FIXED.
5259: */
5260:
5261: int
1.123 daniel 5262: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 5263: int val;
1.123 daniel 5264: xmlChar *ret;
1.59 daniel 5265:
5266: *value = NULL;
1.152 daniel 5267: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 5268: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
5269: (NXT(4) == 'U') && (NXT(5) == 'I') &&
5270: (NXT(6) == 'R') && (NXT(7) == 'E') &&
5271: (NXT(8) == 'D')) {
5272: SKIP(9);
5273: return(XML_ATTRIBUTE_REQUIRED);
5274: }
1.152 daniel 5275: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 5276: (NXT(2) == 'M') && (NXT(3) == 'P') &&
5277: (NXT(4) == 'L') && (NXT(5) == 'I') &&
5278: (NXT(6) == 'E') && (NXT(7) == 'D')) {
5279: SKIP(8);
5280: return(XML_ATTRIBUTE_IMPLIED);
5281: }
5282: val = XML_ATTRIBUTE_NONE;
1.152 daniel 5283: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 5284: (NXT(2) == 'I') && (NXT(3) == 'X') &&
5285: (NXT(4) == 'E') && (NXT(5) == 'D')) {
5286: SKIP(6);
5287: val = XML_ATTRIBUTE_FIXED;
5288: if (!IS_BLANK(CUR)) {
5289: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5290: ctxt->sax->error(ctxt->userData,
5291: "Space required after '#FIXED'\n");
1.123 daniel 5292: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5293: ctxt->wellFormed = 0;
1.180 daniel 5294: ctxt->disableSAX = 1;
1.59 daniel 5295: }
5296: SKIP_BLANKS;
5297: }
5298: ret = xmlParseAttValue(ctxt);
1.96 daniel 5299: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 5300: if (ret == NULL) {
5301: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5302: ctxt->sax->error(ctxt->userData,
1.59 daniel 5303: "Attribute default value declaration error\n");
5304: ctxt->wellFormed = 0;
1.180 daniel 5305: ctxt->disableSAX = 1;
1.59 daniel 5306: } else
5307: *value = ret;
5308: return(val);
5309: }
5310:
5311: /**
1.66 daniel 5312: * xmlParseNotationType:
5313: * @ctxt: an XML parser context
5314: *
5315: * parse an Notation attribute type.
5316: *
1.99 daniel 5317: * Note: the leading 'NOTATION' S part has already being parsed...
5318: *
1.66 daniel 5319: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5320: *
1.99 daniel 5321: * [ VC: Notation Attributes ]
1.117 daniel 5322: * Values of this type must match one of the notation names included
1.99 daniel 5323: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 5324: *
5325: * Returns: the notation attribute tree built while parsing
5326: */
5327:
5328: xmlEnumerationPtr
5329: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5330: xmlChar *name;
1.66 daniel 5331: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5332:
1.152 daniel 5333: if (RAW != '(') {
1.66 daniel 5334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5335: ctxt->sax->error(ctxt->userData,
5336: "'(' required to start 'NOTATION'\n");
1.123 daniel 5337: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 5338: ctxt->wellFormed = 0;
1.180 daniel 5339: ctxt->disableSAX = 1;
1.66 daniel 5340: return(NULL);
5341: }
1.91 daniel 5342: SHRINK;
1.66 daniel 5343: do {
5344: NEXT;
5345: SKIP_BLANKS;
5346: name = xmlParseName(ctxt);
5347: if (name == NULL) {
5348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5349: ctxt->sax->error(ctxt->userData,
1.66 daniel 5350: "Name expected in NOTATION declaration\n");
1.123 daniel 5351: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 5352: ctxt->wellFormed = 0;
1.180 daniel 5353: ctxt->disableSAX = 1;
1.66 daniel 5354: return(ret);
5355: }
5356: cur = xmlCreateEnumeration(name);
1.119 daniel 5357: xmlFree(name);
1.66 daniel 5358: if (cur == NULL) return(ret);
5359: if (last == NULL) ret = last = cur;
5360: else {
5361: last->next = cur;
5362: last = cur;
5363: }
5364: SKIP_BLANKS;
1.152 daniel 5365: } while (RAW == '|');
5366: if (RAW != ')') {
1.66 daniel 5367: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5368: ctxt->sax->error(ctxt->userData,
1.66 daniel 5369: "')' required to finish NOTATION declaration\n");
1.123 daniel 5370: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 5371: ctxt->wellFormed = 0;
1.180 daniel 5372: ctxt->disableSAX = 1;
1.170 daniel 5373: if ((last != NULL) && (last != ret))
5374: xmlFreeEnumeration(last);
1.66 daniel 5375: return(ret);
5376: }
5377: NEXT;
5378: return(ret);
5379: }
5380:
5381: /**
5382: * xmlParseEnumerationType:
5383: * @ctxt: an XML parser context
5384: *
5385: * parse an Enumeration attribute type.
5386: *
5387: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5388: *
1.99 daniel 5389: * [ VC: Enumeration ]
1.117 daniel 5390: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5391: * the declaration
5392: *
1.66 daniel 5393: * Returns: the enumeration attribute tree built while parsing
5394: */
5395:
5396: xmlEnumerationPtr
5397: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5398: xmlChar *name;
1.66 daniel 5399: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5400:
1.152 daniel 5401: if (RAW != '(') {
1.66 daniel 5402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5403: ctxt->sax->error(ctxt->userData,
1.66 daniel 5404: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5405: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5406: ctxt->wellFormed = 0;
1.180 daniel 5407: ctxt->disableSAX = 1;
1.66 daniel 5408: return(NULL);
5409: }
1.91 daniel 5410: SHRINK;
1.66 daniel 5411: do {
5412: NEXT;
5413: SKIP_BLANKS;
5414: name = xmlParseNmtoken(ctxt);
5415: if (name == NULL) {
5416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5417: ctxt->sax->error(ctxt->userData,
1.66 daniel 5418: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5419: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5420: ctxt->wellFormed = 0;
1.180 daniel 5421: ctxt->disableSAX = 1;
1.66 daniel 5422: return(ret);
5423: }
5424: cur = xmlCreateEnumeration(name);
1.119 daniel 5425: xmlFree(name);
1.66 daniel 5426: if (cur == NULL) return(ret);
5427: if (last == NULL) ret = last = cur;
5428: else {
5429: last->next = cur;
5430: last = cur;
5431: }
5432: SKIP_BLANKS;
1.152 daniel 5433: } while (RAW == '|');
5434: if (RAW != ')') {
1.66 daniel 5435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5436: ctxt->sax->error(ctxt->userData,
1.66 daniel 5437: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5438: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5439: ctxt->wellFormed = 0;
1.180 daniel 5440: ctxt->disableSAX = 1;
1.66 daniel 5441: return(ret);
5442: }
5443: NEXT;
5444: return(ret);
5445: }
5446:
5447: /**
1.50 daniel 5448: * xmlParseEnumeratedType:
5449: * @ctxt: an XML parser context
1.66 daniel 5450: * @tree: the enumeration tree built while parsing
1.50 daniel 5451: *
1.66 daniel 5452: * parse an Enumerated attribute type.
1.22 daniel 5453: *
5454: * [57] EnumeratedType ::= NotationType | Enumeration
5455: *
5456: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5457: *
1.50 daniel 5458: *
1.66 daniel 5459: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5460: */
5461:
1.66 daniel 5462: int
5463: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5464: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5465: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5466: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5467: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5468: SKIP(8);
5469: if (!IS_BLANK(CUR)) {
5470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5471: ctxt->sax->error(ctxt->userData,
5472: "Space required after 'NOTATION'\n");
1.123 daniel 5473: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5474: ctxt->wellFormed = 0;
1.180 daniel 5475: ctxt->disableSAX = 1;
1.66 daniel 5476: return(0);
5477: }
5478: SKIP_BLANKS;
5479: *tree = xmlParseNotationType(ctxt);
5480: if (*tree == NULL) return(0);
5481: return(XML_ATTRIBUTE_NOTATION);
5482: }
5483: *tree = xmlParseEnumerationType(ctxt);
5484: if (*tree == NULL) return(0);
5485: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5486: }
5487:
1.50 daniel 5488: /**
5489: * xmlParseAttributeType:
5490: * @ctxt: an XML parser context
1.66 daniel 5491: * @tree: the enumeration tree built while parsing
1.50 daniel 5492: *
1.59 daniel 5493: * parse the Attribute list def for an element
1.22 daniel 5494: *
5495: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5496: *
5497: * [55] StringType ::= 'CDATA'
5498: *
5499: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5500: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5501: *
1.102 daniel 5502: * Validity constraints for attribute values syntax are checked in
5503: * xmlValidateAttributeValue()
5504: *
1.99 daniel 5505: * [ VC: ID ]
1.117 daniel 5506: * Values of type ID must match the Name production. A name must not
1.99 daniel 5507: * appear more than once in an XML document as a value of this type;
5508: * i.e., ID values must uniquely identify the elements which bear them.
5509: *
5510: * [ VC: One ID per Element Type ]
1.117 daniel 5511: * No element type may have more than one ID attribute specified.
1.99 daniel 5512: *
5513: * [ VC: ID Attribute Default ]
1.117 daniel 5514: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5515: *
5516: * [ VC: IDREF ]
1.102 daniel 5517: * Values of type IDREF must match the Name production, and values
1.140 daniel 5518: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5519: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5520: * values must match the value of some ID attribute.
5521: *
5522: * [ VC: Entity Name ]
1.102 daniel 5523: * Values of type ENTITY must match the Name production, values
1.140 daniel 5524: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5525: * name of an unparsed entity declared in the DTD.
1.99 daniel 5526: *
5527: * [ VC: Name Token ]
1.102 daniel 5528: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5529: * of type NMTOKENS must match Nmtokens.
5530: *
1.69 daniel 5531: * Returns the attribute type
1.22 daniel 5532: */
1.59 daniel 5533: int
1.66 daniel 5534: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5535: SHRINK;
1.152 daniel 5536: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5537: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5538: (NXT(4) == 'A')) {
5539: SKIP(5);
1.66 daniel 5540: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5541: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5542: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5543: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5544: SKIP(6);
5545: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5546: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5547: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5548: (NXT(4) == 'F')) {
5549: SKIP(5);
1.59 daniel 5550: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5551: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5552: SKIP(2);
5553: return(XML_ATTRIBUTE_ID);
1.152 daniel 5554: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5555: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5556: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5557: SKIP(6);
1.59 daniel 5558: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5559: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5560: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5561: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5562: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5563: SKIP(8);
1.59 daniel 5564: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5565: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5566: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5567: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5568: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5569: SKIP(8);
5570: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5571: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5572: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5573: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5574: (NXT(6) == 'N')) {
5575: SKIP(7);
1.59 daniel 5576: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5577: }
1.66 daniel 5578: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5579: }
5580:
1.50 daniel 5581: /**
5582: * xmlParseAttributeListDecl:
5583: * @ctxt: an XML parser context
5584: *
5585: * : parse the Attribute list def for an element
1.22 daniel 5586: *
5587: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5588: *
5589: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5590: *
1.22 daniel 5591: */
1.55 daniel 5592: void
5593: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5594: xmlChar *elemName;
5595: xmlChar *attrName;
1.103 daniel 5596: xmlEnumerationPtr tree;
1.22 daniel 5597:
1.152 daniel 5598: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5599: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5600: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5601: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5602: (NXT(8) == 'T')) {
1.187 daniel 5603: xmlParserInputPtr input = ctxt->input;
5604:
1.40 daniel 5605: SKIP(9);
1.59 daniel 5606: if (!IS_BLANK(CUR)) {
5607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5608: ctxt->sax->error(ctxt->userData,
5609: "Space required after '<!ATTLIST'\n");
1.123 daniel 5610: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5611: ctxt->wellFormed = 0;
1.180 daniel 5612: ctxt->disableSAX = 1;
1.59 daniel 5613: }
1.42 daniel 5614: SKIP_BLANKS;
1.59 daniel 5615: elemName = xmlParseName(ctxt);
5616: if (elemName == NULL) {
1.55 daniel 5617: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5618: ctxt->sax->error(ctxt->userData,
5619: "ATTLIST: no name for Element\n");
1.123 daniel 5620: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5621: ctxt->wellFormed = 0;
1.180 daniel 5622: ctxt->disableSAX = 1;
1.22 daniel 5623: return;
5624: }
1.42 daniel 5625: SKIP_BLANKS;
1.152 daniel 5626: while (RAW != '>') {
1.123 daniel 5627: const xmlChar *check = CUR_PTR;
1.59 daniel 5628: int type;
5629: int def;
1.123 daniel 5630: xmlChar *defaultValue = NULL;
1.59 daniel 5631:
1.103 daniel 5632: tree = NULL;
1.59 daniel 5633: attrName = xmlParseName(ctxt);
5634: if (attrName == NULL) {
5635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5636: ctxt->sax->error(ctxt->userData,
5637: "ATTLIST: no name for Attribute\n");
1.123 daniel 5638: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5639: ctxt->wellFormed = 0;
1.180 daniel 5640: ctxt->disableSAX = 1;
1.59 daniel 5641: break;
5642: }
1.97 daniel 5643: GROW;
1.59 daniel 5644: if (!IS_BLANK(CUR)) {
5645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5646: ctxt->sax->error(ctxt->userData,
1.59 daniel 5647: "Space required after the attribute name\n");
1.123 daniel 5648: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5649: ctxt->wellFormed = 0;
1.180 daniel 5650: ctxt->disableSAX = 1;
1.170 daniel 5651: if (attrName != NULL)
5652: xmlFree(attrName);
5653: if (defaultValue != NULL)
5654: xmlFree(defaultValue);
1.59 daniel 5655: break;
5656: }
5657: SKIP_BLANKS;
5658:
1.66 daniel 5659: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5660: if (type <= 0) {
5661: if (attrName != NULL)
5662: xmlFree(attrName);
5663: if (defaultValue != NULL)
5664: xmlFree(defaultValue);
5665: break;
5666: }
1.22 daniel 5667:
1.97 daniel 5668: GROW;
1.59 daniel 5669: if (!IS_BLANK(CUR)) {
5670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5671: ctxt->sax->error(ctxt->userData,
1.59 daniel 5672: "Space required after the attribute type\n");
1.123 daniel 5673: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5674: ctxt->wellFormed = 0;
1.180 daniel 5675: ctxt->disableSAX = 1;
1.170 daniel 5676: if (attrName != NULL)
5677: xmlFree(attrName);
5678: if (defaultValue != NULL)
5679: xmlFree(defaultValue);
5680: if (tree != NULL)
5681: xmlFreeEnumeration(tree);
1.59 daniel 5682: break;
5683: }
1.42 daniel 5684: SKIP_BLANKS;
1.59 daniel 5685:
5686: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5687: if (def <= 0) {
5688: if (attrName != NULL)
5689: xmlFree(attrName);
5690: if (defaultValue != NULL)
5691: xmlFree(defaultValue);
5692: if (tree != NULL)
5693: xmlFreeEnumeration(tree);
5694: break;
5695: }
1.59 daniel 5696:
1.97 daniel 5697: GROW;
1.152 daniel 5698: if (RAW != '>') {
1.59 daniel 5699: if (!IS_BLANK(CUR)) {
5700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5701: ctxt->sax->error(ctxt->userData,
1.59 daniel 5702: "Space required after the attribute default value\n");
1.123 daniel 5703: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5704: ctxt->wellFormed = 0;
1.180 daniel 5705: ctxt->disableSAX = 1;
1.170 daniel 5706: if (attrName != NULL)
5707: xmlFree(attrName);
5708: if (defaultValue != NULL)
5709: xmlFree(defaultValue);
5710: if (tree != NULL)
5711: xmlFreeEnumeration(tree);
1.59 daniel 5712: break;
5713: }
5714: SKIP_BLANKS;
5715: }
1.40 daniel 5716: if (check == CUR_PTR) {
1.55 daniel 5717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5718: ctxt->sax->error(ctxt->userData,
1.59 daniel 5719: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5720: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5721: if (attrName != NULL)
5722: xmlFree(attrName);
5723: if (defaultValue != NULL)
5724: xmlFree(defaultValue);
5725: if (tree != NULL)
5726: xmlFreeEnumeration(tree);
1.22 daniel 5727: break;
5728: }
1.171 daniel 5729: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5730: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5731: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5732: type, def, defaultValue, tree);
1.59 daniel 5733: if (attrName != NULL)
1.119 daniel 5734: xmlFree(attrName);
1.59 daniel 5735: if (defaultValue != NULL)
1.119 daniel 5736: xmlFree(defaultValue);
1.97 daniel 5737: GROW;
1.22 daniel 5738: }
1.187 daniel 5739: if (RAW == '>') {
5740: if (input != ctxt->input) {
5741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5742: ctxt->sax->error(ctxt->userData,
5743: "Attribute list declaration doesn't start and stop in the same entity\n");
5744: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5745: ctxt->wellFormed = 0;
5746: ctxt->disableSAX = 1;
5747: }
1.40 daniel 5748: NEXT;
1.187 daniel 5749: }
1.22 daniel 5750:
1.119 daniel 5751: xmlFree(elemName);
1.22 daniel 5752: }
5753: }
5754:
1.50 daniel 5755: /**
1.61 daniel 5756: * xmlParseElementMixedContentDecl:
5757: * @ctxt: an XML parser context
5758: *
5759: * parse the declaration for a Mixed Element content
5760: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5761: *
5762: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5763: * '(' S? '#PCDATA' S? ')'
5764: *
1.99 daniel 5765: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5766: *
5767: * [ VC: No Duplicate Types ]
1.117 daniel 5768: * The same name must not appear more than once in a single
5769: * mixed-content declaration.
1.99 daniel 5770: *
1.61 daniel 5771: * returns: the list of the xmlElementContentPtr describing the element choices
5772: */
5773: xmlElementContentPtr
1.62 daniel 5774: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5775: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5776: xmlChar *elem = NULL;
1.61 daniel 5777:
1.97 daniel 5778: GROW;
1.152 daniel 5779: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5780: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5781: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5782: (NXT(6) == 'A')) {
5783: SKIP(7);
5784: SKIP_BLANKS;
1.91 daniel 5785: SHRINK;
1.152 daniel 5786: if (RAW == ')') {
1.187 daniel 5787: ctxt->entity = ctxt->input;
1.63 daniel 5788: NEXT;
5789: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5790: if (RAW == '*') {
1.136 daniel 5791: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5792: NEXT;
5793: }
1.63 daniel 5794: return(ret);
5795: }
1.152 daniel 5796: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5797: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5798: if (ret == NULL) return(NULL);
1.99 daniel 5799: }
1.152 daniel 5800: while (RAW == '|') {
1.64 daniel 5801: NEXT;
1.61 daniel 5802: if (elem == NULL) {
5803: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5804: if (ret == NULL) return(NULL);
5805: ret->c1 = cur;
1.64 daniel 5806: cur = ret;
1.61 daniel 5807: } else {
1.64 daniel 5808: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5809: if (n == NULL) return(NULL);
5810: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5811: cur->c2 = n;
5812: cur = n;
1.119 daniel 5813: xmlFree(elem);
1.61 daniel 5814: }
5815: SKIP_BLANKS;
5816: elem = xmlParseName(ctxt);
5817: if (elem == NULL) {
5818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5819: ctxt->sax->error(ctxt->userData,
1.61 daniel 5820: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5821: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5822: ctxt->wellFormed = 0;
1.180 daniel 5823: ctxt->disableSAX = 1;
1.61 daniel 5824: xmlFreeElementContent(cur);
5825: return(NULL);
5826: }
5827: SKIP_BLANKS;
1.97 daniel 5828: GROW;
1.61 daniel 5829: }
1.152 daniel 5830: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5831: if (elem != NULL) {
1.61 daniel 5832: cur->c2 = xmlNewElementContent(elem,
5833: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5834: xmlFree(elem);
1.66 daniel 5835: }
1.65 daniel 5836: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5837: ctxt->entity = ctxt->input;
1.64 daniel 5838: SKIP(2);
1.61 daniel 5839: } else {
1.119 daniel 5840: if (elem != NULL) xmlFree(elem);
1.61 daniel 5841: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5842: ctxt->sax->error(ctxt->userData,
1.63 daniel 5843: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5844: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5845: ctxt->wellFormed = 0;
1.180 daniel 5846: ctxt->disableSAX = 1;
1.61 daniel 5847: xmlFreeElementContent(ret);
5848: return(NULL);
5849: }
5850:
5851: } else {
5852: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5853: ctxt->sax->error(ctxt->userData,
1.61 daniel 5854: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5855: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5856: ctxt->wellFormed = 0;
1.180 daniel 5857: ctxt->disableSAX = 1;
1.61 daniel 5858: }
5859: return(ret);
5860: }
5861:
5862: /**
5863: * xmlParseElementChildrenContentDecl:
1.50 daniel 5864: * @ctxt: an XML parser context
5865: *
1.61 daniel 5866: * parse the declaration for a Mixed Element content
5867: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5868: *
1.61 daniel 5869: *
1.22 daniel 5870: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5871: *
5872: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5873: *
5874: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5875: *
5876: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5877: *
1.99 daniel 5878: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5879: * TODO Parameter-entity replacement text must be properly nested
5880: * with parenthetized groups. That is to say, if either of the
5881: * opening or closing parentheses in a choice, seq, or Mixed
5882: * construct is contained in the replacement text for a parameter
5883: * entity, both must be contained in the same replacement text. For
5884: * interoperability, if a parameter-entity reference appears in a
5885: * choice, seq, or Mixed construct, its replacement text should not
5886: * be empty, and neither the first nor last non-blank character of
5887: * the replacement text should be a connector (| or ,).
5888: *
1.62 daniel 5889: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5890: * hierarchy.
5891: */
5892: xmlElementContentPtr
1.62 daniel 5893: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5894: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5895: xmlChar *elem;
5896: xmlChar type = 0;
1.62 daniel 5897:
5898: SKIP_BLANKS;
1.94 daniel 5899: GROW;
1.152 daniel 5900: if (RAW == '(') {
1.63 daniel 5901: /* Recurse on first child */
1.62 daniel 5902: NEXT;
5903: SKIP_BLANKS;
5904: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5905: SKIP_BLANKS;
1.101 daniel 5906: GROW;
1.62 daniel 5907: } else {
5908: elem = xmlParseName(ctxt);
5909: if (elem == NULL) {
5910: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5911: ctxt->sax->error(ctxt->userData,
1.62 daniel 5912: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5913: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5914: ctxt->wellFormed = 0;
1.180 daniel 5915: ctxt->disableSAX = 1;
1.62 daniel 5916: return(NULL);
5917: }
5918: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5919: GROW;
1.152 daniel 5920: if (RAW == '?') {
1.104 daniel 5921: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5922: NEXT;
1.152 daniel 5923: } else if (RAW == '*') {
1.104 daniel 5924: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5925: NEXT;
1.152 daniel 5926: } else if (RAW == '+') {
1.104 daniel 5927: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5928: NEXT;
5929: } else {
1.104 daniel 5930: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5931: }
1.119 daniel 5932: xmlFree(elem);
1.101 daniel 5933: GROW;
1.62 daniel 5934: }
5935: SKIP_BLANKS;
1.91 daniel 5936: SHRINK;
1.152 daniel 5937: while (RAW != ')') {
1.63 daniel 5938: /*
5939: * Each loop we parse one separator and one element.
5940: */
1.152 daniel 5941: if (RAW == ',') {
1.62 daniel 5942: if (type == 0) type = CUR;
5943:
5944: /*
5945: * Detect "Name | Name , Name" error
5946: */
5947: else if (type != CUR) {
5948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5949: ctxt->sax->error(ctxt->userData,
1.62 daniel 5950: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5951: type);
1.123 daniel 5952: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5953: ctxt->wellFormed = 0;
1.180 daniel 5954: ctxt->disableSAX = 1;
1.170 daniel 5955: if ((op != NULL) && (op != ret))
5956: xmlFreeElementContent(op);
1.211 veillard 5957: if ((last != NULL) && (last != ret) &&
5958: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5959: xmlFreeElementContent(last);
5960: if (ret != NULL)
5961: xmlFreeElementContent(ret);
1.62 daniel 5962: return(NULL);
5963: }
1.64 daniel 5964: NEXT;
1.62 daniel 5965:
1.63 daniel 5966: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5967: if (op == NULL) {
5968: xmlFreeElementContent(ret);
5969: return(NULL);
5970: }
5971: if (last == NULL) {
5972: op->c1 = ret;
1.65 daniel 5973: ret = cur = op;
1.63 daniel 5974: } else {
5975: cur->c2 = op;
5976: op->c1 = last;
5977: cur =op;
1.65 daniel 5978: last = NULL;
1.63 daniel 5979: }
1.152 daniel 5980: } else if (RAW == '|') {
1.62 daniel 5981: if (type == 0) type = CUR;
5982:
5983: /*
1.63 daniel 5984: * Detect "Name , Name | Name" error
1.62 daniel 5985: */
5986: else if (type != CUR) {
5987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5988: ctxt->sax->error(ctxt->userData,
1.62 daniel 5989: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5990: type);
1.123 daniel 5991: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5992: ctxt->wellFormed = 0;
1.180 daniel 5993: ctxt->disableSAX = 1;
1.211 veillard 5994: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 5995: xmlFreeElementContent(op);
1.211 veillard 5996: if ((last != NULL) && (last != ret) &&
5997: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5998: xmlFreeElementContent(last);
5999: if (ret != NULL)
6000: xmlFreeElementContent(ret);
1.62 daniel 6001: return(NULL);
6002: }
1.64 daniel 6003: NEXT;
1.62 daniel 6004:
1.63 daniel 6005: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
6006: if (op == NULL) {
1.170 daniel 6007: if ((op != NULL) && (op != ret))
6008: xmlFreeElementContent(op);
1.211 veillard 6009: if ((last != NULL) && (last != ret) &&
6010: (last != ret->c1) && (last != ret->c2))
1.170 daniel 6011: xmlFreeElementContent(last);
6012: if (ret != NULL)
6013: xmlFreeElementContent(ret);
1.63 daniel 6014: return(NULL);
6015: }
6016: if (last == NULL) {
6017: op->c1 = ret;
1.65 daniel 6018: ret = cur = op;
1.63 daniel 6019: } else {
6020: cur->c2 = op;
6021: op->c1 = last;
6022: cur =op;
1.65 daniel 6023: last = NULL;
1.63 daniel 6024: }
1.62 daniel 6025: } else {
6026: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6027: ctxt->sax->error(ctxt->userData,
1.62 daniel 6028: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
6029: ctxt->wellFormed = 0;
1.180 daniel 6030: ctxt->disableSAX = 1;
1.123 daniel 6031: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 6032: if ((op != NULL) && (op != ret))
6033: xmlFreeElementContent(op);
1.211 veillard 6034: if ((last != NULL) && (last != ret) &&
6035: (last != ret->c1) && (last != ret->c2))
1.170 daniel 6036: xmlFreeElementContent(last);
6037: if (ret != NULL)
6038: xmlFreeElementContent(ret);
1.62 daniel 6039: return(NULL);
6040: }
1.101 daniel 6041: GROW;
1.62 daniel 6042: SKIP_BLANKS;
1.101 daniel 6043: GROW;
1.152 daniel 6044: if (RAW == '(') {
1.63 daniel 6045: /* Recurse on second child */
1.62 daniel 6046: NEXT;
6047: SKIP_BLANKS;
1.65 daniel 6048: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 6049: SKIP_BLANKS;
6050: } else {
6051: elem = xmlParseName(ctxt);
6052: if (elem == NULL) {
6053: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6054: ctxt->sax->error(ctxt->userData,
1.122 daniel 6055: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 6056: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 6057: ctxt->wellFormed = 0;
1.180 daniel 6058: ctxt->disableSAX = 1;
1.170 daniel 6059: if ((op != NULL) && (op != ret))
6060: xmlFreeElementContent(op);
1.211 veillard 6061: if ((last != NULL) && (last != ret) &&
6062: (last != ret->c1) && (last != ret->c2))
1.170 daniel 6063: xmlFreeElementContent(last);
6064: if (ret != NULL)
6065: xmlFreeElementContent(ret);
1.62 daniel 6066: return(NULL);
6067: }
1.65 daniel 6068: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 6069: xmlFree(elem);
1.152 daniel 6070: if (RAW == '?') {
1.105 daniel 6071: last->ocur = XML_ELEMENT_CONTENT_OPT;
6072: NEXT;
1.152 daniel 6073: } else if (RAW == '*') {
1.105 daniel 6074: last->ocur = XML_ELEMENT_CONTENT_MULT;
6075: NEXT;
1.152 daniel 6076: } else if (RAW == '+') {
1.105 daniel 6077: last->ocur = XML_ELEMENT_CONTENT_PLUS;
6078: NEXT;
6079: } else {
6080: last->ocur = XML_ELEMENT_CONTENT_ONCE;
6081: }
1.63 daniel 6082: }
6083: SKIP_BLANKS;
1.97 daniel 6084: GROW;
1.64 daniel 6085: }
1.65 daniel 6086: if ((cur != NULL) && (last != NULL)) {
6087: cur->c2 = last;
1.62 daniel 6088: }
1.187 daniel 6089: ctxt->entity = ctxt->input;
1.62 daniel 6090: NEXT;
1.152 daniel 6091: if (RAW == '?') {
1.62 daniel 6092: ret->ocur = XML_ELEMENT_CONTENT_OPT;
6093: NEXT;
1.152 daniel 6094: } else if (RAW == '*') {
1.62 daniel 6095: ret->ocur = XML_ELEMENT_CONTENT_MULT;
6096: NEXT;
1.152 daniel 6097: } else if (RAW == '+') {
1.62 daniel 6098: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6099: NEXT;
6100: }
6101: return(ret);
1.61 daniel 6102: }
6103:
6104: /**
6105: * xmlParseElementContentDecl:
6106: * @ctxt: an XML parser context
6107: * @name: the name of the element being defined.
6108: * @result: the Element Content pointer will be stored here if any
1.22 daniel 6109: *
1.61 daniel 6110: * parse the declaration for an Element content either Mixed or Children,
6111: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6112: *
6113: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 6114: *
1.61 daniel 6115: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 6116: */
6117:
1.61 daniel 6118: int
1.123 daniel 6119: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 6120: xmlElementContentPtr *result) {
6121:
6122: xmlElementContentPtr tree = NULL;
1.187 daniel 6123: xmlParserInputPtr input = ctxt->input;
1.61 daniel 6124: int res;
6125:
6126: *result = NULL;
6127:
1.152 daniel 6128: if (RAW != '(') {
1.61 daniel 6129: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6130: ctxt->sax->error(ctxt->userData,
1.61 daniel 6131: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 6132: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 6133: ctxt->wellFormed = 0;
1.180 daniel 6134: ctxt->disableSAX = 1;
1.61 daniel 6135: return(-1);
6136: }
6137: NEXT;
1.97 daniel 6138: GROW;
1.61 daniel 6139: SKIP_BLANKS;
1.152 daniel 6140: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 6141: (NXT(2) == 'C') && (NXT(3) == 'D') &&
6142: (NXT(4) == 'A') && (NXT(5) == 'T') &&
6143: (NXT(6) == 'A')) {
1.62 daniel 6144: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 6145: res = XML_ELEMENT_TYPE_MIXED;
6146: } else {
1.62 daniel 6147: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 6148: res = XML_ELEMENT_TYPE_ELEMENT;
6149: }
1.187 daniel 6150: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
6151: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6152: ctxt->sax->error(ctxt->userData,
6153: "Element content declaration doesn't start and stop in the same entity\n");
6154: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6155: ctxt->wellFormed = 0;
6156: ctxt->disableSAX = 1;
6157: }
1.61 daniel 6158: SKIP_BLANKS;
1.63 daniel 6159: /****************************
1.152 daniel 6160: if (RAW != ')') {
1.61 daniel 6161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6162: ctxt->sax->error(ctxt->userData,
1.61 daniel 6163: "xmlParseElementContentDecl : ')' expected\n");
6164: ctxt->wellFormed = 0;
1.180 daniel 6165: ctxt->disableSAX = 1;
1.61 daniel 6166: return(-1);
6167: }
1.63 daniel 6168: ****************************/
6169: *result = tree;
1.61 daniel 6170: return(res);
1.22 daniel 6171: }
6172:
1.50 daniel 6173: /**
6174: * xmlParseElementDecl:
6175: * @ctxt: an XML parser context
6176: *
6177: * parse an Element declaration.
1.22 daniel 6178: *
6179: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6180: *
1.99 daniel 6181: * [ VC: Unique Element Type Declaration ]
1.117 daniel 6182: * No element type may be declared more than once
1.69 daniel 6183: *
6184: * Returns the type of the element, or -1 in case of error
1.22 daniel 6185: */
1.59 daniel 6186: int
1.55 daniel 6187: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6188: xmlChar *name;
1.59 daniel 6189: int ret = -1;
1.61 daniel 6190: xmlElementContentPtr content = NULL;
1.22 daniel 6191:
1.97 daniel 6192: GROW;
1.152 daniel 6193: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6194: (NXT(2) == 'E') && (NXT(3) == 'L') &&
6195: (NXT(4) == 'E') && (NXT(5) == 'M') &&
6196: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 6197: (NXT(8) == 'T')) {
1.187 daniel 6198: xmlParserInputPtr input = ctxt->input;
6199:
1.40 daniel 6200: SKIP(9);
1.59 daniel 6201: if (!IS_BLANK(CUR)) {
6202: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6203: ctxt->sax->error(ctxt->userData,
1.59 daniel 6204: "Space required after 'ELEMENT'\n");
1.123 daniel 6205: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6206: ctxt->wellFormed = 0;
1.180 daniel 6207: ctxt->disableSAX = 1;
1.59 daniel 6208: }
1.42 daniel 6209: SKIP_BLANKS;
1.22 daniel 6210: name = xmlParseName(ctxt);
6211: if (name == NULL) {
1.55 daniel 6212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6213: ctxt->sax->error(ctxt->userData,
1.59 daniel 6214: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 6215: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6216: ctxt->wellFormed = 0;
1.180 daniel 6217: ctxt->disableSAX = 1;
1.59 daniel 6218: return(-1);
6219: }
6220: if (!IS_BLANK(CUR)) {
6221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6222: ctxt->sax->error(ctxt->userData,
1.59 daniel 6223: "Space required after the element name\n");
1.123 daniel 6224: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6225: ctxt->wellFormed = 0;
1.180 daniel 6226: ctxt->disableSAX = 1;
1.22 daniel 6227: }
1.42 daniel 6228: SKIP_BLANKS;
1.152 daniel 6229: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 6230: (NXT(2) == 'P') && (NXT(3) == 'T') &&
6231: (NXT(4) == 'Y')) {
6232: SKIP(5);
1.22 daniel 6233: /*
6234: * Element must always be empty.
6235: */
1.59 daniel 6236: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 6237: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 6238: (NXT(2) == 'Y')) {
6239: SKIP(3);
1.22 daniel 6240: /*
6241: * Element is a generic container.
6242: */
1.59 daniel 6243: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 6244: } else if (RAW == '(') {
1.61 daniel 6245: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 6246: } else {
1.98 daniel 6247: /*
6248: * [ WFC: PEs in Internal Subset ] error handling.
6249: */
1.152 daniel 6250: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 6251: (ctxt->inputNr == 1)) {
6252: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6253: ctxt->sax->error(ctxt->userData,
6254: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 6255: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 6256: } else {
6257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6258: ctxt->sax->error(ctxt->userData,
6259: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 6260: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 6261: }
1.61 daniel 6262: ctxt->wellFormed = 0;
1.180 daniel 6263: ctxt->disableSAX = 1;
1.119 daniel 6264: if (name != NULL) xmlFree(name);
1.61 daniel 6265: return(-1);
1.22 daniel 6266: }
1.142 daniel 6267:
6268: SKIP_BLANKS;
6269: /*
6270: * Pop-up of finished entities.
6271: */
1.152 daniel 6272: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 6273: xmlPopInput(ctxt);
1.42 daniel 6274: SKIP_BLANKS;
1.142 daniel 6275:
1.152 daniel 6276: if (RAW != '>') {
1.55 daniel 6277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6278: ctxt->sax->error(ctxt->userData,
1.31 daniel 6279: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 6280: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 6281: ctxt->wellFormed = 0;
1.180 daniel 6282: ctxt->disableSAX = 1;
1.61 daniel 6283: } else {
1.187 daniel 6284: if (input != ctxt->input) {
6285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6286: ctxt->sax->error(ctxt->userData,
6287: "Element declaration doesn't start and stop in the same entity\n");
6288: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6289: ctxt->wellFormed = 0;
6290: ctxt->disableSAX = 1;
6291: }
6292:
1.40 daniel 6293: NEXT;
1.171 daniel 6294: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6295: (ctxt->sax->elementDecl != NULL))
1.76 daniel 6296: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6297: content);
1.61 daniel 6298: }
1.84 daniel 6299: if (content != NULL) {
6300: xmlFreeElementContent(content);
6301: }
1.61 daniel 6302: if (name != NULL) {
1.119 daniel 6303: xmlFree(name);
1.61 daniel 6304: }
1.22 daniel 6305: }
1.59 daniel 6306: return(ret);
1.22 daniel 6307: }
6308:
1.50 daniel 6309: /**
6310: * xmlParseMarkupDecl:
6311: * @ctxt: an XML parser context
6312: *
6313: * parse Markup declarations
1.22 daniel 6314: *
6315: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6316: * NotationDecl | PI | Comment
6317: *
1.98 daniel 6318: * [ VC: Proper Declaration/PE Nesting ]
6319: * TODO Parameter-entity replacement text must be properly nested with
6320: * markup declarations. That is to say, if either the first character
6321: * or the last character of a markup declaration (markupdecl above) is
6322: * contained in the replacement text for a parameter-entity reference,
6323: * both must be contained in the same replacement text.
6324: *
6325: * [ WFC: PEs in Internal Subset ]
6326: * In the internal DTD subset, parameter-entity references can occur
6327: * only where markup declarations can occur, not within markup declarations.
6328: * (This does not apply to references that occur in external parameter
6329: * entities or to the external subset.)
1.22 daniel 6330: */
1.55 daniel 6331: void
6332: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 6333: GROW;
1.22 daniel 6334: xmlParseElementDecl(ctxt);
6335: xmlParseAttributeListDecl(ctxt);
6336: xmlParseEntityDecl(ctxt);
6337: xmlParseNotationDecl(ctxt);
6338: xmlParsePI(ctxt);
1.114 daniel 6339: xmlParseComment(ctxt);
1.98 daniel 6340: /*
6341: * This is only for internal subset. On external entities,
6342: * the replacement is done before parsing stage
6343: */
6344: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6345: xmlParsePEReference(ctxt);
1.97 daniel 6346: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 6347: }
6348:
1.50 daniel 6349: /**
1.76 daniel 6350: * xmlParseTextDecl:
6351: * @ctxt: an XML parser context
6352: *
6353: * parse an XML declaration header for external entities
6354: *
6355: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 6356: *
6357: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 6358: */
6359:
1.172 daniel 6360: void
1.76 daniel 6361: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6362: xmlChar *version;
1.76 daniel 6363:
6364: /*
6365: * We know that '<?xml' is here.
6366: */
1.193 daniel 6367: if ((RAW == '<') && (NXT(1) == '?') &&
6368: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6369: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6370: SKIP(5);
6371: } else {
6372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6373: ctxt->sax->error(ctxt->userData,
6374: "Text declaration '<?xml' required\n");
6375: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6376: ctxt->wellFormed = 0;
6377: ctxt->disableSAX = 1;
6378:
6379: return;
6380: }
1.76 daniel 6381:
6382: if (!IS_BLANK(CUR)) {
6383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6384: ctxt->sax->error(ctxt->userData,
6385: "Space needed after '<?xml'\n");
1.123 daniel 6386: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6387: ctxt->wellFormed = 0;
1.180 daniel 6388: ctxt->disableSAX = 1;
1.76 daniel 6389: }
6390: SKIP_BLANKS;
6391:
6392: /*
6393: * We may have the VersionInfo here.
6394: */
6395: version = xmlParseVersionInfo(ctxt);
6396: if (version == NULL)
6397: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 6398: ctxt->input->version = version;
1.76 daniel 6399:
6400: /*
6401: * We must have the encoding declaration
6402: */
6403: if (!IS_BLANK(CUR)) {
6404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6405: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6406: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6407: ctxt->wellFormed = 0;
1.180 daniel 6408: ctxt->disableSAX = 1;
1.76 daniel 6409: }
1.195 daniel 6410: xmlParseEncodingDecl(ctxt);
1.193 daniel 6411: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6412: /*
6413: * The XML REC instructs us to stop parsing right here
6414: */
6415: return;
6416: }
1.76 daniel 6417:
6418: SKIP_BLANKS;
1.152 daniel 6419: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6420: SKIP(2);
1.152 daniel 6421: } else if (RAW == '>') {
1.76 daniel 6422: /* Deprecated old WD ... */
6423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6424: ctxt->sax->error(ctxt->userData,
6425: "XML declaration must end-up with '?>'\n");
1.123 daniel 6426: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6427: ctxt->wellFormed = 0;
1.180 daniel 6428: ctxt->disableSAX = 1;
1.76 daniel 6429: NEXT;
6430: } else {
6431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6432: ctxt->sax->error(ctxt->userData,
6433: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6434: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6435: ctxt->wellFormed = 0;
1.180 daniel 6436: ctxt->disableSAX = 1;
1.76 daniel 6437: MOVETO_ENDTAG(CUR_PTR);
6438: NEXT;
6439: }
6440: }
6441:
6442: /*
6443: * xmlParseConditionalSections
6444: * @ctxt: an XML parser context
6445: *
6446: * TODO : Conditionnal section are not yet supported !
6447: *
6448: * [61] conditionalSect ::= includeSect | ignoreSect
6449: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6450: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6451: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6452: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6453: */
6454:
6455: void
6456: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6457: SKIP(3);
6458: SKIP_BLANKS;
1.168 daniel 6459: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6460: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6461: (NXT(6) == 'E')) {
1.165 daniel 6462: SKIP(7);
1.168 daniel 6463: SKIP_BLANKS;
6464: if (RAW != '[') {
6465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6466: ctxt->sax->error(ctxt->userData,
6467: "XML conditional section '[' expected\n");
6468: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6469: ctxt->wellFormed = 0;
1.180 daniel 6470: ctxt->disableSAX = 1;
1.168 daniel 6471: } else {
6472: NEXT;
6473: }
1.165 daniel 6474: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6475: (NXT(2) != '>'))) {
6476: const xmlChar *check = CUR_PTR;
6477: int cons = ctxt->input->consumed;
6478: int tok = ctxt->token;
6479:
6480: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6481: xmlParseConditionalSections(ctxt);
6482: } else if (IS_BLANK(CUR)) {
6483: NEXT;
6484: } else if (RAW == '%') {
6485: xmlParsePEReference(ctxt);
6486: } else
6487: xmlParseMarkupDecl(ctxt);
6488:
6489: /*
6490: * Pop-up of finished entities.
6491: */
6492: while ((RAW == 0) && (ctxt->inputNr > 1))
6493: xmlPopInput(ctxt);
6494:
6495: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6496: (tok == ctxt->token)) {
6497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6498: ctxt->sax->error(ctxt->userData,
6499: "Content error in the external subset\n");
6500: ctxt->wellFormed = 0;
1.180 daniel 6501: ctxt->disableSAX = 1;
1.165 daniel 6502: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6503: break;
6504: }
6505: }
1.168 daniel 6506: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6507: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6508: int state;
6509:
1.168 daniel 6510: SKIP(6);
6511: SKIP_BLANKS;
6512: if (RAW != '[') {
6513: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6514: ctxt->sax->error(ctxt->userData,
6515: "XML conditional section '[' expected\n");
6516: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6517: ctxt->wellFormed = 0;
1.180 daniel 6518: ctxt->disableSAX = 1;
1.168 daniel 6519: } else {
6520: NEXT;
6521: }
1.171 daniel 6522:
1.143 daniel 6523: /*
1.171 daniel 6524: * Parse up to the end of the conditionnal section
6525: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6526: */
1.171 daniel 6527: state = ctxt->disableSAX;
1.165 daniel 6528: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6529: (NXT(2) != '>'))) {
1.171 daniel 6530: const xmlChar *check = CUR_PTR;
6531: int cons = ctxt->input->consumed;
6532: int tok = ctxt->token;
6533:
6534: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6535: xmlParseConditionalSections(ctxt);
6536: } else if (IS_BLANK(CUR)) {
6537: NEXT;
6538: } else if (RAW == '%') {
6539: xmlParsePEReference(ctxt);
6540: } else
6541: xmlParseMarkupDecl(ctxt);
6542:
1.165 daniel 6543: /*
6544: * Pop-up of finished entities.
6545: */
6546: while ((RAW == 0) && (ctxt->inputNr > 1))
6547: xmlPopInput(ctxt);
1.143 daniel 6548:
1.171 daniel 6549: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6550: (tok == ctxt->token)) {
6551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6552: ctxt->sax->error(ctxt->userData,
6553: "Content error in the external subset\n");
6554: ctxt->wellFormed = 0;
1.180 daniel 6555: ctxt->disableSAX = 1;
1.171 daniel 6556: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6557: break;
6558: }
1.165 daniel 6559: }
1.171 daniel 6560: ctxt->disableSAX = state;
1.168 daniel 6561: } else {
6562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6563: ctxt->sax->error(ctxt->userData,
6564: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6565: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6566: ctxt->wellFormed = 0;
1.180 daniel 6567: ctxt->disableSAX = 1;
1.143 daniel 6568: }
6569:
1.152 daniel 6570: if (RAW == 0)
1.143 daniel 6571: SHRINK;
6572:
1.152 daniel 6573: if (RAW == 0) {
1.76 daniel 6574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6575: ctxt->sax->error(ctxt->userData,
6576: "XML conditional section not closed\n");
1.123 daniel 6577: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6578: ctxt->wellFormed = 0;
1.180 daniel 6579: ctxt->disableSAX = 1;
1.143 daniel 6580: } else {
6581: SKIP(3);
1.76 daniel 6582: }
6583: }
6584:
6585: /**
1.124 daniel 6586: * xmlParseExternalSubset:
1.76 daniel 6587: * @ctxt: an XML parser context
1.124 daniel 6588: * @ExternalID: the external identifier
6589: * @SystemID: the system identifier (or URL)
1.76 daniel 6590: *
6591: * parse Markup declarations from an external subset
6592: *
6593: * [30] extSubset ::= textDecl? extSubsetDecl
6594: *
6595: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6596: */
6597: void
1.123 daniel 6598: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6599: const xmlChar *SystemID) {
1.132 daniel 6600: GROW;
1.152 daniel 6601: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6602: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6603: (NXT(4) == 'l')) {
1.172 daniel 6604: xmlParseTextDecl(ctxt);
1.193 daniel 6605: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6606: /*
6607: * The XML REC instructs us to stop parsing right here
6608: */
6609: ctxt->instate = XML_PARSER_EOF;
6610: return;
6611: }
1.76 daniel 6612: }
1.79 daniel 6613: if (ctxt->myDoc == NULL) {
1.116 daniel 6614: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6615: }
6616: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6617: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6618:
1.96 daniel 6619: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6620: ctxt->external = 1;
1.152 daniel 6621: while (((RAW == '<') && (NXT(1) == '?')) ||
6622: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6623: IS_BLANK(CUR)) {
1.123 daniel 6624: const xmlChar *check = CUR_PTR;
1.115 daniel 6625: int cons = ctxt->input->consumed;
1.164 daniel 6626: int tok = ctxt->token;
1.115 daniel 6627:
1.152 daniel 6628: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6629: xmlParseConditionalSections(ctxt);
6630: } else if (IS_BLANK(CUR)) {
6631: NEXT;
1.152 daniel 6632: } else if (RAW == '%') {
1.76 daniel 6633: xmlParsePEReference(ctxt);
6634: } else
6635: xmlParseMarkupDecl(ctxt);
1.77 daniel 6636:
6637: /*
6638: * Pop-up of finished entities.
6639: */
1.166 daniel 6640: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6641: xmlPopInput(ctxt);
6642:
1.164 daniel 6643: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6644: (tok == ctxt->token)) {
1.115 daniel 6645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6646: ctxt->sax->error(ctxt->userData,
6647: "Content error in the external subset\n");
6648: ctxt->wellFormed = 0;
1.180 daniel 6649: ctxt->disableSAX = 1;
1.123 daniel 6650: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6651: break;
6652: }
1.76 daniel 6653: }
6654:
1.152 daniel 6655: if (RAW != 0) {
1.76 daniel 6656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6657: ctxt->sax->error(ctxt->userData,
6658: "Extra content at the end of the document\n");
1.123 daniel 6659: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6660: ctxt->wellFormed = 0;
1.180 daniel 6661: ctxt->disableSAX = 1;
1.76 daniel 6662: }
6663:
6664: }
6665:
6666: /**
1.77 daniel 6667: * xmlParseReference:
6668: * @ctxt: an XML parser context
6669: *
6670: * parse and handle entity references in content, depending on the SAX
6671: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6672: * CharRef, a predefined entity, if there is no reference() callback.
6673: * or if the parser was asked to switch to that mode.
1.77 daniel 6674: *
6675: * [67] Reference ::= EntityRef | CharRef
6676: */
6677: void
6678: xmlParseReference(xmlParserCtxtPtr ctxt) {
6679: xmlEntityPtr ent;
1.123 daniel 6680: xmlChar *val;
1.152 daniel 6681: if (RAW != '&') return;
1.77 daniel 6682:
1.113 daniel 6683: if (ctxt->inputNr > 1) {
1.123 daniel 6684: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6685:
1.171 daniel 6686: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6687: (!ctxt->disableSAX))
1.113 daniel 6688: ctxt->sax->characters(ctxt->userData, cur, 1);
6689: if (ctxt->token == '&')
6690: ctxt->token = 0;
6691: else {
6692: SKIP(1);
6693: }
6694: return;
6695: }
1.77 daniel 6696: if (NXT(1) == '#') {
1.152 daniel 6697: int i = 0;
1.153 daniel 6698: xmlChar out[10];
6699: int hex = NXT(2);
1.77 daniel 6700: int val = xmlParseCharRef(ctxt);
1.152 daniel 6701:
1.198 daniel 6702: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 6703: /*
6704: * So we are using non-UTF-8 buffers
6705: * Check that the char fit on 8bits, if not
6706: * generate a CharRef.
6707: */
6708: if (val <= 0xFF) {
6709: out[0] = val;
6710: out[1] = 0;
1.171 daniel 6711: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6712: (!ctxt->disableSAX))
1.153 daniel 6713: ctxt->sax->characters(ctxt->userData, out, 1);
6714: } else {
6715: if ((hex == 'x') || (hex == 'X'))
6716: sprintf((char *)out, "#x%X", val);
6717: else
6718: sprintf((char *)out, "#%d", val);
1.171 daniel 6719: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6720: (!ctxt->disableSAX))
1.153 daniel 6721: ctxt->sax->reference(ctxt->userData, out);
6722: }
6723: } else {
6724: /*
6725: * Just encode the value in UTF-8
6726: */
6727: COPY_BUF(0 ,out, i, val);
6728: out[i] = 0;
1.171 daniel 6729: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6730: (!ctxt->disableSAX))
1.153 daniel 6731: ctxt->sax->characters(ctxt->userData, out, i);
6732: }
1.77 daniel 6733: } else {
6734: ent = xmlParseEntityRef(ctxt);
6735: if (ent == NULL) return;
6736: if ((ent->name != NULL) &&
1.159 daniel 6737: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6738: xmlNodePtr list = NULL;
6739: int ret;
6740:
6741:
6742: /*
6743: * The first reference to the entity trigger a parsing phase
6744: * where the ent->children is filled with the result from
6745: * the parsing.
6746: */
6747: if (ent->children == NULL) {
6748: xmlChar *value;
6749: value = ent->content;
6750:
6751: /*
6752: * Check that this entity is well formed
6753: */
6754: if ((value != NULL) &&
6755: (value[1] == 0) && (value[0] == '<') &&
6756: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6757: /*
6758: * TODO: get definite answer on this !!!
6759: * Lots of entity decls are used to declare a single
6760: * char
6761: * <!ENTITY lt "<">
6762: * Which seems to be valid since
6763: * 2.4: The ampersand character (&) and the left angle
6764: * bracket (<) may appear in their literal form only
6765: * when used ... They are also legal within the literal
6766: * entity value of an internal entity declaration;i
6767: * see "4.3.2 Well-Formed Parsed Entities".
6768: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6769: * Looking at the OASIS test suite and James Clark
6770: * tests, this is broken. However the XML REC uses
6771: * it. Is the XML REC not well-formed ????
6772: * This is a hack to avoid this problem
6773: */
6774: list = xmlNewDocText(ctxt->myDoc, value);
6775: if (list != NULL) {
6776: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6777: (ent->children == NULL)) {
6778: ent->children = list;
6779: ent->last = list;
6780: list->parent = (xmlNodePtr) ent;
6781: } else {
6782: xmlFreeNodeList(list);
6783: }
6784: } else if (list != NULL) {
6785: xmlFreeNodeList(list);
6786: }
1.181 daniel 6787: } else {
1.180 daniel 6788: /*
6789: * 4.3.2: An internal general parsed entity is well-formed
6790: * if its replacement text matches the production labeled
6791: * content.
6792: */
1.185 daniel 6793: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6794: ctxt->depth++;
1.180 daniel 6795: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6796: ctxt->sax, NULL, ctxt->depth,
6797: value, &list);
6798: ctxt->depth--;
6799: } else if (ent->etype ==
6800: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6801: ctxt->depth++;
1.180 daniel 6802: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6803: ctxt->sax, NULL, ctxt->depth,
6804: ent->SystemID, ent->ExternalID, &list);
6805: ctxt->depth--;
6806: } else {
1.180 daniel 6807: ret = -1;
6808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6809: ctxt->sax->error(ctxt->userData,
6810: "Internal: invalid entity type\n");
6811: }
1.185 daniel 6812: if (ret == XML_ERR_ENTITY_LOOP) {
6813: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6814: ctxt->sax->error(ctxt->userData,
6815: "Detected entity reference loop\n");
6816: ctxt->wellFormed = 0;
6817: ctxt->disableSAX = 1;
6818: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6819: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6820: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6821: (ent->children == NULL)) {
6822: ent->children = list;
6823: while (list != NULL) {
6824: list->parent = (xmlNodePtr) ent;
6825: if (list->next == NULL)
6826: ent->last = list;
6827: list = list->next;
6828: }
6829: } else {
6830: xmlFreeNodeList(list);
6831: }
6832: } else if (ret > 0) {
6833: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6834: ctxt->sax->error(ctxt->userData,
6835: "Entity value required\n");
6836: ctxt->errNo = ret;
6837: ctxt->wellFormed = 0;
6838: ctxt->disableSAX = 1;
6839: } else if (list != NULL) {
6840: xmlFreeNodeList(list);
6841: }
6842: }
6843: }
1.113 daniel 6844: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6845: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6846: /*
6847: * Create a node.
6848: */
6849: ctxt->sax->reference(ctxt->userData, ent->name);
6850: return;
6851: } else if (ctxt->replaceEntities) {
6852: xmlParserInputPtr input;
1.79 daniel 6853:
1.113 daniel 6854: input = xmlNewEntityInputStream(ctxt, ent);
6855: xmlPushInput(ctxt, input);
1.167 daniel 6856: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6857: (RAW == '<') && (NXT(1) == '?') &&
6858: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6859: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6860: xmlParseTextDecl(ctxt);
1.193 daniel 6861: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6862: /*
6863: * The XML REC instructs us to stop parsing right here
6864: */
6865: ctxt->instate = XML_PARSER_EOF;
6866: return;
6867: }
1.199 daniel 6868: if (input->standalone == 1) {
1.167 daniel 6869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6870: ctxt->sax->error(ctxt->userData,
6871: "external parsed entities cannot be standalone\n");
6872: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6873: ctxt->wellFormed = 0;
1.180 daniel 6874: ctxt->disableSAX = 1;
1.167 daniel 6875: }
6876: }
1.179 daniel 6877: /*
6878: * !!! TODO: build the tree under the entity first
6879: * 1234
6880: */
1.113 daniel 6881: return;
6882: }
1.77 daniel 6883: }
6884: val = ent->content;
6885: if (val == NULL) return;
6886: /*
6887: * inline the entity.
6888: */
1.171 daniel 6889: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6890: (!ctxt->disableSAX))
1.77 daniel 6891: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6892: }
1.24 daniel 6893: }
6894:
1.50 daniel 6895: /**
6896: * xmlParseEntityRef:
6897: * @ctxt: an XML parser context
6898: *
6899: * parse ENTITY references declarations
1.24 daniel 6900: *
6901: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6902: *
1.98 daniel 6903: * [ WFC: Entity Declared ]
6904: * In a document without any DTD, a document with only an internal DTD
6905: * subset which contains no parameter entity references, or a document
6906: * with "standalone='yes'", the Name given in the entity reference
6907: * must match that in an entity declaration, except that well-formed
6908: * documents need not declare any of the following entities: amp, lt,
6909: * gt, apos, quot. The declaration of a parameter entity must precede
6910: * any reference to it. Similarly, the declaration of a general entity
6911: * must precede any reference to it which appears in a default value in an
6912: * attribute-list declaration. Note that if entities are declared in the
6913: * external subset or in external parameter entities, a non-validating
6914: * processor is not obligated to read and process their declarations;
6915: * for such documents, the rule that an entity must be declared is a
6916: * well-formedness constraint only if standalone='yes'.
6917: *
6918: * [ WFC: Parsed Entity ]
6919: * An entity reference must not contain the name of an unparsed entity
6920: *
1.77 daniel 6921: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6922: */
1.77 daniel 6923: xmlEntityPtr
1.55 daniel 6924: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6925: xmlChar *name;
1.72 daniel 6926: xmlEntityPtr ent = NULL;
1.24 daniel 6927:
1.91 daniel 6928: GROW;
1.111 daniel 6929:
1.152 daniel 6930: if (RAW == '&') {
1.40 daniel 6931: NEXT;
1.24 daniel 6932: name = xmlParseName(ctxt);
6933: if (name == NULL) {
1.55 daniel 6934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6935: ctxt->sax->error(ctxt->userData,
6936: "xmlParseEntityRef: no name\n");
1.123 daniel 6937: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6938: ctxt->wellFormed = 0;
1.180 daniel 6939: ctxt->disableSAX = 1;
1.24 daniel 6940: } else {
1.152 daniel 6941: if (RAW == ';') {
1.40 daniel 6942: NEXT;
1.24 daniel 6943: /*
1.77 daniel 6944: * Ask first SAX for entity resolution, otherwise try the
6945: * predefined set.
6946: */
6947: if (ctxt->sax != NULL) {
6948: if (ctxt->sax->getEntity != NULL)
6949: ent = ctxt->sax->getEntity(ctxt->userData, name);
6950: if (ent == NULL)
6951: ent = xmlGetPredefinedEntity(name);
6952: }
6953: /*
1.98 daniel 6954: * [ WFC: Entity Declared ]
6955: * In a document without any DTD, a document with only an
6956: * internal DTD subset which contains no parameter entity
6957: * references, or a document with "standalone='yes'", the
6958: * Name given in the entity reference must match that in an
6959: * entity declaration, except that well-formed documents
6960: * need not declare any of the following entities: amp, lt,
6961: * gt, apos, quot.
6962: * The declaration of a parameter entity must precede any
6963: * reference to it.
6964: * Similarly, the declaration of a general entity must
6965: * precede any reference to it which appears in a default
6966: * value in an attribute-list declaration. Note that if
6967: * entities are declared in the external subset or in
6968: * external parameter entities, a non-validating processor
6969: * is not obligated to read and process their declarations;
6970: * for such documents, the rule that an entity must be
6971: * declared is a well-formedness constraint only if
6972: * standalone='yes'.
1.59 daniel 6973: */
1.77 daniel 6974: if (ent == NULL) {
1.98 daniel 6975: if ((ctxt->standalone == 1) ||
6976: ((ctxt->hasExternalSubset == 0) &&
6977: (ctxt->hasPErefs == 0))) {
6978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6979: ctxt->sax->error(ctxt->userData,
6980: "Entity '%s' not defined\n", name);
1.123 daniel 6981: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6982: ctxt->wellFormed = 0;
1.180 daniel 6983: ctxt->disableSAX = 1;
1.77 daniel 6984: } else {
1.98 daniel 6985: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6986: ctxt->sax->warning(ctxt->userData,
6987: "Entity '%s' not defined\n", name);
1.123 daniel 6988: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6989: }
1.77 daniel 6990: }
1.59 daniel 6991:
6992: /*
1.98 daniel 6993: * [ WFC: Parsed Entity ]
6994: * An entity reference must not contain the name of an
6995: * unparsed entity
6996: */
1.159 daniel 6997: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6998: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6999: ctxt->sax->error(ctxt->userData,
7000: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 7001: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 7002: ctxt->wellFormed = 0;
1.180 daniel 7003: ctxt->disableSAX = 1;
1.98 daniel 7004: }
7005:
7006: /*
7007: * [ WFC: No External Entity References ]
7008: * Attribute values cannot contain direct or indirect
7009: * entity references to external entities.
7010: */
7011: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 7012: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 7013: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7014: ctxt->sax->error(ctxt->userData,
7015: "Attribute references external entity '%s'\n", name);
1.123 daniel 7016: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 7017: ctxt->wellFormed = 0;
1.180 daniel 7018: ctxt->disableSAX = 1;
1.98 daniel 7019: }
7020: /*
7021: * [ WFC: No < in Attribute Values ]
7022: * The replacement text of any entity referred to directly or
7023: * indirectly in an attribute value (other than "<") must
7024: * not contain a <.
1.59 daniel 7025: */
1.98 daniel 7026: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 7027: (ent != NULL) &&
7028: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 7029: (ent->content != NULL) &&
7030: (xmlStrchr(ent->content, '<'))) {
7031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7032: ctxt->sax->error(ctxt->userData,
7033: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 7034: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 7035: ctxt->wellFormed = 0;
1.180 daniel 7036: ctxt->disableSAX = 1;
1.98 daniel 7037: }
7038:
7039: /*
7040: * Internal check, no parameter entities here ...
7041: */
7042: else {
1.159 daniel 7043: switch (ent->etype) {
1.59 daniel 7044: case XML_INTERNAL_PARAMETER_ENTITY:
7045: case XML_EXTERNAL_PARAMETER_ENTITY:
7046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7047: ctxt->sax->error(ctxt->userData,
1.59 daniel 7048: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 7049: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 7050: ctxt->wellFormed = 0;
1.180 daniel 7051: ctxt->disableSAX = 1;
7052: break;
7053: default:
1.59 daniel 7054: break;
7055: }
7056: }
7057:
7058: /*
1.98 daniel 7059: * [ WFC: No Recursion ]
1.117 daniel 7060: * TODO A parsed entity must not contain a recursive reference
7061: * to itself, either directly or indirectly.
1.59 daniel 7062: */
1.77 daniel 7063:
1.24 daniel 7064: } else {
1.55 daniel 7065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7066: ctxt->sax->error(ctxt->userData,
1.59 daniel 7067: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 7068: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7069: ctxt->wellFormed = 0;
1.180 daniel 7070: ctxt->disableSAX = 1;
1.24 daniel 7071: }
1.119 daniel 7072: xmlFree(name);
1.24 daniel 7073: }
7074: }
1.77 daniel 7075: return(ent);
1.24 daniel 7076: }
1.135 daniel 7077: /**
7078: * xmlParseStringEntityRef:
7079: * @ctxt: an XML parser context
7080: * @str: a pointer to an index in the string
7081: *
7082: * parse ENTITY references declarations, but this version parses it from
7083: * a string value.
7084: *
7085: * [68] EntityRef ::= '&' Name ';'
7086: *
7087: * [ WFC: Entity Declared ]
7088: * In a document without any DTD, a document with only an internal DTD
7089: * subset which contains no parameter entity references, or a document
7090: * with "standalone='yes'", the Name given in the entity reference
7091: * must match that in an entity declaration, except that well-formed
7092: * documents need not declare any of the following entities: amp, lt,
7093: * gt, apos, quot. The declaration of a parameter entity must precede
7094: * any reference to it. Similarly, the declaration of a general entity
7095: * must precede any reference to it which appears in a default value in an
7096: * attribute-list declaration. Note that if entities are declared in the
7097: * external subset or in external parameter entities, a non-validating
7098: * processor is not obligated to read and process their declarations;
7099: * for such documents, the rule that an entity must be declared is a
7100: * well-formedness constraint only if standalone='yes'.
7101: *
7102: * [ WFC: Parsed Entity ]
7103: * An entity reference must not contain the name of an unparsed entity
7104: *
7105: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7106: * is updated to the current location in the string.
7107: */
7108: xmlEntityPtr
7109: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7110: xmlChar *name;
7111: const xmlChar *ptr;
7112: xmlChar cur;
7113: xmlEntityPtr ent = NULL;
7114:
1.156 daniel 7115: if ((str == NULL) || (*str == NULL))
7116: return(NULL);
1.135 daniel 7117: ptr = *str;
7118: cur = *ptr;
7119: if (cur == '&') {
7120: ptr++;
7121: cur = *ptr;
7122: name = xmlParseStringName(ctxt, &ptr);
7123: if (name == NULL) {
7124: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7125: ctxt->sax->error(ctxt->userData,
7126: "xmlParseEntityRef: no name\n");
7127: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7128: ctxt->wellFormed = 0;
1.180 daniel 7129: ctxt->disableSAX = 1;
1.135 daniel 7130: } else {
1.185 daniel 7131: if (*ptr == ';') {
7132: ptr++;
1.135 daniel 7133: /*
7134: * Ask first SAX for entity resolution, otherwise try the
7135: * predefined set.
7136: */
7137: if (ctxt->sax != NULL) {
7138: if (ctxt->sax->getEntity != NULL)
7139: ent = ctxt->sax->getEntity(ctxt->userData, name);
7140: if (ent == NULL)
7141: ent = xmlGetPredefinedEntity(name);
7142: }
7143: /*
7144: * [ WFC: Entity Declared ]
7145: * In a document without any DTD, a document with only an
7146: * internal DTD subset which contains no parameter entity
7147: * references, or a document with "standalone='yes'", the
7148: * Name given in the entity reference must match that in an
7149: * entity declaration, except that well-formed documents
7150: * need not declare any of the following entities: amp, lt,
7151: * gt, apos, quot.
7152: * The declaration of a parameter entity must precede any
7153: * reference to it.
7154: * Similarly, the declaration of a general entity must
7155: * precede any reference to it which appears in a default
7156: * value in an attribute-list declaration. Note that if
7157: * entities are declared in the external subset or in
7158: * external parameter entities, a non-validating processor
7159: * is not obligated to read and process their declarations;
7160: * for such documents, the rule that an entity must be
7161: * declared is a well-formedness constraint only if
7162: * standalone='yes'.
7163: */
7164: if (ent == NULL) {
7165: if ((ctxt->standalone == 1) ||
7166: ((ctxt->hasExternalSubset == 0) &&
7167: (ctxt->hasPErefs == 0))) {
7168: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7169: ctxt->sax->error(ctxt->userData,
7170: "Entity '%s' not defined\n", name);
7171: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7172: ctxt->wellFormed = 0;
1.180 daniel 7173: ctxt->disableSAX = 1;
1.135 daniel 7174: } else {
7175: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7176: ctxt->sax->warning(ctxt->userData,
7177: "Entity '%s' not defined\n", name);
7178: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
7179: }
7180: }
7181:
7182: /*
7183: * [ WFC: Parsed Entity ]
7184: * An entity reference must not contain the name of an
7185: * unparsed entity
7186: */
1.159 daniel 7187: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 7188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7189: ctxt->sax->error(ctxt->userData,
7190: "Entity reference to unparsed entity %s\n", name);
7191: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
7192: ctxt->wellFormed = 0;
1.180 daniel 7193: ctxt->disableSAX = 1;
1.135 daniel 7194: }
7195:
7196: /*
7197: * [ WFC: No External Entity References ]
7198: * Attribute values cannot contain direct or indirect
7199: * entity references to external entities.
7200: */
7201: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 7202: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 7203: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7204: ctxt->sax->error(ctxt->userData,
7205: "Attribute references external entity '%s'\n", name);
7206: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
7207: ctxt->wellFormed = 0;
1.180 daniel 7208: ctxt->disableSAX = 1;
1.135 daniel 7209: }
7210: /*
7211: * [ WFC: No < in Attribute Values ]
7212: * The replacement text of any entity referred to directly or
7213: * indirectly in an attribute value (other than "<") must
7214: * not contain a <.
7215: */
7216: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7217: (ent != NULL) &&
7218: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
7219: (ent->content != NULL) &&
7220: (xmlStrchr(ent->content, '<'))) {
7221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7222: ctxt->sax->error(ctxt->userData,
7223: "'<' in entity '%s' is not allowed in attributes values\n", name);
7224: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
7225: ctxt->wellFormed = 0;
1.180 daniel 7226: ctxt->disableSAX = 1;
1.135 daniel 7227: }
7228:
7229: /*
7230: * Internal check, no parameter entities here ...
7231: */
7232: else {
1.159 daniel 7233: switch (ent->etype) {
1.135 daniel 7234: case XML_INTERNAL_PARAMETER_ENTITY:
7235: case XML_EXTERNAL_PARAMETER_ENTITY:
7236: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7237: ctxt->sax->error(ctxt->userData,
7238: "Attempt to reference the parameter entity '%s'\n", name);
7239: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
7240: ctxt->wellFormed = 0;
1.180 daniel 7241: ctxt->disableSAX = 1;
7242: break;
7243: default:
1.135 daniel 7244: break;
7245: }
7246: }
7247:
7248: /*
7249: * [ WFC: No Recursion ]
7250: * TODO A parsed entity must not contain a recursive reference
7251: * to itself, either directly or indirectly.
7252: */
7253:
7254: } else {
7255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7256: ctxt->sax->error(ctxt->userData,
7257: "xmlParseEntityRef: expecting ';'\n");
7258: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7259: ctxt->wellFormed = 0;
1.180 daniel 7260: ctxt->disableSAX = 1;
1.135 daniel 7261: }
7262: xmlFree(name);
7263: }
7264: }
1.185 daniel 7265: *str = ptr;
1.135 daniel 7266: return(ent);
7267: }
1.24 daniel 7268:
1.50 daniel 7269: /**
7270: * xmlParsePEReference:
7271: * @ctxt: an XML parser context
7272: *
7273: * parse PEReference declarations
1.77 daniel 7274: * The entity content is handled directly by pushing it's content as
7275: * a new input stream.
1.22 daniel 7276: *
7277: * [69] PEReference ::= '%' Name ';'
1.68 daniel 7278: *
1.98 daniel 7279: * [ WFC: No Recursion ]
7280: * TODO A parsed entity must not contain a recursive
7281: * reference to itself, either directly or indirectly.
7282: *
7283: * [ WFC: Entity Declared ]
7284: * In a document without any DTD, a document with only an internal DTD
7285: * subset which contains no parameter entity references, or a document
7286: * with "standalone='yes'", ... ... The declaration of a parameter
7287: * entity must precede any reference to it...
7288: *
7289: * [ VC: Entity Declared ]
7290: * In a document with an external subset or external parameter entities
7291: * with "standalone='no'", ... ... The declaration of a parameter entity
7292: * must precede any reference to it...
7293: *
7294: * [ WFC: In DTD ]
7295: * Parameter-entity references may only appear in the DTD.
7296: * NOTE: misleading but this is handled.
1.22 daniel 7297: */
1.77 daniel 7298: void
1.55 daniel 7299: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 7300: xmlChar *name;
1.72 daniel 7301: xmlEntityPtr entity = NULL;
1.50 daniel 7302: xmlParserInputPtr input;
1.22 daniel 7303:
1.152 daniel 7304: if (RAW == '%') {
1.40 daniel 7305: NEXT;
1.22 daniel 7306: name = xmlParseName(ctxt);
7307: if (name == NULL) {
1.55 daniel 7308: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7309: ctxt->sax->error(ctxt->userData,
7310: "xmlParsePEReference: no name\n");
1.123 daniel 7311: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7312: ctxt->wellFormed = 0;
1.180 daniel 7313: ctxt->disableSAX = 1;
1.22 daniel 7314: } else {
1.152 daniel 7315: if (RAW == ';') {
1.40 daniel 7316: NEXT;
1.98 daniel 7317: if ((ctxt->sax != NULL) &&
7318: (ctxt->sax->getParameterEntity != NULL))
7319: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7320: name);
1.45 daniel 7321: if (entity == NULL) {
1.98 daniel 7322: /*
7323: * [ WFC: Entity Declared ]
7324: * In a document without any DTD, a document with only an
7325: * internal DTD subset which contains no parameter entity
7326: * references, or a document with "standalone='yes'", ...
7327: * ... The declaration of a parameter entity must precede
7328: * any reference to it...
7329: */
7330: if ((ctxt->standalone == 1) ||
7331: ((ctxt->hasExternalSubset == 0) &&
7332: (ctxt->hasPErefs == 0))) {
7333: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7334: ctxt->sax->error(ctxt->userData,
7335: "PEReference: %%%s; not found\n", name);
1.123 daniel 7336: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 7337: ctxt->wellFormed = 0;
1.180 daniel 7338: ctxt->disableSAX = 1;
1.98 daniel 7339: } else {
7340: /*
7341: * [ VC: Entity Declared ]
7342: * In a document with an external subset or external
7343: * parameter entities with "standalone='no'", ...
7344: * ... The declaration of a parameter entity must precede
7345: * any reference to it...
7346: */
7347: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7348: ctxt->sax->warning(ctxt->userData,
7349: "PEReference: %%%s; not found\n", name);
7350: ctxt->valid = 0;
7351: }
1.50 daniel 7352: } else {
1.98 daniel 7353: /*
7354: * Internal checking in case the entity quest barfed
7355: */
1.159 daniel 7356: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7357: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 7358: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7359: ctxt->sax->warning(ctxt->userData,
7360: "Internal: %%%s; is not a parameter entity\n", name);
7361: } else {
1.164 daniel 7362: /*
7363: * TODO !!!
7364: * handle the extra spaces added before and after
7365: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7366: */
1.98 daniel 7367: input = xmlNewEntityInputStream(ctxt, entity);
7368: xmlPushInput(ctxt, input);
1.164 daniel 7369: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7370: (RAW == '<') && (NXT(1) == '?') &&
7371: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7372: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 7373: xmlParseTextDecl(ctxt);
1.193 daniel 7374: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7375: /*
7376: * The XML REC instructs us to stop parsing
7377: * right here
7378: */
7379: ctxt->instate = XML_PARSER_EOF;
7380: xmlFree(name);
7381: return;
7382: }
1.164 daniel 7383: }
7384: if (ctxt->token == 0)
7385: ctxt->token = ' ';
1.98 daniel 7386: }
1.45 daniel 7387: }
1.98 daniel 7388: ctxt->hasPErefs = 1;
1.22 daniel 7389: } else {
1.55 daniel 7390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7391: ctxt->sax->error(ctxt->userData,
1.59 daniel 7392: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 7393: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7394: ctxt->wellFormed = 0;
1.180 daniel 7395: ctxt->disableSAX = 1;
1.22 daniel 7396: }
1.119 daniel 7397: xmlFree(name);
1.3 veillard 7398: }
7399: }
7400: }
7401:
1.50 daniel 7402: /**
1.135 daniel 7403: * xmlParseStringPEReference:
7404: * @ctxt: an XML parser context
7405: * @str: a pointer to an index in the string
7406: *
7407: * parse PEReference declarations
7408: *
7409: * [69] PEReference ::= '%' Name ';'
7410: *
7411: * [ WFC: No Recursion ]
7412: * TODO A parsed entity must not contain a recursive
7413: * reference to itself, either directly or indirectly.
7414: *
7415: * [ WFC: Entity Declared ]
7416: * In a document without any DTD, a document with only an internal DTD
7417: * subset which contains no parameter entity references, or a document
7418: * with "standalone='yes'", ... ... The declaration of a parameter
7419: * entity must precede any reference to it...
7420: *
7421: * [ VC: Entity Declared ]
7422: * In a document with an external subset or external parameter entities
7423: * with "standalone='no'", ... ... The declaration of a parameter entity
7424: * must precede any reference to it...
7425: *
7426: * [ WFC: In DTD ]
7427: * Parameter-entity references may only appear in the DTD.
7428: * NOTE: misleading but this is handled.
7429: *
7430: * Returns the string of the entity content.
7431: * str is updated to the current value of the index
7432: */
7433: xmlEntityPtr
7434: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7435: const xmlChar *ptr;
7436: xmlChar cur;
7437: xmlChar *name;
7438: xmlEntityPtr entity = NULL;
7439:
7440: if ((str == NULL) || (*str == NULL)) return(NULL);
7441: ptr = *str;
7442: cur = *ptr;
7443: if (cur == '%') {
7444: ptr++;
7445: cur = *ptr;
7446: name = xmlParseStringName(ctxt, &ptr);
7447: if (name == NULL) {
7448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7449: ctxt->sax->error(ctxt->userData,
7450: "xmlParseStringPEReference: no name\n");
7451: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7452: ctxt->wellFormed = 0;
1.180 daniel 7453: ctxt->disableSAX = 1;
1.135 daniel 7454: } else {
7455: cur = *ptr;
7456: if (cur == ';') {
7457: ptr++;
7458: cur = *ptr;
7459: if ((ctxt->sax != NULL) &&
7460: (ctxt->sax->getParameterEntity != NULL))
7461: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7462: name);
7463: if (entity == NULL) {
7464: /*
7465: * [ WFC: Entity Declared ]
7466: * In a document without any DTD, a document with only an
7467: * internal DTD subset which contains no parameter entity
7468: * references, or a document with "standalone='yes'", ...
7469: * ... The declaration of a parameter entity must precede
7470: * any reference to it...
7471: */
7472: if ((ctxt->standalone == 1) ||
7473: ((ctxt->hasExternalSubset == 0) &&
7474: (ctxt->hasPErefs == 0))) {
7475: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7476: ctxt->sax->error(ctxt->userData,
7477: "PEReference: %%%s; not found\n", name);
7478: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7479: ctxt->wellFormed = 0;
1.180 daniel 7480: ctxt->disableSAX = 1;
1.135 daniel 7481: } else {
7482: /*
7483: * [ VC: Entity Declared ]
7484: * In a document with an external subset or external
7485: * parameter entities with "standalone='no'", ...
7486: * ... The declaration of a parameter entity must
7487: * precede any reference to it...
7488: */
7489: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7490: ctxt->sax->warning(ctxt->userData,
7491: "PEReference: %%%s; not found\n", name);
7492: ctxt->valid = 0;
7493: }
7494: } else {
7495: /*
7496: * Internal checking in case the entity quest barfed
7497: */
1.159 daniel 7498: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7499: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7500: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7501: ctxt->sax->warning(ctxt->userData,
7502: "Internal: %%%s; is not a parameter entity\n", name);
7503: }
7504: }
7505: ctxt->hasPErefs = 1;
7506: } else {
7507: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7508: ctxt->sax->error(ctxt->userData,
7509: "xmlParseStringPEReference: expecting ';'\n");
7510: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7511: ctxt->wellFormed = 0;
1.180 daniel 7512: ctxt->disableSAX = 1;
1.135 daniel 7513: }
7514: xmlFree(name);
7515: }
7516: }
7517: *str = ptr;
7518: return(entity);
7519: }
7520:
7521: /**
1.181 daniel 7522: * xmlParseDocTypeDecl:
1.50 daniel 7523: * @ctxt: an XML parser context
7524: *
7525: * parse a DOCTYPE declaration
1.21 daniel 7526: *
1.22 daniel 7527: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7528: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7529: *
7530: * [ VC: Root Element Type ]
1.99 daniel 7531: * The Name in the document type declaration must match the element
1.98 daniel 7532: * type of the root element.
1.21 daniel 7533: */
7534:
1.55 daniel 7535: void
7536: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7537: xmlChar *name = NULL;
1.123 daniel 7538: xmlChar *ExternalID = NULL;
7539: xmlChar *URI = NULL;
1.21 daniel 7540:
7541: /*
7542: * We know that '<!DOCTYPE' has been detected.
7543: */
1.40 daniel 7544: SKIP(9);
1.21 daniel 7545:
1.42 daniel 7546: SKIP_BLANKS;
1.21 daniel 7547:
7548: /*
7549: * Parse the DOCTYPE name.
7550: */
7551: name = xmlParseName(ctxt);
7552: if (name == NULL) {
1.55 daniel 7553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7554: ctxt->sax->error(ctxt->userData,
7555: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7556: ctxt->wellFormed = 0;
1.180 daniel 7557: ctxt->disableSAX = 1;
1.123 daniel 7558: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7559: }
1.165 daniel 7560: ctxt->intSubName = name;
1.21 daniel 7561:
1.42 daniel 7562: SKIP_BLANKS;
1.21 daniel 7563:
7564: /*
1.22 daniel 7565: * Check for SystemID and ExternalID
7566: */
1.67 daniel 7567: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7568:
7569: if ((URI != NULL) || (ExternalID != NULL)) {
7570: ctxt->hasExternalSubset = 1;
7571: }
1.165 daniel 7572: ctxt->extSubURI = URI;
7573: ctxt->extSubSystem = ExternalID;
1.98 daniel 7574:
1.42 daniel 7575: SKIP_BLANKS;
1.36 daniel 7576:
1.76 daniel 7577: /*
1.165 daniel 7578: * Create and update the internal subset.
1.76 daniel 7579: */
1.171 daniel 7580: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7581: (!ctxt->disableSAX))
1.74 daniel 7582: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7583:
7584: /*
1.140 daniel 7585: * Is there any internal subset declarations ?
7586: * they are handled separately in xmlParseInternalSubset()
7587: */
1.152 daniel 7588: if (RAW == '[')
1.140 daniel 7589: return;
7590:
7591: /*
7592: * We should be at the end of the DOCTYPE declaration.
7593: */
1.152 daniel 7594: if (RAW != '>') {
1.140 daniel 7595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7596: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7597: ctxt->wellFormed = 0;
1.180 daniel 7598: ctxt->disableSAX = 1;
1.140 daniel 7599: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7600: }
7601: NEXT;
7602: }
7603:
7604: /**
1.181 daniel 7605: * xmlParseInternalsubset:
1.140 daniel 7606: * @ctxt: an XML parser context
7607: *
7608: * parse the internal subset declaration
7609: *
7610: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7611: */
7612:
7613: void
7614: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7615: /*
1.22 daniel 7616: * Is there any DTD definition ?
7617: */
1.152 daniel 7618: if (RAW == '[') {
1.96 daniel 7619: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7620: NEXT;
1.22 daniel 7621: /*
7622: * Parse the succession of Markup declarations and
7623: * PEReferences.
7624: * Subsequence (markupdecl | PEReference | S)*
7625: */
1.152 daniel 7626: while (RAW != ']') {
1.123 daniel 7627: const xmlChar *check = CUR_PTR;
1.115 daniel 7628: int cons = ctxt->input->consumed;
1.22 daniel 7629:
1.42 daniel 7630: SKIP_BLANKS;
1.22 daniel 7631: xmlParseMarkupDecl(ctxt);
1.50 daniel 7632: xmlParsePEReference(ctxt);
1.22 daniel 7633:
1.115 daniel 7634: /*
7635: * Pop-up of finished entities.
7636: */
1.152 daniel 7637: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7638: xmlPopInput(ctxt);
7639:
1.118 daniel 7640: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7642: ctxt->sax->error(ctxt->userData,
1.140 daniel 7643: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7644: ctxt->wellFormed = 0;
1.180 daniel 7645: ctxt->disableSAX = 1;
1.123 daniel 7646: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7647: break;
7648: }
7649: }
1.209 veillard 7650: if (RAW == ']') {
7651: NEXT;
7652: SKIP_BLANKS;
7653: }
1.22 daniel 7654: }
7655:
7656: /*
7657: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7658: */
1.152 daniel 7659: if (RAW != '>') {
1.55 daniel 7660: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7661: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7662: ctxt->wellFormed = 0;
1.180 daniel 7663: ctxt->disableSAX = 1;
1.123 daniel 7664: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7665: }
1.40 daniel 7666: NEXT;
1.21 daniel 7667: }
7668:
1.50 daniel 7669: /**
7670: * xmlParseAttribute:
7671: * @ctxt: an XML parser context
1.123 daniel 7672: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7673: *
7674: * parse an attribute
1.3 veillard 7675: *
1.22 daniel 7676: * [41] Attribute ::= Name Eq AttValue
7677: *
1.98 daniel 7678: * [ WFC: No External Entity References ]
7679: * Attribute values cannot contain direct or indirect entity references
7680: * to external entities.
7681: *
7682: * [ WFC: No < in Attribute Values ]
7683: * The replacement text of any entity referred to directly or indirectly in
7684: * an attribute value (other than "<") must not contain a <.
7685: *
7686: * [ VC: Attribute Value Type ]
1.117 daniel 7687: * The attribute must have been declared; the value must be of the type
1.99 daniel 7688: * declared for it.
1.98 daniel 7689: *
1.22 daniel 7690: * [25] Eq ::= S? '=' S?
7691: *
1.29 daniel 7692: * With namespace:
7693: *
7694: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7695: *
7696: * Also the case QName == xmlns:??? is handled independently as a namespace
7697: * definition.
1.69 daniel 7698: *
1.72 daniel 7699: * Returns the attribute name, and the value in *value.
1.3 veillard 7700: */
7701:
1.123 daniel 7702: xmlChar *
7703: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7704: xmlChar *name, *val;
1.3 veillard 7705:
1.72 daniel 7706: *value = NULL;
7707: name = xmlParseName(ctxt);
1.22 daniel 7708: if (name == NULL) {
1.55 daniel 7709: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7710: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7711: ctxt->wellFormed = 0;
1.180 daniel 7712: ctxt->disableSAX = 1;
1.123 daniel 7713: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7714: return(NULL);
1.3 veillard 7715: }
7716:
7717: /*
1.29 daniel 7718: * read the value
1.3 veillard 7719: */
1.42 daniel 7720: SKIP_BLANKS;
1.152 daniel 7721: if (RAW == '=') {
1.40 daniel 7722: NEXT;
1.42 daniel 7723: SKIP_BLANKS;
1.72 daniel 7724: val = xmlParseAttValue(ctxt);
1.96 daniel 7725: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7726: } else {
1.55 daniel 7727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7728: ctxt->sax->error(ctxt->userData,
1.59 daniel 7729: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7730: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7731: ctxt->wellFormed = 0;
1.180 daniel 7732: ctxt->disableSAX = 1;
1.170 daniel 7733: xmlFree(name);
1.52 daniel 7734: return(NULL);
1.43 daniel 7735: }
7736:
1.172 daniel 7737: /*
7738: * Check that xml:lang conforms to the specification
7739: */
7740: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7741: if (!xmlCheckLanguageID(val)) {
7742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7743: ctxt->sax->error(ctxt->userData,
7744: "Invalid value for xml:lang : %s\n", val);
7745: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7746: ctxt->wellFormed = 0;
1.180 daniel 7747: ctxt->disableSAX = 1;
1.172 daniel 7748: }
7749: }
7750:
1.176 daniel 7751: /*
7752: * Check that xml:space conforms to the specification
7753: */
7754: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7755: if (!xmlStrcmp(val, BAD_CAST "default"))
7756: *(ctxt->space) = 0;
7757: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7758: *(ctxt->space) = 1;
7759: else {
7760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7761: ctxt->sax->error(ctxt->userData,
7762: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7763: val);
7764: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7765: ctxt->wellFormed = 0;
1.180 daniel 7766: ctxt->disableSAX = 1;
1.176 daniel 7767: }
7768: }
7769:
1.72 daniel 7770: *value = val;
7771: return(name);
1.3 veillard 7772: }
7773:
1.50 daniel 7774: /**
7775: * xmlParseStartTag:
7776: * @ctxt: an XML parser context
7777: *
7778: * parse a start of tag either for rule element or
7779: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7780: *
7781: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7782: *
1.98 daniel 7783: * [ WFC: Unique Att Spec ]
7784: * No attribute name may appear more than once in the same start-tag or
7785: * empty-element tag.
7786: *
1.29 daniel 7787: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7788: *
1.98 daniel 7789: * [ WFC: Unique Att Spec ]
7790: * No attribute name may appear more than once in the same start-tag or
7791: * empty-element tag.
7792: *
1.29 daniel 7793: * With namespace:
7794: *
7795: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7796: *
7797: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7798: *
1.192 daniel 7799: * Returns the element name parsed
1.2 veillard 7800: */
7801:
1.123 daniel 7802: xmlChar *
1.69 daniel 7803: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7804: xmlChar *name;
7805: xmlChar *attname;
7806: xmlChar *attvalue;
7807: const xmlChar **atts = NULL;
1.72 daniel 7808: int nbatts = 0;
7809: int maxatts = 0;
7810: int i;
1.2 veillard 7811:
1.152 daniel 7812: if (RAW != '<') return(NULL);
1.40 daniel 7813: NEXT;
1.3 veillard 7814:
1.72 daniel 7815: name = xmlParseName(ctxt);
1.59 daniel 7816: if (name == NULL) {
7817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7818: ctxt->sax->error(ctxt->userData,
1.59 daniel 7819: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7820: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7821: ctxt->wellFormed = 0;
1.180 daniel 7822: ctxt->disableSAX = 1;
1.83 daniel 7823: return(NULL);
1.50 daniel 7824: }
7825:
7826: /*
1.3 veillard 7827: * Now parse the attributes, it ends up with the ending
7828: *
7829: * (S Attribute)* S?
7830: */
1.42 daniel 7831: SKIP_BLANKS;
1.91 daniel 7832: GROW;
1.168 daniel 7833:
1.153 daniel 7834: while ((IS_CHAR(RAW)) &&
1.152 daniel 7835: (RAW != '>') &&
7836: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7837: const xmlChar *q = CUR_PTR;
1.91 daniel 7838: int cons = ctxt->input->consumed;
1.29 daniel 7839:
1.72 daniel 7840: attname = xmlParseAttribute(ctxt, &attvalue);
7841: if ((attname != NULL) && (attvalue != NULL)) {
7842: /*
1.98 daniel 7843: * [ WFC: Unique Att Spec ]
7844: * No attribute name may appear more than once in the same
7845: * start-tag or empty-element tag.
1.72 daniel 7846: */
7847: for (i = 0; i < nbatts;i += 2) {
7848: if (!xmlStrcmp(atts[i], attname)) {
7849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7850: ctxt->sax->error(ctxt->userData,
7851: "Attribute %s redefined\n",
7852: attname);
1.72 daniel 7853: ctxt->wellFormed = 0;
1.180 daniel 7854: ctxt->disableSAX = 1;
1.123 daniel 7855: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7856: xmlFree(attname);
7857: xmlFree(attvalue);
1.98 daniel 7858: goto failed;
1.72 daniel 7859: }
7860: }
7861:
7862: /*
7863: * Add the pair to atts
7864: */
7865: if (atts == NULL) {
7866: maxatts = 10;
1.123 daniel 7867: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7868: if (atts == NULL) {
1.86 daniel 7869: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7870: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7871: return(NULL);
1.72 daniel 7872: }
1.127 daniel 7873: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7874: maxatts *= 2;
1.123 daniel 7875: atts = (const xmlChar **) xmlRealloc(atts,
7876: maxatts * sizeof(xmlChar *));
1.72 daniel 7877: if (atts == NULL) {
1.86 daniel 7878: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7879: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7880: return(NULL);
1.72 daniel 7881: }
7882: }
7883: atts[nbatts++] = attname;
7884: atts[nbatts++] = attvalue;
7885: atts[nbatts] = NULL;
7886: atts[nbatts + 1] = NULL;
1.176 daniel 7887: } else {
7888: if (attname != NULL)
7889: xmlFree(attname);
7890: if (attvalue != NULL)
7891: xmlFree(attvalue);
1.72 daniel 7892: }
7893:
1.116 daniel 7894: failed:
1.168 daniel 7895:
7896: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7897: break;
7898: if (!IS_BLANK(RAW)) {
7899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7900: ctxt->sax->error(ctxt->userData,
7901: "attributes construct error\n");
7902: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7903: ctxt->wellFormed = 0;
1.180 daniel 7904: ctxt->disableSAX = 1;
1.168 daniel 7905: }
1.42 daniel 7906: SKIP_BLANKS;
1.91 daniel 7907: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7908: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7909: ctxt->sax->error(ctxt->userData,
1.31 daniel 7910: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7911: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7912: ctxt->wellFormed = 0;
1.180 daniel 7913: ctxt->disableSAX = 1;
1.29 daniel 7914: break;
1.3 veillard 7915: }
1.91 daniel 7916: GROW;
1.3 veillard 7917: }
7918:
1.43 daniel 7919: /*
1.72 daniel 7920: * SAX: Start of Element !
1.43 daniel 7921: */
1.171 daniel 7922: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7923: (!ctxt->disableSAX))
1.74 daniel 7924: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7925:
1.72 daniel 7926: if (atts != NULL) {
1.123 daniel 7927: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7928: xmlFree(atts);
1.72 daniel 7929: }
1.83 daniel 7930: return(name);
1.3 veillard 7931: }
7932:
1.50 daniel 7933: /**
7934: * xmlParseEndTag:
7935: * @ctxt: an XML parser context
7936: *
7937: * parse an end of tag
1.27 daniel 7938: *
7939: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7940: *
7941: * With namespace
7942: *
1.72 daniel 7943: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7944: */
7945:
1.55 daniel 7946: void
1.140 daniel 7947: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7948: xmlChar *name;
1.140 daniel 7949: xmlChar *oldname;
1.7 veillard 7950:
1.91 daniel 7951: GROW;
1.152 daniel 7952: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7953: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7954: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7955: ctxt->wellFormed = 0;
1.180 daniel 7956: ctxt->disableSAX = 1;
1.123 daniel 7957: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7958: return;
7959: }
1.40 daniel 7960: SKIP(2);
1.7 veillard 7961:
1.72 daniel 7962: name = xmlParseName(ctxt);
1.7 veillard 7963:
7964: /*
7965: * We should definitely be at the ending "S? '>'" part
7966: */
1.91 daniel 7967: GROW;
1.42 daniel 7968: SKIP_BLANKS;
1.153 daniel 7969: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7970: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7971: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7972: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7973: ctxt->wellFormed = 0;
1.180 daniel 7974: ctxt->disableSAX = 1;
1.7 veillard 7975: } else
1.40 daniel 7976: NEXT;
1.7 veillard 7977:
1.72 daniel 7978: /*
1.98 daniel 7979: * [ WFC: Element Type Match ]
7980: * The Name in an element's end-tag must match the element type in the
7981: * start-tag.
7982: *
1.83 daniel 7983: */
1.147 daniel 7984: if ((name == NULL) || (ctxt->name == NULL) ||
7985: (xmlStrcmp(name, ctxt->name))) {
7986: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7987: if ((name != NULL) && (ctxt->name != NULL)) {
7988: ctxt->sax->error(ctxt->userData,
7989: "Opening and ending tag mismatch: %s and %s\n",
7990: ctxt->name, name);
7991: } else if (ctxt->name != NULL) {
7992: ctxt->sax->error(ctxt->userData,
7993: "Ending tag eror for: %s\n", ctxt->name);
7994: } else {
7995: ctxt->sax->error(ctxt->userData,
7996: "Ending tag error: internal error ???\n");
7997: }
1.122 daniel 7998:
1.147 daniel 7999: }
1.123 daniel 8000: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 8001: ctxt->wellFormed = 0;
1.180 daniel 8002: ctxt->disableSAX = 1;
1.83 daniel 8003: }
8004:
8005: /*
1.72 daniel 8006: * SAX: End of Tag
8007: */
1.171 daniel 8008: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8009: (!ctxt->disableSAX))
1.74 daniel 8010: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 8011:
8012: if (name != NULL)
1.119 daniel 8013: xmlFree(name);
1.140 daniel 8014: oldname = namePop(ctxt);
1.176 daniel 8015: spacePop(ctxt);
1.140 daniel 8016: if (oldname != NULL) {
8017: #ifdef DEBUG_STACK
8018: fprintf(stderr,"Close: popped %s\n", oldname);
8019: #endif
8020: xmlFree(oldname);
8021: }
1.7 veillard 8022: return;
8023: }
8024:
1.50 daniel 8025: /**
8026: * xmlParseCDSect:
8027: * @ctxt: an XML parser context
8028: *
8029: * Parse escaped pure raw content.
1.29 daniel 8030: *
8031: * [18] CDSect ::= CDStart CData CDEnd
8032: *
8033: * [19] CDStart ::= '<![CDATA['
8034: *
8035: * [20] Data ::= (Char* - (Char* ']]>' Char*))
8036: *
8037: * [21] CDEnd ::= ']]>'
1.3 veillard 8038: */
1.55 daniel 8039: void
8040: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 8041: xmlChar *buf = NULL;
8042: int len = 0;
1.140 daniel 8043: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 8044: int r, rl;
8045: int s, sl;
8046: int cur, l;
1.3 veillard 8047:
1.106 daniel 8048: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 8049: (NXT(2) == '[') && (NXT(3) == 'C') &&
8050: (NXT(4) == 'D') && (NXT(5) == 'A') &&
8051: (NXT(6) == 'T') && (NXT(7) == 'A') &&
8052: (NXT(8) == '[')) {
8053: SKIP(9);
1.29 daniel 8054: } else
1.45 daniel 8055: return;
1.109 daniel 8056:
8057: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 8058: r = CUR_CHAR(rl);
8059: if (!IS_CHAR(r)) {
1.55 daniel 8060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8061: ctxt->sax->error(ctxt->userData,
1.135 daniel 8062: "CData section not finished\n");
1.59 daniel 8063: ctxt->wellFormed = 0;
1.180 daniel 8064: ctxt->disableSAX = 1;
1.123 daniel 8065: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 8066: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 8067: return;
1.3 veillard 8068: }
1.152 daniel 8069: NEXTL(rl);
8070: s = CUR_CHAR(sl);
8071: if (!IS_CHAR(s)) {
1.55 daniel 8072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8073: ctxt->sax->error(ctxt->userData,
1.135 daniel 8074: "CData section not finished\n");
1.123 daniel 8075: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 8076: ctxt->wellFormed = 0;
1.180 daniel 8077: ctxt->disableSAX = 1;
1.109 daniel 8078: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 8079: return;
1.3 veillard 8080: }
1.152 daniel 8081: NEXTL(sl);
8082: cur = CUR_CHAR(l);
1.135 daniel 8083: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8084: if (buf == NULL) {
8085: fprintf(stderr, "malloc of %d byte failed\n", size);
8086: return;
8087: }
1.108 veillard 8088: while (IS_CHAR(cur) &&
1.110 daniel 8089: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 8090: if (len + 5 >= size) {
1.135 daniel 8091: size *= 2;
1.204 veillard 8092: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8093: if (buf == NULL) {
8094: fprintf(stderr, "realloc of %d byte failed\n", size);
8095: return;
8096: }
8097: }
1.152 daniel 8098: COPY_BUF(rl,buf,len,r);
1.110 daniel 8099: r = s;
1.152 daniel 8100: rl = sl;
1.110 daniel 8101: s = cur;
1.152 daniel 8102: sl = l;
8103: NEXTL(l);
8104: cur = CUR_CHAR(l);
1.3 veillard 8105: }
1.135 daniel 8106: buf[len] = 0;
1.109 daniel 8107: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 8108: if (cur != '>') {
1.55 daniel 8109: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8110: ctxt->sax->error(ctxt->userData,
1.135 daniel 8111: "CData section not finished\n%.50s\n", buf);
1.123 daniel 8112: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 8113: ctxt->wellFormed = 0;
1.180 daniel 8114: ctxt->disableSAX = 1;
1.135 daniel 8115: xmlFree(buf);
1.45 daniel 8116: return;
1.3 veillard 8117: }
1.152 daniel 8118: NEXTL(l);
1.16 daniel 8119:
1.45 daniel 8120: /*
1.135 daniel 8121: * Ok the buffer is to be consumed as cdata.
1.45 daniel 8122: */
1.171 daniel 8123: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 8124: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 8125: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 8126: }
1.135 daniel 8127: xmlFree(buf);
1.2 veillard 8128: }
8129:
1.50 daniel 8130: /**
8131: * xmlParseContent:
8132: * @ctxt: an XML parser context
8133: *
8134: * Parse a content:
1.2 veillard 8135: *
1.27 daniel 8136: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 8137: */
8138:
1.55 daniel 8139: void
8140: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 8141: GROW;
1.176 daniel 8142: while (((RAW != 0) || (ctxt->token != 0)) &&
8143: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 8144: const xmlChar *test = CUR_PTR;
1.91 daniel 8145: int cons = ctxt->input->consumed;
1.123 daniel 8146: xmlChar tok = ctxt->token;
1.27 daniel 8147:
8148: /*
1.152 daniel 8149: * Handle possible processed charrefs.
8150: */
8151: if (ctxt->token != 0) {
8152: xmlParseCharData(ctxt, 0);
8153: }
8154: /*
1.27 daniel 8155: * First case : a Processing Instruction.
8156: */
1.152 daniel 8157: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 8158: xmlParsePI(ctxt);
8159: }
1.72 daniel 8160:
1.27 daniel 8161: /*
8162: * Second case : a CDSection
8163: */
1.152 daniel 8164: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8165: (NXT(2) == '[') && (NXT(3) == 'C') &&
8166: (NXT(4) == 'D') && (NXT(5) == 'A') &&
8167: (NXT(6) == 'T') && (NXT(7) == 'A') &&
8168: (NXT(8) == '[')) {
1.45 daniel 8169: xmlParseCDSect(ctxt);
1.27 daniel 8170: }
1.72 daniel 8171:
1.27 daniel 8172: /*
8173: * Third case : a comment
8174: */
1.152 daniel 8175: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8176: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 8177: xmlParseComment(ctxt);
1.97 daniel 8178: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 8179: }
1.72 daniel 8180:
1.27 daniel 8181: /*
8182: * Fourth case : a sub-element.
8183: */
1.152 daniel 8184: else if (RAW == '<') {
1.72 daniel 8185: xmlParseElement(ctxt);
1.45 daniel 8186: }
1.72 daniel 8187:
1.45 daniel 8188: /*
1.50 daniel 8189: * Fifth case : a reference. If if has not been resolved,
8190: * parsing returns it's Name, create the node
1.45 daniel 8191: */
1.97 daniel 8192:
1.152 daniel 8193: else if (RAW == '&') {
1.77 daniel 8194: xmlParseReference(ctxt);
1.27 daniel 8195: }
1.72 daniel 8196:
1.27 daniel 8197: /*
8198: * Last case, text. Note that References are handled directly.
8199: */
8200: else {
1.45 daniel 8201: xmlParseCharData(ctxt, 0);
1.3 veillard 8202: }
1.14 veillard 8203:
1.91 daniel 8204: GROW;
1.14 veillard 8205: /*
1.45 daniel 8206: * Pop-up of finished entities.
1.14 veillard 8207: */
1.152 daniel 8208: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 8209: xmlPopInput(ctxt);
1.135 daniel 8210: SHRINK;
1.45 daniel 8211:
1.113 daniel 8212: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8213: (tok == ctxt->token)) {
1.55 daniel 8214: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8215: ctxt->sax->error(ctxt->userData,
1.59 daniel 8216: "detected an error in element content\n");
1.123 daniel 8217: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 8218: ctxt->wellFormed = 0;
1.180 daniel 8219: ctxt->disableSAX = 1;
1.29 daniel 8220: break;
8221: }
1.3 veillard 8222: }
1.2 veillard 8223: }
8224:
1.50 daniel 8225: /**
8226: * xmlParseElement:
8227: * @ctxt: an XML parser context
8228: *
8229: * parse an XML element, this is highly recursive
1.26 daniel 8230: *
8231: * [39] element ::= EmptyElemTag | STag content ETag
8232: *
1.98 daniel 8233: * [ WFC: Element Type Match ]
8234: * The Name in an element's end-tag must match the element type in the
8235: * start-tag.
8236: *
8237: * [ VC: Element Valid ]
1.117 daniel 8238: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 8239: * where the Name matches the element type and one of the following holds:
8240: * - The declaration matches EMPTY and the element has no content.
8241: * - The declaration matches children and the sequence of child elements
8242: * belongs to the language generated by the regular expression in the
8243: * content model, with optional white space (characters matching the
8244: * nonterminal S) between each pair of child elements.
8245: * - The declaration matches Mixed and the content consists of character
8246: * data and child elements whose types match names in the content model.
8247: * - The declaration matches ANY, and the types of any child elements have
8248: * been declared.
1.2 veillard 8249: */
1.26 daniel 8250:
1.72 daniel 8251: void
1.69 daniel 8252: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 8253: const xmlChar *openTag = CUR_PTR;
8254: xmlChar *name;
1.140 daniel 8255: xmlChar *oldname;
1.32 daniel 8256: xmlParserNodeInfo node_info;
1.118 daniel 8257: xmlNodePtr ret;
1.2 veillard 8258:
1.32 daniel 8259: /* Capture start position */
1.118 daniel 8260: if (ctxt->record_info) {
8261: node_info.begin_pos = ctxt->input->consumed +
8262: (CUR_PTR - ctxt->input->base);
8263: node_info.begin_line = ctxt->input->line;
8264: }
1.32 daniel 8265:
1.176 daniel 8266: if (ctxt->spaceNr == 0)
8267: spacePush(ctxt, -1);
8268: else
8269: spacePush(ctxt, *ctxt->space);
8270:
1.83 daniel 8271: name = xmlParseStartTag(ctxt);
8272: if (name == NULL) {
1.176 daniel 8273: spacePop(ctxt);
1.83 daniel 8274: return;
8275: }
1.140 daniel 8276: namePush(ctxt, name);
1.118 daniel 8277: ret = ctxt->node;
1.2 veillard 8278:
8279: /*
1.99 daniel 8280: * [ VC: Root Element Type ]
8281: * The Name in the document type declaration must match the element
8282: * type of the root element.
8283: */
1.105 daniel 8284: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 8285: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 8286: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 8287:
8288: /*
1.2 veillard 8289: * Check for an Empty Element.
8290: */
1.152 daniel 8291: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 8292: SKIP(2);
1.171 daniel 8293: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8294: (!ctxt->disableSAX))
1.83 daniel 8295: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 8296: oldname = namePop(ctxt);
1.176 daniel 8297: spacePop(ctxt);
1.140 daniel 8298: if (oldname != NULL) {
8299: #ifdef DEBUG_STACK
8300: fprintf(stderr,"Close: popped %s\n", oldname);
8301: #endif
8302: xmlFree(oldname);
1.211 veillard 8303: }
8304: if ( ret != NULL && ctxt->record_info ) {
8305: node_info.end_pos = ctxt->input->consumed +
8306: (CUR_PTR - ctxt->input->base);
8307: node_info.end_line = ctxt->input->line;
8308: node_info.node = ret;
8309: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 8310: }
1.72 daniel 8311: return;
1.2 veillard 8312: }
1.152 daniel 8313: if (RAW == '>') {
1.91 daniel 8314: NEXT;
8315: } else {
1.55 daniel 8316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8317: ctxt->sax->error(ctxt->userData,
8318: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 8319: openTag);
1.59 daniel 8320: ctxt->wellFormed = 0;
1.180 daniel 8321: ctxt->disableSAX = 1;
1.123 daniel 8322: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 8323:
8324: /*
8325: * end of parsing of this node.
8326: */
8327: nodePop(ctxt);
1.140 daniel 8328: oldname = namePop(ctxt);
1.176 daniel 8329: spacePop(ctxt);
1.140 daniel 8330: if (oldname != NULL) {
8331: #ifdef DEBUG_STACK
8332: fprintf(stderr,"Close: popped %s\n", oldname);
8333: #endif
8334: xmlFree(oldname);
8335: }
1.118 daniel 8336:
8337: /*
8338: * Capture end position and add node
8339: */
8340: if ( ret != NULL && ctxt->record_info ) {
8341: node_info.end_pos = ctxt->input->consumed +
8342: (CUR_PTR - ctxt->input->base);
8343: node_info.end_line = ctxt->input->line;
8344: node_info.node = ret;
8345: xmlParserAddNodeInfo(ctxt, &node_info);
8346: }
1.72 daniel 8347: return;
1.2 veillard 8348: }
8349:
8350: /*
8351: * Parse the content of the element:
8352: */
1.45 daniel 8353: xmlParseContent(ctxt);
1.153 daniel 8354: if (!IS_CHAR(RAW)) {
1.55 daniel 8355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8356: ctxt->sax->error(ctxt->userData,
1.57 daniel 8357: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 8358: ctxt->wellFormed = 0;
1.180 daniel 8359: ctxt->disableSAX = 1;
1.123 daniel 8360: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 8361:
8362: /*
8363: * end of parsing of this node.
8364: */
8365: nodePop(ctxt);
1.140 daniel 8366: oldname = namePop(ctxt);
1.176 daniel 8367: spacePop(ctxt);
1.140 daniel 8368: if (oldname != NULL) {
8369: #ifdef DEBUG_STACK
8370: fprintf(stderr,"Close: popped %s\n", oldname);
8371: #endif
8372: xmlFree(oldname);
8373: }
1.72 daniel 8374: return;
1.2 veillard 8375: }
8376:
8377: /*
1.27 daniel 8378: * parse the end of tag: '</' should be here.
1.2 veillard 8379: */
1.140 daniel 8380: xmlParseEndTag(ctxt);
1.118 daniel 8381:
8382: /*
8383: * Capture end position and add node
8384: */
8385: if ( ret != NULL && ctxt->record_info ) {
8386: node_info.end_pos = ctxt->input->consumed +
8387: (CUR_PTR - ctxt->input->base);
8388: node_info.end_line = ctxt->input->line;
8389: node_info.node = ret;
8390: xmlParserAddNodeInfo(ctxt, &node_info);
8391: }
1.2 veillard 8392: }
8393:
1.50 daniel 8394: /**
8395: * xmlParseVersionNum:
8396: * @ctxt: an XML parser context
8397: *
8398: * parse the XML version value.
1.29 daniel 8399: *
8400: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 8401: *
8402: * Returns the string giving the XML version number, or NULL
1.29 daniel 8403: */
1.123 daniel 8404: xmlChar *
1.55 daniel 8405: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 8406: xmlChar *buf = NULL;
8407: int len = 0;
8408: int size = 10;
8409: xmlChar cur;
1.29 daniel 8410:
1.135 daniel 8411: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8412: if (buf == NULL) {
8413: fprintf(stderr, "malloc of %d byte failed\n", size);
8414: return(NULL);
8415: }
8416: cur = CUR;
1.152 daniel 8417: while (((cur >= 'a') && (cur <= 'z')) ||
8418: ((cur >= 'A') && (cur <= 'Z')) ||
8419: ((cur >= '0') && (cur <= '9')) ||
8420: (cur == '_') || (cur == '.') ||
8421: (cur == ':') || (cur == '-')) {
1.135 daniel 8422: if (len + 1 >= size) {
8423: size *= 2;
1.204 veillard 8424: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8425: if (buf == NULL) {
8426: fprintf(stderr, "realloc of %d byte failed\n", size);
8427: return(NULL);
8428: }
8429: }
8430: buf[len++] = cur;
8431: NEXT;
8432: cur=CUR;
8433: }
8434: buf[len] = 0;
8435: return(buf);
1.29 daniel 8436: }
8437:
1.50 daniel 8438: /**
8439: * xmlParseVersionInfo:
8440: * @ctxt: an XML parser context
8441: *
8442: * parse the XML version.
1.29 daniel 8443: *
8444: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8445: *
8446: * [25] Eq ::= S? '=' S?
1.50 daniel 8447: *
1.68 daniel 8448: * Returns the version string, e.g. "1.0"
1.29 daniel 8449: */
8450:
1.123 daniel 8451: xmlChar *
1.55 daniel 8452: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8453: xmlChar *version = NULL;
8454: const xmlChar *q;
1.29 daniel 8455:
1.152 daniel 8456: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8457: (NXT(2) == 'r') && (NXT(3) == 's') &&
8458: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8459: (NXT(6) == 'n')) {
8460: SKIP(7);
1.42 daniel 8461: SKIP_BLANKS;
1.152 daniel 8462: if (RAW != '=') {
1.55 daniel 8463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8464: ctxt->sax->error(ctxt->userData,
8465: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8466: ctxt->wellFormed = 0;
1.180 daniel 8467: ctxt->disableSAX = 1;
1.123 daniel 8468: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8469: return(NULL);
8470: }
1.40 daniel 8471: NEXT;
1.42 daniel 8472: SKIP_BLANKS;
1.152 daniel 8473: if (RAW == '"') {
1.40 daniel 8474: NEXT;
8475: q = CUR_PTR;
1.29 daniel 8476: version = xmlParseVersionNum(ctxt);
1.152 daniel 8477: if (RAW != '"') {
1.55 daniel 8478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8479: ctxt->sax->error(ctxt->userData,
8480: "String not closed\n%.50s\n", q);
1.59 daniel 8481: ctxt->wellFormed = 0;
1.180 daniel 8482: ctxt->disableSAX = 1;
1.123 daniel 8483: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8484: } else
1.40 daniel 8485: NEXT;
1.152 daniel 8486: } else if (RAW == '\''){
1.40 daniel 8487: NEXT;
8488: q = CUR_PTR;
1.29 daniel 8489: version = xmlParseVersionNum(ctxt);
1.152 daniel 8490: if (RAW != '\'') {
1.55 daniel 8491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8492: ctxt->sax->error(ctxt->userData,
8493: "String not closed\n%.50s\n", q);
1.123 daniel 8494: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8495: ctxt->wellFormed = 0;
1.180 daniel 8496: ctxt->disableSAX = 1;
1.55 daniel 8497: } else
1.40 daniel 8498: NEXT;
1.31 daniel 8499: } else {
1.55 daniel 8500: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8501: ctxt->sax->error(ctxt->userData,
1.59 daniel 8502: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8503: ctxt->wellFormed = 0;
1.180 daniel 8504: ctxt->disableSAX = 1;
1.123 daniel 8505: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8506: }
8507: }
8508: return(version);
8509: }
8510:
1.50 daniel 8511: /**
8512: * xmlParseEncName:
8513: * @ctxt: an XML parser context
8514: *
8515: * parse the XML encoding name
1.29 daniel 8516: *
8517: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8518: *
1.68 daniel 8519: * Returns the encoding name value or NULL
1.29 daniel 8520: */
1.123 daniel 8521: xmlChar *
1.55 daniel 8522: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8523: xmlChar *buf = NULL;
8524: int len = 0;
8525: int size = 10;
8526: xmlChar cur;
1.29 daniel 8527:
1.135 daniel 8528: cur = CUR;
8529: if (((cur >= 'a') && (cur <= 'z')) ||
8530: ((cur >= 'A') && (cur <= 'Z'))) {
8531: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8532: if (buf == NULL) {
8533: fprintf(stderr, "malloc of %d byte failed\n", size);
8534: return(NULL);
8535: }
8536:
8537: buf[len++] = cur;
1.40 daniel 8538: NEXT;
1.135 daniel 8539: cur = CUR;
1.152 daniel 8540: while (((cur >= 'a') && (cur <= 'z')) ||
8541: ((cur >= 'A') && (cur <= 'Z')) ||
8542: ((cur >= '0') && (cur <= '9')) ||
8543: (cur == '.') || (cur == '_') ||
8544: (cur == '-')) {
1.135 daniel 8545: if (len + 1 >= size) {
8546: size *= 2;
1.204 veillard 8547: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8548: if (buf == NULL) {
8549: fprintf(stderr, "realloc of %d byte failed\n", size);
8550: return(NULL);
8551: }
8552: }
8553: buf[len++] = cur;
8554: NEXT;
8555: cur = CUR;
8556: if (cur == 0) {
8557: SHRINK;
8558: GROW;
8559: cur = CUR;
8560: }
8561: }
8562: buf[len] = 0;
1.29 daniel 8563: } else {
1.55 daniel 8564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8565: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8566: ctxt->wellFormed = 0;
1.180 daniel 8567: ctxt->disableSAX = 1;
1.123 daniel 8568: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8569: }
1.135 daniel 8570: return(buf);
1.29 daniel 8571: }
8572:
1.50 daniel 8573: /**
8574: * xmlParseEncodingDecl:
8575: * @ctxt: an XML parser context
8576: *
8577: * parse the XML encoding declaration
1.29 daniel 8578: *
8579: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8580: *
8581: * TODO: this should setup the conversion filters.
8582: *
1.68 daniel 8583: * Returns the encoding value or NULL
1.29 daniel 8584: */
8585:
1.123 daniel 8586: xmlChar *
1.55 daniel 8587: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8588: xmlChar *encoding = NULL;
8589: const xmlChar *q;
1.29 daniel 8590:
1.42 daniel 8591: SKIP_BLANKS;
1.152 daniel 8592: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8593: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8594: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8595: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8596: SKIP(8);
1.42 daniel 8597: SKIP_BLANKS;
1.152 daniel 8598: if (RAW != '=') {
1.55 daniel 8599: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8600: ctxt->sax->error(ctxt->userData,
8601: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8602: ctxt->wellFormed = 0;
1.180 daniel 8603: ctxt->disableSAX = 1;
1.123 daniel 8604: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8605: return(NULL);
8606: }
1.40 daniel 8607: NEXT;
1.42 daniel 8608: SKIP_BLANKS;
1.152 daniel 8609: if (RAW == '"') {
1.40 daniel 8610: NEXT;
8611: q = CUR_PTR;
1.29 daniel 8612: encoding = xmlParseEncName(ctxt);
1.152 daniel 8613: if (RAW != '"') {
1.55 daniel 8614: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8615: ctxt->sax->error(ctxt->userData,
8616: "String not closed\n%.50s\n", q);
1.59 daniel 8617: ctxt->wellFormed = 0;
1.180 daniel 8618: ctxt->disableSAX = 1;
1.123 daniel 8619: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8620: } else
1.40 daniel 8621: NEXT;
1.152 daniel 8622: } else if (RAW == '\''){
1.40 daniel 8623: NEXT;
8624: q = CUR_PTR;
1.29 daniel 8625: encoding = xmlParseEncName(ctxt);
1.152 daniel 8626: if (RAW != '\'') {
1.55 daniel 8627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8628: ctxt->sax->error(ctxt->userData,
8629: "String not closed\n%.50s\n", q);
1.59 daniel 8630: ctxt->wellFormed = 0;
1.180 daniel 8631: ctxt->disableSAX = 1;
1.123 daniel 8632: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8633: } else
1.40 daniel 8634: NEXT;
1.152 daniel 8635: } else if (RAW == '"'){
1.55 daniel 8636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8637: ctxt->sax->error(ctxt->userData,
1.59 daniel 8638: "xmlParseEncodingDecl : expected ' or \"\n");
8639: ctxt->wellFormed = 0;
1.180 daniel 8640: ctxt->disableSAX = 1;
1.123 daniel 8641: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8642: }
1.193 daniel 8643: if (encoding != NULL) {
8644: xmlCharEncoding enc;
8645: xmlCharEncodingHandlerPtr handler;
8646:
1.195 daniel 8647: if (ctxt->input->encoding != NULL)
8648: xmlFree((xmlChar *) ctxt->input->encoding);
8649: ctxt->input->encoding = encoding;
8650:
1.193 daniel 8651: enc = xmlParseCharEncoding((const char *) encoding);
8652: /*
8653: * registered set of known encodings
8654: */
8655: if (enc != XML_CHAR_ENCODING_ERROR) {
8656: xmlSwitchEncoding(ctxt, enc);
8657: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8658: xmlFree(encoding);
8659: return(NULL);
8660: }
8661: } else {
8662: /*
8663: * fallback for unknown encodings
8664: */
8665: handler = xmlFindCharEncodingHandler((const char *) encoding);
8666: if (handler != NULL) {
8667: xmlSwitchToEncoding(ctxt, handler);
8668: } else {
8669: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 8670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8671: ctxt->sax->error(ctxt->userData,
8672: "Unsupported encoding %s\n", encoding);
1.193 daniel 8673: return(NULL);
8674: }
8675: }
8676: }
1.29 daniel 8677: }
8678: return(encoding);
8679: }
8680:
1.50 daniel 8681: /**
8682: * xmlParseSDDecl:
8683: * @ctxt: an XML parser context
8684: *
8685: * parse the XML standalone declaration
1.29 daniel 8686: *
8687: * [32] SDDecl ::= S 'standalone' Eq
8688: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8689: *
8690: * [ VC: Standalone Document Declaration ]
8691: * TODO The standalone document declaration must have the value "no"
8692: * if any external markup declarations contain declarations of:
8693: * - attributes with default values, if elements to which these
8694: * attributes apply appear in the document without specifications
8695: * of values for these attributes, or
8696: * - entities (other than amp, lt, gt, apos, quot), if references
8697: * to those entities appear in the document, or
8698: * - attributes with values subject to normalization, where the
8699: * attribute appears in the document with a value which will change
8700: * as a result of normalization, or
8701: * - element types with element content, if white space occurs directly
8702: * within any instance of those types.
1.68 daniel 8703: *
8704: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8705: */
8706:
1.55 daniel 8707: int
8708: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8709: int standalone = -1;
8710:
1.42 daniel 8711: SKIP_BLANKS;
1.152 daniel 8712: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8713: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8714: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8715: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8716: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8717: SKIP(10);
1.81 daniel 8718: SKIP_BLANKS;
1.152 daniel 8719: if (RAW != '=') {
1.55 daniel 8720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8721: ctxt->sax->error(ctxt->userData,
1.59 daniel 8722: "XML standalone declaration : expected '='\n");
1.123 daniel 8723: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8724: ctxt->wellFormed = 0;
1.180 daniel 8725: ctxt->disableSAX = 1;
1.32 daniel 8726: return(standalone);
8727: }
1.40 daniel 8728: NEXT;
1.42 daniel 8729: SKIP_BLANKS;
1.152 daniel 8730: if (RAW == '\''){
1.40 daniel 8731: NEXT;
1.152 daniel 8732: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8733: standalone = 0;
1.40 daniel 8734: SKIP(2);
1.152 daniel 8735: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8736: (NXT(2) == 's')) {
1.29 daniel 8737: standalone = 1;
1.40 daniel 8738: SKIP(3);
1.29 daniel 8739: } else {
1.55 daniel 8740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8741: ctxt->sax->error(ctxt->userData,
8742: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8743: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8744: ctxt->wellFormed = 0;
1.180 daniel 8745: ctxt->disableSAX = 1;
1.29 daniel 8746: }
1.152 daniel 8747: if (RAW != '\'') {
1.55 daniel 8748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8749: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8750: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8751: ctxt->wellFormed = 0;
1.180 daniel 8752: ctxt->disableSAX = 1;
1.55 daniel 8753: } else
1.40 daniel 8754: NEXT;
1.152 daniel 8755: } else if (RAW == '"'){
1.40 daniel 8756: NEXT;
1.152 daniel 8757: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8758: standalone = 0;
1.40 daniel 8759: SKIP(2);
1.152 daniel 8760: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8761: (NXT(2) == 's')) {
1.29 daniel 8762: standalone = 1;
1.40 daniel 8763: SKIP(3);
1.29 daniel 8764: } else {
1.55 daniel 8765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8766: ctxt->sax->error(ctxt->userData,
1.59 daniel 8767: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8768: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8769: ctxt->wellFormed = 0;
1.180 daniel 8770: ctxt->disableSAX = 1;
1.29 daniel 8771: }
1.152 daniel 8772: if (RAW != '"') {
1.55 daniel 8773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8774: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8775: ctxt->wellFormed = 0;
1.180 daniel 8776: ctxt->disableSAX = 1;
1.123 daniel 8777: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8778: } else
1.40 daniel 8779: NEXT;
1.37 daniel 8780: } else {
1.55 daniel 8781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8782: ctxt->sax->error(ctxt->userData,
8783: "Standalone value not found\n");
1.59 daniel 8784: ctxt->wellFormed = 0;
1.180 daniel 8785: ctxt->disableSAX = 1;
1.123 daniel 8786: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8787: }
1.29 daniel 8788: }
8789: return(standalone);
8790: }
8791:
1.50 daniel 8792: /**
8793: * xmlParseXMLDecl:
8794: * @ctxt: an XML parser context
8795: *
8796: * parse an XML declaration header
1.29 daniel 8797: *
8798: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8799: */
8800:
1.55 daniel 8801: void
8802: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8803: xmlChar *version;
1.1 veillard 8804:
8805: /*
1.19 daniel 8806: * We know that '<?xml' is here.
1.1 veillard 8807: */
1.40 daniel 8808: SKIP(5);
1.1 veillard 8809:
1.153 daniel 8810: if (!IS_BLANK(RAW)) {
1.59 daniel 8811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8812: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8813: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8814: ctxt->wellFormed = 0;
1.180 daniel 8815: ctxt->disableSAX = 1;
1.59 daniel 8816: }
1.42 daniel 8817: SKIP_BLANKS;
1.1 veillard 8818:
8819: /*
1.29 daniel 8820: * We should have the VersionInfo here.
1.1 veillard 8821: */
1.29 daniel 8822: version = xmlParseVersionInfo(ctxt);
8823: if (version == NULL)
1.45 daniel 8824: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8825: ctxt->version = xmlStrdup(version);
1.119 daniel 8826: xmlFree(version);
1.29 daniel 8827:
8828: /*
8829: * We may have the encoding declaration
8830: */
1.153 daniel 8831: if (!IS_BLANK(RAW)) {
1.152 daniel 8832: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8833: SKIP(2);
8834: return;
8835: }
8836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8837: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8838: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8839: ctxt->wellFormed = 0;
1.180 daniel 8840: ctxt->disableSAX = 1;
1.59 daniel 8841: }
1.195 daniel 8842: xmlParseEncodingDecl(ctxt);
1.193 daniel 8843: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8844: /*
8845: * The XML REC instructs us to stop parsing right here
8846: */
8847: return;
8848: }
1.1 veillard 8849:
8850: /*
1.29 daniel 8851: * We may have the standalone status.
1.1 veillard 8852: */
1.164 daniel 8853: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8854: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8855: SKIP(2);
8856: return;
8857: }
8858: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8859: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8860: ctxt->wellFormed = 0;
1.180 daniel 8861: ctxt->disableSAX = 1;
1.123 daniel 8862: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8863: }
8864: SKIP_BLANKS;
1.167 daniel 8865: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8866:
1.42 daniel 8867: SKIP_BLANKS;
1.152 daniel 8868: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8869: SKIP(2);
1.152 daniel 8870: } else if (RAW == '>') {
1.31 daniel 8871: /* Deprecated old WD ... */
1.55 daniel 8872: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8873: ctxt->sax->error(ctxt->userData,
8874: "XML declaration must end-up with '?>'\n");
1.59 daniel 8875: ctxt->wellFormed = 0;
1.180 daniel 8876: ctxt->disableSAX = 1;
1.123 daniel 8877: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8878: NEXT;
1.29 daniel 8879: } else {
1.55 daniel 8880: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8881: ctxt->sax->error(ctxt->userData,
8882: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8883: ctxt->wellFormed = 0;
1.180 daniel 8884: ctxt->disableSAX = 1;
1.123 daniel 8885: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8886: MOVETO_ENDTAG(CUR_PTR);
8887: NEXT;
1.29 daniel 8888: }
1.1 veillard 8889: }
8890:
1.50 daniel 8891: /**
8892: * xmlParseMisc:
8893: * @ctxt: an XML parser context
8894: *
8895: * parse an XML Misc* optionnal field.
1.21 daniel 8896: *
1.22 daniel 8897: * [27] Misc ::= Comment | PI | S
1.1 veillard 8898: */
8899:
1.55 daniel 8900: void
8901: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8902: while (((RAW == '<') && (NXT(1) == '?')) ||
8903: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8904: (NXT(2) == '-') && (NXT(3) == '-')) ||
8905: IS_BLANK(CUR)) {
1.152 daniel 8906: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8907: xmlParsePI(ctxt);
1.40 daniel 8908: } else if (IS_BLANK(CUR)) {
8909: NEXT;
1.1 veillard 8910: } else
1.114 daniel 8911: xmlParseComment(ctxt);
1.1 veillard 8912: }
8913: }
8914:
1.50 daniel 8915: /**
1.181 daniel 8916: * xmlParseDocument:
1.50 daniel 8917: * @ctxt: an XML parser context
8918: *
8919: * parse an XML document (and build a tree if using the standard SAX
8920: * interface).
1.21 daniel 8921: *
1.22 daniel 8922: * [1] document ::= prolog element Misc*
1.29 daniel 8923: *
8924: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8925: *
1.68 daniel 8926: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8927: * as a result of the parsing.
1.1 veillard 8928: */
8929:
1.55 daniel 8930: int
8931: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8932: xmlChar start[4];
8933: xmlCharEncoding enc;
8934:
1.45 daniel 8935: xmlDefaultSAXHandlerInit();
8936:
1.91 daniel 8937: GROW;
8938:
1.14 veillard 8939: /*
1.44 daniel 8940: * SAX: beginning of the document processing.
8941: */
1.72 daniel 8942: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8943: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8944:
1.156 daniel 8945: /*
8946: * Get the 4 first bytes and decode the charset
8947: * if enc != XML_CHAR_ENCODING_NONE
8948: * plug some encoding conversion routines.
8949: */
8950: start[0] = RAW;
8951: start[1] = NXT(1);
8952: start[2] = NXT(2);
8953: start[3] = NXT(3);
8954: enc = xmlDetectCharEncoding(start, 4);
8955: if (enc != XML_CHAR_ENCODING_NONE) {
8956: xmlSwitchEncoding(ctxt, enc);
8957: }
8958:
1.1 veillard 8959:
1.59 daniel 8960: if (CUR == 0) {
8961: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8962: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8963: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8964: ctxt->wellFormed = 0;
1.180 daniel 8965: ctxt->disableSAX = 1;
1.59 daniel 8966: }
1.1 veillard 8967:
8968: /*
8969: * Check for the XMLDecl in the Prolog.
8970: */
1.91 daniel 8971: GROW;
1.152 daniel 8972: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8973: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8974: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 8975:
8976: /*
8977: * Note that we will switch encoding on the fly.
8978: */
1.19 daniel 8979: xmlParseXMLDecl(ctxt);
1.193 daniel 8980: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8981: /*
8982: * The XML REC instructs us to stop parsing right here
8983: */
8984: return(-1);
8985: }
1.167 daniel 8986: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8987: SKIP_BLANKS;
1.1 veillard 8988: } else {
1.72 daniel 8989: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8990: }
1.171 daniel 8991: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8992: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8993:
8994: /*
8995: * The Misc part of the Prolog
8996: */
1.91 daniel 8997: GROW;
1.16 daniel 8998: xmlParseMisc(ctxt);
1.1 veillard 8999:
9000: /*
1.29 daniel 9001: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 9002: * (doctypedecl Misc*)?
9003: */
1.91 daniel 9004: GROW;
1.152 daniel 9005: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 9006: (NXT(2) == 'D') && (NXT(3) == 'O') &&
9007: (NXT(4) == 'C') && (NXT(5) == 'T') &&
9008: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
9009: (NXT(8) == 'E')) {
1.165 daniel 9010:
1.166 daniel 9011: ctxt->inSubset = 1;
1.22 daniel 9012: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9013: if (RAW == '[') {
1.140 daniel 9014: ctxt->instate = XML_PARSER_DTD;
9015: xmlParseInternalSubset(ctxt);
9016: }
1.165 daniel 9017:
9018: /*
9019: * Create and update the external subset.
9020: */
1.166 daniel 9021: ctxt->inSubset = 2;
1.171 daniel 9022: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9023: (!ctxt->disableSAX))
1.165 daniel 9024: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9025: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 9026: ctxt->inSubset = 0;
1.165 daniel 9027:
9028:
1.96 daniel 9029: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 9030: xmlParseMisc(ctxt);
1.21 daniel 9031: }
9032:
9033: /*
9034: * Time to start parsing the tree itself
1.1 veillard 9035: */
1.91 daniel 9036: GROW;
1.152 daniel 9037: if (RAW != '<') {
1.59 daniel 9038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 9039: ctxt->sax->error(ctxt->userData,
1.151 daniel 9040: "Start tag expected, '<' not found\n");
1.140 daniel 9041: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 9042: ctxt->wellFormed = 0;
1.180 daniel 9043: ctxt->disableSAX = 1;
1.140 daniel 9044: ctxt->instate = XML_PARSER_EOF;
9045: } else {
9046: ctxt->instate = XML_PARSER_CONTENT;
9047: xmlParseElement(ctxt);
9048: ctxt->instate = XML_PARSER_EPILOG;
9049:
9050:
9051: /*
9052: * The Misc part at the end
9053: */
9054: xmlParseMisc(ctxt);
9055:
1.152 daniel 9056: if (RAW != 0) {
1.140 daniel 9057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9058: ctxt->sax->error(ctxt->userData,
9059: "Extra content at the end of the document\n");
9060: ctxt->wellFormed = 0;
1.180 daniel 9061: ctxt->disableSAX = 1;
1.140 daniel 9062: ctxt->errNo = XML_ERR_DOCUMENT_END;
9063: }
9064: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 9065: }
9066:
1.44 daniel 9067: /*
9068: * SAX: end of the document processing.
9069: */
1.171 daniel 9070: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9071: (!ctxt->disableSAX))
1.74 daniel 9072: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 9073:
1.59 daniel 9074: if (! ctxt->wellFormed) return(-1);
1.16 daniel 9075: return(0);
9076: }
9077:
1.98 daniel 9078: /************************************************************************
9079: * *
1.128 daniel 9080: * Progressive parsing interfaces *
9081: * *
9082: ************************************************************************/
9083:
9084: /**
9085: * xmlParseLookupSequence:
9086: * @ctxt: an XML parser context
9087: * @first: the first char to lookup
1.140 daniel 9088: * @next: the next char to lookup or zero
9089: * @third: the next char to lookup or zero
1.128 daniel 9090: *
1.140 daniel 9091: * Try to find if a sequence (first, next, third) or just (first next) or
9092: * (first) is available in the input stream.
9093: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9094: * to avoid rescanning sequences of bytes, it DOES change the state of the
9095: * parser, do not use liberally.
1.128 daniel 9096: *
1.140 daniel 9097: * Returns the index to the current parsing point if the full sequence
9098: * is available, -1 otherwise.
1.128 daniel 9099: */
9100: int
1.140 daniel 9101: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9102: xmlChar next, xmlChar third) {
9103: int base, len;
9104: xmlParserInputPtr in;
9105: const xmlChar *buf;
9106:
9107: in = ctxt->input;
9108: if (in == NULL) return(-1);
9109: base = in->cur - in->base;
9110: if (base < 0) return(-1);
9111: if (ctxt->checkIndex > base)
9112: base = ctxt->checkIndex;
9113: if (in->buf == NULL) {
9114: buf = in->base;
9115: len = in->length;
9116: } else {
9117: buf = in->buf->buffer->content;
9118: len = in->buf->buffer->use;
9119: }
9120: /* take into account the sequence length */
9121: if (third) len -= 2;
9122: else if (next) len --;
9123: for (;base < len;base++) {
9124: if (buf[base] == first) {
9125: if (third != 0) {
9126: if ((buf[base + 1] != next) ||
9127: (buf[base + 2] != third)) continue;
9128: } else if (next != 0) {
9129: if (buf[base + 1] != next) continue;
9130: }
9131: ctxt->checkIndex = 0;
9132: #ifdef DEBUG_PUSH
9133: if (next == 0)
9134: fprintf(stderr, "PP: lookup '%c' found at %d\n",
9135: first, base);
9136: else if (third == 0)
9137: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
9138: first, next, base);
9139: else
9140: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
9141: first, next, third, base);
9142: #endif
9143: return(base - (in->cur - in->base));
9144: }
9145: }
9146: ctxt->checkIndex = base;
9147: #ifdef DEBUG_PUSH
9148: if (next == 0)
9149: fprintf(stderr, "PP: lookup '%c' failed\n", first);
9150: else if (third == 0)
9151: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
9152: else
9153: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
9154: #endif
9155: return(-1);
1.128 daniel 9156: }
9157:
9158: /**
1.143 daniel 9159: * xmlParseTryOrFinish:
1.128 daniel 9160: * @ctxt: an XML parser context
1.143 daniel 9161: * @terminate: last chunk indicator
1.128 daniel 9162: *
9163: * Try to progress on parsing
9164: *
 * This is the state machine of the progressive (push) parser. Each turn of
 * the main loop pops finished entity inputs, recomputes the number of bytes
 * available in the current input and dispatches on ctxt->instate.
 * Before a construct is handed to one of the xmlParse* routines, its closing
 * delimiter is searched with xmlParseLookupSequence(); when it is not yet
 * available the function stops and waits for more data. When @terminate is
 * non-zero these lookups are skipped and parsing is attempted with whatever
 * data is present.
 *
9165: * Returns zero if no parsing was possible
 *
 * NOTE(review): in the code below the returned counter is only changed in
 * the XML_PARSER_START state (blanks skipped, XML declaration found), so
 * progress made in the other states is not reflected in the return value.
9166: */
9167: int
1.143 daniel 9168: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 9169: int ret = 0;
1.140 daniel 9170: int avail;
9171: xmlChar cur, next;
9172:
9173: #ifdef DEBUG_PUSH
9174: switch (ctxt->instate) {
9175: case XML_PARSER_EOF:
9176: fprintf(stderr, "PP: try EOF\n"); break;
9177: case XML_PARSER_START:
9178: fprintf(stderr, "PP: try START\n"); break;
9179: case XML_PARSER_MISC:
9180: fprintf(stderr, "PP: try MISC\n");break;
9181: case XML_PARSER_COMMENT:
9182: fprintf(stderr, "PP: try COMMENT\n");break;
9183: case XML_PARSER_PROLOG:
9184: fprintf(stderr, "PP: try PROLOG\n");break;
9185: case XML_PARSER_START_TAG:
9186: fprintf(stderr, "PP: try START_TAG\n");break;
9187: case XML_PARSER_CONTENT:
9188: fprintf(stderr, "PP: try CONTENT\n");break;
9189: case XML_PARSER_CDATA_SECTION:
9190: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
9191: case XML_PARSER_END_TAG:
9192: fprintf(stderr, "PP: try END_TAG\n");break;
9193: case XML_PARSER_ENTITY_DECL:
9194: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
9195: case XML_PARSER_ENTITY_VALUE:
9196: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
9197: case XML_PARSER_ATTRIBUTE_VALUE:
9198: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
9199: case XML_PARSER_DTD:
9200: fprintf(stderr, "PP: try DTD\n");break;
9201: case XML_PARSER_EPILOG:
9202: fprintf(stderr, "PP: try EPILOG\n");break;
9203: case XML_PARSER_PI:
9204: fprintf(stderr, "PP: try PI\n");break;
9205: }
9206: #endif
1.128 daniel 9207:
/* Main loop: one state transition (or one parsed construct) per turn. */
9208: while (1) {
1.140 daniel 9209: /*
9210: * Pop-up of finished entities.
9211: */
1.152 daniel 9212: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9213: xmlPopInput(ctxt);
9214:
/*
 * Number of bytes left after the current position: taken from the input's
 * own length when there is no I/O buffer, from the buffer fill otherwise.
 */
1.184 daniel 9215: if (ctxt->input ==NULL) break;
9216: if (ctxt->input->buf == NULL)
9217: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9218: else
1.184 daniel 9219: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9220: if (avail < 1)
9221: goto done;
1.128 daniel 9222: switch (ctxt->instate) {
9223: case XML_PARSER_EOF:
1.140 daniel 9224: /*
9225: * Document parsing is done !
9226: */
9227: goto done;
9228: case XML_PARSER_START:
9229: /*
9230: * Very first chars read from the document flow.
9231: */
1.184 daniel 9232: cur = ctxt->input->cur[0];
/* Leading blanks are reported as an error, then skipped. */
1.140 daniel 9233: if (IS_BLANK(cur)) {
9234: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9235: ctxt->sax->setDocumentLocator(ctxt->userData,
9236: &xmlDefaultSAXLocator);
9237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9238: ctxt->sax->error(ctxt->userData,
9239: "Extra spaces at the beginning of the document are not allowed\n");
9240: ctxt->errNo = XML_ERR_DOCUMENT_START;
9241: ctxt->wellFormed = 0;
1.180 daniel 9242: ctxt->disableSAX = 1;
1.140 daniel 9243: SKIP_BLANKS;
9244: ret++;
1.184 daniel 9245: if (ctxt->input->buf == NULL)
9246: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9247: else
1.184 daniel 9248: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9249: }
9250: if (avail < 2)
9251: goto done;
9252:
1.184 daniel 9253: cur = ctxt->input->cur[0];
9254: next = ctxt->input->cur[1];
1.140 daniel 9255: if (cur == 0) {
9256: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9257: ctxt->sax->setDocumentLocator(ctxt->userData,
9258: &xmlDefaultSAXLocator);
9259: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9260: ctxt->sax->error(ctxt->userData, "Document is empty\n");
9261: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9262: ctxt->wellFormed = 0;
1.180 daniel 9263: ctxt->disableSAX = 1;
1.140 daniel 9264: ctxt->instate = XML_PARSER_EOF;
9265: #ifdef DEBUG_PUSH
9266: fprintf(stderr, "PP: entering EOF\n");
9267: #endif
/*
 * NOTE(review): unlike the other endDocument call sites in this function,
 * this one does not test ctxt->disableSAX (which was just set to 1 above).
 * Confirm whether that is intended.
 */
9268: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9269: ctxt->sax->endDocument(ctxt->userData);
9270: goto done;
9271: }
9272: if ((cur == '<') && (next == '?')) {
9273: /* PI or XML decl */
/*
 * NOTE(review): avail >= 5 only covers cur[0..4], yet cur[5] is read in the
 * "<?xml" test below. Presumably the buffer keeps a terminator past its
 * last byte; confirm.
 */
9274: if (avail < 5) return(ret);
1.143 daniel 9275: if ((!terminate) &&
9276: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9277: return(ret);
9278: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9279: ctxt->sax->setDocumentLocator(ctxt->userData,
9280: &xmlDefaultSAXLocator);
1.184 daniel 9281: if ((ctxt->input->cur[2] == 'x') &&
9282: (ctxt->input->cur[3] == 'm') &&
9283: (ctxt->input->cur[4] == 'l') &&
9284: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 9285: ret += 5;
9286: #ifdef DEBUG_PUSH
9287: fprintf(stderr, "PP: Parsing XML Decl\n");
9288: #endif
9289: xmlParseXMLDecl(ctxt);
1.193 daniel 9290: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9291: /*
9292: * The XML REC instructs us to stop parsing right
9293: * here
9294: */
9295: ctxt->instate = XML_PARSER_EOF;
9296: return(0);
9297: }
/* Propagate what the declaration stored on the input to the context. */
1.167 daniel 9298: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 9299: if ((ctxt->encoding == NULL) &&
9300: (ctxt->input->encoding != NULL))
9301: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 9302: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9303: (!ctxt->disableSAX))
1.140 daniel 9304: ctxt->sax->startDocument(ctxt->userData);
9305: ctxt->instate = XML_PARSER_MISC;
9306: #ifdef DEBUG_PUSH
9307: fprintf(stderr, "PP: entering MISC\n");
9308: #endif
9309: } else {
/* "<?" but not an XML declaration: default version, PI handled in MISC. */
9310: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9311: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9312: (!ctxt->disableSAX))
1.140 daniel 9313: ctxt->sax->startDocument(ctxt->userData);
9314: ctxt->instate = XML_PARSER_MISC;
9315: #ifdef DEBUG_PUSH
9316: fprintf(stderr, "PP: entering MISC\n");
9317: #endif
9318: }
9319: } else {
/* No XML declaration at all: default version, move on to MISC. */
9320: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9321: ctxt->sax->setDocumentLocator(ctxt->userData,
9322: &xmlDefaultSAXLocator);
9323: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9324: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9325: (!ctxt->disableSAX))
1.140 daniel 9326: ctxt->sax->startDocument(ctxt->userData);
9327: ctxt->instate = XML_PARSER_MISC;
9328: #ifdef DEBUG_PUSH
9329: fprintf(stderr, "PP: entering MISC\n");
9330: #endif
9331: }
9332: break;
/*
 * MISC: between the XML declaration and the DOCTYPE. Accepts PIs, comments
 * and the DOCTYPE itself; anything else switches to START_TAG.
 */
9333: case XML_PARSER_MISC:
9334: SKIP_BLANKS;
1.184 daniel 9335: if (ctxt->input->buf == NULL)
9336: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9337: else
1.184 daniel 9338: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9339: if (avail < 2)
9340: goto done;
1.184 daniel 9341: cur = ctxt->input->cur[0];
9342: next = ctxt->input->cur[1];
1.140 daniel 9343: if ((cur == '<') && (next == '?')) {
1.143 daniel 9344: if ((!terminate) &&
9345: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9346: goto done;
9347: #ifdef DEBUG_PUSH
9348: fprintf(stderr, "PP: Parsing PI\n");
9349: #endif
9350: xmlParsePI(ctxt);
9351: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9352: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9353: if ((!terminate) &&
9354: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9355: goto done;
9356: #ifdef DEBUG_PUSH
9357: fprintf(stderr, "PP: Parsing Comment\n");
9358: #endif
9359: xmlParseComment(ctxt);
9360: ctxt->instate = XML_PARSER_MISC;
9361: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9362: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9363: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9364: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9365: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 9366: if ((!terminate) &&
9367: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9368: goto done;
9369: #ifdef DEBUG_PUSH
9370: fprintf(stderr, "PP: Parsing internal subset\n");
9371: #endif
1.166 daniel 9372: ctxt->inSubset = 1;
1.140 daniel 9373: xmlParseDocTypeDecl(ctxt);
/* A '[' after the DOCTYPE header means an internal subset follows. */
1.152 daniel 9374: if (RAW == '[') {
1.140 daniel 9375: ctxt->instate = XML_PARSER_DTD;
9376: #ifdef DEBUG_PUSH
9377: fprintf(stderr, "PP: entering DTD\n");
9378: #endif
9379: } else {
1.166 daniel 9380: /*
9381: * Create and update the external subset.
9382: */
9383: ctxt->inSubset = 2;
1.171 daniel 9384: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9385: (ctxt->sax->externalSubset != NULL))
9386: ctxt->sax->externalSubset(ctxt->userData,
9387: ctxt->intSubName, ctxt->extSubSystem,
9388: ctxt->extSubURI);
9389: ctxt->inSubset = 0;
1.140 daniel 9390: ctxt->instate = XML_PARSER_PROLOG;
9391: #ifdef DEBUG_PUSH
9392: fprintf(stderr, "PP: entering PROLOG\n");
9393: #endif
9394: }
9395: } else if ((cur == '<') && (next == '!') &&
9396: (avail < 9)) {
/* "<!" seen but too few bytes to tell comment from DOCTYPE: wait. */
9397: goto done;
9398: } else {
9399: ctxt->instate = XML_PARSER_START_TAG;
9400: #ifdef DEBUG_PUSH
9401: fprintf(stderr, "PP: entering START_TAG\n");
9402: #endif
9403: }
9404: break;
/*
 * PROLOG: after the DOCTYPE, before the root element. Accepts PIs and
 * comments; anything else switches to START_TAG.
 */
1.128 daniel 9405: case XML_PARSER_PROLOG:
1.140 daniel 9406: SKIP_BLANKS;
1.184 daniel 9407: if (ctxt->input->buf == NULL)
9408: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9409: else
1.184 daniel 9410: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9411: if (avail < 2)
9412: goto done;
1.184 daniel 9413: cur = ctxt->input->cur[0];
9414: next = ctxt->input->cur[1];
1.140 daniel 9415: if ((cur == '<') && (next == '?')) {
1.143 daniel 9416: if ((!terminate) &&
9417: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9418: goto done;
9419: #ifdef DEBUG_PUSH
9420: fprintf(stderr, "PP: Parsing PI\n");
9421: #endif
9422: xmlParsePI(ctxt);
9423: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9424: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9425: if ((!terminate) &&
9426: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9427: goto done;
9428: #ifdef DEBUG_PUSH
9429: fprintf(stderr, "PP: Parsing Comment\n");
9430: #endif
9431: xmlParseComment(ctxt);
9432: ctxt->instate = XML_PARSER_PROLOG;
9433: } else if ((cur == '<') && (next == '!') &&
9434: (avail < 4)) {
9435: goto done;
9436: } else {
9437: ctxt->instate = XML_PARSER_START_TAG;
9438: #ifdef DEBUG_PUSH
9439: fprintf(stderr, "PP: entering START_TAG\n");
9440: #endif
9441: }
9442: break;
/*
 * EPILOG: after the root element has been closed. Only PIs and comments
 * are accepted; anything else is reported as extra content and ends the
 * parse.
 */
9443: case XML_PARSER_EPILOG:
9444: SKIP_BLANKS;
1.184 daniel 9445: if (ctxt->input->buf == NULL)
9446: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9447: else
1.184 daniel 9448: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9449: if (avail < 2)
9450: goto done;
1.184 daniel 9451: cur = ctxt->input->cur[0];
9452: next = ctxt->input->cur[1];
1.140 daniel 9453: if ((cur == '<') && (next == '?')) {
1.143 daniel 9454: if ((!terminate) &&
9455: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9456: goto done;
9457: #ifdef DEBUG_PUSH
9458: fprintf(stderr, "PP: Parsing PI\n");
9459: #endif
9460: xmlParsePI(ctxt);
9461: ctxt->instate = XML_PARSER_EPILOG;
9462: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9463: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9464: if ((!terminate) &&
9465: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9466: goto done;
9467: #ifdef DEBUG_PUSH
9468: fprintf(stderr, "PP: Parsing Comment\n");
9469: #endif
9470: xmlParseComment(ctxt);
9471: ctxt->instate = XML_PARSER_EPILOG;
9472: } else if ((cur == '<') && (next == '!') &&
9473: (avail < 4)) {
9474: goto done;
9475: } else {
9476: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9477: ctxt->sax->error(ctxt->userData,
9478: "Extra content at the end of the document\n");
9479: ctxt->wellFormed = 0;
1.180 daniel 9480: ctxt->disableSAX = 1;
1.140 daniel 9481: ctxt->errNo = XML_ERR_DOCUMENT_END;
9482: ctxt->instate = XML_PARSER_EOF;
9483: #ifdef DEBUG_PUSH
9484: fprintf(stderr, "PP: entering EOF\n");
9485: #endif
1.171 daniel 9486: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9487: (!ctxt->disableSAX))
1.140 daniel 9488: ctxt->sax->endDocument(ctxt->userData);
9489: goto done;
9490: }
9491: break;
/*
 * START_TAG: parse one start tag (or empty-element tag) once its closing
 * '>' is available, pushing the element name and a space-handling entry.
 */
9492: case XML_PARSER_START_TAG: {
9493: xmlChar *name, *oldname;
9494:
1.184 daniel 9495: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9496: goto done;
1.184 daniel 9497: cur = ctxt->input->cur[0];
1.140 daniel 9498: if (cur != '<') {
9499: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9500: ctxt->sax->error(ctxt->userData,
9501: "Start tag expect, '<' not found\n");
9502: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9503: ctxt->wellFormed = 0;
1.180 daniel 9504: ctxt->disableSAX = 1;
1.140 daniel 9505: ctxt->instate = XML_PARSER_EOF;
9506: #ifdef DEBUG_PUSH
9507: fprintf(stderr, "PP: entering EOF\n");
9508: #endif
1.171 daniel 9509: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9510: (!ctxt->disableSAX))
1.140 daniel 9511: ctxt->sax->endDocument(ctxt->userData);
9512: goto done;
9513: }
1.143 daniel 9514: if ((!terminate) &&
9515: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9516: goto done;
/* The new element starts with the enclosing space setting, or -1 at root. */
1.176 daniel 9517: if (ctxt->spaceNr == 0)
9518: spacePush(ctxt, -1);
9519: else
9520: spacePush(ctxt, *ctxt->space);
1.140 daniel 9521: name = xmlParseStartTag(ctxt);
9522: if (name == NULL) {
1.176 daniel 9523: spacePop(ctxt);
1.140 daniel 9524: ctxt->instate = XML_PARSER_EOF;
9525: #ifdef DEBUG_PUSH
9526: fprintf(stderr, "PP: entering EOF\n");
9527: #endif
1.171 daniel 9528: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9529: (!ctxt->disableSAX))
1.140 daniel 9530: ctxt->sax->endDocument(ctxt->userData);
9531: goto done;
9532: }
/* The name stack gets its own copy; `name` itself is freed below. */
9533: namePush(ctxt, xmlStrdup(name));
9534:
9535: /*
9536: * [ VC: Root Element Type ]
9537: * The Name in the document type declaration must match
9538: * the element type of the root element.
9539: */
9540: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9541: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9542: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9543:
9544: /*
9545: * Check for an Empty Element.
9546: */
1.152 daniel 9547: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9548: SKIP(2);
1.171 daniel 9549: if ((ctxt->sax != NULL) &&
9550: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9551: ctxt->sax->endElement(ctxt->userData, name);
9552: xmlFree(name);
9553: oldname = namePop(ctxt);
1.176 daniel 9554: spacePop(ctxt);
1.140 daniel 9555: if (oldname != NULL) {
9556: #ifdef DEBUG_STACK
9557: fprintf(stderr,"Close: popped %s\n", oldname);
9558: #endif
9559: xmlFree(oldname);
9560: }
/* An empty name stack means the root element was just closed. */
9561: if (ctxt->name == NULL) {
9562: ctxt->instate = XML_PARSER_EPILOG;
9563: #ifdef DEBUG_PUSH
9564: fprintf(stderr, "PP: entering EPILOG\n");
9565: #endif
9566: } else {
9567: ctxt->instate = XML_PARSER_CONTENT;
9568: #ifdef DEBUG_PUSH
9569: fprintf(stderr, "PP: entering CONTENT\n");
9570: #endif
9571: }
9572: break;
9573: }
1.152 daniel 9574: if (RAW == '>') {
1.140 daniel 9575: NEXT;
9576: } else {
9577: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9578: ctxt->sax->error(ctxt->userData,
9579: "Couldn't find end of Start Tag %s\n",
9580: name);
9581: ctxt->wellFormed = 0;
1.180 daniel 9582: ctxt->disableSAX = 1;
1.140 daniel 9583: ctxt->errNo = XML_ERR_GT_REQUIRED;
9584:
9585: /*
9586: * end of parsing of this node.
9587: */
9588: nodePop(ctxt);
9589: oldname = namePop(ctxt);
1.176 daniel 9590: spacePop(ctxt);
1.140 daniel 9591: if (oldname != NULL) {
9592: #ifdef DEBUG_STACK
9593: fprintf(stderr,"Close: popped %s\n", oldname);
9594: #endif
9595: xmlFree(oldname);
9596: }
9597: }
9598: xmlFree(name);
9599: ctxt->instate = XML_PARSER_CONTENT;
9600: #ifdef DEBUG_PUSH
9601: fprintf(stderr, "PP: entering CONTENT\n");
9602: #endif
9603: break;
9604: }
/*
 * CONTENT: inside an element. Dispatches on the next characters to PI,
 * comment, CDATA section, end tag, start tag, reference or char data.
 */
1.128 daniel 9605: case XML_PARSER_CONTENT:
1.140 daniel 9606: /*
9607: * Handle preparsed entities and charRef
9608: */
9609: if (ctxt->token != 0) {
9610: xmlChar cur[2] = { 0 , 0 } ;
9611:
9612: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9613: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9614: (ctxt->sax->characters != NULL))
1.140 daniel 9615: ctxt->sax->characters(ctxt->userData, cur, 1);
9616: ctxt->token = 0;
9617: }
1.184 daniel 9618: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9619: goto done;
1.184 daniel 9620: cur = ctxt->input->cur[0];
9621: next = ctxt->input->cur[1];
1.140 daniel 9622: if ((cur == '<') && (next == '?')) {
1.143 daniel 9623: if ((!terminate) &&
9624: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9625: goto done;
9626: #ifdef DEBUG_PUSH
9627: fprintf(stderr, "PP: Parsing PI\n");
9628: #endif
9629: xmlParsePI(ctxt);
9630: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9631: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9632: if ((!terminate) &&
9633: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9634: goto done;
9635: #ifdef DEBUG_PUSH
9636: fprintf(stderr, "PP: Parsing Comment\n");
9637: #endif
9638: xmlParseComment(ctxt);
9639: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9640: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9641: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9642: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9643: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9644: (ctxt->input->cur[8] == '[')) {
/* "<![CDATA[" : skip the 9-byte opener, the payload is handled next turn. */
1.140 daniel 9645: SKIP(9);
9646: ctxt->instate = XML_PARSER_CDATA_SECTION;
9647: #ifdef DEBUG_PUSH
9648: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9649: #endif
9650: break;
9651: } else if ((cur == '<') && (next == '!') &&
9652: (avail < 9)) {
9653: goto done;
9654: } else if ((cur == '<') && (next == '/')) {
9655: ctxt->instate = XML_PARSER_END_TAG;
9656: #ifdef DEBUG_PUSH
9657: fprintf(stderr, "PP: entering END_TAG\n");
9658: #endif
9659: break;
9660: } else if (cur == '<') {
9661: ctxt->instate = XML_PARSER_START_TAG;
9662: #ifdef DEBUG_PUSH
9663: fprintf(stderr, "PP: entering START_TAG\n");
9664: #endif
9665: break;
9666: } else if (cur == '&') {
1.143 daniel 9667: if ((!terminate) &&
9668: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9669: goto done;
9670: #ifdef DEBUG_PUSH
9671: fprintf(stderr, "PP: Parsing Reference\n");
9672: #endif
9673: /* TODO: check generation of subtrees if noent !!! */
9674: xmlParseReference(ctxt);
9675: } else {
1.156 daniel 9676: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9677: /*
1.181 daniel 9678: * Goal of the following test is:
1.140 daniel 9679: * - minimize calls to the SAX 'character' callback
9680: * when they are mergeable
9681: * - handle an problem for isBlank when we only parse
9682: * a sequence of blank chars and the next one is
9683: * not available to check against '<' presence.
9684: * - tries to homogenize the differences in SAX
9685: * callbacks beween the push and pull versions
9686: * of the parser.
9687: */
9688: if ((ctxt->inputNr == 1) &&
9689: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9690: if ((!terminate) &&
9691: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9692: goto done;
9693: }
9694: ctxt->checkIndex = 0;
9695: #ifdef DEBUG_PUSH
9696: fprintf(stderr, "PP: Parsing char data\n");
9697: #endif
9698: xmlParseCharData(ctxt, 0);
9699: }
9700: /*
9701: * Pop-up of finished entities.
9702: */
1.152 daniel 9703: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9704: xmlPopInput(ctxt);
9705: break;
9706: case XML_PARSER_CDATA_SECTION: {
9707: /*
9708: * The Push mode need to have the SAX callback for
9709: * cdataBlock merge back contiguous callbacks.
9710: */
9711: int base;
9712:
/* `base` is the offset of "]]>" from the current position, or -1. */
9713: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9714: if (base < 0) {
/*
 * Terminator not available yet: when enough data has piled up, hand one
 * XML_PARSER_BIG_BUFFER_SIZE chunk to cdataBlock and keep waiting.
 */
9715: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9716: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9717: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9718: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9719: XML_PARSER_BIG_BUFFER_SIZE);
9720: }
9721: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9722: ctxt->checkIndex = 0;
9723: }
9724: goto done;
9725: } else {
/* Terminator found: deliver the remaining payload, then skip "]]>". */
1.171 daniel 9726: if ((ctxt->sax != NULL) && (base > 0) &&
9727: (!ctxt->disableSAX)) {
1.140 daniel 9728: if (ctxt->sax->cdataBlock != NULL)
9729: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9730: ctxt->input->cur, base);
1.140 daniel 9731: }
9732: SKIP(base + 3);
9733: ctxt->checkIndex = 0;
9734: ctxt->instate = XML_PARSER_CONTENT;
9735: #ifdef DEBUG_PUSH
9736: fprintf(stderr, "PP: entering CONTENT\n");
9737: #endif
9738: }
9739: break;
9740: }
/* END_TAG: parse "</name>" once its closing '>' is available. */
1.141 daniel 9741: case XML_PARSER_END_TAG:
1.140 daniel 9742: if (avail < 2)
9743: goto done;
1.143 daniel 9744: if ((!terminate) &&
9745: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9746: goto done;
9747: xmlParseEndTag(ctxt);
9748: if (ctxt->name == NULL) {
9749: ctxt->instate = XML_PARSER_EPILOG;
9750: #ifdef DEBUG_PUSH
9751: fprintf(stderr, "PP: entering EPILOG\n");
9752: #endif
9753: } else {
9754: ctxt->instate = XML_PARSER_CONTENT;
9755: #ifdef DEBUG_PUSH
9756: fprintf(stderr, "PP: entering CONTENT\n");
9757: #endif
9758: }
9759: break;
9760: case XML_PARSER_DTD: {
9761: /*
9762: * Sorry but progressive parsing of the internal subset
9763: * is not expected to be supported. We first check that
9764: * the full content of the internal subset is available and
9765: * the parsing is launched only at that point.
9766: * Internal subset ends up with "']' S? '>'" in an unescaped
9767: * section and not in a ']]>' sequence which are conditional
9768: * sections (whoever argued to keep that crap in XML deserve
9769: * a place in hell !).
9770: */
9771: int base, i;
9772: xmlChar *buf;
9773: xmlChar quote = 0;
9774:
1.184 daniel 9775: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9776: if (base < 0) return(0);
9777: if (ctxt->checkIndex > base)
9778: base = ctxt->checkIndex;
/*
 * NOTE(review): ctxt->input->buf is dereferenced here without the NULL test
 * used when `avail` is computed; presumably push-mode inputs always carry a
 * buffer. Confirm against the callers.
 */
1.184 daniel 9779: buf = ctxt->input->buf->buffer->content;
/*
 * Scan for a ']' outside of quoted literals that is not part of "]]" and
 * is followed, somewhere in the buffered data, by a '>'.
 */
1.202 daniel 9780: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9781: base++) {
1.140 daniel 9782: if (quote != 0) {
9783: if (buf[base] == quote)
9784: quote = 0;
9785: continue;
9786: }
9787: if (buf[base] == '"') {
9788: quote = '"';
9789: continue;
9790: }
9791: if (buf[base] == '\'') {
9792: quote = '\'';
9793: continue;
9794: }
9795: if (buf[base] == ']') {
1.202 daniel 9796: if ((unsigned int) base +1 >=
9797: ctxt->input->buf->buffer->use)
1.140 daniel 9798: break;
9799: if (buf[base + 1] == ']') {
9800: /* conditional crap, skip both ']' ! */
9801: base++;
9802: continue;
9803: }
1.202 daniel 9804: for (i = 0;
9805: (unsigned int) base + i < ctxt->input->buf->buffer->use;
9806: i++) {
1.140 daniel 9807: if (buf[base + i] == '>')
9808: goto found_end_int_subset;
9809: }
9810: break;
9811: }
9812: }
9813: /*
9814: * We didn't found the end of the Internal subset
9815: */
/* The resume point is only saved when the scan did not stop inside a literal. */
9816: if (quote == 0)
9817: ctxt->checkIndex = base;
9818: #ifdef DEBUG_PUSH
/*
 * NOTE(review): `next` is not assigned anywhere in this state, so this
 * debug-only test reads whatever an earlier state left in it (or an unset
 * value). Confirm.
 */
9819: if (next == 0)
9820: fprintf(stderr, "PP: lookup of int subset end filed\n");
9821: #endif
9822: goto done;
9823:
9824: found_end_int_subset:
9825: xmlParseInternalSubset(ctxt);
1.166 daniel 9826: ctxt->inSubset = 2;
1.171 daniel 9827: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9828: (ctxt->sax->externalSubset != NULL))
9829: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9830: ctxt->extSubSystem, ctxt->extSubURI);
9831: ctxt->inSubset = 0;
1.140 daniel 9832: ctxt->instate = XML_PARSER_PROLOG;
9833: ctxt->checkIndex = 0;
9834: #ifdef DEBUG_PUSH
9835: fprintf(stderr, "PP: entering PROLOG\n");
9836: #endif
9837: break;
9838: }
/*
 * The states below are not expected to be seen by this function: each one
 * prints an internal error and falls back to another state.
 */
9839: case XML_PARSER_COMMENT:
9840: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9841: ctxt->instate = XML_PARSER_CONTENT;
9842: #ifdef DEBUG_PUSH
9843: fprintf(stderr, "PP: entering CONTENT\n");
9844: #endif
9845: break;
9846: case XML_PARSER_PI:
9847: fprintf(stderr, "PP: internal error, state == PI\n");
9848: ctxt->instate = XML_PARSER_CONTENT;
9849: #ifdef DEBUG_PUSH
9850: fprintf(stderr, "PP: entering CONTENT\n");
9851: #endif
9852: break;
1.128 daniel 9853: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9854: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9855: ctxt->instate = XML_PARSER_DTD;
9856: #ifdef DEBUG_PUSH
9857: fprintf(stderr, "PP: entering DTD\n");
9858: #endif
9859: break;
1.128 daniel 9860: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9861: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
/*
 * NOTE(review): the state set here is CONTENT while the debug trace below
 * says "entering DTD"; one of the two looks wrong. Confirm which.
 */
9862: ctxt->instate = XML_PARSER_CONTENT;
9863: #ifdef DEBUG_PUSH
9864: fprintf(stderr, "PP: entering DTD\n");
9865: #endif
9866: break;
1.128 daniel 9867: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9868: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9869: ctxt->instate = XML_PARSER_START_TAG;
9870: #ifdef DEBUG_PUSH
9871: fprintf(stderr, "PP: entering START_TAG\n");
9872: #endif
9873: break;
9874: case XML_PARSER_SYSTEM_LITERAL:
9875: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9876: ctxt->instate = XML_PARSER_START_TAG;
9877: #ifdef DEBUG_PUSH
9878: fprintf(stderr, "PP: entering START_TAG\n");
9879: #endif
9880: break;
1.128 daniel 9881: }
9882: }
/* Common exit: reached when more data is needed or parsing is finished. */
1.140 daniel 9883: done:
9884: #ifdef DEBUG_PUSH
9885: fprintf(stderr, "PP: done %d\n", ret);
9886: #endif
1.128 daniel 9887: return(ret);
9888: }
9889:
9890: /**
1.143 daniel 9891: * xmlParseTry:
9892: * @ctxt: an XML parser context
9893: *
9894: * Try to progress on parsing
9895: *
9896: * Returns zero if no parsing was possible
9897: */
9898: int
9899: xmlParseTry(xmlParserCtxtPtr ctxt) {
9900: return(xmlParseTryOrFinish(ctxt, 0));
9901: }
9902:
9903: /**
1.128 daniel 9904: * xmlParseChunk:
9905: * @ctxt: an XML parser context
9906: * @chunk: an char array
9907: * @size: the size in byte of the chunk
9908: * @terminate: last chunk indicator
9909: *
9910: * Parse a Chunk of memory
9911: *
9912: * Returns zero if no error, the xmlParserErrors otherwise.
9913: */
1.140 daniel 9914: int
1.128 daniel 9915: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9916: int terminate) {
1.132 daniel 9917: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9918: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9919: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9920: int cur = ctxt->input->cur - ctxt->input->base;
9921:
1.132 daniel 9922: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9923: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9924: ctxt->input->cur = ctxt->input->base + cur;
9925: #ifdef DEBUG_PUSH
9926: fprintf(stderr, "PP: pushed %d\n", size);
9927: #endif
9928:
1.150 daniel 9929: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9930: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9931: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9932: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9933: if (terminate) {
1.151 daniel 9934: /*
9935: * Check for termination
9936: */
1.140 daniel 9937: if ((ctxt->instate != XML_PARSER_EOF) &&
9938: (ctxt->instate != XML_PARSER_EPILOG)) {
9939: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9940: ctxt->sax->error(ctxt->userData,
9941: "Extra content at the end of the document\n");
9942: ctxt->wellFormed = 0;
1.180 daniel 9943: ctxt->disableSAX = 1;
1.140 daniel 9944: ctxt->errNo = XML_ERR_DOCUMENT_END;
9945: }
9946: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9947: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9948: (!ctxt->disableSAX))
1.140 daniel 9949: ctxt->sax->endDocument(ctxt->userData);
9950: }
9951: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9952: }
9953: return((xmlParserErrors) ctxt->errNo);
9954: }
9955:
9956: /************************************************************************
9957: * *
1.98 daniel 9958: * I/O front end functions to the parser *
9959: * *
9960: ************************************************************************/
1.201 daniel 9961:
9962: /**
9963: * xmlCreatePushParserCtxt:
9964: * @ctxt: an XML parser context
9965: *
9966: * Blocks further parser processing
9967: */
9968: void
9969: xmlStopParser(xmlParserCtxtPtr ctxt) {
9970: ctxt->instate = XML_PARSER_EOF;
9971: if (ctxt->input != NULL)
9972: ctxt->input->cur = BAD_CAST"";
9973: }
1.98 daniel 9974:
1.50 daniel 9975: /**
1.181 daniel 9976: * xmlCreatePushParserCtxt:
1.140 daniel 9977: * @sax: a SAX handler
9978: * @user_data: The user data returned on SAX callbacks
9979: * @chunk: a pointer to an array of chars
9980: * @size: number of chars in the array
9981: * @filename: an optional file name or URI
9982: *
9983: * Create a parser context for using the XML parser in push mode
9984: * To allow content encoding detection, @size should be >= 4
9985: * The value of @filename is used for fetching external entities
9986: * and error/warning reports.
9987: *
9988: * Returns the new parser context or NULL
9989: */
9990: xmlParserCtxtPtr
9991: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9992: const char *chunk, int size, const char *filename) {
9993: xmlParserCtxtPtr ctxt;
9994: xmlParserInputPtr inputStream;
9995: xmlParserInputBufferPtr buf;
9996: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9997:
9998: /*
1.156 daniel 9999: * plug some encoding conversion routines
1.140 daniel 10000: */
10001: if ((chunk != NULL) && (size >= 4))
1.156 daniel 10002: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 10003:
10004: buf = xmlAllocParserInputBuffer(enc);
10005: if (buf == NULL) return(NULL);
10006:
10007: ctxt = xmlNewParserCtxt();
10008: if (ctxt == NULL) {
10009: xmlFree(buf);
10010: return(NULL);
10011: }
10012: if (sax != NULL) {
10013: if (ctxt->sax != &xmlDefaultSAXHandler)
10014: xmlFree(ctxt->sax);
10015: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10016: if (ctxt->sax == NULL) {
10017: xmlFree(buf);
10018: xmlFree(ctxt);
10019: return(NULL);
10020: }
10021: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10022: if (user_data != NULL)
10023: ctxt->userData = user_data;
10024: }
10025: if (filename == NULL) {
10026: ctxt->directory = NULL;
10027: } else {
10028: ctxt->directory = xmlParserGetDirectory(filename);
10029: }
10030:
10031: inputStream = xmlNewInputStream(ctxt);
10032: if (inputStream == NULL) {
10033: xmlFreeParserCtxt(ctxt);
10034: return(NULL);
10035: }
10036:
10037: if (filename == NULL)
10038: inputStream->filename = NULL;
10039: else
10040: inputStream->filename = xmlMemStrdup(filename);
10041: inputStream->buf = buf;
10042: inputStream->base = inputStream->buf->buffer->content;
10043: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 10044: if (enc != XML_CHAR_ENCODING_NONE) {
10045: xmlSwitchEncoding(ctxt, enc);
10046: }
1.140 daniel 10047:
10048: inputPush(ctxt, inputStream);
10049:
10050: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10051: (ctxt->input->buf != NULL)) {
10052: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10053: #ifdef DEBUG_PUSH
10054: fprintf(stderr, "PP: pushed %d\n", size);
10055: #endif
10056: }
1.190 daniel 10057:
10058: return(ctxt);
10059: }
10060:
10061: /**
10062: * xmlCreateIOParserCtxt:
10063: * @sax: a SAX handler
10064: * @user_data: The user data returned on SAX callbacks
10065: * @ioread: an I/O read function
10066: * @ioclose: an I/O close function
10067: * @ioctx: an I/O handler
10068: * @enc: the charset encoding if known
10069: *
10070: * Create a parser context for using the XML parser with an existing
10071: * I/O stream
10072: *
10073: * Returns the new parser context or NULL
10074: */
10075: xmlParserCtxtPtr
10076: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10077: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10078: void *ioctx, xmlCharEncoding enc) {
10079: xmlParserCtxtPtr ctxt;
10080: xmlParserInputPtr inputStream;
10081: xmlParserInputBufferPtr buf;
10082:
10083: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10084: if (buf == NULL) return(NULL);
10085:
10086: ctxt = xmlNewParserCtxt();
10087: if (ctxt == NULL) {
10088: xmlFree(buf);
10089: return(NULL);
10090: }
10091: if (sax != NULL) {
10092: if (ctxt->sax != &xmlDefaultSAXHandler)
10093: xmlFree(ctxt->sax);
10094: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10095: if (ctxt->sax == NULL) {
10096: xmlFree(buf);
10097: xmlFree(ctxt);
10098: return(NULL);
10099: }
10100: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10101: if (user_data != NULL)
10102: ctxt->userData = user_data;
10103: }
10104:
10105: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10106: if (inputStream == NULL) {
10107: xmlFreeParserCtxt(ctxt);
10108: return(NULL);
10109: }
10110: inputPush(ctxt, inputStream);
1.140 daniel 10111:
10112: return(ctxt);
10113: }
10114:
10115: /**
1.181 daniel 10116: * xmlCreateDocParserCtxt:
1.123 daniel 10117: * @cur: a pointer to an array of xmlChar
1.50 daniel 10118: *
1.192 daniel 10119: * Creates a parser context for an XML in-memory document.
1.69 daniel 10120: *
10121: * Returns the new parser context or NULL
1.16 daniel 10122: */
1.69 daniel 10123: xmlParserCtxtPtr
1.123 daniel 10124: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 10125: xmlParserCtxtPtr ctxt;
1.40 daniel 10126: xmlParserInputPtr input;
1.16 daniel 10127:
1.97 daniel 10128: ctxt = xmlNewParserCtxt();
1.16 daniel 10129: if (ctxt == NULL) {
10130: return(NULL);
10131: }
1.96 daniel 10132: input = xmlNewInputStream(ctxt);
1.40 daniel 10133: if (input == NULL) {
1.97 daniel 10134: xmlFreeParserCtxt(ctxt);
1.40 daniel 10135: return(NULL);
10136: }
10137:
10138: input->base = cur;
10139: input->cur = cur;
10140:
10141: inputPush(ctxt, input);
1.69 daniel 10142: return(ctxt);
10143: }
10144:
10145: /**
1.181 daniel 10146: * xmlSAXParseDoc:
1.69 daniel 10147: * @sax: the SAX handler block
1.123 daniel 10148: * @cur: a pointer to an array of xmlChar
1.69 daniel 10149: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10150: * documents
10151: *
10152: * parse an XML in-memory document and build a tree.
10153: * It use the given SAX function block to handle the parsing callback.
10154: * If sax is NULL, fallback to the default DOM tree building routines.
10155: *
10156: * Returns the resulting document tree
10157: */
10158:
10159: xmlDocPtr
1.123 daniel 10160: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 10161: xmlDocPtr ret;
10162: xmlParserCtxtPtr ctxt;
10163:
10164: if (cur == NULL) return(NULL);
1.16 daniel 10165:
10166:
1.69 daniel 10167: ctxt = xmlCreateDocParserCtxt(cur);
10168: if (ctxt == NULL) return(NULL);
1.74 daniel 10169: if (sax != NULL) {
10170: ctxt->sax = sax;
10171: ctxt->userData = NULL;
10172: }
1.69 daniel 10173:
1.16 daniel 10174: xmlParseDocument(ctxt);
1.72 daniel 10175: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10176: else {
10177: ret = NULL;
1.72 daniel 10178: xmlFreeDoc(ctxt->myDoc);
10179: ctxt->myDoc = NULL;
1.59 daniel 10180: }
1.86 daniel 10181: if (sax != NULL)
10182: ctxt->sax = NULL;
1.69 daniel 10183: xmlFreeParserCtxt(ctxt);
1.16 daniel 10184:
1.1 veillard 10185: return(ret);
10186: }
10187:
1.50 daniel 10188: /**
1.181 daniel 10189: * xmlParseDoc:
1.123 daniel 10190: * @cur: a pointer to an array of xmlChar
1.55 daniel 10191: *
10192: * parse an XML in-memory document and build a tree.
10193: *
1.68 daniel 10194: * Returns the resulting document tree
1.55 daniel 10195: */
10196:
1.69 daniel 10197: xmlDocPtr
1.123 daniel 10198: xmlParseDoc(xmlChar *cur) {
1.59 daniel 10199: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 10200: }
10201:
10202: /**
1.181 daniel 10203: * xmlSAXParseDTD:
1.76 daniel 10204: * @sax: the SAX handler block
10205: * @ExternalID: a NAME* containing the External ID of the DTD
10206: * @SystemID: a NAME* containing the URL to the DTD
10207: *
10208: * Load and parse an external subset.
10209: *
10210: * Returns the resulting xmlDtdPtr or NULL in case of error.
10211: */
10212:
10213: xmlDtdPtr
1.123 daniel 10214: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10215: const xmlChar *SystemID) {
1.76 daniel 10216: xmlDtdPtr ret = NULL;
10217: xmlParserCtxtPtr ctxt;
1.83 daniel 10218: xmlParserInputPtr input = NULL;
1.76 daniel 10219: xmlCharEncoding enc;
10220:
10221: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10222:
1.97 daniel 10223: ctxt = xmlNewParserCtxt();
1.76 daniel 10224: if (ctxt == NULL) {
10225: return(NULL);
10226: }
10227:
10228: /*
10229: * Set-up the SAX context
10230: */
10231: if (sax != NULL) {
1.93 veillard 10232: if (ctxt->sax != NULL)
1.119 daniel 10233: xmlFree(ctxt->sax);
1.76 daniel 10234: ctxt->sax = sax;
10235: ctxt->userData = NULL;
10236: }
10237:
10238: /*
10239: * Ask the Entity resolver to load the damn thing
10240: */
10241:
10242: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
10243: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
10244: if (input == NULL) {
1.86 daniel 10245: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 10246: xmlFreeParserCtxt(ctxt);
10247: return(NULL);
10248: }
10249:
10250: /*
1.156 daniel 10251: * plug some encoding conversion routines here.
1.76 daniel 10252: */
10253: xmlPushInput(ctxt, input);
1.156 daniel 10254: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 10255: xmlSwitchEncoding(ctxt, enc);
10256:
1.95 veillard 10257: if (input->filename == NULL)
1.156 daniel 10258: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 10259: input->line = 1;
10260: input->col = 1;
10261: input->base = ctxt->input->cur;
10262: input->cur = ctxt->input->cur;
10263: input->free = NULL;
10264:
10265: /*
10266: * let's parse that entity knowing it's an external subset.
10267: */
1.191 daniel 10268: ctxt->inSubset = 2;
10269: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10270: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10271: ExternalID, SystemID);
1.79 daniel 10272: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 10273:
10274: if (ctxt->myDoc != NULL) {
10275: if (ctxt->wellFormed) {
1.191 daniel 10276: ret = ctxt->myDoc->extSubset;
10277: ctxt->myDoc->extSubset = NULL;
1.76 daniel 10278: } else {
10279: ret = NULL;
10280: }
10281: xmlFreeDoc(ctxt->myDoc);
10282: ctxt->myDoc = NULL;
10283: }
1.86 daniel 10284: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 10285: xmlFreeParserCtxt(ctxt);
10286:
10287: return(ret);
10288: }
10289:
10290: /**
1.181 daniel 10291: * xmlParseDTD:
1.76 daniel 10292: * @ExternalID: a NAME* containing the External ID of the DTD
10293: * @SystemID: a NAME* containing the URL to the DTD
10294: *
10295: * Load and parse an external subset.
10296: *
10297: * Returns the resulting xmlDtdPtr or NULL in case of error.
10298: */
10299:
10300: xmlDtdPtr
1.123 daniel 10301: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 10302: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 10303: }
10304:
10305: /**
1.181 daniel 10306: * xmlSAXParseBalancedChunk:
1.144 daniel 10307: * @ctx: an XML parser context (possibly NULL)
10308: * @sax: the SAX handler bloc (possibly NULL)
10309: * @user_data: The user data returned on SAX callbacks (possibly NULL)
10310: * @input: a parser input stream
10311: * @enc: the encoding
10312: *
10313: * Parse a well-balanced chunk of an XML document
10314: * The user has to provide SAX callback block whose routines will be
10315: * called by the parser
10316: * The allowed sequence for the Well Balanced Chunk is the one defined by
10317: * the content production in the XML grammar:
10318: *
10319: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10320: *
1.176 daniel 10321: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 10322: * the error code otherwise
10323: */
10324:
10325: int
10326: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
10327: void *user_data, xmlParserInputPtr input,
10328: xmlCharEncoding enc) {
10329: xmlParserCtxtPtr ctxt;
10330: int ret;
10331:
10332: if (input == NULL) return(-1);
10333:
10334: if (ctx != NULL)
10335: ctxt = ctx;
10336: else {
10337: ctxt = xmlNewParserCtxt();
10338: if (ctxt == NULL)
10339: return(-1);
10340: if (sax == NULL)
10341: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10342: }
10343:
10344: /*
10345: * Set-up the SAX context
10346: */
10347: if (sax != NULL) {
10348: if (ctxt->sax != NULL)
10349: xmlFree(ctxt->sax);
10350: ctxt->sax = sax;
10351: ctxt->userData = user_data;
10352: }
10353:
10354: /*
10355: * plug some encoding conversion routines here.
10356: */
10357: xmlPushInput(ctxt, input);
10358: if (enc != XML_CHAR_ENCODING_NONE)
10359: xmlSwitchEncoding(ctxt, enc);
10360:
10361: /*
10362: * let's parse that entity knowing it's an external subset.
10363: */
10364: xmlParseContent(ctxt);
10365: ret = ctxt->errNo;
10366:
10367: if (ctx == NULL) {
10368: if (sax != NULL)
10369: ctxt->sax = NULL;
10370: else
10371: xmlFreeDoc(ctxt->myDoc);
10372: xmlFreeParserCtxt(ctxt);
10373: }
10374: return(ret);
10375: }
10376:
10377: /**
1.213 veillard 10378: * xmlParseCtxtExternalEntity:
10379: * @ctx: the existing parsing context
10380: * @URL: the URL for the entity to load
10381: * @ID: the System ID for the entity to load
10382: * @list: the return value for the set of parsed nodes
10383: *
10384: * Parse an external general entity within an existing parsing context
10385: * An external general parsed entity is well-formed if it matches the
10386: * production labeled extParsedEnt.
10387: *
10388: * [78] extParsedEnt ::= TextDecl? content
10389: *
10390: * Returns 0 if the entity is well formed, -1 in case of args problem and
10391: * the parser error code otherwise
10392: */
10393:
10394: int
10395: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
10396: const xmlChar *ID, xmlNodePtr *list) {
10397: xmlParserCtxtPtr ctxt;
10398: xmlDocPtr newDoc;
10399: xmlSAXHandlerPtr oldsax = NULL;
10400: int ret = 0;
10401:
10402: if (ctx->depth > 40) {
10403: return(XML_ERR_ENTITY_LOOP);
10404: }
10405:
10406: if (list != NULL)
10407: *list = NULL;
10408: if ((URL == NULL) && (ID == NULL))
10409: return(-1);
10410: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10411: return(-1);
10412:
10413:
10414: ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
10415: if (ctxt == NULL) return(-1);
10416: ctxt->userData = ctxt;
10417: oldsax = ctxt->sax;
10418: ctxt->sax = ctx->sax;
10419: newDoc = xmlNewDoc(BAD_CAST "1.0");
10420: if (newDoc == NULL) {
10421: xmlFreeParserCtxt(ctxt);
10422: return(-1);
10423: }
10424: if (ctx->myDoc != NULL) {
10425: newDoc->intSubset = ctx->myDoc->intSubset;
10426: newDoc->extSubset = ctx->myDoc->extSubset;
10427: }
10428: if (ctx->myDoc->URL != NULL) {
10429: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10430: }
10431: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10432: if (newDoc->children == NULL) {
10433: ctxt->sax = oldsax;
10434: xmlFreeParserCtxt(ctxt);
10435: newDoc->intSubset = NULL;
10436: newDoc->extSubset = NULL;
10437: xmlFreeDoc(newDoc);
10438: return(-1);
10439: }
10440: nodePush(ctxt, newDoc->children);
10441: if (ctx->myDoc == NULL) {
10442: ctxt->myDoc = newDoc;
10443: } else {
10444: ctxt->myDoc = ctx->myDoc;
10445: newDoc->children->doc = ctx->myDoc;
10446: }
10447:
10448: /*
10449: * Parse a possible text declaration first
10450: */
10451: GROW;
10452: if ((RAW == '<') && (NXT(1) == '?') &&
10453: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10454: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10455: xmlParseTextDecl(ctxt);
10456: }
10457:
10458: /*
10459: * Doing validity checking on chunk doesn't make sense
10460: */
10461: ctxt->instate = XML_PARSER_CONTENT;
10462: ctxt->validate = ctx->validate;
10463: ctxt->depth = ctx->depth + 1;
10464: ctxt->replaceEntities = ctx->replaceEntities;
10465: if (ctxt->validate) {
10466: ctxt->vctxt.error = ctx->vctxt.error;
10467: ctxt->vctxt.warning = ctx->vctxt.warning;
10468: /* Allocate the Node stack */
10469: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
10470: ctxt->vctxt.nodeNr = 0;
10471: ctxt->vctxt.nodeMax = 4;
10472: ctxt->vctxt.node = NULL;
10473: } else {
10474: ctxt->vctxt.error = NULL;
10475: ctxt->vctxt.warning = NULL;
10476: }
10477:
10478: xmlParseContent(ctxt);
10479:
10480: if ((RAW == '<') && (NXT(1) == '/')) {
10481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10482: ctxt->sax->error(ctxt->userData,
10483: "chunk is not well balanced\n");
10484: ctxt->wellFormed = 0;
10485: ctxt->disableSAX = 1;
10486: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10487: } else if (RAW != 0) {
10488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10489: ctxt->sax->error(ctxt->userData,
10490: "extra content at the end of well balanced chunk\n");
10491: ctxt->wellFormed = 0;
10492: ctxt->disableSAX = 1;
10493: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10494: }
10495: if (ctxt->node != newDoc->children) {
10496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10497: ctxt->sax->error(ctxt->userData,
10498: "chunk is not well balanced\n");
10499: ctxt->wellFormed = 0;
10500: ctxt->disableSAX = 1;
10501: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10502: }
10503:
10504: if (!ctxt->wellFormed) {
10505: if (ctxt->errNo == 0)
10506: ret = 1;
10507: else
10508: ret = ctxt->errNo;
10509: } else {
10510: if (list != NULL) {
10511: xmlNodePtr cur;
10512:
10513: /*
10514: * Return the newly created nodeset after unlinking it from
10515: * they pseudo parent.
10516: */
10517: cur = newDoc->children->children;
10518: *list = cur;
10519: while (cur != NULL) {
10520: cur->parent = NULL;
10521: cur = cur->next;
10522: }
10523: newDoc->children->children = NULL;
10524: }
10525: ret = 0;
10526: }
10527: ctxt->sax = oldsax;
10528: xmlFreeParserCtxt(ctxt);
10529: newDoc->intSubset = NULL;
10530: newDoc->extSubset = NULL;
10531: xmlFreeDoc(newDoc);
10532:
10533: return(ret);
10534: }
10535:
10536: /**
1.181 daniel 10537: * xmlParseExternalEntity:
10538: * @doc: the document the chunk pertains to
10539: * @sax: the SAX handler bloc (possibly NULL)
10540: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10541: * @depth: Used for loop detection, use 0
1.181 daniel 10542: * @URL: the URL for the entity to load
10543: * @ID: the System ID for the entity to load
10544: * @list: the return value for the set of parsed nodes
10545: *
10546: * Parse an external general entity
10547: * An external general parsed entity is well-formed if it matches the
10548: * production labeled extParsedEnt.
10549: *
10550: * [78] extParsedEnt ::= TextDecl? content
10551: *
10552: * Returns 0 if the entity is well formed, -1 in case of args problem and
10553: * the parser error code otherwise
10554: */
10555:
10556: int
10557: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 10558: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 10559: xmlParserCtxtPtr ctxt;
10560: xmlDocPtr newDoc;
10561: xmlSAXHandlerPtr oldsax = NULL;
10562: int ret = 0;
10563:
1.185 daniel 10564: if (depth > 40) {
10565: return(XML_ERR_ENTITY_LOOP);
10566: }
10567:
10568:
1.181 daniel 10569:
10570: if (list != NULL)
10571: *list = NULL;
10572: if ((URL == NULL) && (ID == NULL))
1.213 veillard 10573: return(-1);
10574: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 10575: return(-1);
10576:
10577:
10578: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10579: if (ctxt == NULL) return(-1);
10580: ctxt->userData = ctxt;
10581: if (sax != NULL) {
10582: oldsax = ctxt->sax;
10583: ctxt->sax = sax;
10584: if (user_data != NULL)
10585: ctxt->userData = user_data;
10586: }
10587: newDoc = xmlNewDoc(BAD_CAST "1.0");
10588: if (newDoc == NULL) {
10589: xmlFreeParserCtxt(ctxt);
10590: return(-1);
10591: }
10592: if (doc != NULL) {
10593: newDoc->intSubset = doc->intSubset;
10594: newDoc->extSubset = doc->extSubset;
10595: }
10596: if (doc->URL != NULL) {
10597: newDoc->URL = xmlStrdup(doc->URL);
10598: }
10599: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10600: if (newDoc->children == NULL) {
10601: if (sax != NULL)
10602: ctxt->sax = oldsax;
10603: xmlFreeParserCtxt(ctxt);
10604: newDoc->intSubset = NULL;
10605: newDoc->extSubset = NULL;
10606: xmlFreeDoc(newDoc);
10607: return(-1);
10608: }
10609: nodePush(ctxt, newDoc->children);
10610: if (doc == NULL) {
10611: ctxt->myDoc = newDoc;
10612: } else {
10613: ctxt->myDoc = doc;
10614: newDoc->children->doc = doc;
10615: }
10616:
10617: /*
10618: * Parse a possible text declaration first
10619: */
10620: GROW;
10621: if ((RAW == '<') && (NXT(1) == '?') &&
10622: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10623: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10624: xmlParseTextDecl(ctxt);
10625: }
10626:
10627: /*
10628: * Doing validity checking on chunk doesn't make sense
10629: */
10630: ctxt->instate = XML_PARSER_CONTENT;
10631: ctxt->validate = 0;
1.185 daniel 10632: ctxt->depth = depth;
1.181 daniel 10633:
10634: xmlParseContent(ctxt);
10635:
10636: if ((RAW == '<') && (NXT(1) == '/')) {
10637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10638: ctxt->sax->error(ctxt->userData,
10639: "chunk is not well balanced\n");
10640: ctxt->wellFormed = 0;
10641: ctxt->disableSAX = 1;
10642: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10643: } else if (RAW != 0) {
10644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10645: ctxt->sax->error(ctxt->userData,
10646: "extra content at the end of well balanced chunk\n");
10647: ctxt->wellFormed = 0;
10648: ctxt->disableSAX = 1;
10649: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10650: }
10651: if (ctxt->node != newDoc->children) {
10652: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10653: ctxt->sax->error(ctxt->userData,
10654: "chunk is not well balanced\n");
10655: ctxt->wellFormed = 0;
10656: ctxt->disableSAX = 1;
10657: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10658: }
10659:
10660: if (!ctxt->wellFormed) {
10661: if (ctxt->errNo == 0)
10662: ret = 1;
10663: else
10664: ret = ctxt->errNo;
10665: } else {
10666: if (list != NULL) {
10667: xmlNodePtr cur;
10668:
10669: /*
10670: * Return the newly created nodeset after unlinking it from
10671: * they pseudo parent.
10672: */
10673: cur = newDoc->children->children;
10674: *list = cur;
10675: while (cur != NULL) {
10676: cur->parent = NULL;
10677: cur = cur->next;
10678: }
10679: newDoc->children->children = NULL;
10680: }
10681: ret = 0;
10682: }
10683: if (sax != NULL)
10684: ctxt->sax = oldsax;
10685: xmlFreeParserCtxt(ctxt);
10686: newDoc->intSubset = NULL;
10687: newDoc->extSubset = NULL;
10688: xmlFreeDoc(newDoc);
10689:
10690: return(ret);
10691: }
10692:
10693: /**
10694: * xmlParseBalancedChunk:
1.176 daniel 10695: * @doc: the document the chunk pertains to
10696: * @sax: the SAX handler bloc (possibly NULL)
10697: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10698: * @depth: Used for loop detection, use 0
1.176 daniel 10699: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10700: * @list: the return value for the set of parsed nodes
10701: *
10702: * Parse a well-balanced chunk of an XML document
10703: * called by the parser
10704: * The allowed sequence for the Well Balanced Chunk is the one defined by
10705: * the content production in the XML grammar:
1.144 daniel 10706: *
1.175 daniel 10707: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10708: *
1.176 daniel 10709: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10710: * the parser error code otherwise
1.144 daniel 10711: */
10712:
1.175 daniel 10713: int
10714: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10715: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10716: xmlParserCtxtPtr ctxt;
1.175 daniel 10717: xmlDocPtr newDoc;
1.181 daniel 10718: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10719: int size;
1.176 daniel 10720: int ret = 0;
1.175 daniel 10721:
1.185 daniel 10722: if (depth > 40) {
10723: return(XML_ERR_ENTITY_LOOP);
10724: }
10725:
1.175 daniel 10726:
1.176 daniel 10727: if (list != NULL)
10728: *list = NULL;
10729: if (string == NULL)
10730: return(-1);
10731:
10732: size = xmlStrlen(string);
10733:
1.183 daniel 10734: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10735: if (ctxt == NULL) return(-1);
10736: ctxt->userData = ctxt;
1.175 daniel 10737: if (sax != NULL) {
1.176 daniel 10738: oldsax = ctxt->sax;
10739: ctxt->sax = sax;
10740: if (user_data != NULL)
10741: ctxt->userData = user_data;
1.175 daniel 10742: }
10743: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10744: if (newDoc == NULL) {
10745: xmlFreeParserCtxt(ctxt);
10746: return(-1);
10747: }
1.175 daniel 10748: if (doc != NULL) {
10749: newDoc->intSubset = doc->intSubset;
10750: newDoc->extSubset = doc->extSubset;
10751: }
1.176 daniel 10752: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10753: if (newDoc->children == NULL) {
10754: if (sax != NULL)
10755: ctxt->sax = oldsax;
10756: xmlFreeParserCtxt(ctxt);
10757: newDoc->intSubset = NULL;
10758: newDoc->extSubset = NULL;
10759: xmlFreeDoc(newDoc);
10760: return(-1);
10761: }
10762: nodePush(ctxt, newDoc->children);
10763: if (doc == NULL) {
10764: ctxt->myDoc = newDoc;
10765: } else {
10766: ctxt->myDoc = doc;
10767: newDoc->children->doc = doc;
10768: }
10769: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10770: ctxt->depth = depth;
1.176 daniel 10771:
10772: /*
10773: * Doing validity checking on chunk doesn't make sense
10774: */
10775: ctxt->validate = 0;
10776:
1.175 daniel 10777: xmlParseContent(ctxt);
1.176 daniel 10778:
10779: if ((RAW == '<') && (NXT(1) == '/')) {
10780: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10781: ctxt->sax->error(ctxt->userData,
10782: "chunk is not well balanced\n");
10783: ctxt->wellFormed = 0;
1.180 daniel 10784: ctxt->disableSAX = 1;
1.176 daniel 10785: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10786: } else if (RAW != 0) {
10787: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10788: ctxt->sax->error(ctxt->userData,
10789: "extra content at the end of well balanced chunk\n");
10790: ctxt->wellFormed = 0;
1.180 daniel 10791: ctxt->disableSAX = 1;
1.176 daniel 10792: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10793: }
10794: if (ctxt->node != newDoc->children) {
10795: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10796: ctxt->sax->error(ctxt->userData,
10797: "chunk is not well balanced\n");
10798: ctxt->wellFormed = 0;
1.180 daniel 10799: ctxt->disableSAX = 1;
1.176 daniel 10800: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10801: }
1.175 daniel 10802:
1.176 daniel 10803: if (!ctxt->wellFormed) {
10804: if (ctxt->errNo == 0)
10805: ret = 1;
10806: else
10807: ret = ctxt->errNo;
10808: } else {
10809: if (list != NULL) {
10810: xmlNodePtr cur;
1.175 daniel 10811:
1.176 daniel 10812: /*
10813: * Return the newly created nodeset after unlinking it from
10814: * they pseudo parent.
10815: */
10816: cur = newDoc->children->children;
10817: *list = cur;
10818: while (cur != NULL) {
10819: cur->parent = NULL;
10820: cur = cur->next;
10821: }
10822: newDoc->children->children = NULL;
10823: }
10824: ret = 0;
1.175 daniel 10825: }
1.176 daniel 10826: if (sax != NULL)
10827: ctxt->sax = oldsax;
1.175 daniel 10828: xmlFreeParserCtxt(ctxt);
10829: newDoc->intSubset = NULL;
10830: newDoc->extSubset = NULL;
1.176 daniel 10831: xmlFreeDoc(newDoc);
1.175 daniel 10832:
1.176 daniel 10833: return(ret);
1.144 daniel 10834: }
10835:
10836: /**
1.181 daniel 10837: * xmlParseBalancedChunkFile:
1.144 daniel 10838: * @doc: the document the chunk pertains to
10839: *
10840: * Parse a well-balanced chunk of an XML document contained in a file
10841: *
10842: * Returns the resulting list of nodes resulting from the parsing,
10843: * they are not added to @node
10844: */
10845:
10846: xmlNodePtr
10847: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10848: /* TODO !!! */
10849: return(NULL);
1.144 daniel 10850: }
10851:
10852: /**
1.181 daniel 10853: * xmlRecoverDoc:
1.123 daniel 10854: * @cur: a pointer to an array of xmlChar
1.59 daniel 10855: *
10856: * parse an XML in-memory document and build a tree.
10857: * In the case the document is not Well Formed, a tree is built anyway
10858: *
1.68 daniel 10859: * Returns the resulting document tree
1.59 daniel 10860: */
10861:
1.69 daniel 10862: xmlDocPtr
1.123 daniel 10863: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10864: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10865: }
10866:
10867: /**
1.181 daniel 10868: * xmlCreateEntityParserCtxt:
10869: * @URL: the entity URL
10870: * @ID: the entity PUBLIC ID
10871: * @base: a posible base for the target URI
10872: *
10873: * Create a parser context for an external entity
10874: * Automatic support for ZLIB/Compress compressed document is provided
10875: * by default if found at compile-time.
10876: *
10877: * Returns the new parser context or NULL
10878: */
10879: xmlParserCtxtPtr
10880: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10881: const xmlChar *base) {
10882: xmlParserCtxtPtr ctxt;
10883: xmlParserInputPtr inputStream;
10884: char *directory = NULL;
1.210 veillard 10885: xmlChar *uri;
10886:
1.181 daniel 10887: ctxt = xmlNewParserCtxt();
10888: if (ctxt == NULL) {
10889: return(NULL);
10890: }
10891:
1.210 veillard 10892: uri = xmlBuildURI(URL, base);
10893:
10894: if (uri == NULL) {
10895: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10896: if (inputStream == NULL) {
10897: xmlFreeParserCtxt(ctxt);
10898: return(NULL);
10899: }
10900:
10901: inputPush(ctxt, inputStream);
10902:
10903: if ((ctxt->directory == NULL) && (directory == NULL))
10904: directory = xmlParserGetDirectory((char *)URL);
10905: if ((ctxt->directory == NULL) && (directory != NULL))
10906: ctxt->directory = directory;
10907: } else {
10908: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10909: if (inputStream == NULL) {
10910: xmlFreeParserCtxt(ctxt);
10911: return(NULL);
10912: }
1.181 daniel 10913:
1.210 veillard 10914: inputPush(ctxt, inputStream);
1.181 daniel 10915:
1.210 veillard 10916: if ((ctxt->directory == NULL) && (directory == NULL))
10917: directory = xmlParserGetDirectory((char *)uri);
10918: if ((ctxt->directory == NULL) && (directory != NULL))
10919: ctxt->directory = directory;
10920: xmlFree(uri);
10921: }
1.181 daniel 10922:
10923: return(ctxt);
10924: }
10925:
10926: /**
10927: * xmlCreateFileParserCtxt:
1.50 daniel 10928: * @filename: the filename
10929: *
1.69 daniel 10930: * Create a parser context for a file content.
10931: * Automatic support for ZLIB/Compress compressed document is provided
10932: * by default if found at compile-time.
1.50 daniel 10933: *
1.69 daniel 10934: * Returns the new parser context or NULL
1.9 httpng 10935: */
1.69 daniel 10936: xmlParserCtxtPtr
10937: xmlCreateFileParserCtxt(const char *filename)
10938: {
10939: xmlParserCtxtPtr ctxt;
1.40 daniel 10940: xmlParserInputPtr inputStream;
1.91 daniel 10941: xmlParserInputBufferPtr buf;
1.111 daniel 10942: char *directory = NULL;
1.9 httpng 10943:
1.91 daniel 10944: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10945: if (buf == NULL) return(NULL);
1.9 httpng 10946:
1.97 daniel 10947: ctxt = xmlNewParserCtxt();
1.16 daniel 10948: if (ctxt == NULL) {
10949: return(NULL);
10950: }
1.97 daniel 10951:
1.96 daniel 10952: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10953: if (inputStream == NULL) {
1.97 daniel 10954: xmlFreeParserCtxt(ctxt);
1.40 daniel 10955: return(NULL);
10956: }
10957:
1.119 daniel 10958: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10959: inputStream->buf = buf;
10960: inputStream->base = inputStream->buf->buffer->content;
10961: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10962:
1.40 daniel 10963: inputPush(ctxt, inputStream);
1.110 daniel 10964: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10965: directory = xmlParserGetDirectory(filename);
10966: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10967: ctxt->directory = directory;
1.106 daniel 10968:
1.69 daniel 10969: return(ctxt);
10970: }
10971:
10972: /**
1.181 daniel 10973: * xmlSAXParseFile:
1.69 daniel 10974: * @sax: the SAX handler block
10975: * @filename: the filename
10976: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10977: * documents
10978: *
10979: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10980: * compressed document is provided by default if found at compile-time.
10981: * It use the given SAX function block to handle the parsing callback.
10982: * If sax is NULL, fallback to the default DOM tree building routines.
10983: *
10984: * Returns the resulting document tree
10985: */
10986:
1.79 daniel 10987: xmlDocPtr
10988: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10989: int recovery) {
10990: xmlDocPtr ret;
10991: xmlParserCtxtPtr ctxt;
1.111 daniel 10992: char *directory = NULL;
1.69 daniel 10993:
10994: ctxt = xmlCreateFileParserCtxt(filename);
10995: if (ctxt == NULL) return(NULL);
1.74 daniel 10996: if (sax != NULL) {
1.93 veillard 10997: if (ctxt->sax != NULL)
1.119 daniel 10998: xmlFree(ctxt->sax);
1.74 daniel 10999: ctxt->sax = sax;
11000: ctxt->userData = NULL;
11001: }
1.106 daniel 11002:
1.110 daniel 11003: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 11004: directory = xmlParserGetDirectory(filename);
11005: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 11006: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 11007:
11008: xmlParseDocument(ctxt);
1.40 daniel 11009:
1.72 daniel 11010: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 11011: else {
11012: ret = NULL;
1.72 daniel 11013: xmlFreeDoc(ctxt->myDoc);
11014: ctxt->myDoc = NULL;
1.59 daniel 11015: }
1.86 daniel 11016: if (sax != NULL)
11017: ctxt->sax = NULL;
1.69 daniel 11018: xmlFreeParserCtxt(ctxt);
1.20 daniel 11019:
11020: return(ret);
11021: }
11022:
1.55 daniel 11023: /**
1.181 daniel 11024: * xmlParseFile:
1.55 daniel 11025: * @filename: the filename
11026: *
11027: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11028: * compressed document is provided by default if found at compile-time.
11029: *
1.68 daniel 11030: * Returns the resulting document tree
1.55 daniel 11031: */
11032:
1.79 daniel 11033: xmlDocPtr
11034: xmlParseFile(const char *filename) {
1.59 daniel 11035: return(xmlSAXParseFile(NULL, filename, 0));
11036: }
11037:
11038: /**
1.181 daniel 11039: * xmlRecoverFile:
1.59 daniel 11040: * @filename: the filename
11041: *
11042: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11043: * compressed document is provided by default if found at compile-time.
11044: * In the case the document is not Well Formed, a tree is built anyway
11045: *
1.68 daniel 11046: * Returns the resulting document tree
1.59 daniel 11047: */
11048:
1.79 daniel 11049: xmlDocPtr
11050: xmlRecoverFile(const char *filename) {
1.59 daniel 11051: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 11052: }
1.32 daniel 11053:
1.50 daniel 11054: /**
1.181 daniel 11055: * xmlCreateMemoryParserCtxt:
11056: * @buffer: a pointer to a zero terminated char array
11057: * @size: the size of the array (without the trailing 0)
1.50 daniel 11058: *
1.69 daniel 11059: * Create a parser context for an XML in-memory document.
1.50 daniel 11060: *
1.69 daniel 11061: * Returns the new parser context or NULL
1.20 daniel 11062: */
1.69 daniel 11063: xmlParserCtxtPtr
11064: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 11065: xmlParserCtxtPtr ctxt;
1.40 daniel 11066: xmlParserInputPtr input;
1.209 veillard 11067: xmlParserInputBufferPtr buf;
1.40 daniel 11068:
1.179 daniel 11069: if (buffer[size] != 0)
1.181 daniel 11070: return(NULL);
1.40 daniel 11071:
1.97 daniel 11072: ctxt = xmlNewParserCtxt();
1.181 daniel 11073: if (ctxt == NULL)
1.20 daniel 11074: return(NULL);
1.97 daniel 11075:
1.209 veillard 11076: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
11077: if (buf == NULL) return(NULL);
11078:
1.96 daniel 11079: input = xmlNewInputStream(ctxt);
1.40 daniel 11080: if (input == NULL) {
1.97 daniel 11081: xmlFreeParserCtxt(ctxt);
1.40 daniel 11082: return(NULL);
11083: }
1.20 daniel 11084:
1.40 daniel 11085: input->filename = NULL;
1.209 veillard 11086: input->buf = buf;
11087: input->base = input->buf->buffer->content;
11088: input->cur = input->buf->buffer->content;
1.20 daniel 11089:
1.40 daniel 11090: inputPush(ctxt, input);
1.69 daniel 11091: return(ctxt);
11092: }
11093:
11094: /**
1.181 daniel 11095: * xmlSAXParseMemory:
1.69 daniel 11096: * @sax: the SAX handler block
11097: * @buffer: an pointer to a char array
1.127 daniel 11098: * @size: the size of the array
11099: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 11100: * documents
11101: *
11102: * parse an XML in-memory block and use the given SAX function block
11103: * to handle the parsing callback. If sax is NULL, fallback to the default
11104: * DOM tree building routines.
11105: *
11106: * Returns the resulting document tree
11107: */
11108: xmlDocPtr
11109: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
11110: xmlDocPtr ret;
11111: xmlParserCtxtPtr ctxt;
11112:
11113: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11114: if (ctxt == NULL) return(NULL);
1.74 daniel 11115: if (sax != NULL) {
11116: ctxt->sax = sax;
11117: ctxt->userData = NULL;
11118: }
1.20 daniel 11119:
11120: xmlParseDocument(ctxt);
1.40 daniel 11121:
1.72 daniel 11122: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 11123: else {
11124: ret = NULL;
1.72 daniel 11125: xmlFreeDoc(ctxt->myDoc);
11126: ctxt->myDoc = NULL;
1.59 daniel 11127: }
1.86 daniel 11128: if (sax != NULL)
11129: ctxt->sax = NULL;
1.69 daniel 11130: xmlFreeParserCtxt(ctxt);
1.16 daniel 11131:
1.9 httpng 11132: return(ret);
1.17 daniel 11133: }
11134:
1.55 daniel 11135: /**
1.181 daniel 11136: * xmlParseMemory:
1.68 daniel 11137: * @buffer: an pointer to a char array
1.55 daniel 11138: * @size: the size of the array
11139: *
11140: * parse an XML in-memory block and build a tree.
11141: *
1.68 daniel 11142: * Returns the resulting document tree
1.55 daniel 11143: */
11144:
11145: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 11146: return(xmlSAXParseMemory(NULL, buffer, size, 0));
11147: }
11148:
11149: /**
1.181 daniel 11150: * xmlRecoverMemory:
1.68 daniel 11151: * @buffer: an pointer to a char array
1.59 daniel 11152: * @size: the size of the array
11153: *
11154: * parse an XML in-memory block and build a tree.
11155: * In the case the document is not Well Formed, a tree is built anyway
11156: *
1.68 daniel 11157: * Returns the resulting document tree
1.59 daniel 11158: */
11159:
11160: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
11161: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 11162: }
11163:
11164:
1.50 daniel 11165: /**
11166: * xmlSetupParserForBuffer:
11167: * @ctxt: an XML parser context
1.123 daniel 11168: * @buffer: a xmlChar * buffer
1.50 daniel 11169: * @filename: a file name
11170: *
1.19 daniel 11171: * Setup the parser context to parse a new buffer; Clears any prior
11172: * contents from the parser context. The buffer parameter must not be
11173: * NULL, but the filename parameter can be
11174: */
1.55 daniel 11175: void
1.123 daniel 11176: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 11177: const char* filename)
11178: {
1.96 daniel 11179: xmlParserInputPtr input;
1.40 daniel 11180:
1.96 daniel 11181: input = xmlNewInputStream(ctxt);
11182: if (input == NULL) {
11183: perror("malloc");
1.119 daniel 11184: xmlFree(ctxt);
1.145 daniel 11185: return;
1.96 daniel 11186: }
11187:
11188: xmlClearParserCtxt(ctxt);
11189: if (filename != NULL)
1.119 daniel 11190: input->filename = xmlMemStrdup(filename);
1.96 daniel 11191: input->base = buffer;
11192: input->cur = buffer;
11193: inputPush(ctxt, input);
1.17 daniel 11194: }
11195:
1.123 daniel 11196: /**
11197: * xmlSAXUserParseFile:
11198: * @sax: a SAX handler
11199: * @user_data: The user data returned on SAX callbacks
11200: * @filename: a file name
11201: *
11202: * parse an XML file and call the given SAX handler routines.
11203: * Automatic support for ZLIB/Compress compressed document is provided
11204: *
11205: * Returns 0 in case of success or a error number otherwise
11206: */
1.131 daniel 11207: int
11208: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11209: const char *filename) {
1.123 daniel 11210: int ret = 0;
11211: xmlParserCtxtPtr ctxt;
11212:
11213: ctxt = xmlCreateFileParserCtxt(filename);
11214: if (ctxt == NULL) return -1;
1.134 daniel 11215: if (ctxt->sax != &xmlDefaultSAXHandler)
11216: xmlFree(ctxt->sax);
1.123 daniel 11217: ctxt->sax = sax;
1.140 daniel 11218: if (user_data != NULL)
11219: ctxt->userData = user_data;
1.123 daniel 11220:
11221: xmlParseDocument(ctxt);
11222:
11223: if (ctxt->wellFormed)
11224: ret = 0;
11225: else {
11226: if (ctxt->errNo != 0)
11227: ret = ctxt->errNo;
11228: else
11229: ret = -1;
11230: }
11231: if (sax != NULL)
11232: ctxt->sax = NULL;
11233: xmlFreeParserCtxt(ctxt);
11234:
11235: return ret;
11236: }
11237:
11238: /**
11239: * xmlSAXUserParseMemory:
11240: * @sax: a SAX handler
11241: * @user_data: The user data returned on SAX callbacks
11242: * @buffer: an in-memory XML document input
1.127 daniel 11243: * @size: the length of the XML document in bytes
1.123 daniel 11244: *
11245: * A better SAX parsing routine.
11246: * parse an XML in-memory buffer and call the given SAX handler routines.
11247: *
11248: * Returns 0 in case of success or a error number otherwise
11249: */
11250: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
11251: char *buffer, int size) {
11252: int ret = 0;
11253: xmlParserCtxtPtr ctxt;
1.216 ! veillard 11254: xmlSAXHandlerPtr oldsax;
1.123 daniel 11255:
11256: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11257: if (ctxt == NULL) return -1;
1.216 ! veillard 11258: if (sax != NULL) {
! 11259: oldsax = ctxt->sax;
! 11260: ctxt->sax = sax;
! 11261: }
1.123 daniel 11262: ctxt->userData = user_data;
11263:
11264: xmlParseDocument(ctxt);
11265:
11266: if (ctxt->wellFormed)
11267: ret = 0;
11268: else {
11269: if (ctxt->errNo != 0)
11270: ret = ctxt->errNo;
11271: else
11272: ret = -1;
11273: }
1.216 ! veillard 11274: if (sax != NULL) {
! 11275: ctxt->sax = oldsax;
! 11276: }
1.123 daniel 11277: xmlFreeParserCtxt(ctxt);
11278:
11279: return ret;
11280: }
11281:
1.32 daniel 11282:
1.98 daniel 11283: /************************************************************************
11284: * *
1.127 daniel 11285: * Miscellaneous *
1.98 daniel 11286: * *
11287: ************************************************************************/
11288:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* registered encoding handlers */
    xmlCleanupCharEncodingHandlers();
    /* predefined entities */
    xmlCleanupPredefinedEntities();
}
1.98 daniel 11303:
1.50 daniel 11304: /**
11305: * xmlParserFindNodeInfo:
11306: * @ctxt: an XML parser context
11307: * @node: an XML node within the tree
11308: *
11309: * Find the parser node info struct for a given node
11310: *
1.68 daniel 11311: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 11312: */
11313: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
11314: const xmlNode* node)
11315: {
11316: unsigned long pos;
11317:
11318: /* Find position where node should be at */
11319: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
11320: if ( ctx->node_seq.buffer[pos].node == node )
11321: return &ctx->node_seq.buffer[pos];
11322: else
11323: return NULL;
11324: }
11325:
11326:
1.50 daniel 11327: /**
1.181 daniel 11328: * xmlInitNodeInfoSeq:
1.50 daniel 11329: * @seq: a node info sequence pointer
11330: *
11331: * -- Initialize (set to initial state) node info sequence
1.32 daniel 11332: */
1.55 daniel 11333: void
11334: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 11335: {
11336: seq->length = 0;
11337: seq->maximum = 0;
11338: seq->buffer = NULL;
11339: }
11340:
1.50 daniel 11341: /**
1.181 daniel 11342: * xmlClearNodeInfoSeq:
1.50 daniel 11343: * @seq: a node info sequence pointer
11344: *
11345: * -- Clear (release memory and reinitialize) node
1.32 daniel 11346: * info sequence
11347: */
1.55 daniel 11348: void
11349: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 11350: {
11351: if ( seq->buffer != NULL )
1.119 daniel 11352: xmlFree(seq->buffer);
1.32 daniel 11353: xmlInitNodeInfoSeq(seq);
11354: }
11355:
11356:
1.50 daniel 11357: /**
11358: * xmlParserFindNodeInfoIndex:
11359: * @seq: a node info sequence pointer
11360: * @node: an XML node pointer
11361: *
11362: *
1.32 daniel 11363: * xmlParserFindNodeInfoIndex : Find the index that the info record for
11364: * the given node is or should be at in a sorted sequence
1.68 daniel 11365: *
11366: * Returns a long indicating the position of the record
1.32 daniel 11367: */
11368: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
11369: const xmlNode* node)
11370: {
11371: unsigned long upper, lower, middle;
11372: int found = 0;
11373:
11374: /* Do a binary search for the key */
11375: lower = 1;
11376: upper = seq->length;
11377: middle = 0;
11378: while ( lower <= upper && !found) {
11379: middle = lower + (upper - lower) / 2;
11380: if ( node == seq->buffer[middle - 1].node )
11381: found = 1;
11382: else if ( node < seq->buffer[middle - 1].node )
11383: upper = middle - 1;
11384: else
11385: lower = middle + 1;
11386: }
11387:
11388: /* Return position */
11389: if ( middle == 0 || seq->buffer[middle - 1].node < node )
11390: return middle;
11391: else
11392: return middle - 1;
11393: }
11394:
11395:
1.50 daniel 11396: /**
11397: * xmlParserAddNodeInfo:
11398: * @ctxt: an XML parser context
1.68 daniel 11399: * @info: a node info sequence pointer
1.50 daniel 11400: *
11401: * Insert node info record into the sorted sequence
1.32 daniel 11402: */
1.55 daniel 11403: void
11404: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 11405: const xmlParserNodeInfo* info)
1.32 daniel 11406: {
11407: unsigned long pos;
11408: static unsigned int block_size = 5;
11409:
11410: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 11411: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
11412: if ( pos < ctxt->node_seq.length
11413: && ctxt->node_seq.buffer[pos].node == info->node ) {
11414: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 11415: }
11416:
11417: /* Otherwise, we need to add new node to buffer */
11418: else {
11419: /* Expand buffer by 5 if needed */
1.55 daniel 11420: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 11421: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 11422: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
11423: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 11424:
1.55 daniel 11425: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 11426: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 11427: else
1.119 daniel 11428: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 11429:
11430: if ( tmp_buffer == NULL ) {
1.55 daniel 11431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 11432: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 11433: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 11434: return;
11435: }
1.55 daniel 11436: ctxt->node_seq.buffer = tmp_buffer;
11437: ctxt->node_seq.maximum += block_size;
1.32 daniel 11438: }
11439:
11440: /* If position is not at end, move elements out of the way */
1.55 daniel 11441: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 11442: unsigned long i;
11443:
1.55 daniel 11444: for ( i = ctxt->node_seq.length; i > pos; i-- )
11445: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 11446: }
11447:
11448: /* Copy element and increase length */
1.55 daniel 11449: ctxt->node_seq.buffer[pos] = *info;
11450: ctxt->node_seq.length++;
1.32 daniel 11451: }
11452: }
1.77 daniel 11453:
1.98 daniel 11454:
11455: /**
1.181 daniel 11456: * xmlSubstituteEntitiesDefault:
1.98 daniel 11457: * @val: int 0 or 1
11458: *
11459: * Set and return the previous value for default entity support.
11460: * Initially the parser always keep entity references instead of substituting
11461: * entity values in the output. This function has to be used to change the
11462: * default parser behaviour
11463: * SAX::subtituteEntities() has to be used for changing that on a file by
11464: * file basis.
11465: *
11466: * Returns the last value for 0 for no substitution, 1 for substitution.
11467: */
11468:
11469: int
11470: xmlSubstituteEntitiesDefault(int val) {
11471: int old = xmlSubstituteEntitiesDefaultValue;
11472:
11473: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 11474: return(old);
11475: }
11476:
11477: /**
11478: * xmlKeepBlanksDefault:
11479: * @val: int 0 or 1
11480: *
11481: * Set and return the previous value for default blanks text nodes support.
11482: * The 1.x version of the parser used an heuristic to try to detect
11483: * ignorable white spaces. As a result the SAX callback was generating
11484: * ignorableWhitespace() callbacks instead of characters() one, and when
11485: * using the DOM output text nodes containing those blanks were not generated.
11486: * The 2.x and later version will switch to the XML standard way and
11487: * ignorableWhitespace() are only generated when running the parser in
11488: * validating mode and when the current element doesn't allow CDATA or
11489: * mixed content.
11490: * This function is provided as a way to force the standard behaviour
11491: * on 1.X libs and to switch back to the old mode for compatibility when
11492: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
11493: * by using xmlIsBlankNode() commodity function to detect the "empty"
11494: * nodes generated.
11495: * This value also affect autogeneration of indentation when saving code
11496: * if blanks sections are kept, indentation is not generated.
11497: *
11498: * Returns the last value for 0 for no substitution, 1 for substitution.
11499: */
11500:
11501: int
11502: xmlKeepBlanksDefault(int val) {
11503: int old = xmlKeepBlanksDefaultValue;
11504:
11505: xmlKeepBlanksDefaultValue = val;
11506: xmlIndentTreeOutput = !val;
1.98 daniel 11507: return(old);
11508: }
1.77 daniel 11509:
Webmaster