Annotation of XML/entities.c, revision 1.68
1.1 httpng 1: /*
 * entities.c : implementation for the XML entities handling
1.9 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.23 daniel 6: * Daniel.Veillard@w3.org
1.1 httpng 7: */
8:
1.42 daniel 9: #ifdef WIN32
10: #include "win32config.h"
11: #else
1.37 daniel 12: #include "config.h"
13: #endif
14:
1.1 httpng 15: #include <stdio.h>
1.37 daniel 16: #include <string.h>
17: #ifdef HAVE_STDLIB_H
1.17 daniel 18: #include <stdlib.h>
1.37 daniel 19: #endif
1.56 daniel 20: #include <libxml/xmlmemory.h>
1.67 veillard 21: #include <libxml/hash.h>
1.56 daniel 22: #include <libxml/entities.h>
23: #include <libxml/parser.h>
1.1 httpng 24:
1.54 daniel 25: #define DEBUG_ENT_REF /* debugging of cross entities dependancies */
1.61 veillard 26: #define ENTITY_HASH_SIZE 256 /* modify xmlEntityComputeHash accordingly */
27:
28: /*
29: * xmlEntityComputeHash:
30: *
31: * Computes the hash value for this given entity
32: */
33: int
34: xmlEntityComputeHash(const xmlChar *name) {
35: register const unsigned char *cur = (const unsigned char *) name;
36: register unsigned char val = 0;
37:
38: if (name == NULL)
39: return(val);
40: while (*cur) val += *cur++;
41: return(val);
42: }
1.54 daniel 43:
1.1 httpng 44: /*
1.15 daniel 45: * The XML predefined entities.
46: */
47:
48: struct xmlPredefinedEntityValue {
49: const char *name;
50: const char *value;
51: };
52: struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
53: { "lt", "<" },
54: { "gt", ">" },
55: { "apos", "'" },
56: { "quot", "\"" },
57: { "amp", "&" }
58: };
59:
1.61 veillard 60: /*
61: * TODO: !!!!!!! This is GROSS, allocation of a 256 entry hash for
62: * a fixed number of 4 elements !
63: */
1.67 veillard 64: xmlHashTablePtr xmlPredefinedEntities = NULL;
1.15 daniel 65:
66: /*
1.2 httpng 67: * xmlFreeEntity : clean-up an entity record.
1.1 httpng 68: */
1.2 httpng 69: void xmlFreeEntity(xmlEntityPtr entity) {
70: if (entity == NULL) return;
71:
1.52 daniel 72: if (entity->children)
73: xmlFreeNodeList(entity->children);
1.11 daniel 74: if (entity->name != NULL)
1.36 daniel 75: xmlFree((char *) entity->name);
1.14 daniel 76: if (entity->ExternalID != NULL)
1.36 daniel 77: xmlFree((char *) entity->ExternalID);
1.14 daniel 78: if (entity->SystemID != NULL)
1.36 daniel 79: xmlFree((char *) entity->SystemID);
1.65 veillard 80: if (entity->URI != NULL)
81: xmlFree((char *) entity->URI);
1.14 daniel 82: if (entity->content != NULL)
1.36 daniel 83: xmlFree((char *) entity->content);
1.27 daniel 84: if (entity->orig != NULL)
1.36 daniel 85: xmlFree((char *) entity->orig);
1.14 daniel 86: memset(entity, -1, sizeof(xmlEntity));
1.51 daniel 87: xmlFree(entity);
1.2 httpng 88: }
1.1 httpng 89:
90: /*
1.22 daniel 91: * xmlAddEntity : register a new entity for an entities table.
1.1 httpng 92: */
1.51 daniel 93: static xmlEntityPtr
1.67 veillard 94: xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
1.61 veillard 95: const xmlChar *ExternalID, const xmlChar *SystemID,
96: const xmlChar *content) {
1.67 veillard 97: xmlEntitiesTablePtr table = NULL;
1.51 daniel 98: xmlEntityPtr ret;
1.1 httpng 99:
1.61 veillard 100: if (name == NULL)
101: return(NULL);
1.67 veillard 102: switch (type) {
103: case XML_INTERNAL_GENERAL_ENTITY:
104: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
105: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
106: if (dtd->entities == NULL)
107: dtd->entities = xmlHashCreate(0);
108: table = dtd->entities;
109: break;
110: case XML_INTERNAL_PARAMETER_ENTITY:
111: case XML_EXTERNAL_PARAMETER_ENTITY:
112: if (dtd->pentities == NULL)
113: dtd->pentities = xmlHashCreate(0);
114: table = dtd->pentities;
115: break;
116: case XML_INTERNAL_PREDEFINED_ENTITY:
117: if (xmlPredefinedEntities == NULL)
118: xmlPredefinedEntities = xmlHashCreate(8);
119: table = xmlPredefinedEntities;
1.61 veillard 120: }
1.67 veillard 121: if (table == NULL)
122: return(NULL);
1.51 daniel 123: ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
124: if (ret == NULL) {
125: fprintf(stderr, "xmlAddEntity: out of memory\n");
126: return(NULL);
127: }
128: memset(ret, 0, sizeof(xmlEntity));
129: ret->type = XML_ENTITY_DECL;
130:
131: /*
132: * fill the structure.
133: */
134: ret->name = xmlStrdup(name);
1.58 veillard 135: ret->etype = (xmlEntityType) type;
1.13 daniel 136: if (ExternalID != NULL)
1.51 daniel 137: ret->ExternalID = xmlStrdup(ExternalID);
1.13 daniel 138: if (SystemID != NULL)
1.51 daniel 139: ret->SystemID = xmlStrdup(SystemID);
1.43 daniel 140: if (content != NULL) {
1.51 daniel 141: ret->length = xmlStrlen(content);
142: ret->content = xmlStrndup(content, ret->length);
1.43 daniel 143: } else {
1.51 daniel 144: ret->length = 0;
145: ret->content = NULL;
1.43 daniel 146: }
1.62 veillard 147: ret->URI = NULL; /* to be computed by the layer knowing
148: the defining entity */
1.51 daniel 149: ret->orig = NULL;
150:
1.67 veillard 151: if (xmlHashAddEntry(table, name, ret)) {
152: /*
153: * entity was already defined at another level.
154: */
155: xmlFreeEntity(ret);
1.68 ! veillard 156: return(NULL);
1.67 veillard 157: }
1.51 daniel 158: return(ret);
1.2 httpng 159: }
1.1 httpng 160:
1.22 daniel 161: /**
162: * xmlInitializePredefinedEntities:
163: *
164: * Set up the predefined entities.
1.15 daniel 165: */
166: void xmlInitializePredefinedEntities(void) {
167: int i;
1.38 daniel 168: xmlChar name[50];
169: xmlChar value[50];
1.15 daniel 170: const char *in;
1.38 daniel 171: xmlChar *out;
1.15 daniel 172:
173: if (xmlPredefinedEntities != NULL) return;
174:
175: xmlPredefinedEntities = xmlCreateEntitiesTable();
176: for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
177: sizeof(xmlPredefinedEntityValues[0]);i++) {
178: in = xmlPredefinedEntityValues[i].name;
179: out = &name[0];
1.38 daniel 180: for (;(*out++ = (xmlChar) *in);)in++;
1.15 daniel 181: in = xmlPredefinedEntityValues[i].value;
182: out = &value[0];
1.38 daniel 183: for (;(*out++ = (xmlChar) *in);)in++;
1.67 veillard 184:
185: xmlAddEntity(NULL, (const xmlChar *) &name[0],
1.18 daniel 186: XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
1.15 daniel 187: &value[0]);
188: }
189: }
1.17 daniel 190:
191: /**
1.40 daniel 192: * xmlCleanupPredefinedEntities:
193: *
194: * Cleanup up the predefined entities table.
195: */
196: void xmlCleanupPredefinedEntities(void) {
197: if (xmlPredefinedEntities == NULL) return;
198:
199: xmlFreeEntitiesTable(xmlPredefinedEntities);
200: xmlPredefinedEntities = NULL;
201: }
202:
203: /**
1.17 daniel 204: * xmlGetPredefinedEntity:
205: * @name: the entity name
206: *
207: * Check whether this name is an predefined entity.
208: *
1.24 daniel 209: * Returns NULL if not, othervise the entity
1.17 daniel 210: */
211: xmlEntityPtr
1.38 daniel 212: xmlGetPredefinedEntity(const xmlChar *name) {
1.17 daniel 213: if (xmlPredefinedEntities == NULL)
214: xmlInitializePredefinedEntities();
1.67 veillard 215: return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
1.17 daniel 216: }
217:
1.22 daniel 218: /**
219: * xmlAddDtdEntity:
220: * @doc: the document
221: * @name: the entity name
222: * @type: the entity type XML_xxx_yyy_ENTITY
223: * @ExternalID: the entity external ID if available
224: * @SystemID: the entity system ID if available
225: * @content: the entity content
226: *
1.51 daniel 227: * Register a new entity for this document DTD external subset.
228: *
229: * Returns a pointer to the entity or NULL in case of error
1.1 httpng 230: */
1.51 daniel 231: xmlEntityPtr
1.38 daniel 232: xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
1.51 daniel 233: const xmlChar *ExternalID, const xmlChar *SystemID,
234: const xmlChar *content) {
235: xmlEntityPtr ret;
236: xmlDtdPtr dtd;
1.1 httpng 237:
1.51 daniel 238: if (doc == NULL) {
239: fprintf(stderr,
240: "xmlAddDtdEntity: doc == NULL !\n");
241: return(NULL);
242: }
1.22 daniel 243: if (doc->extSubset == NULL) {
244: fprintf(stderr,
245: "xmlAddDtdEntity: document without external subset !\n");
1.51 daniel 246: return(NULL);
1.16 daniel 247: }
1.51 daniel 248: dtd = doc->extSubset;
1.67 veillard 249: ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
1.51 daniel 250: if (ret == NULL) return(NULL);
251:
252: /*
253: * Link it to the Dtd
254: */
255: ret->parent = dtd;
256: ret->doc = dtd->doc;
257: if (dtd->last == NULL) {
258: dtd->children = dtd->last = (xmlNodePtr) ret;
259: } else {
260: dtd->last->next = (xmlNodePtr) ret;
261: ret->prev = dtd->last;
262: dtd->last = (xmlNodePtr) ret;
263: }
264: return(ret);
1.1 httpng 265: }
266:
1.22 daniel 267: /**
268: * xmlAddDocEntity:
269: * @doc: the document
270: * @name: the entity name
271: * @type: the entity type XML_xxx_yyy_ENTITY
272: * @ExternalID: the entity external ID if available
273: * @SystemID: the entity system ID if available
274: * @content: the entity content
275: *
276: * Register a new entity for this document.
1.51 daniel 277: *
278: * Returns a pointer to the entity or NULL in case of error
1.1 httpng 279: */
1.51 daniel 280: xmlEntityPtr
1.38 daniel 281: xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
1.51 daniel 282: const xmlChar *ExternalID, const xmlChar *SystemID,
283: const xmlChar *content) {
284: xmlEntityPtr ret;
285: xmlDtdPtr dtd;
1.16 daniel 286:
1.22 daniel 287: if (doc == NULL) {
288: fprintf(stderr,
289: "xmlAddDocEntity: document is NULL !\n");
1.51 daniel 290: return(NULL);
1.22 daniel 291: }
292: if (doc->intSubset == NULL) {
293: fprintf(stderr,
294: "xmlAddDtdEntity: document without internal subset !\n");
1.51 daniel 295: return(NULL);
1.22 daniel 296: }
1.51 daniel 297: dtd = doc->intSubset;
1.67 veillard 298: ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
1.51 daniel 299: if (ret == NULL) return(NULL);
300:
301: /*
302: * Link it to the Dtd
303: */
304: ret->parent = dtd;
305: ret->doc = dtd->doc;
306: if (dtd->last == NULL) {
307: dtd->children = dtd->last = (xmlNodePtr) ret;
308: } else {
309: dtd->last->next = (xmlNodePtr) ret;
310: ret->prev = dtd->last;
311: dtd->last = (xmlNodePtr) ret;
312: }
313: return(ret);
1.54 daniel 314: }
315:
1.61 veillard 316: /**
317: * xmlGetEntityFromTable:
318: * @table: an entity table
319: * @name: the entity name
320: * @parameter: look for parameter entities
321: *
322: * Do an entity lookup in the table.
323: * returns the corresponding parameter entity, if found.
324: *
325: * Returns A pointer to the entity structure or NULL if not found.
326: */
327: xmlEntityPtr
1.67 veillard 328: xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
329: return((xmlEntityPtr) xmlHashLookup(table, name));
1.61 veillard 330: }
331:
1.22 daniel 332: /**
1.30 daniel 333: * xmlGetParameterEntity:
334: * @doc: the document referencing the entity
335: * @name: the entity name
336: *
337: * Do an entity lookup in the internal and external subsets and
338: * returns the corresponding parameter entity, if found.
339: *
340: * Returns A pointer to the entity structure or NULL if not found.
341: */
342: xmlEntityPtr
1.38 daniel 343: xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
1.30 daniel 344: xmlEntitiesTablePtr table;
1.61 veillard 345: xmlEntityPtr ret;
1.30 daniel 346:
1.67 veillard 347: if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
348: table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
349: ret = xmlGetEntityFromTable(table, name);
1.61 veillard 350: if (ret != NULL)
351: return(ret);
1.30 daniel 352: }
1.67 veillard 353: if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
354: table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
355: return(xmlGetEntityFromTable(table, name));
1.35 daniel 356: }
1.30 daniel 357: return(NULL);
358: }
359:
360: /**
1.22 daniel 361: * xmlGetDtdEntity:
362: * @doc: the document referencing the entity
363: * @name: the entity name
364: *
365: * Do an entity lookup in the Dtd entity hash table and
366: * returns the corresponding entity, if found.
367: *
1.24 daniel 368: * Returns A pointer to the entity structure or NULL if not found.
1.1 httpng 369: */
1.22 daniel 370: xmlEntityPtr
1.38 daniel 371: xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
1.2 httpng 372: xmlEntitiesTablePtr table;
373:
1.22 daniel 374: if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
375: table = (xmlEntitiesTablePtr) doc->extSubset->entities;
1.67 veillard 376: return(xmlGetEntityFromTable(table, name));
1.15 daniel 377: }
1.7 veillard 378: return(NULL);
1.3 httpng 379: }
380:
1.22 daniel 381: /**
382: * xmlGetDocEntity:
383: * @doc: the document referencing the entity
384: * @name: the entity name
385: *
386: * Do an entity lookup in the document entity hash table and
387: * returns the corrsponding entity, otherwise a lookup is done
388: * in the predefined entities too.
389: *
1.24 daniel 390: * Returns A pointer to the entity structure or NULL if not found.
1.14 daniel 391: */
1.22 daniel 392: xmlEntityPtr
1.38 daniel 393: xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
1.16 daniel 394: xmlEntityPtr cur;
1.14 daniel 395: xmlEntitiesTablePtr table;
396:
1.65 veillard 397: if (doc != NULL) {
398: if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
399: table = (xmlEntitiesTablePtr) doc->intSubset->entities;
1.67 veillard 400: cur = xmlGetEntityFromTable(table, name);
1.65 veillard 401: if (cur != NULL)
402: return(cur);
403: }
404: if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
405: table = (xmlEntitiesTablePtr) doc->extSubset->entities;
1.67 veillard 406: cur = xmlGetEntityFromTable(table, name);
1.65 veillard 407: if (cur != NULL)
408: return(cur);
409: }
1.34 daniel 410: }
1.15 daniel 411: if (xmlPredefinedEntities == NULL)
412: xmlInitializePredefinedEntities();
1.16 daniel 413: table = xmlPredefinedEntities;
1.67 veillard 414: return(xmlGetEntityFromTable(table, name));
1.1 httpng 415: }
416:
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * NOTE: the macro below is looser than the production quoted above: it
 * accepts every value >= 0x20 other than 0xFFFE and 0xFFFF, so the
 * surrogate range and values above 0x10FFFF are not rejected.
 * The argument is evaluated several times and must not have side
 * effects.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
425:
1.28 daniel 426: /*
427: * A buffer used for converting entities to their equivalent and back.
428: */
429: static int buffer_size = 0;
1.38 daniel 430: static xmlChar *buffer = NULL;
1.28 daniel 431:
1.44 daniel 432: int growBuffer(void) {
1.28 daniel 433: buffer_size *= 2;
1.38 daniel 434: buffer = (xmlChar *) xmlRealloc(buffer, buffer_size * sizeof(xmlChar));
1.28 daniel 435: if (buffer == NULL) {
436: perror("realloc failed");
1.44 daniel 437: return(-1);
1.28 daniel 438: }
1.44 daniel 439: return(0);
1.28 daniel 440: }
441:
442:
1.22 daniel 443: /**
444: * xmlEncodeEntities:
445: * @doc: the document containing the string
446: * @input: A string to convert to XML.
447: *
448: * Do a global encoding of a string, replacing the predefined entities
449: * and non ASCII values with their entities and CharRef counterparts.
450: *
1.33 daniel 451: * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
452: * compatibility
1.28 daniel 453: *
454: * People must migrate their code to xmlEncodeEntitiesReentrant !
1.31 daniel 455: * This routine will issue a warning when encountered.
1.28 daniel 456: *
457: * Returns A newly allocated string with the substitution done.
458: */
1.38 daniel 459: const xmlChar *
460: xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
461: const xmlChar *cur = input;
462: xmlChar *out = buffer;
1.31 daniel 463: static int warning = 1;
1.39 daniel 464: int html = 0;
465:
1.31 daniel 466:
467: if (warning) {
468: fprintf(stderr, "Deprecated API xmlEncodeEntities() used\n");
469: fprintf(stderr, " change code to use xmlEncodeEntitiesReentrant()\n");
470: warning = 0;
471: }
1.28 daniel 472:
473: if (input == NULL) return(NULL);
1.39 daniel 474: if (doc != NULL)
475: html = (doc->type == XML_HTML_DOCUMENT_NODE);
476:
1.28 daniel 477: if (buffer == NULL) {
478: buffer_size = 1000;
1.38 daniel 479: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.28 daniel 480: if (buffer == NULL) {
481: perror("malloc failed");
1.44 daniel 482: return(NULL);
1.28 daniel 483: }
484: out = buffer;
485: }
486: while (*cur != '\0') {
487: if (out - buffer > buffer_size - 100) {
488: int index = out - buffer;
489:
490: growBuffer();
491: out = &buffer[index];
492: }
493:
494: /*
495: * By default one have to encode at least '<', '>', '"' and '&' !
496: */
497: if (*cur == '<') {
498: *out++ = '&';
499: *out++ = 'l';
500: *out++ = 't';
501: *out++ = ';';
502: } else if (*cur == '>') {
503: *out++ = '&';
504: *out++ = 'g';
505: *out++ = 't';
506: *out++ = ';';
507: } else if (*cur == '&') {
508: *out++ = '&';
509: *out++ = 'a';
510: *out++ = 'm';
511: *out++ = 'p';
512: *out++ = ';';
513: } else if (*cur == '"') {
514: *out++ = '&';
515: *out++ = 'q';
516: *out++ = 'u';
517: *out++ = 'o';
518: *out++ = 't';
519: *out++ = ';';
1.39 daniel 520: } else if ((*cur == '\'') && (!html)) {
1.28 daniel 521: *out++ = '&';
522: *out++ = 'a';
523: *out++ = 'p';
524: *out++ = 'o';
525: *out++ = 's';
526: *out++ = ';';
527: } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
528: (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
529: /*
530: * default case, just copy !
531: */
532: *out++ = *cur;
533: #ifndef USE_UTF_8
1.38 daniel 534: } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
1.28 daniel 535: char buf[10], *ptr;
1.64 veillard 536:
1.28 daniel 537: #ifdef HAVE_SNPRINTF
1.64 veillard 538: snprintf(buf, sizeof(buf), "&#%d;", *cur);
1.28 daniel 539: #else
540: sprintf(buf, "&#%d;", *cur);
541: #endif
1.64 veillard 542: buf[sizeof(buf) - 1] = 0;
1.28 daniel 543: ptr = buf;
544: while (*ptr != 0) *out++ = *ptr++;
545: #endif
546: } else if (IS_CHAR(*cur)) {
547: char buf[10], *ptr;
548:
549: #ifdef HAVE_SNPRINTF
1.64 veillard 550: snprintf(buf, sizeof(buf), "&#%d;", *cur);
1.28 daniel 551: #else
552: sprintf(buf, "&#%d;", *cur);
553: #endif
1.64 veillard 554: buf[sizeof(buf) - 1] = 0;
1.28 daniel 555: ptr = buf;
556: while (*ptr != 0) *out++ = *ptr++;
557: }
558: #if 0
559: else {
560: /*
561: * default case, this is not a valid char !
562: * Skip it...
563: */
564: fprintf(stderr, "xmlEncodeEntities: invalid char %d\n", (int) *cur);
565: }
566: #endif
567: cur++;
568: }
569: *out++ = 0;
570: return(buffer);
571: }
572:
/*
 * Macro used to grow the current buffer.
 *
 * It must be expanded inside a function returning a pointer which has
 * an xmlChar pointer named "buffer" and an int named "buffer_size" in
 * scope.  The size is doubled; if the reallocation fails the old
 * buffer is freed (it used to be leaked) and the enclosing function
 * returns NULL.
 */
#define growBufferReentrant() {						\
    xmlChar *grown_buffer;						\
    buffer_size *= 2;							\
    grown_buffer = (xmlChar *)						\
    		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
    if (grown_buffer == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer;						\
}
585:
586:
587: /**
588: * xmlEncodeEntitiesReentrant:
589: * @doc: the document containing the string
590: * @input: A string to convert to XML.
591: *
592: * Do a global encoding of a string, replacing the predefined entities
593: * and non ASCII values with their entities and CharRef counterparts.
594: * Contrary to xmlEncodeEntities, this routine is reentrant, and result
595: * must be deallocated.
596: *
1.24 daniel 597: * Returns A newly allocated string with the substitution done.
1.1 httpng 598: */
1.38 daniel 599: xmlChar *
600: xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
601: const xmlChar *cur = input;
602: xmlChar *buffer = NULL;
603: xmlChar *out = NULL;
1.26 daniel 604: int buffer_size = 0;
1.39 daniel 605: int html = 0;
1.3 httpng 606:
1.19 daniel 607: if (input == NULL) return(NULL);
1.39 daniel 608: if (doc != NULL)
609: html = (doc->type == XML_HTML_DOCUMENT_NODE);
1.26 daniel 610:
611: /*
612: * allocate an translation buffer.
613: */
614: buffer_size = 1000;
1.38 daniel 615: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.3 httpng 616: if (buffer == NULL) {
1.26 daniel 617: perror("malloc failed");
1.44 daniel 618: return(NULL);
1.3 httpng 619: }
1.26 daniel 620: out = buffer;
621:
1.6 veillard 622: while (*cur != '\0') {
623: if (out - buffer > buffer_size - 100) {
624: int index = out - buffer;
625:
1.28 daniel 626: growBufferReentrant();
1.6 veillard 627: out = &buffer[index];
628: }
629:
630: /*
1.7 veillard 631: * By default one have to encode at least '<', '>', '"' and '&' !
1.6 veillard 632: */
633: if (*cur == '<') {
634: *out++ = '&';
635: *out++ = 'l';
636: *out++ = 't';
637: *out++ = ';';
1.7 veillard 638: } else if (*cur == '>') {
639: *out++ = '&';
640: *out++ = 'g';
641: *out++ = 't';
642: *out++ = ';';
1.6 veillard 643: } else if (*cur == '&') {
644: *out++ = '&';
645: *out++ = 'a';
646: *out++ = 'm';
647: *out++ = 'p';
1.7 veillard 648: *out++ = ';';
649: } else if (*cur == '"') {
650: *out++ = '&';
651: *out++ = 'q';
652: *out++ = 'u';
653: *out++ = 'o';
654: *out++ = 't';
655: *out++ = ';';
1.53 daniel 656: #if 0
1.39 daniel 657: } else if ((*cur == '\'') && (!html)) {
1.7 veillard 658: *out++ = '&';
659: *out++ = 'a';
660: *out++ = 'p';
661: *out++ = 'o';
662: *out++ = 's';
1.6 veillard 663: *out++ = ';';
1.53 daniel 664: #endif
1.21 daniel 665: } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
666: (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
667: /*
668: * default case, just copy !
669: */
670: *out++ = *cur;
1.46 daniel 671: } else if (*cur >= 0x80) {
1.59 veillard 672: if ((doc->encoding != NULL) || (html)) {
1.46 daniel 673: /*
1.63 veillard 674: * Bjørn Reese <br@sseusa.com> provided the patch
675: xmlChar xc;
676: xc = (*cur & 0x3F) << 6;
677: if (cur[1] != 0) {
678: xc += *(++cur) & 0x3F;
679: *out++ = xc;
680: } else
1.46 daniel 681: */
1.63 veillard 682: *out++ = *cur;
1.46 daniel 683: } else {
684: /*
685: * We assume we have UTF-8 input.
686: */
687: char buf[10], *ptr;
1.48 daniel 688: int val = 0, l = 1;
1.46 daniel 689:
690: if (*cur < 0xC0) {
691: fprintf(stderr,
692: "xmlEncodeEntitiesReentrant : input not UTF-8\n");
693: doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
694: #ifdef HAVE_SNPRINTF
1.64 veillard 695: snprintf(buf, sizeof(buf), "&#%d;", *cur);
1.46 daniel 696: #else
697: sprintf(buf, "&#%d;", *cur);
698: #endif
1.64 veillard 699: buf[sizeof(buf) - 1] = 0;
1.46 daniel 700: ptr = buf;
701: while (*ptr != 0) *out++ = *ptr++;
702: continue;
703: } else if (*cur < 0xE0) {
704: val = (cur[0]) & 0x1F;
705: val <<= 6;
706: val |= (cur[1]) & 0x3F;
707: l = 2;
708: } else if (*cur < 0xF0) {
709: val = (cur[0]) & 0x0F;
710: val <<= 6;
711: val |= (cur[1]) & 0x3F;
712: val <<= 6;
713: val |= (cur[2]) & 0x3F;
714: l = 3;
715: } else if (*cur < 0xF8) {
716: val = (cur[0]) & 0x07;
717: val <<= 6;
718: val |= (cur[1]) & 0x3F;
719: val <<= 6;
720: val |= (cur[2]) & 0x3F;
721: val <<= 6;
722: val |= (cur[3]) & 0x3F;
723: l = 4;
724: }
725: if ((l == 1) || (!IS_CHAR(val))) {
726: fprintf(stderr,
727: "xmlEncodeEntitiesReentrant : char out of range\n");
728: doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
729: #ifdef HAVE_SNPRINTF
1.64 veillard 730: snprintf(buf, sizeof(buf), "&#%d;", *cur);
1.46 daniel 731: #else
732: sprintf(buf, "&#%d;", *cur);
733: #endif
1.64 veillard 734: buf[sizeof(buf) - 1] = 0;
1.46 daniel 735: ptr = buf;
736: while (*ptr != 0) *out++ = *ptr++;
737: cur++;
738: continue;
739: }
740: /*
741: * We could do multiple things here. Just save as a char ref
742: */
743: #ifdef HAVE_SNPRINTF
1.64 veillard 744: snprintf(buf, sizeof(buf), "&#x%X;", val);
1.46 daniel 745: #else
1.50 daniel 746: sprintf(buf, "&#x%X;", val);
1.46 daniel 747: #endif
1.64 veillard 748: buf[sizeof(buf) - 1] = 0;
1.47 daniel 749: ptr = buf;
750: while (*ptr != 0) *out++ = *ptr++;
751: cur += l;
752: continue;
1.45 daniel 753: }
1.21 daniel 754: } else if (IS_CHAR(*cur)) {
1.20 daniel 755: char buf[10], *ptr;
756:
757: #ifdef HAVE_SNPRINTF
1.64 veillard 758: snprintf(buf, sizeof(buf), "&#%d;", *cur);
1.20 daniel 759: #else
760: sprintf(buf, "&#%d;", *cur);
761: #endif
1.64 veillard 762: buf[sizeof(buf) - 1] = 0;
1.20 daniel 763: ptr = buf;
764: while (*ptr != 0) *out++ = *ptr++;
1.21 daniel 765: }
766: #if 0
767: else {
1.6 veillard 768: /*
1.21 daniel 769: * default case, this is not a valid char !
770: * Skip it...
1.6 veillard 771: */
1.21 daniel 772: fprintf(stderr, "xmlEncodeEntities: invalid char %d\n", (int) *cur);
1.6 veillard 773: }
1.21 daniel 774: #endif
1.6 veillard 775: cur++;
776: }
777: *out++ = 0;
778: return(buffer);
1.2 httpng 779: }
780:
1.22 daniel 781: /**
1.57 daniel 782: * xmlEncodeSpecialChars:
783: * @doc: the document containing the string
784: * @input: A string to convert to XML.
785: *
786: * Do a global encoding of a string, replacing the predefined entities
787: * this routine is reentrant, and result must be deallocated.
788: *
789: * Returns A newly allocated string with the substitution done.
790: */
791: xmlChar *
792: xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
793: const xmlChar *cur = input;
794: xmlChar *buffer = NULL;
795: xmlChar *out = NULL;
796: int buffer_size = 0;
797: int html = 0;
798:
799: if (input == NULL) return(NULL);
800: if (doc != NULL)
801: html = (doc->type == XML_HTML_DOCUMENT_NODE);
802:
803: /*
804: * allocate an translation buffer.
805: */
806: buffer_size = 1000;
807: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
808: if (buffer == NULL) {
809: perror("malloc failed");
810: return(NULL);
811: }
812: out = buffer;
813:
814: while (*cur != '\0') {
815: if (out - buffer > buffer_size - 10) {
816: int index = out - buffer;
817:
818: growBufferReentrant();
819: out = &buffer[index];
820: }
821:
822: /*
823: * By default one have to encode at least '<', '>', '"' and '&' !
824: */
825: if (*cur == '<') {
826: *out++ = '&';
827: *out++ = 'l';
828: *out++ = 't';
829: *out++ = ';';
830: } else if (*cur == '>') {
831: *out++ = '&';
832: *out++ = 'g';
833: *out++ = 't';
834: *out++ = ';';
835: } else if (*cur == '&') {
836: *out++ = '&';
837: *out++ = 'a';
838: *out++ = 'm';
839: *out++ = 'p';
840: *out++ = ';';
841: } else if (*cur == '"') {
842: *out++ = '&';
843: *out++ = 'q';
844: *out++ = 'u';
845: *out++ = 'o';
846: *out++ = 't';
847: *out++ = ';';
848: } else {
849: /*
850: * Works because on UTF-8, all extended sequences cannot
851: * result in bytes in the ASCII range.
852: */
853: *out++ = *cur;
854: }
855: cur++;
856: }
857: *out++ = 0;
858: return(buffer);
859: }
860:
861: /**
1.22 daniel 862: * xmlCreateEntitiesTable:
863: *
864: * create and initialize an empty entities hash table.
865: *
1.24 daniel 866: * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
1.2 httpng 867: */
1.22 daniel 868: xmlEntitiesTablePtr
869: xmlCreateEntitiesTable(void) {
1.67 veillard 870: return((xmlEntitiesTablePtr) xmlHashCreate(0));
1.1 httpng 871: }
872:
1.22 daniel 873: /**
874: * xmlFreeEntitiesTable:
875: * @table: An entity table
876: *
877: * Deallocate the memory used by an entities hash table.
1.1 httpng 878: */
1.22 daniel 879: void
880: xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
1.67 veillard 881: xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
1.1 httpng 882: }
883:
1.22 daniel 884: /**
1.61 veillard 885: * xmlCopyEntity:
886: * @ent: An entity
887: *
888: * Build a copy of an entity
889: *
890: * Returns the new xmlEntitiesPtr or NULL in case of error.
891: */
892: xmlEntityPtr
893: xmlCopyEntity(xmlEntityPtr ent) {
894: xmlEntityPtr cur;
895:
896: cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
897: if (cur == NULL) {
898: fprintf(stderr, "xmlCopyEntity: out of memory !\n");
899: return(NULL);
900: }
901: memset(cur, 0, sizeof(xmlEntity));
902: cur->type = XML_ELEMENT_DECL;
903:
904: cur->etype = ent->etype;
905: if (ent->name != NULL)
906: cur->name = xmlStrdup(ent->name);
907: if (ent->ExternalID != NULL)
908: cur->ExternalID = xmlStrdup(ent->ExternalID);
909: if (ent->SystemID != NULL)
910: cur->SystemID = xmlStrdup(ent->SystemID);
911: if (ent->content != NULL)
912: cur->content = xmlStrdup(ent->content);
913: if (ent->orig != NULL)
914: cur->orig = xmlStrdup(ent->orig);
915: return(cur);
916: }
917:
918: /**
1.22 daniel 919: * xmlCopyEntitiesTable:
920: * @table: An entity table
921: *
922: * Build a copy of an entity table.
923: *
1.24 daniel 924: * Returns the new xmlEntitiesTablePtr or NULL in case of error.
1.22 daniel 925: */
926: xmlEntitiesTablePtr
927: xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
1.67 veillard 928: return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
1.22 daniel 929: }
930:
931: /**
1.53 daniel 932: * xmlDumpEntityDecl:
933: * @buf: An XML buffer.
934: * @ent: An entity table
935: *
936: * This will dump the content of the entity table as an XML DTD definition
937: */
938: void
939: xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
940: switch (ent->etype) {
941: case XML_INTERNAL_GENERAL_ENTITY:
942: xmlBufferWriteChar(buf, "<!ENTITY ");
943: xmlBufferWriteCHAR(buf, ent->name);
944: xmlBufferWriteChar(buf, " ");
945: if (ent->orig != NULL)
946: xmlBufferWriteQuotedString(buf, ent->orig);
947: else
948: xmlBufferWriteQuotedString(buf, ent->content);
949: xmlBufferWriteChar(buf, ">\n");
950: break;
951: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
952: xmlBufferWriteChar(buf, "<!ENTITY ");
953: xmlBufferWriteCHAR(buf, ent->name);
954: if (ent->ExternalID != NULL) {
955: xmlBufferWriteChar(buf, " PUBLIC ");
956: xmlBufferWriteQuotedString(buf, ent->ExternalID);
957: xmlBufferWriteChar(buf, " ");
958: xmlBufferWriteQuotedString(buf, ent->SystemID);
959: } else {
960: xmlBufferWriteChar(buf, " SYSTEM ");
961: xmlBufferWriteQuotedString(buf, ent->SystemID);
962: }
963: xmlBufferWriteChar(buf, ">\n");
964: break;
965: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
966: xmlBufferWriteChar(buf, "<!ENTITY ");
967: xmlBufferWriteCHAR(buf, ent->name);
968: if (ent->ExternalID != NULL) {
969: xmlBufferWriteChar(buf, " PUBLIC ");
970: xmlBufferWriteQuotedString(buf, ent->ExternalID);
971: xmlBufferWriteChar(buf, " ");
972: xmlBufferWriteQuotedString(buf, ent->SystemID);
973: } else {
974: xmlBufferWriteChar(buf, " SYSTEM ");
975: xmlBufferWriteQuotedString(buf, ent->SystemID);
976: }
977: if (ent->content != NULL) { /* Should be true ! */
978: xmlBufferWriteChar(buf, " NDATA ");
979: if (ent->orig != NULL)
980: xmlBufferWriteCHAR(buf, ent->orig);
981: else
982: xmlBufferWriteCHAR(buf, ent->content);
983: }
984: xmlBufferWriteChar(buf, ">\n");
985: break;
986: case XML_INTERNAL_PARAMETER_ENTITY:
987: xmlBufferWriteChar(buf, "<!ENTITY % ");
988: xmlBufferWriteCHAR(buf, ent->name);
989: xmlBufferWriteChar(buf, " ");
990: if (ent->orig == NULL)
991: xmlBufferWriteQuotedString(buf, ent->content);
992: else
993: xmlBufferWriteQuotedString(buf, ent->orig);
994: xmlBufferWriteChar(buf, ">\n");
995: break;
996: case XML_EXTERNAL_PARAMETER_ENTITY:
997: xmlBufferWriteChar(buf, "<!ENTITY % ");
998: xmlBufferWriteCHAR(buf, ent->name);
999: if (ent->ExternalID != NULL) {
1000: xmlBufferWriteChar(buf, " PUBLIC ");
1001: xmlBufferWriteQuotedString(buf, ent->ExternalID);
1002: xmlBufferWriteChar(buf, " ");
1003: xmlBufferWriteQuotedString(buf, ent->SystemID);
1004: } else {
1005: xmlBufferWriteChar(buf, " SYSTEM ");
1006: xmlBufferWriteQuotedString(buf, ent->SystemID);
1007: }
1008: xmlBufferWriteChar(buf, ">\n");
1009: break;
1010: default:
1011: fprintf(stderr,
1012: "xmlDumpEntitiesTable: internal: unknown type %d\n",
1013: ent->etype);
1014: }
1015: }
1016:
1017: /**
1.22 daniel 1018: * xmlDumpEntitiesTable:
1.25 daniel 1019: * @buf: An XML buffer.
1.22 daniel 1020: * @table: An entity table
1021: *
1022: * This will dump the content of the entity table as an XML DTD definition
1.13 daniel 1023: */
1.22 daniel 1024: void
1.25 daniel 1025: xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1.67 veillard 1026: xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
1.13 daniel 1027: }
Webmaster