Annotation of XML/encoding.c, revision 1.26
1.1 daniel 1: /*
2: * encoding.c : implements the encoding conversion functions needed for XML
3: *
4: * Related specs:
5: * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6: * [ISO-10646] UTF-8 and UTF-16 in Annexes
7: * [ISO-8859-1] ISO Latin-1 characters codes.
8: * [UNICODE] The Unicode Consortium, "The Unicode Standard --
9: * Worldwide Character Encoding -- Version 1.0", Addison-
10: * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
11: * described in Unicode Technical Report #4.
12: * [US-ASCII] Coded Character Set--7-bit American Standard Code for
13: * Information Interchange, ANSI X3.4-1986.
14: *
1.9 daniel 15: * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
1.1 daniel 16: *
17: * See Copyright for the status of this software.
18: *
19: * Daniel.Veillard@w3.org
20: */
21:
1.21 daniel 22: #ifdef WIN32
23: #include "win32config.h"
24: #else
1.14 daniel 25: #include "config.h"
1.17 daniel 26: #endif
27:
28: #include <stdio.h>
29: #include <string.h>
30:
31: #ifdef HAVE_CTYPE_H
1.7 daniel 32: #include <ctype.h>
1.17 daniel 33: #endif
1.20 daniel 34: #ifdef HAVE_STDLIB_H
35: #include <stdlib.h>
36: #endif
1.1 daniel 37: #include "encoding.h"
1.16 daniel 38: #include "xmlmemory.h"
1.3 daniel 39:
/*
 * Handler for UTF-16 (little endian flavour); assigned by
 * xmlInitCharEncodingHandlers() when it registers "UTF-16LE" and returned by
 * xmlGetCharEncodingHandler() for XML_CHAR_ENCODING_UTF16LE.
 */
1.25 daniel 40: xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
/*
 * Handler returned by xmlGetCharEncodingHandler() for
 * XML_CHAR_ENCODING_UTF16BE.
 * NOTE(review): nothing in the visible part of this file assigns it, so it
 * appears to stay NULL -- confirm against the rest of the project.
 */
41: xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
42:
1.3 daniel 43: /*
44: * From rfc2044: encoding of the Unicode values on UTF-8:
45: *
46: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
47: * 0000 0000-0000 007F 0xxxxxxx
48: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
49: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
50: *
51: * I hope we won't use values > 0xFFFF anytime soon !
52: */
1.1 daniel 53:
/**
 * xmlCheckUTF8: Check utf-8 string for legality.
 * @utf: Pointer to putative utf-8 encoded string.
 *
 * Checks @utf for being valid utf-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer utf-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * A byte of the form 10xxxxxx is only legal as a trailing byte; when it
 * shows up where a sequence should start the string is rejected.
 *
 * Return value: true if @utf is valid, false otherwise (including when
 * @utf is NULL).
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    int ix = 0;
    unsigned char c;

    if (utf == NULL)
        return(0);

    /*
     * The trailing-byte tests are chained with || so that evaluation stops
     * at the first byte which is not 10xxxxxx; the terminating 0 is such a
     * byte, hence the scan never reads past the end of the string.
     */
    while ((c = utf[ix]) != 0) {
        if ((c & 0x80) == 0x00) {
            /* 1-byte code: 0xxxxxxx */
            ix++;
        } else if ((c & 0xe0) == 0xc0) {
            /* 2-byte code: 110xxxxx 10xxxxxx */
            if ((utf[ix + 1] & 0xc0) != 0x80)
                return(0);
            ix += 2;
        } else if ((c & 0xf0) == 0xe0) {
            /* 3-byte code: 1110xxxx 10xxxxxx 10xxxxxx */
            if (((utf[ix + 1] & 0xc0) != 0x80) ||
                ((utf[ix + 2] & 0xc0) != 0x80))
                return(0);
            ix += 3;
        } else if ((c & 0xf8) == 0xf0) {
            /* 4-byte code: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            if (((utf[ix + 1] & 0xc0) != 0x80) ||
                ((utf[ix + 2] & 0xc0) != 0x80) ||
                ((utf[ix + 3] & 0xc0) != 0x80))
                return(0);
            ix += 4;
        } else {
            /* stray trailing byte, or a lead byte announcing > 4 bytes */
            return(0);
        }
    }
    return(1);
}
97:
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Chars below 0x80 are copied as is, the others are
 * expanded to a two bytes sequence. @inlen is only read, never updated.
 *
 * A two bytes sequence is either written completely or not at all: on lack
 * of space the output never ends with a lone lead byte.
 *
 * Returns the number of bytes written, or -1 by lack of space.
 */
int
isolat1ToUTF8(unsigned char* out, int outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart= out;
    unsigned char* outend= out+outlen;
    const unsigned char* inend= in+*inlen;
    unsigned char c;

    while (in < inend) {
        c= *in++;
        if (c < 0x80) {
            if (out >= outend) return -1;
            *out++ = c;
        }
        else {
            /* make sure both bytes fit before emitting the first one */
            if (outend - out < 2) return -1;
            *out++ = 0xC0 | (c >> 6);
            *out++ = 0x80 | (0x3F & c);
        }
    }
    return (int) (out-outstart);
}
132:
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 * When the input stops right after a non-ASCII byte, that byte is left
 * unconverted, *@inlen is decremented by one and the conversion ends
 * normally.
 * TODO: UTF8Toisolat1 need a fallback mechanism ...
 *
 * Returns the number of bytes written, or -1 by lack of space, or -2
 * if the transcoding failed (*in is not a valid utf8 string or
 * the result of the transformation can't fit into the encoding we want)
 */
int
UTF8Toisolat1(unsigned char* out, int outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart= out;
    unsigned char* outend= out+outlen;
    const unsigned char* inend= in+*inlen;
    unsigned char c;

    while (in < inend) {
        c= *in++;
        if (c < 0x80) {
            if (out >= outend) return -1;
            *out++= c;
        }
        else if (in == inend) {
            /* sequence cut by the end of the block: give the byte back */
            *inlen -= 1;
            break;
        }
        else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
            /* a two byte utf-8 and can be encoded as isolat1 */
            if (out >= outend) return -1;
            *out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
        }
        else return -2;
        /* TODO : some should be represented as "&#x____;" */
    }
    return (int) (out-outstart);
}
175:
/**
 * UTF16ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16 passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 * block of chars out. The 16 bits units are read in the byte order of the
 * machine; an odd trailing byte is ignored. @inlenb is only read.
 *
 * The units are fetched with memcpy() so that @inb needs no particular
 * alignment, and each UTF-8 sequence is written completely or not at all.
 *
 * Returns the number of bytes written, or -1 by lack of space or when a
 * high surrogate is not followed by a low surrogate.
 */
int
UTF16ToUTF8(unsigned char* out, int outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart= out;
    unsigned char* outend= out+outlen;
    const unsigned char* in= inb;
    const unsigned char* inend;
    unsigned short unit;
    unsigned int c, d, inlen;
    int bits, needed;

    inlen = *inlenb / 2;
    inend= inb + inlen * sizeof(unit);
    while (in < inend) {
        memcpy(&unit, in, sizeof(unit));
        in += sizeof(unit);
        c= unit;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) return -1;
            memcpy(&unit, in, sizeof(unit));
            in += sizeof(unit);
            d= unit;
            if ((d & 0xFC00) != 0xDC00) return -1;
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed= 1;
        else if (c <   0x800) needed= 2;
        else if (c < 0x10000) needed= 3;
        else                  needed= 4;
        if (outend - out < needed) return -1;

        /*
         * bits is the shift of the first trailing byte; -6 means that
         * there is no trailing byte at all.
         */
        if      (c <    0x80) {  *out++=  c;                       bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0; bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0; bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }

        for ( ; bits >= 0; bits-= 6) {
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
    }
    return (int) (out-outstart);
}
227:
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. The 16 bits units are stored in the byte order of
 * the machine. @inlen is only read.
 * TODO: UTF8ToUTF16 need a fallback mechanism ...
 *
 * The end of the output is derived from @outlen counted in bytes, so no
 * more than @outlen bytes are ever written, and the units are stored with
 * memcpy() so that @outb needs no particular alignment.
 *
 * Returns the number of bytes written, or -1 by lack of space, on a
 * truncated or malformed multi-byte sequence or on a value above 0x10FFFF,
 * or -2 if the first byte of a sequence cannot start a character.
 */
int
UTF8ToUTF16(unsigned char* outb, int outlen,
            const unsigned char* in, int *inlen)
{
    const int unitsz= (int) sizeof(unsigned short);
    unsigned char* out= outb;
    unsigned char* outend;
    const unsigned char* inend= in+*inlen;
    unsigned short unit;
    unsigned int c, d, trailing;

    outlen /= 2; /* convert in short length */
    outend= outb + outlen * unitsz;
    while (in < inend) {
      d= *in++;
      if      (d < 0x80)  { c= d; trailing= 0; }
      else if (d < 0xC0)  return -2;    /* trailing byte in leading position */
      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
      else return -2;    /* no chance for this in UTF-16 */

      for ( ; trailing; trailing--) {
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  return -1;
          c <<= 6;
          c |= d & 0x3F;
      }

      /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < unitsz)  return -1;
            unit= (unsigned short) c;
            memcpy(out, &unit, sizeof(unit));
            out += unitsz;
        }
        else if (c < 0x110000) {
            /* needs a surrogate pair, written completely or not at all */
            if (outend - out < 2 * unitsz)  return -1;
            c -= 0x10000;
            unit= (unsigned short) (0xD800 | (c >> 10));
            memcpy(out, &unit, sizeof(unit));
            out += unitsz;
            unit= (unsigned short) (0xDC00 | (c & 0x03FF));
            memcpy(out, &unit, sizeof(unit));
            out += unitsz;
        }
        else  return -1;
    }
    return (int) (out-outb);
}
283:
1.7 daniel 284: /**
285: * xmlDetectCharEncoding:
286: * @in: a pointer to the first bytes of the XML entity, must be at least
287: * 4 bytes long.
1.25 daniel 288: * @len: pointer to the length of the buffer
1.7 daniel 289: *
290: * Guess the encoding of the entity using the first bytes of the entity content
291: * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
292: *
293: * Returns one of the XML_CHAR_ENCODING_... values.
294: */
295: xmlCharEncoding
1.25 daniel 296: xmlDetectCharEncoding(const unsigned char* in, int len)
1.7 daniel 297: {
1.25 daniel 298: if (len >= 4) {
299: if ((in[0] == 0x00) && (in[1] == 0x00) &&
300: (in[2] == 0x00) && (in[3] == 0x3C))
301: return(XML_CHAR_ENCODING_UCS4BE);
302: if ((in[0] == 0x3C) && (in[1] == 0x00) &&
303: (in[2] == 0x00) && (in[3] == 0x00))
304: return(XML_CHAR_ENCODING_UCS4LE);
305: if ((in[0] == 0x00) && (in[1] == 0x00) &&
306: (in[2] == 0x3C) && (in[3] == 0x00))
307: return(XML_CHAR_ENCODING_UCS4_2143);
308: if ((in[0] == 0x00) && (in[1] == 0x3C) &&
309: (in[2] == 0x00) && (in[3] == 0x00))
310: return(XML_CHAR_ENCODING_UCS4_3412);
311: if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
312: (in[2] == 0xA7) && (in[3] == 0x94))
313: return(XML_CHAR_ENCODING_EBCDIC);
314: if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
315: (in[2] == 0x78) && (in[3] == 0x6D))
316: return(XML_CHAR_ENCODING_UTF8);
317: }
318: if (len >= 2) {
319: if ((in[0] == 0xFE) && (in[1] == 0xFF))
320: return(XML_CHAR_ENCODING_UTF16BE);
321: if ((in[0] == 0xFF) && (in[1] == 0xFE))
322: return(XML_CHAR_ENCODING_UTF16LE);
323: }
1.7 daniel 324: return(XML_CHAR_ENCODING_NONE);
325: }
326:
327: /**
328: * xmlParseCharEncoding:
1.18 daniel 329: * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1.7 daniel 330: *
331: * Conpare the string to the known encoding schemes already known. Note
332: * that the comparison is case insensitive accordingly to the section
333: * [XML] 4.3.3 Character Encoding in Entities.
334: *
335: * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
336: * if not recognized.
337: */
338: xmlCharEncoding
1.8 daniel 339: xmlParseCharEncoding(const char* name)
1.7 daniel 340: {
341: char upper[500];
342: int i;
343:
344: for (i = 0;i < 499;i++) {
345: upper[i] = toupper(name[i]);
346: if (upper[i] == 0) break;
347: }
348: upper[i] = 0;
349:
350: if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
351: if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
352: if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
353:
354: /*
355: * NOTE: if we were able to parse this, the endianness of UTF16 is
356: * already found and in use
357: */
358: if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
359: if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
360:
361: if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
362: if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
363: if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
364:
365: /*
366: * NOTE: if we were able to parse this, the endianness of UCS4 is
367: * already found and in use
368: */
369: if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
370: if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
371: if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
372:
373:
374: if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
375: if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
376: if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
377:
378: if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
379: if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
380: if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
381:
382: if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
383: if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
384: if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
385: if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
386: if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
387: if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
388: if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
389:
390: if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
391: if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
392: if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
393: return(XML_CHAR_ENCODING_ERROR);
394: }
1.9 daniel 395:
396: /****************************************************************
397: * *
398: * Char encoding handlers *
399: * *
400: ****************************************************************/
401:
402: /* the size should be growable, but it's not a big deal ... */
/*
 * Fixed capacity of the handlers table: xmlRegisterCharEncodingHandler()
 * refuses any registration once that many handlers are stored.
 */
403: #define MAX_ENCODING_HANDLERS 50
/*
 * Table of the registered handlers, allocated by
 * xmlInitCharEncodingHandlers() and released by
 * xmlCleanupCharEncodingHandlers(); NULL as long as it is not allocated.
 */
404: static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of entries of the handlers table currently in use. */
405: static int nbCharEncodingHandler = 0;
406:
407: /*
408: * The default is UTF-8 for XML, that's also the default used for the
409: * parser internals, so the default encoding handler is NULL
410: */
411:
/*
 * Returned by xmlFindCharEncodingHandler() for a NULL or empty name.
 * NOTE(review): in the visible code it is only read and reset to NULL,
 * never set to an actual handler -- confirm against the rest of the file.
 */
412: static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
413:
414: /**
415: * xmlNewCharEncodingHandler:
1.18 daniel 416: * @name: the encoding name, in UTF-8 format (ASCII actually)
1.9 daniel 417: * @input: the xmlCharEncodingInputFunc to read that encoding
418: * @output: the xmlCharEncodingOutputFunc to write that encoding
419: *
420: * Create and registers an xmlCharEncodingHandler.
421: * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
422: */
423: xmlCharEncodingHandlerPtr
1.25 daniel 424: xmlNewCharEncodingHandler(const char *name,
425: xmlCharEncodingInputFunc input,
1.9 daniel 426: xmlCharEncodingOutputFunc output) {
427: xmlCharEncodingHandlerPtr handler;
428: char upper[500];
429: int i;
430: char *up = 0;
431:
432: /*
433: * Keep only the uppercase version of the encoding.
434: */
435: if (name == NULL) {
436: fprintf(stderr, "xmlNewCharEncodingHandler : no name !\n");
437: return(NULL);
438: }
439: for (i = 0;i < 499;i++) {
440: upper[i] = toupper(name[i]);
441: if (upper[i] == 0) break;
442: }
443: upper[i] = 0;
1.16 daniel 444: up = xmlMemStrdup(upper);
1.9 daniel 445: if (up == NULL) {
446: fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
447: return(NULL);
448: }
449:
450: /*
451: * allocate and fill-up an handler block.
452: */
453: handler = (xmlCharEncodingHandlerPtr)
1.16 daniel 454: xmlMalloc(sizeof(xmlCharEncodingHandler));
1.9 daniel 455: if (handler == NULL) {
456: fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
457: return(NULL);
458: }
459: handler->input = input;
460: handler->output = output;
461: handler->name = up;
462:
463: /*
464: * registers and returns the handler.
465: */
466: xmlRegisterCharEncodingHandler(handler);
467: return(handler);
468: }
469:
470: /**
471: * xmlInitCharEncodingHandlers:
472: *
473: * Initialize the char encoding support, it registers the default
474: * encoding supported.
1.18 daniel 475: * NOTE: while public, this function usually doesn't need to be called
1.9 daniel 476: * in normal processing.
477: */
478: void
479: xmlInitCharEncodingHandlers(void) {
480: if (handlers != NULL) return;
481:
482: handlers = (xmlCharEncodingHandlerPtr *)
1.16 daniel 483: xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1.9 daniel 484:
485: if (handlers == NULL) {
486: fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n");
487: return;
488: }
1.10 daniel 489: xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
1.25 daniel 490: xmlUTF16LEHandler =
491: xmlNewCharEncodingHandler("UTF-16LE", UTF16ToUTF8, UTF8ToUTF16);
1.10 daniel 492: xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1.9 daniel 493: }
494:
495: /**
1.19 daniel 496: * xmlCleanupCharEncodingHandlers:
497: *
498: * Cleanup the memory allocated for the char encoding support, it
499: * unregisters all the encoding handlers.
500: */
501: void
502: xmlCleanupCharEncodingHandlers(void) {
503: if (handlers == NULL) return;
504:
505: for (;nbCharEncodingHandler > 0;) {
506: nbCharEncodingHandler--;
507: if (handlers[nbCharEncodingHandler] != NULL) {
508: xmlFree(handlers[nbCharEncodingHandler]->name);
509: xmlFree(handlers[nbCharEncodingHandler]);
510: }
511: }
512: xmlFree(handlers);
513: handlers = NULL;
514: nbCharEncodingHandler = 0;
515: xmlDefaultCharEncodingHandler = NULL;
516: }
517:
518: /**
1.9 daniel 519: * xmlRegisterCharEncodingHandler:
520: * @handler: the xmlCharEncodingHandlerPtr handler block
521: *
522: * Register the char encoding handler, surprizing, isn't it ?
523: */
524: void
525: xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
526: if (handlers == NULL) xmlInitCharEncodingHandlers();
527: if (handler == NULL) {
528: fprintf(stderr, "xmlRegisterCharEncodingHandler: NULL handler !\n");
529: return;
530: }
531:
532: if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
533: fprintf(stderr,
534: "xmlRegisterCharEncodingHandler: Too many handler registered\n");
535: fprintf(stderr, "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
536: return;
537: }
538: handlers[nbCharEncodingHandler++] = handler;
539: }
540:
541: /**
542: * xmlGetCharEncodingHandler:
543: * @enc: an xmlCharEncoding value.
544: *
545: * Search in the registrered set the handler able to read/write that encoding.
546: *
547: * Returns the handler or NULL if not found
548: */
549: xmlCharEncodingHandlerPtr
550: xmlGetCharEncodingHandler(xmlCharEncoding enc) {
551: if (handlers == NULL) xmlInitCharEncodingHandlers();
1.25 daniel 552: switch (enc) {
553: case XML_CHAR_ENCODING_ERROR:
554: return(NULL);
555: case XML_CHAR_ENCODING_NONE:
556: return(NULL);
557: case XML_CHAR_ENCODING_UTF8:
558: return(NULL);
559: case XML_CHAR_ENCODING_UTF16LE:
560: return(xmlUTF16LEHandler);
561: case XML_CHAR_ENCODING_UTF16BE:
562: return(xmlUTF16BEHandler);
563: case XML_CHAR_ENCODING_EBCDIC:
564: return(NULL);
565: case XML_CHAR_ENCODING_UCS4LE:
566: return(NULL);
567: case XML_CHAR_ENCODING_UCS4BE:
568: return(NULL);
569: case XML_CHAR_ENCODING_UCS4_2143:
570: return(NULL);
571: case XML_CHAR_ENCODING_UCS4_3412:
572: return(NULL);
573: case XML_CHAR_ENCODING_UCS2:
574: return(NULL);
575: case XML_CHAR_ENCODING_8859_1:
576: return(NULL);
577: case XML_CHAR_ENCODING_8859_2:
578: return(NULL);
579: case XML_CHAR_ENCODING_8859_3:
580: return(NULL);
581: case XML_CHAR_ENCODING_8859_4:
582: return(NULL);
583: case XML_CHAR_ENCODING_8859_5:
584: return(NULL);
585: case XML_CHAR_ENCODING_8859_6:
586: return(NULL);
587: case XML_CHAR_ENCODING_8859_7:
588: return(NULL);
589: case XML_CHAR_ENCODING_8859_8:
590: return(NULL);
591: case XML_CHAR_ENCODING_8859_9:
592: return(NULL);
593: case XML_CHAR_ENCODING_2022_JP:
594: case XML_CHAR_ENCODING_SHIFT_JIS:
595: case XML_CHAR_ENCODING_EUC_JP:
596: return(NULL);
597: }
1.9 daniel 598: return(NULL);
599: }
600:
601: /**
602: * xmlGetCharEncodingHandler:
603: * @enc: a string describing the char encoding.
604: *
605: * Search in the registrered set the handler able to read/write that encoding.
606: *
607: * Returns the handler or NULL if not found
608: */
609: xmlCharEncodingHandlerPtr
610: xmlFindCharEncodingHandler(const char *name) {
611: char upper[500];
612: int i;
613:
614: if (handlers == NULL) xmlInitCharEncodingHandlers();
615: if (name == NULL) return(xmlDefaultCharEncodingHandler);
616: if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
617:
618: for (i = 0;i < 499;i++) {
619: upper[i] = toupper(name[i]);
620: if (upper[i] == 0) break;
621: }
622: upper[i] = 0;
623:
624: for (i = 0;i < nbCharEncodingHandler; i++)
625: if (!strcmp(name, handlers[i]->name))
626: return(handlers[i]);
627:
628: return(NULL);
629: }
630:
Webmaster