Annotation of XML/encoding.c, revision 1.23

1.1       daniel      1: /*
                      2:  * encoding.c : implements the encoding conversion functions needed for XML
                      3:  *
                      4:  * Related specs: 
                      5:  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
                      6:  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
                      7:  * [ISO-8859-1]   ISO Latin-1 characters codes.
                      8:  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
                      9:  *                Worldwide Character Encoding -- Version 1.0", Addison-
                     10:  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
                     11:  *                described in Unicode Technical Report #4.
                     12:  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
                     13:  *                Information Interchange, ANSI X3.4-1986.
                     14:  *
1.9       daniel     15:  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
1.1       daniel     16:  *
                     17:  * See Copyright for the status of this software.
                     18:  *
                     19:  * Daniel.Veillard@w3.org
                     20:  */
                     21: 
1.21      daniel     22: #ifdef WIN32
                     23: #include "win32config.h"
                     24: #else
1.14      daniel     25: #include "config.h"
1.17      daniel     26: #endif
                     27: 
                     28: #include <stdio.h>
                     29: #include <string.h>
                     30: 
                     31: #ifdef HAVE_CTYPE_H
1.7       daniel     32: #include <ctype.h>
1.17      daniel     33: #endif
1.20      daniel     34: #ifdef HAVE_STDLIB_H
                     35: #include <stdlib.h>
                     36: #endif
1.1       daniel     37: #include "encoding.h"
1.12      daniel     38: #ifdef HAVE_UNICODE_H
                     39: #include <unicode.h>
                     40: #endif
1.16      daniel     41: #include "xmlmemory.h"
1.3       daniel     42: 
                     43: /*
                     44:  * From rfc2044: encoding of the Unicode values on UTF-8:
                     45:  *
                     46:  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                     47:  * 0000 0000-0000 007F   0xxxxxxx
                     48:  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                     49:  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                     50:  *
                     51:  * I hope we won't use values > 0xFFFF anytime soon !
                     52:  */
1.1       daniel     53: 
                     54: /**
1.22      daniel     55:  * xmlCheckUTF8: Check utf-8 string for legality.
                     56:  * @utf: Pointer to putative utf-8 encoded string.
                     57:  *
                     58:  * Checks @utf for being valid utf-8. @utf is assumed to be
                     59:  * null-terminated. This function is not super-strict, as it will
                     60:  * allow longer utf-8 sequences than necessary. Note that Java is
                     61:  * capable of producing these sequences if provoked. Also note, this
                     62:  * routine checks for the 4-byte maxiumum size, but does not check for
                     63:  * 0x10ffff maximum value.
                     64:  *
                     65:  * Return value: true if @utf is valid.
                     66:  **/
                     67: int
                     68: xmlCheckUTF8(const unsigned char *utf)
                     69: {
                     70:     int ix;
                     71:     unsigned char c;
                     72: 
                     73:     for (ix = 0; (c = utf[ix]);) {
                     74:         if (c & 0x80) {
                     75:            if ((utf[ix + 1] & 0xc0) != 0x80)
                     76:                return(0);
                     77:            if ((c & 0xe0) == 0xe0) {
                     78:                if ((utf[ix + 2] & 0xc0) != 0x80)
                     79:                    return(0);
                     80:                if ((c & 0xf0) == 0xf0) {
                     81:                    if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
                     82:                        return(0);
                     83:                    ix += 4;
                     84:                    /* 4-byte code */
                     85:                } else
                     86:                  /* 3-byte code */
                     87:                    ix += 3;
                     88:            } else
                     89:              /* 2-byte code */
                     90:                ix += 2;
                     91:        } else
                     92:            /* 1-byte code */
                     93:            ix++;
                     94:       }
                     95:       return(1);
                     96: }
                     97: 
                     98: /**
1.1       daniel     99:  * isolat1ToUTF8:
1.18      daniel    100:  * @out:  a pointer to an array of bytes to store the result
                    101:  * @outlen:  the length of @out
                    102:  * @in:  a pointer to an array of ISO Latin 1 chars
                    103:  * @inlen:  the length of @in
1.1       daniel    104:  *
                    105:  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
                    106:  * block of chars out.
1.6       daniel    107:  * Returns the number of byte written, or -1 by lack of space.
1.1       daniel    108:  */
                    109: int
1.23    ! daniel    110: isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int *inlen)
1.1       daniel    111: {
                    112:     unsigned char* outstart= out;
                    113:     unsigned char* outend= out+outlen;
1.23    ! daniel    114:     unsigned char* inend= in+*inlen;
1.1       daniel    115:     unsigned char c;
                    116: 
                    117:     while (in < inend) {
                    118:         c= *in++;
                    119:         if (c < 0x80) {
                    120:             if (out >= outend)  return -1;
                    121:             *out++ = c;
                    122:         }
                    123:         else {
                    124:             if (out >= outend)  return -1;
                    125:             *out++ = 0xC0 | (c >> 6);
                    126:             if (out >= outend)  return -1;
                    127:             *out++ = 0x80 | (0x3F & c);
                    128:         }
                    129:     }
                    130:     return out-outstart;
                    131: }
                    132: 
                    133: /**
                    134:  * UTF8Toisolat1:
1.18      daniel    135:  * @out:  a pointer to an array of bytes to store the result
                    136:  * @outlen:  the length of @out
                    137:  * @in:  a pointer to an array of UTF-8 chars
                    138:  * @inlen:  the length of @in
1.1       daniel    139:  *
                    140:  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
                    141:  * block of chars out.
1.15      daniel    142:  * TODO: UTF8Toisolat1 need a fallback mechanism ...
                    143:  *
1.6       daniel    144:  * Returns the number of byte written, or -1 by lack of space, or -2
1.23    ! daniel    145:  *     if the transcoding faile (for *in is not valid utf8 string or
        !           146:  *     the result of transformation can't fit into the encoding we want)
1.1       daniel    147:  */
                    148: int
1.23    ! daniel    149: UTF8Toisolat1(unsigned char* out, int outlen, unsigned char* in, int *inlen)
1.1       daniel    150: {
                    151:     unsigned char* outstart= out;
                    152:     unsigned char* outend= out+outlen;
1.23    ! daniel    153:     unsigned char* inend= in+*inlen;
1.1       daniel    154:     unsigned char c;
                    155: 
                    156:     while (in < inend) {
                    157:         c= *in++;
                    158:         if (c < 0x80) {
                    159:             if (out >= outend)  return -1;
                    160:             *out++= c;
                    161:         }
1.23    ! daniel    162:        else if (in == inend) {
        !           163:             *inlen -= 1;
        !           164:             break;
        !           165:        }
        !           166:        else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
        !           167:            /* a two byte utf-8 and can be encoding as isolate1 */
1.1       daniel    168:             *out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
1.23    ! daniel    169:        }
        !           170:        else return -2;
        !           171:        /* TODO : some should be represent as "&#x____;" */
1.1       daniel    172:     }
                    173:     return out-outstart;
                    174: }
                    175: 
                    176: /**
                    177:  * UTF16ToUTF8:
1.18      daniel    178:  * @out:  a pointer to an array of bytes to store the result
                    179:  * @outlen:  the length of @out
                    180:  * @in:  a pointer to an array of UTF-16 chars (array of unsigned shorts)
                    181:  * @inlen:  the length of @in
1.1       daniel    182:  *
                    183:  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
                    184:  * block of chars out.
1.6       daniel    185:  * Returns the number of byte written, or -1 by lack of space.
1.1       daniel    186:  */
                    187: int
1.23    ! daniel    188: UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int *inlen)
1.1       daniel    189: {
                    190:     unsigned char* outstart= out;
                    191:     unsigned char* outend= out+outlen;
1.23    ! daniel    192:     unsigned short* inend= in+*inlen;
1.1       daniel    193:     unsigned int c, d;
                    194:     int bits;
                    195: 
                    196:     while (in < inend) {
                    197:         c= *in++;
                    198:         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
                    199:             if ((in<inend) && (((d=*in++) & 0xFC00) == 0xDC00)) {
                    200:                 c &= 0x03FF;
                    201:                 c <<= 10;
                    202:                 c |= d & 0x03FF;
                    203:                 c += 0x10000;
                    204:             }
                    205:             else  return -1;
                    206:         }
                    207: 
                    208:       /* assertion: c is a single UTF-4 value */
                    209: 
                    210:         if (out >= outend)  return -1;
                    211:         if      (c <    0x80) {  *out++=  c;                bits= -6; }
                    212:         else if (c <   0x800) {  *out++= (c >>  6) | 0xC0;  bits=  0; }
                    213:         else if (c < 0x10000) {  *out++= (c >> 12) | 0xE0;  bits=  6; }
                    214:         else                  {  *out++= (c >> 18) | 0xF0;  bits= 12; }
                    215:  
1.18      daniel    216:         for ( ; bits > 0; bits-= 6) {
1.1       daniel    217:             if (out >= outend)  return -1;
                    218:             *out++= (c >> bits) & 0x3F;
                    219:         }
                    220:     }
                    221:     return out-outstart;
                    222: }
                    223: 
                    224: /**
                    225:  * UTF8ToUTF16:
1.18      daniel    226:  * @out:  a pointer to an array of shorts to store the result
                    227:  * @outlen:  the length of @out (number of shorts)
                    228:  * @in:  a pointer to an array of UTF-8 chars
                    229:  * @inlen:  the length of @in
1.1       daniel    230:  *
                    231:  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
                    232:  * block of chars out.
1.15      daniel    233:  * TODO: UTF8ToUTF16 need a fallback mechanism ...
                    234:  *
1.6       daniel    235:  * Returns the number of byte written, or -1 by lack of space, or -2
1.1       daniel    236:  *     if the transcoding failed.
                    237:  */
                    238: int
1.23    ! daniel    239: UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int *inlen)
1.1       daniel    240: {
                    241:     unsigned short* outstart= out;
                    242:     unsigned short* outend= out+outlen;
1.23    ! daniel    243:     unsigned char* inend= in+*inlen;
1.1       daniel    244:     unsigned int c, d, trailing;
                    245: 
                    246:     while (in < inend) {
                    247:       d= *in++;
                    248:       if      (d < 0x80)  { c= d; trailing= 0; }
                    249:       else if (d < 0xC0)  return -2;    /* trailing byte in leading position */
                    250:       else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                    251:       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                    252:       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                    253:       else return -2;    /* no chance for this in UTF-16 */
                    254: 
                    255:       for ( ; trailing; trailing--) {
                    256:           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  return -1;
                    257:           c <<= 6;
                    258:           c |= d & 0x3F;
                    259:       }
                    260: 
                    261:       /* assertion: c is a single UTF-4 value */
                    262:         if (c < 0x10000) {
                    263:             if (out >= outend)  return -1;
                    264:             *out++ = c;
                    265:         }
                    266:         else if (c < 0x110000) {
                    267:             if (out+1 >= outend)  return -1;
                    268:             c -= 0x10000;
                    269:             *out++ = 0xD800 | (c >> 10);
                    270:             *out++ = 0xDC00 | (c & 0x03FF);
                    271:         }
                    272:         else  return -1;
                    273:     }
                    274:     return out-outstart;
                    275: }
                    276: 
1.7       daniel    277: /**
                    278:  * xmlDetectCharEncoding:
                    279:  * @in:  a pointer to the first bytes of the XML entity, must be at least
                    280:  *       4 bytes long.
                    281:  *
                    282:  * Guess the encoding of the entity using the first bytes of the entity content
                    283:  * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
                    284:  * 
                    285:  * Returns one of the XML_CHAR_ENCODING_... values.
                    286:  */
                    287: xmlCharEncoding
1.8       daniel    288: xmlDetectCharEncoding(const unsigned char* in)
1.7       daniel    289: {
                    290:     if ((in[0] == 0x00) && (in[1] == 0x00) &&
                    291:         (in[2] == 0x00) && (in[3] == 0x3C))
                    292:        return(XML_CHAR_ENCODING_UCS4BE);
                    293:     if ((in[0] == 0x3C) && (in[1] == 0x00) &&
                    294:         (in[2] == 0x00) && (in[3] == 0x00))
                    295:        return(XML_CHAR_ENCODING_UCS4LE);
                    296:     if ((in[0] == 0x00) && (in[1] == 0x00) &&
                    297:         (in[2] == 0x3C) && (in[3] == 0x00))
                    298:        return(XML_CHAR_ENCODING_UCS4_2143);
                    299:     if ((in[0] == 0x00) && (in[1] == 0x3C) &&
                    300:         (in[2] == 0x00) && (in[3] == 0x00))
                    301:        return(XML_CHAR_ENCODING_UCS4_3412);
                    302:     if ((in[0] == 0xFE) && (in[1] == 0xFF))
                    303:        return(XML_CHAR_ENCODING_UTF16BE);
                    304:     if ((in[0] == 0xFF) && (in[1] == 0xFE))
                    305:        return(XML_CHAR_ENCODING_UTF16LE);
                    306:     if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
                    307:         (in[2] == 0xA7) && (in[3] == 0x94))
                    308:        return(XML_CHAR_ENCODING_EBCDIC);
                    309:     if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
                    310:         (in[2] == 0x78) && (in[3] == 0x6D))
                    311:        return(XML_CHAR_ENCODING_UTF8);
                    312:     return(XML_CHAR_ENCODING_NONE);
                    313: }
                    314: 
                    315: /**
                    316:  * xmlParseCharEncoding:
1.18      daniel    317:  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1.7       daniel    318:  *
                    319:  * Conpare the string to the known encoding schemes already known. Note
                    320:  * that the comparison is case insensitive accordingly to the section
                    321:  * [XML] 4.3.3 Character Encoding in Entities.
                    322:  * 
                    323:  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
                    324:  * if not recognized.
                    325:  */
                    326: xmlCharEncoding
1.8       daniel    327: xmlParseCharEncoding(const char* name)
1.7       daniel    328: {
                    329:     char upper[500];
                    330:     int i;
                    331: 
                    332:     for (i = 0;i < 499;i++) {
                    333:         upper[i] = toupper(name[i]);
                    334:        if (upper[i] == 0) break;
                    335:     }
                    336:     upper[i] = 0;
                    337: 
                    338:     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
                    339:     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
                    340:     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
                    341: 
                    342:     /*
                    343:      * NOTE: if we were able to parse this, the endianness of UTF16 is
                    344:      *       already found and in use
                    345:      */
                    346:     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
                    347:     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
                    348:     
                    349:     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
                    350:     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
                    351:     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
                    352: 
                    353:     /*
                    354:      * NOTE: if we were able to parse this, the endianness of UCS4 is
                    355:      *       already found and in use
                    356:      */
                    357:     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
                    358:     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
                    359:     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
                    360: 
                    361:     
                    362:     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
                    363:     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
                    364:     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
                    365: 
                    366:     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
                    367:     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
                    368:     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
                    369: 
                    370:     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
                    371:     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
                    372:     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
                    373:     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
                    374:     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
                    375:     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
                    376:     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
                    377: 
                    378:     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
                    379:     if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
                    380:     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
                    381:     return(XML_CHAR_ENCODING_ERROR);
                    382: }
1.9       daniel    383: 
                    384: /****************************************************************
                    385:  *                                                             *
                    386:  *             Char encoding handlers                          *
                    387:  *                                                             *
                    388:  ****************************************************************/
                    389: 
                    390: /* the size should be growable, but it's not a big deal ... */
                    391: #define MAX_ENCODING_HANDLERS 50
                    392: static xmlCharEncodingHandlerPtr *handlers = NULL;
                    393: static int nbCharEncodingHandler = 0;
                    394: 
                    395: /*
                    396:  * The default is UTF-8 for XML, that's also the default used for the
                    397:  * parser internals, so the default encoding handler is NULL
                    398:  */
                    399: 
                    400: static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
                    401: 
                    402: /**
                    403:  * xmlNewCharEncodingHandler:
1.18      daniel    404:  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1.9       daniel    405:  * @input:  the xmlCharEncodingInputFunc to read that encoding
                    406:  * @output:  the xmlCharEncodingOutputFunc to write that encoding
                    407:  *
                    408:  * Create and registers an xmlCharEncodingHandler.
                    409:  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
                    410:  */
                    411: xmlCharEncodingHandlerPtr
                    412: xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input,
                    413:                           xmlCharEncodingOutputFunc output) {
                    414:     xmlCharEncodingHandlerPtr handler;
                    415:     char upper[500];
                    416:     int i;
                    417:     char *up = 0;
                    418: 
                    419:     /*
                    420:      * Keep only the uppercase version of the encoding.
                    421:      */
                    422:     if (name == NULL) {
                    423:         fprintf(stderr, "xmlNewCharEncodingHandler : no name !\n");
                    424:        return(NULL);
                    425:     }
                    426:     for (i = 0;i < 499;i++) {
                    427:         upper[i] = toupper(name[i]);
                    428:        if (upper[i] == 0) break;
                    429:     }
                    430:     upper[i] = 0;
1.16      daniel    431:     up = xmlMemStrdup(upper);
1.9       daniel    432:     if (up == NULL) {
                    433:         fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
                    434:        return(NULL);
                    435:     }
                    436: 
                    437:     /*
                    438:      * allocate and fill-up an handler block.
                    439:      */
                    440:     handler = (xmlCharEncodingHandlerPtr)
1.16      daniel    441:               xmlMalloc(sizeof(xmlCharEncodingHandler));
1.9       daniel    442:     if (handler == NULL) {
                    443:         fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
                    444:        return(NULL);
                    445:     }
                    446:     handler->input = input;
                    447:     handler->output = output;
                    448:     handler->name = up;
                    449: 
                    450:     /*
                    451:      * registers and returns the handler.
                    452:      */
                    453:     xmlRegisterCharEncodingHandler(handler);
                    454:     return(handler);
                    455: }
                    456: 
                    457: /**
                    458:  * xmlInitCharEncodingHandlers:
                    459:  *
                    460:  * Initialize the char encoding support, it registers the default
                    461:  * encoding supported.
1.18      daniel    462:  * NOTE: while public, this function usually doesn't need to be called
1.9       daniel    463:  *       in normal processing.
                    464:  */
                    465: void
                    466: xmlInitCharEncodingHandlers(void) {
                    467:     if (handlers != NULL) return;
                    468: 
                    469:     handlers = (xmlCharEncodingHandlerPtr *)
1.16      daniel    470:         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1.9       daniel    471: 
                    472:     if (handlers == NULL) {
                    473:         fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n");
                    474:        return;
                    475:     }
1.10      daniel    476:     xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
1.12      daniel    477: #ifdef HAVE_UNICODE_H
                    478: #else
1.13      daniel    479:     /* xmlNewCharEncodingHandler("UTF-16", UTF16ToUTF8, UTF8ToUTF16); */
1.10      daniel    480:     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1.12      daniel    481: #endif
1.9       daniel    482: }
                    483: 
                    484: /**
1.19      daniel    485:  * xmlCleanupCharEncodingHandlers:
                    486:  *
                    487:  * Cleanup the memory allocated for the char encoding support, it
                    488:  * unregisters all the encoding handlers.
                    489:  */
                    490: void
                    491: xmlCleanupCharEncodingHandlers(void) {
                    492:     if (handlers == NULL) return;
                    493: 
                    494:     for (;nbCharEncodingHandler > 0;) {
                    495:         nbCharEncodingHandler--;
                    496:        if (handlers[nbCharEncodingHandler] != NULL) {
                    497:            xmlFree(handlers[nbCharEncodingHandler]->name);
                    498:            xmlFree(handlers[nbCharEncodingHandler]);
                    499:        }
                    500:     }
                    501:     xmlFree(handlers);
                    502:     handlers = NULL;
                    503:     nbCharEncodingHandler = 0;
                    504:     xmlDefaultCharEncodingHandler = NULL;
                    505: }
                    506: 
                    507: /**
1.9       daniel    508:  * xmlRegisterCharEncodingHandler:
                    509:  * @handler:  the xmlCharEncodingHandlerPtr handler block
                    510:  *
                    511:  * Register the char encoding handler, surprizing, isn't it ?
                    512:  */
                    513: void
                    514: xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
                    515:     if (handlers == NULL) xmlInitCharEncodingHandlers();
                    516:     if (handler == NULL) {
                    517:         fprintf(stderr, "xmlRegisterCharEncodingHandler: NULL handler !\n");
                    518:        return;
                    519:     }
                    520: 
                    521:     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
                    522:         fprintf(stderr, 
                    523:        "xmlRegisterCharEncodingHandler: Too many handler registered\n");
                    524:         fprintf(stderr, "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
                    525:        return;
                    526:     }
                    527:     handlers[nbCharEncodingHandler++] = handler;
                    528: }
                    529: 
                    530: /**
                    531:  * xmlGetCharEncodingHandler:
                    532:  * @enc:  an xmlCharEncoding value.
                    533:  *
                    534:  * Search in the registrered set the handler able to read/write that encoding.
                    535:  *
                    536:  * Returns the handler or NULL if not found
                    537:  */
                    538: xmlCharEncodingHandlerPtr
                    539: xmlGetCharEncodingHandler(xmlCharEncoding enc) {
                    540:     if (handlers == NULL) xmlInitCharEncodingHandlers();
1.15      daniel    541:     /* TODO xmlGetCharEncodingHandler !!!!!!! */
1.9       daniel    542:     return(NULL);
                    543: }
                    544: 
                    545: /**
                    546:  * xmlGetCharEncodingHandler:
                    547:  * @enc:  a string describing the char encoding.
                    548:  *
                    549:  * Search in the registrered set the handler able to read/write that encoding.
                    550:  *
                    551:  * Returns the handler or NULL if not found
                    552:  */
                    553: xmlCharEncodingHandlerPtr
                    554: xmlFindCharEncodingHandler(const char *name) {
                    555:     char upper[500];
                    556:     int i;
                    557: 
                    558:     if (handlers == NULL) xmlInitCharEncodingHandlers();
                    559:     if (name == NULL) return(xmlDefaultCharEncodingHandler);
                    560:     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
                    561: 
                    562:     for (i = 0;i < 499;i++) {
                    563:         upper[i] = toupper(name[i]);
                    564:        if (upper[i] == 0) break;
                    565:     }
                    566:     upper[i] = 0;
                    567: 
                    568:     for (i = 0;i < nbCharEncodingHandler; i++)
                    569:         if (!strcmp(name, handlers[i]->name))
                    570:            return(handlers[i]);
                    571: 
                    572:     return(NULL);
                    573: }
                    574: 

Webmaster