Annotation of XML/encoding.c, revision 1.54

1.1       daniel      1: /*
                      2:  * encoding.c : implements the encoding conversion functions needed for XML
                      3:  *
                      4:  * Related specs: 
                      5:  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
1.39      daniel      6:  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
1.1       daniel      7:  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
                      8:  * [ISO-8859-1]   ISO Latin-1 characters codes.
                      9:  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
                     10:  *                Worldwide Character Encoding -- Version 1.0", Addison-
                     11:  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
                     12:  *                described in Unicode Technical Report #4.
                     13:  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
                     14:  *                Information Interchange, ANSI X3.4-1986.
                     15:  *
1.9       daniel     16:  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
1.1       daniel     17:  *
                     18:  * See Copyright for the status of this software.
                     19:  *
                     20:  * Daniel.Veillard@w3.org
                     21:  */
                     22: 
1.21      daniel     23: #ifdef WIN32
                     24: #include "win32config.h"
                     25: #else
1.14      daniel     26: #include "config.h"
1.17      daniel     27: #endif
                     28: 
                     29: #include <stdio.h>
                     30: #include <string.h>
                     31: 
                     32: #ifdef HAVE_CTYPE_H
1.7       daniel     33: #include <ctype.h>
1.17      daniel     34: #endif
1.20      daniel     35: #ifdef HAVE_STDLIB_H
                     36: #include <stdlib.h>
                     37: #endif
1.30      daniel     38: #include <libxml/xmlversion.h>
                     39: #ifdef LIBXML_ICONV_ENABLED
                     40: #ifdef HAVE_ERRNO_H
                     41: #include <errno.h>
                     42: #endif
                     43: #endif
1.29      daniel     44: #include <libxml/encoding.h>
                     45: #include <libxml/xmlmemory.h>
1.48      veillard   46: #ifdef LIBXML_HTML_ENABLED
                     47: #include <libxml/HTMLparser.h>
                     48: #endif
1.52      veillard   49: #include <libxml/xmlerror.h>
1.3       daniel     50: 
1.25      daniel     51: xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
                     52: xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
                     53: 
1.51      veillard   54: typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; /* one (name, alias) string pair */
                     55: typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
                     56: struct _xmlCharEncodingAlias {
                     57:     const char *name;   /* presumably the real encoding name -- TODO confirm against the users of this table */
                     58:     const char *alias;  /* presumably the alternate name mapped to @name -- TODO confirm */
                     59: };
                     60: 
                     61: static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; /* array of alias records; starts out NULL (no table) */
                     62: static int xmlCharEncodingAliasesNb = 0;  /* NOTE(review): looks like the number of records in use -- confirm */
                     63: static int xmlCharEncodingAliasesMax = 0; /* NOTE(review): looks like the allocated capacity of the array -- confirm */
                     64: 
1.30      daniel     65: #ifdef LIBXML_ICONV_ENABLED
1.46      veillard   66: #if 0
1.30      daniel     67: #define DEBUG_ENCODING  /* Define this to get encoding traces */
                     68: #endif
1.33      daniel     69: #endif
1.30      daniel     70: 
1.34      daniel     71: static int xmlLittleEndian = 1;
                     72: 
1.3       daniel     73: /*
                     74:  * From rfc2044: encoding of the Unicode values on UTF-8:
                     75:  *
                     76:  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
                     77:  * 0000 0000-0000 007F   0xxxxxxx
                     78:  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
                     79:  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
                     80:  *
                     81:  * I hope we won't use values > 0xFFFF anytime soon !
                     82:  */
1.1       daniel     83: 
                     84: /**
1.39      daniel     85:  * xmlGetUTF8Char:
                     86:  * @utf:  a sequence of UTF-8 encoded bytes
                     87:  * @len:  a pointer to @bytes len
                     88:  *
                     89:  * Read one UTF8 Char from @utf
                     90:  *
                     91:  * Returns the char value or -1 in case of error and update @len with the
                     92:  *        number of bytes used
                     93:  */
                     94: int
                     95: xmlGetUTF8Char(const unsigned char *utf, int *len) {
                     96:     unsigned int c;
                     97: 
                     98:     if (utf == NULL)
                     99:        goto error;
                    100:     if (len == NULL)
                    101:        goto error;
                    102:     if (*len < 1)
                    103:        goto error;
                    104: 
                    105:     c = utf[0];
                    106:     if (c & 0x80) {
                    107:        if (*len < 2)
                    108:            goto error;
                    109:        if ((utf[1] & 0xc0) != 0x80)
                    110:            goto error;
                    111:        if ((c & 0xe0) == 0xe0) {
                    112:            if (*len < 3)
                    113:                goto error;
                    114:            if ((utf[2] & 0xc0) != 0x80)
                    115:                goto error;
                    116:            if ((c & 0xf0) == 0xf0) {
                    117:                if (*len < 4)
                    118:                    goto error;
                    119:                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    120:                    goto error;
                    121:                *len = 4;
                    122:                /* 4-byte code */
                    123:                c = (utf[0] & 0x7) << 18;
                    124:                c |= (utf[1] & 0x3f) << 12;
                    125:                c |= (utf[2] & 0x3f) << 6;
                    126:                c |= utf[3] & 0x3f;
                    127:            } else {
                    128:              /* 3-byte code */
                    129:                *len = 3;
                    130:                c = (utf[0] & 0xf) << 12;
                    131:                c |= (utf[1] & 0x3f) << 6;
                    132:                c |= utf[2] & 0x3f;
                    133:            }
                    134:        } else {
                    135:          /* 2-byte code */
                    136:            *len = 2;
                    137:            c = (utf[0] & 0x1f) << 6;
                    138:            c |= utf[1] & 0x3f;
                    139:        }
                    140:     } else {
                    141:        /* 1-byte code */
                    142:        *len = 1;
                    143:     }
                    144:     return(c);
                    145: 
                    146: error:
                    147:     *len = 0;
                    148:     return(-1);
                    149: }
                    150: 
                    151: /**
1.22      daniel    152:  * xmlCheckUTF8: Check utf-8 string for legality.
                    153:  * @utf: Pointer to putative utf-8 encoded string.
                    154:  *
                    155:  * Checks @utf for being valid utf-8. @utf is assumed to be
                    156:  * null-terminated. This function is not super-strict, as it will
                    157:  * allow longer utf-8 sequences than necessary. Note that Java is
                    158:  * capable of producing these sequences if provoked. Also note, this
                    159:  * routine checks for the 4-byte maxiumum size, but does not check for
                    160:  * 0x10ffff maximum value.
                    161:  *
                    162:  * Return value: true if @utf is valid.
                    163:  **/
                    164: int
                    165: xmlCheckUTF8(const unsigned char *utf)
                    166: {
                    167:     int ix;
                    168:     unsigned char c;
                    169: 
                    170:     for (ix = 0; (c = utf[ix]);) {
                    171:         if (c & 0x80) {
                    172:            if ((utf[ix + 1] & 0xc0) != 0x80)
                    173:                return(0);
                    174:            if ((c & 0xe0) == 0xe0) {
                    175:                if ((utf[ix + 2] & 0xc0) != 0x80)
                    176:                    return(0);
                    177:                if ((c & 0xf0) == 0xf0) {
                    178:                    if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
                    179:                        return(0);
                    180:                    ix += 4;
                    181:                    /* 4-byte code */
                    182:                } else
                    183:                  /* 3-byte code */
                    184:                    ix += 3;
                    185:            } else
                    186:              /* 2-byte code */
                    187:                ix += 2;
                    188:        } else
                    189:            /* 1-byte code */
                    190:            ix++;
                    191:       }
                    192:       return(1);
                    193: }
                    194: 
                    195: /**
1.47      veillard  196:  * asciiToUTF8:
                    197:  * @out:  a pointer to an array of bytes to store the result
                    198:  * @outlen:  the length of @out
                    199:  * @in:  a pointer to an array of ASCII chars
                    200:  * @inlen:  the length of @in
                    201:  *
                    202:  * Take a block of ASCII chars in and try to convert it to an UTF-8
                    203:  * block of chars out.
                    204:  * Returns 0 if success, or -1 otherwise
                    205:  * The value of @inlen after return is the number of octets consumed
                    206:  *     as the return value is positive, else unpredictiable.
                    207:  * The value of @outlen after return is the number of ocetes consumed.
                    208:  */
                    209: int
                    210: asciiToUTF8(unsigned char* out, int *outlen,
                    211:               const unsigned char* in, int *inlen) {
                    212:     unsigned char* outstart = out;
                    213:     const unsigned char* base = in;
                    214:     const unsigned char* processed = in;
                    215:     unsigned char* outend = out + *outlen;
                    216:     const unsigned char* inend;
                    217:     unsigned int c;
                    218:     int bits;
                    219: 
                    220:     inend = in + (*inlen);
                    221:     while ((in < inend) && (out - outstart + 5 < *outlen)) {
                    222:        c= *in++;
                    223: 
                    224:        /* assertion: c is a single UTF-4 value */
                    225:         if (out >= outend)
                    226:            break;
                    227:         if      (c <    0x80) {  *out++=  c;                bits= -6; }
                    228:         else { 
                    229:            *outlen = out - outstart;
                    230:            *inlen = processed - base;
                    231:            return(-1);
                    232:        }
                    233:  
                    234:         for ( ; bits >= 0; bits-= 6) {
                    235:             if (out >= outend)
                    236:                break;
                    237:             *out++= ((c >> bits) & 0x3F) | 0x80;
                    238:         }
                    239:        processed = (const unsigned char*) in;
                    240:     }
                    241:     *outlen = out - outstart;
                    242:     *inlen = processed - base;
                    243:     return(0);
                    244: }
                    245: 
                    246: /**
                    247:  * UTF8Toascii:
                    248:  * @out:  a pointer to an array of bytes to store the result
                    249:  * @outlen:  the length of @out
                    250:  * @in:  a pointer to an array of UTF-8 chars
                    251:  * @inlen:  the length of @in
                    252:  *
                    253:  * Take a block of UTF-8 chars in and try to convert it to an ASCII
                    254:  * block of chars out.
                    255:  *
                    256:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
                    257:  * The value of @inlen after return is the number of octets consumed
                    258:  *     as the return value is positive, else unpredictiable.
                    259:  * The value of @outlen after return is the number of ocetes consumed.
                    260:  */
                    261: int
                    262: UTF8Toascii(unsigned char* out, int *outlen,
                    263:               const unsigned char* in, int *inlen) {
                    264:     const unsigned char* processed = in;
                    265:     const unsigned char* outend;
                    266:     const unsigned char* outstart = out;
                    267:     const unsigned char* instart = in;
                    268:     const unsigned char* inend;
                    269:     unsigned int c, d;
                    270:     int trailing;
                    271: 
                    272:     if (in == NULL) {
                    273:         /*
                    274:         * initialization nothing to do
                    275:         */
                    276:        *outlen = 0;
                    277:        *inlen = 0;
                    278:        return(0);
                    279:     }
                    280:     inend = in + (*inlen);
                    281:     outend = out + (*outlen);
                    282:     while (in < inend) {
                    283:        d = *in++;
                    284:        if      (d < 0x80)  { c= d; trailing= 0; }
                    285:        else if (d < 0xC0) {
                    286:            /* trailing byte in leading position */
                    287:            *outlen = out - outstart;
                    288:            *inlen = processed - instart;
                    289:            return(-2);
                    290:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                    291:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                    292:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                    293:        else {
                    294:            /* no chance for this in Ascii */
                    295:            *outlen = out - outstart;
                    296:            *inlen = processed - instart;
                    297:            return(-2);
                    298:        }
                    299: 
                    300:        if (inend - in < trailing) {
                    301:            break;
                    302:        } 
                    303: 
                    304:        for ( ; trailing; trailing--) {
                    305:            if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
                    306:                break;
                    307:            c <<= 6;
                    308:            c |= d & 0x3F;
                    309:        }
                    310: 
                    311:        /* assertion: c is a single UTF-4 value */
                    312:        if (c < 0x80) {
                    313:            if (out >= outend)
                    314:                break;
                    315:            *out++ = c;
                    316:        } else {
                    317:            /* no chance for this in Ascii */
                    318:            *outlen = out - outstart;
                    319:            *inlen = processed - instart;
                    320:            return(-2);
                    321:        }
                    322:        processed = in;
                    323:     }
                    324:     *outlen = out - outstart;
                    325:     *inlen = processed - instart;
                    326:     return(0);
                    327: }
                    328: 
                    329: /**
1.1       daniel    330:  * isolat1ToUTF8:
1.18      daniel    331:  * @out:  a pointer to an array of bytes to store the result
                    332:  * @outlen:  the length of @out
                    333:  * @in:  a pointer to an array of ISO Latin 1 chars
                    334:  * @inlen:  the length of @in
1.1       daniel    335:  *
                    336:  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
                    337:  * block of chars out.
1.33      daniel    338:  * Returns 0 if success, or -1 otherwise
                    339:  * The value of @inlen after return is the number of octets consumed
                    340:  *     as the return value is positive, else unpredictiable.
                    341:  * The value of @outlen after return is the number of ocetes consumed.
1.1       daniel    342:  */
                    343: int
1.33      daniel    344: isolat1ToUTF8(unsigned char* out, int *outlen,
1.25      daniel    345:               const unsigned char* in, int *inlen) {
1.33      daniel    346:     unsigned char* outstart = out;
1.45      veillard  347:     const unsigned char* base = in;
1.33      daniel    348:     const unsigned char* processed = in;
                    349:     unsigned char* outend = out + *outlen;
1.45      veillard  350:     const unsigned char* inend;
                    351:     unsigned int c;
                    352:     int bits;
                    353: 
                    354:     inend = in + (*inlen);
                    355:     while ((in < inend) && (out - outstart + 5 < *outlen)) {
                    356:        c= *in++;
1.1       daniel    357: 
1.45      veillard  358:        /* assertion: c is a single UTF-4 value */
                    359:         if (out >= outend)
                    360:            break;
                    361:         if      (c <    0x80) {  *out++=  c;                bits= -6; }
                    362:         else                  {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                    363:  
                    364:         for ( ; bits >= 0; bits-= 6) {
1.33      daniel    365:             if (out >= outend)
1.45      veillard  366:                break;
                    367:             *out++= ((c >> bits) & 0x3F) | 0x80;
1.1       daniel    368:         }
1.45      veillard  369:        processed = (const unsigned char*) in;
1.1       daniel    370:     }
1.33      daniel    371:     *outlen = out - outstart;
1.45      veillard  372:     *inlen = processed - base;
1.33      daniel    373:     return(0);
1.1       daniel    374: }
                    375: 
                    376: /**
                    377:  * UTF8Toisolat1:
1.18      daniel    378:  * @out:  a pointer to an array of bytes to store the result
                    379:  * @outlen:  the length of @out
                    380:  * @in:  a pointer to an array of UTF-8 chars
                    381:  * @inlen:  the length of @in
1.1       daniel    382:  *
                    383:  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
                    384:  * block of chars out.
1.15      daniel    385:  *
1.33      daniel    386:  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
1.28      daniel    387:  * The value of @inlen after return is the number of octets consumed
                    388:  *     as the return value is positive, else unpredictiable.
1.33      daniel    389:  * The value of @outlen after return is the number of ocetes consumed.
1.1       daniel    390:  */
                    391: int
1.33      daniel    392: UTF8Toisolat1(unsigned char* out, int *outlen,
1.25      daniel    393:               const unsigned char* in, int *inlen) {
1.33      daniel    394:     const unsigned char* processed = in;
1.45      veillard  395:     const unsigned char* outend;
                    396:     const unsigned char* outstart = out;
                    397:     const unsigned char* instart = in;
                    398:     const unsigned char* inend;
                    399:     unsigned int c, d;
                    400:     int trailing;
1.1       daniel    401: 
1.45      veillard  402:     if (in == NULL) {
                    403:         /*
                    404:         * initialization nothing to do
                    405:         */
                    406:        *outlen = 0;
                    407:        *inlen = 0;
                    408:        return(0);
                    409:     }
                    410:     inend = in + (*inlen);
                    411:     outend = out + (*outlen);
1.1       daniel    412:     while (in < inend) {
1.45      veillard  413:        d = *in++;
                    414:        if      (d < 0x80)  { c= d; trailing= 0; }
                    415:        else if (d < 0xC0) {
                    416:            /* trailing byte in leading position */
                    417:            *outlen = out - outstart;
                    418:            *inlen = processed - instart;
                    419:            return(-2);
                    420:         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
                    421:         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                    422:         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
                    423:        else {
                    424:            /* no chance for this in IsoLat1 */
                    425:            *outlen = out - outstart;
                    426:            *inlen = processed - instart;
                    427:            return(-2);
1.23      daniel    428:        }
1.45      veillard  429: 
                    430:        if (inend - in < trailing) {
                    431:            break;
                    432:        } 
                    433: 
                    434:        for ( ; trailing; trailing--) {
1.50      veillard  435:            if (in >= inend)
1.45      veillard  436:                break;
1.50      veillard  437:            if (((d= *in++) & 0xC0) != 0x80) {
                    438:                *outlen = out - outstart;
                    439:                *inlen = processed - instart;
                    440:                return(-2);
                    441:            }
1.45      veillard  442:            c <<= 6;
                    443:            c |= d & 0x3F;
1.23      daniel    444:        }
1.45      veillard  445: 
                    446:        /* assertion: c is a single UTF-4 value */
                    447:        if (c <= 0xFF) {
                    448:            if (out >= outend)
                    449:                break;
                    450:            *out++ = c;
                    451:        } else {
                    452:            /* no chance for this in IsoLat1 */
1.33      daniel    453:            *outlen = out - outstart;
1.45      veillard  454:            *inlen = processed - instart;
1.28      daniel    455:            return(-2);
1.33      daniel    456:        }
                    457:        processed = in;
1.1       daniel    458:     }
1.33      daniel    459:     *outlen = out - outstart;
1.45      veillard  460:     *inlen = processed - instart;
1.33      daniel    461:     return(0);
1.1       daniel    462: }
                    463: 
                    464: /**
1.28      daniel    465:  * UTF16LEToUTF8:
                    466:  * @out:  a pointer to an array of bytes to store the result
                    467:  * @outlen:  the length of @out
                    468:  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
                    469:  * @inlenb:  the length of @in in UTF-16LE chars
                    470:  *
                    471:  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
                    472:  * block of chars out. This function assume the endian properity
                    473:  * is the same between the native type of this machine and the
                    474:  * inputed one.
                    475:  *
                    476:  * Returns the number of byte written, or -1 by lack of space, or -2
                    477:  *     if the transcoding fails (for *in is not valid utf16 string)
                    478:  *     The value of *inlen after return is the number of octets consumed
                    479:  *     as the return value is positive, else unpredictiable.
                    480:  */
                    481: int
1.33      daniel    482: UTF16LEToUTF8(unsigned char* out, int *outlen,
1.28      daniel    483:             const unsigned char* inb, int *inlenb)
                    484: {
1.33      daniel    485:     unsigned char* outstart = out;
                    486:     const unsigned char* processed = inb;
                    487:     unsigned char* outend = out + *outlen;
1.28      daniel    488:     unsigned short* in = (unsigned short*) inb;
                    489:     unsigned short* inend;
                    490:     unsigned int c, d, inlen;
                    491:     unsigned char *tmp;
                    492:     int bits;
                    493: 
                    494:     if ((*inlenb % 2) == 1)
                    495:         (*inlenb)--;
                    496:     inlen = *inlenb / 2;
1.33      daniel    497:     inend = in + inlen;
1.39      daniel    498:     while ((in < inend) && (out - outstart + 5 < *outlen)) {
1.34      daniel    499:         if (xmlLittleEndian) {
                    500:            c= *in++;
                    501:        } else {
                    502:            tmp = (unsigned char *) in;
                    503:            c = *tmp++;
                    504:            c = c | (((unsigned int)*tmp) << 8);
                    505:            in++;
                    506:        }
1.28      daniel    507:         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
1.39      daniel    508:            if (in >= inend) {           /* (in > inend) shouldn't happens */
                    509:                break;
                    510:            }
1.34      daniel    511:            if (xmlLittleEndian) {
                    512:                d = *in++;
                    513:            } else {
                    514:                tmp = (unsigned char *) in;
                    515:                d = *tmp++;
                    516:                d = d | (((unsigned int)*tmp) << 8);
                    517:                in++;
                    518:            }
1.28      daniel    519:             if ((d & 0xFC00) == 0xDC00) {
                    520:                 c &= 0x03FF;
                    521:                 c <<= 10;
                    522:                 c |= d & 0x03FF;
                    523:                 c += 0x10000;
                    524:             }
1.33      daniel    525:             else {
                    526:                *outlen = out - outstart;
                    527:                *inlenb = processed - inb;
1.28      daniel    528:                return(-2);
1.33      daniel    529:            }
1.28      daniel    530:         }
                    531: 
                    532:        /* assertion: c is a single UTF-4 value */
                    533:         if (out >= outend)
1.33      daniel    534:            break;
1.28      daniel    535:         if      (c <    0x80) {  *out++=  c;                bits= -6; }
                    536:         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                    537:         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                    538:         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
                    539:  
                    540:         for ( ; bits >= 0; bits-= 6) {
                    541:             if (out >= outend)
1.33      daniel    542:                break;
1.28      daniel    543:             *out++= ((c >> bits) & 0x3F) | 0x80;
                    544:         }
1.33      daniel    545:        processed = (const unsigned char*) in;
1.28      daniel    546:     }
1.33      daniel    547:     *outlen = out - outstart;
                    548:     *inlenb = processed - inb;
                    549:     return(0);
1.28      daniel    550: }
                    551: 
                    552: /**
                    553:  * UTF8ToUTF16LE:
                    554:  * @outb:  a pointer to an array of bytes to store the result
                    555:  * @outlen:  the length of @outb
                    556:  * @in:  a pointer to an array of UTF-8 chars
                    557:  * @inlen:  the length of @in
                    558:  *
                    559:  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
                    560:  * block of chars out.
                    561:  *
                    562:  * Returns the number of byte written, or -1 by lack of space, or -2
                    563:  *     if the transcoding failed. 
                    564:  */
                    565: int
1.33      daniel    566: UTF8ToUTF16LE(unsigned char* outb, int *outlen,
1.28      daniel    567:             const unsigned char* in, int *inlen)
                    568: {
                    569:     unsigned short* out = (unsigned short*) outb;
1.33      daniel    570:     const unsigned char* processed = in;
1.28      daniel    571:     unsigned short* outstart= out;
                    572:     unsigned short* outend;
                    573:     const unsigned char* inend= in+*inlen;
1.40      daniel    574:     unsigned int c, d;
                    575:     int trailing;
1.28      daniel    576:     unsigned char *tmp;
                    577:     unsigned short tmp1, tmp2;
                    578: 
1.37      daniel    579:     if (in == NULL) {
                    580:         /*
                    581:         * initialization, add the Byte Order Mark
                    582:         */
                    583:         if (*outlen >= 2) {
                    584:            outb[0] = 0xFF;
                    585:            outb[1] = 0xFE;
                    586:            *outlen = 2;
                    587:            *inlen = 0;
                    588: #ifdef DEBUG_ENCODING
1.52      veillard  589:             xmlGenericError(xmlGenericErrorContext,
                    590:                    "Added FFFE Byte Order Mark\n");
1.37      daniel    591: #endif
                    592:            return(2);
                    593:        }
                    594:        *outlen = 0;
                    595:        *inlen = 0;
                    596:        return(0);
                    597:     }
1.33      daniel    598:     outend = out + (*outlen / 2);
1.28      daniel    599:     while (in < inend) {
                    600:       d= *in++;
                    601:       if      (d < 0x80)  { c= d; trailing= 0; }
1.33      daniel    602:       else if (d < 0xC0) {
                    603:           /* trailing byte in leading position */
1.45      veillard  604:          *outlen = (out - outstart) * 2;
1.33      daniel    605:          *inlen = processed - in;
                    606:          return(-2);
                    607:       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
1.28      daniel    608:       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                    609:       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
1.33      daniel    610:       else {
                    611:        /* no chance for this in UTF-16 */
1.45      veillard  612:        *outlen = (out - outstart) * 2;
1.33      daniel    613:        *inlen = processed - in;
                    614:        return(-2);
                    615:       }
1.28      daniel    616: 
                    617:       if (inend - in < trailing) {
                    618:           break;
                    619:       } 
                    620: 
                    621:       for ( ; trailing; trailing--) {
                    622:           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
1.33      daniel    623:              break;
1.28      daniel    624:           c <<= 6;
                    625:           c |= d & 0x3F;
                    626:       }
                    627: 
                    628:       /* assertion: c is a single UTF-4 value */
                    629:         if (c < 0x10000) {
                    630:             if (out >= outend)
1.33      daniel    631:                break;
1.34      daniel    632:            if (xmlLittleEndian) {
                    633:                *out++ = c;
                    634:            } else {
                    635:                tmp = (unsigned char *) out;
                    636:                *tmp = c ;
                    637:                *(tmp + 1) = c >> 8 ;
                    638:                out++;
                    639:            }
1.28      daniel    640:         }
                    641:         else if (c < 0x110000) {
                    642:             if (out+1 >= outend)
1.33      daniel    643:                break;
1.28      daniel    644:             c -= 0x10000;
1.34      daniel    645:            if (xmlLittleEndian) {
                    646:                *out++ = 0xD800 | (c >> 10);
                    647:                *out++ = 0xDC00 | (c & 0x03FF);
                    648:            } else {
                    649:                tmp1 = 0xD800 | (c >> 10);
                    650:                tmp = (unsigned char *) out;
1.40      daniel    651:                *tmp = (unsigned char) tmp1;
1.34      daniel    652:                *(tmp + 1) = tmp1 >> 8;
                    653:                out++;
                    654: 
                    655:                tmp2 = 0xDC00 | (c & 0x03FF);
                    656:                tmp = (unsigned char *) out;
1.40      daniel    657:                *tmp  = (unsigned char) tmp2;
1.34      daniel    658:                *(tmp + 1) = tmp2 >> 8;
                    659:                out++;
                    660:            }
1.28      daniel    661:         }
                    662:         else
1.33      daniel    663:            break;
                    664:        processed = in;
1.28      daniel    665:     }
1.36      daniel    666:     *outlen = (out - outstart) * 2;
1.33      daniel    667:     *inlen = processed - in;
                    668:     return(0);
1.28      daniel    669: }
                    670: 
                    671: /**
                    672:  * UTF16BEToUTF8:
1.18      daniel    673:  * @out:  a pointer to an array of bytes to store the result
                    674:  * @outlen:  the length of @out
1.25      daniel    675:  * @inb:  a pointer to an array of UTF-16 passwd as a byte array
                    676:  * @inlenb:  the length of @in in UTF-16 chars
1.1       daniel    677:  *
                    678:  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
1.28      daniel    679:  * block of chars out. This function assume the endian properity
                    680:  * is the same between the native type of this machine and the
                    681:  * inputed one.
1.25      daniel    682:  *
1.28      daniel    683:  * Returns the number of byte written, or -1 by lack of space, or -2
                    684:  *     if the transcoding fails (for *in is not valid utf16 string)
                    685:  * The value of *inlen after return is the number of octets consumed
                    686:  *     as the return value is positive, else unpredictiable.
1.1       daniel    687:  */
                    688: int
1.33      daniel    689: UTF16BEToUTF8(unsigned char* out, int *outlen,
1.25      daniel    690:             const unsigned char* inb, int *inlenb)
1.1       daniel    691: {
1.33      daniel    692:     unsigned char* outstart = out;
                    693:     const unsigned char* processed = inb;
                    694:     unsigned char* outend = out + *outlen;
1.25      daniel    695:     unsigned short* in = (unsigned short*) inb;
                    696:     unsigned short* inend;
                    697:     unsigned int c, d, inlen;
1.28      daniel    698:     unsigned char *tmp;
1.1       daniel    699:     int bits;
                    700: 
1.28      daniel    701:     if ((*inlenb % 2) == 1)
                    702:         (*inlenb)--;
1.25      daniel    703:     inlen = *inlenb / 2;
                    704:     inend= in + inlen;
1.1       daniel    705:     while (in < inend) {
1.34      daniel    706:        if (xmlLittleEndian) {
                    707:            tmp = (unsigned char *) in;
                    708:            c = *tmp++;
                    709:            c = c << 8;
                    710:            c = c | (unsigned int) *tmp;
                    711:            in++;
                    712:        } else {
                    713:            c= *in++;
                    714:        } 
1.1       daniel    715:         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
1.28      daniel    716:            if (in >= inend) {           /* (in > inend) shouldn't happens */
1.33      daniel    717:                *outlen = out - outstart;
                    718:                *inlenb = processed - inb;
                    719:                return(-2);
1.28      daniel    720:            }
1.34      daniel    721:            if (xmlLittleEndian) {
                    722:                tmp = (unsigned char *) in;
                    723:                d = *tmp++;
                    724:                d = d << 8;
                    725:                d = d | (unsigned int) *tmp;
                    726:                in++;
                    727:            } else {
                    728:                d= *in++;
                    729:            }
1.28      daniel    730:             if ((d & 0xFC00) == 0xDC00) {
1.1       daniel    731:                 c &= 0x03FF;
                    732:                 c <<= 10;
                    733:                 c |= d & 0x03FF;
                    734:                 c += 0x10000;
                    735:             }
1.33      daniel    736:             else {
                    737:                *outlen = out - outstart;
                    738:                *inlenb = processed - inb;
1.28      daniel    739:                return(-2);
1.33      daniel    740:            }
1.1       daniel    741:         }
                    742: 
1.25      daniel    743:        /* assertion: c is a single UTF-4 value */
1.27      daniel    744:         if (out >= outend) 
1.33      daniel    745:            break;
1.1       daniel    746:         if      (c <    0x80) {  *out++=  c;                bits= -6; }
1.26      daniel    747:         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
                    748:         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
                    749:         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
1.1       daniel    750:  
1.26      daniel    751:         for ( ; bits >= 0; bits-= 6) {
1.27      daniel    752:             if (out >= outend) 
1.33      daniel    753:                break;
1.26      daniel    754:             *out++= ((c >> bits) & 0x3F) | 0x80;
1.1       daniel    755:         }
1.33      daniel    756:        processed = (const unsigned char*) in;
1.1       daniel    757:     }
1.33      daniel    758:     *outlen = out - outstart;
                    759:     *inlenb = processed - inb;
                    760:     return(0);
1.1       daniel    761: }
                    762: 
                    763: /**
1.28      daniel    764:  * UTF8ToUTF16BE:
1.25      daniel    765:  * @outb:  a pointer to an array of bytes to store the result
                    766:  * @outlen:  the length of @outb
1.18      daniel    767:  * @in:  a pointer to an array of UTF-8 chars
                    768:  * @inlen:  the length of @in
1.1       daniel    769:  *
1.28      daniel    770:  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
1.1       daniel    771:  * block of chars out.
1.15      daniel    772:  *
1.6       daniel    773:  * Returns the number of byte written, or -1 by lack of space, or -2
1.25      daniel    774:  *     if the transcoding failed. 
1.1       daniel    775:  */
                    776: int
1.33      daniel    777: UTF8ToUTF16BE(unsigned char* outb, int *outlen,
1.25      daniel    778:             const unsigned char* in, int *inlen)
1.1       daniel    779: {
1.25      daniel    780:     unsigned short* out = (unsigned short*) outb;
1.33      daniel    781:     const unsigned char* processed = in;
1.1       daniel    782:     unsigned short* outstart= out;
1.28      daniel    783:     unsigned short* outend;
1.25      daniel    784:     const unsigned char* inend= in+*inlen;
1.40      daniel    785:     unsigned int c, d;
                    786:     int trailing;
1.28      daniel    787:     unsigned char *tmp;
                    788:     unsigned short tmp1, tmp2;
1.1       daniel    789: 
1.37      daniel    790:     if (in == NULL) {
                    791:         /*
                    792:         * initialization, add the Byte Order Mark
                    793:         */
                    794:         if (*outlen >= 2) {
                    795:            outb[0] = 0xFE;
                    796:            outb[1] = 0xFF;
                    797:            *outlen = 2;
                    798:            *inlen = 0;
                    799: #ifdef DEBUG_ENCODING
1.52      veillard  800:             xmlGenericError(xmlGenericErrorContext,
                    801:                    "Added FEFF Byte Order Mark\n");
1.37      daniel    802: #endif
                    803:            return(2);
                    804:        }
                    805:        *outlen = 0;
                    806:        *inlen = 0;
                    807:        return(0);
                    808:     }
1.33      daniel    809:     outend = out + (*outlen / 2);
1.1       daniel    810:     while (in < inend) {
                    811:       d= *in++;
                    812:       if      (d < 0x80)  { c= d; trailing= 0; }
1.33      daniel    813:       else if (d < 0xC0)  {
                    814:           /* trailing byte in leading position */
                    815:          *outlen = out - outstart;
                    816:          *inlen = processed - in;
                    817:          return(-2);
                    818:       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
1.1       daniel    819:       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
                    820:       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
1.33      daniel    821:       else {
                    822:           /* no chance for this in UTF-16 */
                    823:          *outlen = out - outstart;
                    824:          *inlen = processed - in;
                    825:          return(-2);
                    826:       }
1.28      daniel    827: 
                    828:       if (inend - in < trailing) {
                    829:           break;
                    830:       } 
1.1       daniel    831: 
                    832:       for ( ; trailing; trailing--) {
1.33      daniel    833:           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
1.1       daniel    834:           c <<= 6;
                    835:           c |= d & 0x3F;
                    836:       }
                    837: 
                    838:       /* assertion: c is a single UTF-4 value */
                    839:         if (c < 0x10000) {
1.33      daniel    840:             if (out >= outend)  break;
1.34      daniel    841:            if (xmlLittleEndian) {
                    842:                tmp = (unsigned char *) out;
                    843:                *tmp = c >> 8;
                    844:                *(tmp + 1) = c;
                    845:                out++;
                    846:            } else {
                    847:                *out++ = c;
                    848:            }
1.1       daniel    849:         }
                    850:         else if (c < 0x110000) {
1.33      daniel    851:             if (out+1 >= outend)  break;
1.1       daniel    852:             c -= 0x10000;
1.34      daniel    853:            if (xmlLittleEndian) {
                    854:                tmp1 = 0xD800 | (c >> 10);
                    855:                tmp = (unsigned char *) out;
                    856:                *tmp = tmp1 >> 8;
1.40      daniel    857:                *(tmp + 1) = (unsigned char) tmp1;
1.34      daniel    858:                out++;
                    859: 
                    860:                tmp2 = 0xDC00 | (c & 0x03FF);
                    861:                tmp = (unsigned char *) out;
                    862:                *tmp = tmp2 >> 8;
1.40      daniel    863:                *(tmp + 1) = (unsigned char) tmp2;
1.34      daniel    864:                out++;
                    865:            } else {
                    866:                *out++ = 0xD800 | (c >> 10);
                    867:                *out++ = 0xDC00 | (c & 0x03FF);
                    868:            }
1.1       daniel    869:         }
1.33      daniel    870:         else
                    871:            break;
                    872:        processed = in;
1.1       daniel    873:     }
1.36      daniel    874:     *outlen = (out - outstart) * 2;
1.33      daniel    875:     *inlen = processed - in;
                    876:     return(0);
1.1       daniel    877: }
                    878: 
1.7       daniel    879: /**
                    880:  * xmlDetectCharEncoding:
                    881:  * @in:  a pointer to the first bytes of the XML entity, must be at least
                    882:  *       4 bytes long.
1.25      daniel    883:  * @len:  pointer to the length of the buffer
1.7       daniel    884:  *
                    885:  * Guess the encoding of the entity using the first bytes of the entity content
                    886:  * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
                    887:  * 
                    888:  * Returns one of the XML_CHAR_ENCODING_... values.
                    889:  */
                    890: xmlCharEncoding
1.25      daniel    891: xmlDetectCharEncoding(const unsigned char* in, int len)
1.7       daniel    892: {
1.25      daniel    893:     if (len >= 4) {
                    894:        if ((in[0] == 0x00) && (in[1] == 0x00) &&
                    895:            (in[2] == 0x00) && (in[3] == 0x3C))
                    896:            return(XML_CHAR_ENCODING_UCS4BE);
                    897:        if ((in[0] == 0x3C) && (in[1] == 0x00) &&
                    898:            (in[2] == 0x00) && (in[3] == 0x00))
                    899:            return(XML_CHAR_ENCODING_UCS4LE);
                    900:        if ((in[0] == 0x00) && (in[1] == 0x00) &&
                    901:            (in[2] == 0x3C) && (in[3] == 0x00))
                    902:            return(XML_CHAR_ENCODING_UCS4_2143);
                    903:        if ((in[0] == 0x00) && (in[1] == 0x3C) &&
                    904:            (in[2] == 0x00) && (in[3] == 0x00))
                    905:            return(XML_CHAR_ENCODING_UCS4_3412);
                    906:        if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
                    907:            (in[2] == 0xA7) && (in[3] == 0x94))
                    908:            return(XML_CHAR_ENCODING_EBCDIC);
                    909:        if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
                    910:            (in[2] == 0x78) && (in[3] == 0x6D))
                    911:            return(XML_CHAR_ENCODING_UTF8);
                    912:     }
                    913:     if (len >= 2) {
                    914:        if ((in[0] == 0xFE) && (in[1] == 0xFF))
                    915:            return(XML_CHAR_ENCODING_UTF16BE);
                    916:        if ((in[0] == 0xFF) && (in[1] == 0xFE))
                    917:            return(XML_CHAR_ENCODING_UTF16LE);
                    918:     }
1.7       daniel    919:     return(XML_CHAR_ENCODING_NONE);
                    920: }
                    921: 
                    922: /**
1.51      veillard  923:  * xmlCleanupEncodingAliases:
                    924:  *
                    925:  * Unregisters all aliases
                    926:  */
                    927: void
                    928: xmlCleanupEncodingAliases(void) {
                    929:     int i;
                    930: 
                    931:     if (xmlCharEncodingAliases == NULL)
                    932:        return;
                    933: 
                    934:     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
                    935:        if (xmlCharEncodingAliases[i].name != NULL)
                    936:            xmlFree((char *) xmlCharEncodingAliases[i].name);
                    937:        if (xmlCharEncodingAliases[i].alias != NULL)
                    938:            xmlFree((char *) xmlCharEncodingAliases[i].alias);
                    939:     }
                    940:     xmlCharEncodingAliasesNb = 0;
                    941:     xmlCharEncodingAliasesMax = 0;
                    942:     xmlFree(xmlCharEncodingAliases);
                    943: }
                    944: 
                    945: /**
                    946:  * xmlGetEncodingAlias:
                    947:  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
                    948:  *
                    949:  * Lookup an encoding name for the given alias.
                    950:  * 
                    951:  * Returns NULL if not found the original name otherwise
                    952:  */
                    953: const char *
                    954: xmlGetEncodingAlias(const char *alias) {
                    955:     int i;
                    956:     char upper[100];
                    957: 
                    958:     if (alias == NULL)
                    959:        return(NULL);
                    960: 
                    961:     if (xmlCharEncodingAliases == NULL)
                    962:        return(NULL);
                    963: 
                    964:     for (i = 0;i < 99;i++) {
                    965:         upper[i] = toupper(alias[i]);
                    966:        if (upper[i] == 0) break;
                    967:     }
                    968:     upper[i] = 0;
                    969: 
                    970:     /*
                    971:      * Walk down the list looking for a definition of the alias
                    972:      */
                    973:     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
                    974:        if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
                    975:            return(xmlCharEncodingAliases[i].name);
                    976:        }
                    977:     }
                    978:     return(NULL);
                    979: }
                    980: 
                    981: /**
                    982:  * xmlAddEncodingAlias:
                    983:  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
                    984:  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
                    985:  *
                    986:  * Registers and alias @alias for an encoding named @name. Existing alias
                    987:  * will be overwritten.
                    988:  * 
                    989:  * Returns 0 in case of success, -1 in case of error
                    990:  */
                    991: int
                    992: xmlAddEncodingAlias(const char *name, const char *alias) {
                    993:     int i;
                    994:     char upper[100];
                    995: 
                    996:     if ((name == NULL) || (alias == NULL))
                    997:        return(-1);
                    998: 
                    999:     for (i = 0;i < 99;i++) {
                   1000:         upper[i] = toupper(alias[i]);
                   1001:        if (upper[i] == 0) break;
                   1002:     }
                   1003:     upper[i] = 0;
                   1004: 
                   1005:     if (xmlCharEncodingAliases == NULL) {
                   1006:        xmlCharEncodingAliasesNb = 0;
                   1007:        xmlCharEncodingAliasesMax = 20;
                   1008:        xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 
                   1009:              xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
                   1010:        if (xmlCharEncodingAliases == NULL)
                   1011:            return(-1);
                   1012:     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
                   1013:        xmlCharEncodingAliasesMax *= 2;
                   1014:        xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 
                   1015:              xmlRealloc(xmlCharEncodingAliases,
                   1016:                         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
                   1017:     }
                   1018:     /*
                   1019:      * Walk down the list looking for a definition of the alias
                   1020:      */
                   1021:     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
                   1022:        if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
                   1023:            /*
                   1024:             * Replace the definition.
                   1025:             */
                   1026:            xmlFree((char *) xmlCharEncodingAliases[i].name);
                   1027:            xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
                   1028:            return(0);
                   1029:        }
                   1030:     }
                   1031:     /*
                   1032:      * Add the definition
                   1033:      */
                   1034:     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
                   1035:     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
                   1036:     xmlCharEncodingAliasesNb++;
                   1037:     return(0);
                   1038: }
                   1039: 
                   1040: /**
                   1041:  * xmlDelEncodingAlias:
                   1042:  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
                   1043:  *
                   1044:  * Unregisters an encoding alias @alias
                   1045:  * 
                   1046:  * Returns 0 in case of success, -1 in case of error
                   1047:  */
                   1048: int
                   1049: xmlDelEncodingAlias(const char *alias) {
                   1050:     int i;
                   1051: 
                   1052:     if (alias == NULL)
                   1053:        return(-1);
                   1054: 
                   1055:     if (xmlCharEncodingAliases == NULL)
                   1056:        return(-1);
                   1057:     /*
                   1058:      * Walk down the list looking for a definition of the alias
                   1059:      */
                   1060:     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
                   1061:        if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
                   1062:            xmlFree((char *) xmlCharEncodingAliases[i].name);
                   1063:            xmlFree((char *) xmlCharEncodingAliases[i].alias);
                   1064:            xmlCharEncodingAliasesNb--;
                   1065:            memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
                   1066:                    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
                   1067:            return(0);
                   1068:        }
                   1069:     }
                   1070:     return(-1);
                   1071: }
                   1072: 
                   1073: /**
1.7       daniel   1074:  * xmlParseCharEncoding:
1.18      daniel   1075:  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1.7       daniel   1076:  *
                   1077:  * Conpare the string to the known encoding schemes already known. Note
                   1078:  * that the comparison is case insensitive accordingly to the section
                   1079:  * [XML] 4.3.3 Character Encoding in Entities.
                   1080:  * 
                   1081:  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
                   1082:  * if not recognized.
                   1083:  */
                   1084: xmlCharEncoding
1.8       daniel   1085: xmlParseCharEncoding(const char* name)
1.7       daniel   1086: {
1.51      veillard 1087:     const char *alias;
1.7       daniel   1088:     char upper[500];
                   1089:     int i;
                   1090: 
1.51      veillard 1091:     if (name == NULL)
                   1092:        return(XML_CHAR_ENCODING_NONE);
                   1093: 
                   1094:     /*
                   1095:      * Do the alias resolution
                   1096:      */
                   1097:     alias = xmlGetEncodingAlias(name);
                   1098:     if (alias != NULL)
                   1099:        name = alias;
                   1100: 
1.7       daniel   1101:     for (i = 0;i < 499;i++) {
                   1102:         upper[i] = toupper(name[i]);
                   1103:        if (upper[i] == 0) break;
                   1104:     }
                   1105:     upper[i] = 0;
                   1106: 
                   1107:     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
                   1108:     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
                   1109:     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
                   1110: 
                   1111:     /*
                   1112:      * NOTE: if we were able to parse this, the endianness of UTF16 is
                   1113:      *       already found and in use
                   1114:      */
                   1115:     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
                   1116:     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
                   1117:     
                   1118:     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
                   1119:     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
                   1120:     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
                   1121: 
                   1122:     /*
                   1123:      * NOTE: if we were able to parse this, the endianness of UCS4 is
                   1124:      *       already found and in use
                   1125:      */
                   1126:     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
                   1127:     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
                   1128:     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
                   1129: 
                   1130:     
                   1131:     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
                   1132:     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
                   1133:     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
                   1134: 
                   1135:     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
                   1136:     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
                   1137:     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
                   1138: 
                   1139:     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
                   1140:     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
                   1141:     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
                   1142:     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
                   1143:     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
                   1144:     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
                   1145:     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
                   1146: 
                   1147:     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1.30      daniel   1148:     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1.7       daniel   1149:     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1.30      daniel   1150: 
                   1151: #ifdef DEBUG_ENCODING
1.52      veillard 1152:     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1.30      daniel   1153: #endif
1.7       daniel   1154:     return(XML_CHAR_ENCODING_ERROR);
                   1155: }
1.9       daniel   1156: 
1.38      daniel   1157: /**
                   1158:  * xmlGetCharEncodingName:
                   1159:  * @enc:  the encoding
                   1160:  *
                   1161:  * The "canonical" name for XML encoding.
                   1162:  * C.f. http://www.w3.org/TR/REC-xml#charencoding
                   1163:  * Section 4.3.3  Character Encoding in Entities
                   1164:  *
                   1165:  * Returns the canonical name for the given encoding
                   1166:  */
                   1167: 
                   1168: const char*
                   1169: xmlGetCharEncodingName(xmlCharEncoding enc) {
                   1170:     switch (enc) {
                   1171:         case XML_CHAR_ENCODING_ERROR:
                   1172:            return(NULL);
                   1173:         case XML_CHAR_ENCODING_NONE:
                   1174:            return(NULL);
                   1175:         case XML_CHAR_ENCODING_UTF8:
                   1176:            return("UTF-8");
                   1177:         case XML_CHAR_ENCODING_UTF16LE:
                   1178:            return("UTF-16");
                   1179:         case XML_CHAR_ENCODING_UTF16BE:
                   1180:            return("UTF-16");
                   1181:         case XML_CHAR_ENCODING_EBCDIC:
                   1182:             return("EBCDIC");
                   1183:         case XML_CHAR_ENCODING_UCS4LE:
                   1184:             return("ISO-10646-UCS-4");
                   1185:         case XML_CHAR_ENCODING_UCS4BE:
                   1186:             return("ISO-10646-UCS-4");
                   1187:         case XML_CHAR_ENCODING_UCS4_2143:
                   1188:             return("ISO-10646-UCS-4");
                   1189:         case XML_CHAR_ENCODING_UCS4_3412:
                   1190:             return("ISO-10646-UCS-4");
                   1191:         case XML_CHAR_ENCODING_UCS2:
                   1192:             return("ISO-10646-UCS-2");
                   1193:         case XML_CHAR_ENCODING_8859_1:
                   1194:            return("ISO-8859-1");
                   1195:         case XML_CHAR_ENCODING_8859_2:
                   1196:            return("ISO-8859-2");
                   1197:         case XML_CHAR_ENCODING_8859_3:
                   1198:            return("ISO-8859-3");
                   1199:         case XML_CHAR_ENCODING_8859_4:
                   1200:            return("ISO-8859-4");
                   1201:         case XML_CHAR_ENCODING_8859_5:
                   1202:            return("ISO-8859-5");
                   1203:         case XML_CHAR_ENCODING_8859_6:
                   1204:            return("ISO-8859-6");
                   1205:         case XML_CHAR_ENCODING_8859_7:
                   1206:            return("ISO-8859-7");
                   1207:         case XML_CHAR_ENCODING_8859_8:
                   1208:            return("ISO-8859-8");
                   1209:         case XML_CHAR_ENCODING_8859_9:
                   1210:            return("ISO-8859-9");
                   1211:         case XML_CHAR_ENCODING_2022_JP:
                   1212:             return("ISO-2022-JP");
                   1213:         case XML_CHAR_ENCODING_SHIFT_JIS:
                   1214:             return("Shift-JIS");
                   1215:         case XML_CHAR_ENCODING_EUC_JP:
                   1216:             return("EUC-JP");
1.50      veillard 1217:        case XML_CHAR_ENCODING_ASCII:
                   1218:            return(NULL);
1.38      daniel   1219:     }
                   1220:     return(NULL);
                   1221: }
                   1222: 
1.9       daniel   1223: /****************************************************************
                   1224:  *                                                             *
                   1225:  *             Char encoding handlers                          *
                   1226:  *                                                             *
                   1227:  ****************************************************************/
                   1228: 
                   1229: /* the size should be growable, but it's not a big deal ... */
                   1230: #define MAX_ENCODING_HANDLERS 50
                   1231: static xmlCharEncodingHandlerPtr *handlers = NULL;
                   1232: static int nbCharEncodingHandler = 0;
                   1233: 
                   1234: /*
                   1235:  * The default is UTF-8 for XML, that's also the default used for the
                   1236:  * parser internals, so the default encoding handler is NULL
                   1237:  */
                   1238: 
                   1239: static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
                   1240: 
                   1241: /**
                   1242:  * xmlNewCharEncodingHandler:
1.18      daniel   1243:  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1.9       daniel   1244:  * @input:  the xmlCharEncodingInputFunc to read that encoding
                   1245:  * @output:  the xmlCharEncodingOutputFunc to write that encoding
                   1246:  *
                   1247:  * Create and registers an xmlCharEncodingHandler.
                   1248:  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
                   1249:  */
                   1250: xmlCharEncodingHandlerPtr
1.25      daniel   1251: xmlNewCharEncodingHandler(const char *name, 
                   1252:                           xmlCharEncodingInputFunc input,
1.9       daniel   1253:                           xmlCharEncodingOutputFunc output) {
                   1254:     xmlCharEncodingHandlerPtr handler;
1.51      veillard 1255:     const char *alias;
1.9       daniel   1256:     char upper[500];
                   1257:     int i;
                   1258:     char *up = 0;
                   1259: 
                   1260:     /*
1.51      veillard 1261:      * Do the alias resolution
                   1262:      */
                   1263:     alias = xmlGetEncodingAlias(name);
                   1264:     if (alias != NULL)
                   1265:        name = alias;
                   1266: 
                   1267:     /*
1.9       daniel   1268:      * Keep only the uppercase version of the encoding.
                   1269:      */
                   1270:     if (name == NULL) {
1.52      veillard 1271:         xmlGenericError(xmlGenericErrorContext,
                   1272:                "xmlNewCharEncodingHandler : no name !\n");
1.9       daniel   1273:        return(NULL);
                   1274:     }
                   1275:     for (i = 0;i < 499;i++) {
                   1276:         upper[i] = toupper(name[i]);
                   1277:        if (upper[i] == 0) break;
                   1278:     }
                   1279:     upper[i] = 0;
1.16      daniel   1280:     up = xmlMemStrdup(upper);
1.9       daniel   1281:     if (up == NULL) {
1.52      veillard 1282:         xmlGenericError(xmlGenericErrorContext,
                   1283:                "xmlNewCharEncodingHandler : out of memory !\n");
1.9       daniel   1284:        return(NULL);
                   1285:     }
                   1286: 
                   1287:     /*
                   1288:      * allocate and fill-up an handler block.
                   1289:      */
                   1290:     handler = (xmlCharEncodingHandlerPtr)
1.16      daniel   1291:               xmlMalloc(sizeof(xmlCharEncodingHandler));
1.9       daniel   1292:     if (handler == NULL) {
1.52      veillard 1293:         xmlGenericError(xmlGenericErrorContext,
                   1294:                "xmlNewCharEncodingHandler : out of memory !\n");
1.9       daniel   1295:        return(NULL);
                   1296:     }
                   1297:     handler->input = input;
                   1298:     handler->output = output;
                   1299:     handler->name = up;
                   1300: 
1.50      veillard 1301: #ifdef LIBXML_ICONV_ENABLED
1.49      veillard 1302:     handler->iconv_in = NULL;
                   1303:     handler->iconv_out = NULL;
1.50      veillard 1304: #endif /* LIBXML_ICONV_ENABLED */
1.49      veillard 1305: 
1.9       daniel   1306:     /*
                   1307:      * registers and returns the handler.
                   1308:      */
                   1309:     xmlRegisterCharEncodingHandler(handler);
1.30      daniel   1310: #ifdef DEBUG_ENCODING
1.52      veillard 1311:     xmlGenericError(xmlGenericErrorContext,
                   1312:            "Registered encoding handler for %s\n", name);
1.30      daniel   1313: #endif
1.9       daniel   1314:     return(handler);
                   1315: }
                   1316: 
                   1317: /**
                   1318:  * xmlInitCharEncodingHandlers:
                   1319:  *
                   1320:  * Initialize the char encoding support, it registers the default
                   1321:  * encoding supported.
1.18      daniel   1322:  * NOTE: while public, this function usually doesn't need to be called
1.9       daniel   1323:  *       in normal processing.
                   1324:  */
                   1325: void
                   1326: xmlInitCharEncodingHandlers(void) {
1.34      daniel   1327:     unsigned short int tst = 0x1234;
                   1328:     unsigned char *ptr = (unsigned char *) &tst; 
                   1329: 
1.9       daniel   1330:     if (handlers != NULL) return;
                   1331: 
                   1332:     handlers = (xmlCharEncodingHandlerPtr *)
1.16      daniel   1333:         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1.34      daniel   1334: 
                   1335:     if (*ptr == 0x12) xmlLittleEndian = 0;
                   1336:     else if (*ptr == 0x34) xmlLittleEndian = 1;
1.52      veillard 1337:     else xmlGenericError(xmlGenericErrorContext,
                   1338:            "Odd problem at endianness detection\n");
1.9       daniel   1339: 
                   1340:     if (handlers == NULL) {
1.52      veillard 1341:         xmlGenericError(xmlGenericErrorContext,
                   1342:                "xmlInitCharEncodingHandlers : out of memory !\n");
1.9       daniel   1343:        return;
                   1344:     }
1.10      daniel   1345:     xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
1.25      daniel   1346:     xmlUTF16LEHandler = 
1.28      daniel   1347:           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
                   1348:     xmlUTF16BEHandler = 
                   1349:           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1.10      daniel   1350:     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1.47      veillard 1351:     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1.48      veillard 1352: #ifdef LIBXML_HTML_ENABLED
                   1353:     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
                   1354: #endif
1.9       daniel   1355: }
                   1356: 
                   1357: /**
1.19      daniel   1358:  * xmlCleanupCharEncodingHandlers:
                   1359:  *
                   1360:  * Cleanup the memory allocated for the char encoding support, it
1.51      veillard 1361:  * unregisters all the encoding handlers and the aliases.
1.19      daniel   1362:  */
                   1363: void
                   1364: xmlCleanupCharEncodingHandlers(void) {
1.51      veillard 1365:     xmlCleanupEncodingAliases();
                   1366: 
1.19      daniel   1367:     if (handlers == NULL) return;
                   1368: 
                   1369:     for (;nbCharEncodingHandler > 0;) {
                   1370:         nbCharEncodingHandler--;
                   1371:        if (handlers[nbCharEncodingHandler] != NULL) {
1.31      daniel   1372:            if (handlers[nbCharEncodingHandler]->name != NULL)
                   1373:                xmlFree(handlers[nbCharEncodingHandler]->name);
1.19      daniel   1374:            xmlFree(handlers[nbCharEncodingHandler]);
                   1375:        }
                   1376:     }
                   1377:     xmlFree(handlers);
                   1378:     handlers = NULL;
                   1379:     nbCharEncodingHandler = 0;
                   1380:     xmlDefaultCharEncodingHandler = NULL;
                   1381: }
                   1382: 
                   1383: /**
1.9       daniel   1384:  * xmlRegisterCharEncodingHandler:
                   1385:  * @handler:  the xmlCharEncodingHandlerPtr handler block
                   1386:  *
                   1387:  * Register the char encoding handler, surprizing, isn't it ?
                   1388:  */
                   1389: void
                   1390: xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
                   1391:     if (handlers == NULL) xmlInitCharEncodingHandlers();
                   1392:     if (handler == NULL) {
1.52      veillard 1393:         xmlGenericError(xmlGenericErrorContext,
                   1394:                "xmlRegisterCharEncodingHandler: NULL handler !\n");
1.9       daniel   1395:        return;
                   1396:     }
                   1397: 
                   1398:     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1.52      veillard 1399:         xmlGenericError(xmlGenericErrorContext, 
1.9       daniel   1400:        "xmlRegisterCharEncodingHandler: Too many handler registered\n");
1.52      veillard 1401:         xmlGenericError(xmlGenericErrorContext,
                   1402:                "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
1.9       daniel   1403:        return;
                   1404:     }
                   1405:     handlers[nbCharEncodingHandler++] = handler;
                   1406: }
                   1407: 
                   1408: /**
                   1409:  * xmlGetCharEncodingHandler:
                   1410:  * @enc:  an xmlCharEncoding value.
                   1411:  *
                   1412:  * Search in the registrered set the handler able to read/write that encoding.
                   1413:  *
                   1414:  * Returns the handler or NULL if not found
                   1415:  */
                   1416: xmlCharEncodingHandlerPtr
                   1417: xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1.30      daniel   1418:     xmlCharEncodingHandlerPtr handler;
                   1419: 
1.9       daniel   1420:     if (handlers == NULL) xmlInitCharEncodingHandlers();
1.25      daniel   1421:     switch (enc) {
                   1422:         case XML_CHAR_ENCODING_ERROR:
                   1423:            return(NULL);
                   1424:         case XML_CHAR_ENCODING_NONE:
                   1425:            return(NULL);
                   1426:         case XML_CHAR_ENCODING_UTF8:
                   1427:            return(NULL);
                   1428:         case XML_CHAR_ENCODING_UTF16LE:
                   1429:            return(xmlUTF16LEHandler);
                   1430:         case XML_CHAR_ENCODING_UTF16BE:
                   1431:            return(xmlUTF16BEHandler);
                   1432:         case XML_CHAR_ENCODING_EBCDIC:
1.30      daniel   1433:             handler = xmlFindCharEncodingHandler("EBCDIC");
                   1434:             if (handler != NULL) return(handler);
                   1435:             handler = xmlFindCharEncodingHandler("ebcdic");
                   1436:             if (handler != NULL) return(handler);
                   1437:            break;
1.38      daniel   1438:         case XML_CHAR_ENCODING_UCS4BE:
1.30      daniel   1439:             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
                   1440:             if (handler != NULL) return(handler);
                   1441:             handler = xmlFindCharEncodingHandler("UCS-4");
                   1442:             if (handler != NULL) return(handler);
                   1443:             handler = xmlFindCharEncodingHandler("UCS4");
                   1444:             if (handler != NULL) return(handler);
                   1445:            break;
1.38      daniel   1446:         case XML_CHAR_ENCODING_UCS4LE:
                   1447:             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
                   1448:             if (handler != NULL) return(handler);
                   1449:             handler = xmlFindCharEncodingHandler("UCS-4");
                   1450:             if (handler != NULL) return(handler);
                   1451:             handler = xmlFindCharEncodingHandler("UCS4");
1.30      daniel   1452:             if (handler != NULL) return(handler);
                   1453:            break;
1.25      daniel   1454:         case XML_CHAR_ENCODING_UCS4_2143:
1.30      daniel   1455:            break;
1.25      daniel   1456:         case XML_CHAR_ENCODING_UCS4_3412:
1.30      daniel   1457:            break;
1.25      daniel   1458:         case XML_CHAR_ENCODING_UCS2:
1.30      daniel   1459:             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
                   1460:             if (handler != NULL) return(handler);
                   1461:             handler = xmlFindCharEncodingHandler("UCS-2");
                   1462:             if (handler != NULL) return(handler);
                   1463:             handler = xmlFindCharEncodingHandler("UCS2");
                   1464:             if (handler != NULL) return(handler);
                   1465:            break;
1.42      veillard 1466: 
                   1467:            /*
                   1468:             * We used to keep ISO Latin encodings native in the
                   1469:             * generated data. This led to so many problems that
                   1470:             * this has been removed. One can still change this
                   1471:             * back by registering no-ops encoders for those
                   1472:             */
1.25      daniel   1473:         case XML_CHAR_ENCODING_8859_1:
1.42      veillard 1474:            handler = xmlFindCharEncodingHandler("ISO-8859-1");
                   1475:            if (handler != NULL) return(handler);
                   1476:            break;
1.25      daniel   1477:         case XML_CHAR_ENCODING_8859_2:
1.42      veillard 1478:            handler = xmlFindCharEncodingHandler("ISO-8859-2");
                   1479:            if (handler != NULL) return(handler);
                   1480:            break;
1.25      daniel   1481:         case XML_CHAR_ENCODING_8859_3:
1.42      veillard 1482:            handler = xmlFindCharEncodingHandler("ISO-8859-3");
                   1483:            if (handler != NULL) return(handler);
                   1484:            break;
1.25      daniel   1485:         case XML_CHAR_ENCODING_8859_4:
1.42      veillard 1486:            handler = xmlFindCharEncodingHandler("ISO-8859-4");
                   1487:            if (handler != NULL) return(handler);
                   1488:            break;
1.25      daniel   1489:         case XML_CHAR_ENCODING_8859_5:
1.42      veillard 1490:            handler = xmlFindCharEncodingHandler("ISO-8859-5");
                   1491:            if (handler != NULL) return(handler);
                   1492:            break;
1.25      daniel   1493:         case XML_CHAR_ENCODING_8859_6:
1.42      veillard 1494:            handler = xmlFindCharEncodingHandler("ISO-8859-6");
                   1495:            if (handler != NULL) return(handler);
                   1496:            break;
1.25      daniel   1497:         case XML_CHAR_ENCODING_8859_7:
1.42      veillard 1498:            handler = xmlFindCharEncodingHandler("ISO-8859-7");
                   1499:            if (handler != NULL) return(handler);
                   1500:            break;
1.25      daniel   1501:         case XML_CHAR_ENCODING_8859_8:
1.42      veillard 1502:            handler = xmlFindCharEncodingHandler("ISO-8859-8");
                   1503:            if (handler != NULL) return(handler);
                   1504:            break;
1.25      daniel   1505:         case XML_CHAR_ENCODING_8859_9:
1.42      veillard 1506:            handler = xmlFindCharEncodingHandler("ISO-8859-9");
                   1507:            if (handler != NULL) return(handler);
                   1508:            break;
                   1509: 
                   1510: 
1.25      daniel   1511:         case XML_CHAR_ENCODING_2022_JP:
1.30      daniel   1512:             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
                   1513:             if (handler != NULL) return(handler);
                   1514:            break;
1.25      daniel   1515:         case XML_CHAR_ENCODING_SHIFT_JIS:
1.30      daniel   1516:             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
                   1517:             if (handler != NULL) return(handler);
                   1518:             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
                   1519:             if (handler != NULL) return(handler);
                   1520:             handler = xmlFindCharEncodingHandler("Shift_JIS");
                   1521:             if (handler != NULL) return(handler);
                   1522:            break;
1.25      daniel   1523:         case XML_CHAR_ENCODING_EUC_JP:
1.30      daniel   1524:             handler = xmlFindCharEncodingHandler("EUC-JP");
                   1525:             if (handler != NULL) return(handler);
                   1526:            break;
                   1527:        default: 
                   1528:            break;
1.25      daniel   1529:     }
1.30      daniel   1530:     
                   1531: #ifdef DEBUG_ENCODING
1.52      veillard 1532:     xmlGenericError(xmlGenericErrorContext,
                   1533:            "No handler found for encoding %d\n", enc);
1.30      daniel   1534: #endif
1.9       daniel   1535:     return(NULL);
                   1536: }
                   1537: 
                   1538: /**
                   1539:  * xmlGetCharEncodingHandler:
                   1540:  * @enc:  a string describing the char encoding.
                   1541:  *
                   1542:  * Search in the registrered set the handler able to read/write that encoding.
                   1543:  *
                   1544:  * Returns the handler or NULL if not found
                   1545:  */
                   1546: xmlCharEncodingHandlerPtr
                   1547: xmlFindCharEncodingHandler(const char *name) {
1.51      veillard 1548:     const char *nalias;
                   1549:     const char *norig;
1.36      daniel   1550:     xmlCharEncoding alias;
1.30      daniel   1551: #ifdef LIBXML_ICONV_ENABLED
1.40      daniel   1552:     xmlCharEncodingHandlerPtr enc;
1.30      daniel   1553:     iconv_t icv_in, icv_out;
                   1554: #endif /* LIBXML_ICONV_ENABLED */
                   1555:     char upper[100];
1.9       daniel   1556:     int i;
                   1557: 
                   1558:     if (handlers == NULL) xmlInitCharEncodingHandlers();
                   1559:     if (name == NULL) return(xmlDefaultCharEncodingHandler);
                   1560:     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
                   1561: 
1.36      daniel   1562:     /*
1.51      veillard 1563:      * Do the alias resolution
                   1564:      */
                   1565:     norig = name;
                   1566:     nalias = xmlGetEncodingAlias(name);
                   1567:     if (nalias != NULL)
                   1568:        name = nalias;
                   1569: 
                   1570:     /*
1.36      daniel   1571:      * Check first for directly registered encoding names
                   1572:      */
1.30      daniel   1573:     for (i = 0;i < 99;i++) {
1.9       daniel   1574:         upper[i] = toupper(name[i]);
                   1575:        if (upper[i] == 0) break;
                   1576:     }
                   1577:     upper[i] = 0;
                   1578: 
                   1579:     for (i = 0;i < nbCharEncodingHandler; i++)
1.30      daniel   1580:         if (!strcmp(upper, handlers[i]->name)) {
                   1581: #ifdef DEBUG_ENCODING
1.52      veillard 1582:             xmlGenericError(xmlGenericErrorContext,
                   1583:                    "Found registered handler for encoding %s\n", name);
1.30      daniel   1584: #endif
1.9       daniel   1585:            return(handlers[i]);
1.30      daniel   1586:        }
1.9       daniel   1587: 
1.30      daniel   1588: #ifdef LIBXML_ICONV_ENABLED
                   1589:     /* check whether iconv can handle this */
1.31      daniel   1590:     icv_in = iconv_open("UTF-8", name);
                   1591:     icv_out = iconv_open(name, "UTF-8");
1.30      daniel   1592:     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1.43      veillard 1593:            enc = (xmlCharEncodingHandlerPtr)
                   1594:                  xmlMalloc(sizeof(xmlCharEncodingHandler));
1.32      daniel   1595:            if (enc == NULL) {
                   1596:                iconv_close(icv_in);
                   1597:                iconv_close(icv_out);
                   1598:                return(NULL);
                   1599:            }
1.41      daniel   1600:            enc->name = xmlMemStrdup(name);
1.30      daniel   1601:            enc->input = NULL;
                   1602:            enc->output = NULL;
                   1603:            enc->iconv_in = icv_in;
                   1604:            enc->iconv_out = icv_out;
                   1605: #ifdef DEBUG_ENCODING
1.52      veillard 1606:             xmlGenericError(xmlGenericErrorContext,
                   1607:                    "Found iconv handler for encoding %s\n", name);
1.30      daniel   1608: #endif
                   1609:            return enc;
                   1610:     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1.52      veillard 1611:            xmlGenericError(xmlGenericErrorContext,
                   1612:                    "iconv : problems with filters for '%s'\n", name);
1.30      daniel   1613:     }
                   1614: #endif /* LIBXML_ICONV_ENABLED */
1.38      daniel   1615: 
1.30      daniel   1616: #ifdef DEBUG_ENCODING
1.52      veillard 1617:     xmlGenericError(xmlGenericErrorContext,
                   1618:            "No handler found for encoding %s\n", name);
1.30      daniel   1619: #endif
1.38      daniel   1620: 
                   1621:     /*
                   1622:      * Fallback using the canonical names
                   1623:      */
1.51      veillard 1624:     alias = xmlParseCharEncoding(norig);
1.38      daniel   1625:     if (alias != XML_CHAR_ENCODING_ERROR) {
                   1626:         const char* canon;
                   1627:         canon = xmlGetCharEncodingName(alias);
                   1628:         if ((canon != NULL) && (strcmp(name, canon))) {
                   1629:            return(xmlFindCharEncodingHandler(canon));
                   1630:         }
                   1631:     }
                   1632: 
1.9       daniel   1633:     return(NULL);
1.30      daniel   1634: }
                   1635: 
                   1636: #ifdef LIBXML_ICONV_ENABLED
                   1637: /**
                   1638:  * xmlIconvWrapper:
                   1639:  * @cd:                iconv converter data structure
                   1640:  * @out:  a pointer to an array of bytes to store the result
                   1641:  * @outlen:  the length of @out
                   1642:  * @in:  a pointer to an array of ISO Latin 1 chars
                   1643:  * @inlen:  the length of @in
                   1644:  *
                   1645:  * Returns 0 if success, or 
                   1646:  *     -1 by lack of space, or
                   1647:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
                   1648:  *        the result of transformation can't fit into the encoding we want), or
                   1649:  *     -3 if there the last byte can't form a single output char.
                   1650:  *     
                   1651:  * The value of @inlen after return is the number of octets consumed
                   1652:  *     as the return value is positive, else unpredictiable.
                   1653:  * The value of @outlen after return is the number of ocetes consumed.
                   1654:  */
                   1655: static int
                   1656: xmlIconvWrapper(iconv_t cd,
                   1657:        unsigned char *out, int *outlen,
                   1658:        const unsigned char *in, int *inlen) {
                   1659: 
                   1660:        size_t icv_inlen = *inlen, icv_outlen = *outlen;
                   1661:        const char *icv_in = (const char *) in;
                   1662:        char *icv_out = (char *) out;
                   1663:        int ret;
                   1664: 
                   1665:        ret = iconv(cd,
                   1666:                &icv_in, &icv_inlen,
                   1667:                &icv_out, &icv_outlen);
1.35      daniel   1668:        if (in != NULL) {
                   1669:            *inlen -= icv_inlen;
                   1670:            *outlen -= icv_outlen;
                   1671:        } else {
                   1672:            *inlen = 0;
                   1673:            *outlen = 0;
                   1674:        }
1.30      daniel   1675:        if (icv_inlen != 0 || ret == (size_t) -1) {
                   1676: #ifdef EILSEQ
                   1677:                if (errno == EILSEQ) {
1.31      daniel   1678:                        return -2;
1.30      daniel   1679:                } else
                   1680: #endif
                   1681: #ifdef E2BIG
                   1682:                if (errno == E2BIG) {
                   1683:                        return -1;
                   1684:                } else
                   1685: #endif
                   1686: #ifdef EINVAL
                   1687:                if (errno == EINVAL) {
1.31      daniel   1688:                        return -3;
1.53      veillard 1689:                } else
1.30      daniel   1690: #endif
1.53      veillard 1691:                {
1.30      daniel   1692:                        return -3;
                   1693:                }
                   1694:        }
                   1695:        return 0;
                   1696: }
                   1697: #endif /* LIBXML_ICONV_ENABLED */
1.38      daniel   1698: 
                   1699: /**
                   1700:  * xmlCharEncFirstLine:
                   1701:  * @handler:   char enconding transformation data structure
                   1702:  * @out:  an xmlBuffer for the output.
                   1703:  * @in:  an xmlBuffer for the input
                   1704:  *     
                   1705:  * Front-end for the encoding handler input function, but handle only
                   1706:  * the very first line, i.e. limit itself to 45 chars.
                   1707:  *     
                   1708:  * Returns the number of byte written if success, or 
                   1709:  *     -1 general error
                   1710:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
                   1711:  *        the result of transformation can't fit into the encoding we want), or
                   1712:  */
                   1713: int
                   1714: xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                   1715:                  xmlBufferPtr in) {
                   1716:     int ret = -2;
                   1717:     int written;
                   1718:     int toconv;
                   1719: 
                   1720:     if (handler == NULL) return(-1);
                   1721:     if (out == NULL) return(-1);
                   1722:     if (in == NULL) return(-1);
                   1723: 
                   1724:     written = out->size - out->use;
                   1725:     toconv = in->use;
                   1726:     if (toconv * 2 >= written) {
1.39      daniel   1727:         xmlBufferGrow(out, toconv);
1.38      daniel   1728:        written = out->size - out->use - 1;
                   1729:     }
1.39      daniel   1730: 
1.38      daniel   1731:     /*
                   1732:      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
                   1733:      * 45 chars should be sufficient to reach the end of the encoding
                   1734:      * decalration without going too far inside the document content.
                   1735:      */
                   1736:     written = 45;
                   1737: 
                   1738:     if (handler->input != NULL) {
                   1739:        ret = handler->input(&out->content[out->use], &written,
                   1740:                             in->content, &toconv);
                   1741:        xmlBufferShrink(in, toconv);
                   1742:        out->use += written;
                   1743:        out->content[out->use] = 0;
                   1744:     }
                   1745: #ifdef LIBXML_ICONV_ENABLED
                   1746:     else if (handler->iconv_in != NULL) {
                   1747:        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
                   1748:                              &written, in->content, &toconv);
                   1749:        xmlBufferShrink(in, toconv);
                   1750:        out->use += written;
                   1751:        out->content[out->use] = 0;
                   1752:        if (ret == -1) ret = -3;
                   1753:     }
                   1754: #endif /* LIBXML_ICONV_ENABLED */
                   1755: #ifdef DEBUG_ENCODING
                   1756:     switch (ret) {
                   1757:         case 0:
1.52      veillard 1758:            xmlGenericError(xmlGenericErrorContext,
                   1759:                    "converted %d bytes to %d bytes of input\n",
1.38      daniel   1760:                    toconv, written);
                   1761:            break;
                   1762:         case -1:
1.52      veillard 1763:            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1.38      daniel   1764:                    toconv, written, in->use);
                   1765:            break;
                   1766:         case -2:
1.52      veillard 1767:            xmlGenericError(xmlGenericErrorContext,
                   1768:                    "input conversion failed due to input error\n");
1.38      daniel   1769:            break;
                   1770:         case -3:
1.52      veillard 1771:            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1.38      daniel   1772:                    toconv, written, in->use);
                   1773:            break;
                   1774:        default:
1.52      veillard 1775:            xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1.38      daniel   1776:     }
                   1777: #endif
                   1778:     /*
                   1779:      * Ignore when input buffer is not on a boundary
                   1780:      */
                   1781:     if (ret == -3) ret = 0;
                   1782:     if (ret == -1) ret = 0;
                   1783:     return(ret);
                   1784: }
1.30      daniel   1785: 
                   1786: /**
                   1787:  * xmlCharEncInFunc:
                   1788:  * @handler:   char enconding transformation data structure
1.31      daniel   1789:  * @out:  an xmlBuffer for the output.
                   1790:  * @in:  an xmlBuffer for the input
1.30      daniel   1791:  *     
                   1792:  * Generic front-end for the encoding handler input function
                   1793:  *     
1.31      daniel   1794:  * Returns the number of byte written if success, or 
                   1795:  *     -1 general error
1.30      daniel   1796:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
                   1797:  *        the result of transformation can't fit into the encoding we want), or
                   1798:  */
                   1799: int
1.31      daniel   1800: xmlCharEncInFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                   1801:                  xmlBufferPtr in) {
1.30      daniel   1802:     int ret = -2;
1.31      daniel   1803:     int written;
                   1804:     int toconv;
1.30      daniel   1805: 
1.31      daniel   1806:     if (handler == NULL) return(-1);
                   1807:     if (out == NULL) return(-1);
                   1808:     if (in == NULL) return(-1);
                   1809: 
1.50      veillard 1810:     toconv = in->use;
                   1811:     if (toconv == 0)
                   1812:        return(0);
1.31      daniel   1813:     written = out->size - out->use;
                   1814:     if (toconv * 2 >= written) {
1.54    ! veillard 1815:         xmlBufferGrow(out, out->size + toconv * 2);
1.33      daniel   1816:        written = out->size - out->use - 1;
1.31      daniel   1817:     }
1.30      daniel   1818:     if (handler->input != NULL) {
1.32      daniel   1819:        ret = handler->input(&out->content[out->use], &written,
1.31      daniel   1820:                             in->content, &toconv);
                   1821:        xmlBufferShrink(in, toconv);
                   1822:        out->use += written;
1.33      daniel   1823:        out->content[out->use] = 0;
1.30      daniel   1824:     }
                   1825: #ifdef LIBXML_ICONV_ENABLED
1.31      daniel   1826:     else if (handler->iconv_in != NULL) {
                   1827:        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
                   1828:                              &written, in->content, &toconv);
                   1829:        xmlBufferShrink(in, toconv);
                   1830:        out->use += written;
1.33      daniel   1831:        out->content[out->use] = 0;
                   1832:        if (ret == -1) ret = -3;
1.30      daniel   1833:     }
                   1834: #endif /* LIBXML_ICONV_ENABLED */
1.39      daniel   1835:     switch (ret) {
1.30      daniel   1836: #ifdef DEBUG_ENCODING
                   1837:         case 0:
1.52      veillard 1838:            xmlGenericError(xmlGenericErrorContext,
                   1839:                    "converted %d bytes to %d bytes of input\n",
1.31      daniel   1840:                    toconv, written);
1.30      daniel   1841:            break;
                   1842:         case -1:
1.52      veillard 1843:            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1.31      daniel   1844:                    toconv, written, in->use);
1.30      daniel   1845:            break;
                   1846:         case -3:
1.52      veillard 1847:            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1.31      daniel   1848:                    toconv, written, in->use);
1.30      daniel   1849:            break;
1.39      daniel   1850: #endif
                   1851:         case -2:
1.52      veillard 1852:            xmlGenericError(xmlGenericErrorContext,
                   1853:                    "input conversion failed due to input error\n");
                   1854:            xmlGenericError(xmlGenericErrorContext,
                   1855:                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1.39      daniel   1856:                    in->content[0], in->content[1],
                   1857:                    in->content[2], in->content[3]);
1.30      daniel   1858:     }
1.33      daniel   1859:     /*
                   1860:      * Ignore when input buffer is not on a boundary
                   1861:      */
                   1862:     if (ret == -3) ret = 0;
1.30      daniel   1863:     return(ret);
                   1864: }
                   1865: 
                   1866: /**
                   1867:  * xmlCharEncOutFunc:
                   1868:  * @handler:   char enconding transformation data structure
1.31      daniel   1869:  * @out:  an xmlBuffer for the output.
                   1870:  * @in:  an xmlBuffer for the input
                   1871:  *     
                   1872:  * Generic front-end for the encoding handler output function
1.35      daniel   1873:  * a first call with @in == NULL has to be made firs to initiate the 
                   1874:  * output in case of non-stateless encoding needing to initiate their
                   1875:  * state or the output (like the BOM in UTF16).
1.39      daniel   1876:  * In case of UTF8 sequence conversion errors for the given encoder,
                   1877:  * the content will be automatically remapped to a CharRef sequence.
1.30      daniel   1878:  *     
1.31      daniel   1879:  * Returns the number of byte written if success, or 
                   1880:  *     -1 general error
1.30      daniel   1881:  *     -2 if the transcoding fails (for *in is not valid utf8 string or
                   1882:  *        the result of transformation can't fit into the encoding we want), or
                   1883:  */
                   1884: int
1.31      daniel   1885: xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                   1886:                   xmlBufferPtr in) {
1.30      daniel   1887:     int ret = -2;
1.31      daniel   1888:     int written;
1.54    ! veillard 1889:     int writtentot = 0;
1.31      daniel   1890:     int toconv;
1.39      daniel   1891:     int output = 0;
1.31      daniel   1892: 
                   1893:     if (handler == NULL) return(-1);
                   1894:     if (out == NULL) return(-1);
1.39      daniel   1895: 
                   1896: retry:
                   1897:     
1.35      daniel   1898:     written = out->size - out->use;
                   1899: 
1.39      daniel   1900:     /*
                   1901:      * First specific handling of in = NULL, i.e. the initialization call
                   1902:      */
1.35      daniel   1903:     if (in == NULL) {
                   1904:         toconv = 0;
                   1905:        if (handler->output != NULL) {
                   1906:            ret = handler->output(&out->content[out->use], &written,
                   1907:                                  NULL, &toconv);
                   1908:            out->use += written;
                   1909:            out->content[out->use] = 0;
                   1910:        }
                   1911: #ifdef LIBXML_ICONV_ENABLED
                   1912:        else if (handler->iconv_out != NULL) {
                   1913:            ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
                   1914:                                  &written, NULL, &toconv);
                   1915:            out->use += written;
                   1916:            out->content[out->use] = 0;
                   1917:        }
                   1918: #endif /* LIBXML_ICONV_ENABLED */
                   1919: #ifdef DEBUG_ENCODING
1.52      veillard 1920:        xmlGenericError(xmlGenericErrorContext,
                   1921:                "initialized encoder\n");
1.35      daniel   1922: #endif
                   1923:         return(0);
                   1924:     }
1.30      daniel   1925: 
1.39      daniel   1926:     /*
                   1927:      * Convertion itself.
                   1928:      */
1.33      daniel   1929:     toconv = in->use;
1.50      veillard 1930:     if (toconv == 0)
                   1931:        return(0);
1.33      daniel   1932:     if (toconv * 2 >= written) {
                   1933:         xmlBufferGrow(out, toconv * 2);
                   1934:        written = out->size - out->use - 1;
                   1935:     }
1.30      daniel   1936:     if (handler->output != NULL) {
1.33      daniel   1937:        ret = handler->output(&out->content[out->use], &written,
1.35      daniel   1938:                              in->content, &toconv);
1.31      daniel   1939:        xmlBufferShrink(in, toconv);
                   1940:        out->use += written;
1.54    ! veillard 1941:        writtentot += written;
1.33      daniel   1942:        out->content[out->use] = 0;
1.30      daniel   1943:     }
                   1944: #ifdef LIBXML_ICONV_ENABLED
                   1945:     else if (handler->iconv_out != NULL) {
1.31      daniel   1946:        ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
                   1947:                              &written, in->content, &toconv);
                   1948:        xmlBufferShrink(in, toconv);
                   1949:        out->use += written;
1.54    ! veillard 1950:        writtentot += written;
1.33      daniel   1951:        out->content[out->use] = 0;
1.54    ! veillard 1952:        if (ret == -1) {
        !          1953:            if (written > 0) {
        !          1954:                /*
        !          1955:                 * Can be a limitation of iconv
        !          1956:                 */
        !          1957:                goto retry;
        !          1958:            }
        !          1959:            ret = -3;
        !          1960:        }
1.30      daniel   1961:     }
                   1962: #endif /* LIBXML_ICONV_ENABLED */
1.46      veillard 1963:     else {
1.52      veillard 1964:        xmlGenericError(xmlGenericErrorContext,
                   1965:                "xmlCharEncOutFunc: no output function !\n");
1.46      veillard 1966:        return(-1);
                   1967:     }
1.39      daniel   1968: 
                   1969:     if (ret >= 0) output += ret;
                   1970: 
                   1971:     /*
                   1972:      * Attempt to handle error cases
                   1973:      */
                   1974:     switch (ret) {
1.30      daniel   1975: #ifdef DEBUG_ENCODING
                   1976:         case 0:
1.52      veillard 1977:            xmlGenericError(xmlGenericErrorContext,
                   1978:                    "converted %d bytes to %d bytes of output\n",
1.31      daniel   1979:                    toconv, written);
1.30      daniel   1980:            break;
                   1981:         case -1:
1.52      veillard 1982:            xmlGenericError(xmlGenericErrorContext,
                   1983:                    "output conversion failed by lack of space\n");
1.30      daniel   1984:            break;
1.54    ! veillard 1985: #endif
1.30      daniel   1986:         case -3:
1.52      veillard 1987:            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
1.31      daniel   1988:                    toconv, written, in->use);
1.30      daniel   1989:            break;
1.39      daniel   1990:         case -2: {
                   1991:            int len = in->use;
1.43      veillard 1992:            const xmlChar *utf = (const xmlChar *) in->content;
1.39      daniel   1993:            int cur;
                   1994: 
                   1995:            cur = xmlGetUTF8Char(utf, &len);
                   1996:            if (cur > 0) {
                   1997:                xmlChar charref[20];
                   1998: 
                   1999: #ifdef DEBUG_ENCODING
1.52      veillard 2000:                xmlGenericError(xmlGenericErrorContext,
                   2001:                        "handling output conversion error\n");
                   2002:                xmlGenericError(xmlGenericErrorContext,
                   2003:                        "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1.39      daniel   2004:                        in->content[0], in->content[1],
                   2005:                        in->content[2], in->content[3]);
                   2006: #endif
                   2007:                /*
                   2008:                 * Removes the UTF8 sequence, and replace it by a charref
                   2009:                 * and continue the transcoding phase, hoping the error
                   2010:                 * did not mangle the encoder state.
                   2011:                 */
1.43      veillard 2012:                sprintf((char *) charref, "&#x%X;", cur);
1.39      daniel   2013:                xmlBufferShrink(in, len);
                   2014:                xmlBufferAddHead(in, charref, -1);
                   2015: 
                   2016:                goto retry;
                   2017:            } else {
1.52      veillard 2018:                xmlGenericError(xmlGenericErrorContext,
                   2019:                        "output conversion failed due to conv error\n");
                   2020:                xmlGenericError(xmlGenericErrorContext,
                   2021:                        "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1.39      daniel   2022:                        in->content[0], in->content[1],
                   2023:                        in->content[2], in->content[3]);
1.50      veillard 2024:                in->content[0] = ' ';
1.39      daniel   2025:            }
                   2026:            break;
                   2027:        }
1.30      daniel   2028:     }
                   2029:     return(ret);
                   2030: }
                   2031: 
                   2032: /**
                   2033:  * xmlCharEncCloseFunc:
                   2034:  * @handler:   char enconding transformation data structure
                   2035:  *     
                   2036:  * Generic front-end for hencoding handler close function
                   2037:  *
                   2038:  * Returns 0 if success, or -1 in case of error
                   2039:  */
                   2040: int
                   2041: xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
                   2042:     int ret = 0;
1.31      daniel   2043:     if (handler == NULL) return(-1);
                   2044:     if (handler->name == NULL) return(-1);
1.30      daniel   2045: #ifdef LIBXML_ICONV_ENABLED
1.31      daniel   2046:     /*
                   2047:      * Iconv handlers can be oused only once, free the whole block.
                   2048:      * and the associated icon resources.
                   2049:      */
1.32      daniel   2050:     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
                   2051:        if (handler->name != NULL)
                   2052:            xmlFree(handler->name);
                   2053:        handler->name = NULL;
                   2054:        if (handler->iconv_out != NULL) {
                   2055:            if (iconv_close(handler->iconv_out))
                   2056:                ret = -1;
                   2057:            handler->iconv_out = NULL;
                   2058:        }
                   2059:        if (handler->iconv_in != NULL) {
                   2060:            if (iconv_close(handler->iconv_in))
                   2061:                ret = -1;
                   2062:            handler->iconv_in = NULL;
                   2063:        }
                   2064:        xmlFree(handler);
1.30      daniel   2065:     }
                   2066: #endif /* LIBXML_ICONV_ENABLED */
                   2067: #ifdef DEBUG_ENCODING
                   2068:     if (ret)
1.52      veillard 2069:         xmlGenericError(xmlGenericErrorContext,
                   2070:                "failed to close the encoding handler\n");
1.30      daniel   2071:     else
1.52      veillard 2072:         xmlGenericError(xmlGenericErrorContext,
                   2073:                "closed the encoding handler\n");
1.30      daniel   2074: 
                   2075: #endif
                   2076:     return(ret);
1.9       daniel   2077: }
                   2078: 

Webmaster