/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * kanji_conv.c
 *
 * Copyright (C) 2000 Takuo Kitame <kitame@gnome.gr.jp>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "common.h"
#include "gnomeicu.h"

#include <errno.h>
#include <locale.h>

#ifdef HAVE_ICONV
#include <iconv.h>
#endif

#include "kanji_conv.h"

#define ESC          0x1b
#define SS2          0x8e

#define JCODE_LOCALE_EUC   "ja", "ja_JP", "ja_JP.ujis", "ja_JP.EUC", "ja_JP.eucJP", "ja_JP.eucjp"
#define JCODE_LOCALE_JIS   "ja_JP.JIS", "ja_JP.jis", "ja_JP.iso-2022-jp"
#define JCODE_LOCALE_SJIS  "ja_JP.SJIS", "ja_JP.sjis"

/****************************************************************************/
/* Japanese string code detector */
/****************************************************************************/
/*
 * Guess the Japanese encoding of the NUL-terminated byte string str.
 *
 * Pass 1 walks the string looking for byte sequences that decide the answer
 * immediately:
 *   - ESC '$' followed by 'B' or '@'            -> KC_JIS
 *   - a byte in 0x81-0x8d or 0x8f-0x9f          -> KC_SJIS
 *   - SS2 followed by 0x40-0x7e, 0x80-0xa0 or
 *     0xe0-0xfc                                 -> KC_SJIS
 *   - a byte in 0xa1-0xdf followed by 0xa0 or
 *     0xe0-0xfe                                 -> KC_EUC
 *   - a byte in 0xf0-0xfe in lead position      -> KC_EUC
 *
 * If pass 1 is inconclusive, byte pairs matching the Shift_JIS ranges and
 * byte pairs matching the EUC ranges are counted separately; the larger
 * count wins (EUC on a non-zero tie).
 *
 * Returns KC_JIS, KC_SJIS, KC_EUC, or KC_ASCII when neither kind of pair
 * occurs.  str must not be NULL.
 */
static int 
detect_kanji(unsigned char *str)
{
    int c;
    int c1, c2;
    int euc_c = 0, sjis_c = 0;
    unsigned char *ptr = str;

    /* Pass 1: look for sequences that are decisive on their own. */
    while((c = (int)*ptr) != '\0') {
        if(c == ESC) {
            if((c = (int)*(++ptr)) == '\0')
                break;
            if(c == '$') {
                if((c = (int)*(++ptr)) == '\0')
                    break;
                if(c == 'B' || c == '@')
                    return KC_JIS;
            }
            /* Not a recognised escape: skip the byte just examined. */
            ptr++;
            continue;
        }

        if((c >= 0x81 && c <= 0x8d) || (c >= 0x8f && c <= 0x9f))
            return KC_SJIS;

        if(c == SS2) {
            if((c = (int)*(++ptr)) == '\0')
                break;
            if((c >= 0x40 && c <= 0x7e) ||
               (c >= 0x80 && c <= 0xa0) ||
               (c >= 0xe0 && c <= 0xfc))
                return KC_SJIS;
            /* Ambiguous second byte: stop here and let the counts decide. */
            if(c >= 0xa1 && c <= 0xdf)
                break;

            ptr++;
            continue;
        }

        if(c >= 0xa1 && c <= 0xdf) {
            if((c = (int)*(++ptr)) == '\0')
                break;
            if(c == 0xa0 || (c >= 0xe0 && c <= 0xfe))
                return KC_EUC;

            /* Any other second byte is inconclusive; skip the pair. */
            ptr++;
            continue;
        }

        if(c >= 0xf0 && c <= 0xfe)
            return KC_EUC;

        ptr++;
    }

    /*
     * Pass 2: count byte pairs that fit the Shift_JIS lead/trail ranges.
     * After a hit the following byte is consumed as well; stop if that byte
     * is the terminator so the scan never runs past the end of the string.
     */
    ptr = str;
    c2 = 0;
    while((c1 = (int)*ptr++) != '\0') {
        if(((c2 >  0x80 && c2 < 0xa0) || (c2 >= 0xe0 && c2 < 0xfd)) &&
           ((c1 >= 0x40 && c1 < 0x7f) || (c1 >= 0x80 && c1 < 0xfd))) {
            sjis_c++;
            if((c1 = (int)*ptr) == '\0')
                break;
            ptr++;
        }
        c2 = c1;
    }

    /* Pass 3: same idea for pairs where both bytes are in 0xa1-0xfe. */
    ptr = str;
    c2 = 0;
    while((c1 = (int)*ptr++) != '\0') {
        if((c2 > 0xa0 && c2 < 0xff) &&
           (c1 > 0xa0 && c1 < 0xff)) {
            euc_c++;
            if((c1 = (int)*ptr) == '\0')
                break;
            ptr++;
        }
        c2 = c1;
    }

    if(sjis_c > euc_c)
        return KC_SJIS;
    if(euc_c > 0)
        return KC_EUC;
    return KC_ASCII;
}

/*
 * Detect the encoding of str and report it as a KC_* code.
 *
 * Returns 0 when str is NULL.  Otherwise returns KC_EUC, KC_JIS or KC_SJIS
 * when detect_kanji() reports one of those, and KC_ASCII for anything else.
 */
static int
int_detect_JCode(char *str)
{
    int code;

    if(str == NULL)
        return 0;

    code = detect_kanji((unsigned char *)str);

    /* Only the three Japanese encodings are passed through unchanged. */
    if(code == KC_EUC || code == KC_JIS || code == KC_SJIS)
        return code;

    return KC_ASCII;
}

/*
 * Detect the encoding of str and report it as a codeset name.
 *
 * Returns NULL when str is NULL.  Otherwise returns one of the string
 * literals "EUC-JP", "ISO-2022-JP" or "Shift_JIS", or "ASCII" when
 * detect_kanji() reports anything else.  The result must not be freed.
 */
static const char *
detect_JCode(char *str)
{
    int code;

    if(str == NULL)
        return NULL;

    code = detect_kanji((unsigned char *)str);

    if(code == KC_ASCII)
        return "ASCII";
    if(code == KC_EUC)
        return "EUC-JP";
    if(code == KC_JIS)
        return "ISO-2022-JP";
    if(code == KC_SJIS)
        return "Shift_JIS";

    return "ASCII";
}

/*
 * Convert str to the dstset encoding, detecting the source encoding
 * automatically.
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * str is NULL or memory runs out.  A plain copy of str is returned when:
 *   - Kanji conversion is switched off (toggles->kanji is zero),
 *   - the detected encoding is not EUC-JP, ISO-2022-JP or Shift_JIS, or
 *   - iconv has no converter for the requested pair.
 * If iconv() stops part-way through, the part converted so far is returned.
 *
 * Without HAVE_ICONV this is just a NULL-safe strdup().
 */
char *
kanji_conv_auto(char *str, const char *dstset)
#ifdef HAVE_ICONV
{
    const char *srcset;
    char *buf, *ret;
    char *inptr, *outptr;
    size_t len, insize, outsize;
    iconv_t cd;

    if(!str)
        return NULL;

    if(!toggles->kanji)
        return strdup(str);

    switch (int_detect_JCode(str)) {
    case KC_EUC:
        srcset = "EUC-JP";
        break;
    case KC_JIS:
        srcset = "ISO-2022-JP";
        break;
    case KC_SJIS:
        srcset = "Shift_JIS";
        break;
    default:
        /* nothing to convert */
        return strdup(str);
    }
#ifdef TRACE_FUNCTION
    printf("kanji_conv (%s), %s to ", str, srcset);
#endif

    /*
     * Open the converter before allocating anything so that a failure,
     * whatever its errno, can fall back to a plain copy without leaking
     * and without ever using an invalid descriptor.
     */
    cd = iconv_open(dstset, srcset);
    if(cd == (iconv_t) -1)
        return strdup(str);

    /* Four output bytes per input byte, plus room for the terminator. */
    len = strlen(str);
    buf = malloc(len * 4 + 1);
    if(!buf) {
        iconv_close(cd);
        return NULL;
    }

    inptr = str;
    insize = len;
    outptr = buf;
    outsize = len * 4;

    /*
     * On success, flush any pending shift sequence.  On failure keep
     * whatever was converted up to the offending byte; outptr already
     * points just past it.
     */
    if(iconv(cd, &inptr, &insize, &outptr, &outsize) != (size_t) -1)
        iconv(cd, NULL, NULL, &outptr, &outsize);

    *outptr = '\0';
    iconv_close(cd);

    /* Hand back a right-sized copy rather than the oversized work buffer. */
    ret = strdup(buf);
    free(buf);

#ifdef TRACE_FUNCTION
    printf("%s (%s)\n", dstset, ret ? ret : "(null)");
#endif

    return ret;
}
#else
{
    return str ? strdup(str) : NULL;
}
#endif
/*
 * Convert str to the encoding implied by the LC_CTYPE locale, detecting the
 * source encoding automatically.
 *
 * The target codeset is worked out once, on the first call that gets this
 * far, by calling setlocale(LC_CTYPE, "") and comparing the returned name
 * case-insensitively against the JCODE_LOCALE_* lists.  If the locale is
 * unavailable or matches none of them, "ISO-2022-JP" is used.
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * str is NULL or the conversion could not allocate memory.
 *
 * Without HAVE_ICONV this is just a NULL-safe strdup().
 */
char *
kanji_conv_to_locale(char *str)
#ifdef HAVE_ICONV
{
    static const char *jpcode = NULL;
    static const char *const locale_euc[]  = { JCODE_LOCALE_EUC, NULL };
    static const char *const locale_jis[]  = { JCODE_LOCALE_JIS, NULL };
    static const char *const locale_sjis[] = { JCODE_LOCALE_SJIS, NULL };

    /* Each codeset is paired with the locale names that select it. */
    static const struct LOCALE_TABLE {
        const char *code;
        const char *const *name_list;
    } locale_table[] = {
        {"EUC-JP", locale_euc},
        {"ISO-2022-JP", locale_jis},
        {"Shift_JIS", locale_sjis}
    };

    if(!str)
        return NULL;

    if(!toggles->kanji)
        return strdup(str);

    if(jpcode == NULL) {
        /* setlocale() returns NULL when the request cannot be honoured. */
        const char *ctype = setlocale(LC_CTYPE, "");
        size_t i, j;

        for(j = 0; ctype != NULL && jpcode == NULL &&
                   j < sizeof(locale_table) / sizeof(locale_table[0]); j++) {
            const char *const *name = locale_table[j].name_list;

            for(i = 0; name[i] != NULL; i++) {
                if(strcasecmp(ctype, name[i]) == 0) {
                    jpcode = locale_table[j].code;
                    break;
                }
            }
        }

        /* Unknown locale: default to the ISO-2022-JP entry. */
        if(jpcode == NULL)
            jpcode = locale_table[1].code;
    }

    return kanji_conv_auto(str, jpcode);
}
#else
{
    return str ? strdup(str) : NULL;
}
#endif
/*
 * Convert str from the srcset encoding to the dstset encoding.
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * str is NULL or memory runs out.  A plain copy of str is returned when
 * Kanji conversion is switched off (toggles->kanji is zero) or when iconv
 * has no converter for the requested pair.  If iconv() stops part-way
 * through, the part converted so far is returned.
 *
 * Without HAVE_ICONV this is just a NULL-safe strdup().
 */
char *
kanji_conv(char *str, const char *dstset, const char *srcset)
#ifdef HAVE_ICONV
{
    char *buf, *ret;
    char *inptr, *outptr;
    size_t len, insize, outsize;
    iconv_t cd;

    if(!str)
        return NULL;

    if(!toggles->kanji)
        return strdup(str);

#ifdef TRACE_FUNCTION
    printf("kanji_conv (%s), %s to ", str, srcset);
#endif

    /*
     * Open the converter before allocating anything so that a failure,
     * whatever its errno, can fall back to a plain copy without leaking
     * and without ever using an invalid descriptor.
     */
    cd = iconv_open(dstset, srcset);
    if(cd == (iconv_t) -1)
        return strdup(str);

    /* Four output bytes per input byte, plus room for the terminator. */
    len = strlen(str);
    buf = malloc(len * 4 + 1);
    if(!buf) {
        iconv_close(cd);
        return NULL;
    }

    inptr = str;
    insize = len;
    outptr = buf;
    outsize = len * 4;

    /*
     * On success, flush any pending shift sequence.  On failure keep
     * whatever was converted up to the offending byte; outptr already
     * points just past it.
     */
    if(iconv(cd, &inptr, &insize, &outptr, &outsize) != (size_t) -1)
        iconv(cd, NULL, NULL, &outptr, &outsize);

    *outptr = '\0';
    iconv_close(cd);

    /* Hand back a right-sized copy rather than the oversized work buffer. */
    ret = strdup(buf);
    free(buf);

#ifdef TRACE_FUNCTION
    printf(" %s (%s).\n", ret ? ret : "(null)", dstset);
#endif

    return ret;
}
#else
{
    return str ? strdup(str) : NULL;
}
#endif

/* easy wrapper */
/*
 * In-place form of kanji_conv_auto(): replaces *str with its conversion to
 * destset.
 *
 * Does nothing when str or *str is NULL, when Kanji conversion is switched
 * off (toggles->kanji is zero), when the conversion returns NULL (the
 * caller's string is left untouched), or when built without HAVE_ICONV.
 *
 * On success the old string is released with free() unless it is empty,
 * and *str points at a newly allocated string.
 * NOTE(review): empty strings have never been freed here; confirm whether
 * callers may pass a non-heap "" before changing that.
 */
void
kanji_conv_auto_s(char **str, const char *destset)
{
#ifdef HAVE_ICONV
    char *p;
    char *ret;

    if(!str || !*str)
        return;

    if(!toggles->kanji)
        return;

    p = *str;

    /* ret is a freshly allocated string, or NULL on failure */
    ret = kanji_conv_auto(p, destset);
    if(!ret)
        return;

    if(*p)
        free(p);
    *str = ret;
#else
    (void)str;
    (void)destset;
#endif
}

/*
 * In-place form of kanji_conv_to_locale(): replaces *str with its
 * conversion to the locale's encoding.
 *
 * Does nothing when str or *str is NULL, when Kanji conversion is switched
 * off (toggles->kanji is zero), when the conversion returns NULL (the
 * caller's string is left untouched), or when built without HAVE_ICONV.
 *
 * On success the old string is released with free() unless it is empty,
 * and *str points at a newly allocated string.
 * NOTE(review): empty strings have never been freed here; confirm whether
 * callers may pass a non-heap "" before changing that.
 */
void
kanji_conv_to_locale_s(char **str)
{
#ifdef HAVE_ICONV
    char *p;
    char *ret;

    if(!str || !*str)
        return;

    if(!toggles->kanji)
        return;

    p = *str;

    /* ret is a freshly allocated string, or NULL on failure */
    ret = kanji_conv_to_locale(p);
    if(!ret)
        return;

    if(*p)
        free(p);
    *str = ret;
#else
    (void)str;
#endif
}
