/*
 *  KANJI Code conversion routines.
 */

#include <kpathsea/config.h>
#include <kpathsea/c-memstr.h>
#include <kpathsea/variable.h>
#include <kpathsea/readable.h>

#include <ptexenc/c-auto.h>
#include <ptexenc/ptexenc.h>
#include <ptexenc/kanjicnv.h>
#include <ptexenc/unicode.h>
#include <ptexenc/unicode-jp.h>

#include <ctype.h>

/* Encoding identifiers stored in file_enc, internal_enc and terminal_enc. */
#define ENC_UNKNOWN  0  /* not determined yet; the get_*_enc() accessors
                           replace it on first use */
#define ENC_JIS      1
#define ENC_EUC      2
#define ENC_SJIS     3
#define ENC_UTF8     4
#define ENC_UPTEX    5  /* internal encoding only: set_file_enc() stores
                           ENC_UTF8 instead when asked for this value */

/* ASCII escape character; first byte of the JIS Kanji-in / Kanji-out
   sequences recognised by input_line2() and emitted by putc2(). */
#define ESC '\033'

/* NOFILE is the size of the per-descriptor tables in this file
   (inkanji[] and c1[] in putc2(), and piped_fp[]).
   NOTE(review): <sys/param.h> presumably supplies NOFILE where it exists;
   confirm per platform.  When NOFILE is still undefined it falls back to
   OPEN_MAX. */
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#else
#include <limits.h>
#endif /* HAVE_SYS_PARAM_H */
#ifndef NOFILE
#define NOFILE OPEN_MAX
#endif

/* Human-readable library version, built from PTEXENC_VERSION. */
char *ptexenc_version_string = "ptexenc version " PTEXENC_VERSION;

/* Current encodings (ENC_* values).  Each starts as ENC_UNKNOWN;
   get_file_enc(), get_internal_enc() and get_terminal_enc() fill in
   their variable the first time they are called while it is unknown. */
static int     file_enc = ENC_UNKNOWN;
static int internal_enc = ENC_UNKNOWN;
static int terminal_enc = ENC_UNKNOWN;


/* Return the lowercase name of an ENC_* identifier,
   or "?" when 'enc' is none of the named encodings. */
static string enc_to_string P1C(int, enc)
{
    if (enc == ENC_JIS)   return "jis";
    if (enc == ENC_EUC)   return "euc";
    if (enc == ENC_SJIS)  return "sjis";
    if (enc == ENC_UTF8)  return "utf8";
    if (enc == ENC_UPTEX) return "uptex";
    return "?";
}

/* Parse an encoding name (compared case-insensitively).
   Returns ENC_UNKNOWN for a NULL pointer, DEFAULT_KANJI_ENC for "default",
   the matching ENC_* value for "jis", "euc", "sjis" and "utf8",
   and -1 for any other string.  "uptex" is deliberately not accepted. */
static int string_to_enc P1C(string, str)
{
    int enc = -1; /* error unless one of the names below matches */

    if (str == NULL) {
        enc = ENC_UNKNOWN;
    } else if (strcasecmp(str, "default") == 0) {
        enc = DEFAULT_KANJI_ENC;
    } else if (strcasecmp(str, "jis") == 0) {
        enc = ENC_JIS;
    } else if (strcasecmp(str, "euc") == 0) {
        enc = ENC_EUC;
    } else if (strcasecmp(str, "sjis") == 0) {
        enc = ENC_SJIS;
    } else if (strcasecmp(str, "utf8") == 0) {
        enc = ENC_UTF8;
    }
    return enc;
}

/* Determine the default encoding from the environment variable
   PTEX_KANJI_ENC.  An unset variable yields DEFAULT_KANJI_ENC; an
   unrecognised value also yields DEFAULT_KANJI_ENC after a warning on
   stderr. */
static int get_default_enc P1H(void)
{
    /* getenv() is used on purpose: kpse_var_value("PTEX_KANJI_ENC")
       aborts if 'kpse_program_name' is empty, which typically occurs
       when 'ptex' and 'jmpost' print version messages. */
    string value = getenv("PTEX_KANJI_ENC");
    int parsed = string_to_enc(value);

    if (parsed >= 0 && parsed != ENC_UNKNOWN) return parsed;

    if (parsed < 0) {
        fprintf(stderr, "Warning: Unknown environment value "
                "PTEX_KANJI_ENC='%s'\n", value);
    }
    return DEFAULT_KANJI_ENC;
}

/* Record the file encoding.  ENC_UPTEX is stored as ENC_UTF8;
   every other value is stored unchanged. */
static void set_file_enc P1C(int, enc)
{
    file_enc = (enc == ENC_UPTEX) ? ENC_UTF8 : enc;
}

/* Record the internal encoding.  Only SJIS and UPTEX are kept as
   requested; every other value (EUC, JIS, UTF8, ...) selects EUC. */
static void set_internal_enc P1C(int, enc)
{
    switch (enc) {
    case ENC_SJIS:
    case ENC_UPTEX:
        internal_enc = enc;
        break;
    default:
        internal_enc = ENC_EUC;
        break;
    }
}

/* Return the file encoding, deriving it from get_default_enc()
   the first time it is needed while still unknown. */
static int get_file_enc P1H(void)
{
    if (file_enc != ENC_UNKNOWN) return file_enc;

    set_file_enc(get_default_enc());
    return file_enc;
}

/* Return the internal encoding, deriving it from get_default_enc()
   the first time it is needed while still unknown. */
static int get_internal_enc P1H(void)
{
    if (internal_enc != ENC_UNKNOWN) return internal_enc;

    set_internal_enc(get_default_enc());
    return internal_enc;
}

/* Return the terminal encoding.  On the first call it is guessed from the
   codeset part (the text after the last '.') of the first environment
   variable that is set among LC_ALL, LC_MESSAGES, LANG and LANGUAGE.
   When the codeset is not recognised the file encoding is used. */
int get_terminal_enc P1H(void)
{
    static const char *const env_vars[] = {
        "LC_ALL", "LC_MESSAGES", "LANG", "LANGUAGE"
    };
    static const struct { const char *name; int enc; } codesets[] = {
        { "euc",         ENC_EUC  },
        { "eucJP",       ENC_EUC  },
        { "ujis",        ENC_EUC  },
        { "sjis",        ENC_SJIS },
        { "utf8",        ENC_UTF8 },
        { "UTF-8",       ENC_UTF8 },
        { "jis",         ENC_JIS  },
        { "ISO-2022-JP", ENC_JIS  }
    };
    const int n_vars     = (int)(sizeof(env_vars) / sizeof(env_vars[0]));
    const int n_codesets = (int)(sizeof(codesets) / sizeof(codesets[0]));
    char codeset[16];  /* large enough for every name in codesets[] */
    const char *locale = NULL;
    const char *dot;
    int k;

    if (terminal_enc != ENC_UNKNOWN) return terminal_enc;

    /* first variable that is set wins */
    for (k = 0; locale == NULL && k < n_vars; k++) {
        locale = getenv(env_vars[k]);
    }
    if (locale == NULL) locale = "";

    /* keep only what follows the last '.', if there is one */
    dot = strrchr(locale, '.');
    if (dot != NULL) locale = dot + 1;

    strncpy(codeset, locale, sizeof(codeset) - 1);
    codeset[sizeof(codeset) - 1] = '\0';

    for (k = 0; k < n_codesets; k++) {
        if (strcasecmp(codeset, codesets[k].name) == 0) {
            terminal_enc = codesets[k].enc;
            return terminal_enc;
        }
    }

    terminal_enc = get_file_enc();
    return terminal_enc;
}

/* Describe the current encodings as text: just the encoding name when
   file and internal encodings are equal, otherwise "<file>.<internal>".
   In the second case the result lives in a static buffer that the next
   call overwrites. */
string get_kanji_enc_string P1H(void)
{
    static char buffer[20]; /* large enough for any "<file>.<internal>" */
    const int file     = get_file_enc();
    const int internal = get_internal_enc();

    if (file != internal) {
        sprintf(buffer, "%s.%s",
                enc_to_string(file),
                enc_to_string(internal));
        return buffer;
    }
    return enc_to_string(file);
}

/* Set the file and/or internal encoding from their names.
   A NULL name leaves the corresponding encoding untouched.
   Returns false, changing nothing, if either name is not recognised.
   Setting the file encoding also disables the input filter. */
boolean set_kanji_enc_string P2C(string, file_str, string, internal_str)
{
    const int new_file     = string_to_enc(file_str);
    const int new_internal = string_to_enc(internal_str);

    if (new_file < 0)     return false; /* error */
    if (new_internal < 0) return false; /* error */

    if (new_file != ENC_UNKNOWN) {
        set_file_enc(new_file);
        nkf_disable();
    }
    if (new_internal != ENC_UNKNOWN) {
        set_internal_enc(new_internal);
    }
    return true;
}

/* True when the internal encoding is SJIS.  Reads internal_enc as it
   currently stands; it does not trigger the lazy default lookup. */
boolean is_internalSJIS P1H(void)
{
    if (internal_enc == ENC_SJIS) return true;
    return false;
}

/* True when the internal encoding is UPTEX.  Reads internal_enc as it
   currently stands; it does not trigger the lazy default lookup. */
boolean is_internalUPTEX P1H(void)
{
    if (internal_enc == ENC_UPTEX) return true;
    return false;
}


/* Test whether byte 'c' can be the first byte of a multi-byte character
   in the internal encoding: a lead byte of a 2-, 3- or 4-byte UTF-8
   sequence for UPTEX, otherwise a first Kanji byte of SJIS or EUC. */
boolean is1stchr P1C(int, c)
{
    if (is_internalUPTEX()) {
        int length;
        for (length = 2; length <= 4; length++) {
            if (isUTF8(length, 1, c)) return true;
        }
        return false;
    }
    if (is_internalSJIS()) {
        return isSJISkanji1(c);
    }
    return isEUCkanji1(c); /* EUC */
}

/* Check whether 'c' is valid as the nth byte of a 'length'-byte character
   in the internal encoding.  UPTEX defers entirely to isUTF8().  For
   SJIS/EUC only 2-byte characters exist: lengths 3 and 4 with a valid
   'nth' return false silently, any other combination is reported on
   stderr and returns false. */
boolean ismultichr P3C(int, length, int, nth, int, c)
{
    if (is_internalUPTEX()) return isUTF8(length, nth, c);

    switch (length) {
    case 2:
        if (nth == 1) {
            if (is_internalSJIS()) return isSJISkanji1(c);
            return isEUCkanji1(c); /* EUC */
        }
        if (nth == 2) {
            if (is_internalSJIS()) return isSJISkanji2(c);
            return isEUCkanji2(c); /* EUC */
        }
        break;
    case 3:
    case 4:
        if (nth >= 1 && nth <= length) return false;
        break;
    default:
        break;
    }

    fprintf(stderr, "ismultichr: unexpected param length=%d, nth=%d\n",
            length, nth);
    return false;
}

/* Byte length of the character starting at s[pos], where 'len' is the
   length of 's'.  Returns 1 when the bytes there do not form a valid
   multi-byte character in the internal encoding. */
int multistrlen P3C(unsigned char *, s, int, len, int, pos)
{
    unsigned char *p = s + pos;
    const int remain = len - pos;
    boolean two_bytes;

    if (is_internalUPTEX()) {
        const int n = UTF8Slength(p, remain);
        return (n < 0) ? 1 : n;
    }

    if (remain < 2) return 1;

    if (is_internalSJIS()) {
        two_bytes = isSJISkanji1(p[0]) && isSJISkanji2(p[1]);
    } else { /* EUC */
        two_bytes = isEUCkanji1(p[0]) && isEUCkanji2(p[1]);
    }
    return two_bytes ? 2 : 1;
}

/* Buffer (EUC/SJIS/UTF-8) to internal (EUC/SJIS/UPTEX) code conversion.
   Decodes the character starting at s[pos]; 'len' is the length of 's'.
   When no valid multi-byte character starts there, the single byte
   s[pos] is returned. */
long fromBUFF P3C(unsigned char *, s, int, len, int, pos)
{
    unsigned char *p = s + pos;
    const int remain = len - pos;

    if (is_internalUPTEX()) {
        if (UTF8Slength(p, remain) >= 0) {
            return UCStoUPTEX(UTF8StoUCS(p));
        }
        return p[0];
    }

    if (remain >= 2) {
        if (is_internalSJIS()) {
            if (isSJISkanji1(p[0]) && isSJISkanji2(p[1])) {
                return HILO(p[0], p[1]);
            }
        } else { /* EUC */
            if (isEUCkanji1(p[0]) && isEUCkanji2(p[1])) {
                return HILO(p[0], p[1]);
            }
        }
    }
    return p[0];
}

/* Internal (EUC/SJIS/UPTEX) to buffer (EUC/SJIS/UTF-8) code conversion.
   Only UPTEX needs work (it becomes UTF-8); EUC and SJIS codes are
   returned unchanged. */
long toBUFF P1C(long, kcode)
{
    if (!is_internalUPTEX()) return kcode;
    return UCStoUTF8(UPTEXtoUCS(kcode));
}


/* DVI (JIS/UCS) to internal (EUC/SJIS/UPTEX) code conversion.
   The DVI code is taken as UCS when the internal encoding is UPTEX
   and as JIS otherwise. */
long fromDVI P1C(long, kcode)
{
    long result;

    if (is_internalUPTEX()) {
        result = UCStoUPTEX(kcode);
    } else if (is_internalSJIS()) {
        result = JIStoSJIS(kcode);
    } else { /* EUC */
        result = JIStoEUC(kcode);
    }
    return result;
}

/* Internal (EUC/SJIS/UPTEX) to DVI (JIS/UCS) code conversion.
   Produces UCS when the internal encoding is UPTEX and JIS otherwise. */
long toDVI P1C(long, kcode)
{
    long result;

    if (is_internalUPTEX()) {
        result = UPTEXtoUCS(kcode);
    } else if (is_internalSJIS()) {
        result = SJIStoJIS(kcode);
    } else { /* EUC */
        result = EUCtoJIS(kcode);
    }
    return result;
}


/* JIS to internal (EUC/SJIS/UPTEX) code conversion.
   For UPTEX the code goes through UCS (JIStoUCS2, then UCStoUPTEX). */
long fromJIS P1C(long, kcode)
{
    long result;

    if (is_internalUPTEX()) {
        result = UCStoUPTEX(JIStoUCS2(kcode));
    } else if (is_internalSJIS()) {
        result = JIStoSJIS(kcode);
    } else { /* EUC */
        result = JIStoEUC(kcode);
    }
    return result;
}

/* Internal (EUC/SJIS/UPTEX) to JIS code conversion.
   For UPTEX the code goes through UCS (UPTEXtoUCS, then UCS2toJIS). */
long toJIS P1C(long, kcode)
{
    long result;

    if (is_internalUPTEX()) {
        result = UCS2toJIS(UPTEXtoUCS(kcode));
    } else if (is_internalSJIS()) {
        result = SJIStoJIS(kcode);
    } else { /* EUC */
        result = EUCtoJIS(kcode);
    }
    return result;
}


/* EUC to internal (EUC/SJIS/UPTEX) code conversion.
   Identity when the internal encoding is EUC; otherwise the code is
   converted via JIS. */
long fromEUC P1C(long, kcode)
{
    if (is_internalUPTEX() || is_internalSJIS()) {
        return fromJIS(EUCtoJIS(kcode));
    }
    return kcode; /* internal EUC */
}

/* Internal (EUC/SJIS/UPTEX) to EUC code conversion.
   Identity when the internal encoding is EUC; otherwise the code is
   converted via JIS. */
long toEUC P1C(long, kcode)
{
    if (is_internalUPTEX() || is_internalSJIS()) {
        return JIStoEUC(toJIS(kcode));
    }
    return kcode; /* internal EUC */
}


/* SJIS to internal (EUC/SJIS/UPTEX) code conversion.
   Identity when the internal encoding is SJIS; otherwise the code is
   converted via JIS. */
long fromSJIS P1C(long, kcode)
{
    if (!is_internalSJIS()) {
        return fromJIS(SJIStoJIS(kcode));
    }
    return kcode;
}

/* Internal (EUC/SJIS/UPTEX) to SJIS code conversion.
   Identity when the internal encoding is SJIS; otherwise the code is
   converted via JIS. */
long toSJIS P1C(long, kcode)
{
    if (!is_internalSJIS()) {
        return JIStoSJIS(toJIS(kcode));
    }
    return kcode;
}


/* KUTEN to internal (EUC/SJIS/UPTEX) code conversion, done via JIS. */
long fromKUTEN P1C(long, kcode)
{
    const long jis = KUTENtoJIS(kcode);

    return fromJIS(jis);
}


/* UCS to internal (EUC/SJIS/UPTEX) code conversion.
   For EUC/SJIS the code goes through JIS; 0 is returned when
   UCS2toJIS() yields 0 for the character. */
long fromUCS P1C(long, kcode)
{
    long jis;

    if (is_internalUPTEX()) return UCStoUPTEX(kcode);

    jis = UCS2toJIS(kcode);
    return (jis == 0) ? 0 : fromJIS(jis);
}

/* Internal (EUC/SJIS/UPTEX) to UCS code conversion.
   EUC and SJIS codes are converted via JIS. */
long toUCS P1C(long, kcode)
{
    if (!is_internalUPTEX()) {
        return JIStoUCS2(toJIS(kcode));
    }
    return UPTEXtoUCS(kcode);
}

/* Internal (EUC/SJIS/UPTEX) to UTF-8 code conversion, done via UCS. */
long toUTF8 P1C(long, kcode)
{
    const long ucs = toUCS(kcode);

    return UCStoUTF8(ucs);
}

/* Internal (EUC/SJIS/UPTEX) to 'enc' code conversion.
   'enc' must be ENC_UTF8, ENC_JIS, ENC_EUC or ENC_SJIS; any other value
   is reported on stderr and 0 is returned. */
static long toENC P2C(long, kcode, int, enc)
{
    switch (enc) {
    case ENC_UTF8: return toUTF8(kcode);
    case ENC_JIS:  return toJIS(kcode);
    case ENC_EUC:  return toEUC(kcode);
    case ENC_SJIS: return toSJIS(kcode);
    default:       break;
    }
    fprintf(stderr, "toENC: unknown enc (%d).\n", enc);
    return 0;
}



/* JIS shift sequences packed for put_multibyte(): ESC '$' 'B' switches
   to Kanji and ESC '(' 'B' switches back.  The leading 0 byte is skipped
   by put_multibyte(), so three bytes are written. */
#define KANJI_IN   LONG(0, ESC, '$', 'B')
#define KANJI_OUT  LONG(0, ESC, '(', 'B')

/* Write the bytes packed in 'c' to 'fp', most significant first.
   Zero bytes in the first three positions are skipped; the fourth byte
   is always written.  Returns EOF as soon as a write fails, otherwise
   the result of the final putc(). */
static int put_multibyte P2C(long, c, FILE*, fp) {
    const long b1 = BYTE1(c);
    const long b2 = BYTE2(c);
    const long b3 = BYTE3(c);

    if (b1 != 0) {
        if (putc(b1, fp) == EOF) return EOF;
    }
    if (b2 != 0) {
        if (putc(b2, fp) == EOF) return EOF;
    }
    if (b3 != 0) {
        if (putc(b3, fp) == EOF) return EOF;
    }
    return putc(BYTE4(c), fp); /* always */
}

/* putc() with code conversion.
   Bytes are received in the internal encoding, one per call.  The first
   byte of a Kanji is held back until the second arrives; the pair is then
   converted to the output encoding (terminal encoding for stdout/stderr,
   file encoding otherwise) and written.  For JIS output the Kanji-in and
   Kanji-out escape sequences are inserted around runs of Kanji.
   State is kept per file descriptor.  Returns the value of the last
   write performed, or 'c' when the byte was only stored. */
int putc2 P2C(int, c, FILE*, fp)
{
    static int inkanji[NOFILE]; /* 0: not in Kanji
                                   1: in JIS Kanji and first byte is in c1[]
                                  -1: in JIS Kanji and c1[] is empty */
    static unsigned char c1[NOFILE];
    const int fd = fileno(fp);
    int ret = c, output_enc;

    /* fileno() yields -1 on error and is not guaranteed to stay below the
       table size.  Without a valid slot no per-stream state can be kept,
       so write the byte unconverted instead of indexing out of bounds. */
    if (fd < 0 || fd >= (int)(sizeof(inkanji) / sizeof(inkanji[0]))) {
        return putc(c, fp);
    }

    if (fp == stdout || fp == stderr) output_enc = get_terminal_enc();
    else                              output_enc = get_file_enc();

    if (inkanji[fd] > 0) {   /* KANJI 2nd */
        ret = put_multibyte(toENC(HILO(c1[fd], c), output_enc), fp);
        inkanji[fd] = -1;
    } else if (iskanji1(c)) { /* KANJI 1st */
        if (inkanji[fd] == 0 && output_enc == ENC_JIS) {
            ret = put_multibyte(KANJI_IN, fp);
        }
        c1[fd] = c;
        inkanji[fd] = 1;
    } else {                  /* ASCII */
        if (inkanji[fd] < 0 && output_enc == ENC_JIS) {
            ret = put_multibyte(KANJI_OUT, fp);
        }
        ret = putc(c, fp);
        inkanji[fd] = 0;
    }
    return ret;
}

/* fputs() with code conversion: feeds every byte of 's' to putc2().
   Returns EOF as soon as putc2() reports EOF, otherwise 1. */
int fputs2 P2C(const char*, s, FILE*, fp)
{
    const char *p;

    for (p = s; *p != '\0'; p++) {
        if (putc2((unsigned char)*p, fp) == EOF) return EOF;
    }
    return 1;
}



/* State shared by input_line2() and its helpers: 'buffer' is the caller's
   line buffer, 'first' the index where the current line starts and 'last'
   the index where the next byte will be stored. */
static string buffer;
static long first, last;
/* Try to merge a (semi-)voiced sound mark into the 2-byte character that
   was stored just before it in the line buffer.  'semi' selects the
   semi-voiced variant.  Returns true when the preceding character was
   replaced in place; false when there is no preceding 2-byte character
   in the current line or get_voiced_sound() has no combined form. */
static boolean combin_voiced_sound P1C(boolean, semi)
{
    const long prev = last - 2; /* start of the preceding character */
    int code;

    if (prev < first) return false;
    if (multistrlen(buffer, last, prev) != 2) return false;

    code = toUCS(fromBUFF(buffer, last, prev));
    code = get_voiced_sound(code, semi);
    if (code == 0) return false;

    code = toBUFF(fromUCS(code));
    buffer[prev]     = HI(code);
    buffer[last - 1] = LO(code);
    return true;
}

/* Append the bytes packed in 'i' to the line buffer, most significant
   first.  The two high bytes are skipped when zero; the two low bytes
   are always stored. */
static void write_multibyte P1C(long, i)
{
    if (BYTE1(i) != 0) {
        buffer[last] = BYTE1(i);
        last++;
    }
    if (BYTE2(i) != 0) {
        buffer[last] = BYTE2(i);
        last++;
    }
    buffer[last]     = BYTE3(i); /* always */
    buffer[last + 1] = BYTE4(i); /* always */
    last += 2;
}

/* Append byte 'i' to the line buffer in the form "^^xx" (two lowercase
   hex digits) and advance 'last' by the four characters written. */
static void write_hex P1C(int, i)
{
    char *dst = buffer + last;

    last += 4;
    sprintf(dst, "^^%02x", i);
}

/* getc() with a check for broken encoding: returns the next byte only
   if isUTF8(2,2,...) accepts it (the second-byte test of a 2-byte
   sequence).  Any other byte is pushed back and EOF is returned. */
static int getc2 P1C(FILE *, fp)
{
    const int c = getc(fp);

    if (!isUTF8(2,2,c)) {
        ungetc(c, fp);
        return EOF;
    }
    return c;
}

/* Read the rest of a UTF-8 sequence whose first byte 'i' has already
   been consumed, and append the character to the line buffer in buffer
   encoding.
     - A BOM is dropped.
     - A (semi-)voiced sound mark is merged into the preceding character
       when combin_voiced_sound() succeeds.
     - A first byte whose UTF8length() is not 2, 3 or 4 is replaced by
       U_REPLACEMENT_CHARACTER.
     - When the sequence is cut short, or the character converts to 0,
       the bytes read so far are written as "^^xx" escapes. */
static void get_utf8 P2C(int, i, FILE *, fp)
{
    long ucs = 0, out;
    int trail[3];       /* continuation bytes, EOF where not read */
    int expected, have;

    trail[0] = trail[1] = trail[2] = EOF;
    expected = UTF8length(i) - 1; /* continuation bytes wanted */

    if (expected < 1 || expected > 3) {
        ucs = U_REPLACEMENT_CHARACTER;
    } else {
        for (have = 0; have < expected; have++) {
            trail[have] = getc2(fp);
            if (trail[have] == EOF) break; /* broken sequence */
        }
        if (have == expected) {
            if (expected == 1) {
                ucs = UTF8BtoUCS(i, trail[0]);
            } else if (expected == 2) {
                ucs = UTF8CtoUCS(i, trail[0], trail[1]);
                if (ucs == U_BOM) return; /* just ignore */
                if (ucs == U_VOICED      && combin_voiced_sound(false)) return;
                if (ucs == U_SEMI_VOICED && combin_voiced_sound(true))  return;
            } else {
                ucs = UTF8DtoUCS(i, trail[0], trail[1], trail[2]);
            }
        }
    }

    out = toBUFF(fromUCS(ucs));
    if (out != 0) {
        write_multibyte(out);
        return;
    }

    /* can't represent (typically umlaut o in EUC) */
    write_hex(i);
    for (have = 0; have < 3 && trail[have] != EOF; have++) {
        write_hex(trail[have]);
    }
}

/* Complete an EUC character whose first byte 'i' has been read.
   If the next byte is a valid second byte the pair is converted and
   appended to the line buffer; otherwise 'i' is stored alone and the
   byte just read is pushed back. */
static void get_euc P2C(int, i, FILE *, fp)
{
    const int second = getc(fp);

    if (!isEUCkanji2(second)) {
        buffer[last++] = i;
        ungetc(second, fp);
        return;
    }
    write_multibyte(toBUFF(fromEUC(HILO(i,second))));
}

/* Complete an SJIS character whose first byte 'i' has been read.
   If the next byte is a valid second byte the pair is converted and
   appended to the line buffer; otherwise 'i' is stored alone and the
   byte just read is pushed back. */
static void get_sjis P2C(int, i, FILE *, fp)
{
    const int second = getc(fp);

    if (!isSJISkanji2(second)) {
        buffer[last++] = i;
        ungetc(second, fp);
        return;
    }
    write_multibyte(toBUFF(fromSJIS(HILO(i,second))));
}

/* Tell whether *c ends the current line: EOF, '\n' or '\r'.
   After '\r' one more byte is read; a following '\n' is consumed and
   stored in *c, anything else is pushed back. */
static boolean is_tail P2C(long*, c, FILE*, fp)
{
    int next;

    if (*c == EOF || *c == '\n') return true;
    if (*c != '\r') return false;

    next = getc(fp);
    if (next == '\n') {
        *c = next;
    } else {
        ungetc(next, fp);
    }
    return true;
}

/* Input line with Kanji code conversion.
   Reads bytes from 'fp' up to end of line or end of file and stores them,
   converted to buffer encoding, in 'buff' starting at index 'pos'.
     - JIS escape sequences (ESC $ @ / ESC $ B to enter Kanji,
       ESC ( J / ESC ( B / ESC ( H to leave) are consumed and toggle the
       JIS state; malformed sequences are copied through as they are.
     - Outside JIS state, multi-byte characters are decoded according to
       the file encoding (SJIS, EUC or UTF-8).
   Reading stops early once fewer than 30 bytes of 'buffsize' remain.
   The stored text is NUL-terminated.
   Returns the index just past the last stored byte.  If 'lastchar' is
   not NULL it receives the last value read (EOF, '\n', '\r', or another
   byte when the buffer filled up), or 0 when nothing was read at all. */
int input_line2 P5C(FILE*, fp, string, buff, long, pos,
                    const long, buffsize, int*, lastchar)
{
    /* Must be initialised: when 'pos' already leaves fewer than 30 bytes
       the loop condition short-circuits before getc() assigns it, and it
       is still read after the loop. */
    long i = 0;
    static boolean injis = false;

    buffer = buff;
    first = last = pos;

    while (last < buffsize-30 && (i=getc(fp)) != EOF && i!='\n' && i!='\r') {
        /* 30 is a large enough margin for one char */
        /* attention: 4 calls of write_hex() consume 16 bytes */
        if (i == ESC) {
            if ((i=getc(fp)) == '$') { /* ESC '$' (Kanji-in) */
                i = getc(fp);
                if (i == '@' || i == 'B') {
                    injis = true;
                } else {               /* broken Kanji-in */
                    buffer[last++] = ESC;
                    buffer[last++] = '$';
                    if (is_tail(&i, fp)) break;
                    buffer[last++] = i;
                }
            } else if (i == '(') {     /* ESC '(' (Kanji-out) */
                i = getc(fp);
                if (i == 'J' || i == 'B' || i == 'H') {
                    injis = false;
                } else {               /* broken Kanji-out */
                    buffer[last++] = ESC;
                    buffer[last++] = '(';
                    if (is_tail(&i, fp)) break;
                    buffer[last++] = i;
                }
            } else { /* broken ESC */
                buffer[last++] = ESC;
                if (is_tail(&i, fp)) break;
                buffer[last++] = i;
            }
        } else { /* other than ESC */
            if (injis) { /* in JIS */
                long j = getc(fp);
                if (is_tail(&j, fp)) {
                    /* line ended after the first byte of a pair:
                       keep the lone byte and report the terminator */
                    buffer[last++] = i;
                    i = j;
                    break;
                } else { /* JIS encoding */
                    i = fromJIS(HILO(i,j));
                    if (i == 0) i = fromUCS(U_REPLACEMENT_CHARACTER);
                    write_multibyte(toBUFF(i));
                }
            } else {  /* normal */
                if        (get_file_enc() == ENC_SJIS && isSJISkanji1(i)) {
                    get_sjis(i, fp);
                } else if (get_file_enc() == ENC_EUC  && isEUCkanji1(i)) {
                    get_euc(i, fp);
                } else if (get_file_enc() == ENC_UTF8 && UTF8length(i) > 1) {
                    get_utf8(i, fp);
                } else {
                    buffer[last++] = i;
                }
            }
        }
    }

    buffer[last] = '\0';
    /* the JIS state does not carry over to the next line */
    if (i == EOF || i == '\n' || i == '\r') injis = false;
    if (lastchar != NULL) *lastchar = i;
    return last;
}


/* Input filter state.
   in_filter: NULL until nkf_open() looks up PTEX_IN_FILTER; "" when
              filtering is disabled; otherwise the filter command.
   piped_fp:  streams opened with popen() by nkf_open() and not yet
              closed; piped_num is how many entries are in use. */
static string in_filter = NULL;
static FILE *piped_fp[NOFILE];
static int piped_num = 0;

/* Disable the input filter: with in_filter set to the empty string,
   nkf_open() opens files with plain fopen() and no longer consults
   PTEX_IN_FILTER. */
void nkf_disable P1H(void)
{
    in_filter = "";
}

#ifdef NKF_TEST
#include <stdlib.h>
/* Test aid, registered with atexit() by nkf_open() when NKF_TEST is
   defined: reports on stderr whether any stream opened through the
   filter is still recorded as open. */
static void nkf_check P1H(void)
{
    if (piped_num <= 0) {
        fprintf(stderr, "nkf_check: nkf_open() OK.\n");
        return;
    }
    fprintf(stderr, "nkf_check: %d nkf_open() did not closed.\n",
            piped_num);
}
#endif /* NKF_TEST */

/* Open 'path' for reading, through the input filter when one is set.
   'mode' must be a read mode; it is only used when no filter is active.
   On first use the filter command is taken from PTEX_IN_FILTER: unset or
   "no" disables filtering, and a value that is not an absolute path is
   rejected with a warning.
   With a filter, the command "<filter> '<path>'" is run with popen() and
   the resulting stream is recorded so that nkf_close() can pclose() it.
   Returns NULL when the file is not readable, when the command does not
   fit the command buffer, or when fopen()/popen() fails. */
FILE *nkf_open P2C(const char*, path, const char*, mode) {
    char buff[1024];
    size_t used;
    const char *p;
    boolean fits = true;
    FILE *fp;

    if (in_filter == NULL) {
        in_filter = kpse_var_value("PTEX_IN_FILTER");
        if (in_filter == NULL || strcasecmp(in_filter, "no") == 0) {
            nkf_disable();
        } else if (in_filter[0] != '/') {
            fprintf(stderr, "Warning: PTEX_IN_FILTER must be full path.\n");
            nkf_disable();
        }
#ifdef NKF_TEST
        atexit(nkf_check);
#endif /* NKF_TEST */
    }

    if (in_filter[0] == '\0') return fopen(path, mode);
    path = kpse_readable_file(path);
    if (path == NULL) return NULL; /* can't read */

    /* Build "<filter> '<path>'" with explicit bounds checks.  A single
       quote inside the path is written as '\'' so that the shell always
       sees the whole path as one literal word. */
    used = strlen(in_filter);
    if (used + 4 > sizeof(buff)) {  /* space, two quotes and NUL */
        fits = false;
    } else {
        memcpy(buff, in_filter, used);
        buff[used++] = ' ';
        buff[used++] = '\'';
        for (p = path; *p != '\0'; p++) {
            const size_t need = (*p == '\'') ? 4 : 1;
            if (used + need + 2 > sizeof(buff)) { /* + closing quote, NUL */
                fits = false;
                break;
            }
            if (*p == '\'') {
                memcpy(buff + used, "'\\''", 4);
                used += 4;
            } else {
                buff[used++] = *p;
            }
        }
    }
    if (!fits) {
        fprintf(stderr, "Warning: PTEX_IN_FILTER command line too long.\n");
        return NULL;
    }
    buff[used++] = '\'';
    buff[used] = '\0';

    /* fprintf(stderr, "\n`%s`", buff); */
    fp = popen(buff , "r");
    /* Record only streams that were really opened: a NULL entry would sit
       on top of piped_fp[] forever and break the order matching done by
       nkf_close().
       NOTE(review): when the table is full the stream is returned
       untracked, so nkf_close() will fclose() it instead of pclose(). */
    if (fp != NULL && piped_num < NOFILE) piped_fp[piped_num++] = fp;
    return fp;
}

/* Close a stream obtained from nkf_open().
   Streams opened through the filter must be closed in stack order (FILO)
   or in queue order (FIFO): only the newest and the oldest recorded
   stream are recognised and closed with pclose().  Any other stream is
   closed with fclose(). */
int nkf_close P1C(FILE*, fp) {
    int k;

    if (piped_num <= 0) return fclose(fp);

    if (piped_fp[piped_num-1] == fp) {  /* for FILO */
        --piped_num;
        return pclose(fp);
    }
    if (piped_fp[0] == fp) {  /* for FIFO */
        --piped_num;
        /* shift the remaining entries down by one */
        for (k = 0; k < piped_num; k++) {
            piped_fp[k] = piped_fp[k+1];
        }
        return pclose(fp);
    }
    return fclose(fp);
}
