/* Version string of this nkf release. */
23#define NKF_VERSION "2.1.5"
/* Release date string that accompanies NKF_VERSION (written as YYYY-MM-DD). */
24#define NKF_RELEASE_DATE "2018-12-15"
26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27 "Copyright (C) 1996-2018, The nkf Project."
38# define INCL_DOSERRORS
265#if defined(DEFAULT_CODE_JIS)
266#define DEFAULT_ENCIDX ISO_2022_JP
267#elif defined(DEFAULT_CODE_SJIS)
268#define DEFAULT_ENCIDX SHIFT_JIS
269#elif defined(DEFAULT_CODE_WINDOWS_31J)
270#define DEFAULT_ENCIDX WINDOWS_31J
271#elif defined(DEFAULT_CODE_EUC)
272#define DEFAULT_ENCIDX EUC_JP
273#elif defined(DEFAULT_CODE_UTF8)
274#define DEFAULT_ENCIDX UTF_8
279 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
/*
 * ASCII-only character classification and conversion helpers.
 *
 * Every macro parameter is parenthesized so that compound arguments such as
 * `a | b` or `x ? y : z` expand with the intended precedence.  The arguments
 * are still evaluated more than once, so do not pass expressions with side
 * effects (e.g. `*p++`).
 */

/* Map 'a'..'z' to 'A'..'Z'; every other value is returned unchanged. */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
/* True for '0'..'7'. */
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
/* True for '0'..'9'. */
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
/* True for '0'..'9', 'a'..'f' and 'A'..'F'. */
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
/* True for SP or TAB. */
#define nkf_isblank(c)  ((c) == SP || (c) == TAB)
/* True for SP, TAB, CR or LF. */
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == LF)
/* True for 'a'..'z' and 'A'..'Z'. */
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
/* True for ASCII letters and digits. */
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
/* True for SP..'~'. */
#define nkf_isprint(c)  (SP<=(c) && (c)<='~')
/* True for '!'..'~' (printable, excluding SP). */
#define nkf_isgraph(c)  ('!'<=(c) && (c)<='~')
/* Value (0..15) of a hexadecimal digit character; 0 for any other input. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (bits 8..15) equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/*
 * True for CR, LF, and for characters strictly between SP and DEL other
 * than '?', '=', '_', '(', ')', '.' and '"' (0x22).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

/* True when c2 lies within CP932_TABLE_BEGIN..CP932_TABLE_END inclusive. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
/* True when c lies within SP..0x5F inclusive. */
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
304#define HOLD_SIZE 1024
305#if defined(INT_IS_SHORT)
306#define IOBUF_SIZE 2048
308#define IOBUF_SIZE 16384
322extern POINT _BufferSize;
336static const char *input_codename =
NULL;
340#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
347#define UCS_MAP_ASCII 0
349#define UCS_MAP_CP932 2
350#define UCS_MAP_CP10001 3
353#ifdef UTF8_INPUT_ENABLE
355static int no_cp932ext_f =
FALSE;
357static int no_best_fit_chars_f =
FALSE;
359static int input_bom_f =
FALSE;
360static nkf_char unicode_subchar =
'?';
364#ifdef UTF8_OUTPUT_ENABLE
365static int output_bom_f =
FALSE;
382#if !defined(PERL_XS) && !defined(WIN32DLL)
387#define NKF_UNSPECIFIED (-TRUE)
/*
 * File-scope option flags.  All of them start as FALSE except binmode_f,
 * which starts as TRUE.
 * NOTE(review): the meaning of each flag is suggested only by its name here
 * (e.g. unbuf_f = unbuffered, rot_f = ROT conversion, mimeout_f = MIME
 * output); confirm against the option parser before relying on it.
 */
390static int unbuf_f =
FALSE;
391static int estab_f =
FALSE;
392static int nop_f =
FALSE;
393static int binmode_f =
TRUE;
394static int rot_f =
FALSE;
395static int hira_f =
FALSE;
396static int alpha_f =
FALSE;
398static int mime_decode_f =
FALSE;
399static int mimebuf_f =
FALSE;
400static int broken_f =
FALSE;
401static int iso8859_f =
FALSE;
402static int mimeout_f =
FALSE;
404static int iso2022jp_f =
FALSE;
406#ifdef UNICODE_NORMALIZATION
407static int nfc_f =
FALSE;
413static int cap_f =
FALSE;
417static int url_f =
FALSE;
/*
 * Tagging scheme for nkf_char values: the top 8 bits (CLASS_MASK) carry a
 * class tag and the low 24 bits (VALUE_MASK) carry the value.  A value
 * tagged with CLASS_UNICODE holds a Unicode code point in its low 24 bits.
 *
 * Macro parameters are parenthesized so that compound arguments such as
 * `lo | hi` are masked as a whole rather than only their last operand.
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
/* OR the 0x8F00 prefix into c. */
#define nkf_char_euc3_new(c)	((c) | PREFIX_EUCG3)
/* Tag c as a Unicode value. */
#define nkf_char_unicode_new(c)	((c) | CLASS_UNICODE)
/* True when the class bits of c are exactly CLASS_UNICODE. */
#define nkf_char_unicode_p(c)	(((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the value bits of c do not exceed U+FFFF. */
#define nkf_char_unicode_bmp_p(c)	(((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* True when the value bits of c do not exceed U+10FFFF. */
#define nkf_char_unicode_value_p(c)	(((c) & VALUE_MASK) <= UNICODE_MAX)

/*
 * Combine a UTF-16 surrogate pair into a code point.  The constant equals
 * (0xD800 << 10) + 0xDC00 - 0x10000, so (0xD800, 0xDC00) yields 0x10000.
 */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
437static int numchar_f =
FALSE;
443static int noout_f =
FALSE;
445static int debug_f =
FALSE;
450static int guess_f = 0;
451static void set_input_codename(
const char *codename);
454static int exec_f = 0;
459static int cp51932_f =
FALSE;
462static int cp932inv_f =
TRUE;
467static int x0212_f =
FALSE;
468static int x0213_f =
FALSE;
470static unsigned char prefix_table[256];
476 {
"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
477 {
"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
478#ifdef UTF8_INPUT_ENABLE
479 {
"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
480 {
"UTF-16", 0, 0, 0, {0, 0, 0},
NULL, w_iconv16, 0},
481 {
"UTF-32", 0, 0, 0, {0, 0, 0},
NULL, w_iconv32, 0},
486static int mimeout_mode = 0;
487static int base64_count = 0;
492static int f_line = 0;
493static int f_prev = 0;
494static int fold_preserve_f =
FALSE;
495static int fold_f =
FALSE;
496static int fold_len = 0;
499static unsigned char kanji_intro =
DEFAULT_J;
500static unsigned char ascii_intro =
DEFAULT_R;
504#define FOLD_MARGIN 10
505#define DEFAULT_FOLD 60
522 no_connection2(c2,c1,0);
556static int output_mode =
ASCII;
557static int input_mode =
ASCII;
558static int mime_decode_mode =
FALSE;
564static const unsigned char cv[]= {
565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
586static const unsigned char dv[]= {
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607static const unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
628static const unsigned char ev_x0213[]= {
629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635 0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636 0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637 0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638 0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
650static const unsigned char fv[] = {
652 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
668static int option_mode = 0;
669static int file_out_f =
FALSE;
671static int overwrite_f =
FALSE;
672static int preserve_time_f =
FALSE;
673static int backup_f =
FALSE;
674static char *backup_suffix =
"";
677static int eolmode_f = 0;
678static int input_eol = 0;
685nkf_xmalloc(
size_t size)
701nkf_xrealloc(
void *
ptr,
size_t size)
714#define nkf_xfree(ptr) free(ptr)
717nkf_str_caseeql(
const char *
src,
const char *target)
720 for (
i = 0;
src[
i] && target[
i];
i++) {
728nkf_enc_from_index(
int idx)
737nkf_enc_find_index(
const char *
name)
750nkf_enc_find(
const char *
name)
753 idx = nkf_enc_find_index(
name);
754 if (idx < 0)
return 0;
755 return nkf_enc_from_index(idx);
/*
 * Accessors for an encoding descriptor pointer `enc`.  Each one expands to a
 * plain member access, so `enc` must be a non-NULL pointer to a structure
 * with `name`, `id` and `base_encoding` members.
 */
/* The encoding's `name` member. */
758#define nkf_enc_name(enc) (enc)->name
/* The encoding's `id` member, used as its index. */
759#define nkf_enc_to_index(enc) (enc)->id
/* The encoding's `base_encoding` member. */
760#define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
/* The `iconv` member of the base encoding. */
761#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
/* The `oconv` member of the base encoding. */
762#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
763#define nkf_enc_asciicompat(enc) (\
764 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
765 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is NkfEncodingUTF_8, _UTF_16 or _UTF_32. */
766#define nkf_enc_unicode_p(enc) (\
767 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
768 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
769 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding's index is CP50220, CP50221 or CP50222. */
770#define nkf_enc_cp5022x_p(enc) (\
771 nkf_enc_to_index(enc) == CP50220 ||\
772 nkf_enc_to_index(enc) == CP50221 ||\
773 nkf_enc_to_index(enc) == CP50222)
775#ifdef DEFAULT_CODE_LOCALE
777nkf_locale_charmap(
void)
779#ifdef HAVE_LANGINFO_H
780 return nl_langinfo(CODESET);
781#elif defined(__WIN32__)
785#elif defined(__OS2__)
786# if defined(INT_IS_SHORT)
792 ULONG ulCP[1], ulncp;
793 DosQueryCp(
sizeof(ulCP), ulCP, &ulncp);
794 if (ulCP[0] == 932 || ulCP[0] == 943)
805nkf_locale_encoding(
void)
808 const char *encname = nkf_locale_charmap();
810 enc = nkf_enc_find(encname);
816nkf_utf8_encoding(
void)
822nkf_default_encoding(
void)
825#ifdef DEFAULT_CODE_LOCALE
826 enc = nkf_locale_encoding();
827#elif defined(DEFAULT_ENCIDX)
828 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
830 if (!enc) enc = nkf_utf8_encoding();
841nkf_buf_new(
int length)
859#define nkf_buf_length(buf) ((buf)->len)
860#define nkf_buf_empty_p(buf) ((buf)->len == 0)
878 if (
buf->capa <=
buf->len) {
888 return buf->ptr[--
buf->len];
894#define fprintf dllprintf
907 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
909 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
910 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
914 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
915 " UTF option is -W[8,[16,32][B,L]]\n"
917 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
921 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
922 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
923 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
926 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
927 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
928 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
929 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
932 " O Output to File (DEFAULT 'nkf.out')\n"
933 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
936 " --ic=<encoding> Specify the input encoding\n"
937 " --oc=<encoding> Specify the output encoding\n"
938 " --hiragana --katakana Hiragana/Katakana Conversion\n"
939 " --katakana-hiragana Converts each other\n"
943 " --{cap, url}-input Convert hex after ':' or '%%'\n"
946 " --numchar-input Convert Unicode Character Reference\n"
949 " --fb-{skip, html, xml, perl, java, subchar}\n"
950 " Specify unassigned character's replacement\n"
955 " --in-place[=SUF] Overwrite original files\n"
956 " --overwrite[=SUF] Preserve timestamp of original files\n"
958 " -g --guess Guess the input code\n"
959 " -v --version Print the version\n"
960 " --help/-V Print this help / configuration\n"
966show_configuration(
void)
970 " Compile-time options:\n"
971 " Compiled at: " __DATE__
" " __TIME__
"\n"
974 " Default output encoding: "
977#elif defined(DEFAULT_ENCIDX)
984 " Default output end of line: "
993 " Decode MIME encoded string: "
1000 " Convert JIS X 0201 Katakana: "
1007 " --help, --version output: "
1008#
if HELP_OUTPUT_HELP_OUTPUT
1019get_backup_filename(
const char *suffix,
const char *filename)
1021 char *backup_filename;
1022 int asterisk_count = 0;
1024 int filename_length =
strlen(filename);
1026 for(
i = 0; suffix[
i];
i++){
1027 if(suffix[
i] ==
'*') asterisk_count++;
1031 backup_filename = nkf_xmalloc(
strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1032 for(
i = 0, j = 0; suffix[
i];){
1033 if(suffix[
i] ==
'*'){
1034 backup_filename[j] =
'\0';
1035 strncat(backup_filename, filename, filename_length);
1037 j += filename_length;
1039 backup_filename[j++] = suffix[
i++];
1042 backup_filename[j] =
'\0';
1044 j = filename_length +
strlen(suffix);
1045 backup_filename = nkf_xmalloc(j + 1);
1046 strcpy(backup_filename, filename);
1047 strcat(backup_filename, suffix);
1048 backup_filename[j] =
'\0';
1050 return backup_filename;
1054#ifdef UTF8_INPUT_ENABLE
1084 (*oconv)(0, 0x30+(c/10000 )%10);
1086 (*oconv)(0, 0x30+(c/1000 )%10);
1088 (*oconv)(0, 0x30+(c/100 )%10);
1090 (*oconv)(0, 0x30+(c/10 )%10);
1092 (*oconv)(0, 0x30+ c %10);
1103 nkf_each_char_to_hex(oconv, c);
1117 (*oconv)(0,
bin2hex(high>>12));
1118 (*oconv)(0,
bin2hex(high>> 8));
1119 (*oconv)(0,
bin2hex(high>> 4));
1123 (*oconv)(0,
bin2hex(low>>12));
1124 (*oconv)(0,
bin2hex(low>> 8));
1125 (*oconv)(0,
bin2hex(low>> 4));
1143 nkf_each_char_to_hex(oconv, c);
1151 c = unicode_subchar;
1152 (*oconv)((c>>8)&0xFF, c&0xFF);
1157static const struct {
1181 {
"katakana-hiragana",
"h3"},
1189#ifdef UTF8_OUTPUT_ENABLE
1199 {
"fb-subchar=",
""},
1201#ifdef UTF8_INPUT_ENABLE
1202 {
"utf8-input",
"W"},
1203 {
"utf16-input",
"W16"},
1204 {
"no-cp932ext",
""},
1205 {
"no-best-fit-chars",
""},
1207#ifdef UNICODE_NORMALIZATION
1208 {
"utf8mac-input",
""},
1220#ifdef NUMCHAR_OPTION
1221 {
"numchar-input",
""},
1227#ifdef SHIFTJIS_CP932
1248#ifdef SHIFTJIS_CP932
1251#ifdef UTF8_OUTPUT_ENABLE
1270#ifdef SHIFTJIS_CP932
1273#ifdef UTF8_OUTPUT_ENABLE
1279#ifdef SHIFTJIS_CP932
1282#ifdef UTF8_OUTPUT_ENABLE
1292#ifdef SHIFTJIS_CP932
1295#ifdef UTF8_OUTPUT_ENABLE
1301#ifdef SHIFTJIS_CP932
1304#ifdef UTF8_OUTPUT_ENABLE
1310#ifdef SHIFTJIS_CP932
1313#ifdef UTF8_OUTPUT_ENABLE
1320#ifdef SHIFTJIS_CP932
1322 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1328#ifdef SHIFTJIS_CP932
1332#ifdef UTF8_INPUT_ENABLE
1333#ifdef UNICODE_NORMALIZATION
1365#ifdef SHIFTJIS_CP932
1366 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1368#ifdef UTF8_OUTPUT_ENABLE
1374#ifdef SHIFTJIS_CP932
1375 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1377#ifdef UTF8_OUTPUT_ENABLE
1382#ifdef SHIFTJIS_CP932
1383 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1388#ifdef SHIFTJIS_CP932
1389 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1396#ifdef SHIFTJIS_CP932
1397 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1404#ifdef UTF8_OUTPUT_ENABLE
1409#ifdef UTF8_OUTPUT_ENABLE
1415#ifdef SHIFTJIS_CP932
1416 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1418#ifdef UTF8_OUTPUT_ENABLE
1424#ifdef SHIFTJIS_CP932
1425 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1427#ifdef UTF8_OUTPUT_ENABLE
1433#ifdef SHIFTJIS_CP932
1434 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1436#ifdef UTF8_OUTPUT_ENABLE
1443#ifdef UTF8_OUTPUT_ENABLE
1450#ifdef UTF8_OUTPUT_ENABLE
1457#ifdef SHIFTJIS_CP932
1458 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1465#ifdef SHIFTJIS_CP932
1466 if (cp932inv_f ==
TRUE) cp932inv_f =
FALSE;
1469#ifdef UTF8_OUTPUT_ENABLE
1471 output_bom_f =
TRUE;
1475 output_bom_f =
TRUE;
1479 output_bom_f =
FALSE;
1483 output_bom_f =
TRUE;
1487 output_bom_f =
TRUE;
1491 output_bom_f =
FALSE;
1495 output_bom_f =
TRUE;
1519#ifdef INPUT_CODE_FIX
1520 if (
f || !input_encoding)
1528 && (
f == -
TRUE || !input_encoding)
1534 if (estab_f && iconv_for_check != iconv){
1535 struct input_code *p = find_inputcode_byfunc(iconv);
1537 set_input_codename(p->
name);
1540 iconv_for_check = iconv;
1552 if (0x75 <= c && c <= 0x7f){
1553 ret = c + (0x109 - 0x75);
1556 if (0x75 <= c && c <= 0x7f){
1557 ret = c + (0x113 - 0x75);
1568 if (0x7f <= c && c <= 0x88){
1569 ret = c + (0x75 - 0x7f);
1570 }
else if (0x89 <= c && c <= 0x92){
1580 static const char x0213_2_table[] =
1581 {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1584 return x0213_2_table[ku];
1585 if (78 <= ku && ku <= 94)
1596 if (x0213_f && is_x0213_2_in_x0212(ndx)){
1597 if((0x21 <= ndx && ndx <= 0x2F)){
1598 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1599 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1601 }
else if(0x6E <= ndx && ndx <= 0x7E){
1602 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1603 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1611 const unsigned short *
ptr;
1614 val =
ptr[(c1 & 0x7f) - 0x21];
1623 c2 = x0212_shift(c2);
1627 if(0x7F < c2)
return 1;
1628 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1629 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1636#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1639 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1640 if (0xFC < c1)
return 1;
1641#ifdef SHIFTJIS_CP932
1650 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1651 val =
cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1676 if(x0213_f && c2 >= 0xF0){
1677 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){
1678 c2 =
PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1681 if (0x9E < c1) c2++;
1684#define SJ0162 0x00e1
1685#define SJ6394 0x0161
1687 if (0x9E < c1) c2++;
1690 c1 = c1 - ((c1 >
DEL) ?
SP : 0x1F);
1697 c2 = x0212_unshift(c2);
1704#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1714 }
else if (val < 0x800){
1715 *p1 = 0xc0 | (val >> 6);
1716 *p2 = 0x80 | (val & 0x3f);
1720 *p1 = 0xe0 | (val >> 12);
1721 *p2 = 0x80 | ((val >> 6) & 0x3f);
1722 *p3 = 0x80 | ( val & 0x3f);
1725 *p1 = 0xf0 | (val >> 18);
1726 *p2 = 0x80 | ((val >> 12) & 0x3f);
1727 *p3 = 0x80 | ((val >> 6) & 0x3f);
1728 *p4 = 0x80 | ( val & 0x3f);
1745 else if (c1 <= 0xC1) {
1749 else if (c1 <= 0xDF) {
1751 wc = (c1 & 0x1F) << 6;
1754 else if (c1 <= 0xEF) {
1756 wc = (c1 & 0x0F) << 12;
1757 wc |= (c2 & 0x3F) << 6;
1760 else if (c2 <= 0xF4) {
1762 wc = (c1 & 0x0F) << 18;
1763 wc |= (c2 & 0x3F) << 12;
1764 wc |= (c3 & 0x3F) << 6;
1774#ifdef UTF8_INPUT_ENABLE
1777 const unsigned short *
const *pp,
nkf_char psize,
1781 const unsigned short *p;
1784 if (pp == 0)
return 1;
1787 if (c1 < 0 || psize <= c1)
return 1;
1789 if (p == 0)
return 1;
1792 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0)
return 1;
1794 if (val == 0)
return 1;
1795 if (no_cp932ext_f && (
1815 const unsigned short *
const *pp;
1816 const unsigned short *
const *
const *ppp;
1817 static const char no_best_fit_chars_table_C2[] =
1818 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1819 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1820 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1821 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1822 static const char no_best_fit_chars_table_C2_ms[] =
1823 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1824 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1825 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1826 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1827 static const char no_best_fit_chars_table_932_C2[] =
1828 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1829 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1830 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1831 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1832 static const char no_best_fit_chars_table_932_C3[] =
1833 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1834 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1835 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1836 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1842 }
else if(c2 < 0xe0){
1843 if(no_best_fit_chars_f){
1847 if(no_best_fit_chars_table_932_C2[c1&0x3F])
return 1;
1850 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1853 }
else if(!cp932inv_f){
1856 if(no_best_fit_chars_table_C2[c1&0x3F])
return 1;
1859 if(no_best_fit_chars_table_932_C3[c1&0x3F])
return 1;
1863 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F])
return 1;
1887 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1888 }
else if(c0 < 0xF0){
1889 if(no_best_fit_chars_f){
1891 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
return 1;
1897 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE)
return 1;
1900 if(c0 == 0x92)
return 1;
1905 if(c1 == 0x80 || c0 == 0x9C)
return 1;
1913 if(c0 == 0x94)
return 1;
1916 if(c0 == 0xBB)
return 1;
1926 if(c0 == 0x95)
return 1;
1929 if(c0 == 0xA5)
return 1;
1936 if(c0 == 0x8D)
return 1;
1939 if(c0 == 0x9E && !cp932inv_f)
return 1;
1942 if(0xA0 <= c0 && c0 <= 0xA5)
return 1;
1955 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1957#ifdef SHIFTJIS_CP932
1960 if (encode_fallback) ret = 1;
1964 if (e2s_conv(*p2, *p1, &
s2, &s1) == 0) {
1965 s2e_conv(
s2, s1, p2, p1);
1975#ifdef UTF8_OUTPUT_ENABLE
1976#define X0213_SURROGATE_FIND(tbl, size, euc) do { \
1978 for (i = 0; i < size; i++) \
1979 if (tbl[i][0] == euc) { \
1988 const unsigned short *p;
2005 c2 = (c2&0x7f) - 0x21;
2006 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2015 c2 = (c2&0x7f) - 0x21;
2016 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2026 c1 = (c1 & 0x7f) - 0x21;
2027 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
2029 if (x0213_f && 0xD800<=val && val<=0xDBFF) {
2030 nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
2051 for (
i = 0;
i < sizeof_x0213_combining_chars;
i++)
2054 if (
i >= sizeof_x0213_combining_chars)
2056 euc = (c2&0x7f)<<8 | (c1&0x7f);
2057 for (
i = 0;
i < sizeof_x0213_combining_table;
i++)
2072 }
else if (0xc0 <= c2 && c2 <= 0xef) {
2073 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2074#ifdef NUMCHAR_OPTION
2085#ifdef UTF8_INPUT_ENABLE
2097 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2098 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2110 for (
i = 0;
i < sizeof_x0213_1_surrogate_table;
i++)
2117 for (
i = 0;
i < sizeof_x0213_2_surrogate_table;
i++)
2136 if (iso2022jp_f && !x0201_f) {
2143 }
else if (c2 == 0x8f){
2147 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2152 c2 = (c2 << 8) | (c1 & 0x7f);
2154#ifdef SHIFTJIS_CP932
2157 if (e2s_conv(c2, c1, &
s2, &s1) == 0){
2158 s2e_conv(
s2, s1, &c2, &c1);
2171 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2178#ifdef SHIFTJIS_CP932
2179 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2181 if (e2s_conv(c2, c1, &
s2, &s1) == 0){
2182 s2e_conv(
s2, s1, &c2, &c1);
2200 if (iso2022jp_f && !x0201_f) {
2205 }
else if ((c2 ==
EOF) || (c2 == 0) || c2 <
SP) {
2207 }
else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2209 if(c1 == 0x7F)
return 0;
2213 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2214 if (ret)
return ret;
2224 for (
i = 0;
i < sizeof_x0213_combining_table;
i++) {
2236 for (
i = 0;
i < sizeof_x0213_combining_chars;
i++) {
2248 static const char w_iconv_utf8_1st_byte[] =
2250 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2251 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2252 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2253 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2260 if (c1 < 0 || 0xff < c1) {
2261 }
else if (c1 == 0) {
2263 }
else if ((c1 & 0xC0) == 0x80) {
2266 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2268 if (c2 < 0x80 || 0xBF < c2)
return 0;
2271 if (c3 == 0)
return -1;
2272 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2277 if (c3 == 0)
return -1;
2278 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2282 if (c3 == 0)
return -1;
2283 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2287 if (c3 == 0)
return -2;
2288 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2292 if (c3 == 0)
return -2;
2293 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2297 if (c3 == 0)
return -2;
2298 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2306 if (c1 == 0 || c1 ==
EOF){
2307 }
else if ((c1 & 0xf8) == 0xf0) {
2311 if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
2313 ret = w2e_conv(c1, c2, c3, &c1, &c2);
2325 nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
/*
 * Negative codes in the NKF_ICONV_* family.
 * NOTE(review): presumably returned by the Unicode input converters to
 * signal an out-of-range code point, a base character awaiting a possible
 * combining character, and a pair that could not be combined - confirm at
 * the return sites.
 */
2332#define NKF_ICONV_INVALID_CODE_RANGE -13
2333#define NKF_ICONV_WAIT_COMBINING_CHAR -14
2334#define NKF_ICONV_NOT_COMBINED -15
2336unicode_iconv(
nkf_char wc,
int nocombine)
2344 }
else if ((wc>>11) == 27) {
2347 }
else if (wc < 0xFFFF) {
2348 if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
2350 ret = w16e_conv(wc, &c2, &c1);
2351 if (ret)
return ret;
2352 }
else if (wc < 0x10FFFF) {
2370 }
else if ((wc2>>11) == 27) {
2373 }
else if (wc2 < 0xFFFF) {
2374 if (!x0213_combining_p(wc2))
2376 for (
i = 0;
i < sizeof_x0213_combining_table;
i++) {
2385 }
else if (wc2 < 0x10FFFF) {
2397 wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
2398 wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
2401 return unicode_iconv_combine(wc, wc2);
/*
 * size_t sentinel values (the two largest size_t values).  The whole cast
 * expression is parenthesized so that the macro behaves as a single operand
 * in every context (e.g. `sizeof NKF_ICONV_NEED_ONE_MORE_BYTE`).
 * NOTE(review): presumably returned by the UTF-16/UTF-32 input converters
 * when one or two more bytes must be read - confirm at the call sites.
 */
#define NKF_ICONV_NEED_ONE_MORE_BYTE ((size_t)-1)
#define NKF_ICONV_NEED_TWO_MORE_BYTES ((size_t)-2)
2417 if (0xD8 <= c1 && c1 <= 0xDB) {
2418 if (0xDC <= c3 && c3 <= 0xDF) {
2425 if (0xD8 <= c2 && c2 <= 0xDB) {
2426 if (0xDC <= c4 && c4 <= 0xDF) {
2434 return (*unicode_iconv)(wc,
FALSE);
2443 if (0xD8 <= c3 && c3 <= 0xDB) {
2450 if (0xD8 <= c2 && c2 <= 0xDB) {
2458 return unicode_iconv_combine(wc, wc2);
2469 return (*unicode_iconv)(wc,
TRUE);
2491 switch(input_endian){
2493 wc = c2 << 16 | c3 << 8 | c4;
2496 wc = c3 << 16 | c2 << 8 | c1;
2499 wc = c1 << 16 | c4 << 8 | c3;
2502 wc = c4 << 16 | c1 << 8 | c2;
2520 wc = utf32_to_nkf_char(c1, c2, c3, c4);
2524 return (*unicode_iconv)(wc,
FALSE);
2532 wc = utf32_to_nkf_char(c1, c2, c3, c4);
2535 wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
2539 return unicode_iconv_combine(wc, wc2);
2547 wc = utf32_to_nkf_char(c1, c2, c3, c4);
2548 return (*unicode_iconv)(wc,
TRUE);
2552#define output_ascii_escape_sequence(mode) do { \
2553 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2556 (*o_putc)(ascii_intro); \
2557 output_mode = mode; \
2562output_escape_sequence(
int mode)
2564 if (output_mode == mode)
2580 (*o_putc)(kanji_intro);
2607#ifdef NUMCHAR_OPTION
2609 w16e_conv(c1, &c2, &c1);
2612 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2615 c2 = 0x7F + c1 / 94;
2616 c1 = 0x21 + c1 % 94;
2618 if (encode_fallback) (*encode_fallback)(c1);
2628 else if (c2 ==
EOF) {
2642 (*o_putc)(c2 & 0x7f);
2647 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2648 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1)
return;
2659 w16e_conv(c1, &c2, &c1);
2662 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2666 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2667 c1 = 0x21 + c1 % 94;
2670 (*o_putc)((c2 & 0x7f) | 0x080);
2671 (*o_putc)(c1 | 0x080);
2673 (*o_putc)((c2 & 0x7f) | 0x080);
2674 (*o_putc)(c1 | 0x080);
2678 if (encode_fallback) (*encode_fallback)(c1);
2686 }
else if (c2 == 0) {
2687 output_mode =
ASCII;
2691 (*o_putc)(
SS2); (*o_putc)(c1|0x80);
2694 (*o_putc)(c1 | 0x080);
2698#ifdef SHIFTJIS_CP932
2701 if (e2s_conv(c2, c1, &
s2, &s1) == 0){
2702 s2e_conv(
s2, s1, &c2, &c1);
2707 output_mode =
ASCII;
2712 (*o_putc)((c2 & 0x7f) | 0x080);
2713 (*o_putc)(c1 | 0x080);
2716 (*o_putc)((c2 & 0x7f) | 0x080);
2717 (*o_putc)(c1 | 0x080);
2722 set_iconv(
FALSE, 0);
2726 (*o_putc)(c2 | 0x080);
2727 (*o_putc)(c1 | 0x080);
2734#ifdef NUMCHAR_OPTION
2736 w16e_conv(c1, &c2, &c1);
2739 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2742 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2744 c1 += 0x40 + (c1 > 0x3e);
2749 if(encode_fallback)(*encode_fallback)(c1);
2758 }
else if (c2 == 0) {
2759 output_mode =
ASCII;
2766 (*o_putc)(c1 | 0x080);
2770 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2777 set_iconv(
FALSE, 0);
2781 e2s_conv(c2, c1, &c2, &c1);
2783#ifdef SHIFTJIS_CP932
2785 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2795 if (prefix_table[(
unsigned char)c1]){
2796 (*o_putc)(prefix_table[(
unsigned char)c1]);
2802#ifdef UTF8_OUTPUT_ENABLE
2803#define OUTPUT_UTF8(val) do { \
2804 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \
2806 if (c2) (*o_putc)(c2); \
2807 if (c3) (*o_putc)(c3); \
2808 if (c4) (*o_putc)(c4); \
2818 output_bom_f =
FALSE;
2838 val = e2w_conv(c2, c1);
2840 val2 = e2w_combining(val, c2, c1);
2848#define OUTPUT_UTF16_BYTES(c1, c2) do { \
2849 if (output_endian == ENDIAN_LITTLE){ \
2858#define OUTPUT_UTF16(val) do { \
2859 if (nkf_char_unicode_bmp_p(val)) { \
2860 c2 = (val >> 8) & 0xff; \
2862 OUTPUT_UTF16_BYTES(c1, c2); \
2864 val &= VALUE_MASK; \
2865 if (val <= UNICODE_MAX) { \
2866 c2 = (val >> 10) + NKF_INT32_C(0xD7C0); \
2867 c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); \
2868 OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
2869 OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
2878 output_bom_f =
FALSE;
2891 val = e2w_conv(c2, c1);
2893 val2 = e2w_combining(val, c2, c1);
2902#define OUTPUT_UTF32(c) do { \
2903 if (output_endian == ENDIAN_LITTLE){ \
2904 (*o_putc)( (c) & 0xFF); \
2905 (*o_putc)(((c) >> 8) & 0xFF); \
2906 (*o_putc)(((c) >> 16) & 0xFF); \
2910 (*o_putc)(((c) >> 16) & 0xFF); \
2911 (*o_putc)(((c) >> 8) & 0xFF); \
2912 (*o_putc)( (c) & 0xFF); \
2920 output_bom_f =
FALSE;
2945 val = e2w_conv(c2, c1);
2947 val2 = e2w_combining(val, c2, c1);
/*
 * Score flags used when rating candidate input encodings.
 * Each macro is the previous one shifted left by one, so every name gets
 * its own bit position (assuming SCORE_L2, defined outside this view, is a
 * single bit -- TODO confirm).  The flags are combined into and cleared
 * from a per-encoding `score` field with bitwise operators.
 */
2957#define SCORE_KANA (SCORE_L2 << 1)
2958#define SCORE_DEPEND (SCORE_KANA << 1)
2959#define SCORE_CP932 (SCORE_DEPEND << 1)
2960#define SCORE_X0212 (SCORE_CP932 << 1)
2961#define SCORE_X0213 (SCORE_X0212 << 1)
2962#define SCORE_NO_EXIST (SCORE_X0213 << 1)
2963#define SCORE_iMIME (SCORE_NO_EXIST << 1)
2964#define SCORE_ERROR (SCORE_iMIME << 1)
/* Initial score value: identical to the SCORE_iMIME flag. */
2966#define SCORE_INIT (SCORE_iMIME)
2968static const nkf_char score_table_A0[] = {
2975static const nkf_char score_table_F0[] = {
2982static const nkf_char score_table_8FA0[] = {
2989static const nkf_char score_table_8FE0[] = {
2996static const nkf_char score_table_8FF0[] = {
3015 ptr->score &= ~score;
3026 }
else if (c2 ==
SS2){
3028 }
else if (c2 == 0x8f){
3029 if ((c1 & 0x70) == 0x20){
3030 set_code_score(
ptr, score_table_8FA0[c1 & 0x0f]);
3031 }
else if ((c1 & 0x70) == 0x60){
3032 set_code_score(
ptr, score_table_8FE0[c1 & 0x0f]);
3033 }
else if ((c1 & 0x70) == 0x70){
3034 set_code_score(
ptr, score_table_8FF0[c1 & 0x0f]);
3038#ifdef UTF8_OUTPUT_ENABLE
3039 }
else if (!e2w_conv(c2, c1)){
3042 }
else if ((c2 & 0x70) == 0x20){
3043 set_code_score(
ptr, score_table_A0[c2 & 0x0f]);
3044 }
else if ((c2 & 0x70) == 0x70){
3045 set_code_score(
ptr, score_table_F0[c2 & 0x0f]);
3046 }
else if ((c2 & 0x70) >= 0x50){
3057 if (iconv ==
ptr->iconv_func) set_iconv(
FALSE, 0);
3063 ptr->buf[
ptr->index++] = c;
3084 ptr->_file_stat = 0;
3090 if (c <=
DEL && estab_f){
3100 status_check(
ptr, c);
3107 }
else if (0xa1 <= c && c <= 0xdf){
3108 status_push_ch(
ptr,
SS2);
3109 status_push_ch(
ptr, c);
3112 }
else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3114 status_push_ch(
ptr, c);
3115 }
else if (0xed <= c && c <= 0xee){
3117 status_push_ch(
ptr, c);
3118#ifdef SHIFTJIS_CP932
3121 status_push_ch(
ptr, c);
3124 }
else if (0xf0 <= c && c <= 0xfc){
3126 status_push_ch(
ptr, c);
3129 status_disable(
ptr);
3133 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3134 status_push_ch(
ptr, c);
3135 s2e_conv(
ptr->buf[0],
ptr->buf[1], &
ptr->buf[0], &
ptr->buf[1]);
3139 status_disable(
ptr);
3143#ifdef SHIFTJIS_CP932
3144 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3145 status_push_ch(
ptr, c);
3146 if (s2e_conv(
ptr->buf[0],
ptr->buf[1], &
ptr->buf[0], &
ptr->buf[1]) == 0) {
3153 status_disable(
ptr);
3156 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
3157 status_push_ch(
ptr, c);
3158 s2e_conv(
ptr->buf[0],
ptr->buf[1], &
ptr->buf[0], &
ptr->buf[1]);
3162 status_disable(
ptr);
3173 status_check(
ptr, c);
3180 }
else if (
SS2 == c || (0xa1 <= c && c <= 0xfe)){
3182 status_push_ch(
ptr, c);
3184 }
else if (0x8f == c){
3186 status_push_ch(
ptr, c);
3189 status_disable(
ptr);
3193 if (0xa1 <= c && c <= 0xfe){
3194 status_push_ch(
ptr, c);
3198 status_disable(
ptr);
3203 if (0xa1 <= c && c <= 0xfe){
3205 status_push_ch(
ptr, c);
3207 status_disable(
ptr);
3213#ifdef UTF8_INPUT_ENABLE
3219 status_check(
ptr, c);
3226 }
else if (0xc0 <= c && c <= 0xdf){
3228 status_push_ch(
ptr, c);
3229 }
else if (0xe0 <= c && c <= 0xef){
3231 status_push_ch(
ptr, c);
3232 }
else if (0xf0 <= c && c <= 0xf4){
3234 status_push_ch(
ptr, c);
3236 status_disable(
ptr);
3241 if (0x80 <= c && c <= 0xbf){
3242 status_push_ch(
ptr, c);
3243 if (
ptr->index >
ptr->stat){
3244 int bom = (
ptr->buf[0] == 0xef &&
ptr->buf[1] == 0xbb
3245 &&
ptr->buf[2] == 0xbf);
3246 w2e_conv(
ptr->buf[0],
ptr->buf[1],
ptr->buf[2],
3247 &
ptr->buf[0], &
ptr->buf[1]);
3254 status_disable(
ptr);
3258 if (0x80 <= c && c <= 0xbf){
3259 if (
ptr->index <
ptr->stat){
3260 status_push_ch(
ptr, c);
3265 status_disable(
ptr);
3275 int action_flag = 1;
3288 }
else if(p->
stat == 0){
3299 if (result && !estab_f){
3301 }
else if (c <=
DEL){
3321#define STD_GC_BUFSIZE (256)
3329 nkf_buf_clear(nkf_state->
nfc_buf);
3335 nkf_state->
nfc_buf = nkf_buf_new(9);
/* Number of entries currently stored in hold_buf; starts at 0 and is
 * post-incremented each time a character is pushed into hold_buf. */
3369static int hold_count = 0;
3375 hold_buf[hold_count++] = c2;
3376 return ((hold_count >=
HOLD_SIZE*2) ?
EOF : hold_count);
3395 while ((c2 = (*i_getc)(
f)) !=
EOF) {
3401 if (push_hold_buf(c2) ==
EOF || estab_f) {
3433 while (hold_index < hold_count){
3434 c1 = hold_buf[hold_index++];
3439 else if (c1 <=
DEL){
3442 }
else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3447 if (hold_index < hold_count){
3448 c2 = hold_buf[hold_index++];
3459 switch ((*iconv)(c1, c2, 0)) {
3462 if (hold_index < hold_count){
3463 c3 = hold_buf[hold_index++];
3464 }
else if ((c3 = (*i_getc)(
f)) ==
EOF) {
3469 if (hold_index < hold_count){
3470 c4 = hold_buf[hold_index++];
3471 }
else if ((c4 = (*i_getc)(
f)) ==
EOF) {
3476 (*iconv)(c1, c2, (c3<<8)|c4);
3480 if (hold_index < hold_count){
3481 c3 = hold_buf[hold_index++];
3483 }
else if ((c3 = (*i_getc)(
f)) ==
EOF) {
3484 w_iconv_nocombine(c1, c2, 0);
3487 if (hold_index < hold_count){
3488 c4 = hold_buf[hold_index++];
3490 }
else if ((c4 = (*i_getc)(
f)) ==
EOF) {
3491 w_iconv_nocombine(c1, c2, 0);
3492 if (fromhold_count <= 2)
3498 if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
3499 w_iconv_nocombine(c1, c2, 0);
3500 if (fromhold_count <= 2) {
3503 }
else if (fromhold_count == 3) {
3513 if (hold_index < hold_count){
3514 c3 = hold_buf[hold_index++];
3516 }
else if ((c3 = (*i_getc)(
f)) ==
EOF) {
3522 if ((*iconv)(c1, c2, c3) == -3) {
3525 if (hold_index < hold_count){
3526 c4 = hold_buf[hold_index++];
3528 }
else if ((c4 = (*i_getc)(
f)) ==
EOF) {
3529 w_iconv_nocombine(c1, c2, c3);
3532 if (hold_index < hold_count){
3533 c5 = hold_buf[hold_index++];
3535 }
else if ((c5 = (*i_getc)(
f)) ==
EOF) {
3536 w_iconv_nocombine(c1, c2, c3);
3537 if (fromhold_count == 4)
3543 if (hold_index < hold_count){
3544 c6 = hold_buf[hold_index++];
3546 }
else if ((c6 = (*i_getc)(
f)) ==
EOF) {
3547 w_iconv_nocombine(c1, c2, c3);
3548 if (fromhold_count == 5) {
3550 }
else if (fromhold_count == 4) {
3559 if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
3560 w_iconv_nocombine(c1, c2, c3);
3561 if (fromhold_count == 6) {
3563 }
else if (fromhold_count == 5) {
3566 }
else if (fromhold_count == 4) {
3579 if (c3 ==
EOF)
break;
3591 input_bom_f =
FALSE;
3592 switch(c2 = (*i_getc)(
f)){
3594 if((c2 = (*i_getc)(
f)) == 0x00){
3595 if((c2 = (*i_getc)(
f)) == 0xFE){
3596 if((c2 = (*i_getc)(
f)) == 0xFF){
3597 if(!input_encoding){
3598 set_iconv(
TRUE, w_iconv32);
3600 if (iconv == w_iconv32) {
3605 (*i_ungetc)(0xFF,
f);
3606 }
else (*i_ungetc)(c2,
f);
3607 (*i_ungetc)(0xFE,
f);
3608 }
else if(c2 == 0xFF){
3609 if((c2 = (*i_getc)(
f)) == 0xFE){
3610 if(!input_encoding){
3611 set_iconv(
TRUE, w_iconv32);
3613 if (iconv == w_iconv32) {
3617 (*i_ungetc)(0xFF,
f);
3618 }
else (*i_ungetc)(c2,
f);
3619 (*i_ungetc)(0xFF,
f);
3620 }
else (*i_ungetc)(c2,
f);
3621 (*i_ungetc)(0x00,
f);
3622 }
else (*i_ungetc)(c2,
f);
3623 (*i_ungetc)(0x00,
f);
3626 if((c2 = (*i_getc)(
f)) == 0xBB){
3627 if((c2 = (*i_getc)(
f)) == 0xBF){
3628 if(!input_encoding){
3629 set_iconv(
TRUE, w_iconv);
3631 if (iconv == w_iconv) {
3635 (*i_ungetc)(0xBF,
f);
3636 }
else (*i_ungetc)(c2,
f);
3637 (*i_ungetc)(0xBB,
f);
3638 }
else (*i_ungetc)(c2,
f);
3639 (*i_ungetc)(0xEF,
f);
3642 if((c2 = (*i_getc)(
f)) == 0xFF){
3643 if((c2 = (*i_getc)(
f)) == 0x00){
3644 if((c2 = (*i_getc)(
f)) == 0x00){
3645 if(!input_encoding){
3646 set_iconv(
TRUE, w_iconv32);
3648 if (iconv == w_iconv32) {
3652 (*i_ungetc)(0x00,
f);
3653 }
else (*i_ungetc)(c2,
f);
3654 (*i_ungetc)(0x00,
f);
3655 }
else (*i_ungetc)(c2,
f);
3656 if(!input_encoding){
3657 set_iconv(
TRUE, w_iconv16);
3659 if (iconv == w_iconv16) {
3664 (*i_ungetc)(0xFF,
f);
3665 }
else (*i_ungetc)(c2,
f);
3666 (*i_ungetc)(0xFE,
f);
3669 if((c2 = (*i_getc)(
f)) == 0xFE){
3670 if((c2 = (*i_getc)(
f)) == 0x00){
3671 if((c2 = (*i_getc)(
f)) == 0x00){
3672 if(!input_encoding){
3673 set_iconv(
TRUE, w_iconv32);
3675 if (iconv == w_iconv32) {
3680 (*i_ungetc)(0x00,
f);
3681 }
else (*i_ungetc)(c2,
f);
3682 (*i_ungetc)(0x00,
f);
3683 }
else (*i_ungetc)(c2,
f);
3684 if(!input_encoding){
3685 set_iconv(
TRUE, w_iconv16);
3687 if (iconv == w_iconv16) {
3692 (*i_ungetc)(0xFE,
f);
3693 }
else (*i_ungetc)(c2,
f);
3694 (*i_ungetc)(0xFF,
f);
3715 if (c1==
'@'|| c1==
'B') {
3727 if (c1==
'J'|| c1==
'B') {
3752 if (guess_f && input_eol !=
EOF) {
3753 if (c2 == 0 && c1 ==
LF) {
3754 if (!input_eol) input_eol = prev_cr ?
CRLF :
LF;
3755 else if (input_eol != (prev_cr ?
CRLF :
LF)) input_eol =
EOF;
3756 }
else if (c2 == 0 && c1 ==
CR && input_eol ==
LF) input_eol =
EOF;
3758 else if (!input_eol) input_eol =
CR;
3759 else if (input_eol !=
CR) input_eol =
EOF;
3761 if (prev_cr || (c2 == 0 && c1 ==
LF)) {
3763 if (eolmode_f !=
LF) (*o_eol_conv)(0,
CR);
3764 if (eolmode_f !=
CR) (*o_eol_conv)(0,
LF);
3766 if (c2 == 0 && c1 ==
CR) prev_cr =
CR;
3767 else if (c2 != 0 || c1 !=
LF) (*o_eol_conv)(c2, c1);
/* Evaluates to 2 when c2 is non-zero and to 1 otherwise; c1 is accepted but
 * does not appear in the expansion.  NOTE(review): c2 is not parenthesized in
 * the expansion, so callers should pass a simple expression. */
3824#define char_size(c2,c1) (c2?2:1)
3832 if (c1==
CR && !fold_preserve_f) {
3834 }
else if (c1==
LF&&f_prev==
CR && fold_preserve_f) {
3837 }
else if (c1==
BS) {
3838 if (f_line>0) f_line--;
3840 }
else if (c2==
EOF && f_line != 0) {
3842 }
else if ((c1==
LF && !fold_preserve_f)
3843 || ((c1==
CR||(c1==
LF&&f_prev!=
CR))
3844 && fold_preserve_f)) {
3846 if (fold_preserve_f) {
3850 }
else if ((f_prev == c1)
3864 }
else if (f_prev==
SP) {
3868 if (++f_line<=fold_len)
3876 }
else if (c1==
'\f') {
3880 }
else if ((c2==0 &&
nkf_isblank(c1)) || (c2 ==
'!' && c1 ==
'!')) {
3886 if (++f_line<=fold_len)
3889 f_prev =
SP; f_line = 0;
3899 if (f_line<=fold_len) {
3902 if (f_line>fold_len+fold_margin) {
3907 if (c1==(0xde&0x7f)) fold_state = 1;
3908 else if (c1==(0xdf&0x7f)) fold_state = 1;
3909 else if (c1==(0xa4&0x7f)) fold_state = 1;
3910 else if (c1==(0xa3&0x7f)) fold_state = 1;
3911 else if (c1==(0xa1&0x7f)) fold_state = 1;
3912 else if (c1==(0xb0&0x7f)) fold_state = 1;
3913 else if (
SP<=c1 && c1<=(0xdf&0x7f)) {
3937 }
else if ((prev0==
SP) ||
3947 if (c1==
'"') fold_state = 1;
3948 else if (c1==
'#') fold_state = 1;
3949 else if (c1==
'W') fold_state = 1;
3950 else if (c1==
'K') fold_state = 1;
3951 else if (c1==
'$') fold_state = 1;
3952 else if (c1==
'%') fold_state = 1;
3953 else if (c1==
'\'') fold_state = 1;
3954 else if (c1==
'(') fold_state = 1;
3955 else if (c1==
')') fold_state = 1;
3956 else if (c1==
'*') fold_state = 1;
3957 else if (c1==
'+') fold_state = 1;
3958 else if (c1==
',') fold_state = 1;
3974 switch(fold_state) {
3976 oconv_newline(o_fconv);
3982 oconv_newline(o_fconv);
3993static nkf_char z_prev2=0,z_prev1=0;
4009 if (c1 == (0xde&0x7f)) {
4011 (*o_zconv)(dv[(z_prev1-
SP)*2], dv[(z_prev1-
SP)*2+1]);
4013 }
else if (c1 == (0xdf&0x7f) && ev[(z_prev1-
SP)*2]) {
4015 (*o_zconv)(ev[(z_prev1-
SP)*2], ev[(z_prev1-
SP)*2+1]);
4017 }
else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-
SP)*2]) {
4019 (*o_zconv)(ev_x0213[(z_prev1-
SP)*2], ev_x0213[(z_prev1-
SP)*2+1]);
4024 (*o_zconv)(cv[(z_prev1-
SP)*2], cv[(z_prev1-
SP)*2+1]);
4027 if (dv[(c1-
SP)*2] || ev[(c1-
SP)*2] || (x0213_f && ev_x0213[(c1-
SP)*2])) {
4033 (*o_zconv)(cv[(c1-
SP)*2], cv[(c1-
SP)*2+1]);
4044 if (alpha_f&1 && c2 == 0x23) {
4047 }
else if (c2 == 0x21) {
4053 }
else if (alpha_f&4) {
4058 }
else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4064 if (alpha_f&8 && c2 == 0) {
4066 const char *entity = 0;
4068 case '>': entity =
">";
break;
4069 case '<': entity =
"<";
break;
4070 case '\"': entity =
""";
break;
4071 case '&': entity =
"&";
break;
4074 while (*entity) (*o_zconv)(0, *entity++);
4121 }
else if (c2 == 0x25) {
4123 static const int fullwidth_to_halfwidth[] =
4125 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4126 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4127 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4128 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4129 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4130 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4131 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4132 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4133 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4134 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4135 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4136 0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4138 if (fullwidth_to_halfwidth[c1-0x20]){
4139 c2 = fullwidth_to_halfwidth[c1-0x20];
4158 (c <= 'M') ? (c + 13): \
4159 (c <= 'Z') ? (c - 13): \
4161 (c <= 'm') ? (c + 13): \
4162 (c <= 'z') ? (c - 13): \
4168 ( c <= 'O') ? (c + 47) : \
4169 ( c <= '~') ? (c - 47) : \
4182 (*o_rot_conv)(c2,c1);
4190 if (0x20 < c1 && c1 < 0x74) {
4192 (*o_hira_conv)(c2,c1);
4197 (*o_hira_conv)(c2,c1);
4200 }
else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4202 (*o_hira_conv)(c2,c1);
4210 }
else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4212 }
else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4216 (*o_hira_conv)(c2,c1);
4223#define RANGE_NUM_MAX 18
4247 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4251 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4260 if (c >= start && c <= end) {
4265 (*o_iso2022jp_check_conv)(c2,c1);
4271static const unsigned char *mime_pattern[] = {
4272 (
const unsigned char *)
"\075?EUC-JP?B?",
4273 (
const unsigned char *)
"\075?SHIFT_JIS?B?",
4274 (
const unsigned char *)
"\075?ISO-8859-1?Q?",
4275 (
const unsigned char *)
"\075?ISO-8859-1?B?",
4276 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
4277 (
const unsigned char *)
"\075?ISO-2022-JP?B?",
4278 (
const unsigned char *)
"\075?ISO-2022-JP?Q?",
4280 (
const unsigned char *)
"\075?UTF-8?B?",
4281 (
const unsigned char *)
"\075?UTF-8?Q?",
4283 (
const unsigned char *)
"\075?US-ASCII?Q?",
4290 e_iconv, s_iconv, 0, 0, 0, 0, 0,
4291#if defined(UTF8_INPUT_ENABLE)
4297static const nkf_char mime_encode[] = {
4299#if defined(UTF8_INPUT_ENABLE)
4306static const nkf_char mime_encode_method[] = {
4307 'B',
'B',
'Q',
'B',
'B',
'B',
'Q',
4308#if defined(UTF8_INPUT_ENABLE)
/* Capacity of the MIME input buffer, in elements. */
4318#define MIME_BUF_SIZE (1024)
/* Index mask: MIME_BUF_SIZE-1.  Masking is equivalent to taking the index
 * modulo the size only because 1024 is a power of two. */
4319#define MIME_BUF_MASK (MIME_BUF_SIZE-1)
/* Access element n of mime_input_state.buf with the index masked by
 * MIME_BUF_MASK, so out-of-range indices wrap around instead of overrunning. */
4320#define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
4329#define MAXRECOVER 20
4340 mime_input_buf_unshift(c);
4348 (*i_mungetc_buf)(c,
f);
4355mime_getc_buf(
FILE *
f)
4359 return ((mimebuf_f)?
4364switch_mime_getc(
void)
4366 if (i_getc!=mime_getc) {
4367 i_mgetc = i_getc; i_getc = mime_getc;
4368 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4370 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4371 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4377unswitch_mime_getc(
void)
4380 i_mgetc = i_mgetc_buf;
4381 i_mungetc = i_mungetc_buf;
4384 i_ungetc = i_mungetc;
4385 if(mime_iconv_back)set_iconv(
FALSE, mime_iconv_back);
4386 mime_iconv_back =
NULL;
4390mime_integrity(
FILE *
f,
const unsigned char *p)
4396 mime_input_state.input = mime_input_state.top;
4397 mime_input_state.last = mime_input_state.top;
4401 q = mime_input_state.input;
4402 while((c=(*i_getc)(
f))!=
EOF) {
4403 if (((mime_input_state.input-mime_input_state.top)&
MIME_BUF_MASK)==0) {
4406 if (c==
'=' && d==
'?') {
4410 mime_input_state.input = q;
4414 if (!( (c==
'+'||c==
'/'|| c==
'=' || c==
'?' ||
is_alnum(c))))
4422 mime_input_state.last = mime_input_state.input;
4423 mime_decode_mode = 1;
4429mime_begin_strict(
FILE *
f)
4433 const unsigned char *p,*q;
4436 mime_decode_mode =
FALSE;
4439 p = mime_pattern[j];
4442 for(
i=2;p[
i]>
SP;
i++) {
4446 while (mime_pattern[++j]) {
4447 p = mime_pattern[j];
4449 if (p[k]!=q[k])
break;
4452 p = mime_pattern[j];
4462 mime_decode_mode = p[
i-2];
4464 mime_iconv_back = iconv;
4468 if (mime_decode_mode==
'B') {
4469 mimebuf_f = unbuf_f;
4472 return mime_integrity(
f,mime_pattern[j]);
4490 k = mime_input_state.last;
4495 if (c1==
LF||c1==
SP||c1==
CR||
4496 c1==
'-'||c1==
'_'||
is_alnum(c1))
continue;
4500 mime_input_state.last--;
4508 if (c1==
'b'||c1==
'B') {
4509 mime_decode_mode =
'B';
4510 }
else if (c1==
'q'||c1==
'Q') {
4511 mime_decode_mode =
'Q';
4518 mime_decode_mode =
FALSE;
4524 if (!mime_decode_mode) {
4526 mime_decode_mode = 1;
4532 mime_input_state.last = k;
4554set_input_codename(
const char *codename)
4556 if (!input_codename) {
4557 input_codename = codename;
4558 }
else if (
strcmp(codename, input_codename) != 0) {
4559 input_codename =
"";
4564get_guessed_code(
void)
4566 if (input_codename && !*input_codename) {
4567 input_codename =
"BINARY";
4569 struct input_code *p = find_inputcode_byfunc(iconv);
4570 if (!input_codename) {
4571 input_codename =
"ASCII";
4572 }
else if (
strcmp(input_codename,
"Shift_JIS") == 0) {
4574 input_codename =
"CP932";
4575 }
else if (
strcmp(input_codename,
"EUC-JP") == 0) {
4577 input_codename =
"EUC-JIS-2004";
4579 input_codename =
"EUCJP-MS";
4581 input_codename =
"CP51932";
4582 }
else if (
strcmp(input_codename,
"ISO-2022-JP") == 0) {
4584 input_codename =
"CP50221";
4586 input_codename =
"CP50220";
4589 return input_codename;
4592#if !defined(PERL_XS) && !defined(WIN32DLL)
4594print_guessed_code(
char *filename)
4596 if (filename !=
NULL)
printf(
"%s: ", filename);
4597 if (input_codename && !*input_codename) {
4600 input_codename = get_guessed_code();
4602 printf(
"%s\n", input_codename);
4606 iconv != w_iconv16 && iconv != w_iconv32 ?
"" :
4610 input_bom_f ?
" (BOM)" :
"",
4611 input_eol ==
CR ?
" (CR)" :
4612 input_eol ==
LF ?
" (LF)" :
4613 input_eol ==
CRLF ?
" (CRLF)" :
4614 input_eol ==
EOF ?
" (MIXED NL)" :
4648 return hex_getc(
':',
f, i_cgetc, i_cungetc);
4654 return (*i_cungetc)(c,
f);
4660 return hex_getc(
'%',
f, i_ugetc, i_uungetc);
4666 return (*i_uungetc)(c,
f);
4670#ifdef NUMCHAR_OPTION
4672numchar_getc(
FILE *
f)
4686 if (
buf[
i] ==
'x' ||
buf[
i] ==
'X'){
4687 for (j = 0; j < 7; j++){
4699 for (j = 0; j < 8; j++){
4728 return (*i_nungetc)(c,
f);
4732#ifdef UNICODE_NORMALIZATION
4740 const unsigned char *array;
4741 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4744 if (c ==
EOF || c > 0xFF || (c & 0xc0) == 0x80)
return c;
4746 nkf_buf_push(
buf, c);
4748 while (lower <= upper) {
4749 int mid = (lower+upper) / 2;
4752 for (
len=0;
len < NORMALIZATION_TABLE_NFD_LENGTH && array[
len];
len++) {
4757 lower = 1, upper = 0;
4760 nkf_buf_push(
buf, c);
4762 if (array[
len] != nkf_buf_at(
buf,
len)) {
4763 if (array[
len] < nkf_buf_at(
buf,
len)) lower = mid + 1;
4764 else upper = mid - 1;
4773 for (
i=0;
i < NORMALIZATION_TABLE_NFC_LENGTH && array[
i];
i++)
4774 nkf_buf_push(
buf, array[
i]);
4778 }
while (lower <= upper);
4781 c = nkf_buf_pop(
buf);
4789 return (*i_nfc_ungetc)(c,
f);
4801 }
else if (c ==
'_') {
4806 }
else if (c >
'/') {
4808 }
else if (c ==
'+' || c ==
'-') {
4820 nkf_char t1, t2, t3, t4, mode, exit_mode;
4826 if (mime_input_state.top != mime_input_state.last) {
4829 if (mime_decode_mode==1 ||mime_decode_mode==
FALSE) {
4830 mime_decode_mode=
FALSE;
4831 unswitch_mime_getc();
4832 return (*i_getc)(
f);
4836 exit_mode = mime_decode_mode;
4839 if (mime_decode_mode ==
'Q') {
4840 if ((c1 = (*i_mgetc)(
f)) ==
EOF)
return (
EOF);
4843 if (c1<=
SP ||
DEL<=c1) {
4844 mime_decode_mode = exit_mode;
4847 if (c1!=
'=' && (c1!=
'?' || mimebuf_f ==
FIXED_MIME)) {
4851 mime_decode_mode = exit_mode;
4852 if ((c2 = (*i_mgetc)(
f)) ==
EOF)
return (
EOF);
4853 if (c1==
'?'&&c2==
'=' && mimebuf_f !=
FIXED_MIME) {
4855 input_mode = exit_mode;
4857 lwsp_buf = nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4858 while ((c1=(*i_getc)(
f))!=
EOF) {
4871 if ((c1=(*i_getc)(
f))!=
EOF && c1 ==
LF) {
4887 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4888 if (lwsp_count++>lwsp_size){
4890 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4891 lwsp_buf = lwsp_buf_new;
4897 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
4899 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4900 i_ungetc(lwsp_buf[lwsp_count],
f);
4906 if (c1==
'='&&c2<
SP) {
4907 while((c1 = (*i_mgetc)(
f)) <=
SP) {
4908 if (c1 ==
EOF)
return (
EOF);
4910 mime_decode_mode =
'Q';
4911 goto restart_mime_q;
4914 mime_decode_mode =
'Q';
4918 if ((c3 = (*i_mgetc)(
f)) ==
EOF)
return (
EOF);
4919 if (c2<=
SP)
return c2;
4920 mime_decode_mode =
'Q';
4924 if (mime_decode_mode !=
'B') {
4925 mime_decode_mode =
FALSE;
4926 return (*i_mgetc)(
f);
4938 mode = mime_decode_mode;
4939 mime_decode_mode = exit_mode;
4941 while ((c1 = (*i_mgetc)(
f))<=
SP) {
4946 if ((c2 = (*i_mgetc)(
f))<=
SP) {
4953 if ((c1 ==
'?') && (c2 ==
'=')) {
4956 lwsp_buf = nkf_xmalloc((lwsp_size+5)*
sizeof(
char));
4957 while ((c1=(*i_getc)(
f))!=
EOF) {
4970 if ((c1=(*i_getc)(
f))!=
EOF) {
4989 lwsp_buf[lwsp_count] = (
unsigned char)c1;
4990 if (lwsp_count++>lwsp_size){
4992 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*
sizeof(
char));
4993 lwsp_buf = lwsp_buf_new;
4999 if (lwsp_count > 0 && (c1 !=
'=' || (lwsp_buf[lwsp_count-1] !=
SP && lwsp_buf[lwsp_count-1] !=
TAB))) {
5001 for(lwsp_count--;lwsp_count>0;lwsp_count--)
5002 i_ungetc(lwsp_buf[lwsp_count],
f);
5009 if ((c3 = (*i_mgetc)(
f))<=
SP) {
5017 if ((c4 = (*i_mgetc)(
f))<=
SP) {
5025 mime_decode_mode = mode;
5029 t1 = 0x3f & base64decode(c1);
5030 t2 = 0x3f & base64decode(c2);
5031 t3 = 0x3f & base64decode(c3);
5032 t4 = 0x3f & base64decode(c4);
5033 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
5036 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
5039 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
/* Base64 encoding alphabet: 64 characters in the order A-Z, a-z, 0-9, '+',
 * '/'.  The MIME output code indexes this table with 6-bit values to emit
 * encoded characters. */
5049static const char basis_64[] =
5050 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
5052#define MIMEOUT_BUF_LENGTH 74
5063 const unsigned char *p;
5066 p = mime_pattern[0];
5067 for(
i=0;mime_pattern[
i];
i++) {
5068 if (mode == mime_encode[
i]) {
5069 p = mime_pattern[
i];
5073 mimeout_mode = mime_encode_method[
i];
5075 if (base64_count>45) {
5076 if (mimeout_state.count>0 &&
nkf_isblank(mimeout_state.buf[
i])){
5077 (*o_mputc)(mimeout_state.buf[
i]);
5080 put_newline(o_mputc);
5083 if (mimeout_state.count>0 &&
nkf_isspace(mimeout_state.buf[
i])) {
5087 for (;
i<mimeout_state.count;
i++) {
5089 (*o_mputc)(mimeout_state.buf[
i]);
5099 j = mimeout_state.count;
5100 mimeout_state.count = 0;
5102 mime_putc(mimeout_state.buf[
i]);
5109 if (mimeout_mode > 0){
5111 if (base64_count + mimeout_state.count/3*4> 73){
5112 (*o_base64conv)(
EOF,0);
5113 oconv_newline(o_base64conv);
5114 (*o_base64conv)(0,
SP);
5118 if ((c2 != 0 || c1 >
DEL) && base64_count + mimeout_state.count/3*4> 66) {
5119 (*o_base64conv)(
EOF,0);
5120 oconv_newline(o_base64conv);
5121 (*o_base64conv)(0,
SP);
5127 if (c2 !=
EOF && base64_count + mimeout_state.count/3*4> 60) {
5128 mimeout_mode = (output_mode==
ASCII ||output_mode ==
ISO_8859_1) ?
'Q' :
'B';
5129 open_mime(output_mode);
5130 (*o_base64conv)(
EOF,0);
5131 oconv_newline(o_base64conv);
5132 (*o_base64conv)(0,
SP);
5151 switch(mimeout_mode) {
5156 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4)]);
5162 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2)]);
5167 if (mimeout_mode > 0) {
5170 }
else if (mimeout_mode !=
'Q')
5178 switch(mimeout_mode) {
5185 (*o_mputc)(
bin2hex(((c>>4)&0xf)));
5195 (*o_mputc)(basis_64[c>>2]);
5200 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5206 (*o_mputc)(basis_64[((nkf_state->
mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5207 (*o_mputc)(basis_64[c & 0x3F]);
5225 if (mimeout_mode ==
'Q'){
5226 if (base64_count > 71){
5227 if (c!=
CR && c!=
LF) {
5229 put_newline(o_mputc);
5234 if (base64_count > 71){
5236 put_newline(o_mputc);
5252 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
5253 j = mimeout_state.count;
5254 mimeout_state.count = 0;
5256 if (mimeout_mode > 0) {
5259 if (
nkf_isspace(mimeout_state.buf[
i]) && base64_count < 71){
5262 mimeout_addchar(mimeout_state.buf[
i]);
5266 mimeout_addchar(mimeout_state.buf[
i]);
5270 mimeout_addchar(mimeout_state.buf[
i]);
5276 mimeout_addchar(mimeout_state.buf[
i]);
5282 if (mimeout_state.count > 0){
5283 lastchar = mimeout_state.buf[mimeout_state.count - 1];
5288 if (mimeout_mode==
'Q') {
5290 if (c ==
CR || c ==
LF) {
5295 }
else if (c <=
SP) {
5297 if (base64_count > 70) {
5298 put_newline(o_mputc);
5306 if (base64_count > 70) {
5308 put_newline(o_mputc);
5311 open_mime(output_mode);
5326 if (mimeout_mode <= 0) {
5328 output_mode ==
UTF_8)) {
5331 if (mimeout_mode == -1) {
5334 if (c==
CR || c==
LF) {
5336 open_mime(output_mode);
5342 for (
i=0;
i<mimeout_state.count;
i++) {
5343 (*o_mputc)(mimeout_state.buf[
i]);
5344 if (mimeout_state.buf[
i] ==
CR || mimeout_state.buf[
i] ==
LF){
5355 mimeout_state.buf[0] = (
char)c;
5356 mimeout_state.count = 1;
5358 if (base64_count > 1
5359 && base64_count + mimeout_state.count > 76
5360 && mimeout_state.buf[0] !=
CR && mimeout_state.buf[0] !=
LF){
5361 static const char *
str =
"boundary=\"";
5362 static int len = 10;
5365 for (;
i < mimeout_state.count -
len; ++
i) {
5372 if (
i == 0 ||
i == mimeout_state.count -
len) {
5373 put_newline(o_mputc);
5382 for (j = 0; j <=
i; ++j) {
5383 (*o_mputc)(mimeout_state.buf[j]);
5385 put_newline(o_mputc);
5387 for (; j <= mimeout_state.count; ++j) {
5388 mimeout_state.buf[j -
i] = mimeout_state.buf[j];
5390 mimeout_state.count -=
i;
5393 mimeout_state.buf[mimeout_state.count++] = (
char)c;
5395 open_mime(output_mode);
5400 if (lastchar==
CR || lastchar ==
LF){
5401 for (
i=0;
i<mimeout_state.count;
i++) {
5402 (*o_mputc)(mimeout_state.buf[
i]);
5405 mimeout_state.count = 0;
5408 for (
i=0;
i<mimeout_state.count-1;
i++) {
5409 (*o_mputc)(mimeout_state.buf[
i]);
5412 mimeout_state.buf[0] =
SP;
5413 mimeout_state.count = 1;
5415 open_mime(output_mode);
5420 output_mode ==
UTF_8)) {
5421 if (lastchar ==
CR || lastchar ==
LF){
5423 for (
i=0;
i<mimeout_state.count;
i++) {
5424 mimeout_addchar(mimeout_state.buf[
i]);
5426 mimeout_state.count = 0;
5429 for (
i=0;
i<mimeout_state.count;
i++) {
5430 (*o_mputc)(mimeout_state.buf[
i]);
5433 mimeout_state.count = 0;
5435 mimeout_state.buf[mimeout_state.count++] = (
char)c;
5439 for (
i=0;
i<mimeout_state.count;
i++) {
5440 if (
SP<mimeout_state.buf[
i] && mimeout_state.buf[
i]<
DEL) {
5442 for (
i=0;
i<mimeout_state.count;
i++) {
5443 (*o_mputc)(mimeout_state.buf[
i]);
5446 mimeout_state.count = 0;
5449 mimeout_state.buf[mimeout_state.count++] = (
char)c;
5452 for (j=0;j<mimeout_state.count;j++) {
5453 (*o_mputc)(mimeout_state.buf[j]);
5456 mimeout_state.count = 0;
5460 if (mimeout_state.count>0 &&
SP<c && c!=
'=') {
5461 mimeout_state.buf[mimeout_state.count++] = (
char)c;
5463 j = mimeout_state.count;
5464 mimeout_state.count = 0;
5466 mimeout_addchar(mimeout_state.buf[
i]);
5473 if (mimeout_state.count>0) {
5474 j = mimeout_state.count;
5475 mimeout_state.count = 0;
5477 if (mimeout_state.buf[
i]==
CR || mimeout_state.buf[
i]==
LF)
5479 mimeout_addchar(mimeout_state.buf[
i]);
5485 (*o_mputc)(mimeout_state.buf[
i]);
5487 open_mime(output_mode);
5496 mime_prechar(c2, c1);
5497 (*o_base64conv)(c2,c1);
5501typedef struct nkf_iconv_t {
5504 size_t input_buffer_size;
5505 char *output_buffer;
5506 size_t output_buffer_size;
5510nkf_iconv_new(
char *tocode,
char *fromcode)
5512 nkf_iconv_t converter;
5515 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5516 converter->output_buffer_size =
IOBUF_SIZE * 2;
5517 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5518 converter->cd = iconv_open(tocode, fromcode);
5519 if (converter->cd == (iconv_t)-1)
5523 perror(
fprintf(
"iconv doesn't support %s to %s conversion.", fromcode, tocode));
5526 perror(
"can't iconv_open");
5532nkf_iconv_convert(nkf_iconv_t *converter,
FILE *
input)
5534 size_t invalid = (
size_t)0;
5535 char *input_buffer = converter->input_buffer;
5536 size_t input_length = (
size_t)0;
5537 char *output_buffer = converter->output_buffer;
5538 size_t output_length = converter->output_buffer_size;
5543 while ((c = (*i_getc)(
f)) !=
EOF) {
5544 input_buffer[input_length++] = c;
5545 if (input_length < converter->input_buffer_size)
break;
5549 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5550 while (output_length-- > 0) {
5551 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5553 if (ret == (
size_t) - 1) {
5556 if (input_buffer != converter->input_buffer)
5557 memmove(converter->input_buffer, input_buffer, input_length);
5560 converter->output_buffer_size *= 2;
5561 output_buffer =
realloc(converter->outbuf, converter->output_buffer_size);
5562 if (output_buffer ==
NULL) {
5566 converter->output_buffer = output_buffer;
5582nkf_iconv_close(nkf_iconv_t *convert)
5586 iconv_close(converter->cd);
5608 mime_decode_f =
FALSE;
5614 iso2022jp_f =
FALSE;
5615#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5618#ifdef UTF8_INPUT_ENABLE
5619 no_cp932ext_f =
FALSE;
5620 no_best_fit_chars_f =
FALSE;
5621 encode_fallback =
NULL;
5622 unicode_subchar =
'?';
5625#ifdef UTF8_OUTPUT_ENABLE
5626 output_bom_f =
FALSE;
5629#ifdef UNICODE_NORMALIZATION
5645#ifdef SHIFTJIS_CP932
5655 for (
i = 0;
i < 256;
i++){
5656 prefix_table[
i] = 0;
5660 mimeout_state.count = 0;
5665 fold_preserve_f =
FALSE;
5671 o_zconv = no_connection;
5672 o_fconv = no_connection;
5673 o_eol_conv = no_connection;
5674 o_rot_conv = no_connection;
5675 o_hira_conv = no_connection;
5676 o_base64conv = no_connection;
5677 o_iso2022jp_check_conv = no_connection;
5680 i_ungetc = std_ungetc;
5682 i_bungetc = std_ungetc;
5685 i_mungetc = std_ungetc;
5686 i_mgetc_buf = std_getc;
5687 i_mungetc_buf = std_ungetc;
5688 output_mode =
ASCII;
5690 mime_decode_mode =
FALSE;
5696 z_prev2=0,z_prev1=0;
5698 iconv_for_check = 0;
5700 input_codename =
NULL;
5701 input_encoding =
NULL;
5702 output_encoding =
NULL;
5710module_connection(
void)
5712 if (input_encoding) set_input_encoding(input_encoding);
5713 if (!output_encoding) {
5714 output_encoding = nkf_default_encoding();
5716 if (!output_encoding) {
5717 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(
ISO_2022_JP);
5720 set_output_encoding(output_encoding);
5724 output_mode =
UTF_8;
5734 if (noout_f || guess_f){
5741 if (mimeout_f ==
TRUE) {
5742 o_base64conv = oconv; oconv = base64_conv;
5747 if (eolmode_f || guess_f) {
5748 o_eol_conv = oconv; oconv = eol_conv;
5751 o_rot_conv = oconv; oconv = rot_conv;
5754 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5757 o_hira_conv = oconv; oconv = hira_conv;
5760 o_fconv = oconv; oconv = fold_conv;
5763 if (alpha_f || x0201_f) {
5764 o_zconv = oconv; oconv = z_conv;
5768 i_ungetc = std_ungetc;
5772 i_cgetc = i_getc; i_getc = cap_getc;
5773 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5776 i_ugetc = i_getc; i_getc = url_getc;
5777 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5780#ifdef NUMCHAR_OPTION
5782 i_ngetc = i_getc; i_getc = numchar_getc;
5783 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5786#ifdef UNICODE_NORMALIZATION
5788 i_nfc_getc = i_getc; i_getc = nfc_getc;
5789 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5793 i_mgetc = i_getc; i_getc = mime_getc;
5794 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5797 i_bgetc = i_getc; i_getc = broken_getc;
5798 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5800 if (input_encoding) {
5803 set_iconv(
FALSE, e_iconv);
5819#if !defined(PERL_XS) && !defined(WIN32DLL)
5826 module_connection();
5827 while ((c = (*i_getc)(
f)) !=
EOF)
5834#define NEXT continue
5835#define SKIP c2=0;continue
5836#define MORE c2=c1;continue
5839#define set_input_mode(mode) do { \
5840 input_mode = mode; \
5842 set_input_codename("ISO-2022-JP"); \
5843 debug("ISO-2022-JP"); \
5847kanji_convert(
FILE *
f)
5852 int is_8bit =
FALSE;
5859 output_mode =
ASCII;
5861 if (module_connection() < 0) {
5862#if !defined(PERL_XS) && !defined(WIN32DLL)
5869#ifdef UTF8_INPUT_ENABLE
5870 if(iconv == w_iconv32){
5871 while ((c1 = (*i_getc)(
f)) !=
EOF &&
5872 (c2 = (*i_getc)(
f)) !=
EOF &&
5873 (c3 = (*i_getc)(
f)) !=
EOF &&
5874 (c4 = (*i_getc)(
f)) !=
EOF) {
5877 if ((c5 = (*i_getc)(
f)) !=
EOF &&
5878 (c6 = (*i_getc)(
f)) !=
EOF &&
5879 (c7 = (*i_getc)(
f)) !=
EOF &&
5880 (c8 = (*i_getc)(
f)) !=
EOF) {
5881 if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
5886 nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5889 nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
5895 else if (iconv == w_iconv16) {
5896 while ((c1 = (*i_getc)(
f)) !=
EOF &&
5897 (c2 = (*i_getc)(
f)) !=
EOF) {
5898 size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
5900 (c3 = (*i_getc)(
f)) !=
EOF &&
5901 (c4 = (*i_getc)(
f)) !=
EOF) {
5902 nkf_iconv_utf_16(c1, c2, c3, c4);
5904 if ((c3 = (*i_getc)(
f)) !=
EOF &&
5905 (c4 = (*i_getc)(
f)) !=
EOF) {
5906 if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
5909 nkf_iconv_utf_16_nocombine(c1, c2);
5912 nkf_iconv_utf_16_nocombine(c1, c2);
5920 while ((c1 = (*i_getc)(
f)) !=
EOF) {
5921#ifdef INPUT_CODE_FIX
5922 if (!input_encoding)
5929 if (!estab_f&&!mime_decode_mode) {
5932 if (h_conv(
f, c2, c1)==
EOF) {
5963 }
else if (input_codename && input_codename[0] ==
'I' &&
5964 0xA1 <= c1 && c1 <= 0xDF) {
5969 }
else if (c1 >
DEL) {
5971 if (!estab_f && !iso8859_f) {
5980 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5992 }
else if (
SP < c1 && c1 <
DEL) {
6011 }
else if (c1 ==
'=' && mime_f && !mime_decode_mode) {
6013 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6016 }
else if (c1 ==
'?') {
6020 if (mime_begin_strict(
f) ==
EOF)
6023 }
else if (mime_begin(
f) ==
EOF)
6035 }
else if (c1 ==
SI && (!is_8bit || mime_decode_mode)) {
6038 }
else if (c1 ==
SO && (!is_8bit || mime_decode_mode)) {
6041 }
else if (c1 ==
ESC && (!is_8bit || mime_decode_mode)) {
6042 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6046 else if (c1 ==
'&') {
6048 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6054 else if (c1 ==
'$') {
6056 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6061 }
else if (c1 ==
'@' || c1 ==
'B') {
6065 }
else if (c1 ==
'(') {
6067 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6074 }
else if (c1 ==
'@'|| c1 ==
'B') {
6079 }
else if (c1 ==
'D'){
6083 }
else if (c1 ==
'O' || c1 ==
'Q'){
6086 }
else if (c1 ==
'P'){
6097 }
else if (broken_f&0x2) {
6108 }
else if (c1 ==
'(') {
6110 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6116 else if (c1 ==
'I') {
6122 else if (c1 ==
'B' || c1 ==
'J' || c1 ==
'H') {
6127 else if (broken_f&0x2) {
6137 else if (c1 ==
'.') {
6139 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6142 else if (c1 ==
'A') {
6153 else if (c1 ==
'N') {
6172 }
else if (c1 ==
ESC && iconv == s_iconv) {
6174 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6177 }
else if (c1 ==
'$') {
6179 if ((c1 = (*i_getc)(
f)) ==
EOF) {
6181 }
else if ((
'E' <= c1 && c1 <=
'G') ||
6182 (
'O' <= c1 && c1 <=
'Q')) {
6190 static const nkf_char jphone_emoji_first_table[7] =
6191 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
6193 if ((c1 = (*i_getc)(
f)) ==
EOF)
LAST;
6194 while (
SP <= c1 && c1 <=
'z') {
6195 (*oconv)(0, c1 + c3);
6196 if ((c1 = (*i_getc)(
f)) ==
EOF)
LAST;
6212 }
else if (c1 ==
LF || c1 ==
CR) {
6216 }
else if (mime_decode_f && !mime_decode_mode){
6218 if ((c1=(*i_getc)(
f))!=
EOF && c1 ==
SP) {
6227 if ((c1=(*i_getc)(
f))!=
EOF) {
6231 }
else if (c1 ==
LF && (c1=(*i_getc)(
f))!=
EOF && c1 ==
SP) {
6251 switch ((*iconv)(c2, c1, 0)) {
6254 if ((c3 = (*i_getc)(
f)) !=
EOF) {
6257 if ((c4 = (*i_getc)(
f)) !=
EOF) {
6259 (*iconv)(c2, c1, c3|c4);
6265 if ((c3 = (*i_getc)(
f)) !=
EOF) {
6266 if ((c4 = (*i_getc)(
f)) !=
EOF) {
6267 if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
6270 w_iconv_nocombine(c2, c1, 0);
6274 w_iconv_nocombine(c2, c1, 0);
6277 w_iconv_nocombine(c2, c1, 0);
6282 if ((c3 = (*i_getc)(
f)) !=
EOF) {
6284 if ((*iconv)(c2, c1, c3) == -3) {
6287 if ((c4 = (*i_getc)(
f)) !=
EOF) {
6288 if ((c5 = (*i_getc)(
f)) !=
EOF) {
6289 if ((c6 = (*i_getc)(
f)) !=
EOF) {
6290 if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
6294 w_iconv_nocombine(c2, c1, c3);
6299 w_iconv_nocombine(c2, c1, c3);
6303 w_iconv_nocombine(c2, c1, c3);
6306 w_iconv_nocombine(c2, c1, c3);
6316 0x7F <= c2 && c2 <= 0x92 &&
6317 0x21 <= c1 && c1 <= 0x7E) {
6333 (*oconv)(input_mode, c1);
6344 (*iconv)(
EOF, 0, 0);
6345 if (!input_codename)
6354 set_input_codename(result->
name);
6371options(
unsigned char *cp)
6375 unsigned char *cp_back =
NULL;
6380 while(*cp && *cp++!=
'-');
6381 while (*cp || cp_back) {
6390 if (!*cp || *cp ==
SP) {
6394 for (
i=0;
i<(
int)(
sizeof(long_option)/
sizeof(long_option[0]));
i++) {
6395 p = (
unsigned char *)long_option[
i].
name;
6396 for (j=0;*p && *p !=
'=' && *p == cp[j];p++, j++);
6397 if (*p == cp[j] || cp[j] ==
SP){
6404#if !defined(PERL_XS) && !defined(WIN32DLL)
6409 while(*cp && *cp !=
SP && cp++);
6410 if (long_option[
i].
alias[0]){
6412 cp = (
unsigned char *)long_option[
i].
alias;
6421 enc = nkf_enc_find((
char *)p);
6423 input_encoding = enc;
6427 enc = nkf_enc_find((
char *)p);
6430 output_encoding = enc;
6434 if (p[0] ==
'0' || p[0] ==
'1') {
6442 if (
strcmp(long_option[
i].
name,
"overwrite") == 0){
6445 preserve_time_f =
TRUE;
6448 if (
strcmp(long_option[
i].
name,
"overwrite=") == 0){
6451 preserve_time_f =
TRUE;
6453 backup_suffix = (
char *)p;
6456 if (
strcmp(long_option[
i].
name,
"in-place") == 0){
6459 preserve_time_f =
FALSE;
6462 if (
strcmp(long_option[
i].
name,
"in-place=") == 0){
6465 preserve_time_f =
FALSE;
6467 backup_suffix = (
char *)p;
6472 if (
strcmp(long_option[
i].
name,
"cap-input") == 0){
6476 if (
strcmp(long_option[
i].
name,
"url-input") == 0){
6481#ifdef NUMCHAR_OPTION
6482 if (
strcmp(long_option[
i].
name,
"numchar-input") == 0){
6488 if (
strcmp(long_option[
i].
name,
"no-output") == 0){
6498#ifdef SHIFTJIS_CP932
6502#ifdef UTF8_OUTPUT_ENABLE
6507 if (
strcmp(long_option[
i].
name,
"no-cp932") == 0){
6508#ifdef SHIFTJIS_CP932
6512#ifdef UTF8_OUTPUT_ENABLE
6517#ifdef SHIFTJIS_CP932
6518 if (
strcmp(long_option[
i].
name,
"cp932inv") == 0){
6536 if (
strcmp(long_option[
i].
name,
"exec-out") == 0){
6541#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6542 if (
strcmp(long_option[
i].
name,
"no-cp932ext") == 0){
6543 no_cp932ext_f =
TRUE;
6546 if (
strcmp(long_option[
i].
name,
"no-best-fit-chars") == 0){
6547 no_best_fit_chars_f =
TRUE;
6551 encode_fallback =
NULL;
6555 encode_fallback = encode_fallback_html;
6559 encode_fallback = encode_fallback_xml;
6563 encode_fallback = encode_fallback_java;
6567 encode_fallback = encode_fallback_perl;
6570 if (
strcmp(long_option[
i].
name,
"fb-subchar") == 0){
6571 encode_fallback = encode_fallback_subchar;
6574 if (
strcmp(long_option[
i].
name,
"fb-subchar=") == 0){
6575 encode_fallback = encode_fallback_subchar;
6576 unicode_subchar = 0;
6580 unicode_subchar *= 10;
6583 }
else if(p[1] ==
'x' || p[1] ==
'X'){
6586 unicode_subchar <<= 4;
6592 unicode_subchar *= 8;
6596 w16e_conv(unicode_subchar, &
i, &j);
6597 unicode_subchar =
i<<8 | j;
6601#ifdef UTF8_OUTPUT_ENABLE
6602 if (
strcmp(long_option[
i].
name,
"ms-ucs-map") == 0){
6607#ifdef UNICODE_NORMALIZATION
6608 if (
strcmp(long_option[
i].
name,
"utf8mac-input") == 0){
6616 prefix_table[p[
i]] = p[0];
6621#if !defined(PERL_XS) && !defined(WIN32DLL)
6638 }
else if (*cp==
'2') {
6652 output_encoding = nkf_enc_from_index(
ISO_2022_JP);
6655 output_encoding = nkf_enc_from_index(
EUCJP_NKF);
6658 output_encoding = nkf_enc_from_index(
SHIFT_JIS);
6662 input_encoding = nkf_enc_from_index(
ISO_8859_1);
6665 if (*cp==
'@'||*cp==
'B')
6666 kanji_intro = *cp++;
6670 if (*cp==
'J'||*cp==
'B'||*cp==
'H')
6671 ascii_intro = *cp++;
6678 if (
'9'>= *cp && *cp>=
'0')
6679 hira_f |= (*cp++ -
'0');
6686#if defined(MSDOS) || defined(__OS2__)
6693 show_configuration();
6701#ifdef UTF8_OUTPUT_ENABLE
6707 output_encoding = nkf_enc_from_index(
UTF_8N);
6709 output_bom_f =
TRUE;
6710 output_encoding = nkf_enc_from_index(
UTF_8_BOM);
6714 if (
'1'== cp[0] &&
'6'==cp[1]) {
6717 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6721 output_encoding = nkf_enc_from_index(
UTF_8);
6727 output_bom_f =
TRUE;
6728 }
else if (cp[0] ==
'B') {
6730 output_bom_f =
TRUE;
6733 output_bom_f =
FALSE;
6735 enc_idx = enc_idx ==
UTF_16
6739 enc_idx = enc_idx ==
UTF_16
6743 output_encoding = nkf_enc_from_index(enc_idx);
6747#ifdef UTF8_INPUT_ENABLE
6751 input_encoding = nkf_enc_from_index(
UTF_8);
6754 if (
'1'== cp[0] &&
'6'==cp[1]) {
6758 }
else if (
'3'== cp[0] &&
'2'==cp[1]) {
6763 input_encoding = nkf_enc_from_index(
UTF_8);
6769 }
else if (cp[0] ==
'B') {
6773 enc_idx = (enc_idx ==
UTF_16
6776 input_encoding = nkf_enc_from_index(enc_idx);
6785 input_encoding = nkf_enc_from_index(
EUCJP_NKF);
6788 input_encoding = nkf_enc_from_index(
SHIFT_JIS);
6798 while (
'0'<= *cp && *cp <=
'4') {
6799 alpha_f |= 1 << (*cp++ -
'0');
6820 fold_preserve_f =
TRUE;
6824 while(
'0'<= *cp && *cp <=
'9') {
6826 fold_len += *cp++ -
'0';
6828 if (!(0<fold_len && fold_len<
BUFSIZ))
6833 while(
'0'<= *cp && *cp <=
'9') {
6835 fold_margin += *cp++ -
'0';
6841 if (*cp==
'B'||*cp==
'Q') {
6842 mime_decode_mode = *cp++;
6844 }
else if (*cp==
'N') {
6845 mime_f =
TRUE; cp++;
6846 }
else if (*cp==
'S') {
6848 }
else if (*cp==
'0') {
6849 mime_decode_f =
FALSE;
6850 mime_f =
FALSE; cp++;
6859 }
else if (*cp==
'Q') {
6871 if (
'9'>= *cp && *cp>=
'0')
6872 broken_f |= 1<<(*cp++ -
'0');
6892 eolmode_f =
LF; cp++;
6893 }
else if (*cp==
'm') {
6894 eolmode_f =
CR; cp++;
6895 }
else if (*cp==
'w') {
6896 eolmode_f =
CRLF; cp++;
6897 }
else if (*cp==
'0') {
6898 eolmode_f = 0; cp++;
6903 if (
'2' <= *cp && *cp <=
'9') {
6906 }
else if (*cp ==
'0' || *cp ==
'1') {
6916 while(*cp && *cp++!=
'-');
6919#if !defined(PERL_XS) && !defined(WIN32DLL)
6930#include "nkf32dll.c"
6931#elif defined(PERL_XS)
6939 char *outfname =
NULL;
6943 _BufferSize.y = 400;
6945#ifdef DEFAULT_CODE_LOCALE
6946 setlocale(LC_CTYPE,
"");
6951 cp = (
unsigned char *)*
argv;
6956 if (
pipe(fds) < 0 || (pid =
fork()) < 0){
6984 int debug_f_back = debug_f;
6987 int exec_f_back = exec_f;
6990 int x0212_f_back = x0212_f;
6992 int x0213_f_back = x0213_f;
6993 int guess_f_back = guess_f;
6995 guess_f = guess_f_back;
6998 debug_f = debug_f_back;
7001 exec_f = exec_f_back;
7003 x0212_f = x0212_f_back;
7004 x0213_f = x0213_f_back;
7007 if (binmode_f ==
TRUE)
7008#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7021 if (binmode_f ==
TRUE)
7022#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7031 kanji_convert(
stdin);
7032 if (guess_f) print_guessed_code(
NULL);
7036 int is_argument_error =
FALSE;
7038 input_codename =
NULL;
7041 iconv_for_check = 0;
7045 is_argument_error =
TRUE;
7054 if (file_out_f ==
TRUE) {
7057 outfname = nkf_xmalloc(
strlen(origfname)
7058 +
strlen(
".nkftmpXXXXXX")
7060 strcpy(outfname, origfname);
7065 if (outfname[
i - 1] ==
'/'
7066 || outfname[
i - 1] ==
'\\'){
7072 strcat(outfname,
"ntXXXXXX");
7074 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7077 strcat(outfname,
".nkftmpXXXXXX");
7093 outfname =
"nkf.out";
7100 if (binmode_f ==
TRUE) {
7101#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7109 if (binmode_f ==
TRUE)
7110#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7120 char *filename =
NULL;
7122 if (nfiles > 1) filename = origfname;
7123 if (guess_f) print_guessed_code(filename);
7129#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7140 if (
stat(origfname, &sb)) {
7149 if(preserve_time_f){
7150#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7151 tb[0] = tb[1] = sb.st_mtime;
7152 if (utime(outfname, tb)) {
7156 tb.actime = sb.st_atime;
7157 tb.modtime = sb.st_mtime;
7158 if (utime(outfname, &tb)) {
7164 char *backup_filename = get_backup_filename(backup_suffix, origfname);
7168 if (
rename(origfname, backup_filename)) {
7171 origfname, backup_filename);
7181 if (
rename(outfname, origfname)) {
7184 outfname, origfname);
7191 if (is_argument_error)
7195 if (file_out_f ==
FALSE)
7196 scanf(
"%d",&end_check);
7200 if (file_out_f ==
TRUE)
#define UTF8_OUTPUT_ENABLE
#define UTF8_INPUT_ENABLE
#define range(low, item, hi)
char str[HTML_ESCAPE_MAX_LEN+1]
#define nkf_enc_to_iconv(enc)
#define nkf_char_unicode_value_p(c)
nkf_native_encoding NkfEncodingISO_2022_JP
#define nkf_char_unicode_p(c)
nkf_native_encoding NkfEncodingShift_JIS
#define set_input_mode(mode)
nkf_native_encoding NkfEncodingUTF_8
nkf_native_encoding NkfEncodingEUC_JP
int main(int argc, char **argv)
#define X0213_SURROGATE_FIND(tbl, size, euc)
#define char_size(c2, c1)
struct input_code input_code_list[]
nkf_native_encoding NkfEncodingUTF_32
#define MIMEOUT_BUF_LENGTH
#define nkf_enc_to_oconv(enc)
#define nkf_buf_empty_p(buf)
#define mime_input_buf(n)
nkf_native_encoding NkfEncodingASCII
#define nkf_buf_length(buf)
#define output_ascii_escape_sequence(mode)
#define nkf_enc_cp5022x_p(enc)
nkf_char(* mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0)
#define is_ibmext_in_sjis(c2)
nkf_encoding nkf_encoding_table[]
#define NKF_ICONV_NOT_COMBINED
#define nkf_char_unicode_bmp_p(c)
#define NKF_ICONV_NEED_TWO_MORE_BYTES
@ NKF_ENCODING_TABLE_SIZE
#define UTF16_TO_UTF32(lead, trail)
unsigned char buf[MIME_BUF_SIZE]
#define nkf_char_unicode_new(c)
nkf_native_encoding NkfEncodingUTF_16
#define nkf_enc_to_index(enc)
#define nkf_enc_name(enc)
#define OUTPUT_UTF16_BYTES(c1, c2)
#define nkf_enc_asciicompat(enc)
#define NKF_ICONV_WAIT_COMBINING_CHAR
#define nkf_enc_unicode_p(enc)
#define OUTPUT_UTF16(val)
#define nkf_byte_jisx0201_katakana_p(c)
#define nkf_noescape_mime(c)
#define NKF_ICONV_INVALID_CODE_RANGE
struct @116 encoding_name_to_id_table[]
#define setvbuffer(fp, buf, size)
#define MIME_DECODE_DEFAULT
#define DEFAULT_CODE_LOCALE
const nkf_native_encoding * base_encoding
const unsigned short *const *const utf8_to_euc_3bytes[]
const unsigned short *const *const utf8_to_euc_3bytes_mac[]
const unsigned short *const x0212_to_utf8_2bytes[]
const unsigned short euc_to_utf8_1byte[]
const unsigned short *const x0212_shiftjis[]
const unsigned short shiftjis_x0212[3][189]
const unsigned short *const utf8_to_euc_2bytes_x0213[]
const struct normalization_pair normalization_table[]
const unsigned short *const *const utf8_to_euc_3bytes_x0213[]
const unsigned short *const *const utf8_to_euc_3bytes_ms[]
const unsigned short *const utf8_to_euc_2bytes[]
const unsigned short *const euc_to_utf8_2bytes_x0213[]
const unsigned short x0213_combining_chars[sizeof_x0213_combining_chars]
const unsigned short x0213_combining_table[sizeof_x0213_combining_table][3]
const unsigned short *const *const utf8_to_euc_3bytes_932[]
const unsigned short *const euc_to_utf8_2bytes[]
const unsigned short *const euc_to_utf8_2bytes_mac[]
const unsigned short x0213_1_surrogate_table[sizeof_x0213_1_surrogate_table][3]
const unsigned short cp932inv[2][189]
const unsigned short *const utf8_to_euc_2bytes_ms[]
const unsigned short *const euc_to_utf8_2bytes_ms[]
const unsigned short *const utf8_to_euc_2bytes_932[]
const unsigned short *const x0212_to_utf8_2bytes_x0213[]
const unsigned short *const utf8_to_euc_2bytes_mac[]
const unsigned short shiftjis_cp932[3][189]
const unsigned short x0213_2_surrogate_table[sizeof_x0213_2_surrogate_table][3]