# Convert text files from any codec to UTF-8
#-*- coding: utf-8 -*-
import sys
import os
def file_enc(path):
    """Guess the text encoding of the file at *path* with chardet.

    The whole file is read as raw bytes and passed to ``chardet.detect``.

    Returns:
        The value stored under the ``'encoding'`` key of the detection
        result, or ``None`` when that key is absent.
    """
    import chardet

    with open(path, "rb") as raw_file:
        raw_bytes = raw_file.read()
        detection = chardet.detect(raw_bytes)
        return detection.get('encoding')
def py2_euc2utf(in_file, out_file, encoding='cp949'):
    """Re-encode a legacy-encoded text file as UTF-8.

    Args:
        in_file: Path of the source file.
        out_file: Path of the UTF-8 file to write (overwritten if present).
        encoding: Codec of the source file. Defaults to ``'cp949'``, the
            codec this function previously had hard-coded.

    Raises:
        UnicodeDecodeError: If the source bytes are not valid in *encoding*.
    """
    # Binary mode on both ends: the bytes are decoded/encoded explicitly, so
    # the function behaves the same on Python 2 and Python 3 and the platform
    # never rewrites line endings behind our back.
    with open(in_file, "rb") as src:
        text = src.read().decode(encoding)
    with open(out_file, "wb") as dst:
        dst.write(text.encode('utf-8'))
def py3_euc2utf(in_file, out_file):
    """Copy *in_file* to *out_file* as UTF-8 text (Python 3 code path).

    The source is opened in text mode with whatever encoding ``file_enc``
    reports for it, read in full, and then written back out with the
    ``utf-8`` encoding.
    """
    detected = file_enc(in_file)
    with open(in_file, "r", encoding=detected) as reader:
        content = reader.read()

    with open(out_file, "w", encoding="utf-8") as writer:
        writer.write(content)
def euc2utf(in_file, out_file):
    """Convert *in_file* to UTF-8 using the converter for the running Python.

    On Python 3 the result is written to ``out_file + '.py3'``; on any other
    major version it is written to ``out_file + '.py2'``.
    """
    running_py3 = sys.version_info.major == 3
    if not running_py3:
        py2_euc2utf(in_file, out_file + '.py2')
        return
    py3_euc2utf(in_file, out_file + '.py3')
def filelist(path):
    """Return the names of the regular files directly inside *path*.

    Sub-directories and other non-file entries are left out. Names are
    returned without the directory prefix, in ``os.listdir`` order.
    """
    names = []
    for entry in os.listdir(path):
        candidate = os.path.join(path, entry)
        if os.path.isfile(candidate):
            names.append(entry)
    return names
if __name__ == "__main__":
    # Batch job: write a UTF-8 copy of every regular file in in_dir to out_dir.
    in_dir = r"d:/Ebook/가림토txt"
    # The slash after the drive letter matters: "d:Ebook" would be resolved
    # relative to the current directory on drive D instead of its root.
    out_dir = r"d:/Ebook/가림토txt_utf"
    if not os.path.exists(out_dir):
        # makedirs also creates missing parent directories.
        os.makedirs(out_dir)
    files = filelist(in_dir)
    for f in files:
        src_path = os.path.join(in_dir, f)
        try:
            # Encoding detection is inside the try so that one unreadable
            # file does not abort the whole batch.
            print(f, file_enc(src_path))
            euc2utf(src_path, os.path.join(out_dir, f))
        except Exception as err:
            # Best effort: report the failure with its cause and move on.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            print(f, "--------------> ERROR", err)