Any codec to UTF-8
#-*- coding: utf-8 -*-
"""Convert every file in a directory from a legacy codec to UTF-8.

The script runs under both Python 2 and Python 3.  Each input file is
decoded (CP949 on Python 2, chardet-detected codec on Python 3) and written
back out as UTF-8 into a separate output directory, with a ``.py2`` or
``.py3`` suffix appended to the file name.
"""
import os
import sys


def file_enc(path):
    """Return the encoding name chardet guesses for the file at *path*.

    The whole file is read as bytes and handed to ``chardet.detect``.  The
    result is whatever chardet reports under the ``'encoding'`` key, which
    is ``None`` when that key is absent or chardet could not decide.
    """
    # Imported lazily so only callers that need detection require the
    # third-party ``chardet`` package.
    import chardet

    with open(path, "rb") as f:
        return chardet.detect(f.read()).get('encoding')


def _resolve_codec(detected):
    """Map a detected encoding name to the codec used for decoding.

    * No detection result -> ``'cp949'``, the codec the Python 2 branch
      always uses, instead of silently falling back to the locale default.
    * ``EUC-KR`` -> ``'cp949'``, because CP949 is the Windows superset of
      EUC-KR and files reported as EUC-KR may contain CP949 extension
      characters that the strict ``euc_kr`` codec rejects.
    * Anything else is returned unchanged.
    """
    if not detected:
        return 'cp949'
    if detected.lower().replace('_', '-') == 'euc-kr':
        return 'cp949'
    return detected


def py2_euc2utf(in_file, out_file):
    """Re-encode *in_file* from CP949 to UTF-8 and write it to *out_file*.

    Files are opened in binary mode so the bytes are decoded/encoded exactly
    once with no platform newline translation (and no Ctrl-Z truncation on
    Windows).  This also makes the function usable on Python 3.

    Raises ``UnicodeDecodeError`` if the input is not valid CP949.
    """
    with open(in_file, "rb") as f:
        text = f.read().decode('cp949')
    with open(out_file, "wb") as w:
        w.write(text.encode('utf-8'))


def py3_euc2utf(in_file, out_file):
    """Re-encode *in_file* to UTF-8 using the codec detected by chardet.

    The detected name is passed through ``_resolve_codec`` so that a missing
    detection result or an ``EUC-KR`` guess is decoded as CP949.

    Raises ``UnicodeDecodeError`` / ``LookupError`` if the file cannot be
    decoded with the chosen codec or the codec name is unknown to Python.
    """
    codec = _resolve_codec(file_enc(in_file))
    with open(in_file, "r", encoding=codec) as f:
        text = f.read()
    with open(out_file, "w", encoding="utf-8") as w:
        w.write(text)


def euc2utf(in_file, out_file):
    """Convert *in_file* to UTF-8 with the routine matching the interpreter.

    The output path gets a ``.py3`` or ``.py2`` suffix depending on which
    implementation produced it.
    """
    if sys.version_info[0] == 3:
        py3_euc2utf(in_file, out_file + '.py3')
    else:
        py2_euc2utf(in_file, out_file + '.py2')


def filelist(path):
    """Return the names of the regular files directly inside *path*.

    Sub-directories are skipped; names are returned in ``os.listdir`` order.
    """
    return [
        name for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    ]


def main():
    """Convert every file in the hard-coded input directory to UTF-8."""
    in_dir = r"d:/Ebook/가림토txt"
    # The original literal was "d:Ebook/..." (no slash after the drive
    # letter), which Windows resolves relative to the current directory on
    # drive D: rather than next to in_dir.
    out_dir = r"d:/Ebook/가림토txt_utf"

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for name in filelist(in_dir):
        src = os.path.join(in_dir, name)
        try:
            # Single pre-formatted string so Python 2 does not print a tuple.
            print("%s %s" % (name, file_enc(src)))
            euc2utf(src, os.path.join(out_dir, name))
        except Exception as err:
            # Best-effort batch: report the failure and continue with the
            # next file, but let KeyboardInterrupt/SystemExit propagate.
            print("%s --------------> ERROR (%r)" % (name, err))


if __name__ == "__main__":
    main()
댓글 없음:
댓글 쓰기