Python教程

Python 将文件批量转换编码

本文主要是介绍python 将文件 批量转换编码,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!

文件批量转换编码:原始基础代码来自互联网,我在其基础上做了一些修改和适配,在此记录一下。实际只用到了 code2code 这个方法,其他方法没有用到,也就没有去改动。

# -*- coding:utf-8 -*-
# @Time : 2022/3/22 20:04
# @Author: zhcode
# @File : convert_file_encode.py
import chardet
import codecs
import os
import sys
import time
 
CURRENT_PATH = os.path.abspath('.')
 
def _canonical_encoding(name):
    """Return the codec registry's canonical name for *name* (lower-cased if unknown)."""
    try:
        return codecs.lookup(name).name
    except LookupError:
        return name.lower()


def code2code(filename, encode_out):
    """
    Re-encode a file in place from its detected encoding to encode_out.

    The source encoding is guessed with chardet from the file's raw bytes.

    :param filename: path of the file to convert
    :param encode_out: target encoding name, e.g. "utf-8"; None falls back to
                       the platform's preferred encoding
    :return: (base file name, detected encoding) when the file was rewritten;
             None when the encoding could not be detected or the file is
             already in the target encoding
    """
    print("transfer filename " + filename + " ... ")
    if encode_out is None:
        import locale
        encode_out = locale.getpreferredencoding(False)

    # Read the raw bytes once; the context manager closes the handle promptly.
    with open(filename, "rb") as fi:
        raw = fi.read()

    fileencode = chardet.detect(raw)['encoding']
    if not fileencode:
        # chardet could not identify the encoding: leave the file untouched.
        print(fileencode)
        return

    # Compare canonical codec names so "GB2312" and "gb2312" count as equal.
    if _canonical_encoding(fileencode) == _canonical_encoding(encode_out):
        print("encode is no need to transfer...")
        return

    print("transfer detect codetype = " + fileencode)

    # Transcode fully in memory BEFORE opening the file for writing: if
    # decoding or encoding fails, the original file has not been truncated.
    converted = raw.decode(fileencode).encode(encode_out)
    with open(filename, "wb") as fo:
        fo.write(converted)
    return os.path.basename(filename), fileencode
 
 
def main():
    """
    Command-line entry point.

    Supported arguments (options are given as "<flag> <value>" pairs):
      (none)         convert the current directory to gb2312
      -h             print usage and stop
      -f <type>      target encoding (default: gb2312)
      -d <dirname>   directory to convert (default: current directory)

    Errors are printed and the process exits with status 1; Ctrl-C prints a
    farewell message instead.
    """
    try:
        argv = sys.argv[1:]
        if argv and argv[0] == '-h':
            helper()
            return

        if len(argv) % 2 != 0:
            # A trailing flag without a value used to surface as a bare
            # "list index out of range"; report it explicitly instead.
            raise Exception("参数 {} 缺少对应的值.".format(argv[-1]))

        # Pair up flags and values; the first occurrence of a flag wins.
        options = {}
        for key, value in zip(argv[0::2], argv[1::2]):
            options.setdefault(key, value)

        if '-h' in options:
            # Asking for help must not trigger a conversion as a side effect.
            helper()
            return

        encode_out = options.get('-f', "gb2312")
        dir_path = options.get('-d')
        convert(f=encode_out, d=dir_path)

        print('Finish 转换完毕')

    except KeyboardInterrupt:
        print("\ngoodbye.")
    except Exception as ex:
        print(ex)
        sys.exit(1)
 
 
def time_format(time_diff):
    """
    Render a duration given in seconds as "hours:minutes:seconds".

    Each field smaller than 10 is prefixed with a single "0".

    :param time_diff: elapsed time in seconds
    :return: the formatted string, e.g. "01:01:01"
    """
    def two_digits(value):
        text = "{}".format(value)
        if value < 10:
            return "0" + text
        return text

    hours = int(time_diff / 3600)
    minutes = int((time_diff % 3600) / 60)
    seconds = int(time_diff % 60)
    return ":".join([two_digits(hours), two_digits(minutes), two_digits(seconds)])
 
 
def traverse_dir(file_dir):
    """
    Walk a folder recursively and collect the files eligible for conversion.

    Files whose extension (compared case-insensitively) is one of .py,
    .class, .gif, .png, .jpg or .project are skipped, as are files whose
    whole name is one of those entries (e.g. a file called ".project",
    which os.path.splitext reports as having no extension).

    :param file_dir: folder to traverse
    :return: list of file paths; empty if file_dir is not a usable folder
    """
    skipped = (".py", ".class", ".gif", ".png", ".jpg", ".project")
    file_path_list = []
    try:
        # os.walk silently yields nothing for a bad path, so check up front
        # in order to actually report the problem.
        if not os.path.isdir(file_dir):
            print("文件路径不正确!")
            return file_path_list

        for root, _dirs, files in os.walk(file_dir):
            for name in files:
                lowered = name.lower()
                if lowered in skipped or os.path.splitext(lowered)[1] in skipped:
                    continue
                file_path_list.append(os.path.join(root, name))
    except (OSError, TypeError, ValueError):
        print("文件路径不正确!")

    return file_path_list
 
 
def convert(f=None, d=None):
    """
    Convert every eligible file under a folder and report progress.

    :param f: target encoding name, passed unchanged to code2code
    :param d: folder to convert; the current working directory captured in
              CURRENT_PATH is used when this is empty or None
    :raises Exception: if d is given but is not an existing folder
    """
    encode_out = f
    if not d:
        dir_path = CURRENT_PATH
    elif os.path.isdir(d):
        dir_path = d
    else:
        raise Exception("该路径不是一个文件夹.")

    start_time = time.time()
    file_path_list = traverse_dir(dir_path)
    total = len(file_path_list)
    for index, path in enumerate(file_path_list, 1):
        time_eat = time_format(int(time.time() - start_time))

        result = code2code(path, encode_out)
        if result is None:
            # code2code returns None for files it skips (undetectable or
            # already in the target encoding); do not try to unpack that.
            file_name, file_encode = os.path.basename(path), "-"
        else:
            file_name, file_encode = result

        progressbar(index, total, 50,
                    "{} {} {}".format(time_eat, file_name, file_encode))
 
 
def progressbar(curr, total, duration=10, extra=''):
    """
    Draw a single-line text progress bar on stdout.

    The line starts with a carriage return so successive calls overwrite
    each other; a newline is emitted only once curr reaches total.

    :param curr: number of items processed so far
    :param total: total number of items; a value <= 0 is shown as complete
    :param duration: width of the bar in characters
    :param extra: optional text appended after the bar
    :return: None
    """
    # float() keeps true division on Python 2; guard against total == 0.
    frac = float(curr) / total if total > 0 else 1.0
    frac = min(max(frac, 0.0), 1.0)

    filled = int(round(frac * duration))
    bar = '#' * filled + ' ' * (duration - filled)

    line = '\r' + bar + '[0~{0:.0f}]'.format(frac * duration)
    if extra:
        line += ' ' + extra
    if curr >= total:
        line += '\n'

    # write() rather than print(): a trailing newline on every update would
    # defeat the carriage return.
    sys.stdout.write(line)
    sys.stdout.flush()
 
 
def helper():
    """
    Print command-line usage to stdout.

    The script name shown in each usage line is the base name of
    sys.argv[0], prefixed with "./".
    """
    app_name = "./{}".format(os.path.split(sys.argv[0])[-1])
    usage_lines = (
        '{app_name}                  # 将当前文件夹下文件格式转换为gb2312格式.',
        '{app_name} -f <type>        # 设置转换的编码格式',
        '{app_name} -d <dirname>     # 设置转换文件的路径',
        '{app_name} -h               # 帮助',
    )
    print("====== Image format conversion ======")
    for template in usage_lines:
        # Substitute the placeholder; it used to be printed literally.
        print(template.format(app_name=app_name))
 
 
if __name__ == '__main__':
    # main() is not invoked here; when run as a script, every eligible file
    # under CURRENT_PATH is converted straight to utf-8 and each code2code
    # result is printed.
    targets = traverse_dir(CURRENT_PATH)

    print("file_name_list = ",len(targets))
    for path in targets:
        print(code2code(path, "utf-8"))
 

 

这篇关于python 将文件 批量转换编码的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!