将每个pdf的第一页(即封面)转为图片。
所有pdf在同一个目录下,
生成的图片在同目录下的img文件夹内。
图片命名即为pdf的名字。
pip install PyPDF2
pip install pdf2image
同时下载poppler,下载地址是:
https://blog.alivate.com.au/wp-content/uploads/2018/08/poppler-0.67.0_x86.7z
解压压缩包,将poppler/bin/ 目录添加至电脑的path的环境变量里。
注意:修改环境变量后一定要重启,否则不会生效。
import glob
import os
import shutil

from pdf2image import convert_from_path
# NOTE(review): PdfFileReader / PdfFileWriter are the legacy PyPDF2 class
# names; newer PyPDF2 releases reportedly no longer provide them -- confirm
# against the installed version.
from PyPDF2 import PdfFileReader, PdfFileWriter


def pdf2image2(pdfPath, imagePath):
    """Render a PDF to a PNG stored in ``imagePath``, named after the PDF.

    The image file name is the PDF's base name with its extension replaced
    by ``.png``.  The input is expected to be a single-page PDF; if it has
    several pages, the last rendered page is the one that is kept (every
    page maps to the same file name).

    Args:
        pdfPath: Path of the PDF file to render.
        imagePath: Directory that receives the PNG; created if missing.
    """
    images = convert_from_path(pdfPath, dpi=96)
    if not images:
        return
    os.makedirs(imagePath, exist_ok=True)
    # Derive the name from the path itself instead of slicing fixed
    # offsets, so it works for any directory prefix and extension length.
    pngname = os.path.splitext(os.path.basename(pdfPath))[0]
    images[-1].save(os.path.join(imagePath, pngname + '.png'), 'PNG')


def process_bar(no, total_length):
    """Print an in-place ``no|total_length`` progress indicator.

    The leading carriage return rewrites the current terminal line rather
    than emitting a new line for every update.
    """
    bar = '\r{}|{}'.format(no, total_length)
    print(bar, end='', flush=True)


def split_combine(path, pdf_writer):
    """Append the first page (the cover) of the PDF at ``path`` to ``pdf_writer``.

    Args:
        path: Path of the source PDF.
        pdf_writer: ``PdfFileWriter`` that receives the page.
    """
    pdf = PdfFileReader(path, strict=False)
    # Page index 0 is the first page, i.e. the cover.
    page = pdf.getPage(0)
    pdf_writer.addPage(page)


def main():
    """Write a cover PNG into ``./img/`` for every ``*.pdf`` in the current directory.

    Each cover is first extracted into a one-page PDF under ``./tmp/`` and
    then rendered; ``./tmp/`` is removed afterwards, even if a PDF fails.
    """
    # Only PDFs directly inside the current working directory are processed.
    pdf_list = glob.glob('*.pdf')
    imgpath = "./img/"
    tmppath = "./tmp/"
    os.makedirs(imgpath, exist_ok=True)
    os.makedirs(tmppath, exist_ok=True)
    try:
        for i, pdf_file in enumerate(pdf_list):
            process_bar(i + 1, len(pdf_list))
            # A fresh writer per PDF: reusing one writer would accumulate
            # the covers of all previously processed files in every
            # temporary PDF and re-render them each time.
            pdf_writer = PdfFileWriter()
            split_combine(pdf_file, pdf_writer)
            tmp_pdf = os.path.join(tmppath, pdf_file)
            with open(tmp_pdf, 'wb') as output_pdf:
                pdf_writer.write(output_pdf)
            pdf2image2(tmp_pdf, imgpath)
    finally:
        # Always clean up the scratch directory, including on errors.
        shutil.rmtree(tmppath)


if __name__ == '__main__':
    main()