发现一个小众的库:PyPDF2。
可以用它非常方便地拆分、合并、调整PDF文件页面。
比如:
import os
import re
from contextlib import ExitStack

# NOTE(review): PdfFileReader/PdfFileWriter and the camelCase methods used
# below are the legacy PyPDF2 API; later PyPDF2 releases renamed them.
# Confirm against the installed version before upgrading.
from PyPDF2 import PdfFileReader, PdfFileWriter


def split_pdf(infn, outfn):
    """Write every page of a PDF to its own single-page file.

    Page ``i`` (0-based) is written to ``outfn + str(i) + ".pdf"``.

    Args:
        infn: Path of the PDF to split.
        outfn: Prefix used to build each output file name.
    """
    # PyPDF2 reads page content from the stream on demand, so the input has
    # to stay open until the last page has been written.
    with open(infn, 'rb') as src:
        pdf_input = PdfFileReader(src)
        # Total number of pages in the input PDF.
        page_count = pdf_input.getNumPages()
        print(page_count)
        # Emit each page as a separate one-page document.
        for i in range(page_count):
            pdf_output = PdfFileWriter()
            pdf_output.addPage(pdf_input.getPage(i))
            with open(outfn + str(i) + ".pdf", 'wb') as dst:
                pdf_output.write(dst)


def _natural_key(name):
    """Return a sort key that compares embedded digit runs numerically."""
    # Padding with a leading 'a' and a trailing '0' makes the string a clean
    # sequence of (non-digits, digits) pairs, so the regex covers all of it.
    pairs = re.findall(r'(\D+)(\d+)', 'a%s0' % name)
    return tuple(
        part for text, digits in pairs for part in (text, int(digits))
    )


def merge_pdf(infnList, outfn):
    """Merge several PDFs into one, bookmarking the start of each input.

    Inputs are processed in natural order (``file2`` before ``file10``).
    An input that cannot be opened or parsed is reported and skipped.

    Args:
        infnList: Iterable of paths of the PDFs to merge.
        outfn: Path of the merged PDF to write.
    """
    pdf_output = PdfFileWriter()
    total_page = 0
    # Every input stream must remain open until the merged file is written;
    # the ExitStack closes them all afterwards, even on error.
    with ExitStack() as stack:
        for infn in sorted(infnList, key=_natural_key):
            try:
                stream = stack.enter_context(open(infn, 'rb'))
                pdf_input = PdfFileReader(stream)
            except Exception:
                # Best effort: skip unreadable inputs instead of re-using the
                # previous reader (or hitting an unbound name).
                print("Error at ", infn)
                continue
            # Total number of pages in this input PDF.
            page_count = pdf_input.getNumPages()
            print(infn, " pages: ", page_count)
            for i in range(page_count):
                pdf_output.addPage(pdf_input.getPage(i))
            # Bookmark points at the first page contributed by this input.
            pdf_output.addBookmark(infn, total_page)
            total_page += page_count
        print("Total pages: ", pdf_output.getNumPages())
        with open(outfn, 'wb') as dst:
            pdf_output.write(dst)


def walk_files(path, endpoint=None):
    """Recursively collect the paths of files below ``path``.

    Args:
        path: Root directory to walk.
        endpoint: Suffix (or tuple of suffixes) a file path must end with to
            be included. ``None`` keeps every file.

    Returns:
        List of matching file paths, in ``os.walk`` order.
    """
    file_list = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            if endpoint is None or file_path.endswith(endpoint):
                file_list.append(file_path)
    return file_list


if __name__ == '__main__':
    infn = 'infn.pdf'
    outfn = 'outfn.pdf'
    # split_pdf(infn, outfn)
    merge_pdf(walk_files("[FOLDER_NAME]", endpoint=".pdf"), outfn)
这个库还在缓慢开发中,文档也不是很全。基本功能还是有的。更多情况去PyPDF2官网了解吧:Home page for the PyPDF2 project(https://mstamy2.github.io/PyPDF2/#documentation)