Python教程

Python自动化办公--pdf操作

本文主要介绍如何使用 Python 实现办公自动化中的 PDF 操作,对大家解决编程问题具有一定的参考价值,需要的程序员们可以跟随小编一起学习!

一、pdf基本操作

from PyPDF2 import PdfFileReader, PdfFileWriter


def extract_information(pdf_path):
    """Return the document-information record of a PDF file.

    Args:
        pdf_path: Path of the PDF to inspect.

    Returns:
        Whatever ``PdfFileReader.getDocumentInfo()`` reports for the file.
        The file handle is closed before the value reaches the caller.
    """
    with open(pdf_path, 'rb') as stream:
        return PdfFileReader(stream).getDocumentInfo()


def rotate_page(pdf_path, output_path='rotate_pdf1.pdf', angle=90,
                clockwise=False):
    """Rotate the first page of a PDF and save it as a one-page PDF.

    Called with only ``pdf_path`` this rotates the first page 90 degrees
    counter-clockwise and writes it to ``rotate_pdf1.pdf`` in the current
    working directory.

    Args:
        pdf_path: Path of the source PDF.
        output_path: Destination file for the rotated page.
        angle: Rotation in degrees; must be a multiple of 90.
        clockwise: Rotate clockwise when true, counter-clockwise otherwise.

    Raises:
        ValueError: If ``angle`` is not a multiple of 90.
    """
    # Validate up front so a bad angle fails before any file is touched.
    if angle % 90 != 0:
        raise ValueError(f'angle must be a multiple of 90, got {angle!r}')

    pdf_reader = PdfFileReader(pdf_path)
    first_page = pdf_reader.getPage(0)
    if clockwise:
        first_page = first_page.rotateClockwise(angle)
    else:
        first_page = first_page.rotateCounterClockwise(angle)

    pdf_writer = PdfFileWriter()
    pdf_writer.addPage(first_page)

    with open(output_path, 'wb') as fp:
        pdf_writer.write(fp)


def merge_pdfs(paths, output):
    """Merge several PDF files into a single PDF.

    Args:
        paths: Iterable of source PDF paths; pages are appended in this order.
        output: Path of the merged PDF to write.
    """
    merged = PdfFileWriter()

    for source in paths:
        reader = PdfFileReader(source)
        total_pages = reader.getNumPages()
        for page in map(reader.getPage, range(total_pages)):
            merged.addPage(page)

    with open(output, 'wb') as out_file:
        merged.write(out_file)

def split_pdf(path, name_of_split):
    """Split a PDF into one single-page PDF per page.

    Each page is written to ``<name_of_split>_<index>.pdf`` (index starts at
    0) and the name of every file is printed before it is written.

    Args:
        path: Path of the PDF to split.
        name_of_split: Prefix used for the generated file names.
    """
    reader = PdfFileReader(path)

    for page in range(reader.getNumPages()):
        single_page = PdfFileWriter()
        single_page.addPage(reader.getPage(page))

        target = f'{name_of_split}_{page}.pdf'
        print(target)
        with open(target, 'wb') as out_file:
            single_page.write(out_file)

if __name__ == '__main__':
    # Usage examples for the other helpers. Windows paths are written as raw
    # strings so that sequences such as "\1" are not read as escape codes.
    #
    # information = extract_information(r'C:\Users\dongys_z\1.pdf')
    # print(information)
    #
    # rotate_page(r'C:\Users\dongys_z\1.pdf')
    #
    # merge_pdfs([r'C:\Users\dongys_z\1.pdf',
    #             r'C:\Users\dongys_z\简历.pdf'], 'total.pdf')

    # Split total.pdf into split_0.pdf, split_1.pdf, ...
    split_pdf(path='total.pdf', name_of_split='split')

二、pdf内容读取

import pdfplumber as pb
from PyPDF2 import PdfFileReader, PdfFileWriter

def transform_txt(path, output):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    Page texts are written back to back, in page order, with no separator
    added between pages.

    Args:
        path: Path of the PDF to read.
        output: Path of the text file to create or overwrite.
    """
    # Context managers close both the PDF and the output file even when
    # extraction fails part-way through.
    with pb.open(path) as pdf, open(output, 'w', encoding='utf-8') as fp:
        for page in pdf.pages:
            text = page.extract_text()
            # A page without extractable text (e.g. a scanned image) can give
            # None or '' depending on the pdfplumber version; writing None
            # would raise TypeError, so such pages are skipped.
            if text:
                fp.write(text)

def create_watermak(input_path, output_path, watermark):
    """Merge a watermark page into every page of a PDF.

    Args:
        input_path: Path of the PDF to be watermarked.
        output_path: Path of the watermarked PDF to write.
        watermark: Path of a PDF whose first page is used as the watermark.
    """
    stamp_reader = PdfFileReader(watermark)
    stamp = stamp_reader.getPage(0)

    source = PdfFileReader(input_path)
    result = PdfFileWriter()

    page_total = source.getNumPages()
    for index in range(page_total):
        current = source.getPage(index)
        current.mergePage(stamp)
        current.compressContentStreams()
        result.addPage(current)

    with open(output_path, 'wb') as out_file:
        result.write(out_file)


if __name__ == '__main__':
    # Text-extraction example:
    # transform_txt('total.pdf', 'total.txt')

    # Merge the first page of merge_pdf.pdf into every page of total.pdf.
    create_watermak(input_path='total.pdf',
                    output_path='QFmarkerpdf.pdf',
                    watermark='merge_pdf.pdf')

 

这篇关于Python自动化办公--pdf操作的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!