注:安装python-docx也可能出现以下问题:
这里附上查找到的一个比较好的解决方法.
① python-docx 提取文字
from docx import Document

# Open the document, show the list of paragraph objects, then print the text
# of every paragraph in order.
source_path = r"D:\PythonCode\test.docx"
document = Document(source_path)
all_paragraphs = document.paragraphs
print(all_paragraphs)
for para in all_paragraphs:
    print(para.text)
② python-docx 提取文字块
from docx import Document

document = Document(r"D:\PythonCode\test.docx")
print(document.paragraphs)

# For the first two paragraphs: print the list of run objects, then the text
# of each run. Indexing (rather than slicing) is deliberate, so a document
# with fewer than two paragraphs still raises IndexError.
for index in (0, 1):
    para_runs = document.paragraphs[index].runs
    print(para_runs)
    for text_run in para_runs:
        print(text_run.text)
① 添加段落
from docx import Document

document = Document(r"D:\PythonCode\test.docx")

# NOTE: the original author reported that the call
#     print(document.add_heading("一级标题", level=1))
# raised an error at this point and left the problem unresolved, so adding a
# level-1 heading stays disabled here.

# Keep each new paragraph in its own variable so that its formatting can be
# adjusted later on.
first_paragraph = document.add_paragraph("这是一个段落")
second_paragraph = document.add_paragraph("这是第二个段落")

# The result is written to a new file; the source document is left untouched.
document.save(r"D:\PythonCode\test1.docx")
② 添加文字块
from docx import Document

docx_path = r"D:\PythonCode\test.docx"
document = Document(docx_path)

# An empty paragraph acts as a placeholder; the text is appended to it
# afterwards as individual runs.
styled_paragraph = document.add_paragraph()

bold_run = styled_paragraph.add_run("我被加粗了文字块儿")
bold_run.bold = True

styled_paragraph.add_run(",我是普通文字块儿,")

italic_run = styled_paragraph.add_run("我是斜体文字块儿")
italic_run.italic = True

# Written back to the same file that was opened.
document.save(docx_path)
③ 添加一个分页
from docx import Document

# Add a page break to the document and write it back to the same file.
docx_path = r"D:\PythonCode\test.docx"
document = Document(docx_path)
document.add_page_break()
document.save(docx_path)
④ 添加图片
from docx import Document
from docx.shared import Cm

docx_path = r"D:\PythonCode\test.docx"
image_path = r"D:\PythonCode\test.png"

document = Document(docx_path)

# Cm is used to specify the picture dimensions; here both the width and the
# height are set to 5 cm.
side_length = Cm(5)
document.add_picture(image_path, width=side_length, height=side_length)

document.save(docx_path)
⑤ 添加表格
from docx import Document


def _append_table(document, rows):
    """Append a table to *document* and fill it with *rows*.

    The table dimensions are derived from the data instead of being
    hard-coded, so adding or removing entries cannot leave the table size
    and the data out of sync.

    Args:
        document: The python-docx document the table is added to.
        rows: A sequence of rows, each a sequence of cell values. Values are
            converted with ``str()`` before being written.

    Returns:
        The newly created table, or ``None`` when *rows* is empty (no table
        is added in that case).
    """
    if not rows:
        return None
    # Use the widest row so ragged data still fits; cells without a matching
    # value are simply left empty.
    column_count = max(len(row) for row in rows)
    table = document.add_table(rows=len(rows), cols=column_count)
    for table_row, values in zip(table.rows, rows):
        for cell, value in zip(table_row.cells, values):
            cell.text = str(value)
    return table


doc = Document(r"D:\PythonCode\test.docx")

# The first row of each list is the table header.
list1 = [
    ["姓名", "性别", "家庭地址"],
    ["唐僧", "男", "湖北省"],
    ["孙悟空", "男", "北京市"],
    ["猪八戒", "男", "广东省"],
    ["沙和尚", "男", "湖南省"],
]
list2 = [
    ["姓名", "性别", "家庭地址"],
    ["貂蝉", "女", "河北省"],
    ["杨贵妃", "女", "贵州省"],
    ["西施", "女", "山东省"],
]

table1 = _append_table(doc, list1)
# A paragraph of dashes visually separates the two tables.
doc.add_paragraph("-----------------------------------------------------------")
table2 = _append_table(doc, list2)

doc.save(r"D:\PythonCode\test.docx")
⑥ 提取 Word 表格,并保存到 Excel 中
from docx import Document
from openpyxl import Workbook

# Copy the first table of the Word document into the active sheet of a new
# Excel workbook, one worksheet row per table row.
doc = Document(r"D:\PythonCode\test.docx")
t0 = doc.tables[0]

workbook = Workbook()
sheet = workbook.active

# The row/column counts do not change while copying, so compute them once.
row_count = len(t0.rows)
column_count = len(t0.columns)
for i in range(row_count):
    sheet.append([t0.cell(i, j).text for j in range(column_count)])

# save() has to be *called* with a target path; merely referencing the method
# writes nothing to disk.
workbook.save(r"D:\PythonCode\test.xlsx")
from docx import Document
from docx.shared import Pt, RGBColor
from docx.oxml.ns import qn

docx_path = r"D:\PythonCode\test.docx"
document = Document(docx_path)

# Walk every run of every paragraph and apply the same font settings to it.
every_run = (
    text_run
    for paragraph in document.paragraphs
    for text_run in paragraph.runs
)
for text_run in every_run:
    font = text_run.font
    font.bold = True
    font.italic = True
    font.underline = True
    font.strike = True
    font.shadow = True
    font.size = Pt(18)
    font.color.rgb = RGBColor(255, 255, 0)
    font.name = "宋体"
    # For a Chinese typeface such as 宋体, setting font.name is not enough:
    # the w:eastAsia attribute of the run's rFonts element must be set too.
    # This has to come after font.name has been assigned.
    run_fonts = text_run._element.rPr.rFonts
    run_fonts.set(qn("w:eastAsia"), "宋体")

document.save(docx_path)
① 对齐样式
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

docx_path = r"D:\PythonCode\test.docx"
document = Document(docx_path)

first_paragraph = document.paragraphs[0]
print(first_paragraph.text)

# Centre the first paragraph. CENTER is only one of the available options;
# the others are LEFT, RIGHT, JUSTIFY, DISTRIBUTE, JUSTIFY_MED, JUSTIFY_HI,
# JUSTIFY_LOW and THAI_JUSTIFY.
first_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

document.save(docx_path)
② 行间距调整
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

docx_path = r"D:\PythonCode\test.docx"
document = Document(docx_path)

# Give every paragraph in the document a line spacing of 5.0.
for para in document.paragraphs:
    para_format = para.paragraph_format
    para_format.line_spacing = 5.0

document.save(docx_path)
③ 段前与段后间距
from docx import Document
from docx.shared import Pt

docx_path = r"D:\PythonCode\test.docx"
document = Document(docx_path)

# Pt(12) means 12 points; the same amount is used before and after.
for para in document.paragraphs:
    para_format = para.paragraph_format
    para_format.space_before = Pt(12)  # spacing before the paragraph
    para_format.space_after = Pt(12)  # spacing after the paragraph

document.save(docx_path)