python win32com、docx 操控word

头一次写微博，分享一下自己曾经的笔记

win32com模块

import win32com
import win32com.client  # 有时候直接引用win32com 找不到client
最常用的模块其实是win32com.client

打开与创建word

word = win32com.client.Dispatch('Word.Application') #打开word
word = win32com.client.DispatchEx('Word.Application') #独立进程打开word

word.Visible = 0 # 后台运行
word.DisplayAlerts = 0 # 不显示，不警告

doc = word.Documents.Open('xxx.doc') # 打开一个已有的word文档
new_doc = word.Documents.Add() # 创建新的word文档

内容操作

myRange = doc.Range(0,).text # 读取内容
doc.Paragraphs[5].Range.text="12456" #赋值

doc.Paragraphs[11].Range() #段落只能.Range() 读取全部这样不行：Range(3,4)

按照段落遍历：

doc.Paragraphs.count #段落的总数量
for i in range( len(doc.Paragraphs)):
    para = doc.Paragraphs[i]
    print(para.Range.text)

for i in doc.Paragraphs: # 或这样子，也能直接打印文字
    print(i.Range())

插入操作

InsertBefore 、 InsertAfter

Range中的参数，两个参数分别代表起始点，结束点
在文章头部插入
myRange = doc.Range(0,0)
myRange.InsertBefore('Hello from Python!') #在头部插入，换行请加\r

在文章指定位置插入
myRange = doc.Range(22,23)
myRange.InsertBefore('Hello from Python!')

**在文章尾部插入**
myRange2 = doc.Range()
myRange2.InsertAfter('Bye word')

插入在某段尾部
doc.Paragraphs[11].Range.text = doc.Paragraphs[11].Range()[:-2] +'strss\r'

Select操作

doc.Paragraphs[8].Range.Select() # 选中一段

w=win32com.client.Dispatch('Word.Application')
objsel = w.Selection

objsel.Start #选取起点
objsel.end #选取终点
objsel.end = objsel.start+5 #选中操作

objsel.Delete() #删除选中

objsel.EndKey() # 为当前光标所在行的最后，并非文档最后，返回移动步数
objsel.HomeKey() #当前段落开头，返回步数为负数
objsel.MoveRight(1,3) #向右移动3格，1 是枚举量

objsel.Collapse() #选取释放，光标为开头

objsel.TypeText('insert first pic') # 在光标位置，打字
objsel.TypeParagraph() #在光标位置插入回车
objsel.InsertBreak() # 插入分页符

图片插入
objshape = doc.Shapes
p1 = objshape.AddPicture(r'e:\1.jpeg') #通过这种doc.shapes的方式，图片插入的位置在文档开头
objsel.TypeParagraph() #回车

newpl = objsel.InlineShapes.AddPicture(r'e:\1.jpeg') #而这种通过selection的插入图片方式，插入的位置是在光标位置；
objsel.TypeParagraph() #敲回车

获取活动word

dc=w.ActiveDocument.Content #取得当前活动文档的内容句柄
dc.Font.Size=15 # 设置文档的文字大小（影响全文的大小）

转换

table.Cell(1,1).Select() # Range 转换为Select操作（Cell 的行、列序号从1开始）
如: doc.Tables[0].Cell(2,4).Select() 表格的第二行，第四个单位选中

表格操作

doc.Tables[0].Rows[0].Cells[0].Range.Text ='123123'
doc.Tables[0].Rows.Add() # 增加一行

doc.Tables(1).Rows(1).Cells(1).Range.Text ='修改1' #修改，原内容不保存（用括号调用时序号从1开始）
doc.Tables(1).Cell(3,5).Range.Text =('Some text') #表格内容修改
doc.Tables(1).Cell(3,4).Range.InsertAfter('Some text') #插入，原内容保留

表遍历：

for i in range(len(doc.Tables)):   #遍历所有表
    biao = doc.Tables[i]            
    print('行:',len(biao.Rows))          #打印当前表有多少行
    for j in range(len(biao.Rows)):   #循环行
        print('列:',len(biao.Rows[j].Cells))  #打印当前行有多少列
        for w in range(len(biao.Rows[j].Cells)):
            print(i,j,w,[biao.Rows[j].Cells[w].Range.Text])  #打印值

表格插入行

biao = doc.Tables[0]
ls = biao.Rows.Add() # 增加一行 继承上一行属性
len(ls.Cells)
ls.Cells[0].Range.Text=10
ls.Cells[1].Range.Text=12

Selection.InsertRowsAbove 1   #  vba  Select  上方插入1行
Selection.InsertRowsBelow 2    # vba Select插入法 ，插入2行

#找到某个标记，并在它上方插入1行
sel.Find.Execute('{:xxx}', False, False, False, False, False, True, 1, True, '', 0)
sel.InsertRowsAbove(1)   #上方插入
sel.Rows.Delete()          #删除行
ps： 更多信息，可以录制vba获取

# 插入信息，并删除 标记行{:xxx}
lslist = [['a%d'%(i) for i in range(12)],['b%d'%(i) for i in range(12)]]
sel.Find.Execute('{:xxx}', False, False, False, False, False, True, 1, True, '', 0)  #找到某个标记
for i in lslist:
    sel.InsertRowsAbove()
    for j in i:        
        sel.TypeText(str(j))
        sel.Start = sel.Start+1
sel.Find.Execute('{:xxx}', False, False, False, False, False, True, 1, True, '', 0)  #找到某个标记
sel.Rows.Delete()         #删除行

样式

字体
Font属性是Selection,Range,Find,Replacement,Style,ConditionalStyle对象的属性，主要改变字体的大小（Size),颜色(Color),字体类型(Name),粗体(Bold),斜体(Italic)等

doc = word.Documents.Open(FileName=path,Encoding='gbk')
#中文路径乱码问题处理（下面的 decode 写法只适用于 Python 2；Python 3 的 str 本身就是 Unicode，无需 decode）
path="c:/文档.docx"
FileName=path.decode("utf8")
#中文写入乱码处理
table.Cell(0,1).Range.Text=str.decode("utf8")
table.Cell(0,2).Range.Text=(u'%s' % str)

替换、查找操作

word.Selection.Find.ClearFormatting()
word.Selection.Find.Replacement.ClearFormatting()
word.Selection.Find.Execute(OldStr, False, False, False, False, False, True, 1, True, NewStr, 2)

参数(OldStr–搜索的关键字,
True–区分大小写,
True–完全匹配的单词，并非单词中的部分（全字匹配）,
True–使用通配符,
True–同音,
True–查找单词的各种形式,
True–向文档尾部搜索,
1–查找到文档末尾后的处理方式（Wrap，1 即 wdFindContinue，继续从头查找）,
True–带格式的文本,
NewStr–替换文本,
2–替换个数（0表示不替换，1表示只替换匹配到的第一个，2表示全部替换）)

关闭与保存

doc.Save() # 保存
doc.SaveAs('asdasd.doc') # 另存为
doc.Close() # 关闭 word 文档
word.Documents.Close(0) # 0 即 wdDoNotSaveChanges：不保存，关闭所有已打开的 word 文档
word.Quit() # 关闭 office

另存为操作

from win32com import client as wc
word = wc.Dispatch("Word.Application")
word.Visible = 0 # 后台运行，不显示
word.DisplayAlerts = 0 #不警告
doc = word.Documents.Open('xxx.docx')
doc.SaveAs('xxx.pdf', 17) # txt=4, html=10, docx=16， pdf=17
doc.Close()
word.Quit()

下表为格式转换的代码

        wdFormatDocument = 0
        wdFormatDocument97 = 0
        wdFormatDocumentDefault = 16
        wdFormatDOSText = 4
        wdFormatDOSTextLineBreaks = 5
        wdFormatEncodedText = 7
        wdFormatFilteredHTML = 10
        wdFormatFlatXML = 19
        wdFormatFlatXMLMacroEnabled = 20
        wdFormatFlatXMLTemplate = 21
        wdFormatFlatXMLTemplateMacroEnabled = 22
        wdFormatHTML = 8
        wdFormatPDF = 17
        wdFormatRTF = 6
        wdFormatTemplate = 1
        wdFormatTemplate97 = 1
        wdFormatText = 2
        wdFormatTextLineBreaks = 3
        wdFormatUnicodeText = 7
        wdFormatWebArchive = 9
        wdFormatXML = 11
        wdFormatXMLDocument = 12
        wdFormatXMLDocumentMacroEnabled = 13
        wdFormatXMLTemplate = 14
        wdFormatXMLTemplateMacroEnabled = 15

多线程

在多线程里面使用win32com调用com组件时，
需要先用pythoncom.CoInitialize初始化一下。

最后还需要用pythoncom.CoUninitialize释放资源

from win32com.client import Dispatch
import pythoncom  # 引入这个东西
def test(doc_full_path):
    """Skeleton for using a COM component from a worker thread.

    COM is initialised for the calling thread before any COM work and is
    released again afterwards, even if that work raises.

    Args:
        doc_full_path: Path of the document to process. The placeholder
            body does not read it yet.

    Returns:
        The value of ``table_count``, which the placeholder body leaves at 0.
    """
    table_count = 0
    pythoncom.CoInitialize()  # initialise COM for this thread
    try:
        ...  # COM objects can be used here
    finally:
        # Always pair the initialise call so the thread's COM resources
        # are released even when the work above fails.
        pythoncom.CoUninitialize()
    return table_count

CoInitialize是Windows提供的API函数
用来告诉Windows系统单独一个线程创建COM对象

docx模块

基本概括

from docx import Document
from docx.shared import Inches

document = Document()

document.add_heading('Document Title', 0)                      #插入标题

p = document.add_paragraph('A plain paragraph having some ')   #插入段落，并定义段落为p变量
p.add_run('bold').bold = True   #添加粗体字bold
p.add_run(' and some ')         #添加普通字
p.add_run('italic.').italic = True   #添加斜体字

document.add_heading('Heading, level 1', level=1)          #1级标题，比0小一点
document.add_paragraph('Intense quote', style='Intense Quote')   #添加段落，样式为强调

document.add_paragraph(
    'first item in unordered list', style='List Bullet'          #添加段落，前面有个点
)
document.add_paragraph(
    'first item in ordered list', style='List Number'            #添加段落，前面有个序号1
)

document.add_picture('monty-truth.png', width=Inches(1.25))      #插入图片

table = document.add_table(rows=1, cols=3)                      #插入表格
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
recordset =[{'qty':'1a','id':1,'desc':'aaa'},{'qty':'2a','id':2,'desc':'bbb'},{'qty':'3c','id':3,'desc':'ccc'}]
for item in recordset:
    row_cells = table.add_row().cells
    row_cells[0].text = str(item['qty'])
    row_cells[1].text = str(item['id'])
    row_cells[2].text = item['desc']

document.add_page_break()    #添加下一页       

document.save('demo.docx')  #保存文档

读取和编辑一个已有的word文档，只需在一开始添加上文件路径就行了，如下：

from docx import Document
from docx.shared import Inches

document = Document('demo.docx')  #打开文件demo.docx
for paragraph in document.paragraphs:
    print(paragraph.text)  #打印各段落内容文本

document.add_paragraph(
    'Add new paragraph', style='List Number'
)    #添加新段落

document.save('demo.docx') #保存文档

替换

for x in document.paragraphs:
    x.text=x.text.replace('ABC','DEF')

详细概括

安装 pip3 install python-docx

pip3 install python-docx -i https://pypi.douban.com/simple 豆瓣镜像下载

内联对象一般包括：段落（paragraph）、图片（inline picture）、表（table）、标题（heading）、有序列表（numbered lists）、无序列表（bullets lists）

创建文档

from docx import Document
from docx.shared import Inches
document = Document()  #创建基于默认“模板”的空白文档

打开文档

document = Document('d:/test.docx')  #打开文档

添加段落

paragraph = document.add_paragraph('段落1')  #在尾部添加段落
#参数  段落文本

在段落尾部添加文本

kuai=paragraph.add_run('我是中国人')  #在段落尾部添加文本
#返回值：内联对象

paragraphs=document.paragraphs   #返回段落引用集合--列表
paragraphs[1].text="小Z同学："  #设置序号1段落的文本

返回段落集合

s=document.paragraphs  #返回段落引用集合--列表

返回段落总数

s=len(document.paragraphs)  #返回段落总数

返回指定段落的文本

s=document.paragraphs[0].text  #返回指定段落的文本

设置段落样式

paragraph.style = 'List Bullet'  #设置段落样式
paragraph =document.add_paragraph('段落4',style = 'List Bullet')  #添加段落--带段落样式

返回段落样式

s=document.paragraphs  #返回段落引用集合--列表
s1=s[0].style    #返回序号0段落的样式
print(s1)

段落对齐

需要 from docx.enum.text import WD_ALIGN_PARAGRAPH

paragraph_format = paragraph.paragraph_format  #创建段落格式对象
paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER   #段落居中对齐
paragraph_format.alignment =WD_ALIGN_PARAGRAPH.LEFT    #段落左对齐
paragraph_format.alignment =WD_ALIGN_PARAGRAPH.RIGHT   #段落右对齐
paragraph_format.alignment =WD_ALIGN_PARAGRAPH.JUSTIFY   #段落两端对齐

paragraphs=document.paragraphs   #返回段落引用集合--列表
paragraphs[4].paragraph_format.alignment=WD_ALIGN_PARAGRAPH.RIGHT  #序号4段落右对齐

段落缩进

段落可以在左侧和右侧分别缩进。第一行也可以具有与段落其余部分不同的缩进，缩进的第一行有一个悬挂缩进

paragraph_format = paragraph.paragraph_format  #创建段落格式对象
paragraph_format.left_indent = Inches(0.5)  #段落左缩进0.5英寸
#需要  from docx.shared import Inches
paragraph_format.right_indent = Pt(20)   #右缩进20点
#from docx.shared import Pt
paragraph_format.first_line_indent = Inches(0.5)  #第一行缩进

paragraphs=document.paragraphs   #返回段落引用集合--列表
paragraphs[2].paragraph_format.first_line_indent=Cm(0.74)  #序号2段落首行缩进0.74厘米
#from docx.shared import Cm

段落间距

paragraph_format = paragraph.paragraph_format  #创建段落格式对象
paragraph_format.space_before = Pt(38)  #设置段落前间距
paragraph_format.space_after = Pt(19)   #设置段落后间距

行间距

paragraph_format = paragraph.paragraph_format  #创建段落格式对象
paragraph_format.line_spacing = Pt(50)   #设置行间距

行距可以通过段落paragraph_format属性的line_spacing或line_spacing_rule属性来指定，当line_spacing设置为长度值时表示绝对距离，设置为浮点数时表示行高的倍数，设置为None表示根据继承层次决定

保存文档

document.save('d:/test.docx')  #保存文档--覆盖原文档

添加标题

document.add_heading('标题', level=0)  #添加标题
#参数2 标题级别  0--9

添加分页

document.add_page_break()  #添加分页

换页方式

换页方式决定一个段落在一个页面结束附近如何表现，常用属性有如下，每个属性的取值可以为True、False、None：

keep_together设置为True时使得整个段落出现在同一页中，如果一个段落在换页时可能会被打断就在段前换页；
keep_with_next设置为True时使得本段与下一段出现在同一页中；
page_break_before设置为True时使得本段出现在新的一页的顶端，例如新的一章标题必须从新的一页开始；
widow_control设置为True时表示可以在必要的时候进行分页，避免本段的第一行或最后一行单独出现在一页中

粗体和斜体

kuai=paragraph.add_run('我是中国人')  #在段落尾部添加文本
#返回值：内联对象
kuai.bold = True  #给内联设置粗体

kuai=paragraph.add_run('我是中国人')  #在段落尾部添加文本
#返回值：内联对象
kuai.italic = True  #给内联设置斜体

kuai.underline = True  #给内联设置下划线

字符格式

Run属于行内元素的一种，是一个块级元素的组成部分，可以看做是一段连续的具有相同格式（字体、字号、颜色、加粗、斜体、下画线、阴影等）的文本。一般来说，一个段落会包含一个或多个Run，使得同一个段落中可以包含不同格式的文本

可以通过一个Run对象的font属性来获取和设置该Run的字符格式，例如字体名称font.name、字体大小font.size、是否加粗font.bold、是否斜体font.italic、下画线格式font.underline（True表示单下画线，False表示没有下画线，或者使用WD_UNDERLINE中的成员设置更多下画线格式）、字体颜色font.color.rgb（设置为docx.shared.RGBColor对象）

包括字体和大小，粗体，斜体和下划线

#设置字体--麻烦一点
kuai.font.name=u'华文彩云'
r = kuai._element
r.rPr.rFonts.set(qn('w:eastAsia'), '华文彩云')
#需要 from docx.oxml.ns import qn

kuai.font.size = Pt(30)  #字体大小
kuai.font.color.rgb = RGBColor(0x42, 0x24, 0xE9)  #设置字体颜色
#需要 from docx.shared import RGBColor

样式

s=document.styles  #获取word所有样式集合对象
for i in s:
    print(i)

章节

from docx import Document
from docx.shared import Inches
from docx.enum.section import WD_ORIENT, WD_SECTION

document = Document()
paragraph = document.add_paragraph('段落1')
paragraph = document.add_paragraph('段落5:床前明月光，疑是地上霜。举头望明月，低头思故乡。')

document.add_section()  #添加新章节
paragraph = document.add_paragraph('章节2-1')
document.add_section()  #添加新章节
paragraph = document.add_paragraph('章节3-1')

sections = document.sections  #返回所有章节引用的对象
s=len(sections)   #返回章节总数
section = sections[0]  #返回指定章节的对象
section = document.sections[-1]  # 返回文档最后一个章节
new_height= section.page_height  #返回章节页面的高
#10058400    单位：EMU（English Metric Unit）    1英寸=914400 EMU
new_width=section.page_width   #返回章节页面的宽
#7772400
section.page_height=10058400  #设置章节的高度
section.page_width =4072400  #设置章节宽度

section.orientation = WD_ORIENT.LANDSCAPE  #设置页面方向（只改方向标记，要真正变成横向还需对调 page_width 和 page_height）
#需要  from docx.enum.section import WD_ORIENT, WD_SECTION

s=section.left_margin  #返回左边距--单位EMU
s=section.right_margin  #返回右边距--单位EMU
s=section.top_margin  #返回上边距--单位EMU
s=section.bottom_margin  #返回下边距--单位EMU
section.left_margin = Inches(1.5)   #设置左边距
s=section.header_distance   #返回页眉距离--单位EMU
s=section.footer_distance  #返回页脚距离--单位EMU

print(s)
document.save('d:/test.docx')

页眉和页脚

Word支持页眉和页脚。页眉是出现在每个页面的上边距区域中的文本，与文本主体分开，并且通常传达上下文信息，例如文档标题，作者，创建日期或页码。文档中的页眉在页面之间是相同的，内容上只有很小的差异，例如更改部分标题或页码。页眉也称为运行头

页脚在页眉的每个方面都类似，只不过它出现在页面底部。它不应与脚注混淆，脚注在页面之间内容是不一致的

页眉和页脚与一个章节相关联，这允许每个章节具有不同的页眉和/或页脚

页眉：

每个section对象都有一个.header属性，可以访问该节的_Header对象：

from docx import Document

document = Document()
paragraph = document.add_paragraph('段落1')
document.add_section()  #添加新章节
paragraph = document.add_paragraph('段落2')
document.add_section()
paragraph = document.add_paragraph('段落3')

section = document.sections[0]  #返回序号0章节的引用
header = section.header  #返回章节header引用
print(header.is_linked_to_previous)   #章节头(页眉)是否无定义
#值为True表示_Header对象不包含章节头定义，该章节将显示与上一节相同的章节头

#添加页眉
paragraph = header.paragraphs[0]  #返回页眉序号0的段落的引用
#章节头已包含单个（空）段落
#此时把header.is_linked_to_previous属性设为false
paragraph.text = "这是页眉1"

print(header.is_linked_to_previous)
document.save('d:/test.docx')

删除页眉

通过将True分配给其.is_linked_to_previous属性，可以删除不需要的页眉：

header = section.header  #返回章节header引用
header.is_linked_to_previous = True  #删除页眉

页脚：

每个section对象都有一个footer属性，可以访问该章节的页脚对象：

section = document.sections[0]  #返回序号0章节的引用
footer=section.footer  #返回章节页脚的引用
print(footer.is_linked_to_previous)
#值为True表示页脚对象不包含定义，该章节将显示与上一节相同的页脚

#添加页脚
paragraph = footer.paragraphs[0]  #返回页脚序号0的段落的引用
#页脚已包含单个（空）段落
#此时把footer.is_linked_to_previous属性设为false
paragraph.text = "这是页脚1"

footer.is_linked_to_previous = True  #删除页脚

制表符

tab_stops = paragraph.paragraph_format.tab_stops  #返回段落格式制表符的引用
from docx.enum.text import WD_TAB_ALIGNMENT, WD_TAB_LEADER
tab_stop = tab_stops.add_tab_stop(Inches(4.5), WD_TAB_ALIGNMENT.LEFT, WD_TAB_LEADER.DOTS)  #添加制表位
#参数2 对齐--默认左  https://python-docx.readthedocs.io/en/latest/api/enum/WdTabAlignment.html#wdtabalignment
#参数3 填充符--默认空格    https://python-docx.readthedocs.io/en/latest/api/enum/WdTabLeader.html#wdtableader
print(tab_stop.position)  #返回制表位位置--单位EMU
print(tab_stop.position.inches)  #返回制表位位置--单位英寸
tab_stops[0] #返回序号0制表位的引用

表格

添加表格

table = document.add_table(rows=2, cols=2)  #添加表格

给单元格赋值和读取单元格文本

cell = table.cell(0, 1)  #返回表格的单元格对象
cell.text = '0行1列'   #给单元格赋值
s=cell.text   #返回单元格文本

row = table.rows[1]  #返回行对象
row.cells[0].text = '一行零列'  #给行对象的第n个单元格赋值
s=row.cells[1].text  #返回行对象的第n个单元格的文本

col = table.columns[1]  #返回列对象
col.cells[0].text = '零行1列'  #给列对象的第n个单元格赋值
s=col.cells[1].text  #返回列对象的第n个单元格的文本

tables=document.tables  #返回文档所有表格引用集合--列表
tables[0].cell(1,0).text="猫粮1"  #给序号0的表格指定单元格设置文本

总行数和总列数

s = len(table.rows)     #返回表格的总行数
s = len(table.columns)  #返回表格的总列数

图片

添加图片

document.add_picture('大象.png')  #添加图片--添加的图像以原始大小显示

document.add_picture('大象.png', width=Inches(1.0))  #添加图片
#参数2 图片宽度  -宽度和高度只指定一个，另一个按比例缩放
#Inches--单位是英寸

document.add_picture('大象.png', width=Cm(11.8))#添加图片
#需要   from docx.shared import Cm

更多推荐

python win32com、docx 操控word

python win32com、docx 操控word

win32com模块

打开与创建word

内容操作

插入操作

Select操作

转换

表格操作

样式

替换、查找操作

关闭与保存

另存为操作

多线程

docx模块

基本概括

替换

详细概括

创建文档

打开文档

添加段落

在段落尾部添加文本

返回段落集合

返回段落总数

返回指定段落的文本

设置段落样式

返回段落样式

段落对齐

段落缩进

段落间距

行间距

保存文档

添加标题

添加分页

换页方式

粗体和斜体

字符格式

样式

章节

页眉和页脚

删除页眉

**页脚：**

制表符

表格

添加表格

给单元格赋值和读取单元格文本

总行数和总列数

图片

添加图片

相关文章

发布评论取消回复

最近发表

热门文章

标签列表

页脚：