# -*- coding: utf-8 -*-
"""
@Time    : 2023/2/14 14:29
@Author  :
@FileName:
@Software:
@Describe: Batch-convert legacy Word ``.doc`` files in a folder to ``.docx``
           by driving Microsoft Word through COM (pywin32).
"""
import operator
import os

import docx
import win32com.client as wc
# The original file binds ``wc`` a second time with this equivalent import;
# both lines are kept so nothing that relied on either form changes.
from win32com import client as wc

# Word's WdSaveFormat value for "XML document", i.e. the .docx format.
WD_FORMAT_XML_DOCUMENT = 12

# ---------------------------------------------------------------------------
# Earlier experiments, kept for reference only (none of this is executed).
# ---------------------------------------------------------------------------
# Read the text runs of a .docx straight from its XML payload:
#
# from zipfile import ZipFile
# from bs4 import BeautifulSoup
#
# document=ZipFile("../data/11篇/13139551_于丰源_在线考试系统-原文.docx")
# xml=document.read("word/document.xml")
# wordObj=BeautifulSoup(xml.decode("utf-8"))
# texts=wordObj.findAll("w:t")
# for text in texts:
#     print(text.text)
#
# Save a single .doc file as .docx:
#
# path = "E:\pycharm_workspace\drop_weight_rewrite\data\\11篇\\13138572_李菊_谈小学语文教学难点与创新-原文.doc"
# path_new = "E:\pycharm_workspace\drop_weight_rewrite\data\\11篇\\13138572_李菊_谈小学语文教学难点与创新-原文.docx"
# word = wc.Dispatch("Word.Application")
# doc = word.Documents.Open(path)
# # 12 means the file is saved in docx format
# doc.SaveAs(path_new, 12)
# doc.Close
# word.Quit
#
# # Read the converted docx
#
# file = docx.Document(path_new)
# for p in file.paragraphs:
#     print(p.text)
#
# from win32com import client as wc
# w = wc.Dispatch('Word.Application')
# # Alternatively, use the call below to start a separate Word process:
# # w = wc.DispatchEx('Word.Application')
# doc=w.Documents.Open(path)
# doc.SaveAs(path_new,16)  # the argument 16 is required, otherwise it fails.


def save_doc_to_docx(rawpath):  # convert .doc to .docx
    """
    Convert every ``.doc`` file in a folder to ``.docx`` using Word.

    Each converted file is written next to its source, with the same base
    name and a ``.docx`` extension. The name of every file being converted
    is printed.

    :param rawpath: path of the folder that holds the ``.doc`` files and
        receives the ``.docx`` files. A trailing separator is optional.
    :return: None
    """
    # Word does not resolve relative paths against Python's working
    # directory, so always hand it absolute paths.
    folder = os.path.abspath(rawpath)

    word = wc.Dispatch("Word.Application")
    try:
        for name in os.listdir(folder):
            # Only .doc files (any letter case); names starting with "~$"
            # are the temporary lock files Word creates for open documents.
            if not name.lower().endswith('.doc') or name.startswith('~$'):
                continue
            print(name)

            doc = word.Documents.Open(os.path.join(folder, name))
            try:
                # Split the base name from the extension and save beside
                # the source file, in the folder that was passed in.
                stem = os.path.splitext(name)[0]
                doc.SaveAs(os.path.join(folder, stem + '.docx'),
                           WD_FORMAT_XML_DOCUMENT)
            finally:
                # Close the document even when the save fails so Word does
                # not keep the source file locked.
                doc.Close()
    finally:
        # Always shut Word down; otherwise a failed conversion leaves a
        # WINWORD.EXE process running in the background.
        word.Quit()


if __name__ == '__main__':
    pathls = "E:\\pycharm_workspace\\drop_weight_rewrite\\data\\11篇\\"
    save_doc_to_docx(pathls)