You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
77 lines
2.3 KiB
77 lines
2.3 KiB
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
@Time : 2023/2/14 14:29
|
|
@Author :
|
|
@FileName:
|
|
@Software:
|
|
@Describe:
|
|
"""
|
|
# from zipfile import ZipFile
|
|
# from bs4 import BeautifulSoup
|
|
#
|
|
# document=ZipFile("../data/11篇/13139551_于丰源_在线考试系统-原文.docx")
|
|
# xml=document.read("word/document.xml")
|
|
# wordObj=BeautifulSoup(xml.decode("utf-8"))
|
|
# texts=wordObj.findAll("w:t")
|
|
# for text in texts:
|
|
# print(text.text)
|
|
|
|
import docx
|
|
import win32com.client as wc
|
|
import operator
|
|
# Save a .doc file as .docx
|
|
|
|
# path = "E:\pycharm_workspace\drop_weight_rewrite\data\\11篇\\13138572_李菊_谈小学语文教学难点与创新-原文.doc"
|
|
# path_new = "E:\pycharm_workspace\drop_weight_rewrite\data\\11篇\\13138572_李菊_谈小学语文教学难点与创新-原文.docx"
|
|
# word = wc.Dispatch("Word.Application")
|
|
# doc = word.Documents.Open(path)
|
|
# # 12代表转换后为docx文件
|
|
# doc.SaveAs(path_new, 12)
|
|
# doc.Close
|
|
# word.Quit
|
|
#
|
|
# #读取转换后的docx
|
|
#
|
|
# file = docx.Document(path_new)
|
|
# for p in file.paragraphs:
|
|
# print(p.text)
|
|
|
|
# from win32com import client as wc
|
|
# w = wc.Dispatch('Word.Application')
|
|
# # 或者使用下面的方法,使用启动独立的进程:
|
|
# # w = wc.DispatchEx('Word.Application')
|
|
# doc=w.Documents.Open(path)
|
|
# doc.SaveAs(path_new,16)#必须有参数16,否则会出错.
|
|
|
|
|
|
import os
|
|
from win32com import client as wc
|
|
|
|
def save_doc_to_docx(rawpath):  # convert .doc files to .docx
    '''
    Convert every ``.doc`` file in a folder to ``.docx`` through Word's COM API.

    The converted files are written next to the originals, with the same base
    name and a ``.docx`` extension. File names starting with ``~$`` are skipped;
    the original author's note says these are temporary files. Each file name
    is printed before it is converted.

    :param rawpath: path of the folder that holds the ``.doc`` files and that
        also receives the ``.docx`` output. It is made absolute before being
        handed to Word (the original author's note says relative paths do
        not work).
    :return: None
    '''
    # Resolve once so that both the input and output paths are absolute and
    # do not depend on the caller supplying a trailing path separator.
    folder = os.path.abspath(rawpath)

    # Snapshot the candidates up front: keep names ending in '.doc' and drop
    # those starting with '~$'.
    doc_names = [
        name for name in os.listdir(folder)
        if name.endswith('.doc') and not name.startswith('~$')
    ]
    if not doc_names:
        # Nothing to convert, so do not start a Word process at all.
        return

    word = wc.Dispatch("Word.Application")
    try:
        for name in doc_names:
            print(name)
            doc = word.Documents.Open(os.path.join(folder, name))
            try:
                stem = os.path.splitext(name)[0]
                # 12 selects the docx format (per the original author's note).
                doc.SaveAs(os.path.join(folder, stem + '.docx'), 12)
            finally:
                # Close the document even when saving fails.
                doc.Close()
    finally:
        # Always shut Word down so a failed conversion does not leave an
        # orphaned background process.
        word.Quit()
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: convert every .doc file in the data folder below.
    # The value is bound to the module-level name `pathls` and passed as the
    # folder argument.
    pathls = "E:\\pycharm_workspace\\drop_weight_rewrite\\data\\11篇\\"
    save_doc_to_docx(rawpath=pathls)
|