
2 changed files with 270 additions and 0 deletions
@@ -0,0 +1,8 @@
# Generate PPT-format JSON data

Upload a docx file

Returns JSON data

```
python ppt_api.py
```
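
For reference, the JSON the script writes has roughly this shape (a sketch based on the `data_new` structure in the script below; the bracketed headings and bullets are placeholders for what the model actually returns):

```
{
  "title": "data/基于Python的电影网站设计_范文.docx",
  "catalogue": ["一、绪论", "<level-1 heading>", "<level-1 heading>", "<level-1 heading>"],
  "content_1": [
    {
      "title_big": "一、绪论",
      "content_2": [
        {
          "title_small": "<level-2 heading>",
          "content_3": ["<bullet 1>", "<bullet 2>"]
        }
      ]
    }
  ]
}
```
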
@@ -0,0 +1,262 @@
import os

# route all HTTP traffic through a local proxy so the OpenAI endpoint is reachable
os.environ['ALL_PROXY'] = 'http://127.0.0.1:10809'

import json
import random
import re

import docx
import requests
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph

fileName = "data/基于Python的电影网站设计_范文.docx"

# regex patterns used to carve the thesis text up by its table of contents
pantten_mulu = r'目录(.*?)致谢'  # everything between 目录 (TOC) and 致谢 (acknowledgements)
pantten_xiaobiaoti = "{}(.*?){}"  # text between two consecutive headings
pantten_yijibiaoti = r'^([一二三四五六七八九])、(.*)'  # level-1 heading, e.g. 一、xxx
pantten_yijibiaoti_content = r'^[一二三四五六七八九]、(.*)'
pantten_erjibiaoti = r'^[0-9]\.[0-9]\d*\s+?.*$'  # level-2 heading, e.g. 1.1 xxx
pantten_erjibiaoti_content = r'^[0-9]\.[0-9]\d*\s+?(.*)$'
pantten_content_tiaoshu = r'[0-9]\.\s*?(.*)'  # numbered bullet, e.g. "1. xxx"

# prompt templates: ask the model to generate a Chinese TOC for a thesis title,
# with level-1 headings numbered 一、二、... and level-2 headings numbered 1.1, 1.2, ...
prompt_two_title_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成{}个;每个一级标题包含{}-{}个二级标题"
prompt_two_title_not_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成{}个;每个一级标题包含{}个二级标题"

# patterns for pulling the title and heading counts back out of a prompt
pantten_title = "为论文题目“(.*?)”生成中文目录"
pantten_xiaobiaoti_geshu = "每个一级标题包含(.*?)个"
pantten_dabiaoti_geshu = "一级标题生成(.*?)个"

# prompt asking the model to shrink the TOC down to the 4 level-1 headings
# that work best as PPT section titles
mulusuojian = "请问把以下目录缩减成只有4个一级标题作为ppt的题目,请问留下原始目录中的哪4个一级标题最合适,一级标题必须在原始目录中\n{}\n"

self_api = "http://192.168.31.149:12004/predict"  # local model server (OpenAI-style chat API)
gpt_api = "https://api.openai.com/v1/chat/completions"


def iter_block_items(parent):
    """
    Yield each paragraph and table child within *parent*, in document order.
    Each returned value is an instance of either Table or Paragraph. *parent*
    would most commonly be a reference to a main Document object, but
    also works for a _Cell object, which itself can contain paragraphs and tables.
    """
    if isinstance(parent, Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError("parent must be a Document or _Cell")

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)


def read_table(table):
    return [[cell.text for cell in row.cells] for row in table.rows]


def read_word(word_path):
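    """Return the document's text in reading order; tables are serialized as
    <table>/<tr>/<td> rows wrapped in <tbStart>/<tbEnd> marker lines."""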
    paper_text = []
    doc = docx.Document(word_path)
    for block in iter_block_items(doc):
        if isinstance(block, Paragraph):
            paper_text.append(block.text)
        elif isinstance(block, Table):
            table_list = read_table(block)
            table_list_new = []
            for row in table_list:
                table_list_new.append("<td>" + "</td>\n<td>".join(row) + "</td>")
            table_str = "\n<tr>\n" + "\n</tr>\n<tr>\n".join(table_list_new) + "\n</tr>\n"
            table_str = "<tbStart>\n<table>" + table_str + "</table>\n\n<tbEnd>"
            paper_text.append(table_str)
    paper_text = "\n".join(paper_text)
    return paper_text


def getText(fileName):
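    """Simpler variant of read_word: paragraph text only, tables are skipped."""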
    doc = docx.Document(fileName)
    TextList = []
    for paragraph in doc.paragraphs:
        TextList.append(paragraph.text)

    return '\n'.join(TextList)


def request_selfmodel_api(prompt):
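    """POST an OpenAI-style chat payload to the local model server and return the parsed JSON."""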
    print(prompt)
    url = self_api
    data = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "top_p": 0.7,
        "temperature": 0.95
    }
    response = requests.post(
        url,
        json=data,
        timeout=100000
    )

    return response.json()


def request_chatgpt_api(prompt):
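    """Send the same chat payload to the official OpenAI endpoint, authenticating with a bearer token."""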
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")  # the key belongs in the environment, not in source
    url = gpt_api
    # url = "https://one.aiskt.com"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    data = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "top_p": 0.7,
        "temperature": 0.95
    }
    response = requests.post(url,
                             headers=headers,
                             data=json.dumps(data),
                             timeout=1200)

    return response.json()


def yanzhengyijibiaoti(mulu, res):
    '''
    Check whether the level-1 headings picked by the model are usable.
    :param mulu: the original TOC, one heading per line
    :param res: the model's reply, expected to quote 4 level-1 headings
    :return: (True if exactly 4 original headings were matched, the matched headings)
    '''
    mulu_list = str(mulu).split("\n")

    dabiaoti_list = []
    dabiaoti_res_list = []
    for i in mulu_list:
        res_re = re.findall(pantten_yijibiaoti, i)
        if res_re:
            dabiaoti_list.append(res_re[0])
    for i in dabiaoti_list:
        if i[1].strip() in res:
            dabiaoti_res_list.append("、".join(i))

    return_bool = len(dabiaoti_res_list) == 4

    return return_bool, dabiaoti_res_list


if __name__ == '__main__':
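    # Pipeline: read the docx -> pull out the TOC -> map each heading to its
    # body text -> have the model shrink the TOC to 4 level-1 headings ->
    # summarize each level-2 section into bullets -> write the PPT-ready JSON.
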
    text_1 = json.dumps(read_word(fileName), ensure_ascii=False)
    print(text_1)
    # grab everything between 目录 (TOC) and 致谢 (acknowledgements)
    mulu_str = re.findall(pantten_mulu, text_1)[0]
    print(mulu_str)
    # json.dumps escaped the newlines, so split on the literal "\n"
    mulu_list_xuhao = str(mulu_str).split("\\n")

    mulu_list = []
    for i in mulu_list_xuhao:
        if i != "":
            mulu_list.append(i.split("\\t")[0])  # keep the heading, drop what follows the tab

    mulu_list.append("致谢")

    print(mulu_list)
    content_list = []
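
    # Walk consecutive TOC entries and capture the text between each pair of
    # headings, building {level-1 heading: {level-2 heading: body text}}.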
    yijibiaoti = ""
    paper_content = {}
    for i in range(len(mulu_list) - 1):
        title = mulu_list[i].strip(" ").strip("\\n")
        # index [1] skips the first hit, which is the entry inside the TOC itself
        content = str(re.findall(pantten_xiaobiaoti.format(mulu_list[i], mulu_list[i + 1]), text_1)[1]).strip(" ").strip("\\n")
        # print(title)
        # print(content)

        yijibiaoti_res = re.findall(pantten_yijibiaoti, title)
        erjibiaoti_res = re.findall(pantten_erjibiaoti, title)
        if yijibiaoti_res:
            # title = "、".join([yijibiaoti_res[0][1], yijibiaoti_res[0][1].strip()])
            paper_content[title] = {}
            yijibiaoti = title
            continue

        elif erjibiaoti_res:
            paper_content[yijibiaoti][title] = content.replace("\\n", "\n")

        else:
            # stray text that is neither heading level: append it under its own key
            paper_content[yijibiaoti].setdefault(title, "")
            paper_content[yijibiaoti][title] += "\n".join([title, content])

    while True:
        mulu_str = "\n".join(mulu_list[:-1])
        prompt = mulusuojian.format(mulu_str)
        # try:
        #     res = request_chatgpt_api(prompt)['choices'][0]['message']['content']
        # except:
        #     continue
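        # Hard-coded sample reply standing in for the API call above; with the
        # call commented out, this stub is what gets validated below.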
        res = '''根据您提供的目录内容,如果要将其缩减为只包含4个一级标题的PPT题目,建议选择以下四个一级标题,因为它们分别代表了研究的引入、理论框架、实际应用与实践,以及未来展望,从而形成了一个完整的研究过程和内容框架:

1. 一、绪论
2. 二、电影网站设计的基本概念
3. 三、Python在电影网站设计中的应用
4. 五、电影网站设计的实践与展望

这样的选择既涵盖了研究的背景、目的与意义(绪论),也包括了研究的理论基础(电影网站设计的基本概念),以及研究的实际操作和技术实现(Python在电影网站设计中的应用),最后还有对项目实践经验的总结和对未来发展的展望(电影网站设计的实践与展望)。这四个部分共同构成了一个完整的研究报告或项目介绍的框架,能够全面展示电影网站设计项目的各个方面。
'''

        shaixuan_bool, dabiaoti_res_list = yanzhengyijibiaoti("\n".join(mulu_list), res.replace("\n", "\\n"))
        if shaixuan_bool:
            break
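
    # For each kept level-1 heading, ask the local model to compress every
    # level-2 section into a random 2-6 bullet points.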
    content_1 = []
    for yijibiaoti in dabiaoti_res_list:
        content_2 = []
        for erjibiaoti in paper_content[yijibiaoti]:
            num = random.randint(2, 6)
            content = paper_content[yijibiaoti][erjibiaoti]
            res = request_selfmodel_api(f'任务:生成段落主要内容\n请对以下内容进行提取信息,只需要提取{str(num)}条主要内容,使用条数罗列下面这段话的主要信息,例如1. xxx\n2.xxx \n' + content)['choices'][0]['message']['content']
            tiaoshu_list = str(res).split("\n")

            tiaoshu_list_new = []
            for dantiao in tiaoshu_list:
                # keep only lines that look like numbered bullets ("1. xxx");
                # indexing findall() directly would crash on preamble lines
                tiaoshu_res = re.findall(pantten_content_tiaoshu, dantiao)
                if tiaoshu_res:
                    tiaoshu_list_new.append(tiaoshu_res[0].strip())
            content_2.append({
                "title_small": erjibiaoti,
                "content_3": tiaoshu_list_new
            })
        content_1.append({
            "title_big": yijibiaoti,
            "content_2": content_2
        })

    data_new = {
        "title": fileName,
        "catalogue": dabiaoti_res_list,
        "content_1": content_1
    }

    with open("data/ceshi.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(data_new, ensure_ascii=False, indent=2))

    # (unused) prompt asking why this topic was chosen and what the research contributes:
    # res = request_chatgpt_api(f'针对下面这篇文章,请回答,我为什么选择这个题目,做这个研究有什么意义?\n' + data)['choices'][0]['message']['content']