|
@ -1,6 +1,7 @@ |
|
|
from docx import Document |
|
|
from docx import Document |
|
|
|
|
|
import platform |
|
|
import os |
|
|
import os |
|
|
|
|
|
import concurrent.futures |
|
|
os.environ['ALL_PROXY'] = 'http://127.0.0.1:10809' |
|
|
os.environ['ALL_PROXY'] = 'http://127.0.0.1:10809' |
|
|
import docx |
|
|
import docx |
|
|
import json |
|
|
import json |
|
@ -126,7 +127,6 @@ def getText(fileName): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def request_selfmodel_api(prompt): |
|
|
def request_selfmodel_api(prompt): |
|
|
print(prompt) |
|
|
|
|
|
url = "http://192.168.31.149:12004/predict" |
|
|
url = "http://192.168.31.149:12004/predict" |
|
|
data = { |
|
|
data = { |
|
|
"model": "gpt-4-turbo-preview", |
|
|
"model": "gpt-4-turbo-preview", |
|
@ -197,9 +197,15 @@ def yanzhengyijibiaoti(mulu, res): |
|
|
return return_bool, dabiaoti_res_list |
|
|
return return_bool, dabiaoti_res_list |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(fileName): |
|
|
def main(path): |
|
|
# fileName = "data/基于Python的电影网站设计_范文.docx" |
|
|
# fileName = "data/基于Python的电影网站设计_范文.docx" |
|
|
text_1 = json.dumps(read_word(fileName), ensure_ascii=False) |
|
|
system = platform.system() |
|
|
|
|
|
if system == 'Linux': |
|
|
|
|
|
file_name = path.split("/")[-1].split(".")[0] |
|
|
|
|
|
else: |
|
|
|
|
|
file_name = path.split("\\")[-1].split(".")[0] |
|
|
|
|
|
|
|
|
|
|
|
text_1 = json.dumps(read_word(path), ensure_ascii=False) |
|
|
print(text_1) |
|
|
print(text_1) |
|
|
mulu_str = re.findall(pantten_mulu, text_1)[0] |
|
|
mulu_str = re.findall(pantten_mulu, text_1)[0] |
|
|
print(mulu_str) |
|
|
print(mulu_str) |
|
@ -213,7 +219,6 @@ def main(fileName): |
|
|
mulu_list.append("致谢") |
|
|
mulu_list.append("致谢") |
|
|
|
|
|
|
|
|
print(mulu_list) |
|
|
print(mulu_list) |
|
|
content_list = [] |
|
|
|
|
|
|
|
|
|
|
|
yijibiaoti = "" |
|
|
yijibiaoti = "" |
|
|
paper_content = {} |
|
|
paper_content = {} |
|
@ -261,6 +266,9 @@ def main(fileName): |
|
|
if shaixuan_bool == True: |
|
|
if shaixuan_bool == True: |
|
|
break |
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
index_zahnweifu = 0 |
|
|
|
|
|
zhanweifu = [] |
|
|
content_1 = [] |
|
|
content_1 = [] |
|
|
catalogue = [] |
|
|
catalogue = [] |
|
|
for yijibiaoti in dabiaoti_res_list: |
|
|
for yijibiaoti in dabiaoti_res_list: |
|
@ -270,27 +278,58 @@ def main(fileName): |
|
|
for erjibiaoti in paper_content[yijibiaoti]: |
|
|
for erjibiaoti in paper_content[yijibiaoti]: |
|
|
num = random.randint(2, 6) |
|
|
num = random.randint(2, 6) |
|
|
content = paper_content[yijibiaoti][erjibiaoti] |
|
|
content = paper_content[yijibiaoti][erjibiaoti] |
|
|
res = request_selfmodel_api( |
|
|
# res = request_selfmodel_api( |
|
|
f'任务:生成段落主要内容\n请对以下内容进行提取信息,只需要提取{str(num)}条主要内容,使用条数罗列下面这段话的主要信息,例如1. xxx\n2.xxx \n' + content)[ |
|
|
# f'任务:生成段落主要内容\n请对以下内容进行提取信息,只需要提取{str(num)}条主要内容,使用条数罗列下面这段话的主要信息,例如1. xxx\n2.xxx \n' + content)[ |
|
|
'choices'][0]['message']['content'] |
|
|
# 'choices'][0]['message']['content'] |
|
|
tiaoshu_list = str(res).split("\n") |
|
|
|
|
|
|
|
|
zhanweifu.append(f'任务:生成段落主要内容\n请对以下内容进行提取信息,只需要提取{str(num)}条主要内容,使用条数罗列下面这段话的主要信息,例如1. xxx\n2.xxx \n' + content) |
|
|
|
|
|
|
|
|
tiaoshu_list_new = [] |
|
|
|
|
|
for dantiao in tiaoshu_list: |
|
|
|
|
|
tiaoshu_list_new.append(re.findall(pantten_content_tiaoshu, dantiao)[0].strip()) |
|
|
|
|
|
content_2.append({ |
|
|
content_2.append({ |
|
|
"title_small": re.findall(pantten_erjibiaoti_content, erjibiaoti)[0], |
|
|
"title_small": re.findall(pantten_erjibiaoti_content, erjibiaoti)[0], |
|
|
"content_3": tiaoshu_list_new |
|
|
"content_3": index_zahnweifu |
|
|
}) |
|
|
}) |
|
|
|
|
|
index_zahnweifu += 1 |
|
|
content_1.append({ |
|
|
content_1.append({ |
|
|
"title_big": yijibiaoti_content, |
|
|
"title_big": yijibiaoti_content, |
|
|
"content_2": content_2 |
|
|
"content_2": content_2 |
|
|
}) |
|
|
}) |
|
|
|
|
|
|
|
|
|
|
|
with concurrent.futures.ThreadPoolExecutor(100) as executor: |
|
|
|
|
|
results = executor.map(request_selfmodel_api, zhanweifu) |
|
|
|
|
|
|
|
|
|
|
|
zhanweifu = [] |
|
|
|
|
|
for result in results: |
|
|
|
|
|
res = result['choices'][0]['message']['content'] |
|
|
|
|
|
|
|
|
|
|
|
tiaoshu_list = str(res).split("\n") |
|
|
|
|
|
|
|
|
|
|
|
tiaoshu_list_new = [] |
|
|
|
|
|
for dantiao in tiaoshu_list: |
|
|
|
|
|
tiaoshu_list_new.append(re.findall(pantten_content_tiaoshu, dantiao)[0].strip()) |
|
|
|
|
|
zhanweifu.append(tiaoshu_list_new) |
|
|
|
|
|
|
|
|
|
|
|
content_1_new = [] |
|
|
|
|
|
|
|
|
|
|
|
for yijibiaoti_content in content_1: |
|
|
|
|
|
content_2_new = [] |
|
|
|
|
|
title_big = yijibiaoti_content["title_big"] |
|
|
|
|
|
for erjibiaoti_content in yijibiaoti_content["content_2"]: |
|
|
|
|
|
title_small = erjibiaoti_content["title_small"] |
|
|
|
|
|
content_3 = zhanweifu[erjibiaoti_content["content_3"]] |
|
|
|
|
|
content_2_new.append({ |
|
|
|
|
|
"title_small": title_small, |
|
|
|
|
|
"content_3": content_3 |
|
|
|
|
|
}) |
|
|
|
|
|
content_1_new.append({ |
|
|
|
|
|
"title_big": title_big, |
|
|
|
|
|
"content_2": content_2_new |
|
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data_new = { |
|
|
data_new = { |
|
|
"title": fileName, |
|
|
"title": file_name, |
|
|
"catalogue": catalogue, |
|
|
"catalogue": catalogue, |
|
|
"content_1": content_1 |
|
|
"content_1": content_1_new |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
# with open("data/ceshi.json", "w", encoding="utf-8") as f: |
|
|
# with open("data/ceshi.json", "w", encoding="utf-8") as f: |
|
@ -307,11 +346,10 @@ def upload_file(): |
|
|
file = request.files.get('file') |
|
|
file = request.files.get('file') |
|
|
|
|
|
|
|
|
if file and allowed_file(file.filename): |
|
|
if file and allowed_file(file.filename): |
|
|
filename = secure_filename(file.filename) |
|
|
path = os.path.join(app.config['UPLOAD_FOLDER'], file.filename) |
|
|
path = os.path.join(app.config['UPLOAD_FOLDER'], filename) |
|
|
|
|
|
print(path) |
|
|
print(path) |
|
|
file.save(path) |
|
|
file.save(path) |
|
|
|
|
|
# file.save(file.filename) |
|
|
|
|
|
|
|
|
result = main(path) |
|
|
result = main(path) |
|
|
return_text = {"texts": result, "probabilities": None, "status_code": 200} |
|
|
return_text = {"texts": result, "probabilities": None, "status_code": 200} |
|
|