|
@ -211,6 +211,55 @@ def ulit_request_file(new_id, sentence, title): |
|
|
"content": f"{row['正文']}\n以上这条中可能包含了一些病情或者症状,请帮我归纳这条中所对应的病情或者症状是哪些,总结出来,不需要很长,简单归纳即可,直接输出症状或者病情,可以包含一些形容词来辅助描述,不需要有辅助词汇" |
|
|
"content": f"{row['正文']}\n以上这条中可能包含了一些病情或者症状,请帮我归纳这条中所对应的病情或者症状是哪些,总结出来,不需要很长,简单归纳即可,直接输出症状或者病情,可以包含一些形容词来辅助描述,不需要有辅助词汇" |
|
|
}], |
|
|
}], |
|
|
"top_p": 0.9, |
|
|
"top_p": 0.9, |
|
|
|
|
|
"temperature": 0.3 |
|
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
|
|
# 并发处理请求 |
|
|
|
|
|
with concurrent.futures.ThreadPoolExecutor(200) as executor: |
|
|
|
|
|
results = list(executor.map(dialog_line_parse, data_list)) |
|
|
|
|
|
|
|
|
|
|
|
# 更新总结字段 |
|
|
|
|
|
for idx, result in zip(to_process.index, results): |
|
|
|
|
|
summary = result['choices'][0]['message']['content'] |
|
|
|
|
|
df.at[idx, "总结"] = summary |
|
|
|
|
|
|
|
|
|
|
|
# 保存更新后的CSV |
|
|
|
|
|
df.to_csv(file_name_res_save, sep="\t", index=False) |
|
|
|
|
|
return df |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def ulit_request_file_zongjie(new_id, sentence, zongjie, title): |
|
|
|
|
|
file_name_res_save = f"data_file_res/{title}.csv" |
|
|
|
|
|
|
|
|
|
|
|
# 初始化或读取CSV文件 |
|
|
|
|
|
if os.path.exists(file_name_res_save): |
|
|
|
|
|
df = pd.read_csv(file_name_res_save, sep="\t") |
|
|
|
|
|
else: |
|
|
|
|
|
df = pd.DataFrame(columns=["ID", "正文", "总结", "有效", "已向量化"]) |
|
|
|
|
|
|
|
|
|
|
|
# # 添加新数据(生成唯一ID) |
|
|
|
|
|
# new_row = { |
|
|
|
|
|
# "ID": str(new_id), |
|
|
|
|
|
# "正文": sentence, |
|
|
|
|
|
# "总结": zongjie, |
|
|
|
|
|
# "有效": True, |
|
|
|
|
|
# "已向量化": False |
|
|
|
|
|
# } |
|
|
|
|
|
# df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) |
|
|
|
|
|
|
|
|
|
|
|
# 筛选需要处理的记录 |
|
|
|
|
|
to_process = df[df["有效"] == True] |
|
|
|
|
|
|
|
|
|
|
|
# 调用API生成总结(示例保留原有逻辑) |
|
|
|
|
|
data_list = [] |
|
|
|
|
|
for _, row in to_process.iterrows(): |
|
|
|
|
|
data_list.append({ |
|
|
|
|
|
"model": "gpt-4-turbo", |
|
|
|
|
|
"messages": [{ |
|
|
|
|
|
"role": "user", |
|
|
|
|
|
"content": f"{row['正文']}\n以上这条中可能包含了一些病情或者症状,请帮我归纳这条中所对应的病情或者症状是哪些,总结出来,不需要很长,简单归纳即可,直接输出症状或者病情,可以包含一些形容词来辅助描述,不需要有辅助词汇" |
|
|
|
|
|
}], |
|
|
|
|
|
"top_p": 0.9, |
|
|
"temperature": 0.6 |
|
|
"temperature": 0.6 |
|
|
}) |
|
|
}) |
|
|
|
|
|
|
|
@ -227,6 +276,7 @@ def ulit_request_file(new_id, sentence, title): |
|
|
df.to_csv(file_name_res_save, sep="\t", index=False) |
|
|
df.to_csv(file_name_res_save, sep="\t", index=False) |
|
|
return df |
|
|
return df |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(question, title, top): |
|
|
def main(question, title, top): |
|
|
db_dict = { |
|
|
db_dict = { |
|
|
"1": "yetianshi" |
|
|
"1": "yetianshi" |
|
@ -264,7 +314,7 @@ def main(question, title, top): |
|
|
vector_path = f"data_np/{title_dan}.npy" |
|
|
vector_path = f"data_np/{title_dan}.npy" |
|
|
vectors = np.load(vector_path) |
|
|
vectors = np.load(vector_path) |
|
|
|
|
|
|
|
|
data_str = pd.read_csv(f"data_file/{title_dan}.csv", sep="\t", encoding="utf-8").values.tolist() |
|
|
data_str = pd.read_csv(f"data_file_res/{title_dan}.csv", sep="\t", encoding="utf-8").values.tolist() |
|
|
index.add(vectors) |
|
|
index.add(vectors) |
|
|
D, I = index.search(embs, int(top)) |
|
|
D, I = index.search(embs, int(top)) |
|
|
print(I) |
|
|
print(I) |
|
@ -274,7 +324,7 @@ def main(question, title, top): |
|
|
reference_list.append([data_str[i], j]) |
|
|
reference_list.append([data_str[i], j]) |
|
|
|
|
|
|
|
|
for i,j in enumerate(reference_list): |
|
|
for i,j in enumerate(reference_list): |
|
|
paper_list_str += "第{}篇\n{},此篇文章跟问题的相关度为{}%\n".format(str(i+1), j[0][0], j[1]) |
|
|
paper_list_str += "第{}篇\n{},此篇文章跟问题的相关度为{}%\n".format(str(i+1), j[0][1], j[1]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
''' |
|
|
''' |
|
@ -334,6 +384,7 @@ def upload_file_check(): |
|
|
sentence = request.form.get('sentence') |
|
|
sentence = request.form.get('sentence') |
|
|
title = request.form.get("title") |
|
|
title = request.form.get("title") |
|
|
new_id = request.form.get("id") |
|
|
new_id = request.form.get("id") |
|
|
|
|
|
zongjie = request.form.get("zongjie") |
|
|
state = request.form.get("state") |
|
|
state = request.form.get("state") |
|
|
''' |
|
|
''' |
|
|
{ |
|
|
{ |
|
@ -343,14 +394,29 @@ def upload_file_check(): |
|
|
"4": "pdf" |
|
|
"4": "pdf" |
|
|
} |
|
|
} |
|
|
''' |
|
|
''' |
|
|
|
|
|
# 增 |
|
|
state_res = "" |
|
|
state_res = "" |
|
|
if state == "1": |
|
|
if state == "1": |
|
|
df = ulit_request_file(new_id, sentence, title) |
|
|
df = ulit_request_file(new_id, sentence, title) |
|
|
Building_vector_database(title, df) |
|
|
Building_vector_database(title, df) |
|
|
state_res = "上传完成" |
|
|
state_res = "上传完成" |
|
|
|
|
|
|
|
|
|
|
|
# 删 |
|
|
elif state == "2": |
|
|
elif state == "2": |
|
|
delete_data(title, new_id) |
|
|
delete_data(title, new_id) |
|
|
state_res = "删除完成" |
|
|
state_res = "删除完成" |
|
|
|
|
|
|
|
|
|
|
|
# 改 |
|
|
|
|
|
elif state == "3": |
|
|
|
|
|
df = ulit_request_file(new_id, sentence, title) |
|
|
|
|
|
Building_vector_database(title, df) |
|
|
|
|
|
state_res = "修改完成" |
|
|
|
|
|
|
|
|
|
|
|
# 查 |
|
|
|
|
|
elif state == "4": |
|
|
|
|
|
df = ulit_request_file(new_id, sentence, title) |
|
|
|
|
|
state_res = "" |
|
|
|
|
|
|
|
|
return_json = { |
|
|
return_json = { |
|
|
"code": 200, |
|
|
"code": 200, |
|
|
"info": state_res |
|
|
"info": state_res |
|
|