From 8d7708f7b045e9c8c06ed0e0c621e20e9e8dd4e4 Mon Sep 17 00:00:00 2001 From: "majiahui@haimaqingfan.com" Date: Fri, 22 Aug 2025 14:19:20 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=B5=8B=E8=AF=95scokt?= =?UTF-8?q?=E8=AF=B7=E6=B1=82=EF=BC=8C=E5=B9=B6=E6=9B=B4=E6=94=B9=E6=B5=81?= =?UTF-8?q?=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 0b30e43..af2b5c7 100644 --- a/main.py +++ b/main.py @@ -211,6 +211,55 @@ def ulit_request_file(new_id, sentence, title): "content": f"{row['正文']}\n以上这条中可能包含了一些病情或者症状,请帮我归纳这条中所对应的病情或者症状是哪些,总结出来,不需要很长,简单归纳即可,直接输出症状或者病情,可以包含一些形容词来辅助描述,不需要有辅助词汇" }], "top_p": 0.9, + "temperature": 0.3 + }) + + # 并发处理请求 + with concurrent.futures.ThreadPoolExecutor(200) as executor: + results = list(executor.map(dialog_line_parse, data_list)) + + # 更新总结字段 + for idx, result in zip(to_process.index, results): + summary = result['choices'][0]['message']['content'] + df.at[idx, "总结"] = summary + + # 保存更新后的CSV + df.to_csv(file_name_res_save, sep="\t", index=False) + return df + + +def ulit_request_file_zongjie(new_id, sentence, zongjie, title): + file_name_res_save = f"data_file_res/{title}.csv" + + # 初始化或读取CSV文件 + if os.path.exists(file_name_res_save): + df = pd.read_csv(file_name_res_save, sep="\t") + else: + df = pd.DataFrame(columns=["ID", "正文", "总结", "有效", "已向量化"]) + + # # 添加新数据(生成唯一ID) + # new_row = { + # "ID": str(new_id), + # "正文": sentence, + # "总结": zongjie, + # "有效": True, + # "已向量化": False + # } + # df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) + + # 筛选需要处理的记录 + to_process = df[df["有效"] == True] + + # 调用API生成总结(示例保留原有逻辑) + data_list = [] + for _, row in to_process.iterrows(): + data_list.append({ + "model": "gpt-4-turbo", + "messages": [{ + "role": "user", + "content": f"{row['正文']}\n以上这条中可能包含了一些病情或者症状,请帮我归纳这条中所对应的病情或者症状是哪些,总结出来,不需要很长,简单归纳即可,直接输出症状或者病情,可以包含一些形容词来辅助描述,不需要有辅助词汇" + }], + "top_p": 0.9, "temperature": 0.6 }) @@ -227,6 +276,7 @@ def ulit_request_file(new_id, sentence, title): df.to_csv(file_name_res_save, sep="\t", index=False) return df + def main(question, title, top): db_dict = { "1": "yetianshi" @@ -264,7 +314,7 @@ def main(question, title, top): vector_path = f"data_np/{title_dan}.npy" vectors = np.load(vector_path) - data_str = pd.read_csv(f"data_file/{title_dan}.csv", sep="\t", encoding="utf-8").values.tolist() + data_str = pd.read_csv(f"data_file_res/{title_dan}.csv", sep="\t", encoding="utf-8").values.tolist() index.add(vectors) D, I = index.search(embs, int(top)) print(I) @@ -274,7 +324,7 @@ def main(question, title, top): reference_list.append([data_str[i], j]) for i,j in enumerate(reference_list): - paper_list_str += "第{}篇\n{},此篇文章跟问题的相关度为{}%\n".format(str(i+1), j[0][0], j[1]) + paper_list_str += "第{}篇\n{},此篇文章跟问题的相关度为{}%\n".format(str(i+1), j[0][1], j[1]) ''' @@ -334,23 +384,39 @@ def upload_file_check(): sentence = request.form.get('sentence') title = request.form.get("title") new_id = request.form.get("id") + zongjie = request.form.get("zongjie") state = request.form.get("state") ''' { - "1": "csv", + "1": "csv", "2": "xlsx", "3": "txt", "4": "pdf" } ''' + # 增 state_res = "" if state == "1": df = ulit_request_file(new_id, sentence, title) Building_vector_database(title, df) state_res = "上传完成" + + # 删 elif state == "2": delete_data(title, new_id) state_res = "删除完成" + + # 改 + elif state == "3": + df = ulit_request_file(new_id, sentence, title) + Building_vector_database(title, df) + state_res = "修改完成" + + # 查 + elif state == "4": + df = ulit_request_file(new_id, sentence, title) + state_res = "" + return_json = { "code": 200, "info": state_res