增加测试 socket 请求，并更改流程

5 months ago · 8d7708f7b0
1 changed files with 69 additions and 3 deletions
--- a/main.py
+++ b/main.py
@@ -211,6 +211,55 @@ def ulit_request_file(new_id, sentence, title):
                "content": f"{row['正文']}\n以上这条中可能包含了一些病情或者症状，请帮我归纳这条中所对应的病情或者症状是哪些，总结出来，不需要很长，简单归纳即可，直接输出症状或者病情，可以包含一些形容词来辅助描述，不需要有辅助词汇"
            }],
            "top_p": 0.9,
            "temperature": 0.3
        })
    # 并发处理请求
    with concurrent.futures.ThreadPoolExecutor(200) as executor:
        results = list(executor.map(dialog_line_parse, data_list))
    # 更新总结字段
    for idx, result in zip(to_process.index, results):
        summary = result['choices'][0]['message']['content']
        df.at[idx, "总结"] = summary
    # 保存更新后的CSV
    df.to_csv(file_name_res_save, sep="\t", index=False)
    return df
 def ulit_request_file_zongjie(new_id, sentence, zongjie, title):
    file_name_res_save = f"data_file_res/{title}.csv"
    # 初始化或读取CSV文件
    if os.path.exists(file_name_res_save):
        df = pd.read_csv(file_name_res_save, sep="\t")
    else:
        df = pd.DataFrame(columns=["ID", "正文", "总结", "有效", "已向量化"])
    # # 添加新数据（生成唯一ID）
    # new_row = {
    #     "ID": str(new_id),
    #     "正文": sentence,
    #     "总结": zongjie,
    #     "有效": True,
    #     "已向量化": False
    # }
    # df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    # 筛选需要处理的记录
    to_process = df[df["有效"] == True]
    # 调用API生成总结（示例保留原有逻辑）
    data_list = []
    for _, row in to_process.iterrows():
        data_list.append({
            "model": "gpt-4-turbo",
            "messages": [{
                "role": "user",
                "content": f"{row['正文']}\n以上这条中可能包含了一些病情或者症状，请帮我归纳这条中所对应的病情或者症状是哪些，总结出来，不需要很长，简单归纳即可，直接输出症状或者病情，可以包含一些形容词来辅助描述，不需要有辅助词汇"
            }],
            "top_p": 0.9,
            "temperature": 0.6
        })
@@ -227,6 +276,7 @@ def ulit_request_file(new_id, sentence, title):
    df.to_csv(file_name_res_save, sep="\t", index=False)
    return df
 def main(question, title, top):
    db_dict = {
        "1": "yetianshi"
@@ -264,7 +314,7 @@ def main(question, title, top):
        vector_path = f"data_np/{title_dan}.npy"
        vectors = np.load(vector_path)
-        data_str = pd.read_csv(f"data_file/{title_dan}.csv", sep="\t", encoding="utf-8").values.tolist()
+        data_str = pd.read_csv(f"data_file_res/{title_dan}.csv", sep="\t", encoding="utf-8").values.tolist()
        index.add(vectors)
        D, I = index.search(embs, int(top))
        print(I)
@@ -274,7 +324,7 @@ def main(question, title, top):
            reference_list.append([data_str[i], j])
        for i,j in enumerate(reference_list):
-            paper_list_str += "第{}篇\n{}，此篇文章跟问题的相关度为{}%\n".format(str(i+1), j[0][0], j[1])
+            paper_list_str += "第{}篇\n{}，此篇文章跟问题的相关度为{}%\n".format(str(i+1), j[0][1], j[1])
    '''
@@ -334,6 +384,7 @@ def upload_file_check():
    sentence = request.form.get('sentence')
    title = request.form.get("title")
    new_id = request.form.get("id")
    zongjie = request.form.get("zongjie")
    state = request.form.get("state")
    '''
        {
@@ -343,14 +394,29 @@ def upload_file_check():
            "4": "pdf"
        }
    '''
    # 增
    state_res = ""
    if state == "1":
        df = ulit_request_file(new_id, sentence, title)
        Building_vector_database(title, df)
        state_res = "上传完成"
    # 删
    elif state == "2":
        delete_data(title, new_id)
        state_res = "删除完成"
    # 改
    elif state == "3":
        df = ulit_request_file(new_id, sentence, title)
        Building_vector_database(title, df)
        state_res = "修改完成"
    # 查
    elif state == "4":
        df = ulit_request_file(new_id, sentence, title)
        state_res = ""
    return_json = {
        "code": 200,
        "info": state_res