Browse Source

修复bug

master
majiahui@haimaqingfan.com 2 months ago
parent
commit
9938da6680
  1. 58
      main.py

58
main.py

@ -10,6 +10,8 @@ import requests
import time
from flask import Flask, jsonify
from flask import request
import pandas as pd
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False
@ -18,7 +20,7 @@ model = SentenceTransformer('/home/majiahui/project/models-llm/bge-large-zh-v1.5
propmt_connect = '''我是一名中医,你是一个中医的医生的助理,我的患者有一个症状,症状如下:
{}
根据这些症状我通过查找资料{}
请根据上面的这些资料和方子根据患者的症状帮我开出正确的药方和治疗方案'''
请根据上面的这些资料和方子并根据每篇文章的转发数确定文章的重要程度转发数越高的文章最终答案的参考度越高反之越低根据患者的症状和上面的文章的资料的重要程度以及文章和症状的匹配程度帮我开出正确的药方和治疗方案'''
propmt_connect_ziliao = '''在“{}”资料中,有如下相关内容:
{}'''
@ -55,10 +57,10 @@ def shengcehng_array(data):
embs = model.encode(data, normalize_embeddings=True)
return embs
def Building_vector_database(type, name, data):
def Building_vector_database(type, name, df):
data_ndarray = np.empty((0, 1024))
for sen in data:
data_ndarray = np.concatenate((data_ndarray, shengcehng_array([sen])))
for sen in df:
data_ndarray = np.concatenate((data_ndarray, shengcehng_array([sen[0]])))
print("data_ndarray.shape", data_ndarray.shape)
print("data_ndarray.shape", data_ndarray.shape)
@ -68,21 +70,22 @@ def Building_vector_database(type, name, data):
def ulit_request_file(file, title):
file_name = file.filename
file_name_save = "data_file/{}.txt".format(title)
file_name_save = "data_file/{}.csv".format(title)
file.save(file_name_save)
try:
with open(file_name_save, encoding="gbk") as f:
content = f.read()
except:
with open(file_name_save, encoding="utf-8") as f:
content = f.read()
# try:
# with open(file_name_save, encoding="gbk") as f:
# content = f.read()
# except:
# with open(file_name_save, encoding="utf-8") as f:
# content = f.read()
# elif file_name.split(".")[-1] == "docx":
# content = docx2txt.process(file_name_save)
content_list = [i for i in content.split("\n")]
# content_list = [i for i in content.split("\n")]
df = pd.read_csv(file_name_save, sep="\t", encoding="utf-8").values.tolist()
return content_list
return df
def main(question, db_type, top):
@ -116,19 +119,20 @@ def main(question, db_type, top):
paper_list_str = ""
for i in db_type_list:
embs = shengcehng_array([question])
index = faiss.IndexFlatL2(d) # buid the index
index = faiss.IndexFlatIP(d) # buid the index
data_np = np.load(f"data_np/{i}.npy")
data_str = open(f"data_file/{i}.txt").read().split("\n")
# data_str = open(f"data_file/{i}.txt").read().split("\n")
data_str = pd.read_csv(f"data_file/{i}.csv", sep="\t", encoding="utf-8").values.tolist()
index.add(data_np)
D, I = index.search(embs, int(top))
print(I)
reference_list = []
for i in I[0]:
reference_list.append(data_str[i])
for i,j in zip(I[0], D[0]):
reference_list.append([data_str[i], j])
for i,j in enumerate(reference_list):
paper_list_str += "{}\n{}\n".format(str(i+1), j)
paper_list_str += "{}\n{},此篇文章的转发数为{},评论数为{},点赞数为{}\n,此篇文章跟问题的相关度为{}%\n".format(str(i+1), j[0][0], j[0][1], j[0][2], j[0][3], j[1])
'''
构造prompt
@ -179,8 +183,22 @@ def upload_file():
print(request.remote_addr)
file = request.files.get('file')
title = request.form.get("title")
data = ulit_request_file(file, title)
Building_vector_database("1", title, data)
df = ulit_request_file(file, title)
Building_vector_database("1", title, df)
return_json = {
"code": 200,
"info": "上传完成"
}
return jsonify(return_json) # 返回结果
@app.route("/upload_file_check", methods=["POST"])
def upload_file_check():
print(request.remote_addr)
file = request.files.get('file')
title = request.form.get("title")
df = ulit_request_file(file, title)
Building_vector_database("1", title, df)
return_json = {
"code": 200,
"info": "上传完成"

Loading…
Cancel
Save