|
|
|
import os
import json
import time
import concurrent.futures
from threading import Thread

import faiss
import numpy as np
import pandas as pd
import requests
import redis
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from flask import Flask, jsonify, Response, request
from flask_cors import CORS
from openai import OpenAI

|
|
app = Flask(__name__)
CORS(app)
app.config["JSON_AS_ASCII"] = False

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=1, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
batch_size = 32
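
# Upload requests are queued in the Redis list `db_key_query` by /upload_file_check and
# consumed by the classify() worker thread below. Each queued item is a JSON string of the
# form (fields taken from the /upload_file_check handler):
#   {"id": "<record id>", "sentence": "<raw text>", "state": "1", "title": "<knowledge-base name>"}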
|
|
|
|
|
|
|
|
# OpenAI-compatible endpoint (e.g. a locally served model behind a vLLM-style server).
# The client is required by model_generate_stream() below, so it is instantiated here.
openai_api_key = "token-abc123"
openai_api_base = "http://127.0.0.1:12011/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# models = client.models.list()
# model = models.data[0].id
model = "1"

model_encode = SentenceTransformer('/home/majiahui/project/models-llm/bge-large-zh-v1.5')
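
# Prompt templates below are kept in Chinese because they are sent verbatim to the Chinese-language
# model: `propmt_connect` frames the request as a TCM practitioner describing a patient's symptoms
# plus the retrieved reference material, and asks for a prescription/treatment plan weighted by each
# article's repost count; `propmt_connect_ziliao` wraps the retrieved passages for one knowledge base.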
|
|
|
|
propmt_connect = '''我是一名中医,你是一个中医的医生的助理,我的患者有一个症状,症状如下:
{}
根据这些症状,我通过查找资料,{}
请根据上面的这些资料和方子,并根据每篇文章的转发数确定文章的重要程度,转发数越高的文章,最终答案的参考度越高,反之越低。根据患者的症状和上面的文章的资料的重要程度以及文章和症状的匹配程度,帮我开出正确的药方和治疗方案'''

propmt_connect_ziliao = '''在“{}”资料中,有如下相关内容:
{}'''
|
|
|
|
|
|
|
|
|
|
|
|
def dialog_line_parse(text):
    """
    Send a chat-completion style payload to the remote model service and return its JSON response.

    :param text: request payload (model, messages, sampling parameters) posted as JSON
    :return: parsed JSON response on success, otherwise an empty dict
    """
    url_predict = "http://118.178.228.101:12004/predict"
    response = requests.post(
        url_predict,
        json=text,
        timeout=100000
    )

    if response.status_code == 200:
        return response.json()
    else:
        # logger.error(
        #     "【{}】 Failed to get a proper response from remote "
        #     "server. Status Code: {}. Response: {}"
        #     "".format(url, response.status_code, response.text)
        # )
        print("【{}】 Failed to get a proper response from remote "
              "server. Status Code: {}. Response: {}"
              "".format(url_predict, response.status_code, response.text))
        return {}
|
|
|
|
|
|
|
|
# ['choices'][0]['message']['content']
#
# text = text['messages'][0]['content']
# return_text = {
#     'code': 200,
#     'id': "1",
#     'object': 0,
#     'created': 0,
#     'model': 0,
#     'choices': [
#         {
#             'index': 0,
#             'message': {
#                 'role': 'assistant',
#                 'content': text
#             },
#             'logprobs': None,
#             'finish_reason': 'stop'
#         }
#     ],
#     'usage': 0,
#     'system_fingerprint': 0
# }
# return return_text
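
# For reference, dialog_line_parse() is called with an OpenAI-style chat payload such as the
# hypothetical example below (built in ulit_request_file), and the summary text is read back
# from result['choices'][0]['message']['content']:
#
#   {
#       "model": "gpt-4-turbo",
#       "messages": [{"role": "user", "content": "患者主诉……请归纳对应的病情或症状"}],
#       "top_p": 0.9,
#       "temperature": 0.6
#   }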
|
|
|
|
|
|
|
|
|
|
|
|
def shengcehng_array(data):
    embs = model_encode.encode(data, normalize_embeddings=True)
    return embs
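
# Note: bge-large-zh-v1.5 produces 1024-dimensional embeddings (matching d = 1024 used below),
# and normalize_embeddings=True means the inner-product search done with faiss.IndexFlatIP is
# equivalent to cosine similarity.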
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def Building_vector_database(title, df):
    # Select rows that still need to be vectorized (valid and not yet embedded)
    to_process = df[(df["有效"] == True) & (df["已向量化"] == False)]

    if len(to_process) == 0:
        print("无新增数据需要向量化")
        return

    # Generate embeddings for the new summaries
    new_vectors = shengcehng_array(to_process["总结"].tolist())

    # Load the existing vector store and index for this knowledge base
    vector_path = f"data_np/{title}.npy"
    index_path = f"data_np/{title}_index.json"

    vectors = np.load(vector_path) if os.path.exists(vector_path) else np.empty((0, 1024))
    index_data = {}
    if os.path.exists(index_path):
        with open(index_path, "r") as f:
            index_data = json.load(f)

    # Append the new vectors and record each ID's row position in the store
    start_idx = len(vectors)
    vectors = np.vstack([vectors, new_vectors])

    for i, (_, row) in enumerate(to_process.iterrows()):
        index_data[row["ID"]] = {
            "row": start_idx + i,
            "valid": True
        }

    # Persist vectors and index
    np.save(vector_path, vectors)
    with open(index_path, "w") as f:
        json.dump(index_data, f)

    # Mark the processed rows as vectorized and save the CSV
    df.loc[to_process.index, "已向量化"] = True
    df.to_csv(f"data_file_res/{title}.csv", sep="\t", index=False)
|
|
|
|
|
|
|
|
|
|
|
|
def delete_data(title, data_id):
    # Mark the record as invalid in the CSV
    csv_path = f"data_file_res/{title}.csv"
    df = pd.read_csv(csv_path, sep="\t", dtype={"ID": str})
    df.loc[df["ID"] == data_id, "有效"] = False
    df.to_csv(csv_path, sep="\t", index=False)

    # Mark the record as invalid in the vector index
    index_path = f"data_np/{title}_index.json"
    if os.path.exists(index_path):
        with open(index_path, "r+") as f:
            index_data = json.load(f)
            if data_id in index_data:
                index_data[data_id]["valid"] = False
                f.seek(0)
                json.dump(index_data, f)
                f.truncate()
|
|
|
|
|
|
|
|
|
|
|
|
def check_file_exists(file_path):
    """
    Check whether a file exists.

    :param file_path: path to check
    :return: True if the file exists, otherwise False
    """
    return os.path.isfile(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
def ulit_request_file(new_id, sentence, title):
    file_name_res_save = f"data_file_res/{title}.csv"

    # Initialize or load the per-knowledge-base CSV
    if os.path.exists(file_name_res_save):
        df = pd.read_csv(file_name_res_save, sep="\t")
        # Skip if the same raw text has already been stored
        if sentence in df["正文"].values:
            print("正文已存在,跳过处理")
            return df
    else:
        df = pd.DataFrame(columns=["ID", "正文", "总结", "有效", "已向量化"])

    # Append the new record (ID is supplied by the caller)
    new_row = {
        "ID": str(new_id),
        "正文": sentence,
        "总结": None,
        "有效": True,
        "已向量化": False
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    # Rows that are valid but still have no summary
    to_process = df[(df["总结"].isna()) & (df["有效"] == True)]

    # Build one chat-completion request per row asking the model to summarize its symptoms
    data_list = []
    for _, row in to_process.iterrows():
        data_list.append({
            "model": "gpt-4-turbo",
            "messages": [{
                "role": "user",
                "content": f"{row['正文']}\n以上这条中可能包含了一些病情或者症状,请帮我归纳这条中所对应的病情或者症状是哪些,总结出来,不需要很长,简单归纳即可,直接输出症状或者病情,可以包含一些形容词来辅助描述,不需要有辅助词汇"
            }],
            "top_p": 0.9,
            "temperature": 0.6
        })

    # Send the requests concurrently
    with concurrent.futures.ThreadPoolExecutor(200) as executor:
        results = list(executor.map(dialog_line_parse, data_list))

    # Write the generated summaries back; rows whose request failed keep an empty summary
    # and will be retried on the next call
    for idx, result in zip(to_process.index, results):
        if result and "choices" in result:
            df.at[idx, "总结"] = result['choices'][0]['message']['content']

    # Save the updated CSV
    df.to_csv(file_name_res_save, sep="\t", index=False)
    return df
|
|
|
|
|
|
|
|
def main(question, title, top):
    db_dict = {
        "1": "yetianshi"
    }
    # Pipeline outline (kept from the original notes):
    #   define file paths -> load files -> split text -> build the vector database
    #   (1. direct matching; 2. optionally have the LLM turn the text into a question before matching)

    # Match stored context against the question
    d = 1024
    db_type_list = title.split(",")

    paper_list_str = ""
    for title_dan in db_type_list:
        embs = shengcehng_array([question])
        index = faiss.IndexFlatIP(d)  # build the index

        # Load the stored vectors and index for this knowledge base
        vector_path = f"data_np/{title_dan}.npy"
        index_path = f"data_np/{title_dan}_index.json"

        if not os.path.exists(vector_path) or not os.path.exists(index_path):
            # No vector store yet for this knowledge base; return a plain message so the
            # /search response stays valid text
            return "该知识库暂无数据"

        vectors = np.load(vector_path)
        with open(index_path, "r") as f:
            index_data = json.load(f)

        data_str = pd.read_csv(f"data_file_res/{title_dan}.csv", sep="\t", encoding="utf-8").values.tolist()
        index.add(vectors)
        D, I = index.search(embs, int(top))
        print(I)

        reference_list = []
        for i, j in zip(I[0], D[0]):
            if i == -1:  # faiss pads with -1 when fewer than `top` vectors are stored
                continue
            if data_str[i][3] == True:  # column 3 is the "有效" flag
                reference_list.append([data_str[i], j])

        for i, j in enumerate(reference_list):
            paper_list_str += "第{}篇\n{},此篇文章跟问题的相关度为{}%\n".format(str(i + 1), j[0][1], round(float(j[1]) * 100, 2))

    # Build the prompt
    print("paper_list_str", paper_list_str)
    propmt_connect_ziliao_input = []
    for i in db_type_list:
        propmt_connect_ziliao_input.append(propmt_connect_ziliao.format(i, paper_list_str))

    propmt_connect_ziliao_input_str = ",".join(propmt_connect_ziliao_input)
    propmt_connect_input = propmt_connect.format(question, propmt_connect_ziliao_input_str)

    # Generate the answer (streamed)
    return model_generate_stream(propmt_connect_input)
|
|
|
|
|
|
|
|
def classify():  # worker loop: consume queued upload requests
    while True:
        if redis_.llen(db_key_query) == 0:  # nothing queued, poll again later
            time.sleep(3)
            continue
        query = redis_.lpop(db_key_query).decode('UTF-8')  # pop one queued request
        data_dict = json.loads(query)
        if data_dict["state"] == "1":
            new_id = data_dict["id"]
            sentence = data_dict["sentence"]
            title = data_dict["title"]
            df = ulit_request_file(new_id, sentence, title)
            Building_vector_database(title, df)
|
|
|
|
|
|
|
|
|
|
|
|
def model_generate_stream(prompt):
    messages = [
        {"role": "user", "content": prompt}
    ]

    stream = client.chat.completions.create(model=model,
                                            messages=messages,
                                            stream=True)

    printed_reasoning_content = False
    printed_content = False

    for chunk in stream:
        reasoning_content = None
        content = None
        # Check whether this delta carries reasoning_content or content
        if hasattr(chunk.choices[0].delta, "reasoning_content"):
            reasoning_content = chunk.choices[0].delta.reasoning_content
        elif hasattr(chunk.choices[0].delta, "content"):
            content = chunk.choices[0].delta.content

        if reasoning_content is not None:
            if not printed_reasoning_content:
                printed_reasoning_content = True
                print("reasoning_content:", end="", flush=True)
            print(reasoning_content, end="", flush=True)
        elif content is not None:
            if not printed_content:
                printed_content = True
                print("\ncontent:", end="", flush=True)
            # Extract and print the content
            # print(content, end="", flush=True)
            print(content)
            yield content
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/upload_file_check", methods=["POST"])
|
|
|
|
def upload_file_check():
|
|
|
|
print(request.remote_addr)
|
|
|
|
sentence = request.json['sentence']
|
|
|
|
title = request.json["title"]
|
|
|
|
new_id = request.json["id"]
|
|
|
|
state = request.json["state"]
|
|
|
|
'''
|
|
|
|
{
|
|
|
|
"1": "csv",
|
|
|
|
"2": "xlsx",
|
|
|
|
"3": "txt",
|
|
|
|
"4": "pdf"
|
|
|
|
}
|
|
|
|
'''
|
|
|
|
state_res = ""
|
|
|
|
if state == "1":
|
|
|
|
# df = ulit_request_file(new_id, sentence, title)
|
|
|
|
# Building_vector_database(title, df)
|
|
|
|
redis_.rpush(db_key_query, json.dumps({
|
|
|
|
"id": new_id,
|
|
|
|
"sentence": sentence,
|
|
|
|
"state": state,
|
|
|
|
"title": title
|
|
|
|
})) # 加入redis
|
|
|
|
state_res = "上传完成"
|
|
|
|
elif state == "2":
|
|
|
|
delete_data(title, new_id)
|
|
|
|
state_res = "删除完成"
|
|
|
|
return_json = {
|
|
|
|
"code": 200,
|
|
|
|
"info": state_res
|
|
|
|
}
|
|
|
|
return jsonify(return_json) # 返回结果
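
# A hypothetical request to /upload_file_check (field names follow the handler above):
#   POST /upload_file_check
#   {"id": "123", "sentence": "患者畏寒、乏力……", "title": "yetianshi", "state": "1"}
# responds with {"code": 200, "info": "上传完成"}; state "2" instead deletes the record with that id.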
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/search", methods=["POST"])
|
|
|
|
def search():
|
|
|
|
print(request.remote_addr)
|
|
|
|
texts = request.json["texts"]
|
|
|
|
title = request.json["title"]
|
|
|
|
top = request.json["top"]
|
|
|
|
response = main(texts, title, top)
|
|
|
|
return Response(response, mimetype='text/plain; charset=utf-8') # 返回结果
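
# A hypothetical request to /search: POST JSON such as
#   {"texts": "患者咳嗽痰多,怕冷", "title": "yetianshi", "top": 5}
# where "title" may list several knowledge bases separated by commas and "top" is the number of
# passages retrieved per base; the reply is streamed back as plain UTF-8 text.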
|
|
|
|
|
|
|
|
t = Thread(target=classify)
t.start()

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=27000, threaded=True, debug=False)
|