"""HTTP front end that queues text queries in Redis and waits for a result.

``POST /predict`` pushes the request's ``texts`` payload onto the Redis list
named by ``db_key_query`` under a freshly generated id, then polls Redis for a
key equal to that id. Whatever JSON value is found there is returned to the
client. The component that consumes the list and writes the result key is not
part of this file.
"""
import os

# Set before the CUDA-backed imports below so they are restricted to GPU 0.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import concurrent.futures
import json
import socket
import threading
import time
import uuid
from threading import Thread

import redis
import requests
from flask import Flask, jsonify, request
from transformers import pipeline
from vllm import LLM, SamplingParams

app = Flask(__name__)
# Emit non-ASCII characters verbatim in JSON responses. Older Flask releases
# read the config key; newer ones only honour the JSON provider attribute.
app.config["JSON_AS_ASCII"] = False
if hasattr(app, "json"):
    app.json.ensure_ascii = False

# decode_responses has to be configured on the pool: redis.Redis ignores its
# own connection keyword arguments when an explicit connection_pool is given.
# NOTE(security): the default password is a credential committed to source; it
# is kept only for backward compatibility. Prefer setting REDIS_PASSWORD.
pool = redis.ConnectionPool(
    host='localhost',
    port=63179,
    max_connections=50,
    db=4,
    password=os.environ.get("REDIS_PASSWORD", "zhicheng123*"),
    decode_responses=True,
)
redis_ = redis.Redis(connection_pool=pool)

db_key_query = 'query'
db_key_query_articles_directory = 'query_articles_directory'
db_key_result = 'result'

# Seconds to wait between two look-ups of the result key.
_POLL_INTERVAL_SECONDS = 1
# Upper bound on how long one request waits for its result, so that a stalled
# consumer cannot pin a server thread forever. Override via the environment.
RESULT_TIMEOUT_SECONDS = float(os.environ.get("PREDICT_TIMEOUT_SECONDS", "600"))


@app.route("/predict", methods=["POST"])
def handle_query():
    """Queue the posted ``texts`` value and return the result stored for it.

    Expects a JSON object body containing a ``texts`` field.

    Returns:
        A JSON response ``{'code': "200", 'data': <decoded result>}`` once a
        value appears in Redis under the generated query id; a 400 response
        when the body is not a JSON object with ``texts``; a 504 response when
        no result appears within ``RESULT_TIMEOUT_SECONDS``.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or "texts" not in payload:
        return jsonify({
            'code': "400",
            'message': 'request body must be a JSON object with a "texts" field',
        }), 400

    text = payload["texts"]  # text of the user's query, e.g. "I love you"
    id_ = str(uuid.uuid1())  # unique identifier for this query
    d = {'id': id_, 'text': text}  # bind the text to the query id
    redis_.rpush(db_key_query, json.dumps(d))  # enqueue in Redis

    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + RESULT_TIMEOUT_SECONDS
    time.sleep(_POLL_INTERVAL_SECONDS)
    while True:
        result = redis_.get(id_)  # look up the result stored for this query
        if result is not None:
            redis_.delete(id_)  # the result is single-use; remove it once read
            return jsonify({'code': "200", 'data': json.loads(result)})
        if time.monotonic() >= deadline:
            # NOTE(review): a result written after this point is never read or
            # deleted by this handler — consider an expiry on the writer side.
            return jsonify({
                'code': "504",
                'message': 'timed out waiting for the query result',
            }), 504
        time.sleep(_POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    app.run(debug=False, host='0.0.0.0', port=12001)