使用vllm部署
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

52 lines
1.5 KiB

from flask import Flask, jsonify
from flask import request
import redis
import uuid
import json
import time
import socket
def get_host_ip():
    """
    Look up the IP address of the local machine.

    A UDP socket is connected to 8.8.8.8:80 and the local address that the
    socket reports afterwards is returned.

    :return: ip, the first element of the socket's ``getsockname()`` result
    :raises OSError: if the socket cannot be created or connected
    """
    # The context manager closes the socket on every exit path. Creating the
    # socket in the ``with`` header (instead of inside a try/finally that
    # closes it) also means a failure in ``socket.socket`` surfaces as the
    # real error rather than tripping over an unbound name during cleanup.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(('8.8.8.8', 80))
        return s.getsockname()[0]
# Flask application object; the /predict route is registered on it.
app = Flask(__name__)
# Ask Flask not to escape non-ASCII characters in JSON responses.
# NOTE(review): newer Flask releases configure this through
# ``app.json.ensure_ascii`` instead -- confirm against the deployed version.
app.config["JSON_AS_ASCII"] = False

# NOTE(review): the Redis password is hard-coded in source; consider loading
# it from the environment or a secrets store.
# ``decode_responses`` has to be set on the pool: ``redis.Redis`` does not
# apply connection-level options when it is handed an existing
# ``connection_pool``, so passing it to ``redis.Redis`` had no effect.
pool = redis.ConnectionPool(
    host='localhost',
    port=63179,
    max_connections=50,
    db=11,
    password="zhicheng123*",
    decode_responses=True,
)
redis_ = redis.Redis(connection_pool=pool)

# Name of the Redis list that incoming queries are pushed onto.
db_key_query = 'query'
# The three names below are not referenced by the code visible in this file.
db_key_query_articles_directory = 'query_articles_directory'
db_key_result = 'result'
batch_size = 32
@app.route("/predict", methods=["POST"])
def handle_query():
    """
    Queue a user query in Redis and wait for its result.

    The JSON request body must be an object with a "texts" entry, for example
    ``{"texts": "I love you"}``. The text is wrapped in a
    ``User:...\\nAssistant:`` prompt, tagged with a freshly generated id and
    pushed onto the ``db_key_query`` list. The handler then polls Redis once
    per second for a key equal to that id; when a value appears, the key is
    deleted and the JSON-decoded value is returned.

    :return: JSON ``{'code': "200", 'data': <decoded result>}`` on success, or
        a JSON error body with HTTP status 400 when the request body is not
        an object containing "texts"
    """
    payload = request.json
    # Reject malformed bodies up front instead of failing with a
    # KeyError/TypeError (HTTP 500) on the lookup below.
    if not isinstance(payload, dict) or "texts" not in payload:
        return jsonify({'code': "400", 'message': 'JSON body must contain "texts"'}), 400

    text = payload["texts"]  # text of the user's query, e.g. "I love you"
    text = f"User:{text}\nAssistant:"
    id_ = str(uuid.uuid1())  # unique identifier for this query
    # Bind the text to the query id and enqueue it in Redis.
    redis_.rpush(db_key_query, json.dumps({'id': id_, 'text': text}))

    # Poll for the result stored under the query id (presumably written by a
    # separate worker that consumes the queue -- not visible in this file).
    # NOTE(review): there is no upper bound on this wait; if nothing ever
    # writes the key, the request blocks forever. Consider adding a timeout.
    while True:
        result = redis_.get(id_)
        if result is not None:
            break
        time.sleep(1)

    redis_.delete(id_)  # the result is consumed exactly once
    return jsonify({'code': "200", 'data': json.loads(result)})
if __name__ == "__main__":
    # Script entry point: serve the app on 0.0.0.0:18001 with Flask's debug
    # mode switched off.
    app.run(host='0.0.0.0', port=18001, debug=False)