使用vllm部署
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

136 lines
5.2 KiB

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from flask import Flask, jsonify
from flask import request
from transformers import pipeline
import redis
import uuid
import json
from threading import Thread
from vllm import LLM, SamplingParams
import time
import threading
import time
import concurrent.futures
import requests
import socket
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50,db=4, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
db_key_query = 'query'
db_key_querying = 'querying'
db_key_queryset = 'queryset'
db_key_result = 'result'
db_key_error = 'error'
def smtp_f(name):
# 在下面的代码行中使用断点来调试脚本。
import smtplib
from email.mime.text import MIMEText
from email.header import Header
sender = '838878981@qq.com' # 发送邮箱
receivers = ['838878981@qq.com'] # 接收邮箱
auth_code = "jfqtutaiwrtdbcge" # 授权码
message = MIMEText('基础大模型出现错误,紧急', 'plain', 'utf-8')
message['From'] = Header("Sender<%s>" % sender) # 发送者
message['To'] = Header("Receiver<%s>" % receivers[0]) # 接收者
subject = name
message['Subject'] = Header(subject, 'utf-8')
try:
server = smtplib.SMTP_SSL('smtp.qq.com', 465)
server.login(sender, auth_code)
server.sendmail(sender, receivers, message.as_string())
print("邮件发送成功")
server.close()
except smtplib.SMTPException:
print("Error: 无法发送邮件")
@app.route("/predict", methods=["POST"])
def predict():
text = request.json["texts"] # 获取用户query中的文本 例如"I love you"
id_ = str(uuid.uuid1()) # 为query生成唯一标识
print("uuid: ", uuid)
d = {'id': id_, 'text': text} # 绑定文本和query id
try:
if text == None:
return_text = {"texts": "请求文本不能为空", "probabilities": None, "status_code": 401}
else:
load_request_path = './request_data_logs/{}.json'.format(id_)
with open(load_request_path, 'w', encoding='utf8') as f2:
# ensure_ascii=False才能输入中文,否则是Unicode字符
# indent=2 JSON数据的缩进,美观
json.dump(d, f2, ensure_ascii=False, indent=4)
redis_.rpush(db_key_query, json.dumps({"id": id_, "path": load_request_path})) # 加入redis
redis_.sadd(db_key_querying, id_)
redis_.sadd(db_key_queryset, id_)
return_text = {"texts": {'id': id_, }, "probabilities": None, "status_code": 200}
except:
return_text = {"texts": {'id': id_, }, "probabilities": None, "status_code": 400}
smtp_f("vllm-main")
return jsonify(return_text) # 返回结果
@app.route("/search", methods=["POST"])
def search():
id_ = request.json['id'] # 获取用户query中的文本 例如"I love you"
result = redis_.get(id_) # 获取该query的模型结果
try:
if result is not None:
result_path = result.decode('UTF-8')
with open(result_path, encoding='utf8') as f1:
# 加载文件的对象
result_dict = json.load(f1)
code = result_dict["status_code"]
texts = result_dict["texts"]
probabilities = result_dict["probabilities"]
if str(code) == 400:
redis_.rpush(db_key_error, json.dumps({"id": id_}))
return False
result_text = {'code': code, 'text': texts, 'probabilities': probabilities}
else:
querying_list = list(redis_.smembers(db_key_querying))
querying_set = set()
for i in querying_list:
querying_set.add(i.decode())
querying_bool = False
if id_ in querying_set:
querying_bool = True
query_list_json = redis_.lrange(db_key_query, 0, -1)
query_set_ids = set()
for i in query_list_json:
data_dict = json.loads(i)
query_id = data_dict['id']
query_set_ids.add(query_id)
query_bool = False
if id_ in query_set_ids:
query_bool = True
if querying_bool == True and query_bool == True:
result_text = {'code': "201", 'text': "", 'probabilities': None}
elif querying_bool == True and query_bool == False:
result_text = {'code': "202", 'text': "", 'probabilities': None}
else:
result_text = {'code': "203", 'text': "", 'probabilities': None}
load_request_path = './request_data_logs_203/{}.json'.format(id_)
with open(load_request_path, 'w', encoding='utf8') as f2:
# ensure_ascii=False才能输入中文,否则是Unicode字符
# indent=2 JSON数据的缩进,美观
json.dump(result_text, f2, ensure_ascii=False, indent=4)
except:
smtp_f("vllm-main")
result_text = {'code': "400", 'text': "", 'probabilities': None}
return jsonify(result_text) # 返回结果
if __name__ == "__main__":
app.run(debug=False, host='0.0.0.0', port=12000)