使用vllm部署
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

76 lines
1.8 KiB

import threading
import requests
import time
# Module-wide tallies of successful and failed requests. They are updated from
# the worker threads, so every write must happen while holding `lock`.
success_count = failure_count = 0
lock = threading.Lock()
def dialog_line_parse(url, text, timeout=1000):
    """Post ``text`` to the model service as JSON and return its decoded reply.

    :param url: endpoint of the model service
    :param text: JSON-serializable payload sent as the request body
    :param timeout: value forwarded to ``requests.post`` as its ``timeout``
        (seconds); defaults to the previously hard-coded 1000
    :return: the decoded JSON body when the service answers with HTTP 200;
        otherwise an empty list, after printing the failure details and the
        payload that was sent
    """
    response = requests.post(url, json=text, timeout=timeout)
    if response.status_code == 200:
        return response.json()

    # Non-200 reply: report the status and body together with the payload so
    # the failing request can be reproduced, then return an empty placeholder.
    print(
        "【{}】 Failed to get a proper response from remote "
        "server. Status Code: {}. Response: {}"
        "".format(url, response.status_code, response.text)
    )
    print(text)
    return []
# Worker run by each thread: send one request and record the outcome.
def make_request(url):
    """Send one fixed test prompt to ``url`` and update the global tallies.

    The ``'data'`` field of the reply is printed and ``success_count`` is
    incremented. Any ordinary exception (network error, timeout, missing
    ``'data'`` key, or the empty list ``dialog_line_parse`` returns for a
    non-200 reply) is reported and counted in ``failure_count`` instead.

    :param url: endpoint of the model service to call
    :return: None
    """
    global success_count, failure_count
    try:
        a = dialog_line_parse(url, {"texts": "User:你好\nAssistant:"})['data']
        print(a)
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed, and say why the
        # request failed instead of only bumping the counter.
        print(f"Request to {url} failed: {exc!r}")
        with lock:
            failure_count += 1
    else:
        # The counters are shared between threads; update them under the lock.
        with lock:
            success_count += 1
# URLs to request concurrently: the same endpoint repeated 30 times.
urls = [
    'http://192.168.31.74:18001/predict',
    # more URLs can be added here
] * 30

# Worker threads, kept so that all of them can be joined below.
threads = []

# Start one thread per URL. perf_counter() is a monotonic clock, so the
# measured duration is unaffected by wall-clock adjustments during the run.
start = time.perf_counter()
for url in urls:
    thread = threading.Thread(target=make_request, args=(url,))
    thread.start()
    threads.append(thread)

# Wait for every request to finish before stopping the clock.
for thread in threads:
    thread.join()
end = time.perf_counter()

# Report the total elapsed seconds followed by the success/failure tallies.
print(end - start)
print(f"Successful requests: {success_count}")
print(f"Failed requests: {failure_count}")