# vllm/yace2.py


								import threading

								import requests

								import time


								# Shared tallies of succeeded and failed requests. make_request updates
								# them from several threads, so each update is done while holding ``lock``.
								lock = threading.Lock()
								success_count = failure_count = 0


								def dialog_line_parse(url, text, timeout=1000):
								    """
								    POST ``text`` as a JSON body to the model endpoint and return its reply.

								    :param url: URL of the model endpoint.
								    :param text: JSON-serializable payload sent as the request body.
								    :param timeout: timeout in seconds handed to ``requests.post``;
								        defaults to 1000, the value that was previously hard-coded.
								    :return: the decoded JSON body when the status code is 200; otherwise
								        an empty list, after printing the failure details and the payload.
								    """
								    response = requests.post(url, json=text, timeout=timeout)
								    if response.status_code == 200:
								        return response.json()

								    # Non-200 reply: report what the server said and what was sent.
								    print(
								        "【{}】 Failed to get a proper response from remote "
								        "server. Status Code: {}. Response: {}"
								        "".format(url, response.status_code, response.text)
								    )
								    print(text)
								    # Failure is signalled with an empty list, not an exception.
								    return []


								# Perform one HTTP request and tally its outcome.
								def make_request(url):
								    """
								    Send one fixed chat prompt to ``url`` and record success or failure.

								    A request succeeds when the ``'data'`` entry of the value returned by
								    ``dialog_line_parse`` can be read; that entry is printed. Any
								    ``Exception`` raised along the way is reported and counted as a failure.

								    :param url: model endpoint to query.
								    :return: None; the outcome is added to the module-level counters.
								    """
								    global success_count, failure_count

								    try:
								        a = dialog_line_parse(url, {"texts": "User:你好\nAssistant:"})['data']
								        print(a)
								    except Exception as exc:
								        # A non-200 reply also ends up here: dialog_line_parse returns []
								        # and indexing a list with 'data' raises TypeError.
								        # Only Exception is caught so SystemExit/KeyboardInterrupt are
								        # not silently swallowed.
								        print(f"Request to {url} failed: {exc!r}")
								        with lock:
								            failure_count += 1
								    else:
								        with lock:
								            success_count += 1


								# URLs to request concurrently: the single endpoint repeated 30 times.
								urls = [
								    'http://192.168.31.74:18001/predict',
								    # More URLs can be added here.
								] * 30

								# Threads that have been started, kept so they can all be joined below.
								threads = []

								# Create and start one thread per URL. perf_counter() is used for the
								# elapsed time because it is monotonic, unlike the wall clock read by
								# time.time().
								start = time.perf_counter()
								for url in urls:
								    thread = threading.Thread(target=make_request, args=(url,))
								    thread.start()
								    threads.append(thread)

								# Wait for every thread to finish before stopping the timer.
								for thread in threads:
								    thread.join()
								end = time.perf_counter()

								# Report elapsed seconds followed by the success/failure tallies.
								print(end-start)
								print(f"Successful requests: {success_count}")
								print(f"Failed requests: {failure_count}")