Add new models

master
majiahui@haimaqingfan.com 3 months ago
commit 706c67159c
24 changed files (changed lines in parentheses):

  1. mistral_model_predict_vllm_1.py (106)
  2. mistral_model_predict_vllm_4.py (106)
  3. model_api.py (8)
  4. openbuddy_llama3_1_model_predict_vllm_1.py (202)
  5. openbuddy_llama3_1_model_predict_vllm_2.py (202)
  6. openbuddy_llama3_1_model_predict_vllm_3.py (202)
  7. openbuddy_qwen2_5_model_predict_vllm_1.py (202)
  8. openbuddy_qwen2_5_model_predict_vllm_2.py (202)
  9. openbuddy_qwen2_5_model_predict_vllm_3.py (202)
  10. qwen2_5_Instruct_model_predict_vllm_1.py (205)
  11. qwen2_5_Instruct_model_predict_vllm_2.py (205)
  12. qwen2_5_Instruct_model_predict_vllm_3.py (205)
  13. run_api_gunicorn.sh (2)
  14. run_model_1.sh (1)
  15. run_model_4.sh (1)
  16. run_model_openbuddy_llama3_1_1.sh (1)
  17. run_model_openbuddy_llama3_1_2.sh (1)
  18. run_model_openbuddy_llama3_1_3.sh (1)
  19. run_model_openbuddy_qwen_1.sh (1)
  20. run_model_openbuddy_qwen_2.sh (1)
  21. run_model_openbuddy_qwen_3.sh (1)
  22. run_model_qwen_Instruct1.sh (1)
  23. run_model_qwen_Instruct2.sh (1)
  24. run_model_qwen_Instruct3.sh (1)

mistral_model_predict_vllm_1.py (106 changed lines)

@@ -0,0 +1,106 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from transformers import pipeline
import redis
import uuid
import json
from threading import Thread
from vllm import LLM, SamplingParams
import time
import threading
import concurrent.futures
import requests
import socket

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 32

# sampling_params = SamplingParams(temperature=0.95, top_p=0.7, presence_penalty=0.9, stop="</s>", max_tokens=4096)
sampling_params = SamplingParams(temperature=0.95, top_p=0.7, stop="</s>", presence_penalty=1.1, max_tokens=8192)
models_path = "/home/majiahui/project/LLaMA-Factory-main/lora_openbuddy_mistral_7b_v20_3-32k_paper_model_10"
llm = LLM(model=models_path, tokenizer_mode="slow", max_model_len=8192)


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            text = data_dict["text"]
            query_ids.append(query_id)
            texts.append(text)
            if len(texts) == batch_size:
                break
        outputs = llm.generate(texts, sampling_params)  # run the model
        generated_text_list = [""] * len(texts)
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # assumes request_id maps to the position within this batch
            generated_text = output.outputs[0].text
            generated_text_list[int(index)] = generated_text
        for (id_, output) in zip(query_ids, generated_text_list):
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 86400)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()
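
Nothing in this worker enqueues work; it only drains the 'query' list. For context, a minimal sketch of the producer side (hypothetical, not part of this commit), assuming the same Redis instance, key names, and request-file layout that classify() reads back:

    # producer_sketch.py (hypothetical, mirrors what the worker expects)
    import json
    import uuid
    import redis

    pool = redis.ConnectionPool(host='localhost', port=63179, db=3, password="zhicheng123*")
    redis_ = redis.Redis(connection_pool=pool)

    def enqueue(text: str) -> str:
        id_ = str(uuid.uuid1())
        path = './request_data_logs/{}.json'.format(id_)
        with open(path, 'w', encoding='utf8') as f:
            # the worker reads at least 'id' and 'text' from this file
            json.dump({'id': id_, 'text': text}, f, ensure_ascii=False, indent=4)
        redis_.rpush('query', json.dumps({'path': path}))  # worker lpops from 'query'
        redis_.sadd('querying', id_)                       # worker srems on completion
        return id_

The worker pops {'path': ...} payloads from the list, so only the path needs to be pushed; the request body itself lives on disk.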

mistral_model_predict_vllm_4.py (106 changed lines)

@@ -0,0 +1,106 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "4"
from transformers import pipeline
import redis
import uuid
import json
from threading import Thread
from vllm import LLM, SamplingParams
import time
import threading
import concurrent.futures
import requests
import socket

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=4, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 24

# sampling_params = SamplingParams(temperature=0.95, top_p=0.7, presence_penalty=0.9, stop="</s>", max_tokens=4096)
sampling_params = SamplingParams(temperature=0.95, top_p=0.7, stop="</s>", presence_penalty=1.1, max_tokens=4096)
models_path = "/home/majiahui/project/LLaMA-Factory-main/lora_openbuddy_zephyr_paper_model_190000"
llm = LLM(model=models_path, tokenizer_mode="slow")


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            text = data_dict["text"]
            query_ids.append(query_id)
            texts.append(text)
            if len(texts) == batch_size:
                break
        outputs = llm.generate(texts, sampling_params)  # run the model
        generated_text_list = [""] * len(texts)
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # assumes request_id maps to the position within this batch
            generated_text = output.outputs[0].text
            generated_text_list[int(index)] = generated_text
        for (id_, output) in zip(query_ids, generated_text_list):
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 300)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()

mistral_api.py → model_api.py (8 changed lines)

@@ -53,10 +53,14 @@ def smtp_f(name):
 @app.route("/predict", methods=["POST"])
 def predict():
-    text = request.json["texts"]  # text of the user query, e.g. "I love you"
+    content = request.json["content"]  # text of the user query, e.g. "I love you"
+    model = request.json["model"]
+    top_p = request.json["top_p"]
+    temperature = request.json["temperature"]
     id_ = str(uuid.uuid1())  # generate a unique id for the query
     print("uuid: ", uuid)
-    d = {'id': id_, 'text': text}  # bind the text to the query id
+    d = {'id': id_, 'text': content, 'model': model, 'top_p': top_p, 'temperature': temperature}  # bind the text to the query id
+    print(d)
     try:
         load_request_path = './request_data_logs/{}.json'.format(id_)
         with open(load_request_path, 'w', encoding='utf8') as f2:
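
With this change, the handler expects content, model, top_p, and temperature in the JSON body instead of the old texts field. A hedged example call (host and port are assumptions; the field names come from the diff above):

    import requests

    resp = requests.post(
        "http://127.0.0.1:9000/predict",    # assumed host/port for the gunicorn service
        json={
            "content": "Write a related-work paragraph on retrieval-augmented generation.",
            "model": "qwen2_5_Instruct",    # hypothetical model label
            "top_p": 0.7,
            "temperature": 0.95,
        },
    )
    print(resp.json())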

openbuddy_llama3_1_model_predict_vllm_1.py (202 changed lines)

@@ -0,0 +1,202 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json

# HTTP service
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 15


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def initialize_engine() -> LLMEngine:
    """Initialize the LLMEngine from the command line arguments."""
    # model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
    model_dir = "/home/majiahui/project/models-llm/openbuddy-llama3.1-8b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_1"
    args = EngineArgs(model_dir)
    args.max_num_seqs = 16  # maximum number of sequences per batch
    args.gpu_memory_utilization = 0.8
    args.max_model_len = 8192
    # load the model
    return LLMEngine.from_engine_args(args)


engine = initialize_engine()


def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str, str, SamplingParams]]:
    """Create a list of test prompts with their sampling parameters."""
    return_list = []
    for i, j, k in zip(prompt_texts, query_ids, sampling_params_list):
        return_list.append((i, j, k))
    return return_list


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, str, SamplingParams]]):
    """Continuously process a list of prompts and handle the outputs."""
    return_list = []
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, query_id, sampling_params = test_prompts.pop(0)
            engine.add_request(str(query_id), prompt, sampling_params)
        request_outputs: List[RequestOutput] = engine.step()
        for request_output in request_outputs:
            if request_output.finished:
                return_list.append(request_output)
    return return_list


def main(prompt_texts, query_ids, sampling_params_list):
    """Main function that sets up and runs the prompt processing."""
    test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
    return process_requests(engine, test_prompts)


# chat endpoint (disabled)
# @app.route("/predict/", methods=["POST"])
# def chat():
#     # request = request.json()
#     # query = request.get('query', None)
#     # history = request.get('history', [])
#     # system = request.get('system', 'You are a helpful assistant.')
#     # stream = request.get("stream", False)
#     # user_stop_words = request.get("user_stop_words",
#     #                               [])  # list[str], user-defined stop strings, e.g. ['Observation: ', 'Action: '] defines two; generation stops at whichever appears first
#
#     query = request.json['query']
#
#     # build the prompt
#     # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
#     prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#     return_output = main(prompt_text, sampling_params)
#     return_info = {
#         "request_id": return_output.request_id,
#         "text": return_output.outputs[0].text
#     }
#
#     return jsonify(return_info)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        sampling_params_list = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            print("query_id", query_id)
            text = data_dict["text"]
            model = data_dict["model"]
            top_p = data_dict["top_p"]
            temperature = data_dict["temperature"]
            presence_penalty = 0.8
            max_tokens = 8192
            query_ids.append(query_id)
            texts.append(text)
            # sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
            sampling_params_list.append(SamplingParams(
                temperature=temperature,
                top_p=top_p,
                stop="<|end|>",
                presence_penalty=presence_penalty,
                max_tokens=max_tokens
            ))
            if len(texts) == batch_size:
                break
        print("texts", len(texts))
        print("query_ids", len(query_ids))
        print("sampling_params_list", len(sampling_params_list))
        outputs = main(texts, query_ids, sampling_params_list)
        print("prediction finished")
        generated_text_dict = {}
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # request_id was set to the query id above
            print(index)
            generated_text = output.outputs[0].text
            generated_text_dict[index] = generated_text
        print(generated_text_dict)
        for id_, output in generated_text_dict.items():
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 86400)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()
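
Each worker writes the result to disk and then maps the query id to that file path in Redis with a 24-hour TTL (redis_.set(id_, load_result_path, 86400)). A sketch of the consuming side under the same conventions (polling interval and timeout are assumptions):

    import json
    import time
    import redis

    pool = redis.ConnectionPool(host='localhost', port=63179, db=3,
                                password="zhicheng123*", decode_responses=True)
    redis_ = redis.Redis(connection_pool=pool)

    def fetch_result(id_: str, timeout: float = 600.0) -> dict:
        deadline = time.time() + timeout
        while time.time() < deadline:
            result_path = redis_.get(id_)  # set by the worker once generation finishes
            if result_path:
                with open(result_path, encoding='utf8') as f:
                    # {"texts": ..., "probabilities": None, "status_code": 200}
                    return json.load(f)
            time.sleep(1)
        raise TimeoutError("no result for id {}".format(id_))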

openbuddy_llama3_1_model_predict_vllm_2.py (202 changed lines)

@@ -0,0 +1,202 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json

# HTTP service
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 15


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def initialize_engine() -> LLMEngine:
    """Initialize the LLMEngine from the command line arguments."""
    # model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
    model_dir = "/home/majiahui/project/models-llm/openbuddy-llama3.1-8b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_1"
    args = EngineArgs(model_dir)
    args.max_num_seqs = 16  # maximum number of sequences per batch
    args.gpu_memory_utilization = 0.8
    args.max_model_len = 8192
    # load the model
    return LLMEngine.from_engine_args(args)


engine = initialize_engine()


def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str, str, SamplingParams]]:
    """Create a list of test prompts with their sampling parameters."""
    return_list = []
    for i, j, k in zip(prompt_texts, query_ids, sampling_params_list):
        return_list.append((i, j, k))
    return return_list


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, str, SamplingParams]]):
    """Continuously process a list of prompts and handle the outputs."""
    return_list = []
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, query_id, sampling_params = test_prompts.pop(0)
            engine.add_request(str(query_id), prompt, sampling_params)
        request_outputs: List[RequestOutput] = engine.step()
        for request_output in request_outputs:
            if request_output.finished:
                return_list.append(request_output)
    return return_list


def main(prompt_texts, query_ids, sampling_params_list):
    """Main function that sets up and runs the prompt processing."""
    test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
    return process_requests(engine, test_prompts)


# chat endpoint (disabled)
# @app.route("/predict/", methods=["POST"])
# def chat():
#     # request = request.json()
#     # query = request.get('query', None)
#     # history = request.get('history', [])
#     # system = request.get('system', 'You are a helpful assistant.')
#     # stream = request.get("stream", False)
#     # user_stop_words = request.get("user_stop_words",
#     #                               [])  # list[str], user-defined stop strings, e.g. ['Observation: ', 'Action: '] defines two; generation stops at whichever appears first
#
#     query = request.json['query']
#
#     # build the prompt
#     # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
#     prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#     return_output = main(prompt_text, sampling_params)
#     return_info = {
#         "request_id": return_output.request_id,
#         "text": return_output.outputs[0].text
#     }
#
#     return jsonify(return_info)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        sampling_params_list = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            print("query_id", query_id)
            text = data_dict["text"]
            model = data_dict["model"]
            top_p = data_dict["top_p"]
            temperature = data_dict["temperature"]
            presence_penalty = 0.8
            max_tokens = 8192
            query_ids.append(query_id)
            texts.append(text)
            # sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
            sampling_params_list.append(SamplingParams(
                temperature=temperature,
                top_p=top_p,
                stop="<|end|>",
                presence_penalty=presence_penalty,
                max_tokens=max_tokens
            ))
            if len(texts) == batch_size:
                break
        print("texts", len(texts))
        print("query_ids", len(query_ids))
        print("sampling_params_list", len(sampling_params_list))
        outputs = main(texts, query_ids, sampling_params_list)
        print("prediction finished")
        generated_text_dict = {}
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # request_id was set to the query id above
            print(index)
            generated_text = output.outputs[0].text
            generated_text_dict[index] = generated_text
        print(generated_text_dict)
        for id_, output in generated_text_dict.items():
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 86400)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()

openbuddy_llama3_1_model_predict_vllm_3.py (202 changed lines)

@@ -0,0 +1,202 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json

# HTTP service
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 15


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def initialize_engine() -> LLMEngine:
    """Initialize the LLMEngine from the command line arguments."""
    # model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
    model_dir = "/home/majiahui/project/models-llm/openbuddy-llama3.1-8b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_1"
    args = EngineArgs(model_dir)
    args.max_num_seqs = 16  # maximum number of sequences per batch
    args.gpu_memory_utilization = 0.8
    args.max_model_len = 8192
    # load the model
    return LLMEngine.from_engine_args(args)


engine = initialize_engine()


def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str, str, SamplingParams]]:
    """Create a list of test prompts with their sampling parameters."""
    return_list = []
    for i, j, k in zip(prompt_texts, query_ids, sampling_params_list):
        return_list.append((i, j, k))
    return return_list


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, str, SamplingParams]]):
    """Continuously process a list of prompts and handle the outputs."""
    return_list = []
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, query_id, sampling_params = test_prompts.pop(0)
            engine.add_request(str(query_id), prompt, sampling_params)
        request_outputs: List[RequestOutput] = engine.step()
        for request_output in request_outputs:
            if request_output.finished:
                return_list.append(request_output)
    return return_list


def main(prompt_texts, query_ids, sampling_params_list):
    """Main function that sets up and runs the prompt processing."""
    test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
    return process_requests(engine, test_prompts)


# chat endpoint (disabled)
# @app.route("/predict/", methods=["POST"])
# def chat():
#     # request = request.json()
#     # query = request.get('query', None)
#     # history = request.get('history', [])
#     # system = request.get('system', 'You are a helpful assistant.')
#     # stream = request.get("stream", False)
#     # user_stop_words = request.get("user_stop_words",
#     #                               [])  # list[str], user-defined stop strings, e.g. ['Observation: ', 'Action: '] defines two; generation stops at whichever appears first
#
#     query = request.json['query']
#
#     # build the prompt
#     # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
#     prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#     return_output = main(prompt_text, sampling_params)
#     return_info = {
#         "request_id": return_output.request_id,
#         "text": return_output.outputs[0].text
#     }
#
#     return jsonify(return_info)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        sampling_params_list = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            print("query_id", query_id)
            text = data_dict["text"]
            model = data_dict["model"]
            top_p = data_dict["top_p"]
            temperature = data_dict["temperature"]
            presence_penalty = 0.8
            max_tokens = 8192
            query_ids.append(query_id)
            texts.append(text)
            # sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
            sampling_params_list.append(SamplingParams(
                temperature=temperature,
                top_p=top_p,
                stop="<|end|>",
                presence_penalty=presence_penalty,
                max_tokens=max_tokens
            ))
            if len(texts) == batch_size:
                break
        print("texts", len(texts))
        print("query_ids", len(query_ids))
        print("sampling_params_list", len(sampling_params_list))
        outputs = main(texts, query_ids, sampling_params_list)
        print("prediction finished")
        generated_text_dict = {}
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # request_id was set to the query id above
            print(index)
            generated_text = output.outputs[0].text
            generated_text_dict[index] = generated_text
        print(generated_text_dict)
        for id_, output in generated_text_dict.items():
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 86400)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()

openbuddy_qwen2_5_model_predict_vllm_1.py (202 changed lines)

@@ -0,0 +1,202 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json

# HTTP service
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 15


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def initialize_engine() -> LLMEngine:
    """Initialize the LLMEngine from the command line arguments."""
    # model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
    model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_2"
    args = EngineArgs(model_dir)
    args.max_num_seqs = 16  # maximum number of sequences per batch
    args.gpu_memory_utilization = 0.8
    args.max_model_len = 8192
    # load the model
    return LLMEngine.from_engine_args(args)


engine = initialize_engine()


def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str, str, SamplingParams]]:
    """Create a list of test prompts with their sampling parameters."""
    return_list = []
    for i, j, k in zip(prompt_texts, query_ids, sampling_params_list):
        return_list.append((i, j, k))
    return return_list


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, str, SamplingParams]]):
    """Continuously process a list of prompts and handle the outputs."""
    return_list = []
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, query_id, sampling_params = test_prompts.pop(0)
            engine.add_request(str(query_id), prompt, sampling_params)
        request_outputs: List[RequestOutput] = engine.step()
        for request_output in request_outputs:
            if request_output.finished:
                return_list.append(request_output)
    return return_list


def main(prompt_texts, query_ids, sampling_params_list):
    """Main function that sets up and runs the prompt processing."""
    test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
    return process_requests(engine, test_prompts)


# chat endpoint (disabled)
# @app.route("/predict/", methods=["POST"])
# def chat():
#     # request = request.json()
#     # query = request.get('query', None)
#     # history = request.get('history', [])
#     # system = request.get('system', 'You are a helpful assistant.')
#     # stream = request.get("stream", False)
#     # user_stop_words = request.get("user_stop_words",
#     #                               [])  # list[str], user-defined stop strings, e.g. ['Observation: ', 'Action: '] defines two; generation stops at whichever appears first
#
#     query = request.json['query']
#
#     # build the prompt
#     # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
#     prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#     return_output = main(prompt_text, sampling_params)
#     return_info = {
#         "request_id": return_output.request_id,
#         "text": return_output.outputs[0].text
#     }
#
#     return jsonify(return_info)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        sampling_params_list = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            print("query_id", query_id)
            text = data_dict["text"]
            model = data_dict["model"]
            top_p = data_dict["top_p"]
            temperature = data_dict["temperature"]
            presence_penalty = 0.8
            max_tokens = 8192
            query_ids.append(query_id)
            texts.append(text)
            # sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
            sampling_params_list.append(SamplingParams(
                temperature=temperature,
                top_p=top_p,
                stop="<|end|>",
                presence_penalty=presence_penalty,
                max_tokens=max_tokens
            ))
            if len(texts) == batch_size:
                break
        print("texts", len(texts))
        print("query_ids", len(query_ids))
        print("sampling_params_list", len(sampling_params_list))
        outputs = main(texts, query_ids, sampling_params_list)
        print("prediction finished")
        generated_text_dict = {}
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # request_id was set to the query id above
            print(index)
            generated_text = output.outputs[0].text
            generated_text_dict[index] = generated_text
        print(generated_text_dict)
        for id_, output in generated_text_dict.items():
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 86400)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()

openbuddy_qwen2_5_model_predict_vllm_2.py (202 changed lines)

@@ -0,0 +1,202 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json

# HTTP service
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 15


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def initialize_engine() -> LLMEngine:
    """Initialize the LLMEngine from the command line arguments."""
    # model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
    model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_2"
    args = EngineArgs(model_dir)
    args.max_num_seqs = 16  # maximum number of sequences per batch
    args.gpu_memory_utilization = 0.8
    args.max_model_len = 8192
    # load the model
    return LLMEngine.from_engine_args(args)


engine = initialize_engine()


def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str, str, SamplingParams]]:
    """Create a list of test prompts with their sampling parameters."""
    return_list = []
    for i, j, k in zip(prompt_texts, query_ids, sampling_params_list):
        return_list.append((i, j, k))
    return return_list


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, str, SamplingParams]]):
    """Continuously process a list of prompts and handle the outputs."""
    return_list = []
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, query_id, sampling_params = test_prompts.pop(0)
            engine.add_request(str(query_id), prompt, sampling_params)
        request_outputs: List[RequestOutput] = engine.step()
        for request_output in request_outputs:
            if request_output.finished:
                return_list.append(request_output)
    return return_list


def main(prompt_texts, query_ids, sampling_params_list):
    """Main function that sets up and runs the prompt processing."""
    test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
    return process_requests(engine, test_prompts)


# chat endpoint (disabled)
# @app.route("/predict/", methods=["POST"])
# def chat():
#     # request = request.json()
#     # query = request.get('query', None)
#     # history = request.get('history', [])
#     # system = request.get('system', 'You are a helpful assistant.')
#     # stream = request.get("stream", False)
#     # user_stop_words = request.get("user_stop_words",
#     #                               [])  # list[str], user-defined stop strings, e.g. ['Observation: ', 'Action: '] defines two; generation stops at whichever appears first
#
#     query = request.json['query']
#
#     # build the prompt
#     # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
#     prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#     return_output = main(prompt_text, sampling_params)
#     return_info = {
#         "request_id": return_output.request_id,
#         "text": return_output.outputs[0].text
#     }
#
#     return jsonify(return_info)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        sampling_params_list = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            print("query_id", query_id)
            text = data_dict["text"]
            model = data_dict["model"]
            top_p = data_dict["top_p"]
            temperature = data_dict["temperature"]
            presence_penalty = 0.8
            max_tokens = 8192
            query_ids.append(query_id)
            texts.append(text)
            # sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
            sampling_params_list.append(SamplingParams(
                temperature=temperature,
                top_p=top_p,
                stop="<|end|>",
                presence_penalty=presence_penalty,
                max_tokens=max_tokens
            ))
            if len(texts) == batch_size:
                break
        print("texts", len(texts))
        print("query_ids", len(query_ids))
        print("sampling_params_list", len(sampling_params_list))
        outputs = main(texts, query_ids, sampling_params_list)
        print("prediction finished")
        generated_text_dict = {}
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # request_id was set to the query id above
            print(index)
            generated_text = output.outputs[0].text
            generated_text_dict[index] = generated_text
        print(generated_text_dict)
        for id_, output in generated_text_dict.items():
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 86400)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()

openbuddy_qwen2_5_model_predict_vllm_3.py (202 changed lines)

@@ -0,0 +1,202 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json

# HTTP service
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 15


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def initialize_engine() -> LLMEngine:
    """Initialize the LLMEngine from the command line arguments."""
    # model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
    model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_2"
    args = EngineArgs(model_dir)
    args.max_num_seqs = 16  # maximum number of sequences per batch
    args.gpu_memory_utilization = 0.8
    args.max_model_len = 8192
    # load the model
    return LLMEngine.from_engine_args(args)


engine = initialize_engine()


def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str, str, SamplingParams]]:
    """Create a list of test prompts with their sampling parameters."""
    return_list = []
    for i, j, k in zip(prompt_texts, query_ids, sampling_params_list):
        return_list.append((i, j, k))
    return return_list


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, str, SamplingParams]]):
    """Continuously process a list of prompts and handle the outputs."""
    return_list = []
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, query_id, sampling_params = test_prompts.pop(0)
            engine.add_request(str(query_id), prompt, sampling_params)
        request_outputs: List[RequestOutput] = engine.step()
        for request_output in request_outputs:
            if request_output.finished:
                return_list.append(request_output)
    return return_list


def main(prompt_texts, query_ids, sampling_params_list):
    """Main function that sets up and runs the prompt processing."""
    test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
    return process_requests(engine, test_prompts)


# chat endpoint (disabled)
# @app.route("/predict/", methods=["POST"])
# def chat():
#     # request = request.json()
#     # query = request.get('query', None)
#     # history = request.get('history', [])
#     # system = request.get('system', 'You are a helpful assistant.')
#     # stream = request.get("stream", False)
#     # user_stop_words = request.get("user_stop_words",
#     #                               [])  # list[str], user-defined stop strings, e.g. ['Observation: ', 'Action: '] defines two; generation stops at whichever appears first
#
#     query = request.json['query']
#
#     # build the prompt
#     # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
#     prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#     return_output = main(prompt_text, sampling_params)
#     return_info = {
#         "request_id": return_output.request_id,
#         "text": return_output.outputs[0].text
#     }
#
#     return jsonify(return_info)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        sampling_params_list = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            print("query_id", query_id)
            text = data_dict["text"]
            model = data_dict["model"]
            top_p = data_dict["top_p"]
            temperature = data_dict["temperature"]
            presence_penalty = 0.8
            max_tokens = 8192
            query_ids.append(query_id)
            texts.append(text)
            # sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
            sampling_params_list.append(SamplingParams(
                temperature=temperature,
                top_p=top_p,
                stop="<|end|>",
                presence_penalty=presence_penalty,
                max_tokens=max_tokens
            ))
            if len(texts) == batch_size:
                break
        print("texts", len(texts))
        print("query_ids", len(query_ids))
        print("sampling_params_list", len(sampling_params_list))
        outputs = main(texts, query_ids, sampling_params_list)
        print("prediction finished")
        generated_text_dict = {}
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # request_id was set to the query id above
            print(index)
            generated_text = output.outputs[0].text
            generated_text_dict[index] = generated_text
        print(generated_text_dict)
        for id_, output in generated_text_dict.items():
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 86400)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()

qwen2_5_Instruct_model_predict_vllm_1.py (205 changed lines)

@@ -0,0 +1,205 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json

# HTTP service
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False

pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 15


class log:
    def __init__(self):
        pass

    def log(*args, **kwargs):
        format = '%Y/%m/%d-%H:%M:%S'
        format_h = '%Y-%m-%d'
        value = time.localtime(int(time.time()))
        dt = time.strftime(format, value)
        dt_log_file = time.strftime(format_h, value)
        log_file = 'log_file/access-%s' % dt_log_file + ".log"
        if not os.path.exists(log_file):
            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)
        else:
            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
                print(dt, *args, file=f, **kwargs)


def initialize_engine() -> LLMEngine:
    """Initialize the LLMEngine from the command line arguments."""
    # model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_2"
    # model_dir = "/home/majiahui/project/models-llm/Qwen2.5-7B-Instruct-1M"
    # model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b-v23.1-200k"
    model_dir = "/home/majiahui/project/models-llm/qwen2_5_7B_train_11_prompt_4_gpt_xiaobiaot_real_paper_1"
    args = EngineArgs(model_dir)
    args.max_num_seqs = 16  # maximum number of sequences per batch
    args.gpu_memory_utilization = 0.8
    args.max_model_len = 8192
    # load the model
    return LLMEngine.from_engine_args(args)


engine = initialize_engine()


def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str, str, SamplingParams]]:
    """Create a list of test prompts with their sampling parameters."""
    return_list = []
    for i, j, k in zip(prompt_texts, query_ids, sampling_params_list):
        return_list.append((i, j, k))
    return return_list


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, str, SamplingParams]]):
    """Continuously process a list of prompts and handle the outputs."""
    return_list = []
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, query_id, sampling_params = test_prompts.pop(0)
            engine.add_request(str(query_id), prompt, sampling_params)
        request_outputs: List[RequestOutput] = engine.step()
        for request_output in request_outputs:
            if request_output.finished:
                return_list.append(request_output)
    return return_list


def main(prompt_texts, query_ids, sampling_params_list):
    """Main function that sets up and runs the prompt processing."""
    test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
    return process_requests(engine, test_prompts)


# chat endpoint (disabled)
# @app.route("/predict/", methods=["POST"])
# def chat():
#     # request = request.json()
#     # query = request.get('query', None)
#     # history = request.get('history', [])
#     # system = request.get('system', 'You are a helpful assistant.')
#     # stream = request.get("stream", False)
#     # user_stop_words = request.get("user_stop_words",
#     #                               [])  # list[str], user-defined stop strings, e.g. ['Observation: ', 'Action: '] defines two; generation stops at whichever appears first
#
#     query = request.json['query']
#
#     # build the prompt
#     # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
#     prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#     return_output = main(prompt_text, sampling_params)
#     return_info = {
#         "request_id": return_output.request_id,
#         "text": return_output.outputs[0].text
#     }
#
#     return jsonify(return_info)


def classify(batch_size):  # run the model, batching up to batch_size requests
    while True:
        texts = []
        query_ids = []
        sampling_params_list = []
        if redis_.llen(db_key_query) == 0:  # if the queue is empty, keep polling
            time.sleep(2)
            continue
        # for i in range(min(redis_.llen(db_key_query), batch_size)):
        while True:
            query = redis_.lpop(db_key_query)  # pop one queued request
            if query is None:
                break
            query = query.decode('UTF-8')
            data_dict_path = json.loads(query)
            path = data_dict_path['path']
            with open(path, encoding='utf8') as f1:
                # load the request object from the file
                data_dict = json.load(f1)
            # query_ids.append(json.loads(query)['id'])
            # texts.append(json.loads(query)['text'])  # collect texts into a batch
            query_id = data_dict['id']
            print("query_id", query_id)
            text = data_dict["text"]
            model = data_dict["model"]
            top_p = data_dict["top_p"]
            temperature = data_dict["temperature"]
            presence_penalty = 1.1
            max_tokens = 8192
            query_ids.append(query_id)
            texts.append(text)
            # sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
            sampling_params_list.append(SamplingParams(
                temperature=temperature,
                top_p=top_p,
                stop="<|end|>",
                presence_penalty=presence_penalty,
                max_tokens=max_tokens
            ))
            if len(texts) == batch_size:
                break
        print("texts", len(texts))
        print("query_ids", len(query_ids))
        print("sampling_params_list", len(sampling_params_list))
        outputs = main(texts, query_ids, sampling_params_list)
        print("prediction finished")
        generated_text_dict = {}
        print("outputs", len(outputs))
        for i, output in enumerate(outputs):
            index = output.request_id  # request_id was set to the query id above
            print(index)
            generated_text = output.outputs[0].text
            generated_text_dict[index] = generated_text
        print(generated_text_dict)
        for id_, output in generated_text_dict.items():
            return_text = {"texts": output, "probabilities": None, "status_code": 200}
            load_result_path = "./new_data_logs/{}.json".format(id_)
            with open(load_result_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False is required to write Chinese; otherwise it is escaped as Unicode
                # indent=4 pretty-prints the JSON
                json.dump(return_text, f2, ensure_ascii=False, indent=4)
            redis_.set(id_, load_result_path, 86400)
            # redis_.set(id_, load_result_path, 30)
            redis_.srem(db_key_querying, id_)
            log.log('start at',
                    'query_id:{},load_result_path:{},return_text:{}'.format(
                        id_, load_result_path, return_text))


if __name__ == '__main__':
    t = Thread(target=classify, args=(batch_size,))
    t.start()
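
The disabled chat() route above shows the ChatML-style template these checkpoints are served with. If a producer needs to wrap a raw query before enqueueing it, a helper in that same format might look like this (hypothetical, copied from the commented-out prompt_text):

    def build_prompt(query: str) -> str:
        # same template as the commented-out chat() endpoint
        return f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"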

qwen2_5_Instruct_model_predict_vllm_2.py (205 changed lines)

@@ -0,0 +1,205 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json
# http接口服务
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50,db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
batch_size = 15
class log:
def __init__(self):
pass
def log(*args, **kwargs):
format = '%Y/%m/%d-%H:%M:%S'
format_h = '%Y-%m-%d'
value = time.localtime(int(time.time()))
dt = time.strftime(format, value)
dt_log_file = time.strftime(format_h, value)
log_file = 'log_file/access-%s' % dt_log_file + ".log"
if not os.path.exists(log_file):
with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
print(dt, *args, file=f, **kwargs)
else:
with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
print(dt, *args, file=f, **kwargs)
def initialize_engine() -> LLMEngine:
"""Initialize the LLMEngine from the command line arguments."""
# model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
# model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
# model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_2"
# model_dir = "/home/majiahui/project/models-llm/Qwen2.5-7B-Instruct-1M"
# model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b-v23.1-200k"
model_dir = "/home/majiahui/project/models-llm/qwen2_5_7B_train_11_prompt_4_gpt_xiaobiaot_real_paper_1"
args = EngineArgs(model_dir)
args.max_num_seqs = 16 # batch最大20条样本
args.gpu_memory_utilization = 0.8
args.max_model_len=8192
# 加载模型
return LLMEngine.from_engine_args(args)
engine = initialize_engine()
def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str,str, SamplingParams]]:
"""Create a list of test prompts with their sampling parameters."""
return_list = []
for i,j,k in zip(prompt_texts, query_ids, sampling_params_list):
return_list.append((i, j, k))
return return_list
def process_requests(engine: LLMEngine,
test_prompts: List[Tuple[str, str, SamplingParams]]):
"""Continuously process a list of prompts and handle the outputs."""
return_list = []
while test_prompts or engine.has_unfinished_requests():
if test_prompts:
prompt, query_id, sampling_params = test_prompts.pop(0)
engine.add_request(str(query_id), prompt, sampling_params)
request_outputs: List[RequestOutput] = engine.step()
for request_output in request_outputs:
if request_output.finished:
return_list.append(request_output)
return return_list
def main(prompt_texts, query_ids, sampling_params_list):
"""Main function that sets up and runs the prompt processing."""
test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
return process_requests(engine, test_prompts)
# chat对话接口
# @app.route("/predict/", methods=["POST"])
# def chat():
# # request = request.json()
# # query = request.get('query', None)
# # history = request.get('history', [])
# # system = request.get('system', 'You are a helpful assistant.')
# # stream = request.get("stream", False)
# # user_stop_words = request.get("user_stop_words",
# # []) # list[str],用户自定义停止句,例如:['Observation: ', 'Action: ']定义了2个停止句,遇到任何一个都会停止
#
# query = request.json['query']
#
#
# # 构造prompt
# # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
# prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#
# return_output = main(prompt_text, sampling_params)
# return_info = {
# "request_id": return_output.request_id,
# "text": return_output.outputs[0].text
# }
#
# return jsonify(return_info)
def classify(batch_size): # 调用模型,设置最大batch_size
while True:
texts = []
query_ids = []
sampling_params_list = []
if redis_.llen(db_key_query) == 0: # 若队列中没有元素就继续获取
time.sleep(2)
continue
# for i in range(min(redis_.llen(db_key_query), batch_size)):
while True:
query = redis_.lpop(db_key_query) # 获取query的text
if query == None:
break
query = query.decode('UTF-8')
data_dict_path = json.loads(query)
path = data_dict_path['path']
with open(path, encoding='utf8') as f1:
# 加载文件的对象
data_dict = json.load(f1)
# query_ids.append(json.loads(query)['id'])
# texts.append(json.loads(query)['text']) # 拼接若干text 为batch
query_id = data_dict['id']
print("query_id", query_id)
text = data_dict["text"]
model = data_dict["model"]
top_p = data_dict["top_p"]
temperature = data_dict["temperature"]
presence_penalty = 1.1
max_tokens = 8192
query_ids.append(query_id)
texts.append(text)
# sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
sampling_params_list.append(SamplingParams(
temperature=temperature,
top_p=top_p,
stop="<|end|>",
presence_penalty=presence_penalty,
max_tokens=max_tokens
))
if len(texts) == batch_size:
break
print("texts", len(texts))
print("query_ids", len(query_ids))
print("sampling_params_list", len(sampling_params_list))
outputs = main(texts, query_ids, sampling_params_list)
print("预测完成")
generated_text_dict = {}
print("outputs", len(outputs))
for i, output in enumerate(outputs):
index = output.request_id
print(index)
generated_text = output.outputs[0].text
generated_text_dict[index] = generated_text
print(generated_text_dict)
for id_, output in generated_text_dict.items():
return_text = {"texts": output, "probabilities": None, "status_code": 200}
load_result_path = "./new_data_logs/{}.json".format(id_)
with open(load_result_path, 'w', encoding='utf8') as f2:
# ensure_ascii=False writes Chinese characters directly instead of \u escapes
# indent=4 pretty-prints the JSON
json.dump(return_text, f2, ensure_ascii=False, indent=4)
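# keep the result path in Redis for 86400 s (24 h)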
redis_.set(id_, load_result_path, 86400)
# redis_.set(id_, load_result_path, 30)
redis_.srem(db_key_querying, id_)
log.log('result saved,',
'query_id:{},load_result_path:{},return_text:{}'.format(
id_, load_result_path, return_text))
if __name__ == '__main__':
t = Thread(target=classify, args=(batch_size,))
t.start()
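For reference, the Redis contract these workers consume: a producer writes the request to a JSON file with keys id/text/model/top_p/temperature, pushes {"path": ...} onto the 'query' list, and adds the id to the 'querying' set. model_api.py presumably does the equivalent; the sketch below, including the submit_query helper and the file location, is an illustrative assumption, not code from this commit.

import json
import uuid
import redis

pool = redis.ConnectionPool(host='localhost', port=63179, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool)

def submit_query(text, model, top_p=0.7, temperature=0.95):
    # hypothetical helper: persist the request, then enqueue its path
    query_id = str(uuid.uuid1())
    data = {"id": query_id, "text": text, "model": model,
            "top_p": top_p, "temperature": temperature}
    path = "./new_data_logs/request_{}.json".format(query_id)  # assumed location
    with open(path, 'w', encoding='utf8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    # the workers lpop 'query', so rpush keeps FIFO order (assumption)
    redis_.rpush('query', json.dumps({"path": path}))
    redis_.sadd('querying', query_id)  # mark in flight; the worker srem's it when done
    return query_id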

205
qwen2_5_Instruct_model_predict_vllm_3.py

@ -0,0 +1,205 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
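# pin this worker to GPU 3; the _1 and _2 variants of this script presumably pin GPUs 1 and 2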
import argparse
from typing import List, Tuple
from threading import Thread
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
# from vllm.utils import FlexibleArgumentParser
from flask import Flask, jsonify
from flask import request
import redis
import time
import json
# HTTP service
# app = FastAPI()
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50, db=3, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
db_key_query = 'query'
db_key_querying = 'querying'
db_key_result = 'result'
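# drain at most 15 queued requests per round (the engine itself caps concurrency at max_num_seqs = 16)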
batch_size = 15
class log:
def __init__(self):
pass
def log(*args, **kwargs):
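# note: no 'self' parameter; this only works because callers invoke it on the class itself as log.log(...)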
format = '%Y/%m/%d-%H:%M:%S'
format_h = '%Y-%m-%d'
value = time.localtime(int(time.time()))
dt = time.strftime(format, value)
dt_log_file = time.strftime(format_h, value)
log_file = 'log_file/access-%s' % dt_log_file + ".log"
if not os.path.exists(log_file):
with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
print(dt, *args, file=f, **kwargs)
else:
with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
print(dt, *args, file=f, **kwargs)
def initialize_engine() -> LLMEngine:
"""Initialize the LLMEngine from the command line arguments."""
# model_dir = "/home/majiahui/project/models-llm/Qwen-0_5B-Chat"
# model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper"
# model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_2"
# model_dir = "/home/majiahui/project/models-llm/Qwen2.5-7B-Instruct-1M"
# model_dir = "/home/majiahui/project/models-llm/openbuddy-qwen2.5llamaify-7b-v23.1-200k"
model_dir = "/home/majiahui/project/models-llm/qwen2_5_7B_train_11_prompt_4_gpt_xiaobiaot_real_paper_1"
args = EngineArgs(model_dir)
args.max_num_seqs = 16  # cap the engine at 16 concurrent sequences per batch
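# let vLLM use at most 80% of the GPU's memory, leaving headroom for other processes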
args.gpu_memory_utilization = 0.8
args.max_model_len = 8192
# build the engine and load the model weights
return LLMEngine.from_engine_args(args)
engine = initialize_engine()
def create_test_prompts(prompt_texts, query_ids, sampling_params_list) -> List[Tuple[str, str, SamplingParams]]:
"""Create a list of test prompts with their sampling parameters."""
return_list = []
for i,j,k in zip(prompt_texts, query_ids, sampling_params_list):
return_list.append((i, j, k))
return return_list
def process_requests(engine: LLMEngine,
test_prompts: List[Tuple[str, str, SamplingParams]]):
"""Continuously process a list of prompts and handle the outputs."""
return_list = []
while test_prompts or engine.has_unfinished_requests():
if test_prompts:
prompt, query_id, sampling_params = test_prompts.pop(0)
engine.add_request(str(query_id), prompt, sampling_params)
request_outputs: List[RequestOutput] = engine.step()
for request_output in request_outputs:
if request_output.finished:
return_list.append(request_output)
return return_list
def main(prompt_texts, query_ids, sampling_params_list):
"""Main function that sets up and runs the prompt processing."""
test_prompts = create_test_prompts(prompt_texts, query_ids, sampling_params_list)
return process_requests(engine, test_prompts)
# chat endpoint (kept for reference, currently disabled)
# @app.route("/predict/", methods=["POST"])
# def chat():
# # request = request.json()
# # query = request.get('query', None)
# # history = request.get('history', [])
# # system = request.get('system', 'You are a helpful assistant.')
# # stream = request.get("stream", False)
# # user_stop_words = request.get("user_stop_words",
# #                               [])  # list[str], user-defined stop strings, e.g. ['Observation: ', 'Action: '] defines two; generation stops at whichever appears first
#
# query = request.json['query']
#
#
# # build the prompt
# # prompt_text, prompt_tokens = _build_prompt(generation_config, tokenizer, query, history=history, system=system)
#
# prompt_text = f"<|im_start|>user\n{query}\n<|im_end|>\n<|im_start|>assistant\n"
#
#
# return_output = main(prompt_text, sampling_params)
# return_info = {
# "request_id": return_output.request_id,
# "text": return_output.outputs[0].text
# }
#
# return jsonify(return_info)
def classify(batch_size):  # worker loop: drain queued queries and run the model in batches of up to batch_size
while True:
texts = []
query_ids = []
sampling_params_list = []
if redis_.llen(db_key_query) == 0:  # queue is empty: sleep, then poll again
time.sleep(2)
continue
# for i in range(min(redis_.llen(db_key_query), batch_size)):
while True:
query = redis_.lpop(db_key_query)  # pop the next queued request
if query is None:
break
query = query.decode('UTF-8')
data_dict_path = json.loads(query)
path = data_dict_path['path']
with open(path, encoding='utf8') as f1:
# load the request object from the JSON file
data_dict = json.load(f1)
# query_ids.append(json.loads(query)['id'])
# texts.append(json.loads(query)['text'])  # collect texts into a batch
query_id = data_dict['id']
print("query_id", query_id)
text = data_dict["text"]
model = data_dict["model"]
top_p = data_dict["top_p"]
temperature = data_dict["temperature"]
presence_penalty = 1.1
max_tokens = 8192
query_ids.append(query_id)
texts.append(text)
# sampling_params = SamplingParams(temperature=0.3, top_p=0.5, stop="<|end|>", presence_penalty=1.1, max_tokens=8192)
sampling_params_list.append(SamplingParams(
temperature=temperature,
top_p=top_p,
stop="<|end|>",
presence_penalty=presence_penalty,
max_tokens=max_tokens
))
if len(texts) == batch_size:
break
print("texts", len(texts))
print("query_ids", len(query_ids))
print("sampling_params_list", len(sampling_params_list))
outputs = main(texts, query_ids, sampling_params_list)
print("预测完成")
generated_text_dict = {}
print("outputs", len(outputs))
for i, output in enumerate(outputs):
index = output.request_id
print(index)
generated_text = output.outputs[0].text
generated_text_dict[index] = generated_text
print(generated_text_dict)
for id_, output in generated_text_dict.items():
return_text = {"texts": output, "probabilities": None, "status_code": 200}
load_result_path = "./new_data_logs/{}.json".format(id_)
with open(load_result_path, 'w', encoding='utf8') as f2:
# ensure_ascii=False writes Chinese characters directly instead of \u escapes
# indent=4 pretty-prints the JSON
json.dump(return_text, f2, ensure_ascii=False, indent=4)
redis_.set(id_, load_result_path, 86400)
# redis_.set(id_, load_result_path, 30)
redis_.srem(db_key_querying, id_)
log.log('result saved,',
'query_id:{},load_result_path:{},return_text:{}'.format(
id_, load_result_path, return_text))
if __name__ == '__main__':
t = Thread(target=classify, args=(batch_size,))
t.start()
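On the consumer side, a client can recover the output by polling Redis for the result path the worker stores under the query id (24 h TTL) and reading the JSON file it points to. Again a hedged sketch, not code from this commit; fetch_result and its timeout defaults are illustrative.

import json
import time
import redis

pool = redis.ConnectionPool(host='localhost', port=63179, db=3, password="zhicheng123*", decode_responses=True)
redis_ = redis.Redis(connection_pool=pool)

def fetch_result(query_id, timeout=600, interval=2):
    # poll until the worker has published a result path under query_id
    deadline = time.time() + timeout
    while time.time() < deadline:
        result_path = redis_.get(query_id)
        if result_path:
            with open(result_path, encoding='utf8') as f:
                # {"texts": ..., "probabilities": None, "status_code": 200}
                return json.load(f)
        time.sleep(interval)
    return None  # still queued, or the result expired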

2
run_api_gunicorn.sh

@ -1 +1 @@
gunicorn mistral_api:app -c gunicorn_config.py
gunicorn model_api:app -c gunicorn_config.py
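gunicorn_config.py itself is not part of this commit; a minimal sketch of what such a config typically holds (every value below is a placeholder, not the project's actual setting):

# gunicorn_config.py (illustrative placeholders)
bind = "0.0.0.0:8000"  # assumed port
workers = 4            # worker processes for the Flask app
timeout = 300          # generous timeout: requests wait on the Redis-backed model queue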

1
run_model_1.sh

@ -0,0 +1 @@
nohup python mistral_model_predict_vllm_1.py > myout_mis_model_1.file 2>&1 &

1
run_model_4.sh

@ -0,0 +1 @@
nohup python mistral_model_predict_vllm_4.py > myout_mis_model_4.file 2>&1 &

1
run_model_openbuddy_llama3_1_1.sh

@ -0,0 +1 @@
nohup python openbuddy_llama3_1_model_predict_vllm_1.py > myout_model_openbuddy_llama3_1_1.file 2>&1 &

1
run_model_openbuddy_llama3_1_2.sh

@ -0,0 +1 @@
nohup python openbuddy_llama3_1_model_predict_vllm_2.py > myout_model_openbuddy_llama3_1_2.file 2>&1 &

1
run_model_openbuddy_llama3_1_3.sh

@ -0,0 +1 @@
nohup python openbuddy_llama3_1_model_predict_vllm_3.py > myout_model_openbuddy_llama3_1_3.file 2>&1 &

1
run_model_openbuddy_qwen_1.sh

@ -0,0 +1 @@
nohup python openbuddy_qwen2_5_model_predict_vllm_1.py > myout_model_openbuddy_qwen_1.file 2>&1 &

1
run_model_openbuddy_qwen_2.sh

@ -0,0 +1 @@
nohup python openbuddy_qwen2_5_model_predict_vllm_2.py > myout_model_openbuddy_qwen_2.file 2>&1 &

1
run_model_openbuddy_qwen_3.sh

@ -0,0 +1 @@
nohup python openbuddy_qwen2_5_model_predict_vllm_3.py > myout_model_openbuddy_qwen_3.file 2>&1 &

1
run_model_qwen_Instruct1.sh

@ -0,0 +1 @@
nohup python qwen2_5_Instruct_model_predict_vllm_1.py > myout_model_qwen_1.file 2>&1 &

1
run_model_qwen_Instruct2.sh

@ -0,0 +1 @@
nohup python qwen2_5_Instruct_model_predict_vllm_2.py > myout_model_qwen_2.file 2>&1 &

1
run_model_qwen_Instruct3.sh

@ -0,0 +1 @@
nohup python qwen2_5_Instruct_model_predict_vllm_3.py > myout_model_qwen_3.file 2>&1 &