chatgpt服务使用vllm方式预测

2 years ago · 91a9e27a54
5 changed files with 328 additions and 26 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,175 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+# VSCode
+.vscode/
+
+# DS Store
+.DS_Store
+
+# Results
+*.csv
+
+# Python pickle files
+*.pkl
+
+# Sphinx documentation
+_build/
--- a/README.md
+++ b/README.md
@ -1,14 +1,22 @@
-# VLLM项目
+## 安装环境

-## 项目启动
-bash run_model_predict.sh
-bash run_api.sh
+```bash
+conda create -n vllm python=3.9
+conda activate vllm
+pip install -r requirements.txt
+```

-## 调用示例
-"http://192.168.31.74:12000/predict"
+## 启动项目

-{
-    "texts": "你好"
-}
-## 返回示例
-## 请求参数
+```bash
+conda activate vllm
+bash run_api_nohup.sh
+bash run_model.sh
+```
+
+## 测试
+
+```bash
+curl -H "Content-Type: application/json" -X POST -d '{"texts": "User: 你好\nAssistant:"}' http://192.168.31.149:12000/predict
+curl -H "Content-Type: application/json" -X POST -d '{"id": "b412fc98-bdd7-11ee-8d23-d5e5c66dd02e"}' http://192.168.31.149:12000/search
+```
+若 /predict 返回的 "status_code" 为 200（而不是 400），则请求提交成功；再用返回的 id 调用 /search 查询结果。
--- a/mistral_api.py
+++ b/mistral_api.py
@ -21,25 +21,113 @@ pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=50,db=
 redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

 db_key_query = 'query'  # Redis list: /predict pushes {"id", "path"} JSON entries; /search scans it
-db_key_query_articles_directory = 'query_articles_directory'
+db_key_querying = 'querying'  # Redis set: /predict adds each new request id; /search tests membership
+db_key_queryset = 'queryset'  # Redis set: /predict adds each new request id
 db_key_result = 'result'  # not referenced in the visible part of this file
+db_key_error = 'error'  # Redis list: /search pushes {"id"} JSON entries for failed results
+
+def smtp_f(name):
+    """Send an alert e-mail whose subject is ``name``.
+
+    Best-effort: a failure to connect, authenticate or send is reported on
+    stdout and swallowed, so calling this from an error handler never raises.
+
+    Args:
+        name: Subject line of the alert e-mail.
+    """
+    import os
+    import smtplib
+    from email.header import Header
+    from email.mime.text import MIMEText
+
+    sender = '838878981@qq.com'  # sending mailbox
+    receivers = ['838878981@qq.com']  # receiving mailbox
+    # NOTE(review): this authorization code is committed to source control.
+    # Rotate it and supply the new one through SMTP_AUTH_CODE; the literal is
+    # kept only as a fallback so existing deployments keep working.
+    auth_code = os.environ.get("SMTP_AUTH_CODE", "jfqtutaiwrtdbcge")
+
+    message = MIMEText('基础大模型出现错误，紧急', 'plain', 'utf-8')
+    message['From'] = Header("Sender<%s>" % sender)  # sender
+    message['To'] = Header("Receiver<%s>" % receivers[0])  # recipient
+    message['Subject'] = Header(name, 'utf-8')
+
+    try:
+        # The context manager closes the connection even when login/sendmail
+        # raises; the timeout keeps a dead mail server from blocking forever.
+        with smtplib.SMTP_SSL('smtp.qq.com', 465, timeout=10) as server:
+            server.login(sender, auth_code)
+            server.sendmail(sender, receivers, message.as_string())
+        print("邮件发送成功")
+    except (smtplib.SMTPException, OSError):
+        # OSError covers DNS, connection and timeout failures, which are not
+        # SMTPException subclasses.
+        print("Error: 无法发送邮件")
+

@app.route("/predict", methods=["POST"])
-def handle_query():
+def predict():
+    """Queue a generation request and return its id without waiting for the result.
+
+    The request is dumped to ``./request_data_logs/<id>.json``; the id and that
+    path are pushed onto the ``db_key_query`` Redis list and the id is added to
+    the ``db_key_querying`` and ``db_key_queryset`` sets.
+
+    Returns:
+        JSON ``{"texts": {"id": ...}, "probabilities": None, "status_code": ...}``
+        where ``status_code`` is 200 on success, or 400 if writing the file or
+        queueing in Redis failed (an alert e-mail is sent in that case).
+    """
    text = request.json["texts"]  # text of the user's query, e.g. "I love you"
    id_ = str(uuid.uuid1())  # unique identifier for this query
+    print("uuid: ", id_)  # log the generated id, not the uuid module object
    d = {'id': id_, 'text': text}  # bind the text to the query id
-    redis_.rpush(db_key_query, json.dumps(d))  # 加入redis
-    time.sleep(1)
-    while True:
-        result = redis_.get(id_)  # 获取该query的模型结果
+    try:
+        load_request_path = './request_data_logs/{}.json'.format(id_)
+        with open(load_request_path, 'w', encoding='utf8') as f2:
+            # ensure_ascii=False writes Chinese text as-is instead of \u escapes;
+            # indent=4 pretty-prints the file
+            json.dump(d, f2, ensure_ascii=False, indent=4)
+        # Only the id and the file path are queued, not the text itself
+        redis_.rpush(db_key_query, json.dumps({"id": id_, "path": load_request_path}))
+        redis_.sadd(db_key_querying, id_)
+        redis_.sadd(db_key_queryset, id_)
+        return_text = {"texts": {'id': id_, }, "probabilities": None, "status_code": 200}
+    except Exception:
+        # Any failure while persisting or queueing is reported as 400 plus an alert e-mail
+        return_text = {"texts": {'id': id_, }, "probabilities": None, "status_code": 400}
+        smtp_f("vllm-main")
+    return jsonify(return_text)  # return the response
+
+
+@app.route("/search", methods=["POST"])
+def search():
+    """Look up the state or result of a request previously submitted to /predict.
+
+    If Redis holds a value under the id, it is treated as the path of a JSON
+    result file and its ``status_code``/``texts``/``probabilities`` are returned
+    as ``code``/``text``/``probabilities``. A result whose code is 400 is also
+    recorded on the ``db_key_error`` list.
+
+    Otherwise ``code`` describes where the id was found:
+        "201": id is in the ``db_key_querying`` set and still on the ``db_key_query`` list
+        "202": id is in the ``db_key_querying`` set but no longer on the list
+        "203": id is not in the ``db_key_querying`` set (the response is also
+               dumped to ``./request_data_logs_203/<id>.json``)
+    Any exception yields ``code`` "400" and triggers an alert e-mail.
+    NOTE(review): presumably 201 means "waiting" and 202 "being processed" --
+    confirm against the worker that consumes the queue.
+    """
+    id_ = request.json['id']  # id returned earlier by /predict
+    try:
+        # Inside the try so that a Redis failure takes the 400 path below
+        result = redis_.get(id_)  # value stored under the id, None if absent
        if result is not None:
-            redis_.delete(id_)
-            result_text = {'code': "200", 'data': json.loads(result)}
-            break
-        time.sleep(1)
-    return jsonify(result_text)  # 返回结果
+            result_path = result.decode('UTF-8')
+            with open(result_path, encoding='utf8') as f1:
+                result_dict = json.load(f1)
+            code = result_dict["status_code"]
+            texts = result_dict["texts"]
+            probabilities = result_dict["probabilities"]
+            if str(code) == "400":
+                # Record the failed id; the payload is still returned to the
+                # caller below (a bare False is not a valid Flask response).
+                redis_.rpush(db_key_error, json.dumps({"id": id_}))
+            result_text = {'code': code, 'text': texts, 'probabilities': probabilities}
+        else:
+            # Ask Redis for membership instead of downloading the whole set
+            querying_bool = bool(redis_.sismember(db_key_querying, id_))

+            # ids still sitting on the request list
+            query_set_ids = {
+                json.loads(item)['id']
+                for item in redis_.lrange(db_key_query, 0, -1)
+            }
+            query_bool = id_ in query_set_ids
+
+            if querying_bool and query_bool:
+                result_text = {'code': "201", 'text': "", 'probabilities': None}
+            elif querying_bool:
+                result_text = {'code': "202", 'text': "", 'probabilities': None}
+            else:
+                result_text = {'code': "203", 'text': "", 'probabilities': None}
+                load_request_path = './request_data_logs_203/{}.json'.format(id_)
+                with open(load_request_path, 'w', encoding='utf8') as f2:
+                    # ensure_ascii=False writes Chinese text as-is instead of \u escapes;
+                    # indent=4 pretty-prints the file
+                    json.dump(result_text, f2, ensure_ascii=False, indent=4)
+    except Exception:
+        smtp_f("vllm-main")
+        result_text = {'code': "400", 'text': "", 'probabilities': None}
+    return jsonify(result_text)  # return the response

 if __name__ == "__main__":
-    app.run(debug=False, host='0.0.0.0', port=12001)
+    app.run(debug=False, host='0.0.0.0', port=12000)
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,30 @@
+Flask==3.0.0
+gevent==23.9.1
+greenlet==3.0.3
+gunicorn==21.2.0
+numpy==1.26.3
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.18.1
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+pandas==2.1.4
+redis==5.0.1
+requests==2.31.0
+safetensors==0.4.1
+tokenizers==0.15.0
+torch==2.1.2
+tqdm==4.66.1
+transformers==4.36.2
+urllib3==2.1.0
+uvicorn==0.25.0
+vllm==0.2.0
+xformers==0.0.23.post1
+
--- a/测试并发请求.py
+++ b/测试并发请求.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import os
 import concurrent.futures
 import time
 from threading import Thread
@ -8,7 +9,7 @@ import json


 def request_api_chatgpt(prompt):
-    url = "http://192.168.31.149:12003/predict"
+    url = "http://192.168.31.149:12000/predict"
    data = {
        "texts": prompt
    }
@ -33,7 +34,7 @@ def request_api_chatgpt(prompt):

 def uuid_search(uuid):

-    url = "http://192.168.31.149:12003/search"
+    url = "http://192.168.31.149:12000/search"
    data = {
        "id": uuid
    }
@ -56,7 +57,7 @@ def uuid_search(uuid):
        return {}


-input_values_1 = ["User: 背景:我是一名博士生，我想写一篇论文。\n角色：我需要你作为拥有google学术论文数据库和百度学术论文数据库的资深数据。\n要求：根据论文题目“中学教育国际化之路”和目录“一、 引言\n1.1 研究背景\n1.2 研究目的\n1.3 研究意义\n二、 国际化教育的概念与特点\n2.1 国际化教育的定义\n2.2 国际化教育的目标\n2.3 国际化教育的特点\n三、 中学教育国际化的现状分析\n3.1 国内中学教育国际化的发展历程\n3.2 国内中学教育国际化的现状\n3.3 存在的问题与挑战\n四、 中学教育国际化的路径探索\n4.1 课程设置与教学方法改革\n4.2 师资队伍建设与培训\n4.3 学校合作与交流机制建设\n五、 中学教育国际化的效果评估\n5.1 学生国际视野与跨文化交际能力的提升\n5.2 学校与教育机构的国际合作与交流的拓展\n5.3 教育资源的优化配置与整合\n六、 结论\n6.1 研究总结\n6.2 研究展望”，为小标题“4.2 师资队伍建设与培训”填充1200字左右的中文内容，内容第一行返回“4.2 师资队伍建设与培训”且不包含目录中其他标题，要求生成内容符合学术论文写法，可以出现三级标题，绝不能出现“首先”，“其次”等字样的词\n\nAssistant:"] * 50
+input_values_1 = ["User: 背景:我是一名博士生，我想写一篇论文。\n角色：我需要你作为拥有google学术论文数据库和百度学术论文数据库的资深数据。\n要求：根据论文题目“中学教育国际化之路”和目录“一、 引言\n1.1 研究背景\n1.2 研究目的\n1.3 研究意义\n二、 国际化教育的概念与特点\n2.1 国际化教育的定义\n2.2 国际化教育的目标\n2.3 国际化教育的特点\n三、 中学教育国际化的现状分析\n3.1 国内中学教育国际化的发展历程\n3.2 国内中学教育国际化的现状\n3.3 存在的问题与挑战\n四、 中学教育国际化的路径探索\n4.1 课程设置与教学方法改革\n4.2 师资队伍建设与培训\n4.3 学校合作与交流机制建设\n五、 中学教育国际化的效果评估\n5.1 学生国际视野与跨文化交际能力的提升\n5.2 学校与教育机构的国际合作与交流的拓展\n5.3 教育资源的优化配置与整合\n六、 结论\n6.1 研究总结\n6.2 研究展望”，为小标题“4.2 师资队伍建设与培训”填充1200字左右的中文内容，内容第一行返回“4.2 师资队伍建设与培训”且不包含目录中其他标题，要求生成内容符合学术论文写法，可以出现三级标题，绝不能出现“首先”，“其次”等字样的词\n\nAssistant:"] * 10
 input_values_2 = ["User: 任务：生成论文小标题内容\n任务：问：论文题目是《空调后板压筋成形的回弹翘曲研究》，目录是“一、引言\n1.1 研究背景\n1.2 研究意义\n1.3 回弹翘曲的定义与影响因素\n\n二、相关理论与方法\n2.1 空调后板压筋成形技术\n2.2 回弹翘曲的测量方法\n2.3 影响回弹翘曲的因素分析\n\n三、实验设计与参数设置\n3.1 实验样品制备\n3.2 实验装置与测量仪器\n3.3 实验参数设置\n\n四、回弹翘曲的数值模拟\n4.1 模型建立\n4.2 材料参数与边界条件\n4.3 数值模拟结果分析\n\n五、实验结果与分析\n5.1 回弹翘曲的测量结果\n5.2 影响因素的实验分析\n5.3 实验结果与数值模拟结果的对比\n\n六、结论与展望\n6.1 结论\n6.2 研究的不足与改进方向”，请把其中的小标题“6.2 研究的不足与改进方向”的内容补充完整，补充内容字数在600字左右\n答：\n\nAssistant:"] * 50