
commit
62c34defd3
6 changed files with 470 additions and 0 deletions
@ -0,0 +1,21 @@ |
## 安装环境

```bash
conda create -n your_env_name python=3.8
```

## 启动项目

启动此项目前必须启动 vllm-main 项目

```bash
conda activate llama_paper
bash run_api_gunicorn.sh
```

## 测试

```bash
curl -H "Content-Type: application/json" -X POST -d '{"orderid": "EEAE880E-BE95-11EE-8D23-D5E5C66DD02E"}' http://101.37.83.210:16005/search
```

返回 "status_code" 不出现 400 则调用成功
@ -0,0 +1,207 @@ |
|||||
|
import json |
||||
|
import datetime |
||||
|
import pymysql |
||||
|
import re |
||||
|
import requests |
||||
|
from flask import Flask, jsonify |
||||
|
from flask import request |
||||
|
import uuid |
||||
|
import time |
||||
|
import redis |
||||
|
from threading import Thread |
||||
|
|
||||
|
# Shared redis connection for the worker loop below.
# NOTE(review): decode_responses is passed to Redis() while a ConnectionPool is
# also supplied; redis-py ignores per-client connection kwargs when a pool is
# given, so responses appear to still be bytes — the .decode('UTF-8') call in
# classify_accurate_check relies on that. Confirm against the redis-py version
# in use.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=8, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Redis list/key names used to hand work between the API process and workers.
db_key_query = 'query_check_task'
db_key_querying = 'querying_check_task'
db_key_queryset = 'queryset_check_task'
db_key_query_recall = 'query_recall'
||||
|
|
||||
|
|
||||
|
def run_query(conn, sql, params):
    """Execute *sql* with *params* on *conn* and return all fetched rows."""
    with conn.cursor() as cur:
        cur.execute(sql, params)
        return cur.fetchall()
||||
|
|
||||
|
|
||||
|
# def processing_one_text(paper_id): |
||||
|
# conn = pymysql.connect( |
||||
|
# host='192.168.31.145', |
||||
|
# port=3306, |
||||
|
# user='root', |
||||
|
# password='123456', |
||||
|
# db='zhiwang_db', |
||||
|
# charset='utf8mb4', |
||||
|
# cursorclass=pymysql.cursors.DictCursor |
||||
|
# ) |
||||
|
# |
||||
|
# sql = 'SELECT * FROM main_table_paper_detail_message WHERE doc_id=%s' |
||||
|
# params = (paper_id,) |
||||
|
# |
||||
|
# result = run_query(conn, sql, params) |
||||
|
# |
||||
|
# conn.close() |
||||
|
# print(result[0]['title'], result[0]['author']) |
||||
|
# title = result[0]['title'] |
||||
|
# author = result[0]['author'] |
||||
|
# degree = result[0]['degree'] |
||||
|
# year = result[0]['content'].split("/")[5] |
||||
|
# content_path = result[0]['content'] |
||||
|
# school = result[0]['school'] |
||||
|
# qikan_name = result[0]['qikan_name'] |
||||
|
# author = str(author).strip(";") |
||||
|
# author = str(author).replace(";", ",") |
||||
|
# # select |
||||
|
# # school, qikan_name |
||||
|
# # from main_table_paper_detail_message limit |
||||
|
# # 10000 \G;; |
||||
|
# |
||||
|
# try: |
||||
|
# with open(content_path, encoding="utf-8") as f: |
||||
|
# text = f.read() |
||||
|
# except: |
||||
|
# with open(content_path, encoding="gbk") as f: |
||||
|
# text = f.read() |
||||
|
# |
||||
|
# paper_info = { |
||||
|
# "title": title, |
||||
|
# "author": author, |
||||
|
# "degree": degree, |
||||
|
# "year": year, |
||||
|
# "paper_len_word": len(text), |
||||
|
# "school": school, |
||||
|
# "qikan_name": qikan_name |
||||
|
# } |
||||
|
# return paper_info |
||||
|
|
||||
|
from clickhouse_driver import Client |
||||
|
|
||||
|
class PureClient:
    """Thin wrapper around a ClickHouse connection for dataframe queries."""

    def __init__(self, database='test_db'):
        # Connects to the internal ClickHouse server; only the local-network
        # address is required.
        self.client = Client(host='192.168.31.74', port=9000, user='default',
                             password='zhicheng123*', database=database)

    def run(self, sql):
        """Run *sql* and return the result set as a pandas DataFrame."""
        return self.client.query_dataframe(sql)
||||
|
|
||||
|
def processing_one_text(paper_id):
    """Look up one paper in ClickHouse by doc_id and return its metadata.

    :param paper_id: document id used in the WHERE clause
    :return: dict with keys title, author, degree, year, school, qikan_name

    NOTE(review): paper_id is interpolated straight into the SQL string —
    confirm it is always an internally generated id, otherwise this query is
    injectable.
    """
    client = PureClient()
    print("paper_id", paper_id)
    query = 'SELECT * FROM main_paper_message WHERE doc_id={}'.format(paper_id)
    frame = client.run(query)
    print("result", frame)

    # Authors are ';'-separated in storage; normalize to ','-separated.
    authors = str(frame['author'][0]).strip(";").replace(";", ",")
    paper_info = {
        "title": frame['title'][0],
        "author": authors,
        "degree": frame['degree'][0],
        # year is encoded as the 6th component of the content file path —
        # assumption carried over from the original; confirm path layout.
        "year": frame['content'][0].split("/")[5],
        "school": frame['school'][0],
        "qikan_name": frame['qikan_name'][0],
    }
    print("paper_info", paper_info)
    return paper_info
||||
|
|
||||
|
|
||||
|
def ulit_recall_paper(recall_data_list_dict):
    '''
    Format each recalled paper as a single citation string.
    :param recall_data_list_dict: mapping of paper_id -> recall payload
    :return: de-duplicated list of citation strings
        ("author.title[J|D].source,year.")
    '''
    citations = []
    for paper_id in recall_data_list_dict:
        info = processing_one_text(paper_id)
        print("ulit_recall_paper-1")
        # Journal articles are tagged [J], dissertations [D].
        suffix = "[J]" if info['degree'] == "期刊" else "[D]"
        # Prefer the school as source; fall back to the journal name when the
        # school field is the single-space placeholder.
        source = info['school'] if info['school'] != " " else info['qikan_name']
        print("ulit_recall_paper-2")
        citation = ".".join([
            info['author'],
            info['title'] + suffix,
            ",".join([source, info['year']]),
        ]) + "."
        citations.append(citation)
        print("ulit_recall_paper-3")
    # De-duplicate; ordering is not preserved (set round-trip).
    return list(set(citations))
||||
|
|
||||
|
|
||||
|
def classify_accurate_check():
    """Worker loop: pop recall results from redis, format, persist to disk.

    Runs forever. Each item on ``db_key_query_recall`` is a JSON payload with
    ``uuid``, ``data`` (paper recall dict) and ``is_success``. The formatted
    result is written to ``./new_data_logs/<uuid>.json`` and that path is
    published back under the request uuid (24h TTL) for the API process.
    """
    while True:
        if redis_.llen(db_key_query_recall) == 0:  # nothing queued yet
            time.sleep(1)
            continue

        print("计算结果")
        query_recall = redis_.lpop(db_key_query_recall).decode('UTF-8')
        query_recall_dict = json.loads(query_recall)

        query_recall_uuid = query_recall_dict["uuid"]
        recall_data_list_dict = query_recall_dict["data"]
        is_success = query_recall_dict["is_success"]

        try:
            if is_success == "0":
                return_text = {"resilt": "宇鹏接口不成功", "probabilities": None, "status_code": 400}
            else:
                # NOTE(review): "data" is compared against the *string* "{}" —
                # confirm the producer serializes an empty result that way; an
                # actual empty dict {} would not match this check.
                if recall_data_list_dict == "{}":
                    return_text = {"resilt": "查询结果为空", "probabilities": None, "status_code": 400}
                else:
                    recall_data_list = ulit_recall_paper(recall_data_list_dict)
                    recall_data = "\n".join(recall_data_list)
                    return_text = {"resilt": recall_data, "probabilities": None, "status_code": 200}
        except Exception as exc:
            # Was a bare except: keep the worker alive, but log why it failed
            # instead of silently swallowing the error (and no longer trap
            # KeyboardInterrupt/SystemExit).
            print("classify_accurate_check error:", repr(exc))
            return_text = {"resilt": "计算有问题", "probabilities": None, "status_code": 400}

        load_result_path = "./new_data_logs/{}.json".format(query_recall_uuid)
        print("queue_uuid: ", query_recall_uuid)
        print("load_result_path: ", load_result_path)

        # ensure_ascii=False keeps Chinese readable; indent for debuggability.
        with open(load_result_path, 'w', encoding='utf8') as f2:
            json.dump(return_text, f2, ensure_ascii=False, indent=4)

        # Publish the result file path; expires after one day.
        redis_.set(query_recall_uuid, load_result_path, 86400)
||||
|
|
||||
|
|
||||
|
if __name__ == '__main__':
    # Run the redis-driven worker loop in a background thread.
    worker = Thread(target=classify_accurate_check)
    worker.start()
@ -0,0 +1,220 @@ |
|||||
|
import os |
||||
|
import numpy as np |
||||
|
from numpy.linalg import norm |
||||
|
import json |
||||
|
import datetime |
||||
|
import pymysql |
||||
|
import re |
||||
|
import requests |
||||
|
from flask import Flask, jsonify |
||||
|
from flask import request |
||||
|
import uuid |
||||
|
import time |
||||
|
import redis |
||||
|
from threading import Thread |
||||
|
from multiprocessing import Pool |
||||
|
|
||||
|
# Flask application; JSON_AS_ASCII=False keeps Chinese readable in responses.
app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False

# Shared redis connection used to wait for the worker's result.
# NOTE(review): decode_responses on Redis() is ignored when a ConnectionPool
# is supplied — handle_query's .decode('UTF-8') relies on bytes responses.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=8, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Redis list/key names shared with the worker process.
db_key_query = 'query_check_task'
db_key_querying = 'querying_check_task'
db_key_queryset = 'queryset_check_task'
db_key_query_recall = 'query_recall'
||||
|
|
||||
|
|
||||
|
def dialog_line_parse(url, text):
    """
    POST *text* as a JSON body to the model service at *url*.
    :param url: model endpoint URL
    :param text: JSON-serializable request payload
    :return: decoded JSON response, or {} on any non-200 status
    """
    response = requests.post(url, json=text, timeout=100000)
    if response.status_code != 200:
        # Keep a trace of failed calls (payload included) for debugging.
        print("【{}】 Failed to get a proper response from remote "
              "server. Status Code: {}. Response: {}"
              "".format(url, response.status_code, response.text))
        print(text)
        return {}
    return response.json()
||||
|
|
||||
|
|
||||
|
def recall_10(queue_uuid, title, abst_zh, content):
    '''
    Fire-and-forget call to the recall (宇鹏) service.
    :param queue_uuid: request uuid used to correlate the async result
    :param title: paper title
    :param abst_zh: Chinese abstract
    :param content: full text (may be empty)
    :return: None — the service pushes its result back through redis
    '''
    payload = {
        "uuid": queue_uuid,
        "title": title,
        "abst_zh": abst_zh,
        "content": content
    }
    print(payload)
    dialog_line_parse("http://192.168.31.74:50004/check1", payload)
||||
|
|
||||
|
|
||||
|
def uilt_content(content):
    """Extract the abstract ("摘要") section from a paper's full text.

    Takes the text between the 摘要 marker and the first of: 关键词
    (keywords), an English "Abstract" marker, or 目录 (table of contents),
    tried in that priority order. If no bounded abstract can be found, falls
    back to the first 15 sentences of the document.

    :param content: full paper text as a single string
    :return: extracted abstract text (possibly the sentence fallback)
    """
    zhaiyao_en_list = ["Abstract", "abstract"]
    mulu_list = ["目录"]
    key_word_list = ["关键词"]

    # Locate the start marker; whitespace inside is allowed (e.g. "摘 要").
    zhaiyao_match = re.findall(r'(摘\s*要)', content)
    zhaiyao_str = zhaiyao_match[0] if zhaiyao_match else ""

    def _first_present(candidates):
        # First candidate substring actually present in the text, else "".
        for cand in candidates:
            if cand in content:
                return cand
        return ""

    key_word_str = _first_present(key_word_list)
    zhaiyao_en_str = _first_present(zhaiyao_en_list)
    mulu_str = _first_present(mulu_list)

    zhaiyao_text = ""
    if zhaiyao_str:
        # Try each end marker in priority order. re.escape guards against
        # regex metacharacters, and an empty findall now falls through to the
        # next marker / the sentence fallback instead of raising IndexError
        # as the original did (e.g. when the end marker appears *before* the
        # 摘要 marker).
        for end_str in (key_word_str, zhaiyao_en_str, mulu_str):
            if not end_str:
                continue
            found = re.findall(
                "{}(.*?){}".format(re.escape(zhaiyao_str), re.escape(end_str)),
                content)
            if found:
                zhaiyao_text = found[0]
                break

    if zhaiyao_text == "":
        # Fallback: join the first 15 sentences (。-delimited) verbatim.
        content = str(content).replace("。\n", "。")
        content_list = content.split("。")
        zhaiyao_text = "".join(content_list[:15])
    return zhaiyao_text
||||
|
|
||||
|
|
||||
|
def ulit_request_file(file):
    """Persist an uploaded .txt file and return its text content.

    :param file: uploaded file object (werkzeug FileStorage: .filename, .save)
    :return: file content with blank lines dropped and newlines replaced by
        spaces; "" when the upload is not a .txt file
    """
    # Initialize so non-.txt uploads return "" — the original left `content`
    # unbound and raised UnboundLocalError at the return.
    content = ""
    file_name = file.filename
    if file_name.split(".")[-1] == "txt":
        file_name_save = "data/request/{}".format(file_name)
        # file.save() fails if the target directory is missing.
        os.makedirs("data/request", exist_ok=True)
        file.save(file_name_save)
        # Uploaded papers may be GBK- or UTF-8-encoded; try GBK first.
        try:
            with open(file_name_save, encoding="gbk") as f:
                content = f.read()
        except UnicodeDecodeError:
            with open(file_name_save, encoding="utf-8") as f:
                content = f.read()

        content = " ".join([i for i in content.split("\n") if i != ""])

    return content
||||
|
|
||||
|
|
||||
|
@app.route("/", methods=["POST"])
def handle_query():
    """Accept a reference-recall request, dispatch it, and wait for the result.

    Form fields: ``title``, ``abstract``, ``nums`` (max number of references).
    Blocks until the background worker publishes the result file path in
    redis, then returns ``{"resilt": ..., "probabilities": None,
    "status_code": 200|400}`` as JSON.
    """
    try:
        title = request.form.get("title")
        # file = request.files.get('file')
        abstract = request.form.get('abstract')
        nums = request.form.get('nums')

        # content = ulit_request_file(file)
        content = ""

        id_ = str(uuid.uuid1())  # unique id correlating this query end-to-end
        print("uuid: ", id_)
        print(id_)
        d = {
            'id': id_,
            'abstract': abstract,
            'title': title,
            'nums': nums
        }
        # Hand the query to the recall service asynchronously; its result
        # comes back via redis under id_.
        Thread_rellce = Thread(target=recall_10, args=(id_, title, abstract, content,))
        Thread_rellce.start()

        # Persist the request for auditing/debugging; ensure the log dir
        # exists so the first request doesn't fail.
        os.makedirs('./request_data_logs', exist_ok=True)
        load_request_path = './request_data_logs/{}.json'.format(id_)
        with open(load_request_path, 'w', encoding='utf8') as f2:
            json.dump(d, f2, ensure_ascii=False, indent=4)

        # Poll redis for the worker's result. The original looped without
        # sleeping, pinning a CPU core and hammering redis; back off briefly
        # between polls.
        while True:
            result = redis_.get(id_)
            if result is not None:
                redis_.delete(id_)
                result_path = result.decode('UTF-8')
                break
            time.sleep(0.2)

        print("获取结果完成")
        with open(result_path, encoding='utf8') as f1:
            result_dict = json.load(f1)
        reference = result_dict["resilt"]
        status_code = str(result_dict["status_code"])

        print("结果分析完成")
        print("reference", reference)
        if status_code == "400":
            return_text = {"resilt": "", "probabilities": None, "status_code": 400}
        else:
            # Truncate to the requested number and number each entry "[n]".
            reference_list = reference.split("\n")
            reference_list = reference_list[:int(nums)]
            print(reference_list)
            reference = [f"[{str(i+1)}]" + reference_list[i] for i in range(len(reference_list))]
            if status_code == "200":
                return_text = {"resilt": reference, "probabilities": None, "status_code": 200}
            else:
                return_text = {"resilt": "", "probabilities": None, "status_code": 400}
    except Exception as exc:
        # Was a bare except that silently swallowed every failure; log it so
        # 400s are diagnosable (and KeyboardInterrupt is no longer trapped).
        print("handle_query error:", repr(exc))
        return_text = {"resilt": "", "probabilities": None, "status_code": 400}
    return jsonify(return_text)
||||
|
|
||||
|
|
||||
|
|
||||
|
if __name__ == "__main__":
    # Development entrypoint; production runs under gunicorn
    # (see run_api_gunicorn.sh / gunicorn_config.py, both bound to :17000).
    app.run(host="0.0.0.0", port=17000, threaded=True)
@ -0,0 +1,21 @@ |
|||||
|
# Number of parallel worker processes.
workers = 2
# Bind address/port (adjust as needed); matches the Flask dev port 17000.
bind = '0.0.0.0:17000'

loglevel = 'debug'

worker_class = "gevent"
# Daemonize: keep the server running after the launching shell disconnects.
daemon = True
# Worker timeout in seconds (gunicorn default is 30); tune to workload.
timeout = 120
# Access / error log file paths.
accesslog = './logs/acess.log'
errorlog = './logs/error.log'
# access_log_format = '%(h) - %(t)s - %(u)s - %(s)s %(H)s'
# errorlog = '-'  # log to stdout instead


# Maximum concurrent connections per worker.
worker_connections = 20000
@ -0,0 +1 @@ |
|||||
|
# Launch the Flask API (flask_api.py) under gunicorn with gunicorn_config.py.
gunicorn flask_api:app -c gunicorn_config.py
Loading…
Reference in new issue