
commit
62c34defd3
6 changed files with 470 additions and 0 deletions
@ -0,0 +1,21 @@ |
|||
## 安装环境 |
|||
|
|||
```bash |
|||
conda create -n llama_paper python=3.8
|||
``` |
|||
|
|||
## 启动项目 |
|||
启动此项目前必须启动 vllm-main 项目 |
|||
|
|||
```bash |
|||
conda activate llama_paper |
|||
bash run_api_gunicorn.sh |
|||
``` |
|||
|
|||
## 测试 |
|||
|
|||
```bash |
|||
curl -H "Content-Type: application/json" -X POST -d '{"orderid": "EEAE880E-BE95-11EE-8D23-D5E5C66DD02E"}' http://101.37.83.210:16005/search |
|||
``` |
|||
|
|||
返回结果中的 "status_code" 不为 400 即表示调用成功
@ -0,0 +1,207 @@ |
|||
import json |
|||
import datetime |
|||
import pymysql |
|||
import re |
|||
import requests |
|||
from flask import Flask, jsonify |
|||
from flask import request |
|||
import uuid |
|||
import time |
|||
import redis |
|||
from threading import Thread |
|||
|
|||
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=8, password="zhicheng123*") |
|||
redis_ = redis.Redis(connection_pool=pool, decode_responses=True) |
|||
|
|||
db_key_query = 'query_check_task' |
|||
db_key_querying = 'querying_check_task' |
|||
db_key_queryset = 'queryset_check_task' |
|||
db_key_query_recall = 'query_recall' |
|||
|
|||
|
|||
def run_query(conn, sql, params):
    """Run a parameterized SQL query on *conn* and return every fetched row.

    :param conn: DB-API connection whose cursor supports the context-manager protocol
    :param sql: SQL statement with placeholders
    :param params: parameter sequence/mapping bound to the placeholders
    :return: result of cursor.fetchall()
    """
    with conn.cursor() as cur:
        cur.execute(sql, params)
        return cur.fetchall()
|||
|
|||
|
|||
# def processing_one_text(paper_id): |
|||
# conn = pymysql.connect( |
|||
# host='192.168.31.145', |
|||
# port=3306, |
|||
# user='root', |
|||
# password='123456', |
|||
# db='zhiwang_db', |
|||
# charset='utf8mb4', |
|||
# cursorclass=pymysql.cursors.DictCursor |
|||
# ) |
|||
# |
|||
# sql = 'SELECT * FROM main_table_paper_detail_message WHERE doc_id=%s' |
|||
# params = (paper_id,) |
|||
# |
|||
# result = run_query(conn, sql, params) |
|||
# |
|||
# conn.close() |
|||
# print(result[0]['title'], result[0]['author']) |
|||
# title = result[0]['title'] |
|||
# author = result[0]['author'] |
|||
# degree = result[0]['degree'] |
|||
# year = result[0]['content'].split("/")[5] |
|||
# content_path = result[0]['content'] |
|||
# school = result[0]['school'] |
|||
# qikan_name = result[0]['qikan_name'] |
|||
# author = str(author).strip(";") |
|||
# author = str(author).replace(";", ",") |
|||
# # select |
|||
# # school, qikan_name |
|||
# # from main_table_paper_detail_message limit |
|||
# # 10000 \G;; |
|||
# |
|||
# try: |
|||
# with open(content_path, encoding="utf-8") as f: |
|||
# text = f.read() |
|||
# except: |
|||
# with open(content_path, encoding="gbk") as f: |
|||
# text = f.read() |
|||
# |
|||
# paper_info = { |
|||
# "title": title, |
|||
# "author": author, |
|||
# "degree": degree, |
|||
# "year": year, |
|||
# "paper_len_word": len(text), |
|||
# "school": school, |
|||
# "qikan_name": qikan_name |
|||
# } |
|||
# return paper_info |
|||
|
|||
from clickhouse_driver import Client |
|||
|
|||
class PureClient:
    """Thin wrapper around a ClickHouse connection.

    Connects to the fixed internal ClickHouse node and exposes a single
    `run` method returning a pandas DataFrame.
    NOTE(review): host/credentials are hard-coded — consider moving to config.
    """

    def __init__(self, database='test_db'):
        # Local/intranet ClickHouse node.
        self.client = Client(host='192.168.31.74', port=9000, user='default',
                             password='zhicheng123*', database=database)

    def run(self, sql, params=None):
        """Execute *sql* and return the result as a pandas DataFrame.

        :param sql: SQL text (may contain %(name)s placeholders)
        :param params: optional mapping for server-side parameter
            substitution; defaults to None, which keeps the original
            single-argument behavior for existing callers.
        :return: pandas DataFrame with one column per selected field
        """
        return self.client.query_dataframe(sql, params=params)
|||
|
|||
def processing_one_text(paper_id):
    """Fetch one paper's metadata from ClickHouse by its doc_id.

    :param paper_id: document id (uuid-like or numeric string)
    :return: dict with title/author/degree/year/school/qikan_name
    :raises ValueError: if paper_id contains characters outside the safe set
    """
    pureclient = PureClient()
    print("paper_id", paper_id)

    # paper_id is interpolated straight into the SQL text; restrict it to a
    # safe character set so a crafted id cannot inject SQL. Observed ids are
    # uuid-like (hex digits and dashes), which [\w-]+ covers.
    if not re.fullmatch(r"[\w-]+", str(paper_id)):
        raise ValueError("invalid paper_id: {!r}".format(paper_id))

    sql = 'SELECT * FROM main_paper_message WHERE doc_id={}'.format(paper_id)
    result = pureclient.run(sql)
    print("result", result)

    # result is a DataFrame; take the first (and expected only) row.
    title = result['title'][0]
    author = result['author'][0]
    degree = result['degree'][0]
    # year is encoded as the 6th path component of the content file path.
    year = result['content'][0].split("/")[5]
    school = result['school'][0]
    qikan_name = result['qikan_name'][0]

    # Normalize the author list: drop trailing semicolons, use commas.
    author = str(author).strip(";").replace(";", ",")

    paper_info = {
        "title": title,
        "author": author,
        "degree": degree,
        "year": year,
        "school": school,
        "qikan_name": qikan_name
    }
    print("paper_info", paper_info)
    return paper_info
|||
|
|||
|
|||
def ulit_recall_paper(recall_data_list_dict):
    """Turn recalled paper ids into formatted citation strings.

    :param recall_data_list_dict: mapping whose keys are paper doc_ids
        (values are ignored — only the ids are looked up)
    :return: list of citation strings
        "author.title[J|D].source,year." — de-duplicated, first-seen order
    """
    data = []
    for paper_id in recall_data_list_dict:
        data_one = processing_one_text(paper_id)
        print("ulit_recall_paper-1")

        # "[J]" marks journal articles (期刊); anything else is treated as a
        # dissertation "[D]".
        degree = "[J]" if data_one['degree'] == "期刊" else "[D]"

        # Prefer the school as the source; a single-space school means
        # "missing", in which case fall back to the journal name.
        source = data_one['school'] if data_one['school'] != " " else data_one['qikan_name']
        print("ulit_recall_paper-2")

        paper_name = ".".join([data_one['author'],
                               data_one['title'] + degree,
                               ",".join([source, data_one['year']])]) + "."
        data.append(paper_name)
        print("ulit_recall_paper-3")

    # De-duplicate while preserving first-seen order; the original
    # list(set(...)) returned the references in nondeterministic order.
    return list(dict.fromkeys(data))
|||
|
|||
|
|||
def classify_accurate_check():
    """Worker loop: consume recall results from redis and publish citations.

    Pops JSON payloads ({"uuid", "data", "is_success"}) from the
    db_key_query_recall list, formats the recalled papers into citation
    strings, writes the outcome to ./new_data_logs/<uuid>.json, and stores
    that path in redis under the query uuid (24h TTL) for the API process
    to pick up. Runs forever.
    """
    import os  # local import: this module does not import os at top level

    while True:
        # Nothing queued yet — back off and poll again.
        if redis_.llen(db_key_query_recall) == 0:
            time.sleep(1)
            continue

        print("计算结果")
        # Connection pool is created without decode_responses, so lpop
        # returns bytes and must be decoded here.
        query_recall = redis_.lpop(db_key_query_recall).decode('UTF-8')
        query_recall_dict = json.loads(query_recall)

        query_recall_uuid = query_recall_dict["uuid"]
        recall_data_list_dict = query_recall_dict["data"]
        is_success = query_recall_dict["is_success"]

        try:
            if is_success == "0":
                return_text = {"resilt": "宇鹏接口不成功", "probabilities": None, "status_code": 400}
            # The upstream service may send the literal string "{}"; an
            # actual empty dict must be treated the same (the original only
            # compared against the string and let an empty dict through).
            elif recall_data_list_dict == "{}" or not recall_data_list_dict:
                return_text = {"resilt": "查询结果为空", "probabilities": None, "status_code": 400}
            else:
                recall_data_list = ulit_recall_paper(recall_data_list_dict)
                recall_data = "\n".join(recall_data_list)
                return_text = {"resilt": recall_data, "probabilities": None, "status_code": 200}
        except Exception as e:
            # Keep the loop alive on any per-item failure, but surface it.
            print("classify_accurate_check error:", repr(e))
            return_text = {"resilt": "计算有问题", "probabilities": None, "status_code": 400}

        load_result_path = "./new_data_logs/{}.json".format(query_recall_uuid)
        print("queue_uuid: ", query_recall_uuid)
        print("load_result_path: ", load_result_path)

        # Make sure the log directory exists before writing the result file.
        os.makedirs(os.path.dirname(load_result_path), exist_ok=True)
        with open(load_result_path, 'w', encoding='utf8') as f2:
            # ensure_ascii=False keeps Chinese readable; indent for humans.
            json.dump(return_text, f2, ensure_ascii=False, indent=4)

        # Publish the result path for the API process; expire after 24h.
        redis_.set(query_recall_uuid, load_result_path, 86400)
|||
|
|||
|
|||
if __name__ == '__main__':
    # Run the redis-consuming worker loop on a background thread.
    # NOTE(review): nothing joins this thread; the main thread exits
    # immediately and the process stays alive only via the worker thread.
    t1 = Thread(target=classify_accurate_check)
    t1.start()
@ -0,0 +1,220 @@ |
|||
import os |
|||
import numpy as np |
|||
from numpy.linalg import norm |
|||
import json |
|||
import datetime |
|||
import pymysql |
|||
import re |
|||
import requests |
|||
from flask import Flask, jsonify |
|||
from flask import request |
|||
import uuid |
|||
import time |
|||
import redis |
|||
from threading import Thread |
|||
from multiprocessing import Pool |
|||
|
|||
app = Flask(__name__) |
|||
app.config["JSON_AS_ASCII"] = False |
|||
|
|||
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=8, password="zhicheng123*") |
|||
redis_ = redis.Redis(connection_pool=pool, decode_responses=True) |
|||
|
|||
db_key_query = 'query_check_task' |
|||
db_key_querying = 'querying_check_task' |
|||
db_key_queryset = 'queryset_check_task' |
|||
db_key_query_recall = 'query_recall' |
|||
|
|||
|
|||
def dialog_line_parse(url, text):
    """POST *text* as JSON to the model service at *url* and parse the reply.

    :param url: model endpoint URL
    :param text: JSON-serializable request payload
    :return: decoded JSON body on HTTP 200, otherwise an empty dict
    """
    response = requests.post(url, json=text, timeout=100000)

    # Anything but 200 is treated as a soft failure: log and return {}.
    if response.status_code != 200:
        print("【{}】 Failed to get a proper response from remote "
              "server. Status Code: {}. Response: {}"
              "".format(url, response.status_code, response.text))
        print(text)
        return {}

    return response.json()
|||
|
|||
|
|||
def recall_10(queue_uuid, title, abst_zh, content):
    '''
    Fire-and-forget call to 宇鹏's recall service.

    :param queue_uuid: query id used to correlate the asynchronous result
    :param title: paper title
    :param abst_zh: Chinese abstract text
    :param content: full paper text (may be empty)
    :return: None — the service delivers its result out of band (via redis),
        so the HTTP response body is intentionally ignored
    '''
    request_json = {
        "uuid": queue_uuid,
        "title": title,
        "abst_zh": abst_zh,
        "content": content
    }
    print(request_json)
    # Response discarded on purpose (the original bound it to an unused local).
    dialog_line_parse("http://192.168.31.74:50004/check1", request_json)
|||
|
|||
|
|||
def uilt_content(content):
    """Extract the abstract (摘要) section from a paper's full text.

    Looks for the text between the "摘要" marker and the first of
    关键词 / Abstract / 目录 (tried in that priority order). If no such
    span can be isolated, falls back to the first 15 sentence segments
    (split on "。") of the document.

    :param content: full paper text as one string
    :return: extracted abstract text (possibly the fallback prefix)
    """
    zhaiyao_en_list = ["Abstract", "abstract"]
    mulu_list = ["目录"]
    key_word_list = ["关键词"]

    key_word_bool = False
    key_word_str = ""
    zhaiyao_bool = False
    zhaiyao_en_bool = False
    zhaiyao_en_str = ""
    mulu_str = ""
    zhaiyao_text = ""
    mulu_bool = False

    # "摘要" may be typeset with whitespace between the characters ("摘 要"),
    # so match it with \s* and remember the exact form found.
    pantten_zhaiyao = '(摘\s*要)'
    result_biaoti_list = re.findall(pantten_zhaiyao, content)
    if len(result_biaoti_list) != 0:
        zhaiyao_str = result_biaoti_list[0]
        zhaiyao_bool = True

    for i in zhaiyao_en_list:
        if i in content:
            zhaiyao_en_bool = True
            zhaiyao_en_str = i
            break

    for i in mulu_list:
        if i in content:
            mulu_str = i
            mulu_bool = True
            break

    for i in key_word_list:
        if i in content:
            key_word_str = i
            key_word_bool = True
            break

    # Guard every findall result: both markers can exist but in the wrong
    # order (end marker before 摘要), in which case the pattern does not
    # match and the original code raised IndexError. Fall through to the
    # sentence-prefix fallback instead.
    if zhaiyao_bool and key_word_bool:
        found = re.findall("{}(.*?){}".format(zhaiyao_str, key_word_str), content)
        if found:
            zhaiyao_text = found[0]
    elif zhaiyao_bool and zhaiyao_en_bool:
        found = re.findall("{}(.*?){}".format(zhaiyao_str, zhaiyao_en_str), content)
        if found:
            zhaiyao_text = found[0]
    elif zhaiyao_bool and mulu_bool:
        found = re.findall("{}(.*?){}".format(zhaiyao_str, mulu_str), content)
        if found:
            zhaiyao_text = found[0]

    if zhaiyao_text == "":
        # Fallback: take the first 15 sentence segments as a pseudo-abstract.
        content = str(content).replace("。\n", "。")
        content_list = content.split("。")
        zhaiyao_text = "".join(content_list[:15])
    return zhaiyao_text
|||
|
|||
|
|||
def ulit_request_file(file):
    """Persist an uploaded .txt file and return its text as a single line.

    Saves the upload under data/request/, reads it back trying GBK first
    and falling back to UTF-8 on decode failure, then joins all non-empty
    lines with single spaces.

    :param file: werkzeug/Flask FileStorage-like object with .filename
        and .save(path)
    :return: flattened file content; "" for non-.txt uploads (the original
        hit a NameError in that case)
    """
    content = ""
    file_name = file.filename
    if file_name.split(".")[-1] == "txt":
        file_name_save = "data/request/{}".format(file_name)
        # Make sure the target directory exists before saving.
        os.makedirs(os.path.dirname(file_name_save), exist_ok=True)
        file.save(file_name_save)

        # GBK is tried first (legacy upstream files), UTF-8 as fallback;
        # only decode errors trigger the fallback, not arbitrary exceptions.
        try:
            with open(file_name_save, encoding="gbk") as f:
                content = f.read()
        except UnicodeDecodeError:
            with open(file_name_save, encoding="utf-8") as f:
                content = f.read()

        content = " ".join([i for i in content.split("\n") if i != ""])

    return content
|||
|
|||
|
|||
@app.route("/", methods=["POST"])
def handle_query():
    """Accept a reference-recall query and block until the result is ready.

    Form fields: title, abstract, nums (max number of references).
    Kicks off the recall request on a background thread, then polls redis
    for the result-file path written by the worker process, loads it, and
    returns {"resilt": [...], "probabilities": None, "status_code": 200}
    on success or status_code 400 on any failure.
    """
    try:
        title = request.form.get("title")
        abstract = request.form.get('abstract')
        nums = request.form.get('nums')

        # Full-text upload is currently disabled; recall runs on title+abstract.
        content = ""

        id_ = str(uuid.uuid1())  # unique id correlating query and result
        print("uuid: ", id_)
        d = {
            'id': id_,
            'abstract': abstract,
            'title': title,
            'nums': nums
        }

        # Fire the recall request asynchronously; the worker process drops
        # the result-file path into redis under id_ when it is done.
        recall_thread = Thread(target=recall_10, args=(id_, title, abstract, content,))
        recall_thread.start()

        # Log the raw request for debugging / replay.
        load_request_path = './request_data_logs/{}.json'.format(id_)
        os.makedirs(os.path.dirname(load_request_path), exist_ok=True)
        with open(load_request_path, 'w', encoding='utf8') as f2:
            # ensure_ascii=False keeps Chinese readable; indent for humans.
            json.dump(d, f2, ensure_ascii=False, indent=4)

        # Poll redis for the result path; sleep between polls so the loop
        # does not busy-wait and burn a CPU core (the original had no sleep).
        while True:
            result = redis_.get(id_)
            if result is not None:
                redis_.delete(id_)
                result_path = result.decode('UTF-8')
                break
            time.sleep(0.2)

        print("获取结果完成")
        with open(result_path, encoding='utf8') as f1:
            result_dict = json.load(f1)
        # "resilt" [sic] is the key the worker writes — must match its output.
        reference = result_dict["resilt"]
        status_code = str(result_dict["status_code"])

        print("结果分析完成")
        print("reference", reference)
        if status_code == "400":
            return_text = {"resilt": "", "probabilities": None, "status_code": 400}
        else:
            # Trim to the requested number of references and prefix "[n]".
            reference_list = reference.split("\n")[:int(nums)]
            print(reference_list)
            reference = [f"[{idx + 1}]" + ref for idx, ref in enumerate(reference_list)]
            if status_code == "200":
                return_text = {"resilt": reference, "probabilities": None, "status_code": 200}
            else:
                return_text = {"resilt": "", "probabilities": None, "status_code": 400}
    except Exception as e:
        # Any failure maps to a generic 400, but log it instead of
        # swallowing silently (the original used a bare except).
        print("handle_query error:", repr(e))
        return_text = {"resilt": "", "probabilities": None, "status_code": 400}
    return jsonify(return_text)
|||
|
|||
|
|||
|
|||
if __name__ == "__main__":
    # Development entry point; production serves this app through gunicorn
    # (see run_api_gunicorn.sh / gunicorn_config.py, same port 17000).
    app.run(host="0.0.0.0", port=17000, threaded=True)
@ -0,0 +1,21 @@ |
|||
# Number of parallel worker processes
workers = 2
# Bind address and port (adjust as needed)
bind = '0.0.0.0:17000'

loglevel = 'debug'

worker_class = "gevent"
# Daemonize: keep the server running after the launching shell exits
daemon = True
# Request timeout in seconds (gunicorn default is 30s)
timeout = 120
# Access and error log file paths
# NOTE(review): "acess" looks like a typo for "access", but changing the
# string would move the log file — confirm before renaming.
accesslog = './logs/acess.log'
errorlog = './logs/error.log'
# access_log_format = '%(h) - %(t)s - %(u)s - %(s)s %(H)s'
# errorlog = '-'  # log errors to stdout instead of a file


# Maximum concurrent connections per worker (relevant for the gevent worker)
worker_connections = 20000
@ -0,0 +1 @@ |
|||
gunicorn flask_api:app -c gunicorn_config.py |
Loading…
Reference in new issue