根据标题和摘要生成参考文献
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

220 lines
6.7 KiB

import os
import numpy as np
from numpy.linalg import norm
import json
import datetime
import pymysql
import re
import requests
from flask import Flask, jsonify
from flask import request
import uuid
import time
import redis
from threading import Thread
from multiprocessing import Pool
# Flask application object; the route below is registered on it.
app = Flask(__name__)
# Flask setting intended to keep non-ASCII (e.g. Chinese) characters unescaped
# in JSON responses.
app.config["JSON_AS_ASCII"] = False
# Shared Redis connection pool (database 8, at most 100 connections).
# NOTE(review): host, port and password are hard-coded here; consider loading
# them from configuration or the environment instead of source code.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=8, password="zhicheng123*")
# NOTE(review): decode_responses is passed to Redis() next to an explicit
# connection pool, yet handle_query still calls .decode() on the values it
# reads, so replies appear to arrive as bytes - confirm whether this flag has
# any effect in this setup before relying on it.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
# Redis key names. None of them is referenced in the visible part of this
# file; presumably they are shared with a worker process - verify.
db_key_query = 'query_check_task'
db_key_querying = 'querying_check_task'
db_key_queryset = 'queryset_check_task'
db_key_query_recall = 'query_recall'
def dialog_line_parse(url, text):
    """
    POST ``text`` as a JSON body to ``url`` and return the decoded JSON reply.

    :param url: URL of the model service endpoint.
    :param text: JSON-serialisable payload sent as the request body.
    :return: The parsed JSON response when the service answers with HTTP 200;
        otherwise an empty dict, after the failure and the payload have been
        printed.
    """
    response = requests.post(
        url,
        json=text,
        # Timeout in seconds, kept from the original code. It is deliberately
        # huge (more than a day), so a stalled service blocks the caller for
        # a very long time.
        timeout=100000
    )
    if response.status_code == 200:
        return response.json()

    # Best-effort failure path: report what went wrong and hand back an empty
    # result instead of raising.
    print("【{}】 Failed to get a proper response from remote "
          "server. Status Code: {}. Response: {}"
          "".format(url, response.status_code, response.text))
    print(text)
    return {}
def recall_10(queue_uuid, title, abst_zh, content):
    '''
    Submit one recall job to the remote recall service.

    The four arguments are packed into a JSON payload, the payload is printed,
    and it is posted through ``dialog_line_parse``. The service reply is not
    used.

    :param queue_uuid: identifier of the query, sent as "uuid"
    :param title: paper title, sent as "title"
    :param abst_zh: abstract text, sent as "abst_zh"
    :param content: body text, sent as "content"
    :return: None
    '''
    payload = dict(
        uuid=queue_uuid,
        title=title,
        abst_zh=abst_zh,
        content=content,
    )
    print(payload)
    dialog_line_parse("http://192.168.31.74:50004/check1", payload)
def uilt_content(content):
    """
    Extract the Chinese abstract from the text of a paper.

    The abstract is the text between the first "摘要" heading (whitespace is
    allowed between the two characters) and an end marker. End markers are
    tried in priority order: "关键词", then "Abstract"/"abstract", then
    "目录". A marker that is present but never follows the heading is
    skipped and the next one is tried.

    When no abstract can be extracted, the first 15 characters of the text
    (with newlines removed) are returned instead.

    :param content: full text of the paper
    :return: the extracted abstract, or the 15-character fallback
    """
    abstract_text = ""
    heading = re.search(r'(摘\s*要)', content)
    if heading is not None:
        # Use the exact spelling found in the text, escaped so that it is
        # always matched literally.
        start_marker = re.escape(heading.group(1))
        # Priority order of end markers; inside a group the first spelling
        # that occurs in the text wins.
        end_marker_groups = (
            ["关键词"],
            ["Abstract", "abstract"],
            ["目录"],
        )
        for group in end_marker_groups:
            end_marker = next((m for m in group if m in content), None)
            if end_marker is None:
                continue
            pattern = "{}(.*?){}".format(start_marker, re.escape(end_marker))
            # Single-line match first, so existing results stay the same;
            # DOTALL second, so an abstract spanning several lines is found
            # instead of raising IndexError on an empty result list.
            found = re.search(pattern, content) or re.search(pattern, content, re.DOTALL)
            if found is not None:
                abstract_text = found.group(1)
                break
    if abstract_text == "":
        # The original called content.split(""), which always raises
        # "ValueError: empty separator". An empty separator is read here as
        # splitting between characters, i.e. the first 15 characters.
        # NOTE(review): confirm a sentence separator was not intended.
        abstract_text = str(content).replace("\n", "")[:15]
    return abstract_text
def ulit_request_file(file):
    """
    Save an uploaded ``.txt`` file and return its text on a single line.

    The upload is stored under ``data/request/`` and read back as text.
    Non-empty lines are joined with single spaces.

    :param file: uploaded file object exposing ``filename`` and ``save(path)``
        (presumably a Flask/Werkzeug upload - verify against the caller)
    :return: the file text with line breaks replaced by spaces, or ``""`` when
        the upload is not a ``.txt`` file
    """
    # The client controls the upload name; basename() drops any directory
    # components so the file cannot be written outside data/request/.
    file_name = os.path.basename(file.filename or "")
    if file_name.split(".")[-1] != "txt":
        # Previously this path left `content` undefined.
        return ""

    file_name_save = "data/request/{}".format(file_name)
    file.save(file_name_save)
    try:
        # Try UTF-8 first: it rejects most non-UTF-8 input, whereas GBK
        # happily decodes UTF-8 bytes into mojibake without raising.
        with open(file_name_save, encoding="utf-8") as f:
            content = f.read()
    except UnicodeDecodeError:
        with open(file_name_save, encoding="gbk") as f:
            content = f.read()
    return " ".join([line for line in content.split("\n") if line != ""])
@app.route("/", methods=["POST"])
def handle_query():
    """
    Generate a numbered reference list for a paper title and abstract.

    Reads the form fields ``title``, ``abstract`` and ``nums``, starts the
    recall request in a background thread, writes the request to
    ``./request_data_logs/<id>.json``, then polls Redis under the request id
    until a result file path appears. That file is loaded and at most ``nums``
    references are returned, each prefixed with ``[n]``.

    :return: JSON response with the keys ``resilt`` (list of references, or
        ``""`` on failure), ``probabilities`` (always ``None``) and
        ``status_code`` (200 or 400). The key spelling ``resilt`` is part of
        the existing response format and is kept as is.
    """
    failure = {"resilt": "", "probabilities": None, "status_code": 400}
    try:
        title = request.form.get("title")
        abstract = request.form.get('abstract')
        nums = request.form.get('nums')
        content = ""  # file upload is currently disabled, so no body text is sent
        id_ = str(uuid.uuid1())  # unique identifier for this query
        print("uuid: ", id_)
        print(id_)
        d = {
            'id': id_,
            'abstract': abstract,
            'title': title,
            'nums': nums
        }
        # Send the recall request in the background; this handler then waits
        # for the result to show up in Redis.
        Thread_rellce = Thread(target=recall_10, args=(id_, title, abstract, content,))
        Thread_rellce.start()

        load_request_path = './request_data_logs/{}.json'.format(id_)
        # ensure_ascii=False keeps Chinese text readable in the log file.
        with open(load_request_path, 'w', encoding='utf8') as f2:
            json.dump(d, f2, ensure_ascii=False, indent=4)

        while True:
            result = redis_.get(id_)  # path of the result file for this query
            if result is not None:
                redis_.delete(id_)
                # Accept both bytes and str replies so this does not depend
                # on how the Redis client is configured.
                result_path = result.decode('UTF-8') if isinstance(result, bytes) else result
                break
            # Yield between polls instead of spinning at full CPU speed.
            # NOTE(review): there is still no deadline; if no result is ever
            # written, this request waits forever.
            time.sleep(0.05)
        print("获取结果完成")

        with open(result_path, encoding='utf8') as f1:
            result_dict = json.load(f1)
        reference = result_dict["resilt"]
        status_code = str(result_dict["status_code"])
        print("结果分析完成")
        print("reference", reference)

        if status_code == "200":
            reference_list = reference.split("\n")[:int(nums)]
            print(reference_list)
            reference = [
                "[{}]{}".format(number, line)
                for number, line in enumerate(reference_list, start=1)
            ]
            return_text = {"resilt": reference, "probabilities": None, "status_code": 200}
        else:
            return_text = failure
    except Exception as exc:
        # Top-level boundary: always answer with the 400 payload, but leave a
        # trace of what failed instead of swallowing it silently.
        print("handle_query failed: {!r}".format(exc))
        return_text = failure
    return jsonify(return_text)  # send the result back to the client
if __name__ == "__main__":
    # Start the built-in Flask server only when this file is run as a script:
    # listen on all interfaces, port 17000, one thread per request.
    server_options = {"host": "0.0.0.0", "port": 17000, "threaded": True}
    app.run(**server_options)