You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
207 lines
6.5 KiB
207 lines
6.5 KiB
![]()
1 year ago
|
# Standard library
import datetime
import json
import re
import time
import traceback
import uuid
from threading import Thread

# Third-party
import pymysql
import redis
import requests
from flask import Flask, jsonify
from flask import request
|
||
|
|
||
|
# Shared Redis connection pool for the worker.
# NOTE(review): port 63179 looks like a typo for the default Redis port 6379 — confirm.
# NOTE(review): hard-coded password in source; should come from config/env.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=8, password="zhicheng123*")
# NOTE(review): decode_responses passed to Redis() is ignored when a
# connection_pool is supplied (it must be set on the ConnectionPool itself),
# so replies arrive as bytes — which is why the consumer below calls
# .decode('UTF-8') explicitly. Do not "fix" one without the other.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Redis key names used by the task pipeline.
db_key_query = 'query_check_task'        # incoming check tasks
db_key_querying = 'querying_check_task'  # tasks in progress (unused in this file)
db_key_queryset = 'queryset_check_task'  # result-set key (unused in this file)
db_key_query_recall = 'query_recall'     # recall-result queue consumed by the worker
|
||
|
|
||
|
|
||
|
def run_query(conn, sql, params):
    """Execute *sql* with *params* on *conn* and return every fetched row."""
    with conn.cursor() as cur:
        cur.execute(sql, params)
        return cur.fetchall()
|
||
|
|
||
|
|
||
|
# def processing_one_text(paper_id):
|
||
|
# conn = pymysql.connect(
|
||
|
# host='192.168.31.145',
|
||
|
# port=3306,
|
||
|
# user='root',
|
||
|
# password='123456',
|
||
|
# db='zhiwang_db',
|
||
|
# charset='utf8mb4',
|
||
|
# cursorclass=pymysql.cursors.DictCursor
|
||
|
# )
|
||
|
#
|
||
|
# sql = 'SELECT * FROM main_table_paper_detail_message WHERE doc_id=%s'
|
||
|
# params = (paper_id,)
|
||
|
#
|
||
|
# result = run_query(conn, sql, params)
|
||
|
#
|
||
|
# conn.close()
|
||
|
# print(result[0]['title'], result[0]['author'])
|
||
|
# title = result[0]['title']
|
||
|
# author = result[0]['author']
|
||
|
# degree = result[0]['degree']
|
||
|
# year = result[0]['content'].split("/")[5]
|
||
|
# content_path = result[0]['content']
|
||
|
# school = result[0]['school']
|
||
|
# qikan_name = result[0]['qikan_name']
|
||
|
# author = str(author).strip(";")
|
||
|
# author = str(author).replace(";", ",")
|
||
|
# # select
|
||
|
# # school, qikan_name
|
||
|
# # from main_table_paper_detail_message limit
|
||
|
# # 10000 \G;;
|
||
|
#
|
||
|
# try:
|
||
|
# with open(content_path, encoding="utf-8") as f:
|
||
|
# text = f.read()
|
||
|
# except:
|
||
|
# with open(content_path, encoding="gbk") as f:
|
||
|
# text = f.read()
|
||
|
#
|
||
|
# paper_info = {
|
||
|
# "title": title,
|
||
|
# "author": author,
|
||
|
# "degree": degree,
|
||
|
# "year": year,
|
||
|
# "paper_len_word": len(text),
|
||
|
# "school": school,
|
||
|
# "qikan_name": qikan_name
|
||
|
# }
|
||
|
# return paper_info
|
||
|
|
||
|
from clickhouse_driver import Client
|
||
|
|
||
|
class PureClient:
    """Thin wrapper around a ClickHouse connection returning DataFrames."""

    def __init__(self, database='test_db'):
        # Only the local/LAN address is needed here; credentials are fixed
        # for this deployment.
        self.client = Client(
            host='192.168.31.74',
            port=9000,
            user='default',
            password='zhicheng123*',
            database=database,
        )

    def run(self, sql):
        """Execute *sql* and return the result as a pandas DataFrame."""
        return self.client.query_dataframe(sql)
|
||
|
|
||
|
def processing_one_text(paper_id):
    """Fetch one paper's metadata from ClickHouse and normalise it.

    :param paper_id: doc_id of the paper to look up.
    :return: dict with keys title/author/degree/year/school/qikan_name.
    :raises ValueError: if no row matches *paper_id*.
    """
    pureclient = PureClient()
    print("paper_id", paper_id)
    # SECURITY(review): paper_id is interpolated straight into the SQL text.
    # If it can ever come from untrusted input this is an injection risk —
    # confirm upstream validation or switch to a parameterised query.
    sql = 'SELECT * FROM main_paper_message WHERE doc_id={}'.format(paper_id)
    result = pureclient.run(sql)
    print("result", result)

    if len(result) == 0:
        # Fail loudly instead of surfacing an opaque IndexError below.
        raise ValueError("no paper found for doc_id={}".format(paper_id))

    title = result['title'][0]
    author = result['author'][0]
    degree = result['degree'][0]
    # assumes content is a path whose 6th segment is the year
    # (/a/b/c/d/e/<year>/...) — TODO confirm against real rows.
    year = result['content'][0].split("/")[5]
    school = result['school'][0]
    qikan_name = result['qikan_name'][0]

    # Normalise the author list: drop edge ';' then use ',' separators.
    author = str(author).strip(";").replace(";", ",")

    paper_info = {
        "title": title,
        "author": author,
        "degree": degree,
        "year": year,
        "school": school,
        "qikan_name": qikan_name,
    }
    print("paper_info", paper_info)
    return paper_info
|
||
|
|
||
|
|
||
|
def ulit_recall_paper(recall_data_list_dict):
    """Build citation strings for the recalled papers.

    :param recall_data_list_dict: mapping whose keys are paper doc_ids
        (the values are not used here).
    :return: de-duplicated list of citation strings shaped like
        "author.title[J].source,year."
    """
    data = []
    # Only the doc_id keys are needed, so iterate the dict directly
    # (the original materialised .items() and ignored the values).
    for paper_id in recall_data_list_dict:
        data_one = processing_one_text(paper_id)
        print("ulit_recall_paper-1")

        # "期刊" (journal) papers get the [J] tag; everything else [D] (thesis).
        degree = "[J]" if data_one['degree'] == "期刊" else "[D]"

        # Prefer the school as the source; fall back to the journal name when
        # the school field holds the single-space placeholder.
        if data_one['school'] != " ":
            source = data_one['school']
        else:
            source = data_one['qikan_name']
        print("ulit_recall_paper-2")

        paper_name = ".".join([
            data_one['author'],
            data_one['title'] + degree,
            ",".join([source, data_one['year']]),
        ]) + "."
        data.append(paper_name)
        print("ulit_recall_paper-3")

    # NOTE: set() de-duplicates but does not preserve insertion order —
    # callers must not rely on ordering (matches original behaviour).
    return list(set(data))
|
||
|
|
||
|
|
||
|
def classify_accurate_check():
    """Worker loop: pop recall results from Redis, format them, persist to disk.

    Blocks forever; intended to run on a background thread. For each queued
    item it builds a citation list (or an error payload), writes it to
    ./new_data_logs/<uuid>.json and stores that path in Redis for 24 hours.
    """
    while True:
        if redis_.llen(db_key_query_recall) == 0:  # queue empty — poll again
            time.sleep(1)
            continue

        print("计算结果")
        # lpop returns bytes (decode_responses is not in effect on this
        # client), so decode explicitly before parsing the JSON payload.
        query_recall = redis_.lpop(db_key_query_recall).decode('UTF-8')
        query_recall_dict = json.loads(query_recall)

        query_recall_uuid = query_recall_dict["uuid"]
        recall_data_list_dict = query_recall_dict["data"]
        is_success = query_recall_dict["is_success"]

        try:
            if is_success == "0":
                return_text = {"resilt": "宇鹏接口不成功", "probabilities": None, "status_code": 400}
            else:
                if recall_data_list_dict == "{}":
                    return_text = {"resilt": "查询结果为空", "probabilities": None, "status_code": 400}
                else:
                    recall_data_list = ulit_recall_paper(recall_data_list_dict)
                    recall_data = "\n".join(recall_data_list)
                    return_text = {"resilt": recall_data, "probabilities": None, "status_code": 200}
        except Exception:
            # Was a bare `except:` which also swallowed SystemExit and
            # KeyboardInterrupt; narrowed to Exception and logged so
            # failures are no longer silent.
            traceback.print_exc()
            return_text = {"resilt": "计算有问题", "probabilities": None, "status_code": 400}

        load_result_path = "./new_data_logs/{}.json".format(query_recall_uuid)
        print("queue_uuid: ", query_recall_uuid)
        print("load_result_path: ", load_result_path)

        with open(load_result_path, 'w', encoding='utf8') as f2:
            # ensure_ascii=False writes Chinese text literally instead of \uXXXX;
            # indent=4 keeps the dump human-readable.
            json.dump(return_text, f2, ensure_ascii=False, indent=4)

        # Publish the result-file path for the API side; expires after 24 h.
        redis_.set(query_recall_uuid, load_result_path, 86400)
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Launch the worker loop on a background thread and return to the
    # interpreter (the non-daemon thread keeps the process alive).
    worker = Thread(target=classify_accurate_check)
    worker.start()
|