"""Worker that turns recalled paper ids into citation strings.

A background thread pops JSON messages from the Redis list
``query_recall``, looks each recalled paper up in ClickHouse, formats a
citation line per paper, writes the result to a JSON file under
``./new_data_logs/`` and stores that file path in Redis under the
message's uuid.
"""
import datetime
import json
import re
import time
import traceback
import uuid
from threading import Thread

import pymysql
import redis
import requests
from clickhouse_driver import Client
from flask import Flask, jsonify
from flask import request

# NOTE(review): credentials are hard-coded here and in PureClient; consider
# loading them from configuration or the environment instead.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100,
                            db=8, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

db_key_query = 'query_check_task'
db_key_querying = 'querying_check_task'
db_key_queryset = 'queryset_check_task'
db_key_query_recall = 'query_recall'


def run_query(conn, sql, params):
    """Execute ``sql`` with ``params`` on ``conn`` and return all rows.

    :param conn: DB-API style connection whose ``cursor()`` is a context
        manager.
    :param sql: SQL statement to execute.
    :param params: parameters passed to ``cursor.execute``.
    :return: the result of ``cursor.fetchall()``.
    """
    with conn.cursor() as cursor:
        cursor.execute(sql, params)
        result = cursor.fetchall()
    return result


class PureClient:
    """Thin wrapper around a ``clickhouse_driver.Client``."""

    def __init__(self, database='test_db'):
        # Only the local address needs to be specified.
        self.client = Client(host='192.168.31.74',
                             port=9000,
                             user='default',
                             password='zhicheng123*',
                             database=database)

    def run(self, sql, params=None):
        """Run ``sql`` and return the result as a DataFrame.

        :param sql: query text; may contain ``%(name)s`` placeholders.
        :param params: optional mapping of placeholder values, escaped by
            the driver. ``None`` (the default) runs ``sql`` verbatim.
        :return: whatever ``Client.query_dataframe`` returns.
        """
        return self.client.query_dataframe(sql, params=params)


def processing_one_text(paper_id):
    """Fetch the metadata of one paper from ``main_paper_message``.

    :param paper_id: value of the ``doc_id`` column to look up. It is
        coerced with ``int()`` so that untrusted queue input can never be
        spliced into the SQL text.
    :return: dict with keys ``title``, ``author``, ``degree``, ``year``,
        ``school`` and ``qikan_name``.
    :raises ValueError: if ``paper_id`` is not an integer value.
    :raises KeyError, IndexError: if no matching row is returned
        (presumably; depends on the shape of the returned DataFrame).
    """
    pureclient = PureClient()
    print("paper_id", paper_id)

    # The original query interpolated the id unquoted, i.e. as a numeric
    # literal; binding an int produces the same SQL for valid ids.
    # NOTE(review): assumes doc_id is a numeric column - confirm.
    sql = 'SELECT * FROM main_paper_message WHERE doc_id=%(doc_id)s'
    try:
        result = pureclient.run(sql, {'doc_id': int(paper_id)})
    finally:
        # A new client is created per call; release its connection.
        pureclient.client.disconnect()
    print("result", result)

    title = result['title'][0]
    author = result['author'][0]
    degree = result['degree'][0]
    # The year is taken from the sixth "/"-separated segment of `content`.
    year = result['content'][0].split("/")[5]
    school = result['school'][0]
    qikan_name = result['qikan_name'][0]

    # Drop leading/trailing ";" and use "," between the remaining authors.
    author = str(author).strip(";").replace(";", ",")

    paper_info = {
        "title": title,
        "author": author,
        "degree": degree,
        "year": year,
        "school": school,
        "qikan_name": qikan_name
    }
    print("paper_info", paper_info)
    return paper_info


def ulit_recall_paper(recall_data_list_dict):
    """Build one citation string per recalled paper.

    Each key of ``recall_data_list_dict`` is looked up with
    ``processing_one_text`` and formatted as
    ``author.title[D|J].source,year.`` where the marker is ``[J]`` when
    the degree is "期刊" and ``[D]`` otherwise, and the source is the
    school unless that field is a single space, in which case the journal
    name is used.

    :param recall_data_list_dict: mapping whose keys are paper ids; the
        values are not used.
    :return: list of unique citation strings, in first-seen order.
    """
    data = []
    for paper_id in recall_data_list_dict:
        data_one = processing_one_text(paper_id)
        print("ulit_recall_paper-1")

        degree = "[J]" if data_one['degree'] == "期刊" else "[D]"
        if data_one['school'] != " ":
            source = data_one['school']
        else:
            source = data_one['qikan_name']
        print("ulit_recall_paper-2")

        paper_name = ".".join([
            data_one['author'],
            data_one['title'] + degree,
            ",".join([source, data_one['year']]),
        ]) + "."
        data.append(paper_name)
        print("ulit_recall_paper-3")

    # De-duplicate while keeping the recall order stable (a plain set()
    # would return the citations in arbitrary order).
    return list(dict.fromkeys(data))


def _build_result(text, status_code):
    """Return the result payload written to the per-request JSON file."""
    # "resilt" is the key emitted by the original code; it is kept as-is
    # because readers of the result file may depend on it.
    return {"resilt": text, "probabilities": None, "status_code": status_code}


def classify_accurate_check():
    """Consume the ``query_recall`` queue forever.

    For every message: build the citation text (or an error payload),
    dump it to ``./new_data_logs/<uuid>.json`` and store that path in
    Redis under the message uuid with an expiry of 86400 seconds.
    """
    while True:
        # Pop directly instead of checking llen first: the queue may be
        # emptied between the two calls, and lpop then returns None.
        raw_message = redis_.lpop(db_key_query_recall)
        if raw_message is None:
            # Queue is empty; wait and poll again.
            time.sleep(1)
            continue
        print("计算结果")

        # The pooled connection returns bytes, so decode explicitly.
        query_recall = raw_message.decode('UTF-8')
        query_recall_dict = json.loads(query_recall)
        query_recall_uuid = query_recall_dict["uuid"]
        recall_data_list_dict = query_recall_dict["data"]
        is_success = query_recall_dict["is_success"]

        try:
            if is_success == "0":
                return_text = _build_result("宇鹏接口不成功", 400)
            elif not recall_data_list_dict or recall_data_list_dict == "{}":
                # Covers an empty dict as well as the literal "{}" string
                # the original check compared against.
                return_text = _build_result("查询结果为空", 400)
            else:
                recall_data_list = ulit_recall_paper(recall_data_list_dict)
                recall_data = "\n".join(recall_data_list)
                return_text = _build_result(recall_data, 200)
        except Exception:
            # Keep the worker alive, but leave a trace of what went wrong.
            traceback.print_exc()
            return_text = _build_result("计算有问题", 400)

        load_result_path = "./new_data_logs/{}.json".format(query_recall_uuid)

        print("queue_uuid: ", query_recall_uuid)
        print("load_result_path: ", load_result_path)

        with open(load_result_path, 'w', encoding='utf8') as f2:
            # ensure_ascii=False writes Chinese text as-is rather than as
            # \u escapes; indent=4 pretty-prints the JSON.
            json.dump(return_text, f2, ensure_ascii=False, indent=4)
        redis_.set(query_recall_uuid, load_result_path, 86400)


if __name__ == '__main__':
    t1 = Thread(target=classify_accurate_check)
    t1.start()