commit
70e151d687
5 changed files with 1923 additions and 0 deletions
@ -0,0 +1,192 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
|
||||
|
""" |
||||
|
@Time : 2023/3/16 11:03 |
||||
|
@Author : |
||||
|
@FileName: |
||||
|
@Software: |
||||
|
@Describe: |
||||
|
""" |
||||
|
from rouge import Rouge |
||||
|
rouge = Rouge() |
||||
|
from copy import deepcopy |
||||
|
|
||||
|
class Rouge_w:
    """Weighted longest-common-subsequence (ROUGE-W style) similarity.

    Runs of consecutive matches are rewarded more than scattered matches
    because each match contributes ``f(k + 1) - f(k)``, where ``k`` is the
    length of the consecutive run ending just before it and ``f`` grows
    quadratically.
    """

    def __init__(self):
        self.k = 0.1   # coefficient of the quadratic weighting function ``f``
        self.ki = 1.2  # slope of the linear function ``fi_``
        self.p = 1.0   # weight applied (squared) inside the F-measure of ``score``

    def fi_(self, a):
        """Return ``a`` scaled by ``self.ki`` (not used by ``score``)."""
        return a * self.ki

    def f(self, a):
        """Quadratic weighting function: ``self.k * a ** 2``."""
        return self.k * (a ** 2)

    def WLCS(self, X, Y, f):
        """Return the weighted LCS score of sequences ``X`` and ``Y``.

        ``f`` is the weighting function applied to the length of a run of
        consecutive matches.  ``c`` holds the best weighted score for each
        pair of prefixes and ``w`` the length of the consecutive run ending
        at that cell.  Returns ``0`` when either sequence is empty.
        """
        m = len(X)
        n = len(Y)
        c = [[0] * (n + 1) for _ in range(m + 1)]
        w = [[0] * (n + 1) for _ in range(m + 1)]

        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if X[i - 1] == Y[j - 1]:
                    run = w[i - 1][j - 1]
                    # The factor 10.0 cancels the default self.k == 0.1, so
                    # with ``self.f`` each step adds (run + 1)**2 - run**2.
                    c[i][j] = c[i - 1][j - 1] + 10.0 * (f(run + 1) - f(run))
                    w[i][j] = run + 1
                else:
                    # No match: carry over the better neighbour and break the
                    # run.  On a tie the left neighbour c[i][j-1] is taken.
                    c[i][j] = max(c[i][j - 1], c[i - 1][j])
                    w[i][j] = 0

        return c[m][n]

    def f_1(self, k):
        """Square root; undoes the quadratic weighting in ``score``."""
        return k ** 0.5

    def f_(self, k):
        """Square of ``k``; the largest weighted score a length-``k`` run can reach."""
        return k ** 2

    def score(self, p, r):
        """Return the weighted-LCS F-measure of sequences ``p`` and ``r``.

        The WLCS score is normalised by each sequence's squared length,
        square-rooted, and the two ratios are combined with weight
        ``self.p``.  Returns ``0.0`` when either sequence is empty, because
        nothing can match and the normalisation would divide by zero.
        """
        m = len(p)
        n = len(r)
        if m == 0 or n == 0:
            return 0.0

        wlcs = self.WLCS(p, r, self.f)
        p_wlcs = self.f_1(wlcs / self.f_(m))
        r_wlcs = self.f_1(wlcs / self.f_(n))
        beta_sq = self.p ** 2
        # 1e-8 keeps the division defined when both ratios are zero.
        f_lcs = (1 + beta_sq) * ((p_wlcs * r_wlcs) / (p_wlcs + (beta_sq * r_wlcs) + 1e-8))
        return f_lcs
||||
|
|
||||
|
class Rouge_l:
    """Longest-common-subsequence (ROUGE-L style) similarity between two sequences."""

    def __init__(self):
        self.b = 3  # weight applied (squared) inside the F-measure of ``score``

    def LCS(self, X, Y):
        """Return the length of the longest common subsequence of ``X`` and ``Y``.

        Standard dynamic programme: ``dp[i][j]`` is the LCS length of the
        first ``i`` items of ``X`` and the first ``j`` items of ``Y``.
        """
        m = len(X)
        n = len(Y)
        # 2-D table holding the intermediate results.
        dp = [[0] * (n + 1) for _ in range(m + 1)]

        # Fill the table row by row.
        for i, x_item in enumerate(X, start=1):
            for j, y_item in enumerate(Y, start=1):
                if x_item == y_item:
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])

        return dp[m][n]

    def score(self, p, r):
        """Return the LCS-based F-measure of sequences ``p`` and ``r``.

        ``lcs / len(p)`` and ``lcs / len(r)`` are combined with weight
        ``self.b``.  Returns ``0.0`` when either sequence is empty, because
        nothing can match and the ratios would divide by zero.
        """
        m = len(p)
        n = len(r)
        if m == 0 or n == 0:
            return 0.0

        lcs = self.LCS(p, r)
        p_lcs = lcs / m
        r_lcs = lcs / n
        beta_sq = self.b ** 2
        # NOTE(review): if ``p`` is the prediction and ``r`` the reference, the
        # usual F-beta denominator is (r_lcs + beta**2 * p_lcs); here the
        # weight sits on r_lcs instead.  Kept as-is - confirm the intent.
        # 1e-8 keeps the division defined when both ratios are zero.
        f_lcs = ((1 + beta_sq) * (p_lcs * r_lcs) / (p_lcs + beta_sq * r_lcs + 1e-8))
        return f_lcs
||||
|
|
||||
|
|
||||
|
# class Ngrams(object): |
||||
|
# """ |
||||
|
# Ngrams datastructure based on `set` or `list` |
||||
|
# depending in `exclusive` |
||||
|
# """ |
||||
|
# |
||||
|
# def __init__(self, ngrams={}, exclusive=True): |
||||
|
# if exclusive: |
||||
|
# self._ngrams = set(ngrams) |
||||
|
# else: |
||||
|
# self._ngrams = list(ngrams) |
||||
|
# self.exclusive = exclusive |
||||
|
# |
||||
|
# def add(self, o): |
||||
|
# if self.exclusive: |
||||
|
# self._ngrams.add(o) |
||||
|
# else: |
||||
|
# self._ngrams.append(o) |
||||
|
# |
||||
|
# def __len__(self): |
||||
|
# return len(self._ngrams) |
||||
|
# |
||||
|
# def intersection(self, o): |
||||
|
# if self.exclusive: |
||||
|
# inter_set = self._ngrams.intersection(o._ngrams) |
||||
|
# return Ngrams(inter_set, exclusive=True) |
||||
|
# else: |
||||
|
# other_list = deepcopy(o._ngrams) |
||||
|
# inter_list = [] |
||||
|
# |
||||
|
# for e in self._ngrams: |
||||
|
# try: |
||||
|
# i = other_list.index(e) |
||||
|
# except ValueError: |
||||
|
# continue |
||||
|
# other_list.pop(i) |
||||
|
# inter_list.append(e) |
||||
|
# return Ngrams(inter_list, exclusive=False) |
||||
|
# |
||||
|
# def union(self, *ngrams): |
||||
|
# if self.exclusive: |
||||
|
# union_set = self._ngrams |
||||
|
# for o in ngrams: |
||||
|
# union_set = union_set.union(o._ngrams) |
||||
|
# return Ngrams(union_set, exclusive=True) |
||||
|
# else: |
||||
|
# union_list = deepcopy(self._ngrams) |
||||
|
# for o in ngrams: |
||||
|
# union_list.extend(o._ngrams) |
||||
|
# return Ngrams(union_list, exclusive=False) |
||||
|
# |
||||
|
# class Rouge_l: |
||||
|
# def __init__(self): |
||||
|
# |
||||
|
# def score(self, evaluated_sentences, reference_sentences, raw_results=False, exclusive=True, **_): |
||||
|
# if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: |
||||
|
# raise ValueError("Collections must contain at least 1 sentence.") |
||||
|
# |
||||
|
# # total number of words in reference sentences |
||||
|
# m = len( |
||||
|
# Ngrams( |
||||
|
# _split_into_words(reference_sentences), |
||||
|
# exclusive=exclusive)) |
||||
|
# |
||||
|
# # total number of words in evaluated sentences |
||||
|
# n = len( |
||||
|
# Ngrams( |
||||
|
# _split_into_words(evaluated_sentences), |
||||
|
# exclusive=exclusive)) |
||||
|
# |
||||
|
# # print("m,n %d %d" % (m, n)) |
||||
|
# union_lcs_sum_across_all_references = 0 |
||||
|
# union = Ngrams(exclusive=exclusive) |
||||
|
# for ref_s in reference_sentences: |
||||
|
# lcs_count, union = _union_lcs(evaluated_sentences, |
||||
|
# ref_s, |
||||
|
# prev_union=union, |
||||
|
# exclusive=exclusive) |
||||
|
# union_lcs_sum_across_all_references += lcs_count |
||||
|
# |
||||
|
# llcs = union_lcs_sum_across_all_references |
||||
|
# r_lcs = llcs / m |
||||
|
# p_lcs = llcs / n |
||||
|
# |
||||
|
# f_lcs = 2.0 * ((p_lcs * r_lcs) / (p_lcs + r_lcs + 1e-8)) |
||||
|
|
||||
|
if __name__ == '__main__':
    # Manual smoke test: score X against a candidate that shares its first
    # four tokens as one consecutive run.
    rouge_model = Rouge_l()

    X = ["A", "B", "C", "D"] + ["u"] * 6
    Y1 = ["A", "B", "C", "D", "H", "I"] + ["K"] * 6
    # Same shared tokens as Y1 but interleaved with others; not scored below.
    Y2 = ["A", "H", "B", "K", "C", "I"] + ["K"] * 6
    # X = "我爱你"
    # Y = "我他爱"

    demo_score = rouge_model.score(X, Y1)
    print(demo_score)
||||
|
# print(WLCS([1,2,5], [1,2,5],f)) |
||||
File diff suppressed because it is too large
@ -0,0 +1,92 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
|
||||
|
""" |
||||
|
@Time : 2023/3/2 19:31 |
||||
|
@Author : |
||||
|
@FileName: |
||||
|
@Software: |
||||
|
@Describe: |
||||
|
""" |
||||
|
# |
||||
|
# import redis |
||||
|
# |
||||
|
# redis_pool = redis.ConnectionPool(host='127.0.0.1', port=6379, password='', db=0) |
||||
|
# redis_conn = redis.Redis(connection_pool=redis_pool) |
||||
|
# |
||||
|
# |
||||
|
# name_dict = { |
||||
|
# 'name_4' : 'Zarten_4', |
||||
|
# 'name_5' : 'Zarten_5' |
||||
|
# } |
||||
|
# redis_conn.mset(name_dict) |
||||
|
|
||||
|
import flask |
||||
|
import redis |
||||
|
import uuid |
||||
|
import json |
||||
|
from threading import Thread |
||||
|
import time |
||||
|
|
||||
|
app = flask.Flask(__name__)

# NOTE(review): the Redis password is hard-coded here; consider loading it from
# the environment or a secrets store.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=6, password="zhicheng123*")

# ``decode_responses`` is intentionally not passed: redis-py ignores it when an
# explicit ``connection_pool`` is supplied, so replies arrive as ``bytes`` and
# the request handler decodes them itself.
redis_ = redis.Redis(connection_pool=pool)

# Redis keys used by the check-task service.
db_key_query = 'queryset_check_task'      # list of pending tasks (JSON strings)
db_key_querying = 'querying_check_task'   # not referenced by the handler in this file
db_key_error = 'error'                    # list receiving ids of failed tasks
||||
|
|
||||
|
@app.route("/search", methods=["POST"])
def handle_query():
    """Return the result or the queue state of the task whose id is posted.

    The request body is JSON with an ``id`` field.  If Redis holds a value
    under that id, it is treated as the path of a JSON result file, which is
    loaded and returned as ``code`` / ``resilt`` / ``probabilities``.  A
    result whose status code is 400 is additionally recorded on the
    ``db_key_error`` list.

    If there is no result yet, the response code describes the task state:

    * ``"201"`` - id is in the ``"querying"`` set and in the pending list
    * ``"202"`` - id is in the ``"querying"`` set only
    * ``"203"`` - id is not in the ``"querying"`` set; the response is also
      written to ``./request_data_logs_203/<id>.json``
    """
    id_ = flask.request.json['id']  # task id supplied by the client
    result = redis_.get(id_)  # path of the result file (bytes), or None

    if result is not None:
        # redis_.delete(id_)
        result_path = result.decode('UTF-8')
        with open(result_path, encoding='utf8') as f1:
            # Load the stored result object.
            result_dict = json.load(f1)

        code = result_dict["status_code"]
        resilt = result_dict["resilt"]
        probabilities = result_dict["probabilities"]

        # Compare as strings so both 400 and "400" are recognised.  The
        # previous ``str(code) == 400`` could never be true, and its
        # ``return False`` is not a valid Flask response; the failure is now
        # recorded and the payload (carrying the 400 code) is still returned.
        if str(code) == "400":
            redis_.rpush(db_key_error, json.dumps({"id": id_}))

        result_text = {'code': code, 'resilt': resilt, 'probabilities': probabilities}
    else:
        # NOTE(review): this reads the literal key "querying" while the module
        # defines db_key_querying = 'querying_check_task' - confirm which key
        # the worker actually populates.
        querying_set = {member.decode() for member in redis_.smembers("querying")}
        querying_bool = id_ in querying_set

        # Ids of every task still waiting in the pending list.
        query_set_ids = {json.loads(item)['id'] for item in redis_.lrange(db_key_query, 0, -1)}
        query_bool = id_ in query_set_ids

        if querying_bool and query_bool:
            result_text = {'code': "201", 'text': "", 'probabilities': None}
        elif querying_bool:
            result_text = {'code': "202", 'text': "", 'probabilities': None}
        else:
            result_text = {'code': "203", 'text': "", 'probabilities': None}
            # NOTE(review): id_ comes straight from the request and is used in
            # a file path - validate it if clients are not trusted.
            load_request_path = './request_data_logs_203/{}.json'.format(id_)
            with open(load_request_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False keeps non-ASCII text readable instead of
                # \u escapes; indent=4 pretty-prints the file.
                json.dump(result_text, f2, ensure_ascii=False, indent=4)

    return flask.jsonify(result_text)  # send the response
||||
|
|
||||
|
|
||||
|
if __name__ == "__main__":
    # Serve on every interface, port 14001, with the Flask debugger disabled.
    app.run(
        host='0.0.0.0',
        port=14001,
        debug=False,
    )
||||
@ -0,0 +1,49 @@ |
|||||
|
from clickhouse_driver import Client |
||||
|
|
||||
|
# Connect to ClickHouse.
# NOTE(review): host and credentials are hard-coded in this script.
client = Client(host='192.168.31.74', port=9000, user='default',
                password='zhicheng123*', database='mini_check')

# 2. Switch to the target database.
use_database_sql = 'USE mini_check'
client.execute(use_database_sql)
||||
|
|
||||
|
# 3. 创建简单的表 |
||||
|
# create_table_sql = """ |
||||
|
# CREATE TABLE IF NOT EXISTS user_table ( |
||||
|
# user_uuid String, |
||||
|
# file_path String, |
||||
|
# is_delete UInt32, |
||||
|
# ) ENGINE = MergeTree() |
||||
|
# """ |
||||
|
|
||||
|
# create_table_sql = """ |
||||
|
# CREATE TABLE IF NOT EXISTS user_table ( |
||||
|
# user_uuid String, |
||||
|
# file_path String, |
||||
|
# is_delete UInt32, |
||||
|
# ) ENGINE = MergeTree() |
||||
|
# ORDER BY (user_uuid) -- 必须指定 ORDER BY |
||||
|
# SETTINGS index_granularity = 8192; |
||||
|
# """ |
||||
|
# |
||||
|
# client.execute(create_table_sql) |
||||
|
# |
||||
|
# 4. Insert rows as (user_uuid, file_path, is_delete).
data = [
    ("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397246.txt', 1),
    ("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397314.txt', 1),
    ("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397321.txt', 1),
]
client.execute(
    'INSERT INTO user_table (user_uuid, file_path, is_delete) VALUES',
    data,
)

# 5. Read the whole table back and print it.
result = client.query_dataframe('SELECT * FROM user_table')
print(result)
||||
|
|
||||
|
|
||||
Loading…
Reference in new issue