commit
70e151d687
5 changed files with 1923 additions and 0 deletions
@ -0,0 +1,192 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
@Time : 2023/3/16 11:03 |
|||
@Author : |
|||
@FileName: |
|||
@Software: |
|||
@Describe: |
|||
""" |
|||
from rouge import Rouge |
|||
rouge = Rouge() |
|||
from copy import deepcopy |
|||
|
|||
class Rouge_w:
    """ROUGE-W scorer: weighted longest common subsequence (WLCS).

    Consecutive matches are rewarded more than scattered ones through the
    convex weighting function ``f`` (here f(k) = 0.1 * k**2) and its
    inverse ``f_1`` (square root), following the ROUGE-W definition in
    Lin (2004), "ROUGE: A Package for Automatic Evaluation of Summaries".
    """

    def __init__(self):
        self.k = 0.1   # scale factor inside f()
        self.ki = 1.2  # scale factor inside fi_() (not used by score())
        self.p = 1.0   # beta of the F-measure; 1.0 weighs precision and recall equally

    def fi_(self, a):
        """Linear weighting helper (unused by score(); kept for compatibility)."""
        return a * self.ki

    def f(self, a):
        """Convex weighting function f(k) = self.k * k**2 used by WLCS."""
        return self.k * (a ** 2)

    def WLCS(self, X, Y, f):
        """Return the weighted LCS score of sequences ``X`` and ``Y``.

        Dynamic programming over two tables:
          c[i][j] - weighted score of the best WLCS of X[:i], Y[:j]
          w[i][j] - length of the run of consecutive matches ending at (i, j)
        """
        m = len(X)
        n = len(Y)
        c = [[0 for j in range(n + 1)] for i in range(m + 1)]
        w = [[0 for j in range(n + 1)] for i in range(m + 1)]

        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if X[i - 1] == Y[j - 1]:
                    # Extend the current run; the marginal reward
                    # 10.0 * (f(k+1) - f(k)) grows with the run length.
                    k = w[i - 1][j - 1]
                    c[i][j] = c[i - 1][j - 1] + 10.0 * (f(k + 1) - f(k))
                    w[i][j] = k + 1
                else:
                    # No match: inherit the better score, reset the run length.
                    if c[i - 1][j] > c[i][j - 1]:
                        c[i][j] = c[i - 1][j]
                    else:
                        c[i][j] = c[i][j - 1]
                    w[i][j] = 0

        return c[m][n]

    def f_1(self, k):
        """Inverse weighting function f^-1(k) = sqrt(k)."""
        return k ** 0.5

    def f_(self, k):
        """Normalisation term f(k) = k**2 for a sequence of length k."""
        return k ** 2

    def score(self, p, r):
        """Return the ROUGE-W F-score of prediction ``p`` against reference ``r``.

        p, r: sequences of comparable items (e.g. token lists).
        Returns 0.0 when either sequence is empty — previously this raised
        ZeroDivisionError because f_(0) == 0 (the 1e-8 epsilon only guards
        the F-measure denominator, not the normalisation).
        """
        m = len(p)
        n = len(r)
        if m == 0 or n == 0:
            return 0.0
        wlcs = self.WLCS(p, r, self.f)
        p_wlcs = self.f_1(wlcs / self.f_(m))
        r_wlcs = self.f_1(wlcs / self.f_(n))
        f_lcs = (1 + self.p ** 2) * ((p_wlcs * r_wlcs) / (p_wlcs + ((self.p ** 2) * r_wlcs) + 1e-8))
        return f_lcs
|||
|
|||
class Rouge_l:
    """ROUGE-L scorer based on the (unweighted) longest common subsequence."""

    def __init__(self):
        # beta of the F-measure; b > 1 weighs recall more heavily than precision.
        self.b = 3

    def LCS(self, X, Y):
        """Return the length of the longest common subsequence of ``X`` and ``Y``."""
        m = len(X)
        n = len(Y)
        # dp[i][j] = LCS length of X[:i] and Y[:j].
        dp = [[0] * (n + 1) for _ in range(m + 1)]

        # Standard dynamic-programming fill.
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if X[i - 1] == Y[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])

        return dp[m][n]

    def score(self, p, r):
        """Return the ROUGE-L F-score of prediction ``p`` against reference ``r``.

        p, r: sequences of comparable items (e.g. token lists).
        Returns 0.0 when either sequence is empty — previously this raised
        ZeroDivisionError on `lcs/m` or `lcs/n`.
        """
        m = len(p)
        n = len(r)
        if m == 0 or n == 0:
            return 0.0
        lcs = self.LCS(p, r)
        p_lcs = lcs / m
        r_lcs = lcs / n
        f_lcs = ((1 + self.b ** 2) * (p_lcs * r_lcs) / (p_lcs + self.b ** 2 * r_lcs + 1e-8))
        return f_lcs
|||
|
|||
|
|||
# class Ngrams(object): |
|||
# """ |
|||
# Ngrams datastructure based on `set` or `list` |
|||
# depending in `exclusive` |
|||
# """ |
|||
# |
|||
# def __init__(self, ngrams={}, exclusive=True): |
|||
# if exclusive: |
|||
# self._ngrams = set(ngrams) |
|||
# else: |
|||
# self._ngrams = list(ngrams) |
|||
# self.exclusive = exclusive |
|||
# |
|||
# def add(self, o): |
|||
# if self.exclusive: |
|||
# self._ngrams.add(o) |
|||
# else: |
|||
# self._ngrams.append(o) |
|||
# |
|||
# def __len__(self): |
|||
# return len(self._ngrams) |
|||
# |
|||
# def intersection(self, o): |
|||
# if self.exclusive: |
|||
# inter_set = self._ngrams.intersection(o._ngrams) |
|||
# return Ngrams(inter_set, exclusive=True) |
|||
# else: |
|||
# other_list = deepcopy(o._ngrams) |
|||
# inter_list = [] |
|||
# |
|||
# for e in self._ngrams: |
|||
# try: |
|||
# i = other_list.index(e) |
|||
# except ValueError: |
|||
# continue |
|||
# other_list.pop(i) |
|||
# inter_list.append(e) |
|||
# return Ngrams(inter_list, exclusive=False) |
|||
# |
|||
# def union(self, *ngrams): |
|||
# if self.exclusive: |
|||
# union_set = self._ngrams |
|||
# for o in ngrams: |
|||
# union_set = union_set.union(o._ngrams) |
|||
# return Ngrams(union_set, exclusive=True) |
|||
# else: |
|||
# union_list = deepcopy(self._ngrams) |
|||
# for o in ngrams: |
|||
# union_list.extend(o._ngrams) |
|||
# return Ngrams(union_list, exclusive=False) |
|||
# |
|||
# class Rouge_l: |
|||
# def __init__(self): |
|||
# |
|||
# def score(self, evaluated_sentences, reference_sentences, raw_results=False, exclusive=True, **_): |
|||
# if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: |
|||
# raise ValueError("Collections must contain at least 1 sentence.") |
|||
# |
|||
# # total number of words in reference sentences |
|||
# m = len( |
|||
# Ngrams( |
|||
# _split_into_words(reference_sentences), |
|||
# exclusive=exclusive)) |
|||
# |
|||
# # total number of words in evaluated sentences |
|||
# n = len( |
|||
# Ngrams( |
|||
# _split_into_words(evaluated_sentences), |
|||
# exclusive=exclusive)) |
|||
# |
|||
# # print("m,n %d %d" % (m, n)) |
|||
# union_lcs_sum_across_all_references = 0 |
|||
# union = Ngrams(exclusive=exclusive) |
|||
# for ref_s in reference_sentences: |
|||
# lcs_count, union = _union_lcs(evaluated_sentences, |
|||
# ref_s, |
|||
# prev_union=union, |
|||
# exclusive=exclusive) |
|||
# union_lcs_sum_across_all_references += lcs_count |
|||
# |
|||
# llcs = union_lcs_sum_across_all_references |
|||
# r_lcs = llcs / m |
|||
# p_lcs = llcs / n |
|||
# |
|||
# f_lcs = 2.0 * ((p_lcs * r_lcs) / (p_lcs + r_lcs + 1e-8)) |
|||
|
|||
if __name__ == '__main__':
    # Quick smoke test: score one candidate sequence against a reference.
    scorer = Rouge_l()
    candidate = ["A", "B", "C", "D", "u", "u", "u", "u", "u", "u"]
    reference_contiguous = ["A", "B", "C", "D", "H", "I", "K", "K", "K", "K", "K", "K"]
    reference_scattered = ["A", "H", "B", "K", "C", "I", "K", "K", "K", "K", "K", "K"]
    # X = "我爱你"
    # Y = "我他爱"
    print(scorer.score(candidate, reference_contiguous))
    # print(WLCS([1,2,5], [1,2,5],f))
|||
File diff suppressed because it is too large
@ -0,0 +1,92 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
""" |
|||
@Time : 2023/3/2 19:31 |
|||
@Author : |
|||
@FileName: |
|||
@Software: |
|||
@Describe: |
|||
""" |
|||
# |
|||
# import redis |
|||
# |
|||
# redis_pool = redis.ConnectionPool(host='127.0.0.1', port=6379, password='', db=0) |
|||
# redis_conn = redis.Redis(connection_pool=redis_pool) |
|||
# |
|||
# |
|||
# name_dict = { |
|||
# 'name_4' : 'Zarten_4', |
|||
# 'name_5' : 'Zarten_5' |
|||
# } |
|||
# redis_conn.mset(name_dict) |
|||
|
|||
import flask |
|||
import redis |
|||
import uuid |
|||
import json |
|||
from threading import Thread |
|||
import time |
|||
|
|||
app = flask.Flask(__name__)
# NOTE(review): decode_responses=True is passed to Redis() but the pool was
# created without it; with a pre-built ConnectionPool the responses may still
# arrive as bytes — the .decode('UTF-8') call in handle_query() suggests they
# do. Confirm against the installed redis-py version.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=6, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Redis key names: pending-task queue, in-flight task key, failed-task list.
db_key_query = 'queryset_check_task'
db_key_querying = 'querying_check_task'
db_key_error = 'error'
|||
|
|||
@app.route("/search", methods=["POST"])
def handle_query():
    """Poll the status/result of a previously submitted check task.

    Expects a JSON body ``{"id": <task id>}``. If Redis holds a finished
    result for that id (stored as a path to a result JSON file), the file is
    loaded and returned. Otherwise the task's queue state is reported:
      201 - task is in-flight AND still in the pending queue
      202 - task is in-flight but no longer queued
      203 - task is unknown (neither in-flight nor queued); logged to disk
    """
    id_ = flask.request.json['id']  # task id supplied by the client
    result = redis_.get(id_)  # finished tasks map id -> result file path
    if result is not None:
        # redis_.delete(id_)
        result_path = result.decode('UTF-8')
        with open(result_path, encoding='utf8') as f1:
            result_dict = json.load(f1)
        code = result_dict["status_code"]
        # NOTE(review): "resilt" is a typo, but it is the key used by the
        # on-disk schema and by downstream consumers — do not rename here.
        resilt = result_dict["resilt"]
        probabilities = result_dict["probabilities"]
        # BUG FIX: was `str(code) == 400` (str compared to int, always False),
        # so failed tasks were never pushed onto the error list.
        if str(code) == "400":
            redis_.rpush(db_key_error, json.dumps({"id": id_}))
            # BUG FIX: was `return False`, which is not a valid Flask
            # response and would raise a TypeError in the server.
            return flask.jsonify({'code': code, 'resilt': resilt, 'probabilities': probabilities})
        result_text = {'code': code, 'resilt': resilt, 'probabilities': probabilities}
    else:
        # In-flight task ids live in a Redis set.
        # NOTE(review): this reads the literal key "querying" while
        # db_key_querying == 'querying_check_task' is defined above and never
        # used — confirm which key the task producer actually writes.
        querying_list = list(redis_.smembers("querying"))
        querying_set = {member.decode() for member in querying_list}
        querying_bool = id_ in querying_set

        # Pending (not yet started) tasks live in a Redis list of JSON blobs.
        query_list_json = redis_.lrange(db_key_query, 0, -1)
        query_set_ids = {json.loads(item)['id'] for item in query_list_json}
        query_bool = id_ in query_set_ids

        if querying_bool and query_bool:
            result_text = {'code': "201", 'text': "", 'probabilities': None}
        elif querying_bool and not query_bool:
            result_text = {'code': "202", 'text': "", 'probabilities': None}
        else:
            result_text = {'code': "203", 'text': "", 'probabilities': None}
            # Log unknown-task requests to disk for later inspection.
            load_request_path = './request_data_logs_203/{}.json'.format(id_)
            with open(load_request_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False keeps Chinese text readable;
                # indent=4 for human inspection.
                json.dump(result_text, f2, ensure_ascii=False, indent=4)

    return flask.jsonify(result_text)  # JSON response to the client
|||
|
|||
|
|||
if __name__ == "__main__":
    # Listen on all interfaces on port 14001; debug disabled.
    app.run(debug=False, host='0.0.0.0', port=14001)
|||
@ -0,0 +1,49 @@ |
|||
# One-off maintenance script: insert test rows into the `user_table` table of
# the `mini_check` ClickHouse database and print the table contents.
from clickhouse_driver import Client

# 1. Connect to ClickHouse.
client = Client(
    host='192.168.31.74',
    port=9000,
    user='default',
    password='zhicheng123*',
    database='mini_check'
)


# 2. Switch to the target database.
client.execute('USE mini_check')

# 3. Create a simple table (one-time setup, kept for reference).
# create_table_sql = """
# CREATE TABLE IF NOT EXISTS user_table (
#     user_uuid String,
#     file_path String,
#     is_delete UInt32,
# ) ENGINE = MergeTree()
# """

# create_table_sql = """
# CREATE TABLE IF NOT EXISTS user_table (
#     user_uuid String,
#     file_path String,
#     is_delete UInt32,
# ) ENGINE = MergeTree()
# ORDER BY (user_uuid) -- ORDER BY is mandatory for MergeTree
# SETTINGS index_granularity = 8192;
# """
#
# client.execute(create_table_sql)
#
# 4. Insert data: (user_uuid, file_path, is_delete) rows.
data = [
    ("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397246.txt', 1),
    ("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397314.txt', 1),
    ("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397321.txt', 1)
]
client.execute('INSERT INTO user_table (user_uuid, file_path, is_delete) VALUES', data)
#
# 5. Query the data back (as a pandas DataFrame) and print it.
result = client.query_dataframe('SELECT * FROM user_table')
print(result)
|||
Loading…
Reference in new issue