Browse Source

第一次提交,自建库查询

master
majiahui@haimaqingfan.com 2 weeks ago
commit
70e151d687
  1. 0
      README.md
  2. 192
      Rouge_w.py
  3. 1590
      flask_check_bert_test.py
  4. 92
      redis_check_uuid_mistral.py
  5. 49
      连接数据库.py

0
README.md

192
Rouge_w.py

@ -0,0 +1,192 @@
# -*- coding: utf-8 -*-
"""
@Time : 2023/3/16 11:03
@Author :
@FileName:
@Software:
@Describe:
"""
from rouge import Rouge
rouge = Rouge()
from copy import deepcopy
class Rouge_w:
    """Weighted longest-common-subsequence similarity between two sequences.

    Consecutive matches are weighted by ``f`` (``k * a ** 2``), so a
    contiguous common run contributes more than the same number of
    scattered matches.
    """

    def __init__(self):
        # Coefficient of the quadratic weighting function ``f``.
        self.k = 0.1
        # Multiplier applied by ``fi_``.
        self.ki = 1.2
        # Beta of the F-measure computed in ``score``.
        self.p = 1.0

    def fi_(self, a):
        """Return ``a`` multiplied by ``self.ki``."""
        return a * self.ki

    def f(self, a):
        """Weighting function: ``self.k * a ** 2``."""
        return self.k * (a ** 2)

    def WLCS(self, X, Y, f):
        """Return the weighted LCS value of ``X`` and ``Y``.

        Args:
            X: First sequence (must support ``len`` and indexing).
            Y: Second sequence (must support ``len`` and indexing).
            f: Weighting function applied to the length of the current run
                of consecutive matches.

        Returns:
            The accumulated weighted score; ``0`` when either sequence is
            empty.
        """
        m = len(X)
        n = len(Y)
        # c[i][j]: weighted score for X[:i] vs Y[:j].
        # w[i][j]: length of the consecutive-match run ending at (i, j).
        c = [[0] * (n + 1) for _ in range(m + 1)]
        w = [[0] * (n + 1) for _ in range(m + 1)]
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if X[i - 1] == Y[j - 1]:
                    k = w[i - 1][j - 1]
                    # Extending a run of length k to k + 1 adds the weight
                    # increment, scaled by 10.0.
                    c[i][j] = c[i - 1][j - 1] + 10.0 * (f(k + 1) - f(k))
                    w[i][j] = k + 1
                else:
                    # A mismatch breaks the run (w[i][j] stays 0); carry the
                    # better neighbour, preferring the left cell on ties.
                    c[i][j] = max(c[i][j - 1], c[i - 1][j])
        return c[m][n]

    def f_1(self, k):
        """Return the square root of ``k``."""
        return k ** 0.5

    def f_(self, k):
        """Return ``k`` squared."""
        return k ** 2

    def score(self, p, r):
        """Return the weighted-LCS F-measure of sequences ``p`` and ``r``.

        Args:
            p: First sequence.
            r: Second sequence.

        Returns:
            A float F-score; ``0.0`` when either sequence is empty, because
            the length normalisation would otherwise divide by zero.
        """
        m = len(p)
        n = len(r)
        if m == 0 or n == 0:
            return 0.0
        wlcs = self.WLCS(p, r, self.f)
        p_wlcs = self.f_1(wlcs / self.f_(m))
        r_wlcs = self.f_1(wlcs / self.f_(n))
        beta_sq = self.p ** 2
        # The 1e-8 keeps the denominator non-zero when nothing matches.
        f_lcs = (1 + beta_sq) * ((p_wlcs * r_wlcs) / (p_wlcs + (beta_sq * r_wlcs) + 1e-8))
        return f_lcs
class Rouge_l:
    """Longest-common-subsequence F-measure between two sequences."""

    def __init__(self):
        # Beta of the F-measure computed in ``score``.
        self.b = 3

    def LCS(self, X, Y):
        """Return the length of the longest common subsequence of X and Y.

        Args:
            X: First sequence (list of tokens or a string).
            Y: Second sequence (list of tokens or a string).

        Returns:
            The LCS length as an int; ``0`` when either sequence is empty.
        """
        n = len(Y)
        # Only the previous DP row is needed to compute the current one.
        prev = [0] * (n + 1)
        for x in X:
            cur = [0]
            for j, y in enumerate(Y, 1):
                if x == y:
                    cur.append(prev[j - 1] + 1)
                else:
                    cur.append(max(prev[j], cur[j - 1]))
            prev = cur
        return prev[n]

    def score(self, p, r):
        """Return the LCS-based F-measure of sequences ``p`` and ``r``.

        Args:
            p: First sequence.
            r: Second sequence.

        Returns:
            A float F-score; ``0.0`` when either sequence is empty, because
            the length normalisation would otherwise divide by zero.
        """
        m = len(p)
        n = len(r)
        if m == 0 or n == 0:
            return 0.0
        lcs = self.LCS(p, r)
        p_lcs = lcs / m
        r_lcs = lcs / n
        beta_sq = self.b ** 2
        # NOTE(review): beta weights r_lcs in the denominator; confirm this
        # is the intended orientation of the F-measure.
        # The 1e-8 keeps the denominator non-zero when nothing matches.
        f_lcs = ((1 + beta_sq) * (p_lcs * r_lcs) / (p_lcs + beta_sq * r_lcs + 1e-8))
        return f_lcs
# class Ngrams(object):
# """
# Ngrams datastructure based on `set` or `list`
# depending in `exclusive`
# """
#
# def __init__(self, ngrams={}, exclusive=True):
# if exclusive:
# self._ngrams = set(ngrams)
# else:
# self._ngrams = list(ngrams)
# self.exclusive = exclusive
#
# def add(self, o):
# if self.exclusive:
# self._ngrams.add(o)
# else:
# self._ngrams.append(o)
#
# def __len__(self):
# return len(self._ngrams)
#
# def intersection(self, o):
# if self.exclusive:
# inter_set = self._ngrams.intersection(o._ngrams)
# return Ngrams(inter_set, exclusive=True)
# else:
# other_list = deepcopy(o._ngrams)
# inter_list = []
#
# for e in self._ngrams:
# try:
# i = other_list.index(e)
# except ValueError:
# continue
# other_list.pop(i)
# inter_list.append(e)
# return Ngrams(inter_list, exclusive=False)
#
# def union(self, *ngrams):
# if self.exclusive:
# union_set = self._ngrams
# for o in ngrams:
# union_set = union_set.union(o._ngrams)
# return Ngrams(union_set, exclusive=True)
# else:
# union_list = deepcopy(self._ngrams)
# for o in ngrams:
# union_list.extend(o._ngrams)
# return Ngrams(union_list, exclusive=False)
#
# class Rouge_l:
# def __init__(self):
#
# def score(self, evaluated_sentences, reference_sentences, raw_results=False, exclusive=True, **_):
# if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0:
# raise ValueError("Collections must contain at least 1 sentence.")
#
# # total number of words in reference sentences
# m = len(
# Ngrams(
# _split_into_words(reference_sentences),
# exclusive=exclusive))
#
# # total number of words in evaluated sentences
# n = len(
# Ngrams(
# _split_into_words(evaluated_sentences),
# exclusive=exclusive))
#
# # print("m,n %d %d" % (m, n))
# union_lcs_sum_across_all_references = 0
# union = Ngrams(exclusive=exclusive)
# for ref_s in reference_sentences:
# lcs_count, union = _union_lcs(evaluated_sentences,
# ref_s,
# prev_union=union,
# exclusive=exclusive)
# union_lcs_sum_across_all_references += lcs_count
#
# llcs = union_lcs_sum_across_all_references
# r_lcs = llcs / m
# p_lcs = llcs / n
#
# f_lcs = 2.0 * ((p_lcs * r_lcs) / (p_lcs + r_lcs + 1e-8))
if __name__ == '__main__':
    # Manual smoke test: score one candidate sequence against X with Rouge_l.
    X = ["A", "B", "C", "D", "u", "u", "u", "u", "u", "u"]
    Y1 = ["A", "B", "C", "D", "H", "I", "K", "K", "K", "K", "K", "K"]
    # Y2 shares tokens with X only in scattered positions; it is kept for
    # ad-hoc comparison and is not scored below.
    Y2 = ["A", "H", "B", "K", "C", "I", "K", "K", "K", "K", "K", "K"]
    scorer = Rouge_l()
    similarity = scorer.score(X, Y1)
    print(similarity)

1590
flask_check_bert_test.py

File diff suppressed because it is too large

92
redis_check_uuid_mistral.py

@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
"""
@Time : 2023/3/2 19:31
@Author :
@FileName:
@Software:
@Describe:
"""
#
# import redis
#
# redis_pool = redis.ConnectionPool(host='127.0.0.1', port=6379, password='', db=0)
# redis_conn = redis.Redis(connection_pool=redis_pool)
#
#
# name_dict = {
# 'name_4' : 'Zarten_4',
# 'name_5' : 'Zarten_5'
# }
# redis_conn.mset(name_dict)
import flask
import redis
import uuid
import json
from threading import Thread
import time
# Flask application serving the /search polling endpoint.
app = flask.Flask(__name__)
# NOTE(review): the Redis password is hard-coded here; consider loading it
# from configuration or the environment instead.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=6, password="zhicheng123*")
# Connection options such as decode_responses belong to the pool; passing
# them to redis.Redis together with connection_pool has no effect, so the
# misleading kwarg is omitted. Replies are bytes and handle_query decodes
# them explicitly.
redis_ = redis.Redis(connection_pool=pool)
# Redis list read by handle_query to see which ids are present in it.
db_key_query = 'queryset_check_task'
# Not referenced in this file.
db_key_querying = 'querying_check_task'
# Redis list that receives {"id": ...} entries for results with status 400.
db_key_error = 'error'
@app.route("/search", methods=["POST"])
def handle_query():
    """Return the stored result, or the queue status, for a task id.

    The POST body is JSON with an ``id`` field. If Redis holds a value under
    that id, it is treated as the path of a JSON result file whose
    ``status_code``, ``resilt`` and ``probabilities`` fields are returned.
    When the status code is 400 the id is also pushed onto the
    ``db_key_error`` list.

    Otherwise the response carries a status code only:
      * "201" - id is in the "querying" set and in the ``db_key_query`` list
      * "202" - id is in the "querying" set only
      * "203" - id is in neither; the response is also written to
        ``./request_data_logs_203/<id>.json``

    Returns:
        A Flask JSON response.
    """
    id_ = flask.request.json['id']
    result = redis_.get(id_)
    if result is not None:
        # The Redis value is the (bytes) path of the JSON result file.
        result_path = result.decode('UTF-8')
        with open(result_path, encoding='utf8') as f1:
            result_dict = json.load(f1)
        code = result_dict["status_code"]
        resilt = result_dict["resilt"]
        probabilities = result_dict["probabilities"]
        # Compare as strings so both 400 and "400" match; the previous
        # str-vs-int comparison was always False.
        if str(code) == "400":
            redis_.rpush(db_key_error, json.dumps({"id": id_}))
        # Always answer with JSON: a bare ``False`` is not a valid Flask
        # response.
        result_text = {'code': code, 'resilt': resilt, 'probabilities': probabilities}
    else:
        # NOTE(review): this reads the literal key "querying" rather than
        # db_key_querying ('querying_check_task') - confirm which key the
        # worker actually writes.
        querying_set = {member.decode() for member in redis_.smembers("querying")}
        querying_bool = id_ in querying_set

        query_set_ids = {
            json.loads(item)['id']
            for item in redis_.lrange(db_key_query, 0, -1)
        }
        query_bool = id_ in query_set_ids

        if querying_bool and query_bool:
            result_text = {'code': "201", 'text': "", 'probabilities': None}
        elif querying_bool:
            result_text = {'code': "202", 'text': "", 'probabilities': None}
        else:
            result_text = {'code': "203", 'text': "", 'probabilities': None}
            # Keep a copy of the 203 response on disk.
            # NOTE(review): the target directory must already exist.
            load_request_path = './request_data_logs_203/{}.json'.format(id_)
            with open(load_request_path, 'w', encoding='utf8') as f2:
                # ensure_ascii=False writes non-ASCII text as-is instead of
                # \u escapes; indent=4 pretty-prints the file.
                json.dump(result_text, f2, ensure_ascii=False, indent=4)
    return flask.jsonify(result_text)
if __name__ == "__main__":
    # Start Flask's built-in server on all interfaces, port 14001, with
    # debug mode off.
    app.run(host='0.0.0.0', port=14001, debug=False)

49
连接数据库.py

@ -0,0 +1,49 @@
from clickhouse_driver import Client
# 连接到ClickHouse
# Open a ClickHouse connection to the `mini_check` database.
# NOTE(review): host and password are hard-coded; consider moving them to
# configuration.
client = Client(
host='192.168.31.74',
port=9000,
user='default',
password='zhicheng123*',
database='mini_check'
)
# 2. Switch to the target database.
client.execute('USE mini_check')
# 3. Create a simple table (disabled; the insert below relies on
#    `user_table` already existing).
# create_table_sql = """
# CREATE TABLE IF NOT EXISTS user_table (
# user_uuid String,
# file_path String,
# is_delete UInt32,
# ) ENGINE = MergeTree()
# """
# create_table_sql = """
# CREATE TABLE IF NOT EXISTS user_table (
# user_uuid String,
# file_path String,
# is_delete UInt32,
# ) ENGINE = MergeTree()
# ORDER BY (user_uuid) -- ORDER BY must be specified
# SETTINGS index_granularity = 8192;
# """
#
# client.execute(create_table_sql)
#
# 4. Insert sample rows as (user_uuid, file_path, is_delete) tuples.
data = [
("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397246.txt', 1),
("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397314.txt', 1),
("113", '/home/zyp/mnt/8T_disk/program/docx_deal/deal_finish_txt/2023-04-08/14397321.txt', 1)
]
client.execute('INSERT INTO user_table (user_uuid, file_path, is_delete) VALUES', data)
#
# 5. Read every row of user_table back and print the result.
result = client.query_dataframe('SELECT * FROM user_table')
print(result)
Loading…
Cancel
Save