
2 changed files with 349 additions and 0 deletions
@ -0,0 +1,192 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
|
||||
|
""" |
||||
|
@Time : 2023/3/16 11:03 |
||||
|
@Author : |
||||
|
@FileName: |
||||
|
@Software: |
||||
|
@Describe: |
||||
|
""" |
||||
|
from rouge import Rouge |
||||
|
rouge = Rouge() |
||||
|
from copy import deepcopy |
||||
|
|
||||
|
class Rouge_w:
    """ROUGE-W style scorer based on a weighted longest common subsequence.

    Runs of consecutive matches are rewarded through the quadratic weighting
    function ``f(a) = k * a ** 2``; the resulting WLCS value is normalised by
    ``len ** 2`` and mapped back with a square root before precision and
    recall are combined into an F-score.
    """

    def __init__(self):
        self.k = 0.1   # coefficient of the quadratic weighting function ``f``
        self.ki = 1.2  # coefficient of the linear function ``fi_``
        self.p = 1.0   # beta used when combining precision and recall in ``score``

    def fi_(self, a):
        """Return ``a`` scaled by ``self.ki``."""
        return a * self.ki

    def f(self, a):
        """Weighting function for a run of ``a`` consecutive matches."""
        return self.k * (a ** 2)

    def WLCS(self, X, Y, f):
        """Return the weighted LCS value of the sequences ``X`` and ``Y``.

        Args:
            X: first sequence of comparable tokens.
            Y: second sequence of comparable tokens.
            f: weighting function applied to the length of a run of
                consecutive matches.

        Returns:
            The accumulated weighted score stored in the last table cell.
        """
        m = len(X)
        n = len(Y)
        # c holds the best weighted score for each prefix pair; w holds the
        # length of the run of consecutive matches ending at that cell.
        c = [[0] * (n + 1) for _ in range(m + 1)]
        w = [[0] * (n + 1) for _ in range(m + 1)]

        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if X[i - 1] == Y[j - 1]:
                    k = w[i - 1][j - 1]
                    # Gain of extending the current run by one match.  The
                    # factor 10.0 cancels the default self.k = 0.1, so with
                    # the stock ``f`` a run of length L contributes L ** 2.
                    c[i][j] = c[i - 1][j - 1] + 10.0 * (f(k + 1) - f(k))
                    w[i][j] = k + 1
                else:
                    # No match: carry the better neighbour forward; the run
                    # length stays at its initial value 0.
                    c[i][j] = max(c[i - 1][j], c[i][j - 1])

        return c[m][n]

    def f_1(self, k):
        """Square root, used in ``score`` to undo the quadratic weighting."""
        return k ** 0.5

    def f_(self, k):
        """Square of ``k``, used in ``score`` to normalise the WLCS value."""
        return k ** 2

    def score(self, p, r):
        """Return the weighted-LCS F-score of the sequences ``p`` and ``r``.

        Args:
            p: first token sequence (its length normalises ``p_wlcs``).
            r: second token sequence (its length normalises ``r_wlcs``).

        Returns:
            float: the F-score; ``0.0`` when either sequence is empty.
        """
        m = len(p)
        n = len(r)
        # An empty sequence cannot share anything with the other one, and
        # normalising by f_(0) == 0 would raise ZeroDivisionError.
        if m == 0 or n == 0:
            return 0.0

        wlcs = self.WLCS(p, r, self.f)
        p_wlcs = self.f_1(wlcs / self.f_(m))
        r_wlcs = self.f_1(wlcs / self.f_(n))
        # 1e-8 keeps the denominator non-zero when nothing matches.
        f_lcs = (1 + self.p ** 2) * ((p_wlcs * r_wlcs) / (p_wlcs + ((self.p ** 2) * r_wlcs) + 1e-8))
        return f_lcs
||||
|
|
||||
|
class Rouge_l:
    """ROUGE-L style scorer based on the plain longest common subsequence."""

    def __init__(self):
        self.b = 3  # beta used when combining the two ratios in ``score``

    def LCS(self, X, Y):
        """Return the length of the longest common subsequence of ``X`` and ``Y``."""
        m = len(X)
        n = len(Y)
        # 2-D table holding the LCS length of every prefix pair.
        dp = [[0] * (n + 1) for _ in range(m + 1)]

        # Fill the table with the classic dynamic-programming recurrence.
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if X[i - 1] == Y[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])

        return dp[m][n]

    def score(self, p, r):
        """Return the LCS-based F-score of the token sequences ``p`` and ``r``.

        Args:
            p: first token sequence; ``p_lcs`` is ``LCS / len(p)``.
            r: second token sequence; ``r_lcs`` is ``LCS / len(r)``.

        Returns:
            float: the F-score; ``0.0`` when either sequence is empty.
        """
        m = len(p)
        n = len(r)
        # Guard against ZeroDivisionError: an empty sequence has no overlap.
        if m == 0 or n == 0:
            return 0.0

        lcs = self.LCS(p, r)
        p_lcs = lcs / m
        r_lcs = lcs / n
        # NOTE(review): the textbook F-beta denominator is
        # (r_lcs + b**2 * p_lcs); here b**2 multiplies r_lcs instead.  Kept
        # exactly as in the original - confirm this weighting is intended.
        # 1e-8 keeps the denominator non-zero when nothing matches.
        f_lcs = ((1 + self.b ** 2) * (p_lcs * r_lcs) / (p_lcs + self.b ** 2 * r_lcs + 1e-8))
        return f_lcs
||||
|
|
||||
|
|
||||
|
# class Ngrams(object): |
||||
|
# """ |
||||
|
# Ngrams datastructure based on `set` or `list` |
||||
|
# depending in `exclusive` |
||||
|
# """ |
||||
|
# |
||||
|
# def __init__(self, ngrams={}, exclusive=True): |
||||
|
# if exclusive: |
||||
|
# self._ngrams = set(ngrams) |
||||
|
# else: |
||||
|
# self._ngrams = list(ngrams) |
||||
|
# self.exclusive = exclusive |
||||
|
# |
||||
|
# def add(self, o): |
||||
|
# if self.exclusive: |
||||
|
# self._ngrams.add(o) |
||||
|
# else: |
||||
|
# self._ngrams.append(o) |
||||
|
# |
||||
|
# def __len__(self): |
||||
|
# return len(self._ngrams) |
||||
|
# |
||||
|
# def intersection(self, o): |
||||
|
# if self.exclusive: |
||||
|
# inter_set = self._ngrams.intersection(o._ngrams) |
||||
|
# return Ngrams(inter_set, exclusive=True) |
||||
|
# else: |
||||
|
# other_list = deepcopy(o._ngrams) |
||||
|
# inter_list = [] |
||||
|
# |
||||
|
# for e in self._ngrams: |
||||
|
# try: |
||||
|
# i = other_list.index(e) |
||||
|
# except ValueError: |
||||
|
# continue |
||||
|
# other_list.pop(i) |
||||
|
# inter_list.append(e) |
||||
|
# return Ngrams(inter_list, exclusive=False) |
||||
|
# |
||||
|
# def union(self, *ngrams): |
||||
|
# if self.exclusive: |
||||
|
# union_set = self._ngrams |
||||
|
# for o in ngrams: |
||||
|
# union_set = union_set.union(o._ngrams) |
||||
|
# return Ngrams(union_set, exclusive=True) |
||||
|
# else: |
||||
|
# union_list = deepcopy(self._ngrams) |
||||
|
# for o in ngrams: |
||||
|
# union_list.extend(o._ngrams) |
||||
|
# return Ngrams(union_list, exclusive=False) |
||||
|
# |
||||
|
# class Rouge_l: |
||||
|
# def __init__(self): |
||||
|
# |
||||
|
# def score(self, evaluated_sentences, reference_sentences, raw_results=False, exclusive=True, **_): |
||||
|
# if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: |
||||
|
# raise ValueError("Collections must contain at least 1 sentence.") |
||||
|
# |
||||
|
# # total number of words in reference sentences |
||||
|
# m = len( |
||||
|
# Ngrams( |
||||
|
# _split_into_words(reference_sentences), |
||||
|
# exclusive=exclusive)) |
||||
|
# |
||||
|
# # total number of words in evaluated sentences |
||||
|
# n = len( |
||||
|
# Ngrams( |
||||
|
# _split_into_words(evaluated_sentences), |
||||
|
# exclusive=exclusive)) |
||||
|
# |
||||
|
# # print("m,n %d %d" % (m, n)) |
||||
|
# union_lcs_sum_across_all_references = 0 |
||||
|
# union = Ngrams(exclusive=exclusive) |
||||
|
# for ref_s in reference_sentences: |
||||
|
# lcs_count, union = _union_lcs(evaluated_sentences, |
||||
|
# ref_s, |
||||
|
# prev_union=union, |
||||
|
# exclusive=exclusive) |
||||
|
# union_lcs_sum_across_all_references += lcs_count |
||||
|
# |
||||
|
# llcs = union_lcs_sum_across_all_references |
||||
|
# r_lcs = llcs / m |
||||
|
# p_lcs = llcs / n |
||||
|
# |
||||
|
# f_lcs = 2.0 * ((p_lcs * r_lcs) / (p_lcs + r_lcs + 1e-8)) |
||||
|
|
||||
|
if __name__ == '__main__':
    # Manual smoke test: score one candidate sequence against a reference
    # that shares a contiguous "A B C D" prefix with it.
    rouge_model = Rouge_l()

    X = list("ABCD") + ["u"] * 6
    Y1 = list("ABCDHI") + ["K"] * 6
    # Same shared tokens as Y1 but interleaved with others; not scored below.
    Y2 = list("AHBKCI") + ["K"] * 6
    # X = "我爱你"
    # Y = "我他爱"
    print(rouge_model.score(X, Y1))
||||
|
# print(WLCS([1,2,5], [1,2,5],f)) |
@ -0,0 +1,157 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
|
||||
|
""" |
||||
|
@Time : 2023/3/9 18:36 |
||||
|
@Author : |
||||
|
@FileName: |
||||
|
@Software: |
||||
|
@Describe: |
||||
|
""" |
||||
|
import os |
||||
|
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" |
||||
|
os.environ["CUDA_VISIBLE_DEVICES"] = "0" |
||||
|
import jieba |
||||
|
import tensorflow as tf |
||||
|
import os |
||||
|
from src import basemodel |
||||
|
from src import simscemodel |
||||
|
import numpy as np |
||||
|
from numpy.linalg import norm |
||||
|
import pandas as pd |
||||
|
# from rouge import Rouge |
||||
|
from rouge_chinese import Rouge |
||||
|
from Rouge_w import Rouge_w,Rouge_l |
||||
|
from tqdm import tqdm |
||||
|
|
||||
|
# Shared scorer instances used by the helper functions in this module.
rouge = Rouge()  # rouge_chinese scorer, used by rouge_value_dan

rouge_model = Rouge_w()  # weighted-LCS scorer, used by rouge_w_value

rouge_l_model = Rouge_l()  # plain-LCS scorer, used by rouge_value_self
||||
|
|
||||
|
# a = [[1, 3, 2], [2, 2, 1]] |
||||
|
# print(cosine_similarity(a)) |
||||
|
|
||||
|
def cos_sim(a, b):
    """Return the cosine similarity of the vectors ``a`` and ``b``.

    Args:
        a: array-like vector.
        b: array-like vector of the same length as ``a``.

    Returns:
        The dot product divided by the product of the two norms, or ``0.0``
        when either vector has zero norm (the similarity is undefined there
        and the plain division would produce NaN with a RuntimeWarning).
    """
    A = np.array(a)
    B = np.array(b)
    denominator = norm(A) * norm(B)
    if denominator == 0:
        return 0.0
    return np.dot(A, B) / denominator
||||
|
|
||||
|
|
||||
|
|
||||
|
# def rouge_value(data_1, data_2): |
||||
|
# data_1 = [' '.join(i) for i in data_1] |
||||
|
# data_2 = [' '.join(i) for i in data_2] |
||||
|
# scores = rouge.get_scores(hyps=data_1, refs=data_2) |
||||
|
# |
||||
|
# rouge_1_list = [] |
||||
|
# rouge_2_list = [] |
||||
|
# rouge_l_list = [] |
||||
|
# |
||||
|
# for i in range(len(scores)): |
||||
|
# rouge_1 = scores[i]['rouge-1']['f'] |
||||
|
# rouge_2 = scores[i]['rouge-2']['f'] |
||||
|
# rouge_l = scores[i]['rouge-l']['f'] |
||||
|
# rouge_1_list.append(rouge_1) |
||||
|
# rouge_2_list.append(rouge_2) |
||||
|
# rouge_l_list.append(rouge_l) |
||||
|
# |
||||
|
# return rouge_1_list, rouge_2_list, rouge_l_list |
||||
|
|
||||
|
|
||||
|
def rouge_value_dan(data_1, data_2):
    """Score a single text pair with the module-level ``rouge`` scorer.

    Both texts are segmented with ``jieba.cut`` and re-joined with single
    spaces before being passed to ``rouge.get_scores`` (``data_1`` as the
    hypothesis, ``data_2`` as the reference).

    Returns:
        tuple: the ``'f'`` values of ``rouge-1``, ``rouge-2`` and ``rouge-l``
        taken from the first entry of the scorer's result.
    """
    segmented = [' '.join(jieba.cut(raw_text)) for raw_text in (data_1, data_2)]
    first_result = rouge.get_scores(segmented[0], segmented[1])[0]
    return tuple(first_result[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l'))
||||
|
|
||||
|
def rouge_value(data_1, data_2):
    """Compute the ROUGE-L F value for each aligned pair of texts.

    Pairs are formed with ``zip`` and scored by ``rouge_value_dan``; only the
    third element (ROUGE-L) of each result is kept.

    Returns:
        tuple: ``("", "", rouge_l_list)`` - the first two slots are empty
        string placeholders.
    """
    rouge_l_list = [
        rouge_value_dan(hypothesis, reference)[2]
        for hypothesis, reference in zip(data_1, data_2)
    ]
    return "", "", rouge_l_list
||||
|
|
||||
|
|
||||
|
def rouge_value_self(data_1, data_2):
    """Score aligned pairs with the module-level ``rouge_l_model``.

    Every item is joined with single spaces and then split on a single space
    again; for a plain string this yields its characters as tokens (a space
    character inside the item ends up as empty-string tokens).

    Returns:
        tuple: ``("", "", rouge_l_list)`` - the first two slots are empty
        string placeholders, the third holds one score per pair.
    """
    hypothesis_tokens = [' '.join(item).split(" ") for item in data_1]
    reference_tokens = [' '.join(item).split(" ") for item in data_2]

    rouge_l_list = [
        rouge_l_model.score(hyp, ref)
        for hyp, ref in zip(hypothesis_tokens, reference_tokens)
    ]
    return "", "", rouge_l_list
||||
|
|
||||
|
|
||||
|
def rouge_w_value(data_1, data_2):
    """Return the score of ``data_1`` against ``data_2`` from the shared ``rouge_model``."""
    return rouge_model.score(data_1, data_2)
||||
|
|
||||
|
def weight_lenw(text_1, text_2, wight):
    """Scale ``wight`` up when ``text_2`` is longer than ``text_1``.

    The multiplier is the linear function ``0.08 * ratio + 0.92`` of the
    length ratio ``len(text_2) / len(text_1)``; when ``text_2`` is not longer
    than ``text_1`` the weight is returned unchanged.
    """
    # Guard clause: nothing to adjust unless text_2 is strictly longer.
    if not len(text_2) > len(text_1):
        return wight

    length_ratio = len(text_2) / len(text_1)
    slope = 0.08
    intercept = 0.92
    multiplier = slope * length_ratio + intercept
    return wight * multiplier
||||
|
|
||||
|
|
||||
|
def rouge_pre(text, df_train_nuoche):
    """Find the corpus row whose text scores highest against ``text``.

    Args:
        text: the query text.
        df_train_nuoche: sequence of rows; ``row[0]`` is the candidate text
            and ``row[1]`` a backslash-separated path string.

    Returns:
        list: ``[best_score, best_text, filename]`` where ``filename`` is the
        last backslash-separated component of the best row's path.  When the
        corpus is empty, ``[0.0, "", ""]`` is returned so callers that append
        the result to a row keep a consistent shape.
    """
    if len(df_train_nuoche) == 0:
        return [0.0, "", ""]

    data_list = [row[0] for row in df_train_nuoche]
    text_list = [text] * len(data_list)
    _, _, rouge_l_scores = rouge_value_self(text_list, data_list)

    # max() returns the first maximal entry, matching the previous
    # stable descending sort without sorting the whole list.
    best_index, best_score = max(enumerate(rouge_l_scores), key=lambda pair: pair[1])

    best_row = df_train_nuoche[best_index]
    filename = best_row[1].split("\\")[-1]
    return [best_score, best_row[0], filename]
||||
|
|
||||
|
|
||||
|
if __name__ == '__main__':
    # Batch run: for every row of the comparison CSV, look up the best
    # matching corpus row and write all results to one Excel sheet.
    load_weights_path = r"E:\pycharm_workspace\premodel\keras\simscemodel/my_model_4.weights"
    df_train_nuoche = pd.read_csv("data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv", encoding="utf-8").values.tolist()

    path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照.csv"
    path_excel = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照_acc_13_self_rouge.xlsx"

    centent_list = pd.read_csv(path_csv, encoding="gbk").values.tolist()

    data_zong = []
    for text in tqdm(centent_list):
        # Rows whose second column is "##" are labelled 0, all others 1.
        true_bool = 0 if text[1] == "##" else 1
        rouge_pre_list = rouge_pre(text[0], df_train_nuoche)
        data_zong.append([text[0], text[1], true_bool] + rouge_pre_list)

    pd.DataFrame(data_zong).to_excel(path_excel, index=None)
Loading…
Reference in new issue