You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
158 lines
4.4 KiB
158 lines
4.4 KiB
![]()
2 years ago
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
"""
|
||
|
@Time : 2023/3/9 18:36
|
||
|
@Author :
|
||
|
@FileName:
|
||
|
@Software:
|
||
|
@Describe:
|
||
|
"""
|
||
|
import os
|
||
|
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
||
|
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
||
|
import jieba
|
||
|
import tensorflow as tf
|
||
|
import os
|
||
|
from src import basemodel
|
||
|
from src import simscemodel
|
||
|
import numpy as np
|
||
|
from numpy.linalg import norm
|
||
|
import pandas as pd
|
||
|
# from rouge import Rouge
|
||
|
from rouge_chinese import Rouge
|
||
|
from Rouge_w import Rouge_w,Rouge_l
|
||
|
from tqdm import tqdm
|
||
|
|
||
|
# Scorer instances are created once at import time and shared by the helper
# functions in this module.
rouge = Rouge()  # rouge_chinese scorer, used by rouge_value_dan
rouge_model = Rouge_w()  # Rouge_w scorer from the local Rouge_w module, used by rouge_w_value
rouge_l_model = Rouge_l()  # Rouge_l scorer from the local Rouge_w module, used by rouge_value_self
|
||
|
|
||
|
# a = [[1, 3, 2], [2, 2, 1]]
|
||
|
# print(cosine_similarity(a))
|
||
|
|
||
|
def cos_sim(a, b):
    """Return the cosine similarity of two vectors.

    Args:
        a: Array-like vector; converted with ``np.array``.
        b: Array-like vector with the same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. When either vector has zero
        norm the similarity is undefined, so ``0.0`` is returned instead of
        dividing by zero (which would produce NaN and a RuntimeWarning).
    """
    vec_a = np.array(a)
    vec_b = np.array(b)

    denominator = norm(vec_a) * norm(vec_b)
    # Guard the division: a zero-length vector has no direction to compare.
    if denominator == 0:
        return 0.0

    return np.dot(vec_a, vec_b) / denominator
|
||
|
|
||
|
|
||
|
|
||
|
# def rouge_value(data_1, data_2):
|
||
|
# data_1 = [' '.join(i) for i in data_1]
|
||
|
# data_2 = [' '.join(i) for i in data_2]
|
||
|
# scores = rouge.get_scores(hyps=data_1, refs=data_2)
|
||
|
#
|
||
|
# rouge_1_list = []
|
||
|
# rouge_2_list = []
|
||
|
# rouge_l_list = []
|
||
|
#
|
||
|
# for i in range(len(scores)):
|
||
|
# rouge_1 = scores[i]['rouge-1']['f']
|
||
|
# rouge_2 = scores[i]['rouge-2']['f']
|
||
|
# rouge_l = scores[i]['rouge-l']['f']
|
||
|
# rouge_1_list.append(rouge_1)
|
||
|
# rouge_2_list.append(rouge_2)
|
||
|
# rouge_l_list.append(rouge_l)
|
||
|
#
|
||
|
# return rouge_1_list, rouge_2_list, rouge_l_list
|
||
|
|
||
|
|
||
|
def rouge_value_dan(data_1, data_2):
    """Score a single hypothesis/reference pair with the shared ``rouge`` scorer.

    Both texts are segmented with ``jieba.cut`` and the resulting tokens are
    joined with single spaces before being handed to ``rouge.get_scores``.

    Args:
        data_1: Hypothesis text.
        data_2: Reference text.

    Returns:
        A tuple ``(rouge_1, rouge_2, rouge_l)`` holding the ``'f'`` value of
        the ``'rouge-1'``, ``'rouge-2'`` and ``'rouge-l'`` entries of the
        first result returned by the scorer.
    """
    hyp_tokens, ref_tokens = (
        ' '.join(jieba.cut(raw)) for raw in (data_1, data_2)
    )
    first_result = rouge.get_scores(hyp_tokens, ref_tokens)[0]
    return tuple(
        first_result[metric]['f']
        for metric in ('rouge-1', 'rouge-2', 'rouge-l')
    )
|
||
|
|
||
|
def rouge_value(data_1, data_2):
    """Compute the ROUGE-L score for texts paired position by position.

    Each pair is scored with ``rouge_value_dan``; only the ROUGE-L component
    is kept. No length weighting is applied to the scores.

    Args:
        data_1: Iterable of hypothesis texts.
        data_2: Iterable of reference texts, paired with ``data_1`` via ``zip``.

    Returns:
        A 3-tuple ``("", "", rouge_l_list)``. The first two slots are
        empty-string placeholders; the third is the list of ROUGE-L scores,
        one per pair.
    """
    rouge_l_list = [
        rouge_value_dan(hypothesis, reference)[2]
        for hypothesis, reference in zip(data_1, data_2)
    ]
    return "", "", rouge_l_list
|
||
|
|
||
|
|
||
|
def rouge_value_self(data_1, data_2):
    """Compute ROUGE-L for paired items with the shared ``rouge_l_model`` scorer.

    Every item is first joined with single spaces and then split on ``" "``
    again, so a plain string without spaces ends up as a list of its
    characters before scoring.

    Args:
        data_1: Iterable of items (strings or sequences of strings).
        data_2: Iterable of items paired with ``data_1`` via ``zip``.

    Returns:
        A 3-tuple ``("", "", rouge_l_list)``. The first two slots are
        empty-string placeholders; the third holds one
        ``rouge_l_model.score`` result per pair.
    """
    # Join everything up front so both inputs are fully consumed before scoring.
    spaced_1 = list(map(' '.join, data_1))
    spaced_2 = list(map(' '.join, data_2))

    rouge_l_list = [
        rouge_l_model.score(left.split(" "), right.split(" "))
        for left, right in zip(spaced_1, spaced_2)
    ]
    return "", "", rouge_l_list
|
||
|
|
||
|
|
||
|
def rouge_w_value(data_1, data_2):
    """Return the shared ``rouge_model`` (``Rouge_w``) score for the two inputs.

    Args:
        data_1: First input, passed to ``rouge_model.score`` unchanged.
        data_2: Second input, passed to ``rouge_model.score`` unchanged.

    Returns:
        Whatever ``rouge_model.score(data_1, data_2)`` returns.
    """
    return rouge_model.score(data_1, data_2)
|
||
|
|
||
|
def weight_lenw(text_1, text_2, wight):
    """Scale a score up when ``text_2`` is longer than ``text_1``.

    The multiplier is linear in the length ratio:
    ``0.08 * (len(text_2) / len(text_1)) + 0.92``, which equals 1.0 at a
    ratio of 1 and grows as ``text_2`` gets longer relative to ``text_1``.

    Args:
        text_1: First text (any sized object).
        text_2: Second text (any sized object).
        wight: Score to scale. The parameter name is kept as-is for
            backward compatibility with keyword callers.

    Returns:
        ``wight`` unchanged when ``text_2`` is not longer than ``text_1`` or
        when ``text_1`` is empty; otherwise ``wight`` multiplied by the
        length-ratio factor described above.
    """
    len_1, len_2 = len(text_1), len(text_2)

    # An empty text_1 has no defined length ratio; dividing by it would raise
    # ZeroDivisionError, so the score is returned unscaled in that case.
    if len_1 == 0 or len_2 <= len_1:
        return wight

    slope = 0.08
    intercept = 0.92
    ratio = len_2 / len_1
    return wight * (slope * ratio + intercept)
|
||
|
|
||
|
|
||
|
def rouge_pre(text, df_train_nuoche):
    """Find the candidate row whose text scores highest against ``text``.

    ``text`` is scored against the first column of every row with
    ``rouge_value_self``; the row with the highest score wins. On ties the
    earliest row is chosen.

    Args:
        text: Query text to compare against every candidate.
        df_train_nuoche: Sequence of rows where ``row[0]`` is the candidate
            text and ``row[1]`` is a backslash-separated path string.

    Returns:
        A list ``[best_score, best_text, filename]`` where ``filename`` is
        the last backslash-separated component of the winning row's
        ``row[1]``.

    Raises:
        IndexError: If ``df_train_nuoche`` is empty.
    """
    if len(df_train_nuoche) == 0:
        # Keep the exception type an empty input has always produced, but
        # fail up front with an explanatory message.
        raise IndexError("rouge_pre() requires at least one candidate row")

    candidates = [row[0] for row in df_train_nuoche]
    queries = [text] * len(candidates)
    _, _, scores = rouge_value_self(queries, candidates)

    # Only the top match is needed, so take a single max pass instead of
    # sorting every score; max() returns the first of equal maxima.
    best_index, best_score = max(enumerate(scores), key=lambda pair: pair[1])

    best_row = df_train_nuoche[best_index]
    filename = best_row[1].split("\\")[-1]
    return [best_score, best_row[0], filename]
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Script entry point: for every row of the comparison CSV, look up the
    # best-matching row of the reference CSV and write the results to Excel.
    load_weights_path = r"E:\pycharm_workspace\premodel\keras\simscemodel/my_model_4.weights"

    path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照.csv"
    path_excel = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照_acc_13_self_rouge.xlsx"

    # Reference rows that every input text is compared against.
    df_train_nuoche = pd.read_csv("data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv", encoding="utf-8").values.tolist()
    centent_list = pd.read_csv(path_csv, encoding="gbk").values.tolist()

    data_zong = []
    for text in tqdm(centent_list):
        # Rows whose second column is "##" are labelled 0, all others 1.
        true_bool = 0 if text[1] == "##" else 1
        rouge_pre_list = rouge_pre(text[0], df_train_nuoche)
        data_zong.append([text[0], text[1], true_bool] + rouge_pre_list)

    pd.DataFrame(data_zong).to_excel(path_excel, index=None)
|