# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

# 158 lines
# 4.4 KiB

# -*- coding: utf-8 -*-
"""
@Time : 2023/3/9 18:36
@Author :
@FileName:
@Software:
@Describe:
"""
import os
# These two environment variables are set before tensorflow is imported below.
# NOTE(review): presumably this pins the process to the first GPU in PCI bus
# order -- confirm against the deployment machine.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import jieba
import tensorflow as tf
# NOTE(review): `os` is already imported at the top of this block; this second
# import is redundant but harmless.
import os
from src import basemodel
from src import simscemodel
import numpy as np
from numpy.linalg import norm
import pandas as pd
# from rouge import Rouge
from rouge_chinese import Rouge
from Rouge_w import Rouge_w,Rouge_l
from tqdm import tqdm
# Module-level scorer instances shared by the functions in this file:
#   rouge         -> used by rouge_value_dan
#   rouge_model   -> used by rouge_w_value
#   rouge_l_model -> used by rouge_value_self
rouge = Rouge()
rouge_model = Rouge_w()
rouge_l_model = Rouge_l()
# a = [[1, 3, 2], [2, 2, 1]]
# print(cosine_similarity(a))
def cos_sim(a, b):
    """Return the cosine similarity of two vectors.

    Args:
        a: First vector; any sequence accepted by ``np.array``.
        b: Second vector, with the same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. When either vector has zero
        norm the similarity is undefined; ``0.0`` is returned in that case
        instead of the ``nan`` (and RuntimeWarning) that the bare division
        would produce.
    """
    A = np.array(a)
    B = np.array(b)
    denominator = norm(A) * norm(B)
    # A zero vector has no direction; avoid dividing by zero.
    if denominator == 0:
        return 0.0
    return np.dot(A, B) / denominator
# def rouge_value(data_1, data_2):
# data_1 = [' '.join(i) for i in data_1]
# data_2 = [' '.join(i) for i in data_2]
# scores = rouge.get_scores(hyps=data_1, refs=data_2)
#
# rouge_1_list = []
# rouge_2_list = []
# rouge_l_list = []
#
# for i in range(len(scores)):
# rouge_1 = scores[i]['rouge-1']['f']
# rouge_2 = scores[i]['rouge-2']['f']
# rouge_l = scores[i]['rouge-l']['f']
# rouge_1_list.append(rouge_1)
# rouge_2_list.append(rouge_2)
# rouge_l_list.append(rouge_l)
#
# return rouge_1_list, rouge_2_list, rouge_l_list
def rouge_value_dan(data_1, data_2):
    """Score one hypothesis/reference pair with the module-level ``rouge`` scorer.

    Both inputs are segmented with ``jieba.cut`` and re-joined with single
    spaces before being handed to ``rouge.get_scores``.

    Args:
        data_1: Hypothesis text.
        data_2: Reference text.

    Returns:
        A tuple ``(rouge_1_f, rouge_2_f, rouge_l_f)`` holding the ``'f'``
        values of the first entry returned by ``rouge.get_scores``.
    """
    segmented_hyp, segmented_ref = (
        ' '.join(jieba.cut(raw)) for raw in (data_1, data_2)
    )
    first_result = rouge.get_scores(segmented_hyp, segmented_ref)[0]
    return tuple(
        first_result[metric]['f']
        for metric in ('rouge-1', 'rouge-2', 'rouge-l')
    )
def rouge_value(data_1, data_2):
    """Compute the ROUGE-L F value for each aligned pair of texts.

    Pairs are formed with ``zip``, so scoring stops at the shorter input.

    Args:
        data_1: Iterable of hypothesis texts.
        data_2: Iterable of reference texts.

    Returns:
        A 3-tuple ``("", "", rouge_l_list)``. The first two slots are empty
        strings (placeholders where ROUGE-1 / ROUGE-2 lists would go); the
        third is the list of per-pair ROUGE-L values from
        ``rouge_value_dan``.
    """
    # Length weighting via weight_lenw is intentionally not applied here.
    per_pair_rouge_l = [
        rouge_value_dan(hypothesis, reference)[2]
        for hypothesis, reference in zip(data_1, data_2)
    ]
    return "", "", per_pair_rouge_l
def rouge_value_self(data_1, data_2):
    """Score aligned pairs with the module-level ``rouge_l_model``.

    Every item of both inputs is first joined with single spaces
    (``' '.join(item)``) and the joined string is then split on ``" "``;
    the two resulting token lists are passed to ``rouge_l_model.score``.

    Args:
        data_1: Iterable of items accepted by ``' '.join`` (e.g. strings).
        data_2: Iterable of items accepted by ``' '.join``.

    Returns:
        A 3-tuple ``("", "", rouge_l_list)`` where the first two slots are
        empty-string placeholders and the third is the list of scores, one
        per zipped pair.
    """
    # Join everything up front (both inputs in full) before pairing.
    spaced_1 = list(map(' '.join, data_1))
    spaced_2 = list(map(' '.join, data_2))
    scores = [
        rouge_l_model.score(left.split(" "), right.split(" "))
        for left, right in zip(spaced_1, spaced_2)
    ]
    return "", "", scores
def rouge_w_value(data_1, data_2):
    """Return ``rouge_model.score(data_1, data_2)`` unchanged.

    Thin wrapper around the module-level ``rouge_model`` instance.
    """
    return rouge_model.score(data_1, data_2)
def weight_lenw(text_1, text_2, wight, *, k=0.08, b=0.92):
    """Scale a score up when the second text is longer than the first.

    With ``x = len(text_2) / len(text_1)`` the score is multiplied by
    ``k * x + b``. With the default ``k`` and ``b`` the factor equals 1 at
    ``x == 1`` and grows linearly with the length ratio.

    Args:
        text_1: First text (any sized object).
        text_2: Second text (any sized object).
        wight: The score to adjust (parameter name kept for compatibility).
        k: Slope of the linear scale factor.
        b: Intercept of the linear scale factor.

    Returns:
        ``wight`` unchanged when ``text_2`` is not longer than ``text_1`` or
        when ``text_1`` is empty (the length ratio is undefined there and
        previously raised ZeroDivisionError); otherwise the scaled score.
    """
    len_1, len_2 = len(text_1), len(text_2)
    # Guard the division: an empty first text has no meaningful ratio.
    if len_1 == 0 or len_2 <= len_1:
        return wight
    ratio = len_2 / len_1
    return wight * (k * ratio + b)
def rouge_pre(text, df_train_nuoche):
    """Find the candidate row whose text scores highest against ``text``.

    Every row's first element is scored against ``text`` with
    ``rouge_value_self``; the row with the highest score wins (the earliest
    row on ties).

    Args:
        text: Query text.
        df_train_nuoche: Non-empty sequence of rows where ``row[0]`` is the
            candidate text and ``row[1]`` is a string whose last
            backslash-separated component is reported as the file name.

    Returns:
        ``[best_score, best_candidate_text, file_name]``.

    Raises:
        IndexError: If ``df_train_nuoche`` is empty (there is nothing to
            match against).
    """
    if len(df_train_nuoche) == 0:
        raise IndexError(
            "rouge_pre: df_train_nuoche is empty, no candidate to match against"
        )

    candidates = [row[0] for row in df_train_nuoche]
    _, _, scores = rouge_value_self([text] * len(candidates), candidates)
    # Length weighting via weight_lenw is intentionally not applied here.

    # max() returns the first maximal index, matching the previous stable
    # descending sort without ordering the whole list.
    best_index = max(range(len(scores)), key=lambda i: scores[i])
    best_row = df_train_nuoche[best_index]
    file_name = best_row[1].split("\\")[-1]
    return [scores[best_index], best_row[0], file_name]
if __name__ == '__main__':
    # Script entry point: for every row of the comparison CSV, look up the
    # best-matching candidate from the reference CSV and write all results
    # to an Excel sheet.
    load_weights_path = r"E:\pycharm_workspace\premodel\keras\simscemodel/my_model_4.weights"
    path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照.csv"
    path_excel = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照_acc_13_self_rouge.xlsx"

    # Candidate rows to match against.
    df_train_nuoche = pd.read_csv("data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv", encoding="utf-8").values.tolist()
    # Rows to be checked; the second column is compared with "##" below.
    centent_list = pd.read_csv(path_csv, encoding="gbk").values.tolist()

    data_zong = []
    for text in tqdm(centent_list):
        # A second column equal to "##" is flagged 0, anything else 1.
        true_bool = 0 if text[1] == "##" else 1
        data_zong.append(
            [text[0], text[1], true_bool] + rouge_pre(text[0], df_train_nuoche)
        )

    pd.DataFrame(data_zong).to_excel(path_excel, index=None)