# -*- coding: utf-8 -*-

"""
@Time    :  2023/3/9 18:36
@Author  :
@FileName:
@Software:
@Describe: For every sentence of a document, find the reference sentence with
           the highest ROUGE-L score and export the matches to an Excel sheet.
"""

import os

# These variables are assigned before TensorFlow is imported below; keep this
# ordering so the GPU selection is in place when the framework loads.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import jieba
import tensorflow as tf
from src import basemodel
from src import simscemodel
import numpy as np
from numpy.linalg import norm
import pandas as pd
from rouge_chinese import Rouge
from Rouge_w import Rouge_w, Rouge_l
from tqdm import tqdm

# Scorers are created once at import time and shared by the helpers below.
rouge = Rouge()
rouge_model = Rouge_w()
rouge_l_model = Rouge_l()


def cos_sim(a, b):
    """Return the cosine similarity of two 1-D vectors.

    Args:
        a: First vector (any array-like accepted by ``np.array``).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``. No guard is applied for zero-norm
        vectors, so the result of that division is whatever NumPy produces.
    """
    vec_a = np.array(a)
    vec_b = np.array(b)
    return np.dot(vec_a, vec_b) / (norm(vec_a) * norm(vec_b))


def rouge_value_dan(data_1, data_2):
    """Score a single hypothesis/reference pair with the ``rouge`` scorer.

    Both strings are segmented with ``jieba.cut`` and re-joined with single
    spaces before being handed to ``rouge.get_scores``.

    Args:
        data_1: Hypothesis text.
        data_2: Reference text.

    Returns:
        Tuple ``(rouge_1, rouge_2, rouge_l)`` holding the ``'f'`` value of
        each metric for the pair.
    """
    hypothesis = ' '.join(jieba.cut(data_1))
    reference = ' '.join(jieba.cut(data_2))
    first_score = rouge.get_scores(hypothesis, reference)[0]
    return (
        first_score['rouge-1']['f'],
        first_score['rouge-2']['f'],
        first_score['rouge-l']['f'],
    )


def rouge_value(data_1, data_2):
    """Compute ROUGE-L for paired sequences of texts via ``rouge_value_dan``.

    Args:
        data_1: Iterable of hypothesis texts.
        data_2: Iterable of reference texts, paired with ``data_1`` by ``zip``.

    Returns:
        Tuple ``("", "", rouge_l_list)``. The first two slots are empty-string
        placeholders; only the ROUGE-L values are collected.
    """
    rouge_l_list = [
        rouge_value_dan(hyp, ref)[2] for hyp, ref in zip(data_1, data_2)
    ]
    return "", "", rouge_l_list


def rouge_value_self(data_1, data_2):
    """Compute ROUGE-L for paired texts with the project ``Rouge_l`` scorer.

    Every item is expanded to a token list by joining its elements with a
    single space and splitting on ``" "`` again. For a plain string this gives
    one token per character (space characters in the input turn into empty
    tokens), which is exactly how the scorer has always been fed.

    Args:
        data_1: Iterable of hypothesis texts.
        data_2: Iterable of reference texts, paired with ``data_1`` by ``zip``.

    Returns:
        Tuple ``("", "", rouge_l_list)`` where ``rouge_l_list`` holds the value
        returned by ``rouge_l_model.score`` for every pair.
    """
    rouge_l_list = []
    for raw_1, raw_2 in zip(data_1, data_2):
        tokens_1 = ' '.join(raw_1).split(" ")
        tokens_2 = ' '.join(raw_2).split(" ")
        rouge_l_list.append(rouge_l_model.score(tokens_1, tokens_2))
    return "", "", rouge_l_list


def rouge_w_value(data_1, data_2):
    """Return ``rouge_model.score(data_1, data_2)`` (the ``Rouge_w`` scorer)."""
    return rouge_model.score(data_1, data_2)


def weight_lenw(text_1, text_2, wight, k=0.08, b=0.92):
    """Scale a score by the length ratio when ``text_2`` is longer.

    With ``x = len(text_2) / len(text_1)`` the score is multiplied by
    ``k * x + b``. With the defaults, equal lengths would give a factor of 1.

    Args:
        text_1: First text (its length is the denominator of the ratio).
        text_2: Second text.
        wight: Score to adjust.
        k: Slope of the linear scaling factor.
        b: Intercept of the linear scaling factor.

    Returns:
        The adjusted score, or ``wight`` unchanged when ``text_2`` is not
        longer than ``text_1`` or when ``text_1`` is empty.
    """
    len_1 = len(text_1)
    len_2 = len(text_2)
    # An empty text_1 has no defined length ratio (it used to raise
    # ZeroDivisionError); leave the score untouched in that case.
    if len_2 <= len_1 or len_1 == 0:
        return wight
    ratio = len_2 / len_1
    return wight * (k * ratio + b)


def rouge_pre(text, df_train_nuoche):
    """Find the reference row that best matches ``text`` by ROUGE-L.

    Args:
        text: Query sentence.
        df_train_nuoche: Sequence of rows where ``row[0]`` is the reference
            sentence and ``row[1]`` is a path string.

    Returns:
        ``[best_score, best_sentence, filename]`` for the highest-scoring row
        (the first one on ties). ``filename`` is the part of ``row[1]`` after
        the last backslash; forward slashes are not treated as separators.

    Raises:
        IndexError: If ``df_train_nuoche`` is empty.
    """
    if len(df_train_nuoche) == 0:
        raise IndexError("rouge_pre: the reference corpus is empty")

    references = [row[0] for row in df_train_nuoche]
    queries = [text] * len(df_train_nuoche)
    _, _, scores = rouge_value_self(queries, references)
    scores = list(scores)

    # max() yields the first index holding the maximum, which is the same row
    # a stable descending sort would place first.
    best_index = max(range(len(scores)), key=scores.__getitem__)
    best_row = df_train_nuoche[best_index]
    filename = best_row[1].split("\\")[-1]
    return [scores[best_index], best_row[0], filename]


def main():
    """Score every sentence of the comparison CSV and write the Excel report."""
    # Not used by this script; kept so the weights location stays on record.
    load_weights_path = r"E:\pycharm_workspace\premodel\keras\simscemodel/my_model_4.weights"

    df_train_nuoche = pd.read_csv(
        "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv",
        encoding="utf-8",
    ).values.tolist()

    path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照.csv"
    path_excel = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照_acc_13_self_rouge.xlsx"

    centent_list = pd.read_csv(path_csv, encoding="gbk").values.tolist()

    data_zong = []
    for text in tqdm(centent_list):
        # A second column equal to "##" is flagged 0, anything else 1.
        true_bool = 0 if text[1] == "##" else 1
        rouge_pre_list = rouge_pre(text[0], df_train_nuoche)
        data_zong.append([text[0], text[1], true_bool] + rouge_pre_list)

    pd.DataFrame(data_zong).to_excel(path_excel, index=False)


if __name__ == '__main__':
    main()