# -*- coding: utf-8 -*-
"""
@Time    : 2023/3/14 17:52
@Author  :
@FileName:
@Software:
@Describe: For every line of a text file, rank the rows of a reference CSV by
           ROUGE-L F-score and write the best matches to an Excel sheet.
"""
from operator import itemgetter

import pandas as pd
from rouge import Rouge
from tqdm import tqdm

rouge = Rouge()

# Number of best-matching reference rows reported for each line of the text.
TOP_K = 10


def rouge_value(data_1, data_2):
    """Return the ROUGE-1, ROUGE-2 and ROUGE-L F-scores of two texts.

    Both arguments are joined with single spaces before scoring, so a plain
    string is scored character by character (each character becomes one
    token), while a list of tokens is scored token by token.

    Args:
        data_1: Hypothesis; a string or an iterable of string tokens.
        data_2: Reference; a string or an iterable of string tokens.

    Returns:
        A tuple ``(rouge_1, rouge_2, rouge_l)`` of F-scores. All three are
        ``0.0`` when the scorer rejects the pair as empty.
    """
    data_1 = ' '.join(data_1)
    data_2 = ' '.join(data_2)
    try:
        scores = rouge.get_scores(hyps=[data_1], refs=[data_2])
    except ValueError:
        # The scorer raises ValueError when the hypothesis or the reference
        # has no scorable content (e.g. an empty line). Nothing overlaps in
        # that case, so report zero instead of aborting the whole run.
        return 0.0, 0.0, 0.0
    result = scores[0]
    return result['rouge-1']['f'], result['rouge-2']['f'], result['rouge-l']['f']


def main():
    """Score every line of the text against the CSV and export the ranking.

    Each output row holds the source line followed by up to ``TOP_K`` groups
    of (ROUGE-L F-score, matched reference text, reference file name), best
    match first. Lines starting with ``*****`` are treated as separators and
    produce no row.
    """
    path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv"
    path_txt = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究.txt"
    path_excel = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_2_rouge.xlsx"

    # Column 0 is used as the reference text, column 1 as its source path.
    references = pd.read_csv(path_csv, encoding="utf-8").values.tolist()

    with open(path_txt, encoding="utf-8") as f:
        content = f.read()

    rows = []
    for text in tqdm(content.split("\n")):
        if text.startswith("*****"):
            continue

        # Only the ROUGE-L F-score (third element) is used for ranking.
        scores = [rouge_value(text, ref[0])[2] for ref in references]

        # sorted() is stable, so equally scored references keep CSV order.
        ranked = sorted(enumerate(scores), key=itemgetter(1), reverse=True)

        row = [text]
        # Slicing tolerates a CSV with fewer than TOP_K rows.
        for ref_index, score in ranked[:TOP_K]:
            reference = references[ref_index]
            # Source paths use backslashes; keep only the file name.
            filename = reference[1].split("\\")[-1]
            row.extend([score, reference[0], filename])
        rows.append(row)

    pd.DataFrame(rows).to_excel(path_excel, index=False)


if __name__ == '__main__':
    main()