You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

58 lines
1.9 KiB

# -*- coding: utf-8 -*-
"""
@Time : 2023/3/14 17:52
@Author :
@FileName:
@Software:
@Describe:
"""
from rouge import Rouge
import pandas as pd
from tqdm import tqdm
# Module-level scorer instance; rouge_value() calls its get_scores() method.
rouge = Rouge()
def rouge_value(data_1, data_2):
    """Return the ROUGE-1, ROUGE-2 and ROUGE-L F-scores of two texts.

    Both inputs are joined with single spaces before scoring. A plain string
    is therefore split into one token per character, while a list of strings
    keeps each element as one token.

    Args:
        data_1: Hypothesis text (a string or an iterable of strings).
        data_2: Reference text (a string or an iterable of strings).

    Returns:
        A tuple ``(rouge_1, rouge_2, rouge_l)`` of F-scores. When either
        joined text is empty the result is ``(0.0, 0.0, 0.0)``.
    """
    hyp = ' '.join(data_1)
    ref = ' '.join(data_2)

    # The rouge package raises ValueError ("Hypothesis is empty." /
    # "Reference is empty.") for empty text. Nothing can overlap with an
    # empty text, so report zero scores instead of aborting the caller.
    if not hyp or not ref:
        return 0.0, 0.0, 0.0

    # One hypothesis/reference pair goes in, so one result dict comes back.
    result = rouge.get_scores(hyps=[hyp], refs=[ref])[0]
    return (
        result['rouge-1']['f'],
        result['rouge-2']['f'],
        result['rouge-l']['f'],
    )
def main(
    path_csv="data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv",
    path_txt="data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究.txt",
    path_excel="data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_2_rouge.xlsx",
    top_k=10,
):
    """Rank the CSV candidates against every line of a text file by ROUGE-L.

    Each non-skipped line of ``path_txt`` is scored with ``rouge_value``
    against the first column of every CSV row. The ``top_k`` best ROUGE-L
    matches are written to one spreadsheet row laid out as
    ``[line, score, candidate text, file name, score, candidate text, ...]``.

    Args:
        path_csv: CSV file. Column 0 is the candidate text; column 1 is a
            path whose last backslash-separated component is kept as the
            file name.
        path_txt: UTF-8 text file that is compared line by line.
        path_excel: Destination workbook.
        top_k: Maximum number of matches kept per line.
    """
    candidates = pd.read_csv(path_csv, encoding="utf-8").values.tolist()

    # The context manager closes the file even when reading fails.
    with open(path_txt, encoding="utf-8") as f:
        lines = f.read().split("\n")

    rows = []
    for text in tqdm(lines):
        # Lines starting with five asterisks are not scored. Empty lines
        # (e.g. the one produced by a trailing newline) are skipped too,
        # because the rouge package raises ValueError for an empty hypothesis.
        if not text or text.startswith("*****"):
            continue

        # Only the ROUGE-L score (third element) is used for ranking.
        scores = [rouge_value(text, candidate[0])[2] for candidate in candidates]
        # sorted() is stable, so candidates with equal scores keep CSV order.
        ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

        row = [text]
        # Slicing instead of indexing 0..9 avoids an IndexError when the CSV
        # holds fewer than top_k candidates.
        for idx, score in ranked[:top_k]:
            row.append(score)
            row.append(candidates[idx][0])
            # Keep only the last component of the backslash-separated path.
            row.append(candidates[idx][1].split("\\")[-1])
        rows.append(row)

    pd.DataFrame(rows).to_excel(path_excel, index=False)


if __name__ == '__main__':
    main()