# -*- coding: utf-8 -*-
"""Rank tab-separated string pairs by character-level similarity.

@Time    : 2023/2/27 18:24

Reads a text file holding one ``left<TAB>right`` pair per line, scores each
pair with ``difflib.SequenceMatcher.quick_ratio()``, and writes the pairs,
sorted from the highest score to the lowest, to an Excel workbook.
"""
import difflib

import pandas as pd

# Earlier input file: "../data/11篇汇总txt_new_predict_t5.txt"
INPUT_PATH = "../data/11篇汇总txt_new_predict_t5_0724.txt"
OUTPUT_PATH = "../data/11篇_t5_strsim_0724.xlsx"


def read_nonempty_lines(path, encodings=("utf-8", "gbk")):
    """Return the stripped, non-blank lines of the text file at *path*.

    Each encoding in *encodings* is tried in order. Only a decoding failure
    moves on to the next candidate; any other error (for example a missing
    file) propagates immediately instead of being masked by a retry.

    Raises:
        UnicodeDecodeError: if no candidate encoding can decode the file.
        OSError: if the file cannot be opened.
    """
    last_error = None
    for encoding in encodings:
        try:
            with open(path, "r", encoding=encoding) as handle:
                stripped = (raw.strip() for raw in handle)
                return [text for text in stripped if text != ""]
        except UnicodeDecodeError as err:
            # Wrong guess for this file; fall through to the next encoding.
            last_error = err
    if last_error is None:
        raise ValueError("at least one encoding must be supplied")
    raise last_error


def score_pairs(lines):
    """Build ``[left, right, score]`` rows from tab-separated *lines*.

    A line is kept only when splitting it on tabs yields exactly two fields;
    every other line is skipped. ``score`` is the ``quick_ratio()`` of a
    ``difflib.SequenceMatcher`` built over the two fields. Rows are returned
    in input order.
    """
    rows = []
    for line in lines:
        fields = line.split("\t")
        if len(fields) != 2:
            continue
        left, right = fields
        score = difflib.SequenceMatcher(None, left, right).quick_ratio()
        rows.append([left, right, score])
    return rows


def main():
    """Score the pairs in INPUT_PATH and write the ranking to OUTPUT_PATH."""
    rows = score_pairs(read_nonempty_lines(INPUT_PATH))
    print(rows)
    # Stable sort: rows with equal scores keep their input order.
    rows.sort(key=lambda row: row[2], reverse=True)
    pd.DataFrame(rows).to_excel(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()