# -*- coding: utf-8 -*-
"""Merge per-article paraphrase sheets into one summary workbook per article.

@Time : 2023/2/15 14:13

For every workbook listed in ``path_1`` the script reads the same-named
workbook in ``path_2`` and the ``<stem>_<ext>`` workbook in ``path_3``,
appends the second column of each, plus the t5 / yy texts looked up by the
first-column text, and writes the combined rows to ``path_out``.
"""
import os

import pandas as pd

# Input directories / workbooks (relative to the working directory).
path_1 = '../data/11篇excel'
path_2 = "../data/11篇临时拼接"
path_3 = "../data/11篇临时拼接2"
path_yy = "../data/11篇_yy_strsim.xlsx"
path_t5 = "../data/11篇_t5_strsim.xlsx"
# Directory that receives one merged workbook per input workbook.
path_out = "../data/11篇测试excel_汇总_3"

# Column headers of the merged workbook, in output order.
output_columns = ["原文", "simbert", "simbert_datasim07", "bertsim_simsim", "t5", "yy"]


def normalize_text(cell):
    """Return ``str(cell)`` with edge '。' removed, then edge whitespace removed."""
    return str(cell).strip("。").strip()


def build_lookup(rows):
    """Map the normalized first cell of each row to its normalized second cell.

    When several rows share the same normalized first cell, the last one wins.
    """
    return {normalize_text(row[0]): normalize_text(row[1]) for row in rows}


def derive_names(file_name):
    """Return ``(stem, derived_name)`` for *file_name*.

    ``derived_name`` is the file name with ``_`` inserted before the
    extension (``doc.xlsx`` -> ``doc_.xlsx``).  ``os.path.splitext`` is used so
    that only the final dot is treated as the extension separator and a name
    without any dot does not raise.
    """
    stem, ext = os.path.splitext(file_name)
    return stem, stem + "_" + ext


def merge_rows(base_rows, rows_2, rows_3, t5_lookup, yy_lookup):
    """Combine positionally aligned rows with the t5 / yy lookup texts.

    Args:
        base_rows: rows of the base sheet; the first cell is the source text.
        rows_2: rows of the second sheet; cell 1 of the row at the same index
            is appended.
        rows_3: rows of the third sheet; cell 1 of the row at the same index
            is appended.
        t5_lookup: mapping from source text to t5 text.
        yy_lookup: mapping from source text to yy text.

    Returns:
        A list of ``base_row + [rows_2 cell, rows_3 cell, t5 text, yy text]``.
        Rows whose first cell is exactly "。" are dropped silently; rows whose
        text is missing from either lookup, or that have no counterpart in
        ``rows_2`` / ``rows_3``, are printed and dropped.
    """
    merged = []
    for idx, row in enumerate(base_rows):
        if row[0] == "。":
            continue
        # NOTE(review): lookup keys are built with edge '。' stripped
        # (normalize_text) while this key is only whitespace-stripped, so a
        # text that starts or ends with '。' never matches -- confirm intended.
        key = str(row[0]).strip()
        try:
            t5_text = t5_lookup[key]
            yy_text = yy_lookup[key]
            merged.append(row + [rows_2[idx][1], rows_3[idx][1], t5_text, yy_text])
        except (KeyError, IndexError):
            # Best-effort: report the unmatched text and keep going.  Only the
            # two expected failures are caught so real errors still surface.
            print(key)
    return merged


def read_rows(path):
    """Read the first sheet of the workbook at *path* as a list of row lists."""
    return pd.read_excel(path).values.tolist()


def main():
    """Build the lookups, then write one merged workbook per input workbook."""
    yy_lookup = build_lookup(read_rows(path_yy))
    t5_lookup = build_lookup(read_rows(path_t5))

    # to_excel does not create missing parent directories.
    os.makedirs(path_out, exist_ok=True)

    # Sorted only to make the processing (and print) order deterministic.
    for file_name in sorted(os.listdir(path_1)):
        stem, derived_name = derive_names(file_name)
        rows_1 = read_rows(os.path.join(path_1, file_name))
        rows_2 = read_rows(os.path.join(path_2, file_name))
        rows_3 = read_rows(os.path.join(path_3, derived_name))

        merged = merge_rows(rows_1, rows_2, rows_3, t5_lookup, yy_lookup)
        df = pd.DataFrame(merged, columns=output_columns)
        df.to_excel(os.path.join(path_out, "{}.xlsx".format(stem)), index=False)


if __name__ == "__main__":
    main()