# -*- coding: utf-8 -*-

"""
@Time    :  2023/2/15 14:13
@Author  : 
@FileName: 
@Software: 
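@Describe: Combine each workbook under ../data/11篇excel with its two companion workbooks and the yy / t5 / t5-0724 lookup workbooks, writing one merged workbook per input file.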
"""
import os
import pandas as pd
# Every input lives under the same relative data directory.
_DATA_DIR = "../data"

# Directories holding one workbook per input file. path_1 is listed to
# drive the run; path_2 and path_3 are read using matching file names.
path_1 = _DATA_DIR + "/11篇excel"
path_2 = _DATA_DIR + "/11篇临时拼接"
path_3 = _DATA_DIR + "/11篇临时拼接2"

# Single workbooks whose first two columns are loaded into lookup dicts.
path_yy = _DATA_DIR + "/11篇_yy_strsim.xlsx"
path_t5 = _DATA_DIR + "/11篇_t5_strsim.xlsx"
path_t5_0724 = _DATA_DIR + "/11篇_t5_strsim_0724.xlsx"


def _rows_to_lookup(rows):
    """Map column 0 to column 1 for every row, after normalising both cells.

    Each cell is converted to ``str``, has leading/trailing "。" removed and
    is then whitespace-stripped. When two rows normalise to the same key the
    later row wins.
    """
    return {
        str(row[0]).strip("。").strip(): str(row[1]).strip("。").strip()
        for row in rows
    }


# Read the three lookup workbooks as plain lists of row values.
data_yy = pd.read_excel(path_yy).values.tolist()
data_t5 = pd.read_excel(path_t5).values.tolist()
data_t5_0724 = pd.read_excel(path_t5_0724).values.tolist()

# Normalised first-column text -> normalised second-column text.
data_yy_dict = _rows_to_lookup(data_yy)
data_t5_dict = _rows_to_lookup(data_t5)
data_t5_dict_0724 = _rows_to_lookup(data_t5_0724)



# Names of every entry in the primary input directory, in listing order.
path_list = list(os.listdir(path_1))


# Directory that receives one merged workbook per input file; create it up
# front so the first write does not fail on a missing folder.
output_dir = "../data/11篇测试excel_汇总_3"
os.makedirs(output_dir, exist_ok=True)

for file_name in path_list:
    data_new = []

    # splitext splits on the LAST dot only, so stems that themselves contain
    # dots are kept intact and names without an extension do not raise.
    file_stem, file_ext = os.path.splitext(file_name)

    data_1 = pd.read_excel(os.path.join(path_1, file_name)).values.tolist()
    data_2 = pd.read_excel(os.path.join(path_2, file_name)).values.tolist()
    # The companion file under path_3 has "_" appended to the stem.
    data_3 = pd.read_excel(
        os.path.join(path_3, file_stem + "_" + file_ext)
    ).values.tolist()

    for row_idx, row in enumerate(data_1):
        # Rows whose first cell is a lone full stop carry no text to merge.
        if row[0] == "。":
            continue

        # NOTE(review): the lookup dicts are keyed on text with leading and
        # trailing "。" removed, while this key is only whitespace-stripped;
        # a cell that still ends in "。" will be reported as missing below.
        # Confirm that is intended.
        str_data = str(row[0]).strip()

        try:
            merged_row = row + [
                data_2[row_idx][1],
                data_3[row_idx][1],
                data_t5_dict[str_data],
                data_t5_dict_0724[str_data],
                data_yy_dict[str_data],
            ]
        except (KeyError, IndexError):
            # Best effort: report text that is absent from a lookup dict, or
            # that has no matching row in a companion sheet, and keep going.
            # Anything else (including Ctrl-C) is allowed to propagate.
            print(str_data)
            continue

        data_new.append(merged_row)

    df = pd.DataFrame(data_new,columns=["原文","simbert","simbert_datasim07","bertsim_simsim","t5","t5-0724", "yy"])
    df.to_excel(os.path.join(output_dir, "{}.xlsx".format(file_stem)), index=False)