# -*- coding: utf-8 -*-
"""
@Time    : 2023/3/15 11:39
@Describe: Align every entry in the first column of a CSV file with the
           first line of a plain-text file that is at least 95% similar
           (by ``difflib.SequenceMatcher.quick_ratio``), and write the
           resulting (csv_text, matched_line) pairs to a comparison CSV.
           Entries with no sufficiently similar line are paired with "##".
"""
import difflib

import pandas as pd

path_txt = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究.txt"
path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文.csv"
path_csv_sim = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照.csv"

# Minimum quick_ratio() score for a text line to count as a match.
SIMILARITY_THRESHOLD = 0.95
# Placeholder written to the output when no line reaches the threshold.
NO_MATCH_MARKER = "##"


def find_similar_line(query, candidates, threshold=SIMILARITY_THRESHOLD):
    """Return the first candidate whose similarity to *query* meets *threshold*.

    Similarity is ``SequenceMatcher.quick_ratio()``, which only compares the
    multiset of characters (character order is ignored), so it is an upper
    bound on the true ``ratio()``.

    Args:
        query: The string to look up.
        candidates: Iterable of strings, scanned in order.
        threshold: Minimum score (inclusive) required for a match.

    Returns:
        The first matching candidate, or ``NO_MATCH_MARKER`` if none matches.
    """
    # SequenceMatcher caches its analysis of the *second* sequence, so the
    # fixed query goes there and only the first sequence changes per
    # candidate. quick_ratio() is symmetric in its two sequences, so the
    # scores are the same as with the arguments the other way round.
    matcher = difflib.SequenceMatcher(None, "", query)
    for candidate in candidates:
        matcher.set_seq1(candidate)
        # real_quick_ratio() depends on the lengths only and is never lower
        # than quick_ratio(), so it can cheaply rule candidates out without
        # changing which candidate is selected.
        if matcher.real_quick_ratio() < threshold:
            continue
        if matcher.quick_ratio() >= threshold:
            return candidate
    return NO_MATCH_MARKER


def build_alignment(rows, lines):
    """Pair the first cell of every row with its most similar text line.

    Args:
        rows: Sequence of rows (each a sequence whose first item is the text
            to look up), e.g. ``DataFrame.values.tolist()``.
        lines: List of candidate text lines.

    Returns:
        A list of ``[cell, matched_line_or_marker]`` pairs, one per row.
    """
    pairs = []
    for row in rows:
        cell = row[0]
        if pd.isna(cell):
            # Empty CSV cells are read as NaN (a float); there is nothing to
            # compare, so record them as unmatched instead of crashing.
            matched = NO_MATCH_MARKER
        else:
            # str() guards against cells pandas parsed as numbers.
            matched = find_similar_line(str(cell), lines)
        pairs.append([cell, matched])
    return pairs


def main():
    """Read the text and CSV inputs and write the comparison CSV."""
    with open(path_txt, encoding="utf-8") as text_file:
        content = text_file.read()
    text_lines = content.split("\n")

    csv_rows = pd.read_csv(path_csv).values.tolist()
    pairs = build_alignment(csv_rows, text_lines)

    # No index column; pandas writes its default "0,1" header row.
    pd.DataFrame(pairs).to_csv(path_csv_sim, index=False)


if __name__ == "__main__":
    main()