You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

34 lines
1.1 KiB

2 years ago
# -*- coding: utf-8 -*-
"""
@Time : 2023/3/15 11:39
@Author :
@FileName:
@Software:
@Describe:
"""
import pandas as pd
import difflib
# Input: plain-text version of the document, one candidate line per "\n".
path_txt = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究.txt"
# Input: CSV whose first column holds the text cells to look up.
path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文.csv"
# Output: two-column CSV pairing each cell with its matched text line.
path_csv_sim = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照.csv"

# Minimum difflib quick_ratio() for a text line to be accepted as a match.
SIMILARITY_THRESHOLD = 0.95
# Placeholder written to the output when no text line reaches the threshold.
NO_MATCH_MARK = "##"


def _first_similar_line(target, candidates,
                        threshold=SIMILARITY_THRESHOLD,
                        default=NO_MATCH_MARK):
    """Return the first candidate that is similar enough to *target*.

    Similarity is ``difflib.SequenceMatcher.quick_ratio()``, which is only
    an upper bound on ``ratio()``: it compares character counts and ignores
    character order.

    Args:
        target: The string being looked up (a cell from the CSV).
        candidates: Iterable of strings to search, in priority order.
        threshold: Minimum quick_ratio (inclusive) that counts as a match.
        default: Value returned when no candidate reaches the threshold.

    Returns:
        The first candidate whose quick_ratio against *target* is
        ``>= threshold``, or *default* if there is none.
    """
    # SequenceMatcher caches its analysis of the *second* sequence, so the
    # fixed string is set once as seq2 and only seq1 changes per candidate.
    # quick_ratio() is symmetric in its two sequences, so this yields the
    # same value as SequenceMatcher(None, target, candidate).quick_ratio().
    # NOTE(review): a non-string target (e.g. NaN from an empty CSV field)
    # makes quick_ratio() raise TypeError - confirm column 0 is always text.
    matcher = difflib.SequenceMatcher()
    matcher.set_seq2(target)
    for candidate in candidates:
        matcher.set_seq1(candidate)
        if matcher.quick_ratio() >= threshold:
            return candidate
    return default


# `with` guarantees the file handle is closed even if read() raises.
with open(path_txt, encoding="utf-8") as f:
    centent = f.read()

centent_text_list = centent.split("\n")
centent_csv_list = pd.read_csv(path_csv).values.tolist()

# One output row per CSV row: [original cell, first matching text line or "##"].
data = [
    [dan_yuan[0], _first_similar_line(dan_yuan[0], centent_text_list)]
    for dan_yuan in centent_csv_list
]
pd.DataFrame(data).to_csv(path_csv_sim, index=False)