You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
1.1 KiB
34 lines
1.1 KiB
# -*- coding: utf-8 -*-
|
|
|
|
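"""
@Time : 2023/3/15 11:39
@Author :
@FileName:
@Software:
@Describe: For each first-column entry of the full-text CSV, find the first
           line of the TXT file that is at least 95% similar and write the
           pairs to a comparison CSV.
"""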
|
|
import pandas as pd
|
|
import difflib
|
|
|
|
|
|
|
|
path_txt = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究.txt"
path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文.csv"
path_csv_sim = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照.csv"

# Placeholder written to the output when no line of the text file is similar
# enough to the CSV entry.
NO_MATCH = "##"


def find_similar_line(text, candidates, threshold=0.95):
    """Return the first candidate whose quick similarity to *text* reaches *threshold*.

    Args:
        text: The string to look up.
        candidates: Iterable of strings, scanned in order.
        threshold: Minimum ``SequenceMatcher.quick_ratio()`` value (inclusive)
            for a candidate to count as a match.

    Returns:
        The first matching candidate, or ``NO_MATCH`` ("##") if none qualifies.

    Note:
        ``quick_ratio()`` is only an upper bound on ``ratio()``: it compares
        how many characters the two strings share, not their order, so a
        reordered string can score as high as an identical one.
    """
    # SequenceMatcher caches its analysis of the *second* sequence, so the
    # fixed string goes there and only the first sequence is swapped per
    # candidate. quick_ratio() is symmetric in its two sequences, so this
    # yields the same scores as building a fresh matcher for every pair.
    matcher = difflib.SequenceMatcher(None, "", text)
    for candidate in candidates:
        matcher.set_seq1(candidate)
        if matcher.quick_ratio() >= threshold:
            return candidate
    return NO_MATCH


def main():
    """Match every first-column CSV entry against the TXT lines and save the pairs."""
    # The context manager closes the file even if reading fails.
    with open(path_txt, encoding="utf-8") as f:
        text_lines = f.read().split("\n")

    rows = pd.read_csv(path_csv).values.tolist()

    data = []
    for row in rows:
        cell = row[0]
        if isinstance(cell, str):
            match = find_similar_line(cell, text_lines)
        elif pd.isna(cell):
            # Empty CSV cells are read as NaN; there is nothing to compare.
            match = NO_MATCH
        else:
            # Numeric cells are compared by their string form; the original
            # value is still what gets written to the output.
            match = find_similar_line(str(cell), text_lines)
        data.append([cell, match])

    pd.DataFrame(data).to_csv(path_csv_sim, index=False)


if __name__ == "__main__":
    main()
|