You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

38 lines
907 B

# -*- coding: utf-8 -*-
"""
@Time : 2023/3/13 10:38
@Author :
@FileName:
@Software:
@Describe:
"""
import os
import pandas as pd
# Directory tree holding the plagiarism-check text files, and the CSV that
# receives one row per extracted sentence.
file = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt_new'
file_csv = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv'


def collect_paths(root_dir):
    """Return the path of every file found under *root_dir*, recursively.

    Paths are listed in the order ``os.walk`` yields them. A missing
    directory yields an empty list, because ``os.walk`` ignores listing
    errors by default.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(root_dir)
        for name in names
    ]


def extract_sentences(path, encoding="gbk", delimiter="@@@@@@@@@@"):
    """Return the non-empty lines that follow the last *delimiter* in a file.

    The file is decoded with *encoding* and split on *delimiter*; only the
    final segment is kept. When the delimiter does not occur, the whole
    file is that final segment.
    """
    with open(path, encoding=encoding) as f:
        text = f.read()
    body = text.split(delimiter)[-1]
    return [line for line in body.split("\n") if line != ""]


def main(src_dir=file, dst_csv=file_csv):
    """Write a ``sentence,path`` CSV for every text file under *src_dir*.

    Returns the rows that were written, each as ``[sentence, path]``.
    """
    path_list = collect_paths(src_dir)
    print(path_list)
    data = [
        [sentence, path]
        for path in path_list
        for sentence in extract_sentences(path)
    ]
    # index=False is the documented way to omit the row-index column.
    pd.DataFrame(data, columns=["sentence", "path"]).to_csv(dst_csv, index=False)
    return data


if __name__ == "__main__":
    main()