You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
907 B
38 lines
907 B
![]()
2 years ago
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
"""
|
||
|
@Time : 2023/3/13 10:38
|
||
|
@Author :
|
||
|
@FileName:
|
||
|
@Software:
|
||
|
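@Describe: Collect the non-empty body lines of every GBK-encoded text file under the input folder and save them, with their source path, to a CSV file.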
|
||
|
"""
|
||
|
import os
|
||
|
|
||
|
import pandas as pd
|
||
|
|
||
|
# Directory that is scanned recursively for the input text files.
file = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt_new'
# CSV file that receives one row per extracted line.
file_csv = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv'

# Marker separating the leading sections of a text file from its body;
# only the text after the last marker is kept.
_BODY_SEPARATOR = "@@@@@@@@@@"


def _collect_paths(root_dir):
    """Return the path of every file below *root_dir*, in ``os.walk`` order."""
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(root_dir)
        for name in names
    ]


def _extract_sentences(text):
    """Return the non-empty lines that follow the last separator in *text*.

    When the separator does not occur, the whole text is treated as the body.
    """
    body = text.split(_BODY_SEPARATOR)[-1]
    return [line for line in body.split("\n") if line]


def _build_rows(paths, encoding="gbk"):
    """Read each file in *paths* and return ``[sentence, path]`` rows."""
    rows = []
    for path in paths:
        with open(path, encoding=encoding) as f:
            text = f.read()
        rows.extend([sentence, path] for sentence in _extract_sentences(text))
    return rows


def main(src_dir=file, out_csv=file_csv):
    """Extract the body lines of every text file and save them as a CSV.

    Args:
        src_dir: Directory searched recursively for input files. Every file
            found is opened as GBK-encoded text.
        out_csv: Destination CSV path. It is written with the columns
            ``sentence`` and ``path`` and without an index column.

    Raises:
        UnicodeDecodeError: If a file cannot be decoded as GBK.
        OSError: If a file cannot be read or the CSV cannot be written.
    """
    path_list = _collect_paths(src_dir)
    print(path_list)
    data = _build_rows(path_list)
    # index=False is the documented way to omit the row index column.
    pd.DataFrame(data, columns=["sentence", "path"]).to_csv(out_csv, index=False)


if __name__ == "__main__":
    main()