# -*- coding: utf-8 -*-

"""
@Time    :  2023/3/13 10:38
@Author  : 
@FileName: 
@Software: 
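@Describe: Walk the input directory, keep the text after the last "@@@@@@@@@@" marker in each GBK-encoded file, and write its non-empty lines (with their source path) to a CSV.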
"""
import os

import pandas as pd

# Input directory that is walked recursively, and the CSV file to produce.
file = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt_new'
file_csv = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv'

# Marker separating sections inside each text file; only the text after its
# last occurrence is kept.
_SECTION_DELIMITER = "@@@@@@@@@@"


def _collect_file_paths(directory):
    """Return the path of every file found under *directory*, recursively.

    Paths are built with ``os.path.join(root, name)`` in the order that
    ``os.walk`` yields them. A missing directory yields an empty list.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
    ]


def _read_body_lines(path):
    """Return the non-empty lines after the last delimiter in the file at *path*.

    The file is decoded as GBK. If the delimiter does not occur, the whole
    file content is treated as the body.
    """
    with open(path, encoding="gbk") as f:
        text = f.read()

    body = text.split(_SECTION_DELIMITER)[-1]
    # Only exactly-empty lines are dropped; whitespace-only lines are kept.
    return [line for line in body.split("\n") if line]


path_list = _collect_file_paths(file)
print(path_list)

# One [sentence, path] row per retained line, in file order.
data = []
for path in path_list:
    data.extend([sentence, path] for sentence in _read_body_lines(path))

# index=False: do not write the DataFrame's row index as an extra column.
pd.DataFrame(data, columns=["sentence", "path"]).to_csv(file_csv, index=False)