# -*- coding: utf-8 -*-
"""
@Time    : 2023/3/13 10:38
@Author  :
@FileName:
@Software:
@Describe: Walk a directory of text files, keep the part of each file that
           follows the last "@@@@@@@@@@" marker, and write every non-empty
           line of that part, together with its source path, to a CSV file.
"""
import os

import pandas as pd

# Default input directory and output CSV used when the script is run directly.
# The names are kept as-is for backward compatibility with the original script.
file = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt_new'
file_csv = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv'

# Separator string; only the text after its last occurrence is kept.
SECTION_MARKER = "@@@@@@@@@@"


def collect_paths(root_dir):
    """Return the paths of all files below *root_dir*, in ``os.walk`` order.

    Args:
        root_dir: Directory to scan recursively.

    Returns:
        A list of file paths built with ``os.path.join(root, name)``. The list
        is empty when *root_dir* does not exist or contains no files.
    """
    paths = []
    for root, _dirs, names in os.walk(root_dir):
        # A distinct loop name avoids clobbering the module-level ``file``.
        for name in names:
            paths.append(os.path.join(root, name))
    return paths


def extract_sentences(text, marker=SECTION_MARKER):
    """Return the non-empty lines that follow the last *marker* in *text*.

    When *marker* does not occur, the whole text is used, which matches the
    result of ``text.split(marker)[-1]``.

    Args:
        text: Full content of one input file.
        marker: Separator string; everything up to and including its last
            occurrence is discarded.

    Returns:
        A list of the lines (split on ``"\\n"``) that are not empty strings.
    """
    body = text.rpartition(marker)[2]
    return [line for line in body.split("\n") if line != ""]


def build_rows(paths, encoding="gbk"):
    """Read every file in *paths* and return ``[sentence, path]`` rows.

    Args:
        paths: Iterable of file paths to read.
        encoding: Text encoding used to decode each file.

    Returns:
        A list of two-element lists ``[sentence, path]``, in file order and
        then line order.

    Raises:
        OSError: If a file cannot be opened.
        UnicodeDecodeError: If a file is not valid in *encoding*.
    """
    rows = []
    for path in paths:
        with open(path, encoding=encoding) as handle:
            text = handle.read()
        rows.extend([sentence, path] for sentence in extract_sentences(text))
    return rows


def main(src_dir=file, csv_path=file_csv):
    """Collect sentences from *src_dir* and write them to *csv_path*.

    The CSV has the columns ``sentence`` and ``path`` and no index column.

    Args:
        src_dir: Directory that is scanned recursively for input files.
        csv_path: Destination of the generated CSV file.
    """
    paths = collect_paths(src_dir)
    print(paths)
    rows = build_rows(paths)
    # index=False is the documented way to omit the row index from the output.
    pd.DataFrame(rows, columns=["sentence", "path"]).to_csv(csv_path, index=False)


if __name__ == "__main__":
    main()