# duplicate_check/txt_to_csv.py

# -*- coding: utf-8 -*-

"""
@Time    :  2023/3/13 10:38
@Author  : 
@FileName: 
@Software: 
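@Describe: Walk the input folder, take the text after the last "@@@@@@@@@@" marker in each file, and write its non-empty lines to a CSV with `sentence` and `path` columns.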
"""
import os

import pandas as pd

# Input folder that is walked recursively, and the CSV file that is produced.
file = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt_new'
file_csv = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv'

path_list = []  # path of every file found under `file`, in os.walk order
data = []  # one [sentence, path] row per non-empty line kept


# Collect the path of every file below the input folder.
# The loop variable is deliberately not named `file`: reusing that name would
# rebind the module-level input folder to the last filename visited.
for root, dirs, files in os.walk(file):
    for filename in files:
        path = os.path.join(root, filename)
        path_list.append(path)

print(path_list)
for path in path_list:
    # The inputs are decoded as GBK; a file in another encoding will raise
    # UnicodeDecodeError here.
    with open(path, encoding="gbk") as f:
        text = f.read()

    # Keep only what follows the last "@@@@@@@@@@" separator (the whole text
    # when the separator is absent) -- presumably the main body of the
    # document ("zhengwen"); confirm against the txt export format.
    text_list = text.split("@@@@@@@@@@")
    text_zhengwen = text_list[-1]
    text_zhengwen_list = text_zhengwen.split("\n")
    for sentence in text_zhengwen_list:
        # Skip empty lines only; whitespace-only lines are still kept.
        if sentence != "":
            data.append([sentence, path])

# index=False is the documented way to omit the row index from the CSV
# (the previous index=None only worked because None is falsy).
pd.DataFrame(data, columns=["sentence", "path"]).to_csv(file_csv, index=False)
第一次提交 3 years ago			`# -- coding: utf-8 --`

			`"""`
			`@Time : 2023/3/13 10:38`
			`@Author :`
			`@FileName:`
			`@Software:`
			`@Describe:`
			`"""`
			`import os`

			`import pandas as pd`

			`file = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt_new'`
			`file_csv = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv'`

			`path_list = []`
			`data = []`


			`for root, dirs, files in os.walk(file):`
			`for file in files:`
			`path = os.path.join(root, file)`
			`path_list.append(path)`

			`print(path_list)`
			`for path in path_list:`
			`with open(path, encoding="gbk") as f:`
			`text = f.read()`

			`text_list = text.split("@@@@@@@@@@")`
			`text_zhengwen = text_list[-1]`
			`text_zhengwen_list = text_zhengwen.split("\n")`
			`for sentence in text_zhengwen_list:`
			`if sentence != "":`
			`data.append([sentence, path])`

			`pd.DataFrame(data,columns=["sentence", "path"]).to_csv(file_csv, index=None)`