数据处理代码，用于生成 ChatGPT 数据
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

23 lines
512 B

import os
# Script: for every file under the input directory, take the text that
# precedes the "@@@@@" separator on the file's first line, and write those
# values, one per line, to the output file.


def collect_file_paths(root_dir):
    """Return the paths of all files under *root_dir*, searched recursively.

    Paths are returned in the order ``os.walk`` yields them. A missing
    directory produces an empty list, because ``os.walk`` ignores listing
    errors by default.
    """
    found = []
    for root, _dirs, files in os.walk(root_dir):
        for name in files:
            found.append(os.path.join(root, name))
    return found


def read_first_field(file_path, separator="@@@@@"):
    """Return the text before *separator* on the first line of *file_path*.

    The file is decoded as UTF-8. If the first line does not contain
    *separator*, the whole line is returned; an empty file yields ``""``.
    """
    with open(file_path, encoding="utf-8") as f:
        # Only the first line is needed, so avoid loading the whole file.
        # Text mode normalizes line endings, so at most one trailing "\n"
        # has to be removed.
        first_line = f.readline().rstrip("\n")
    return first_line.split(separator)[0]


def write_lines(lines, out_path):
    """Write each item of *lines* to *out_path* (UTF-8), one per line.

    The parent directory of *out_path* is created when it does not exist,
    so the script does not fail on a fresh checkout.
    """
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in lines)


# Root directory that is scanned for input files.
path = "train_data2/train_data2"

# Every file found below ``path``.
path_list = collect_file_paths(path)

# First "@@@@@"-delimited field of the first line of each input file.
data_new = [read_first_field(file_path) for file_path in path_list]

write_lines(data_new, "data/题目5.txt")