数据处理代码,用于生成 ChatGPT 数据
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

32 lines
769 B

import os
from tqdm import tqdm
import re
# Merge every prompt file found under the input directory into one text file.
#
# The script walks the input directory recursively, reads each file as UTF-8,
# appends a newline after each file's content, prints the collected paths, the
# number of files read and the merged text, and finally writes the merged text
# to a single output file.

# Pattern and marker strings. None of them is referenced in the visible part
# of this script; they are kept so the module-level names stay available.
patten = "目录是“(.*)”,请把其中的"
p0 = "@@@@@@@@@@@@@@@@@@"
p1 = "补充内容字数在1500字左右"
p2 = "**************"

# Collect the path of every file below the prompt directory. The order is
# whatever os.walk yields, which depends on the underlying filesystem.
data_path_list = []
for root, dirs, files in os.walk(r"./data/paper_prompt_title_3_2/zhaiyao_prompt"):
    for file in files:
        # Build the full path of the file.
        data_path_list.append(os.path.join(root, file))
print(data_path_list)

# Read each file and follow its content with a newline. The pieces are
# gathered in a list and joined once instead of growing a string with "+=".
jishu = 0  # number of files read (the original never incremented this)
chunks = []
for i in tqdm(data_path_list):
    with open(i, encoding="utf-8") as f:
        chunks.append(f.read())
    chunks.append("\n")
    jishu += 1
data_str = "".join(chunks)
print(jishu)
print(data_str)

# Create the output directory if needed so the write below cannot fail just
# because the directory is missing.
output_path = "./data/paper_prompt_title_3_1/title_mulu_zhaiyao_data.txt"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, mode="w", encoding="utf-8") as f:
    f.write(data_str)