数据处理代码,用于生成 ChatGPT 数据
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

23 lines
530 B

"""Merge line-delimited JSON files into one shuffled line-delimited JSON file.

Every non-blank line of each input file is parsed as a JSON document. The
parsed records from all inputs are pooled, shuffled in place with
``random.shuffle``, and written back out one JSON document per line.
"""

import json
import random
from pathlib import Path

# Input files, read in this order before shuffling.
SOURCE_FILES = (
    Path("data/small_title_train.json"),
    Path("data/mulu_prompt_shuffle.json"),
)
# Output file; opened in "w" mode, so existing content is overwritten.
OUTPUT_FILE = Path("data/train_paper.json")


def _read_jsonl(path):
    """Return the JSON documents parsed from the non-blank lines of *path*.

    Raises:
        OSError: if *path* cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, encoding="utf-8") as fp:
        # Blank / whitespace-only lines carry no record; feeding them to
        # json.loads would raise and abort the whole merge, so skip them.
        return [json.loads(line) for line in fp if line.strip()]


def _write_jsonl(path, records):
    """Write each item of *records* to *path* as one JSON document per line."""
    with open(path, mode="w", encoding="utf-8") as fp:
        # ensure_ascii=False keeps non-ASCII characters as-is in the output
        # instead of \uXXXX escapes.
        fp.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in records
        )


def main(sources=SOURCE_FILES, destination=OUTPUT_FILE):
    """Pool the records of all *sources*, shuffle them, write *destination*.

    Args:
        sources: iterable of paths to line-delimited JSON files.
        destination: path of the line-delimited JSON file to (over)write.
    """
    data = []
    for source in sources:
        data.extend(_read_jsonl(source))
    # Unseeded shuffle: the output order differs from run to run.
    random.shuffle(data)
    _write_jsonl(destination, data)


if __name__ == "__main__":
    main()