数据处理代码,为了生成chatgpt数据
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

12 lines
369 B

2 years ago
import json
from tqdm import tqdm
# Build a list of {"input", "answer"} records from a JSON Lines file.
# Every non-blank line is parsed as one JSON object, from which the
# "prompt", "query" and "response" keys are read.
dataset = []
data_path = "data/chatglm_dev_3_prompt.json"
with open(data_path, "r", encoding="utf-8") as fh:
    for line in fh:
        line = line.strip()
        # Skip blank lines: json.loads("") raises JSONDecodeError, so a
        # stray empty line would otherwise abort the whole conversion.
        if not line:
            continue
        sample = json.loads(line)
        dataset.append(
            {
                # The input is the prompt immediately followed by the query
                # (plain concatenation, no separator is inserted).
                "input": sample["prompt"] + sample["query"],
                "answer": sample["response"],
            }
        )
print(dataset)