# Data-processing script used to generate ChatGPT data.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

33 lines
866 B

2 years ago
import ast
import json
# {
# '生成课题的研究背景和意义#': 1851,
# '生成论文小标题内容#': 8316,
# '生成目录#': 1975,
# '生成论文摘要#': 958,
# '生成6点本篇论文应完成的主要内容#': 881,
# '生成研究内容#': 2014,
# '生成关键字#': 850,
# '翻译关键词#': 980,
# '生成论文简短总结#': 2055,
# '生成论文来源的背景#': 2003,
# '生成课题的国内外研究状况综述#': 1915,
# '翻译摘要#': 199
# }
# Default location of the line-delimited data file (one record per line).
path = "data/chatglm_dev_3_prompt.json"

# Records whose "prompt" field equals this value are echoed to stdout.
TARGET_PROMPT = "生成论文小标题内容#"


def _parse_record(line):
    """Parse one line of the data file into a Python object.

    The line is first decoded as JSON. If that fails, it is parsed as a
    Python literal (for example a single-quoted ``dict`` repr) with
    ``ast.literal_eval``, which only accepts literals and never executes
    code, unlike the ``eval`` call this replaces.

    Args:
        line: One raw line read from the data file.

    Returns:
        The decoded object.

    Raises:
        ValueError, SyntaxError: If the line is neither valid JSON nor a
            valid Python literal.
    """
    try:
        return json.loads(line)
    except json.JSONDecodeError:
        # Not JSON: fall back to a safe literal parser for dict-repr lines.
        return ast.literal_eval(line.strip())


def main(data_path=path, target_prompt=TARGET_PROMPT):
    """Print every raw line of ``data_path`` whose prompt is ``target_prompt``.

    After all lines are processed, the ``data_type`` dict is printed. The
    per-prompt tally that would fill it is currently disabled (see the
    commented code in the loop), so it prints as ``{}``.

    Args:
        data_path: Path of the UTF-8 file to scan, one record per line.
        target_prompt: Value of the record's ``"prompt"`` key to match.

    Raises:
        KeyError: If a record has no ``"prompt"`` key.
        ValueError, SyntaxError: If a non-blank line cannot be parsed.
    """
    data_type = {}
    with open(data_path, encoding="utf-8") as f:
        # Stream the file instead of loading every line into memory first.
        for line in f:
            if not line.strip():
                # Blank lines carry no record; skip them instead of crashing.
                continue
            record = _parse_record(line)
            # Disabled tally of records per prompt type:
            # data_type[record["prompt"]] = data_type.get(record["prompt"], 0) + 1
            if record["prompt"] == target_prompt:
                # Echo the raw line unchanged (it keeps its trailing newline).
                print(line)
    print(data_type)


if __name__ == "__main__":
    main()