You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
981 B
41 lines
981 B
import json
|
|
import re
|
|
import math
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
|
|
|
|
import random

# Instruction template sent to the model. In English: 'Please translate the
# keywords "{}" into English'. The "{}" slot receives the raw keyword string.
prompt = "请把“{}”这几个关键字翻译成英文"
# NOTE(review): not referenced anywhere in the visible script; kept in case
# other tooling reads it. The (historical) spelling of the name is preserved.
pantten_title = "(.*?)》为题目生成论文摘要,要求生成的字数在"

# Input dump and output file (one JSON-encoded prompt string per line).
path = "./data/paper_prompt_title_3_1/zhaiyao_chinese_keyword_prompt_data.txt"
output_path = "./data/chinese_keyword_to_/chinese_keyword_en_prompt.txt"

# Text that starts every record in the input dump; the chunk before the first
# occurrence is not a record and is discarded by main().
RECORD_DELIMITER = "\n\"请为“"
# Separator between the leading part of a record and its keyword string.
KEYWORD_DELIMITER = "**************"


def _keyword_prompt(record):
    """Return the translation prompt for one record, or None if malformed.

    A record is well-formed only when it contains KEYWORD_DELIMITER exactly
    once; the text after the delimiter (with surrounding newlines stripped)
    is treated as the keyword string.
    """
    parts = record.split(KEYWORD_DELIMITER)
    if len(parts) != 2:
        # Missing or repeated delimiter: skip rather than guess which part
        # holds the keywords.
        return None
    return prompt.format(parts[1].strip("\n"))


def build_prompts(records):
    """Build translation prompts from an iterable of raw record strings.

    Args:
        records: iterable of str, each expected to look like
            "<anything>" + KEYWORD_DELIMITER + "<keywords>".

    Returns:
        A list of formatted prompt strings, in input order. Malformed
        records are skipped silently.
    """
    prompts = []
    for record in records:
        built = _keyword_prompt(record)
        if built is not None:
            prompts.append(built)
    return prompts


def main():
    """Read the input dump, build shuffled prompts, write them as JSON lines."""
    with open(path, encoding="utf-8") as f:
        text = f.read()

    # Element 0 is whatever precedes the first record marker, so drop it.
    records = text.split(RECORD_DELIMITER)[1:]
    data_list = build_prompts(tqdm(records))

    # Unseeded on purpose: output order differs from run to run.
    random.shuffle(data_list)

    with open(output_path, mode="w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(item, ensure_ascii=False) + "\n" for item in data_list
        )


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|