"""Build "translate these keywords into English" prompts from a keyword dump.

The script reads the input text file, splits it into records, takes the
keyword part of every well-formed record, wraps it in the prompt template,
shuffles the resulting prompts and writes them out as one JSON-encoded
string per line.
"""
import json
import math
import random
import re

import numpy as np
from tqdm import tqdm

# Prompt template (Chinese for: please translate the keywords "{}" into English).
prompt = "请把“{}”这几个关键字翻译成英文"
# NOTE: not referenced anywhere in this script.
pantten_title = "(.*?)》为题目生成论文摘要,要求生成的字数在"

path = "./data/paper_prompt_title_3_1/zhaiyao_chinese_keyword_prompt_data.txt"
with open(path, encoding="utf-8") as f:
    text = f.read()

# Every record begins with this marker, so the chunk in front of the first
# marker is not a record; it is dropped by the [1:] slice below.
text_list = text.split("\n\"请为“")
data_list = []
chinese_keyword_data_list = []  # NOTE: never filled or read in this script.

for text_dan in tqdm(text_list[1:]):
    try:
        # A well-formed record is "<request>**************<keywords>".
        # Zero or several separators make the two-name unpacking raise
        # ValueError; such records are skipped.
        _, chinese_keyword = text_dan.split("**************")
    except ValueError:
        # Only the expected unpacking failure is swallowed, so Ctrl-C and
        # genuine bugs still surface instead of being silently ignored.
        continue
    chinese_keyword = chinese_keyword.strip("\n")
    data_list.append(prompt.format(chinese_keyword))

# Unseeded shuffle: the output order differs from run to run.
random.shuffle(data_list)

with open("./data/chinese_keyword_to_/chinese_keyword_en_prompt.txt", mode="w", encoding="utf-8") as f:
    for i in data_list:
        # ensure_ascii=False keeps the Chinese text readable in the output file.
        f.write(json.dumps(i, ensure_ascii=False))
        f.write("\n")