import json
import re
from tqdm import tqdm
import os
# Set in the process environment before `torch` is imported on the next line.
# NOTE(review): presumably meant to limit CUDA to devices 2 and 3 -- confirm,
# including the spaces just inside the quotes of the key and the value.
os . environ [ " CUDA_VISIBLE_DEVICES " ] = " 2,3 "
import torch
from transformers import AutoModelForCausalLM , AutoTokenizer
from transformers . generation . utils import GenerationConfig
# Local checkpoint directory used for the tokenizer, the model weights and the
# generation config below.
model_path = " /home/majiahui/project/models-llm/Baichuan-13B-Chat "

# The slow tokenizer is requested explicitly; the checkpoint ships its own
# Python code, hence trust_remote_code.
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    use_fast=False,
    trust_remote_code=True,
)

# Half-precision weights, with device placement delegated to `device_map`.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=" auto ",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Use the generation settings stored alongside the checkpoint.
model.generation_config = GenerationConfig.from_pretrained(model_path)
def predict_baichuan(text):
    """Ask the chat model to translate ``text`` into English.

    The fixed Chinese instruction ("translate the following passage into
    English") is prepended to ``text`` and the result is sent to
    ``model.chat`` as a single user message.

    Args:
        text: The passage to translate.

    Returns:
        Whatever ``model.chat`` returns for the one-message conversation.
    """
    prompt = " 把下面这段翻译成英文 \n " + text
    conversation = [{" role ": " user ", " content ": prompt}]
    return model.chat(tokenizer, conversation)
# Lookup table keyed by the Chinese instruction tag of a dataset record.
# Each entry holds:
#   " data_tpye "   - English label for the instruction type (unique per entry)
#   " title ", " hexin ", " catalogue_str ", " abstract ", ...
#                   - regex patterns with a single capture group, used to pull
#                     that field out of the record's input text
#   " en-prompt "   - English prompt template containing `{ $name}` placeholders
# NOTE(review): every string literal keeps the spacing found in the original
# file (a space just inside each quote); confirm that padding is intended.
prompt_tpye_dict = {
    " 生成论文来源的背景# \n ": {
        " data_tpye ": " Background for generating paper sources# \n ",
        " title ": " 以《(.*)》为论文题目 ",
        " en-prompt ": " Using ‘ { $title}’ as the title of the paper, write a paragraph about the background of the English question source, with a minimum word count of { $wordnum} words ",
    },
    " 生成研究内容# \n ": {
        " data_tpye ": " Generate research content# \n ",
        " title ": " 请帮我生成《(.*)》为题目 ",
        " en-prompt ": " Please help me generate the English research content titled ‘ { $title}’, including an overall introduction and a summary of at least three aspects ",
    },
    " 生成目录# \n ": {
        " data_tpye ": " Generate directory# \n ",
        " title ": " 为论文题目《(.*)》 ",
        " en-prompt ": " Generate an English directory for the paper title ‘ { $title}’, which requires only the first level title and second level title. The format of the first level title is as follows: 1. xxx; The format of the secondary title is as follows: 1.1 xxx; 4 first level titles; Each first level title should contain at least 3 second level titles; ",
    },
    " 生成课题的研究背景和意义# \n ": {
        # Previously duplicated the " Generate directory# \n " label of the
        # entry above; it now mirrors the "- core content" sibling entry.
        " data_tpye ": " Research background and significance of the generated topic# \n ",
        " title ": " 请分别写出以《(.*)》为课题 ",
        " en-prompt ": " Please provide the English research background and significance of ‘ { $title}’ as the topic, with a minimum of { $wordnum} words ",
    },
    " 生成致谢# \n ": {
        " data_tpye ": " Generate acknowledgments# \n ",
        " title ": " 请以《(.*)》为题写一篇论文的中文致谢 ",
        " en-prompt ": " Write an English thank based on the paper title ‘ { $title}’ ",
    },
    " 生成论文简短总结# \n ": {
        " data_tpye ": " Generate a brief summary of the paper# \n ",
        " title ": " 以《(.*)》为论文题目 ",
        " en-prompt ": " Write a brief summary of the English paper titled ‘ { $title}’, with a requirement of no more than { $wordnum} words ",
    },
    " 生成课题的国内外研究状况综述# \n ": {
        " data_tpye ": " A Summary of Research Status at Home and Abroad on Generated Topics# \n ",
        " title ": " 请写出以《(.*)》为课题的国内外研究状况综述 ",
        " en-prompt ": " Please provide an English summary of the research status of ‘ { $title}’ at home and abroad, with a word count of around { $wordnum} words ",
    },
    " 生成6点本篇论文应完成的主要内容# \n ": {
        " data_tpye ": " Generate 6 main contents that should be completed in this paper# \n ",
        " title ": " 请根据题目为《(.*)》 ",
        " opening_report_main_content ": " 研究内容为“(.*)”总结出至少6点本篇论文应完成的主要内容 ",
        " en-prompt ": " Please summarize at least 6 main English content that should be completed for this paper based on the title ‘ { $title}’ and the research content ‘ { $opening_report_main_content}’, using Arabic numerals for arrangement ",
    },
    " 生成参考文献# \n ": {
        " data_tpye ": " Generate references# \n ",
        " title ": " 论文题目是《(.*)》 ",
        " catalogue_str ": " 目录是“(.*)” ",
        " en-prompt ": " According to the paper title ‘ { $title}’ and the directory is ‘ { $catalogue_str}’, generate 15 references in the format of [1] xxx. ",
    },
    " 生成论文小标题内容# \n ": {
        " data_tpye ": " Generate paper subheading content# \n ",
        " title ": " 论文题目是《(.*)》 ",
        " catalogue_str ": " 目录是“(.*)” ",
        # Non-greedy on purpose in the original: stops at the first closing quote.
        " smell-title ": " 请把其中的小标题“(.*?)”的内容补充完整 ",
        " en-prompt ": " According to the paper title ‘ { $title}’ and the table of contents ‘ { $catalogue_str}’, add approximately { $wordnum} words of English content to the subheading ‘ { $secondray_title}’. The content must include the current subheading and not include other titles in the table of contents ",
    },
    " 生成论文摘要# \n ": {
        " data_tpye ": " Generate paper abstract# \n ",
        " title ": " 论文题目是《(.*)》 ",
        " catalogue_str ": " 目录是“(.*)” ",
        # NOTE(review): this template asks for a "Chinese" abstract while the
        # other templates that name a language ask for English -- confirm.
        " en-prompt ": " Generate a Chinese paper abstract based on the title “ { $title}” and the directory “ { $catalogue_str}”, with a required word count of around { $wordnum} words ",
    },
    " 生成关键字# \n ": {
        " data_tpye ": " Generate Keywords# \n ",
        " abstract ": " 请为“(.*)”这段论文摘要生成3-5个关键字 ",
        " en-prompt ": " Please generate 3-5 keywords for the abstract of the paper ‘ { $abstract}’ ",
    },
    " 生成论文来源的背景-核心内容# \n ": {
        " data_tpye ": " Background of generating paper sources - core content# \n ",
        " title ": " 以《(.*)》为论文题目 ",
        " hexin ": " 以“(.*)”为论文的研究方向 ",
        " en-prompt ": " Based on the paper title ‘ { $title}’ and the core content ‘ { $this->core_content}’, generate a background of approximately { $wordnum} words of English topic sources ",
    },
    " 生成研究内容-核心内容# \n ": {
        " data_tpye ": " Generate research content - core content# \n ",
        " title ": " 请帮我生成《(.*)》为题目 ",
        " hexin ": " 以“(.*)”为论文的研究方向 ",
        " en-prompt ": " Generate English research content based on the paper title ‘ { $title}’ and core content ‘ { $this->core_content}’, including an overall introduction and a summary of at least three aspects ",
    },
    " 生成目录-核心内容# \n ": {
        " data_tpye ": " Generate Directory - Core Content# \n ",
        " title ": " 为论文题目《(.*)》生成目录 ",
        " hexin ": " 以“(.*)”为论文的研究方向 ",
        " en-prompt ": " Generate an English directory based on the paper title ‘ { $title}’ and the core content ‘ { $this->core_content}’, with only the first and second level titles required. The format of the first level title is as follows: 1. xxx; The format of the secondary title is as follows: 1.1 xxx; 4 first level titles; Each first level title should contain at least 3 second level titles; ",
    },
    " 生成课题的研究背景和意义-核心内容# \n ": {
        " data_tpye ": " Research background and significance of the generated topic - core content# \n ",
        " title ": " 请分别写出以《(.*)》为课题 ",
        " hexin ": " 以“(.*)”为论文的研究方向 ",
        " en-prompt ": " Generate an English research background and significance of no less than { $wordnum} words based on the paper title ‘ { $title}’ and the core content ‘ { $this->core_content}’ ",
    },
    " 生成论文简短总结-核心内容# \n ": {
        " data_tpye ": " Generate a brief summary of the paper - core content# \n ",
        " title ": " 以《(.*)》为论文题目 ",
        " hexin ": " 以“(.*)”为论文的研究方向 ",
        " en-prompt ": " Generate a brief summary of the English paper with approximately { $wordnum} words based on the title ‘ { $title}’ and the core content ‘ { $this->core_content}’ ",
    },
    " 生成课题的国内外研究状况综述-核心内容# \n ": {
        " data_tpye ": " Overview of domestic and international research status on generating topics - core content# \n ",
        " title ": " 请写出以《(.*)》为课题 ",
        " hexin ": " 以“(.*)”为论文的研究方向 ",
        " en-prompt ": " Based on the paper title ‘ { $title}’ and the core content ‘ { $this->core_content}’, generate an English summary of research status at home and abroad with no less than { $wordnum} words ",
    },
}
def _first_match(pattern, text):
    """Return the first ``re.findall`` result of ``pattern`` in ``text``.

    Raises:
        IndexError: if ``pattern`` does not occur in ``text``.
    """
    return re.findall(pattern, text)[0]


# Only these two instruction tags are translated. Handlers for the remaining
# keys of `prompt_tpye_dict` were disabled (commented out) in the original
# script and are intentionally not reproduced here.
# NOTE(review): string literals carried over from the original keep the space
# just inside each quote; confirm that padding is intended.
_TITLE_ONLY_TAG = " 生成论文来源的背景# \n "
_TITLE_AND_HEXIN_TAG = " 生成课题的国内外研究状况综述-核心内容# \n "

# Read with an explicit encoding: the records contain Chinese text and the
# result file below is written as UTF-8 as well.
with open(" data/llama_t/chatglm_dev_4_prompt_llama.json ", encoding="utf-8") as f:
    data = json.load(f)

data_new = []
for record in tqdm(data):
    instruction = record[" instruction "]
    # Swap the newline marker for its escaped form before regex matching; the
    # extracted "hexin" text is converted back further down.
    input_ = str(record[" input "]).replace(" \n ", " \\ n ")

    if instruction not in (_TITLE_ONLY_TAG, _TITLE_AND_HEXIN_TAG):
        continue

    try:
        dan_dict = prompt_tpye_dict[instruction]

        # Extract every field first so that no model call is made for a record
        # whose input does not match the expected patterns.
        title = _first_match(dan_dict[" title "], input_)
        hexin = None
        if instruction == _TITLE_AND_HEXIN_TAG:
            hexin = _first_match(dan_dict[" hexin "], input_)
            hexin = hexin.replace(" \\ n ", " \n ")

        en_title = predict_baichuan(title)
        # Title-only records keep the original placeholder value for "en_hexin".
        en_hexin = predict_baichuan(hexin) if hexin is not None else " "
    except Exception as err:
        # Best-effort run: report the failing record (and why) and move on.
        # Unlike a bare `except:`, Ctrl-C and SystemExit still propagate.
        print(record)
        print(repr(err))
        continue

    data_new.append({" en_title ": en_title, " en_hexin ": en_hexin})

# The mode literal in the original was " w ", which `open()` rejects because
# of the spaces; mode and encoding are written in their canonical form here.
with open(" ./data/llama_t/chatglm_en.json ", mode="w", encoding="utf-8") as f:
    json.dump(data_new, f, ensure_ascii=False, indent=2)