You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
72 lines
2.0 KiB
72 lines
2.0 KiB
![]()
2 years ago
|
import json
|
||
|
import re
|
||
|
import math
|
||
|
import numpy as np
|
||
|
from tqdm import tqdm
|
||
|
|
||
|
|
||
|
import random

# Build "task book main content" prompts from a dump of title/summary records.
#
# The input file is a concatenation of records. Each record begins with the
# marker "请帮我生成《" and holds a title prompt and a summary separated by a
# line of fourteen asterisks. For every well-formed record the title is
# extracted and combined with the summary into a new prompt; the prompts are
# then shuffled and written out one JSON string per line.

# Output prompt template; the two slots are filled with (title, summary).
# Roughly: "Based on the title 《{}》 and the research content “{}”, summarize
# at least 6 main things this paper should accomplish, numbered with Arabic
# numerals."
task_book_main_content_prompt = "请根据题目为《{}》,和研究内容为“{}”总结出至少6点本篇论文应完成的主要内容,使用阿拉伯数字排列"

# Regex whose single group captures the title: everything (non-greedy) before
# the fixed tail of the original title prompt.
pantten_title = "(.*?)》为题目的研究内容,包括整体简介和分最少三个方面总结"

# Compiled once so the pattern lookup is not repeated for every record.
_title_re = re.compile(pantten_title)

path = "./data/paper_prompt_title_3/title_jianjie_prompt_data.txt"
with open(path, encoding="utf-8") as f:
    text = f.read()

# Splitting on the record marker removes it from each chunk, so a chunk starts
# directly with the title text.
text_list = text.split("请帮我生成《")

data_list = []
# Not used by this script; kept so the module-level name remains available.
chinese_keyword_data_list = []

for text_dan in tqdm(text_list):
    # A usable record has exactly one separator, i.e. exactly two parts.
    # Anything else (e.g. the text before the first marker) is skipped
    # silently, matching the original behaviour without a bare `except`.
    parts = text_dan.split("**************")
    if len(parts) != 2:
        continue
    title_prompt, jianjie = parts

    # Only the first match is needed; `search(...).group(1)` yields the same
    # value as `findall(...)[0]` without scanning the rest of the chunk.
    title_match = _title_re.search(title_prompt)
    if title_match is None:
        # Show the unparseable title prompt so bad records can be inspected.
        print(title_prompt)
        continue

    title = title_match.group(1).strip("\n")
    jianjie = jianjie.strip("\n")

    data_list.append(task_book_main_content_prompt.format(title, jianjie))

# Unseeded in-place shuffle: output order differs from run to run.
random.shuffle(data_list)

with open("./data/jianjie_to_/task_book_prompt.txt", mode="w", encoding="utf-8") as f:
    # One JSON-encoded string per line; ensure_ascii=False keeps the Chinese
    # text readable instead of \uXXXX escapes.
    f.writelines(json.dumps(prompt, ensure_ascii=False) + "\n" for prompt in data_list)
|
||
|
|
||
|
|
||
|
# for lable in table_of_contents:
|
||
|
# text_len = len(paper_text)
|
||
|
# dan_nerlable = [text_len, text_len + len(lable[0]), lable[1]]
|
||
|
# nerlable_list.append(dan_nerlable)
|
||
|
# paper_text += lable[0]
|
||
|
# paper_text += "@"
|
||
|
#
|
||
|
# paper_dan = {"text": paper_text, "label": nerlable_list}
|
||
|
#
|
||
|
# ner_lable.append(str(table_of_contents))
|
||
|
# text_zong.append(paper_dan)
|
||
|
#
|
||
|
# with open("../data/train.txt", mode="w", encoding="utf-8") as f:
|
||
|
# for i in text_zong:
|
||
|
# f.write(json.dumps(i, ensure_ascii=False))
|
||
|
# f.write("\n")
|
||
|
#
|
||
|
#
|
||
|
# with open("../data/train_lable.txt", mode="w") as f:
|
||
|
# for i in ner_lable:
|
||
|
# f.write(json.dumps(i, ensure_ascii=False))
|
||
|
# f.write("\n")
|