
commit a8031664e4
50 changed files with 5612 additions and 0 deletions
@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
@@ -0,0 +1,79 @@
import os
from tqdm import tqdm
import re


# regex that pulls the table of contents out of a prompt (the Chinese prompt text is data and stays as-is)
patten = "目录是“(.*)”,请把其中的"
p0 = "@@@@@@@@@@@@@@@@@@"
p1 = "补充内容字数在1500字左右"
p2 = "**************"
data_path_list = []
for root, dirs, files in os.walk(r"./data/paper_prompt_title_3_2/small_title_prompt_shuffle_1"):
    for file in files:
        # collect the file path
        data_path_list.append(os.path.join(root, file))

for root, dirs, files in os.walk(r"./data/paper_prompt_title_3_2/small_title_prompt_shuffle_2"):
    for file in files:
        # collect the file path
        data_path_list.append(os.path.join(root, file))

for root, dirs, files in os.walk(r"./data/paper_prompt_title_3_2_10000_40000/small_title_prompt_2_10000_40000"):
    for file in files:
        # collect the file path
        data_path_list.append(os.path.join(root, file))


print(data_path_list)


jishu = 0      # counter: samples dropped because the generation leaked neighbouring headings
data_str = ""
for i in tqdm(data_path_list):
    dayin = False
    with open(i, encoding="utf-8") as f:
        data_dan = f.read()
        data_dan_list = data_dan.split(p0)[1].split(p2)

        tishi = data_dan_list[0]    # the prompt
        gen = data_dan_list[1]      # the generated completion
        gen_len = len(gen)
        result_biaoti_list = re.findall(patten, tishi)
        try:
            mulu = str(result_biaoti_list[0])
        except:
            print(tishi)
            continue
        # the TOC embedded in the prompt uses the literal two-character sequence "\n" as its separator
        mulu_list = mulu.split("\\n")
        mulu_list = [i.strip() for i in mulu_list if i != ""]

        mulu_list_bool = []
        pantten_biaoti = r'[1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][、.]\s{0,}?[\u4e00-\u9fa5a-zA-Z]+'
        for i in mulu_list:
            result_biaoti_list = re.findall(pantten_biaoti, i)
            if result_biaoti_list != []:
                mulu_list_bool.append((i, "一级标题"))  # first-level heading
            else:
                mulu_list_bool.append((i, "二级标题"))  # second-level heading

        for i in range(len(mulu_list) - 2):
            # if two consecutive headings both appear in the completion, the model copied the TOC
            if "\n" + mulu_list_bool[i][0] in gen and "\n" + mulu_list_bool[i+1][0] in gen:
                # print("标题1", mulu_list_bool[i][0])
                # print("标题2", mulu_list_bool[i+1][0])
                dayin = True
                break
        if dayin == True:
            # print('gen', gen)
            # print('tishi', tishi)
            jishu += 1
            continue
        zishu = str(int(gen_len/100)*100)  # actual length rounded down to the nearest 100 characters
        prompt = tishi.replace(p1, "".join(["补充内容字数在", zishu, "字左右"]))
        data_str += p0 + prompt + p2 + gen


print(jishu)
print(data_str)
with open("./data/paper_prompt_title_3_1/title_mulu_small_title_prompt_shuffle_data.txt", mode="w", encoding="utf-8") as f:
    f.write(data_str)
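A quick sanity check of the heading-level regex used above, on TOC lines taken from the sample data file that follows (a standalone sketch, not part of the commit):

import re

# a numeral (Arabic, Chinese, or Roman) followed by 、 or . and then text marks a first-level heading
pantten_biaoti = r'[1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][、.]\s{0,}?[\u4e00-\u9fa5a-zA-Z]+'

for line in ["一、研究背景和目的", "1.1 研究背景", "三、材料与方法", "3.3 实验材料和处理"]:
    level = "一级标题" if re.findall(pantten_biaoti, line) else "二级标题"
    print(line, "->", level)
# 一、研究背景和目的 -> 一级标题
# 1.1 研究背景 -> 二级标题
# 三、材料与方法 -> 一级标题
# 3.3 实验材料和处理 -> 二级标题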
@@ -0,0 +1,15 @@
@@@@@@@@@@@@@@@@@@论文题目是“不同有机肥料对猕猴桃土壤微生态及产量、品质的影响研究”,目录是“一、研究背景和目的\n 1.1 研究背景\n 1.2 研究目的\n\n二、相关理论和研究现状\n 2.1 有机肥料的种类和特点\n 2.2 猕猴桃的土壤微生态及其影响因素\n 2.3 有机肥料对猕猴桃产量和品质的影响研究现状\n\n三、材料与方法\n 3.1 实验地点和时间\n 3.2 实验设计\n 3.3 实验材料和处理\n\n四、结果与分析\n 4.1 土壤微生态指标的变化\n 4.2 猕猴桃产量的变化\n 4.3 猕猴桃品质的变化\n\n五、结论与展望\n 5.1 结论\n 5.2 展望\n\n六、参考文献”,请把其中的小标题“3.3 实验材料和处理”的内容补充完整,补充内容字数在1500字左右**************3.3 实验材料和处理

3.3.1 实验材料

本实验选取了猕猴桃栽培常用的有机肥料,包括牛粪、鸡粪、豆渣、蚯蚓粪和堆肥。这些有机肥料均为当地市场上常见的肥料,选取时要求肥料来源可靠、质量稳定。同时,为了保证实验的可比性,选取的有机肥料均为新鲜的,未经过发酵处理。

3.3.2 实验处理

本实验共设置了五个有机肥料处理,分别为牛粪、鸡粪、豆渣、蚯蚓粪和堆肥,以及一个对照组,即不施加有机肥料。每个处理设置了三个重复,共计18个试验单位。

在实验开始前,先将试验地点进行清理和平整,确保试验地点的土壤质量和水分状况均匀。接着,根据实验设计,在试验地点上设置了18个试验单位。每个试验单位的大小为2 m × 2 m,其中中央1 m × 1 m 的区域为猕猴桃树的生长区域,周围1 m 的区域为控制区域。

在实验开始前,先对试验地点的土壤进行了全面的化验分析,以了解土壤的基本性质和养分状况。根据化验结果,确定了每个试验单位所需的施肥量。在实验开始前,先将有机肥料进行筛选和清洗,去除杂质和异物。然后将有机肥料均匀地撒在每个试验单位的中央1 m × 1 m 的区域内,厚度约为5 cm。接着,将有机肥料与土壤混合均匀,然后进行翻耕和平整,使有机肥料与土壤充分接触。

在猕猴桃生长期间,根据实验设计,对每个试验单位进行了相同的管理和维护。包括定期浇水、除草、松土、修剪等。同时,对每个试验单位进行了相同的采样和分析,以了解土壤微生态指标、猕猴桃产量和品质的变化。
@@ -0,0 +1,6 @@
title = "大自然"


b = f"你好,{title}"

print(b)
@@ -0,0 +1,187 @@
suppcopu@tersnetz.best----5w9qoEQr----POCELccB2F----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
dusomulti@tersnetz.best----aDNO7BcM----dus2OaZ----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
bensuba@tersnetz.best----CB2s90DNo----Z2VY4G2----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
pokari@tersnetz.best----XKkVzDbnfB----DipqKfn----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
paisa@tersnetz.best----UDYF6prHog----SvY63GaE----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
nichin@tersnetz.best----kLOEJtYZS----kolaE6oWH9----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
saeburficudi@tersnetz.best----7SP9lWnA----JpHWCCNgk----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
seubercadi@tersnetz.best----xw3Y4ntl----gSykWdYNBT----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
daiguri@tersnetz.best----rdHfvLan----yAre6Ya3----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
regakuzuhi@tersnetz.best----SjI0de3NY----3OY1hXrt----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
demagvice@tersnetz.best----9xpzk8wCt3----mE1DjozAP----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
vilule@tersnetz.best----f9YRSo61s----ycBciuDA----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
kokuji@tersnetz.best----IiNfcoZrh----N3MSAzzA----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
woiba@tersnetz.best----1UGfuJIYnA----gS3yJekxc4----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
terpgerfipasio@tersnetz.best----LV8ITFXwt----5nEDpOLcLQ----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
inimep@tersnetz.best----BKOpyYvVqm----dvpRZwD----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
ligby@tersnetz.best----y9O75hW4nR----8YhM9TZdr----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
guruni@tersnetz.best----mq9lAGye----DruCe1b8----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
sylimica@tersnetz.best----mGfSWZkb----fuI8nr7----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
shino@tersnetz.best----pGPOj2KfC----hAmMwcV7Vk----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
glutorpatwallsket@tersnetz.best----osxXYZAGyJ----MjoYp9wfm1----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
causenfors@tersnetz.best----EFxOBnofYm----umcHLmc----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
waru@tersnetz.best----aR6tA3vdCl----vaskmpqY----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
rikobake@tersnetz.best----hjf7oReFW----OjYJKLMn----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
panbancformi@tersnetz.best----DKjavVqN23----oURpmte6m----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
reacmecu@tersnetz.best----JPeYc64H----OBh4UyP6qk----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
bjorerde@tersnetz.best----mzNnraysu----Bbue3w6D----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
golddetarmars@tersnetz.best----YXE3CWUz9D----4zAjuwoHX----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
chiraino@tersnetz.best----YDNodsQU----Rd5XK1Vr----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
dense@tersnetz.best----yNIag2GUm----rhjEzVm----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
confrafab@tersnetz.best----WOaehfloc----8cXwgXIx----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
trumtheruvolking@tersnetz.best----UdPX17bys----tcocPmNjLm----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
ctivexum@tersnetz.best----3fqU0skQ----F7QwHRSh----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
tsukufuri@tersnetz.best----XHY4CSOrie----sVuobsY----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
langgemugo@tersnetz.best----Gl23t1zMoS----wpxJrJj7D----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
hukssunbcordia@tersnetz.best----rGMLBXZuHl----A1syOOwQXC----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
imcutel@tersnetz.best----N0J34mXC----IhYiYZRR----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
fasttubedstapo@tersnetz.best----AKeLUZ7Sl----bW7JRmKU----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
sonbeau@tersnetz.best----sjkgJ8hdv----zkSEgLRDI----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
kitsu@tersnetz.best----UtKNwdAifP----cnz7VcvF----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
flavduoda@tersnetz.best----LcfnjQHkoO----KuFG3gIi----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
dergduffsket@tersnetz.best----KpVU9hFv1X----vCsa6gJ----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
terdena@tersnetz.best----sVO8fUrM7----kOKYgIm----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
quilect@tersnetz.best----QVK6TINboy----RkiK55D----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
sturavupan@tersnetz.best----dGIXpM5nmE----GzC5Aug26s----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
daesaecicom@tersnetz.best----gQr5I0i4q3----OvyaeAy----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
billracfi@tersnetz.best----8PzN6TB3i9----XD5IkYoZ----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
baachieblogjugg@tersnetz.best----kCD9x8qcE----DhzxEcCJw----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
mabtipurpja@tersnetz.best----THetxRiaz----FEFkkRz----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
zenpuku@tersnetz.best----W0q14FOJnP----4z77pOeCm----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
pikiriha@tersnetz.best----8l5absUI----KSSz4dr----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
cunccommos@tersnetz.best----RhCdwaj7----OpoL6FX----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
seikake@tersnetz.best----g8fqrkmL----kuHCPxH----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
deshi@tersnetz.best----ROblT421----q3hrr1qSZ----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
okain@tersnetz.best----0KowvLYHp----lgwCnrQT----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
degai@tersnetz.best----5l43u9CrA----KlazC33DrS----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
nshimanji@tersnetz.best----RxyM369Xr----F54qcLz----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
robanatate@tersnetz.best----HlByp0Z1G----5WUHk7sM----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
mishime@tersnetz.best----fzSri4WlY----g8vbRwjnYn----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
praninanad@tersnetz.best----gsrVwMbf----3rJx4Ao2VS----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
mulsuhita@tersnetz.best----yMN0ZXWb----gU2H8iFCk----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
bionakupirs@tersnetz.best----EAuXa6qx1----lSd4VZKzg----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
neku@tersnetz.best----P2XtwnLa----usA2ApwEB----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
honyu@tersnetz.best----gBxXrPcn----Ql8b6IcrZ----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
latyvo@tersnetz.best----SGmqURdn----zAqTZLOzui----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
abuto@tersnetz.best----ct9CBRJE----bSxaQeGI----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
biconlen@tersnetz.best----09yJBwGQai----sHKTcGv----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
raslybe@tersnetz.best----68iZkI7YU----BGXgxiAaJV----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
sustcacuna@tersnetz.best----CH24eNyR----nYjc4yjSCT----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
wordhealthli@tersnetz.best----NIuJSdzR----WabZUQiOtZ----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
momeruoma@tersnetz.best----ezRkGBuQg----1JtY1NHm----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
shikiminka@tersnetz.best----DMP0CZ2j----3KPnPBibwJ----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
plosinumpel@tersnetz.best----T4vwFyLzg----Us78iwrH----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
torcato@tersnetz.best----A7158bEXp----WedQeSI6p----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
poscoaniema@tersnetz.best----FPk5eXZ0----2YZ5MHZx4H----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
riebo@tersnetz.best----V815QYeNZ----Jdxc1cTaR----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
zlatradu@tersnetz.best----ejtdwoUP7----W6KjGnskk----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
nujobco@tersnetz.best----AOhCKSn1o----jJ1laVg----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
todonahizu@tersnetz.best----XgyPS7JNfo----4QevFd6z----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
cetabethei@tersnetz.best----B4tuo9f0k----arUWRjZ6----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
tofenci@tersnetz.best----RefaiozD1----PDaZ7B2----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
jinna@tersnetz.best----bCjupdci75----nS3gR8T1----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
sconilin@tersnetz.best----zPaoiMfjJ----axYMSHydh----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
enomai@tersnetz.best----3NIJlUqM----pbdju7Fq8----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
senro@tersnetz.best----Xk0B8ltdG----JjtGAWEl----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
iltecounro@tersnetz.best----JT2q59vyLg----ywN9iO88c----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
itrefu@tersnetz.best----aftKNicH4----AVItDJiBG2----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
wakai@tersnetz.best----WlFeJUCsaR----YaYzzOKlqP----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
gokoshi@tersnetz.best----RV86XJyp----jt5WFHLVt----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
lefwwisibhuntdi@tersnetz.best----OreVGSgI----sbg7im4ps----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
rocomvilin@tersnetz.best----AlfviK3dra----4nt2W2rF----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
reilogphalac@tersnetz.best----m3LkYClJX----GUmlR3p----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
sculidenter@tersnetz.best----tUlAosJXdV----KWBznnO----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
mamsa@tersnetz.best----eVUj4g3c----Ab2QltdMhg----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
graphictecda@tersnetz.best----erh0nZuI----hsm3DSB----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
norekiri@tersnetz.best----dBxAEGtoc----U4kbwU6GHP----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
torsrurolis@tersnetz.best----ovThfDl8JO----QK7V9bn8----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
gurumarana@tersnetz.best----zkTD90NWHG----Ckt6z85j----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
giokett@tersnetz.best----VK0uAHi37----HId8LZxy----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
suin@tersnetz.best----fgIxcEVX----mx9Aaa2----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
wodohanke@tersnetz.best----bPFSgQDh9----qszZRmsyfV----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
taimuemite@tersnetz.best----mYQSrztN----mVdzeEOcsz----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
cuico@tersnetz.best----PVyOkW0r9----l2L7DkPc4o----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
bicaunocos@tersnetz.best----J5gbQ07mPs----RKxxjqW----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
cecafeta@tersnetz.best----Z5KrLsT79----9e1jfyi2ED----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
abtotiomil@tersnetz.best----fxegFI3S----1acH8JSy2----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
abutin@tersnetz.best----O53BR7k1F----6F3JvapCg----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
porliopersan@tersnetz.best----9YNfpI7Ang----ZLMFxUl----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
brocwinthocom@tersnetz.best----51fnDpNz----mDyqSsqm8----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
banta@tersnetz.best----K8Mmx9ov----yLHsnIIT----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
forcaucada@tersnetz.best----q8JGDtTv----Foz6594Z----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
ruimanquadna@tersnetz.best----Vbfcp5usd----puAUVk84If----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
chingo@tersnetz.best----4wFJyBaCu----RZbfOKqME----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
heie@tersnetz.best----zYcBJORnr----p7bzYgwMxx----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
genza@tersnetz.best----luaJBP40----k3vygXx----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
rabanse@tersnetz.best----NubZKUjy----ymPAm8s73----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
naistabidim@tersnetz.best----cmEWhaSx----GOrvLMtaS----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
jinshi@tersnetz.best----QKblfGOVZw----8fxWnagX----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
compmoperqui@tersnetz.best----UeRm2g9d5W----1cKdgmoI----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
faulealiamul@tersnetz.best----0MJeW8EO----cj2dvFXU----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
kioto@tersnetz.best----lU7S4ERexn----gV13ioHYh----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
ashin@tersnetz.best----aotDG8LsB----Y32jaNE----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
biolecpartu@tersnetz.best----DYxPqhrHJ----2KWQbGEGS----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
holosaperback@tersnetz.best----69qgaiAG----qpvWxtrlP----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
zumeishi@tersnetz.best----XMh3UbO7z----8X8WvxLr----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
dietuter@tersnetz.best----WRf64VsTxD----wfjFDrFFu----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
missniraga@tersnetz.best----NXbBHk8WQI----b71FlC38----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
inmistan@tersnetz.best----BwuVYCWJmA----YH1VU2iQsC----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
bergworllidu@tersnetz.best----5BFbjOeI4----N6nOfN3----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
keruma@tersnetz.best----ehMG1nTx----arVsSQj----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
acnisven@tersnetz.best----J4q9HrKo----Pj7EzUfYT----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
kelchace@tersnetz.best----tY1kVjHe----aYJ2njW----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
awai@tersnetz.best----S7siMGPCV----Nmp1c1Nk----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
sugari@tersnetz.best----HShEdOYprU----qSkanC9kJC----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
grytunmatbouhot@tersnetz.best----xfeScnXT1J----3Hof885QP----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
sebilucdenlo@tersnetz.best----V3wAWzLPeZ----RK2lneqJ2----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
zukika@tersnetz.best----wqB4PdVA----lTmeFVR----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
statabingran@tersnetz.best----arHep8fL----xLQujXy----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
bildjoundebt@tersnetz.best----l5cPtQmYzo----tAg5U9bkr----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
lanacepreai@tersnetz.best----bVKWQXBpv3----GIqo2nz----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
kokushina@tersnetz.best----w1jDxe23cq----rHQgkLUg2----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
claronince@tersnetz.best----lQpgBuFzwk----drsk7ZeTp----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
lustboconvavi@tersnetz.best----XyA7vlpN----2tov7Fi9----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
oyabu@tersnetz.best----Rf9MGtl4a----gG8xSqf1----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
suguriko@tersnetz.best----eQ2tumyB----FVpGjtQr----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
dialitendebe@tersnetz.best----1QvDTk6LE----fEJ8qfa----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
kisoa@tersnetz.best----WLEwqpsRH2----jUlIxOPBU----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
bokosa@tersnetz.best----baEg3hy9xR----dAYimP5J----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
crysinsubmen@tersnetz.best----F4Wn69G7A----NsLoccR5u----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
feporvo@tersnetz.best----3hLkfOmdA----1fdyQeu----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
mennici@tersnetz.best----CU8aqBbPKD----tpucc87uF----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
unnemeli@tersnetz.best----aiSPQOTs----zZD217o----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
cusini@tersnetz.best----4mgIb50KjW----gN7uk7UIo----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
steepkeebeantwoodcwor@tersnetz.best----iRvBhSqFOX----Y4WaziiS----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
geni@tersnetz.best----QU2qnLJSR----keNcmCED----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
pyperdoli@tersnetz.best----FVmjYPCASH----UcYetSh7----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
inolov@tersnetz.best----OBaiZE4NI----btTCxwzs----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
ofen@tersnetz.best----WD68NJKoBx----Jj9HnAA6QG----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
issimixmos@tersnetz.best----kdgtIWAx----O9MMnrNBN----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
herthotire@tersnetz.best----mPYbLI6pdy----uZPGtLUJ----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
tomono@tersnetz.best----ZJQUPCNt----178G8Xw2u----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
liepussomu@tersnetz.best----0DXB8yuR----wnaDucodqj----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
togara@tersnetz.best----mbLEGV8t----4jBGDXmncu----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
tasecnylpputhe@tersnetz.best----20sGYJWQl----L5s5GmFfL----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
goriserrisp@tersnetz.best----qaCnKUJf----mrh93NKAei----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
nungiotrus@tersnetz.best----mUyxdNoBz4----1FaqrgKyQO----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
nuki@tersnetz.best----aOcQN84jn----eE5yr5UB----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
slinjan@tersnetz.best----VXWxrEni----9YC4kFK----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
deibelsil@tersnetz.best----g8D21sQn----d6rdBoe----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
dendhia@nethi.shop----b9XokT1rp----CE14smI----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
haemyomusi@nethi.shop----3ovJzfBtO----DvGs9Im----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
okumo@nethi.shop----3dDcMbxQa----IGlcvBj----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
bozuriko@nethi.shop----W79q53NI----gTEzuh5S----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
keefithigh@nethi.shop----KeuSACYm----D1mthaj1MX----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
swelmenstrig@nethi.shop----Fpbkiorafg----JRHoMOBG----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
porode@nethi.shop----HodnTKIfi9----BSOVaMVzcY----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
watchsandllamur@nethi.shop----SLnsXCju----14rTgXRRnA----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
rakibaya@nethi.shop----JTye84ZlD----E6MsMvg----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
prositin@nethi.shop----Bw9OokM6----DXxj2ReT----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
gin@nethi.shop----fpGw5hlHtE----c4ZKdZD----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
saythalys@nethi.shop----RUmZ6D0I----4XTfor5T----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
mifu@nethi.shop----xHJ6n8y05----FMoYYFx----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
flicum@nethi.shop----ovGkuLURZq----27r7t5m----sk-uhviZQFakEdAhaOd43PQT3BlbkFJQmQz3VUgDvOvbKonrskS
paevul@nethi.shop----Viuk2EKUcp----V2yiKrM----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
connieten@nethi.shop----421zLxAsOu----FWkYXQW----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
perrezabor@nethi.shop----HhoX0kiEU----4Uw6kC5----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
trucdes@nethi.shop----A7n9v0Nt----wbS9wKlvE----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
@@ -0,0 +1,43 @@
import requests
import json

with open("api_key.txt", "r") as f:
    a = f.read()
    a = a.split("\n")

api_key_list = []
for i in a:
    api_key_list.append(str(i.split("----")[-1]))

# probe each key in turn with the same request and print what comes back
for i in api_key_list:
    OPENAI_API_KEY = i

    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": "请帮我根据题目为“初中英语写作教学的现状分析及应对策略”生成一个论文目录其中只含有一级标题和二级标题"},
        ],
        "temperature": 0.7
    }

    response = requests.post(url,
                             headers=headers,
                             data=json.dumps(data),
                             timeout=240)

    res = response.json()
    print(res)
    print(res["choices"][0]["message"]["content"])
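One hedged aside, not part of the commit: a dead key makes the loop above raise KeyError, since error responses carry no "choices" field. A minimal probe that checks the payload first could look like this (key_is_live is a hypothetical helper; max_tokens just keeps the test request cheap):

import requests
import json

def key_is_live(api_key: str) -> bool:
    # probe one key with a one-token request; True if the API answers normally
    resp = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Content-Type": "application/json",
                 "Authorization": f"Bearer {api_key}"},
        data=json.dumps({"model": "gpt-3.5-turbo",
                         "max_tokens": 1,
                         "messages": [{"role": "user", "content": "hi"}]}),
        timeout=60,
    )
    return resp.status_code == 200 and "error" not in resp.json()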
@@ -0,0 +1,28 @@
import requests
import json

OPENAI_API_KEY = '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'

url = "https://api.openai.com/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {OPENAI_API_KEY}"
}
data = {
    "model": "gpt-3.5-turbo",
    "messages": [
        {"role": "user", "content": "请帮我改写这句话:在城市发展进程当中,逐渐呈现出一些综合性的大型建筑群。"},
        {"role": "assistant", "content": "随着城市的发展,综合性大型建筑群正在逐渐出现。"},
        {"role": "user", "content": "这句话我不满意,再改一下帮我"}
    ],
    "temperature": 0.7
}

response = requests.post(url,
                         headers=headers,
                         data=json.dumps(data),
                         timeout=1000)

res = response.json()
print(res)
print(res["choices"][0]["message"]["content"])
@@ -0,0 +1,41 @@
import json
import re
import math
import numpy as np
from tqdm import tqdm


prompt = "请把“{}”这几个关键字翻译成英文"
pantten_title = "(.*?)》为题目生成论文摘要,要求生成的字数在"


path = "./data/paper_prompt_title_3_1/zhaiyao_chinese_keyword_prompt_data.txt"
with open(path, encoding="utf-8") as f:
    text = f.read()

# "\n\"请为“",
text_list = text.split("\n\"请为“")
data_list = []
chinese_keyword_data_list = []

for text_dan in tqdm(text_list[1:]):
    # print(text_dan)
    try:
        _, chinese_keyword = text_dan.split("**************")
    except:
        continue

    chinese_keyword = str(chinese_keyword).strip("\n")

    data_list.append(prompt.format(chinese_keyword))

import random

random.shuffle(data_list)

with open("./data/chinese_keyword_to_/chinese_keyword_en_prompt.txt", mode="w", encoding="utf-8") as f:
    for i in data_list:
        f.write(json.dumps(i, ensure_ascii=False))
        f.write("\n")
@@ -0,0 +1,201 @@
import json
import re
import math
import numpy as np
from tqdm import tqdm

# pantten_second_biaoti = '[2二ⅡⅠ][、.]\s{0,}?[\u4e00-\u9fa5]+'
pantten_biaoti = r'[1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][、.]\s{0,}?[\u4e00-\u9fa5a-zA-Z]+'
first_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的大标题“{}”的内容补充完整,补充内容字数在{}字左右"
small_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的小标题“{}”的内容补充完整,补充内容字数在{}字左右"
thanks = "致谢"
references = "参考文献"
excursus = "附录"
u = 3.5  # mean μ of the normal curve used for the word budget
sig = math.sqrt(6.0)  # standard deviation σ
zong_gradient = 6  # total x-range spread across the chapters
paper_word_count = 12000  # target word count for the whole paper


path = "../data/paper_prompt_title_3/title_mulu_prompt_data.txt"
with open(path, encoding="utf-8") as f:
    text = f.read()


def normal_distribution(x):
    y = np.exp(-(x - u) ** 2 / (2 * sig ** 2)) / (math.sqrt(2 * math.pi) * sig)
    return y


text_list = text.split("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")

ner_lable = []
text_zong = []

train_list = []

for text_dan in tqdm(text_list):
    # print(text_dan)
    try:
        title, mulu = text_dan.split("**********************************************")
    except:
        continue
    title = str(title).strip("\n")
    mulu = str(mulu).strip("\n")
    paper_text = "题目:{}@目录:".format(title)
    table_of_contents = []
    nerlable_list = []

    # mulu_base64 = base64.b64encode(mulu.encode('utf-8'))
    # mulu_path = os.path.join(uuid_path, "mulu.txt")
    # with open(mulu_path, 'wb', encoding='utf8') as f2:
    #     f2.write(mulu_base64)
    mulu_list = str(mulu).split("\n")
    mulu_list = [i.strip() for i in mulu_list if i != ""]
    mulu_str = "@".join(mulu_list)

    # classify every TOC line as a first- or second-level heading
    mulu_list_bool = []
    for i in mulu_list:
        result_biaoti_list = re.findall(pantten_biaoti, i)
        if result_biaoti_list != []:
            mulu_list_bool.append((i, "一级标题"))
        else:
            mulu_list_bool.append((i, "二级标题"))

    mulu_list_bool_part = mulu_list_bool[:3]

    if mulu_list_bool_part[0][1] != "一级标题":
        continue
    if mulu_list_bool_part[0][1] == mulu_list_bool_part[1][1] == mulu_list_bool_part[2][1] == "一级标题":
        continue

    # drop acknowledgements, references and appendix entries from the tail of the TOC
    thanks_references_bool_table = mulu_list_bool[-5:]

    for i in thanks_references_bool_table:
        try:
            if references in i[0]:
                mulu_list_bool.remove(i)
            if thanks in i[0]:
                mulu_list_bool.remove(i)
            if excursus in i[0]:
                mulu_list_bool.remove(i)
        except:
            print(thanks_references_bool_table)
            continue

    for i in mulu_list_bool:
        if i[1] == "一级标题":
            paper_dan = {
                "title": "@@" + i[0],
                "small_title": [],
                "word_count": 0
            }
            table_of_contents.append(paper_dan)
        else:
            table_of_contents[-1]["small_title"].append(i[0])

    # weight each chapter by the normal curve, then rescale so the budgets sum to paper_word_count
    x_list = [0]
    y_list = [normal_distribution(0)]

    gradient = zong_gradient / len(table_of_contents)
    for i in range(len(table_of_contents) - 1):
        x_gradient = x_list[-1] + gradient
        x_list.append(x_gradient)
        y_list.append(normal_distribution(x_list[-1]))

    dan_gradient = paper_word_count / sum(y_list)

    for i in range(len(y_list)):
        table_of_contents[i]["word_count"] = dan_gradient * y_list[i]

    # print(table_of_contents)
    # print(len(table_of_contents))

    table_of_contents_new = []
    for dabiaoti_index in range(len(table_of_contents)):
        dabiaoti_dict = table_of_contents[dabiaoti_index]
        table_of_contents_new.append([dabiaoti_dict["title"], 0])
        for xiaobiaoti in dabiaoti_dict["small_title"]:
            table_of_contents_new.append([xiaobiaoti, int(dabiaoti_dict["word_count"] / len(dabiaoti_dict["small_title"]))])

    small_task_list = []
    content_index = 0
    while True:
        if content_index == len(table_of_contents_new):
            break
        subtitle, word_count = table_of_contents_new[content_index]
        prompt = small_title_prompt

        # the first and last first-level headings get the first-title prompt and a fixed 800-word budget
        if content_index == 0 and table_of_contents_new[1][0][:2] == "@@" and subtitle[:2] == "@@":
            subtitle, prompt, word_count = subtitle[2:], first_title_prompt, 800

        if content_index == len(table_of_contents_new) - 1 and subtitle[:2] == "@@":
            subtitle, prompt, word_count = subtitle[2:], first_title_prompt, 800

        paper_content = [
            content_index,
            title,
            mulu,
            subtitle,
            prompt,
            word_count
        ]

        small_task_list.append(paper_content)
        content_index += 1

    for i in small_task_list:
        if i[3][:2] == "@@":
            continue
        elif i[5] > 1280:
            continue
        else:
            paper_prompt = i[4].format(i[1], i[2], i[3], i[5])
            if len(paper_prompt) < 768:
                train_list.append(paper_prompt)
            else:
                continue

import random

random.shuffle(train_list)

train_list_shuffle = train_list[:100000]
with open("../data/title_to_/prompt.txt", mode="w", encoding="utf-8") as f:
    for i in train_list:
        f.write(json.dumps(i, ensure_ascii=False))
        f.write("\n")

with open("../data/title_to_/prompt_shuffle.txt", mode="w", encoding="utf-8") as f:
    for i in train_list_shuffle:
        f.write(json.dumps(i, ensure_ascii=False))
        f.write("\n")


# for lable in table_of_contents:
#     text_len = len(paper_text)
#     dan_nerlable = [text_len, text_len + len(lable[0]), lable[1]]
#     nerlable_list.append(dan_nerlable)
#     paper_text += lable[0]
#     paper_text += "@"
#
# paper_dan = {"text": paper_text, "label": nerlable_list}
#
# ner_lable.append(str(table_of_contents))
# text_zong.append(paper_dan)
#
# with open("../data/train.txt", mode="w", encoding="utf-8") as f:
#     for i in text_zong:
#         f.write(json.dumps(i, ensure_ascii=False))
#         f.write("\n")
#
#
# with open("../data/train_lable.txt", mode="w") as f:
#     for i in ner_lable:
#         f.write(json.dumps(i, ensure_ascii=False))
#         f.write("\n")
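The word-budget arithmetic above weights each chapter by a normal curve and rescales the weights to the target total; a standalone sketch of the same idea, with an illustrative chapter count (the constants match the script):

import math

u = 3.5
sig = math.sqrt(6.0)
zong_gradient = 6
paper_word_count = 12000

def normal_distribution(x):
    return math.exp(-(x - u) ** 2 / (2 * sig ** 2)) / (math.sqrt(2 * math.pi) * sig)

n_chapters = 8  # illustrative; the script derives this from the parsed TOC
step = zong_gradient / n_chapters
weights = [normal_distribution(i * step) for i in range(n_chapters)]
scale = paper_word_count / sum(weights)
word_counts = [round(w * scale) for w in weights]
print(word_counts)       # chapters whose x is nearest the mean 3.5 get the largest budgets
print(sum(word_counts))  # equals 12000 up to rounding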
@@ -0,0 +1,133 @@
import json
import re
import math
import numpy as np
from tqdm import tqdm

# pantten_second_biaoti = '[2二ⅡⅠ][、.]\s{0,}?[\u4e00-\u9fa5]+'
pantten_biaoti = r'[一二三四五六七八九][、]\s{0,}?[\u4e00-\u9fa5a-zA-Z]+'
pantten_biaoti_1 = '[1-9].[1-9].[1-9](.*)'  # third-level headings such as 1.1.1 xxx
first_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的大标题“{}”的内容补充完整,补充内容字数在{}字左右"
small_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的小标题“{}”的内容补充完整,补充内容字数在{}字左右"
mulu_prompt = "请帮我根据题目为“{}”生成一个论文目录其中只含有一级标题和二级标题"

thanks = "致谢"
references = "参考文献"
excursus = "附录"
u = 3.5  # mean μ
sig = math.sqrt(6.0)
zong_gradient = 6
paper_word_count = 12000


path = "../data/title.txt"
with open(path, encoding="utf-8") as f:
    text = f.read()


def normal_distribution(x):
    y = np.exp(-(x - u) ** 2 / (2 * sig ** 2)) / (math.sqrt(2 * math.pi) * sig)
    return y


text_list = text.split("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")

ner_lable = []
text_zong = []

train_list = []

for text_dan in tqdm(text_list):
    tiaoguo = False  # flag: skip this TOC entirely
    # print(text_dan)
    try:
        title, mulu = text_dan.split("**********************************************")
    except:
        continue
    title = str(title).strip("\n")
    mulu = str(mulu).strip("\n")
    paper_text = "题目:{}@目录:".format(title)
    nerlable_list = []

    # mulu_base64 = base64.b64encode(mulu.encode('utf-8'))
    # mulu_path = os.path.join(uuid_path, "mulu.txt")
    # with open(mulu_path, 'wb', encoding='utf8') as f2:
    #     f2.write(mulu_base64)
    mulu_list = str(mulu).split("\n")
    mulu_list = [i.strip() for i in mulu_list if i != ""]

    mulu_list_bool = []
    for i in mulu_list:
        result_biaoti_list = re.findall(pantten_biaoti, i)
        if result_biaoti_list != []:
            mulu_list_bool.append((i, "一级标题"))
        else:
            # a TOC that contains third-level headings (e.g. 1.1.1) is skipped outright
            result_biaoti_sanji_list = re.findall(pantten_biaoti_1, i)
            if result_biaoti_sanji_list != []:
                tiaoguo = True
                break
            else:
                mulu_list_bool.append((i, "二级标题"))
    if tiaoguo == True:
        continue

    mulu_list_bool_part = mulu_list_bool[:3]

    if mulu_list_bool_part[0][1] != "一级标题":
        continue
    if mulu_list_bool_part[0][1] == mulu_list_bool_part[1][1] == "一级标题":
        continue
    if mulu_list_bool_part[-1][1] == "一级标题":
        continue

    thanks_references_bool_table = mulu_list_bool[-5:]

    for i in thanks_references_bool_table:
        try:
            if references in i[0]:
                mulu_list_bool.remove(i)
            if thanks in i[0]:
                mulu_list_bool.remove(i)
            if excursus in i[0]:
                mulu_list_bool.remove(i)
        except:
            print(thanks_references_bool_table)
            continue

    table_of_contents = []

    for i in mulu_list_bool:
        if i[1] == "一级标题":
            paper_dan = {
                "title": "@@" + i[0],
                "small_title": [i[0]],
                "word_count": 0
            }
            table_of_contents.append(paper_dan)
        else:
            table_of_contents[-1]["small_title"].append(i[0])

    table_of_contents_new = []
    for i in table_of_contents:
        a = "\n".join(i["small_title"])
        table_of_contents_new.append(a)

    b = "\n\n".join(table_of_contents_new)

    title_p = mulu_prompt.format(title)
    train_list.append({"content": str(title_p), "summary": str(b)})

print(train_list)
import random

random.shuffle(train_list)

train_list_shuffle = train_list[:500]
with open("../data/mulu_prompt.txt", mode="w", encoding="utf-8") as f:
    for i in train_list:
        f.write(json.dumps(i, ensure_ascii=False))
        f.write("\n")

with open("../data/mulu_prompt_shuffle.json", mode="w", encoding="utf-8") as f:
    for i in train_list_shuffle:
        f.write(json.dumps(i, ensure_ascii=False))
        f.write("\n")
@@ -0,0 +1,119 @@
import time
import os
from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis


lock = threading.RLock()
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*')
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

with open("api_key.txt", "r") as f:
    a = f.read()
    a = a.split("\n")

redis_key_name_openaikey_bad_list = "openaikey_bad_list"
redis_key_name_openaikey_list = "openaikey_list"
redis_zirenwu = "redis_zirenwu"

api_key_list = []
for i in a:
    api_key_list.append(str(i.split("----")[-1]))

for i in api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, i)

lock = threading.RLock()

file = r'./data/chinese_keyword_to_/chinese_keyword_en_prompt.txt'

zirenwu_list = []
with open(file, encoding="utf-8") as f:
    type_prompt = file.split("/")[-1].split(".")[0]
    texts = f.readlines()
    for i in texts:
        zirenwu_list.append((i, type_prompt))

import random
random.shuffle(zirenwu_list)
for i in zirenwu_list:
    redis_.rpush(redis_zirenwu, str(i))


def request_api_chatgpt(api_key, prompt, type_prompt):
    global api_key_list
    global zirenwu_list
    try:
        OPENAI_API_KEY = api_key
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENAI_API_KEY}"
        }
        data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.5
        }
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=240)

        res = response.json()

        text = res["choices"][0]["message"]["content"]
        lock.acquire()
        # api_key_list.append(api_key)
        redis_.rpush(redis_key_name_openaikey_list, api_key)

        with open("/home/majiahui/mulu_ner/data/paper_prompt_title_3_1_1/{}_data.txt".format(type_prompt), mode="a") as f:
            f.write(prompt)
            f.write("**************")
            f.write(text)
            f.write("\n")
        lock.release()
        time.sleep(5)

    except:
        time.sleep(20)
        lock.acquire()
        redis_.rpush(redis_key_name_openaikey_list, api_key)
        # re-enqueue in the same (prompt, type_prompt) order the consumer unpacks below
        redis_.rpush(redis_zirenwu, str((prompt, type_prompt)))
        lock.release()


if __name__ == '__main__':
    while True:
        if redis_.llen(redis_zirenwu) == 0:
            time.sleep(1)
            continue
        elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0:
            lock.acquire()
            api_key = redis_.lpop(redis_key_name_openaikey_list)
            api_key = api_key.decode('UTF-8')
            dan_zirenwu = redis_.lpop(redis_zirenwu)
            dan_zirenwu = dan_zirenwu.decode('UTF-8')
            lock.release()
            # dan_zirenwu = zirenwu_list.pop(0)
            dan_zirenwu = eval(dan_zirenwu)
            prompt, type_prompt = dan_zirenwu[0], dan_zirenwu[1]
            t = Thread(target=request_api_chatgpt, args=(api_key, prompt, type_prompt))
            t.start()
        elif redis_.llen(redis_key_name_openaikey_list) == 0:
            time.sleep(1)
            continue
        else:
            time.sleep(1)
            continue
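The workers above round-trip each task through Redis with str() and eval(); json.dumps/json.loads does the same job without eval's arbitrary-code-execution risk. A hedged sketch of that variant, not what the repo does (the queue name matches the scripts; the payload values are placeholders):

import json
import redis

r = redis.Redis(host='104.244.90.248', port=63179, db=10, password='Zhicheng123*')

# producer side: serialize the (prompt, type_prompt) pair as JSON
r.rpush("redis_zirenwu", json.dumps(["some prompt", "some_type"], ensure_ascii=False))

# consumer side: parse it back without eval()
raw = r.lpop("redis_zirenwu")
if raw is not None:
    prompt, type_prompt = json.loads(raw)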
@@ -0,0 +1,188 @@
import time

from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis

lock = threading.RLock()
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*')
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

with open("api_key.txt", "r") as f:
    a = f.read()
    a = a.split("\n")

redis_key_name_openaikey_list = "openaikey_list"
redis_zirenwu = "redis_zirenwu"

api_key_list = []
for i in a:
    api_key_list.append(str(i.split("----")[-1]))

for i in api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, i)

lock = threading.RLock()

prompt_dict = {
    "mulu_prompt": "为论文题目“{}”生成目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题",
    "beijing_prompt": "以“{}”为论文题目,写一段题目来源的背景,要求字数在200字以内",
    "zongjie_prompt": "以“{}”为论文题目,写一个论文简短总结,要求在300字以内",
    "zongshu_prompt": "请写出以《{}》为课题的国内外研究状况综述,字数在800字左右",
    "yanjiubeijingyiyi_prompt": "请分别写出以《{}》为课题的研究背景和意义,字数不少于1000字",
    "jianjie_prompt": "请帮我生成《{}》为题目的研究内容,包括整体简介和分最少三个方面总结"
}

with open("./data/题目3.txt", encoding="utf-8") as f:
    text = f.read()

text_list = text.split("\n")

title_list = []
for i in text_list:
    title_list.append(i.split("@@@@@")[0])

random.shuffle(title_list)

print(len(title_list))

zirenwu_list = []

for title in title_list:
    for prompt in prompt_dict:
        zirenwu_list.append((prompt, str(prompt_dict[prompt]).format(title)))

for i in zirenwu_list:
    redis_.rpush(redis_zirenwu, str(i))

# def request_api_chatgpt(api_key, prompt):
#     OPENAI_API_KEY = api_key
#     url = "https://api.openai.com/v1/chat/completions"
#     headers = {
#         "Content-Type": "application/json",
#         "Authorization": f"Bearer {OPENAI_API_KEY}"
#     }
#     data = {
#         "model": "gpt-3.5-turbo",
#         "messages": [
#             {"role": "user", "content": prompt},
#         ],
#         "temperature": 0.5
#     }
#     response = requests.post(url,
#                              headers=headers,
#                              data=json.dumps(data),
#                              timeout=240)
#     print("response", response)
#
#     return response
#
# def task(api_key, title):
#     try:
#         for pormpt_dan in prompt_dict:
#             name = pormpt_dan.split("_")[0]
#             print("pormpt_dan", pormpt_dan)
#             print("prompt_dict", prompt_dict)
#             prompt = str(prompt_dict[pormpt_dan]).format(title)
#             print("api_key", api_key)
#             print("prompt", prompt)
#             response = request_api_chatgpt(api_key, prompt)
#             res = response.json()
#             text = res["choices"][0]["message"]["content"]
#             lock.acquire()
#
#             with open("/home/majiahui/mulu_ner/data/paper_prompt_title_new/title_{}_data.txt".format(name), mode="a") as f:
#                 f.write(title)
#                 f.write("\n**********************************************\n")
#                 f.write(text)
#                 f.write("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")
#             lock.release()
#             time.sleep(2)
#         lock.acquire()
#         api_key_list.append(api_key)
#         lock.release()
#         print(1)
#     except:
#         print()
#         time.sleep(5)
#         lock.acquire()
#         api_key_list.append(api_key)
#         lock.release()
#         print(2)

def request_api_chatgpt(api_key, task_type, prompt):
    global api_key_list
    global zirenwu_list
    try:
        OPENAI_API_KEY = api_key
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENAI_API_KEY}"
        }
        data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.5
        }
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=240)

        res = response.json()
        text = res["choices"][0]["message"]["content"]
        lock.acquire()
        # api_key_list.append(api_key)
        redis_.rpush(redis_key_name_openaikey_list, api_key)

        with open("/home/majiahui/mulu_ner/data/paper_prompt_title_3/title_{}_data.txt".format(task_type), mode="a") as f:
            f.write(prompt)
            f.write("**************")
            f.write(text)
            f.write("\n")
        lock.release()

    except:
        print("task_type_bad", task_type)
        print("api_key_bad", api_key)
        time.sleep(5)
        lock.acquire()
        redis_.rpush(redis_key_name_openaikey_list, api_key)
        redis_.rpush(redis_zirenwu, str((task_type, prompt)))
        lock.release()


if __name__ == '__main__':
    while True:
        if redis_.llen(redis_zirenwu) == 0:
            time.sleep(1)
            continue
        elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0:
            lock.acquire()
            api_key = redis_.lpop(redis_key_name_openaikey_list)
            api_key = api_key.decode('UTF-8')
            dan_zirenwu = redis_.lpop(redis_zirenwu)
            dan_zirenwu = dan_zirenwu.decode('UTF-8')
            lock.release()
            # dan_zirenwu = zirenwu_list.pop(0)
            dan_zirenwu = eval(dan_zirenwu)
            task_type, prompt = dan_zirenwu[0], dan_zirenwu[1]
            t = Thread(target=request_api_chatgpt, args=(api_key, task_type, prompt))
            t.start()
        elif redis_.llen(redis_key_name_openaikey_list) == 0:
            time.sleep(1)
            continue
        else:
            time.sleep(1)
            continue
@@ -0,0 +1,140 @@
import time

from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis

lock = threading.RLock()

pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*')
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)


with open("api_key.txt", "r") as f:
    a = f.read()
    a = a.split("\n")

redis_key_name_openaikey_list = "openaikey_list"
redis_zirenwu = "redis_zirenwu"

api_key_list = []
for i in a:
    api_key_list.append(str(i.split("----")[-1]))

for i in api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, i)

lock = threading.RLock()

prompt_dict = {
    "mulu_prompt": "为论文题目《{}》生成目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题",
}

with open("./data/题目4_new.txt", encoding="utf-8") as f:
    text = f.read()

text_list = text.split("\n")

title_list = []
for i in text_list:
    title_list.append(i.split("@@@@@")[0])

random.shuffle(title_list)

print(len(title_list))

zirenwu_list = []

for title in title_list:
    for prompt in prompt_dict:
        zirenwu_list.append((prompt, str(prompt_dict[prompt]).format(title)))

for i in zirenwu_list:
    redis_.rpush(redis_zirenwu, str(i))


def request_api_chatgpt(api_key, task_type, prompt):
    t1 = time.time()
    global api_key_list
    global zirenwu_list
    try:
        OPENAI_API_KEY = api_key
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENAI_API_KEY}"
        }
        data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.5
        }
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=1200)

        res = response.json()
        text = res["choices"][0]["message"]["content"]

        # api_key_list.append(api_key)
        t2 = time.time()
        t_n = t2 - t1
        lock.acquire()
        with open("/home/majiahui/mulu_ner/data/paper_prompt_title_4/title_{}_data.txt".format(task_type), mode="a") as f:
            f.write(prompt)
            f.write("**************")
            f.write(text)
            f.write("\n")
            f.write("=================================================================================================")
        lock.release()

        # give each key at most one request per 20 seconds before returning it to the pool
        if t_n > 20:
            redis_.rpush(redis_key_name_openaikey_list, api_key)
        else:
            time.sleep(20 - t_n)
            redis_.rpush(redis_key_name_openaikey_list, api_key)

    except:
        print("task_type_bad", task_type)
        print("api_key_bad", api_key)
        time.sleep(20)
        lock.acquire()
        redis_.rpush(redis_key_name_openaikey_list, api_key)
        redis_.rpush(redis_zirenwu, str((task_type, prompt)))
        lock.release()


if __name__ == '__main__':
    while True:
        if redis_.llen(redis_zirenwu) == 0:
            time.sleep(1)
            continue
        elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0:
            lock.acquire()
            api_key = redis_.lpop(redis_key_name_openaikey_list)
            api_key = api_key.decode('UTF-8')
            dan_zirenwu = redis_.lpop(redis_zirenwu)
            dan_zirenwu = dan_zirenwu.decode('UTF-8')
            lock.release()
            # dan_zirenwu = zirenwu_list.pop(0)
            dan_zirenwu = eval(dan_zirenwu)
            task_type, prompt = dan_zirenwu[0], dan_zirenwu[1]
            t = Thread(target=request_api_chatgpt, args=(api_key, task_type, prompt))
            t.start()
        elif redis_.llen(redis_key_name_openaikey_list) == 0:
            time.sleep(1)
            continue
        else:
            time.sleep(1)
            continue
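The timing block above is a per-key rate limit: a key handles at most one request per 20 seconds, because the remainder of the window is slept off before the key re-enters the pool. Stripped to its essentials (run_one_request and return_key_to_pool are illustrative stand-ins for the POST and the rpush):

import time

MIN_INTERVAL = 20.0  # seconds each key must spend per request, including idle padding

def throttled_call(run_one_request, return_key_to_pool):
    t1 = time.time()
    run_one_request()
    elapsed = time.time() - t1
    if elapsed < MIN_INTERVAL:
        time.sleep(MIN_INTERVAL - elapsed)  # pad out the rest of the window
    return_key_to_pool()  # only now is the key available again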
@@ -0,0 +1,93 @@
import time

from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread


api_key_list = [
    "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",
    "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",
    "sk-nYbapOeC5VmSReJB1JgEr3BlbkFJnOo2J9qFJRKXrOSNiYFO",
    "sk-tOy3uBFkPsg9uVWTpDOor3BlbkFJkbXgo0sHAubK8VWyaeso",
    "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",
    "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",
    "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",
    "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",
    "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",
    "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"
]

lock = threading.RLock()


def request_api_chatgpt(api_key, prompt):
    try:
        OPENAI_API_KEY = api_key
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENAI_API_KEY}"
        }
        data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.5
        }
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=240)

        res = response.json()
        text = res["choices"][0]["message"]["content"]

        lock.acquire()
        api_key_list.append(api_key)

        with open("/home/majiahui/mulu_ner/data/prompt_small_gen.txt", mode="a") as f:
            f.write(prompt)
            f.write("**************")
            f.write(text)
            f.write("\n")
        lock.release()

    except:
        time.sleep(5)
        lock.acquire()
        api_key_list.append(api_key)
        lock.release()


if __name__ == '__main__':
    with open("./data/prompt_shuffle.txt", encoding="utf-8") as f:
        text = f.read()

    text_list = text.split("\n")

    index = 0
    while True:
        if index == len(text_list):
            break

        if api_key_list == []:
            time.sleep(1)
            continue
        else:
            api_key = api_key_list.pop(0)
            prompt = text_list[index]

            t = Thread(target=request_api_chatgpt, args=(api_key, prompt))
            t.start()
            lock.acquire()
            index += 1
            print(index)
            lock.release()
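The list-plus-lock key pool in this script can also be written with queue.Queue, which blocks instead of spin-waiting on an empty pool; a hedged alternative sketch, not what the commit uses (the keys and prompt are placeholders):

import queue
import threading

key_pool = queue.Queue()
for key in ["sk-aaa", "sk-bbb"]:  # placeholder keys
    key_pool.put(key)

def worker(prompt):
    api_key = key_pool.get()   # blocks until a key is free
    try:
        pass                   # ... perform the request with api_key here ...
    finally:
        key_pool.put(api_key)  # always return the key to the pool

threading.Thread(target=worker, args=("some prompt",)).start()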
@@ -0,0 +1,125 @@
import time
import os
from tqdm import tqdm
import random
import requests
import json
import threading
from threading import Thread
import redis

lock = threading.RLock()
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*')
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

with open("api_key.txt", "r") as f:
    a = f.read()
    a = a.split("\n")

redis_key_name_openaikey_bad_list = "openaikey_bad_list"
redis_key_name_openaikey_list = "openaikey_list"
redis_zirenwu = "redis_zirenwu"

api_key_list = []
for i in a:
    api_key_list.append(str(i.split("----")[-1]))

for i in api_key_list:
    redis_.rpush(redis_key_name_openaikey_list, i)

lock = threading.RLock()

file = r'./data/small_title_prompt'

zirenwu_list = []
path_list = []
for root, dirs, files in os.walk(file):
    for file in files:
        path = os.path.join(root, file)
        path_list.append(path)

for path in path_list:
    with open(path, encoding="utf-8") as f:
        type_prompt = path.split("/")[-1].split(".")[0]
        texts = f.readlines()
        for i in texts:
            zirenwu_list.append((i, type_prompt))

import random
random.shuffle(zirenwu_list)
for i in zirenwu_list:
    redis_.rpush(redis_zirenwu, str(i))


def request_api_chatgpt(api_key, prompt, type_prompt):
    global api_key_list
    global zirenwu_list
    try:
        OPENAI_API_KEY = api_key
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {OPENAI_API_KEY}"
        }
        data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.5
        }
        response = requests.post(url,
                                 headers=headers,
                                 data=json.dumps(data),
                                 timeout=240)

        res = response.json()

        text = res["choices"][0]["message"]["content"]
        lock.acquire()
        # api_key_list.append(api_key)
        redis_.rpush(redis_key_name_openaikey_list, api_key)

        with open("/home/majiahui/mulu_ner/data/paper_prompt_title_3_1/{}_data.txt".format(type_prompt), mode="a") as f:
            f.write(prompt)
            f.write("**************")
            f.write(text)
            f.write("\n")
        lock.release()
        time.sleep(5)

    except:
        time.sleep(20)
        lock.acquire()
        redis_.rpush(redis_key_name_openaikey_list, api_key)
        # re-enqueue in the same (prompt, type_prompt) order the consumer unpacks below
        redis_.rpush(redis_zirenwu, str((prompt, type_prompt)))
        lock.release()


if __name__ == '__main__':
    while True:
        if redis_.llen(redis_zirenwu) == 0:
            time.sleep(1)
            continue
        elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0:
            lock.acquire()
            api_key = redis_.lpop(redis_key_name_openaikey_list)
            api_key = api_key.decode('UTF-8')
            dan_zirenwu = redis_.lpop(redis_zirenwu)
            dan_zirenwu = dan_zirenwu.decode('UTF-8')
            lock.release()
            # dan_zirenwu = zirenwu_list.pop(0)
            dan_zirenwu = eval(dan_zirenwu)
            prompt, type_prompt = dan_zirenwu[0], dan_zirenwu[1]
            t = Thread(target=request_api_chatgpt, args=(api_key, prompt, type_prompt))
            t.start()
        elif redis_.llen(redis_key_name_openaikey_list) == 0:
            time.sleep(1)
            continue
        else:
            time.sleep(1)
            continue
@ -0,0 +1,126 @@ |
|||
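# Redis worker variant: tasks come from small_title_prompt_2_10000_40000.txt and each response is saved to its own uuid-named file under paper_prompt_title_3_2_10000_40000/{type_prompt}/. |
|||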
import time |
|||
import os |
|||
from tqdm import tqdm |
|||
import random |
|||
import requests |
|||
import json |
|||
import threading |
|||
from threading import Thread |
|||
import redis |
|||
import uuid |
|||
|
|||
lock = threading.RLock() |
|||
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*') |
|||
redis_ = redis.Redis(connection_pool=pool, decode_responses=True) |
|||
|
|||
with open("api_key.txt", "r",) as f: |
|||
a = f.read() |
|||
a = a.split("\n") |
|||
|
|||
redis_key_name_openaikey_bad_list = "openaikey_bad_list" |
|||
redis_key_name_openaikey_list = "openaikey_list" |
|||
redis_zirenwu = "redis_zirenwu" |
|||
|
|||
api_key_list = [] |
|||
for i in a: |
|||
api_key_list.append(str(i.split("----")[-1])) |
|||
|
|||
for i in api_key_list: |
|||
redis_.rpush(redis_key_name_openaikey_list, i) |
|||
|
|||
lock = threading.RLock() |
|||
|
|||
file = 'data/title_mulu_to_/small_title_prompt_2_10000_40000.txt' |
|||
|
|||
zirenwu_list = [] |
|||
|
|||
with open(file, encoding="utf-8") as f: |
|||
type_prompt = file.split("/")[-1].split(".")[0] |
|||
texts = f.read() |
|||
texts_list = texts.split("\n") |
|||
for i in texts_list: |
|||
zirenwu_list.append((i, type_prompt)) |
|||
|
|||
import random |
|||
random.shuffle(zirenwu_list) |
|||
for i in zirenwu_list: |
|||
redis_.rpush(redis_zirenwu, str(i)) |
|||
|
|||
|
|||
def request_api_chatgpt(api_key, prompt, type_prompt): |
|||
id_ = uuid.uuid1() |
|||
try: |
|||
OPENAI_API_KEY = api_key |
|||
url = "https://api.openai.com/v1/chat/completions" |
|||
headers = { |
|||
"Content-Type": "application/json", |
|||
"Authorization": f"Bearer {OPENAI_API_KEY}" |
|||
} |
|||
data = { |
|||
"model": "gpt-3.5-turbo", |
|||
"messages": [ |
|||
{"role": "user", "content": prompt}, |
|||
], |
|||
"temperature": 0.5 |
|||
} |
|||
response = requests.post(url, |
|||
headers=headers, |
|||
data=json.dumps(data), |
|||
timeout=240) |
|||
|
|||
res = response.json() |
|||
|
|||
text = res["choices"][0]["message"]["content"] |
|||
lock.acquire() |
|||
# api_key_list.append(api_key) |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
path = f"/home/majiahui/mulu_ner/data/paper_prompt_title_3_2_10000_40000/{type_prompt}/" |
|||
bool_ = os.path.exists(path) |
|||
if not bool_: |
|||
os.makedirs(path) |
|||
|
|||
with open(path + f"/{id_}_data.txt", mode="w") as f: |
|||
f.write("@@@@@@@@@@@@@@@@@@") |
|||
f.write(prompt) |
|||
f.write("**************") |
|||
f.write(text) |
|||
f.write("\n") |
|||
f.close() |
|||
lock.release() |
|||
time.sleep(5) |
|||
|
|||
except: |
|||
time.sleep(20) |
|||
lock.acquire() |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
redis_.rpush(redis_zirenwu, str((prompt, type_prompt))) |
|||
lock.release() |
|||
|
|||
|
|||
if __name__ == '__main__': |
|||
while True: |
|||
if redis_.llen(redis_zirenwu) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0: |
|||
lock.acquire() |
|||
api_key = redis_.lpop(redis_key_name_openaikey_list) |
|||
api_key = api_key.decode('UTF-8') |
|||
dan_zirenwu = redis_.lpop(redis_zirenwu) |
|||
dan_zirenwu = dan_zirenwu.decode('UTF-8') |
|||
lock.release() |
|||
# dan_zirenwu = zirenwu_list.pop(0) |
|||
dan_zirenwu = eval(dan_zirenwu) |
|||
prompt, type_prompt = dan_zirenwu[0], dan_zirenwu[1] |
|||
t = Thread(target=request_api_chatgpt, args=(api_key, prompt, type_prompt)) |
|||
t.start() |
|||
elif redis_.llen(redis_key_name_openaikey_list) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
else: |
|||
time.sleep(1) |
|||
continue |
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,125 @@ |
|||
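# Same Redis worker, fed from zhaiyao_prompt.txt; responses are saved as uuid-named files under paper_prompt_title_3_2/{type_prompt}/. |
|||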
import time |
|||
import os |
|||
from tqdm import tqdm |
|||
import random |
|||
import requests |
|||
import json |
|||
import threading |
|||
from threading import Thread |
|||
import redis |
|||
import uuid |
|||
|
|||
lock = threading.RLock() |
|||
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*') |
|||
redis_ = redis.Redis(connection_pool=pool, decode_responses=True) |
|||
|
|||
with open("api_key.txt", "r",) as f: |
|||
a = f.read() |
|||
a = a.split("\n") |
|||
|
|||
redis_key_name_openaikey_bad_list = "openaikey_bad_list" |
|||
redis_key_name_openaikey_list = "openaikey_list" |
|||
redis_zirenwu = "redis_zirenwu" |
|||
|
|||
api_key_list = [] |
|||
for i in a: |
|||
api_key_list.append(str(i.split("----")[-1])) |
|||
|
|||
for i in api_key_list: |
|||
redis_.rpush(redis_key_name_openaikey_list, i) |
|||
|
|||
lock = threading.RLock() |
|||
|
|||
file = r'./data/title_mulu_to_/zhaiyao_prompt.txt' |
|||
|
|||
zirenwu_list = [] |
|||
|
|||
with open(file, encoding="utf-8") as f: |
|||
type_prompt = file.split("/")[-1].split(".")[0] |
|||
texts = f.readlines() |
|||
for i in texts: |
|||
zirenwu_list.append((i, type_prompt)) |
|||
|
|||
import random |
|||
random.shuffle(zirenwu_list) |
|||
for i in zirenwu_list: |
|||
redis_.rpush(redis_zirenwu, str(i)) |
|||
|
|||
|
|||
def request_api_chatgpt(api_key, prompt, type_prompt): |
|||
id_ = uuid.uuid1() |
|||
try: |
|||
OPENAI_API_KEY = api_key |
|||
url = "https://api.openai.com/v1/chat/completions" |
|||
headers = { |
|||
"Content-Type": "application/json", |
|||
"Authorization": f"Bearer {OPENAI_API_KEY}" |
|||
} |
|||
data = { |
|||
"model": "gpt-3.5-turbo", |
|||
"messages": [ |
|||
{"role": "user", "content": prompt}, |
|||
], |
|||
"temperature": 0.5 |
|||
} |
|||
response = requests.post(url, |
|||
headers=headers, |
|||
data=json.dumps(data), |
|||
timeout=240) |
|||
|
|||
res = response.json() |
|||
|
|||
text = res["choices"][0]["message"]["content"] |
|||
lock.acquire() |
|||
# api_key_list.append(api_key) |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
path = f"/home/majiahui/mulu_ner/data/paper_prompt_title_3_2/{type_prompt}/" |
|||
bool_ = os.path.exists(path) |
|||
if not bool_: |
|||
os.makedirs(path) |
|||
|
|||
with open(path + f"/{id_}_data.txt", mode="w") as f: |
|||
f.write("@@@@@@@@@@@@@@@@@@") |
|||
f.write(prompt) |
|||
f.write("**************") |
|||
f.write(text) |
|||
f.write("\n") |
|||
f.close() |
|||
lock.release() |
|||
time.sleep(5) |
|||
|
|||
except: |
|||
time.sleep(20) |
|||
lock.acquire() |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
redis_.rpush(redis_zirenwu, str((prompt, type_prompt))) |
|||
lock.release() |
|||
|
|||
|
|||
if __name__ == '__main__': |
|||
while True: |
|||
if redis_.llen(redis_zirenwu) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0: |
|||
lock.acquire() |
|||
api_key = redis_.lpop(redis_key_name_openaikey_list) |
|||
api_key = api_key.decode('UTF-8') |
|||
dan_zirenwu = redis_.lpop(redis_zirenwu) |
|||
dan_zirenwu = dan_zirenwu.decode('UTF-8') |
|||
lock.release() |
|||
# dan_zirenwu = zirenwu_list.pop(0) |
|||
dan_zirenwu = eval(dan_zirenwu) |
|||
prompt, type_prompt = dan_zirenwu[0], dan_zirenwu[1] |
|||
t = Thread(target=request_api_chatgpt, args=(api_key, prompt, type_prompt)) |
|||
t.start() |
|||
elif redis_.llen(redis_key_name_openaikey_list) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
else: |
|||
time.sleep(1) |
|||
continue |
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,125 @@ |
|||
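# Same Redis worker, fed from small_title_prompt_shuffle_2.txt. |
|||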
import time |
|||
import os |
|||
from tqdm import tqdm |
|||
import random |
|||
import requests |
|||
import json |
|||
import threading |
|||
from threading import Thread |
|||
import redis |
|||
import uuid |
|||
|
|||
lock = threading.RLock() |
|||
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='Zhicheng123*') |
|||
redis_ = redis.Redis(connection_pool=pool, decode_responses=True) |
|||
|
|||
with open("api_key.txt", "r",) as f: |
|||
a = f.read() |
|||
a = a.split("\n") |
|||
|
|||
redis_key_name_openaikey_bad_list = "openaikey_bad_list" |
|||
redis_key_name_openaikey_list = "openaikey_list" |
|||
redis_zirenwu = "redis_zirenwu" |
|||
|
|||
api_key_list = [] |
|||
for i in a: |
|||
api_key_list.append(str(i.split("----")[-1])) |
|||
|
|||
for i in api_key_list: |
|||
redis_.rpush(redis_key_name_openaikey_list, i) |
|||
|
|||
lock = threading.RLock() |
|||
|
|||
file = r'./data/title_mulu_to_/small_title_prompt_shuffle_2.txt' |
|||
|
|||
zirenwu_list = [] |
|||
|
|||
with open(file, encoding="utf-8") as f: |
|||
type_prompt = file.split("/")[-1].split(".")[0] |
|||
texts = f.readlines() |
|||
for i in texts: |
|||
zirenwu_list.append((i, type_prompt)) |
|||
|
|||
import random |
|||
random.shuffle(zirenwu_list) |
|||
for i in zirenwu_list: |
|||
redis_.rpush(redis_zirenwu, str(i)) |
|||
|
|||
|
|||
def request_api_chatgpt(api_key, prompt, type_prompt): |
|||
id_ = uuid.uuid1() |
|||
try: |
|||
OPENAI_API_KEY = api_key |
|||
url = "https://api.openai.com/v1/chat/completions" |
|||
headers = { |
|||
"Content-Type": "application/json", |
|||
"Authorization": f"Bearer {OPENAI_API_KEY}" |
|||
} |
|||
data = { |
|||
"model": "gpt-3.5-turbo", |
|||
"messages": [ |
|||
{"role": "user", "content": prompt}, |
|||
], |
|||
"temperature": 0.5 |
|||
} |
|||
response = requests.post(url, |
|||
headers=headers, |
|||
data=json.dumps(data), |
|||
timeout=240) |
|||
|
|||
res = response.json() |
|||
|
|||
text = res["choices"][0]["message"]["content"] |
|||
lock.acquire() |
|||
# api_key_list.append(api_key) |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
path = f"/home/majiahui/mulu_ner/data/paper_prompt_title_3_2/{type_prompt}/" |
|||
bool_ = os.path.exists(path) |
|||
if not bool_: |
|||
os.makedirs(path) |
|||
|
|||
with open(path + f"/{id_}_data.txt", mode="w") as f: |
|||
f.write("@@@@@@@@@@@@@@@@@@") |
|||
f.write(prompt) |
|||
f.write("**************") |
|||
f.write(text) |
|||
f.write("\n") |
|||
f.close() |
|||
lock.release() |
|||
time.sleep(5) |
|||
|
|||
except: |
|||
time.sleep(20) |
|||
lock.acquire() |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
redis_.rpush(redis_zirenwu, str((prompt, type_prompt))) |
|||
lock.release() |
|||
|
|||
|
|||
if __name__ == '__main__': |
|||
while True: |
|||
if redis_.llen(redis_zirenwu) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0: |
|||
lock.acquire() |
|||
api_key = redis_.lpop(redis_key_name_openaikey_list) |
|||
api_key = api_key.decode('UTF-8') |
|||
dan_zirenwu = redis_.lpop(redis_zirenwu) |
|||
dan_zirenwu = dan_zirenwu.decode('UTF-8') |
|||
lock.release() |
|||
# dan_zirenwu = zirenwu_list.pop(0) |
|||
dan_zirenwu = eval(dan_zirenwu) |
|||
prompt, type_prompt = dan_zirenwu[0], dan_zirenwu[1] |
|||
t = Thread(target=request_api_chatgpt, args=(api_key, prompt, type_prompt)) |
|||
t.start() |
|||
elif redis_.llen(redis_key_name_openaikey_list) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
else: |
|||
time.sleep(1) |
|||
continue |
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,71 @@ |
|||
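# Builds "task book" prompts: extracts (title, introduction) pairs from title_jianjie_prompt_data.txt, formats them with task_book_main_content_prompt, and writes the shuffled result to task_book_prompt.txt. |
|||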
import json |
|||
import re |
|||
import math |
|||
import numpy as np |
|||
from tqdm import tqdm |
|||
|
|||
|
|||
task_book_main_content_prompt = "请根据题目为《{}》,和研究内容为“{}”总结出至少6点本篇论文应完成的主要内容,使用阿拉伯数字排列" |
|||
pantten_title = "(.*?)》为题目的研究内容,包括整体简介和分最少三个方面总结" |
|||
|
|||
|
|||
path = "./data/paper_prompt_title_3/title_jianjie_prompt_data.txt" |
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
|
|||
text_list = text.split("请帮我生成《") |
|||
|
|||
data_list = [] |
|||
chinese_keyword_data_list = [] |
|||
|
|||
for text_dan in tqdm(text_list): |
|||
# print(text_dan) |
|||
try: |
|||
title_prompt, jianjie = text_dan.split("**************") |
|||
except: |
|||
continue |
|||
result_biaoti_list = re.findall(pantten_title, title_prompt) |
|||
try: |
|||
result_biaoti_list[0] |
|||
except: |
|||
print(title_prompt) |
|||
continue |
|||
|
|||
title = str(result_biaoti_list[0]).strip("\n") |
|||
jianjie = str(jianjie).strip("\n") |
|||
|
|||
data_list.append(task_book_main_content_prompt.format(title, jianjie)) |
|||
|
|||
import random |
|||
|
|||
random.shuffle(data_list) |
|||
|
|||
with open("./data/jianjie_to_/task_book_prompt.txt", mode="w", encoding="utf-8") as f: |
|||
for i in data_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
|
|||
# for lable in table_of_contents: |
|||
# text_len = len(paper_text) |
|||
# dan_nerlable = [text_len, text_len + len(lable[0]), lable[1]] |
|||
# nerlable_list.append(dan_nerlable) |
|||
# paper_text += lable[0] |
|||
# paper_text += "@" |
|||
# |
|||
# paper_dan = {"text": paper_text, "label": nerlable_list} |
|||
# |
|||
# ner_lable.append(str(table_of_contents)) |
|||
# text_zong.append(paper_dan) |
|||
# |
|||
# with open("../data/train.txt", mode="w", encoding="utf-8") as f: |
|||
# for i in text_zong: |
|||
# f.write(json.dumps(i, ensure_ascii=False)) |
|||
# f.write("\n") |
|||
# |
|||
# |
|||
# with open("../data/train_lable.txt", mode="w") as f: |
|||
# for i in ner_lable: |
|||
# f.write(json.dumps(i, ensure_ascii=False)) |
|||
# f.write("\n") |
@ -0,0 +1,16 @@ |
|||
# This is a sample Python script. |
|||
|
|||
# Press Shift+F10 to execute it or replace it with your code. |
|||
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings. |
|||
|
|||
|
|||
def print_hi(name): |
|||
# Use a breakpoint in the code line below to debug your script. |
|||
print(f'Hi, {name}') # Press Ctrl+F8 to toggle the breakpoint. |
|||
|
|||
|
|||
# Press the green button in the gutter to run the script. |
|||
if __name__ == '__main__': |
|||
print_hi('PyCharm') |
|||
|
|||
# See PyCharm help at https://www.jetbrains.com/help/pycharm/ |
@ -0,0 +1,221 @@ |
|||
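# Builds per-section writing prompts: parses each generated table of contents, tags first-/second-level headings, allocates per-chapter word counts along a normal-distribution curve, and writes the references and small-title prompt files plus a shuffled 10000-entry subset. |
|||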
import json |
|||
import re |
|||
import math |
|||
import numpy as np |
|||
from tqdm import tqdm |
|||
|
|||
# pantten_second_biaoti = '[2二ⅡⅠ][、.]\s{0,}?[\u4e00-\u9fa5]+' |
|||
pantten_biaoti = '[1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][、.]\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|||
first_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的大标题“{}”的内容补充完整,补充内容字数在{}字左右" |
|||
small_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的小标题“{}”的内容补充完整,补充内容字数在{}字左右" |
|||
references_prompt = "论文题目是“{}”,目录是“{}”,请为这篇论文生成15篇左右的参考文献,要求其中有有中文参考文献不低于12篇,英文参考文献不低于2篇" |
|||
thanks = "致谢" |
|||
references = "参考文献" |
|||
excursus = "附录" |
|||
u = 3.5  # mean μ of the normal distribution |
|||
sig = math.sqrt(6.0) |
|||
zong_gradient = 6 |
|||
paper_word_count = 12000 |
|||
pantten_title = "(.*?)”生成目录,要求只有一级标题和二级标题," |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
path = "./data/paper_prompt_title_3/title_mulu_prompt_data.txt" |
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
|
|||
def normal_distribution(x): |
|||
y = np.exp(-(x - u) ** 2 / (2 * sig ** 2)) / (math.sqrt(2 * math.pi) * sig) |
|||
return y |
|||
|
|||
text_list = text.split("为论文题目“") |
|||
|
|||
ner_lable = [] |
|||
text_zong = [] |
|||
|
|||
train_list = [] |
|||
train_references_list = [] |
|||
|
|||
for text_dan in tqdm(text_list): |
|||
# print(text_dan) |
|||
try: |
|||
title_prompt, mulu = text_dan.split("**************") |
|||
except: |
|||
continue |
|||
result_biaoti_list = re.findall(pantten_title, title_prompt) |
|||
try: |
|||
result_biaoti_list[0] |
|||
except: |
|||
print(title_prompt) |
|||
continue |
|||
|
|||
title = str(result_biaoti_list[0]).strip("\n") |
|||
mulu = str(mulu).strip("\n") |
|||
|
|||
# Build the prompt used to generate the reference list |
|||
train_references_list.append(references_prompt.format(title, mulu)) |
|||
|
|||
paper_text = "题目:{}@目录:".format(title) |
|||
table_of_contents = [] |
|||
nerlable_list = [] |
|||
|
|||
# mulu_base64 = base64.b64encode(mulu.encode('utf-8')) |
|||
# mulu_path = os.path.join(uuid_path, "mulu.txt") |
|||
# with open(mulu_path, 'wb', encoding='utf8') as f2: |
|||
# f2.write(mulu_base64) |
|||
mulu_list = str(mulu).split("\n") |
|||
mulu_list = [i.strip() for i in mulu_list if i != ""] |
|||
mulu_str = "@".join(mulu_list) |
|||
|
|||
mulu_list_bool = [] |
|||
for i in mulu_list: |
|||
result_biaoti_list = re.findall(pantten_biaoti, i) |
|||
if result_biaoti_list: |
|||
mulu_list_bool.append((i, "一级标题")) |
|||
else: |
|||
mulu_list_bool.append((i, "二级标题")) |
|||
|
|||
mulu_list_bool_part = mulu_list_bool[:3] |
|||
|
|||
if mulu_list_bool_part[0][1] != "一级标题": |
|||
continue |
|||
if mulu_list_bool_part[0][1] == mulu_list_bool_part[1][1] == mulu_list_bool_part[2][1] == "一级标题": |
|||
continue |
|||
|
|||
thanks_references_bool_table = mulu_list_bool[-5:] |
|||
|
|||
for i in thanks_references_bool_table: |
|||
try: |
|||
if references in i[0]: |
|||
mulu_list_bool.remove(i) |
|||
if thanks in i[0]: |
|||
mulu_list_bool.remove(i) |
|||
if excursus in i[0]: |
|||
mulu_list_bool.remove(i) |
|||
except: |
|||
|
|||
print(thanks_references_bool_table) |
|||
continue |
|||
|
|||
for i in mulu_list_bool: |
|||
if i[1] == "一级标题": |
|||
paper_dan = { |
|||
"title": "@@" + i[0], |
|||
"small_title": [], |
|||
"word_count": 0 |
|||
} |
|||
table_of_contents.append(paper_dan) |
|||
else: |
|||
table_of_contents[-1]["small_title"].append(i[0]) |
|||
|
|||
x_list = [0] |
|||
y_list = [normal_distribution(0)] |
|||
|
|||
gradient = zong_gradient/len(table_of_contents) |
|||
for i in range(len(table_of_contents)-1): |
|||
x_gradient = x_list[-1] + gradient |
|||
x_list.append(x_gradient) |
|||
y_list.append(normal_distribution(x_list[-1])) |
|||
|
|||
dan_gradient = paper_word_count/sum(y_list) |
|||
|
|||
for i in range(len(y_list)): |
|||
table_of_contents[i]["word_count"] = dan_gradient * y_list[i] |
|||
|
|||
# print(table_of_contents) |
|||
# |
|||
# print(len(table_of_contents)) |
|||
|
|||
table_of_contents_new = [] |
|||
for dabiaoti_index in range(len(table_of_contents)): |
|||
dabiaoti_dict = table_of_contents[dabiaoti_index] |
|||
table_of_contents_new.append([dabiaoti_dict["title"], 0]) |
|||
for xiaobiaoti in dabiaoti_dict["small_title"]: |
|||
table_of_contents_new.append([xiaobiaoti, int(dabiaoti_dict["word_count"]/len(dabiaoti_dict["small_title"]))]) |
|||
|
|||
small_task_list = [] |
|||
content_index = 0 |
|||
while True: |
|||
if content_index == len(table_of_contents_new): |
|||
break |
|||
subtitle, word_count = table_of_contents_new[content_index] |
|||
prompt = small_title_prompt |
|||
|
|||
if content_index == 0 and table_of_contents_new[1][0][:2] == "@@" and subtitle[:2] == "@@": |
|||
subtitle, prompt, word_count = subtitle[2:], first_title_prompt, 800 |
|||
|
|||
if content_index == len(table_of_contents_new) -1 and subtitle[:2] == "@@": |
|||
subtitle, prompt, word_count = subtitle[2:], first_title_prompt, 800 |
|||
|
|||
paper_content = [ |
|||
content_index, |
|||
title, |
|||
mulu, |
|||
subtitle, |
|||
prompt, |
|||
word_count |
|||
] |
|||
|
|||
small_task_list.append(paper_content) |
|||
content_index += 1 |
|||
|
|||
for i in small_task_list: |
|||
if i[3][:2] == "@@": |
|||
continue |
|||
elif i[5] > 1280: |
|||
continue |
|||
else: |
|||
paper_prompt = i[4].format(i[1], i[2], i[3], i[5]) |
|||
if len(paper_prompt) < 768: |
|||
train_list.append(paper_prompt) |
|||
else: |
|||
continue |
|||
|
|||
import random |
|||
|
|||
random.shuffle(train_list) |
|||
|
|||
train_list_shuffle = train_list[:10000] |
|||
|
|||
with open("./data/title_mulu_to_/references_prompt.txt", mode="w", encoding="utf-8") as f: |
|||
for i in train_references_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
with open("./data/title_mulu_to_/small_title_prompt.txt", mode="w", encoding="utf-8") as f: |
|||
for i in train_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
with open("./data/title_mulu_to_/small_title_prompt_shuffle.txt", mode="w", encoding="utf-8") as f: |
|||
for i in train_list_shuffle: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
|
|||
# for lable in table_of_contents: |
|||
# text_len = len(paper_text) |
|||
# dan_nerlable = [text_len, text_len + len(lable[0]), lable[1]] |
|||
# nerlable_list.append(dan_nerlable) |
|||
# paper_text += lable[0] |
|||
# paper_text += "@" |
|||
# |
|||
# paper_dan = {"text": paper_text, "label": nerlable_list} |
|||
# |
|||
# ner_lable.append(str(table_of_contents)) |
|||
# text_zong.append(paper_dan) |
|||
# |
|||
# with open("../data/train.txt", mode="w", encoding="utf-8") as f: |
|||
# for i in text_zong: |
|||
# f.write(json.dumps(i, ensure_ascii=False)) |
|||
# f.write("\n") |
|||
# |
|||
# |
|||
# with open("../data/train_lable.txt", mode="w") as f: |
|||
# for i in ner_lable: |
|||
# f.write(json.dumps(i, ensure_ascii=False)) |
|||
# f.write("\n") |
@ -0,0 +1,210 @@ |
|||
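# Variant of the prompt builder above: every second-level heading gets a fixed 1500-word budget instead of the normal-distribution allocation, and no references prompts are produced. |
|||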
import json |
|||
import re |
|||
import math |
|||
import numpy as np |
|||
from tqdm import tqdm |
|||
|
|||
# pantten_second_biaoti = '[2二ⅡⅠ][、.]\s{0,}?[\u4e00-\u9fa5]+' |
|||
pantten_biaoti = '[1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][、.]\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|||
first_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的大标题“{}”的内容补充完整,补充内容字数在{}字左右" |
|||
small_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的小标题“{}”的内容补充完整,补充内容字数在{}字左右" |
|||
thanks = "致谢" |
|||
references = "参考文献" |
|||
excursus = "附录" |
|||
u = 3.5  # mean μ of the normal distribution |
|||
sig = math.sqrt(6.0) |
|||
zong_gradient = 6 |
|||
paper_word_count = 12000 |
|||
pantten_title = "(.*?)”生成目录,要求只有一级标题和二级标题," |
|||
|
|||
|
|||
|
|||
path = "./data/paper_prompt_title_4/title_mulu_prompt_data.txt" |
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
|
|||
def normal_distribution(x): |
|||
y = np.exp(-(x - u) ** 2 / (2 * sig ** 2)) / (math.sqrt(2 * math.pi) * sig) |
|||
return y |
|||
|
|||
text_list = text.split("为论文题目“") |
|||
|
|||
ner_lable = [] |
|||
text_zong = [] |
|||
|
|||
train_list = [] |
|||
train_references_list = [] |
|||
|
|||
for text_dan in tqdm(text_list): |
|||
# print(text_dan) |
|||
try: |
|||
title_prompt, mulu = text_dan.split("**************") |
|||
except: |
|||
continue |
|||
result_biaoti_list = re.findall(pantten_title, title_prompt) |
|||
try: |
|||
result_biaoti_list[0] |
|||
except: |
|||
print(title_prompt) |
|||
continue |
|||
|
|||
title = str(result_biaoti_list[0]).strip("\n") |
|||
mulu = str(mulu).strip("\n") |
|||
|
|||
# Prompt text for generating the reference list |
|||
|
|||
table_of_contents = [] |
|||
nerlable_list = [] |
|||
|
|||
# mulu_base64 = base64.b64encode(mulu.encode('utf-8')) |
|||
# mulu_path = os.path.join(uuid_path, "mulu.txt") |
|||
# with open(mulu_path, 'wb', encoding='utf8') as f2: |
|||
# f2.write(mulu_base64) |
|||
mulu_list = str(mulu).split("\n") |
|||
mulu_list = [i.strip() for i in mulu_list if i != ""] |
|||
mulu_str = "@".join(mulu_list) |
|||
|
|||
mulu_list_bool = [] |
|||
for i in mulu_list: |
|||
result_biaoti_list = re.findall(pantten_biaoti, i) |
|||
if result_biaoti_list: |
|||
mulu_list_bool.append((i, "一级标题")) |
|||
else: |
|||
mulu_list_bool.append((i, "二级标题")) |
|||
|
|||
mulu_list_bool_part = mulu_list_bool[:3] |
|||
|
|||
if mulu_list_bool_part[0][1] != "一级标题": |
|||
continue |
|||
if mulu_list_bool_part[0][1] == mulu_list_bool_part[1][1] == mulu_list_bool_part[2][1] == "一级标题": |
|||
continue |
|||
|
|||
thanks_references_bool_table = mulu_list_bool[-5:] |
|||
|
|||
for i in thanks_references_bool_table: |
|||
try: |
|||
if references in i[0]: |
|||
mulu_list_bool.remove(i) |
|||
if thanks in i[0]: |
|||
mulu_list_bool.remove(i) |
|||
if excursus in i[0]: |
|||
mulu_list_bool.remove(i) |
|||
except: |
|||
|
|||
print(thanks_references_bool_table) |
|||
continue |
|||
|
|||
for i in mulu_list_bool: |
|||
if i[1] == "一级标题": |
|||
paper_dan = { |
|||
"title": "@@" + i[0], |
|||
"small_title": [], |
|||
"word_count": 0 |
|||
} |
|||
table_of_contents.append(paper_dan) |
|||
else: |
|||
table_of_contents[-1]["small_title"].append(i[0]) |
|||
|
|||
x_list = [0] |
|||
y_list = [normal_distribution(0)] |
|||
|
|||
gradient = zong_gradient/len(table_of_contents) |
|||
for i in range(len(table_of_contents)-1): |
|||
x_gradient = x_list[-1] + gradient |
|||
x_list.append(x_gradient) |
|||
y_list.append(normal_distribution(x_list[-1])) |
|||
|
|||
dan_gradient = paper_word_count/sum(y_list) |
|||
|
|||
for i in range(len(y_list)): |
|||
table_of_contents[i]["word_count"] = dan_gradient * y_list[i] |
|||
|
|||
# print(table_of_contents) |
|||
# |
|||
# print(len(table_of_contents)) |
|||
|
|||
table_of_contents_new = [] |
|||
for dabiaoti_index in range(len(table_of_contents)): |
|||
dabiaoti_dict = table_of_contents[dabiaoti_index] |
|||
table_of_contents_new.append([dabiaoti_dict["title"], 0]) |
|||
for xiaobiaoti in dabiaoti_dict["small_title"]: |
|||
# table_of_contents_new.append([xiaobiaoti, int(dabiaoti_dict["word_count"]/len(dabiaoti_dict["small_title"]))]) |
|||
table_of_contents_new.append([xiaobiaoti, 1500]) |
|||
|
|||
small_task_list = [] |
|||
content_index = 0 |
|||
while True: |
|||
if content_index == len(table_of_contents_new): |
|||
break |
|||
subtitle, word_count = table_of_contents_new[content_index] |
|||
prompt = small_title_prompt |
|||
|
|||
if content_index == 0 and table_of_contents_new[1][0][:2] == "@@" and subtitle[:2] == "@@": |
|||
subtitle, prompt, word_count = subtitle[2:], first_title_prompt, 800 |
|||
|
|||
if content_index == len(table_of_contents_new) -1 and subtitle[:2] == "@@": |
|||
subtitle, prompt, word_count = subtitle[2:], first_title_prompt, 800 |
|||
|
|||
paper_content = [ |
|||
content_index, |
|||
title, |
|||
mulu, |
|||
subtitle, |
|||
prompt, |
|||
word_count |
|||
] |
|||
|
|||
small_task_list.append(paper_content) |
|||
content_index += 1 |
|||
|
|||
for i in small_task_list: |
|||
if i[3][:2] == "@@": |
|||
continue |
|||
else: |
|||
paper_prompt = i[4].format(i[1], i[2], i[3], i[5]) |
|||
if len(paper_prompt) < 768: |
|||
train_list.append(paper_prompt) |
|||
else: |
|||
continue |
|||
|
|||
import random |
|||
|
|||
random.shuffle(train_list) |
|||
|
|||
train_list_shuffle = train_list[:10000] |
|||
|
|||
|
|||
with open("./data/title_mulu_to_/small_title_prompt_4.txt", mode="w", encoding="utf-8") as f: |
|||
for i in train_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
with open("./data/title_mulu_to_/small_title_prompt_shuffle_4.txt", mode="w", encoding="utf-8") as f: |
|||
for i in train_list_shuffle: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
|
|||
# for lable in table_of_contents: |
|||
# text_len = len(paper_text) |
|||
# dan_nerlable = [text_len, text_len + len(lable[0]), lable[1]] |
|||
# nerlable_list.append(dan_nerlable) |
|||
# paper_text += lable[0] |
|||
# paper_text += "@" |
|||
# |
|||
# paper_dan = {"text": paper_text, "label": nerlable_list} |
|||
# |
|||
# ner_lable.append(str(table_of_contents)) |
|||
# text_zong.append(paper_dan) |
|||
# |
|||
# with open("../data/train.txt", mode="w", encoding="utf-8") as f: |
|||
# for i in text_zong: |
|||
# f.write(json.dumps(i, ensure_ascii=False)) |
|||
# f.write("\n") |
|||
# |
|||
# |
|||
# with open("../data/train_lable.txt", mode="w") as f: |
|||
# for i in ner_lable: |
|||
# f.write(json.dumps(i, ensure_ascii=False)) |
|||
# f.write("\n") |
@ -0,0 +1,68 @@ |
|||
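# Builds abstract ("zhaiyao") prompts from (title, table of contents) pairs and writes them to zhaiyao_prompt.txt. |
|||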
import json |
|||
import re |
|||
import math |
|||
import numpy as np |
|||
from tqdm import tqdm |
|||
|
|||
# pantten_second_biaoti = '[2二ⅡⅠ][、.]\s{0,}?[\u4e00-\u9fa5]+' |
|||
pantten_biaoti = '[1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][、.]\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|||
zhaiyao_prompt = "论文题目是“{}”,目录是“{}”,生成论文摘要,要求生成的字数在600字左右" |
|||
thanks = "致谢" |
|||
references = "参考文献" |
|||
excursus = "附录" |
|||
u = 3.5  # mean μ of the normal distribution |
|||
sig = math.sqrt(6.0) |
|||
zong_gradient = 6 |
|||
paper_word_count = 12000 |
|||
pantten_title = "(.*?)”生成目录,要求只有一级标题和二级标题," |
|||
|
|||
|
|||
|
|||
path = "./data/paper_prompt_title_3/title_mulu_prompt_data.txt" |
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
|
|||
def normal_distribution(x): |
|||
y = np.exp(-(x - u) ** 2 / (2 * sig ** 2)) / (math.sqrt(2 * math.pi) * sig) |
|||
return y |
|||
|
|||
text_list = text.split("为论文题目“") |
|||
|
|||
ner_lable = [] |
|||
text_zong = [] |
|||
|
|||
train_list = [] |
|||
train_references_list = [] |
|||
|
|||
for text_dan in tqdm(text_list): |
|||
# print(text_dan) |
|||
try: |
|||
title_prompt, mulu = text_dan.split("**************") |
|||
except: |
|||
continue |
|||
result_biaoti_list = re.findall(pantten_title, title_prompt) |
|||
try: |
|||
result_biaoti_list[0] |
|||
except: |
|||
print(title_prompt) |
|||
continue |
|||
|
|||
title = str(result_biaoti_list[0]).strip("\n") |
|||
mulu = str(mulu).strip("\n") |
|||
paper_prompt = zhaiyao_prompt.format(title, mulu) |
|||
train_list.append(paper_prompt) |
|||
|
|||
|
|||
import random |
|||
|
|||
random.shuffle(train_list) |
|||
|
|||
train_list_shuffle = train_list[:10000] |
|||
|
|||
|
|||
with open("./data/title_mulu_to_/zhaiyao_prompt.txt", mode="w", encoding="utf-8") as f: |
|||
for i in train_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
@ -0,0 +1,12 @@ |
|||
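# Loads a prompt/query/response JSON-lines file into (input, answer) pairs. |
|||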
import json |
|||
from tqdm import tqdm |
|||
|
|||
dataset = [] |
|||
data_path = "data/chatglm_dev_3_prompt.json" |
|||
with open(data_path, "r", encoding="utf-8") as fh: |
|||
for i, line in enumerate(fh): |
|||
# print(line) |
|||
sample = json.loads(line.strip()) |
|||
dataset.append( |
|||
{"input": sample["prompt"] + sample["query"], "answer": sample["response"]}) |
|||
print(dataset) |
@ -0,0 +1,225 @@ |
|||
#! -*- coding: utf-8 -*- |
|||
# Chinese named-entity recognition with a CRF layer |
|||
# Dataset: http://s3.bmio.net/kashgari/china-people-daily-ner-corpus.tar.gz |
|||
# Measured F1: 96.48% on the dev set, 95.38% on the test set |
|||
# GPU memory allocation |
|||
import os |
|||
|
|||
# os.environ["TF_KERAS"] = '1' |
|||
import numpy as np |
|||
from bert4keras.backend import keras, K |
|||
from bert4keras.models import build_transformer_model |
|||
from bert4keras.tokenizers import Tokenizer |
|||
from bert4keras.optimizers import Adam |
|||
from bert4keras.snippets import sequence_padding, DataGenerator |
|||
from bert4keras.snippets import open, ViterbiDecoder, to_array |
|||
from bert4keras.layers import ConditionalRandomField |
|||
from keras.layers import Dense |
|||
from keras.models import Model |
|||
from tqdm import tqdm |
|||
import tensorflow as tf |
|||
|
|||
# physical_devices = tf.config.list_physical_devices('GPU') |
|||
# for gpu_instance in physical_devices: |
|||
# tf.config.experimental.set_memory_growth(gpu_instance, True) |
|||
|
|||
maxlen = 128 |
|||
epochs = 10 |
|||
batch_size = 16 |
|||
bert_layers = 12 |
|||
learning_rate = 2e-5  # the smaller bert_layers is, the larger the learning rate should be |
|||
crf_lr_multiplier = 1000  # enlarge the CRF layer's learning rate when necessary |
|||
categories = set() |
|||
|
|||
# BERT configuration |
|||
config_path = r'./premodels/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json' |
|||
checkpoint_path = r'./premodels/chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt' |
|||
dict_path = r'./premodels/chinese_roberta_wwm_ext_L-12_H-768_A-12/vocab.txt' |
|||
|
|||
|
|||
def load_data(filename): |
|||
"""加载数据 |
|||
单条格式:[text, (start, end, label), (start, end, label), ...], |
|||
意味着text[start:end + 1]是类型为label的实体。 |
|||
""" |
|||
D = [] |
|||
with open(filename, encoding='utf-8') as f: |
|||
f = f.read() |
|||
for l in f.split('\n\n'): |
|||
if not l: |
|||
continue |
|||
d = [''] |
|||
for i, c in enumerate(l.split('\n')): |
|||
try: |
|||
char, flag = c.split(' ') |
|||
except: |
|||
continue |
|||
d[0] += char |
|||
if flag[0] == 'B': |
|||
d.append([i, i, flag[2:]]) |
|||
categories.add(flag[2:]) |
|||
elif flag[0] == 'I': |
|||
d[-1][1] = i |
|||
D.append(d) |
|||
return D |
|||
|
|||
|
|||
# Labeled data |
|||
train_data = load_data('./data/说话人_ner/train.txt') |
|||
valid_data = load_data('./data/说话人_ner/dev.txt') |
|||
test_data = load_data('./data/说话人_ner/test.txt') |
|||
|
|||
categories = list(sorted(categories)) |
|||
print(categories) |
|||
|
|||
# Build the tokenizer |
|||
tokenizer = Tokenizer(dict_path, do_lower_case=True) |
|||
|
|||
|
|||
class data_generator(DataGenerator): |
|||
"""数据生成器 |
|||
""" |
|||
def __iter__(self, random=False): |
|||
batch_token_ids, batch_segment_ids, batch_labels = [], [], [] |
|||
for is_end, d in self.sample(random): |
|||
tokens = tokenizer.tokenize(d[0], maxlen=maxlen) |
|||
mapping = tokenizer.rematch(d[0], tokens) |
|||
start_mapping = {j[0]: i for i, j in enumerate(mapping) if j} |
|||
end_mapping = {j[-1]: i for i, j in enumerate(mapping) if j} |
|||
token_ids = tokenizer.tokens_to_ids(tokens) |
|||
segment_ids = [0] * len(token_ids) |
|||
labels = np.zeros(len(token_ids)) |
|||
for start, end, label in d[1:]: |
|||
if start in start_mapping and end in end_mapping: |
|||
start = start_mapping[start] |
|||
end = end_mapping[end] |
|||
labels[start] = categories.index(label) * 2 + 1 |
|||
labels[start + 1:end + 1] = categories.index(label) * 2 + 2 |
|||
batch_token_ids.append(token_ids) |
|||
batch_segment_ids.append(segment_ids) |
|||
batch_labels.append(labels) |
|||
if len(batch_token_ids) == self.batch_size or is_end: |
|||
batch_token_ids = sequence_padding(batch_token_ids) |
|||
batch_segment_ids = sequence_padding(batch_segment_ids) |
|||
batch_labels = sequence_padding(batch_labels) |
|||
yield [batch_token_ids, batch_segment_ids], batch_labels |
|||
batch_token_ids, batch_segment_ids, batch_labels = [], [], [] |
|||
|
|||
|
|||
""" |
|||
The code below uses a BERT-style model. If you are using ALBERT, change the first few lines to: |
|||
model = build_transformer_model( |
|||
config_path, |
|||
checkpoint_path, |
|||
model='albert', |
|||
) |
|||
output_layer = 'Transformer-FeedForward-Norm' |
|||
output = model.get_layer(output_layer).get_output_at(bert_layers - 1) |
|||
""" |
|||
|
|||
model = build_transformer_model( |
|||
config_path, |
|||
checkpoint_path, |
|||
) |
|||
|
|||
output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1) |
|||
output = model.get_layer(output_layer).output |
|||
output = Dense(len(categories) * 2 + 1)(output) |
|||
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier) |
|||
output = CRF(output) |
|||
|
|||
model = Model(model.input, output) |
|||
model.summary() |
|||
|
|||
model.compile( |
|||
loss=CRF.sparse_loss, |
|||
optimizer=Adam(learning_rate), |
|||
metrics=[CRF.sparse_accuracy] |
|||
) |
|||
|
|||
|
|||
class NamedEntityRecognizer(ViterbiDecoder): |
|||
"""命名实体识别器 |
|||
""" |
|||
def recognize(self, text): |
|||
tokens = tokenizer.tokenize(text, maxlen=512) |
|||
mapping = tokenizer.rematch(text, tokens) |
|||
token_ids = tokenizer.tokens_to_ids(tokens) |
|||
segment_ids = [0] * len(token_ids) |
|||
token_ids, segment_ids = to_array([token_ids], [segment_ids]) |
|||
nodes = model.predict([token_ids, segment_ids])[0] |
|||
labels = self.decode(nodes) |
|||
entities, starting = [], False |
|||
for i, label in enumerate(labels): |
|||
if label > 0: |
|||
if label % 2 == 1: |
|||
starting = True |
|||
entities.append([[i], categories[(label - 1) // 2]]) |
|||
elif starting: |
|||
entities[-1][0].append(i) |
|||
else: |
|||
starting = False |
|||
else: |
|||
starting = False |
|||
return [(mapping[w[0]][0], mapping[w[-1]][-1], l) for w, l in entities] |
|||
|
|||
|
|||
NER = NamedEntityRecognizer(trans=K.eval(CRF.trans), starts=[0], ends=[0]) |
|||
|
|||
|
|||
def evaluate(data): |
|||
"""评测函数 |
|||
""" |
|||
X, Y, Z = 1e-10, 1e-10, 1e-10 |
|||
for d in tqdm(data, ncols=100): |
|||
R = set(NER.recognize(d[0])) |
|||
T = set([tuple(i) for i in d[1:]]) |
|||
X += len(R & T) |
|||
Y += len(R) |
|||
Z += len(T) |
|||
f1, precision, recall = 2 * X / (Y + Z), X / Y, X / Z |
|||
return f1, precision, recall |
|||
|
|||
|
|||
class Evaluator(keras.callbacks.Callback): |
|||
"""评估与保存 |
|||
""" |
|||
def __init__(self): |
|||
self.best_val_f1 = 0 |
|||
|
|||
def on_epoch_end(self, epoch, logs=None): |
|||
trans = K.eval(CRF.trans) |
|||
NER.trans = trans |
|||
print(NER.trans) |
|||
f1, precision, recall = evaluate(valid_data) |
|||
# save the best model |
|||
if f1 >= self.best_val_f1: |
|||
self.best_val_f1 = f1 |
|||
model.save_weights('./models_result_crf_shuohuaren/best_model.weights') |
|||
print( |
|||
'valid: f1: %.5f, precision: %.5f, recall: %.5f, best f1: %.5f\n' % |
|||
(f1, precision, recall, self.best_val_f1) |
|||
) |
|||
f1, precision, recall = evaluate(test_data) |
|||
print( |
|||
'test: f1: %.5f, precision: %.5f, recall: %.5f\n' % |
|||
(f1, precision, recall) |
|||
) |
|||
|
|||
|
|||
if __name__ == '__main__': |
|||
|
|||
evaluator = Evaluator() |
|||
train_generator = data_generator(train_data, batch_size) |
|||
|
|||
model.fit( |
|||
train_generator.forfit(), |
|||
steps_per_epoch=len(train_generator), |
|||
epochs=epochs, |
|||
callbacks=[evaluator] |
|||
) |
|||
|
|||
else: |
|||
|
|||
model.load_weights('./best_model.weights') |
|||
NER.trans = K.eval(CRF.trans) |
File diff suppressed because it is too large
@ -0,0 +1,86 @@ |
|||
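# Builds abstract-translation and keyword-extraction prompts from the generated abstracts and writes both prompt files shuffled. |
|||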
import json |
|||
import re |
|||
import math |
|||
import numpy as np |
|||
from tqdm import tqdm |
|||
|
|||
|
|||
prompt = "请把“{}”这段文字翻译成英文" |
|||
chinese_keyword_prompt = "请为“{}”这段论文摘要生成3-5个关键字,使用阿拉伯数字作为序号标注,例如“1.xxx \n2.xxx \n3.xxx \n4.xxx \n5.xxx \n”" |
|||
pantten_title = "(.*?)》为题目生成论文摘要,要求生成的字数在" |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
path = "./data/paper_prompt_title_3/title_zhaiyao_prompt_data.txt" |
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
|
|||
text_list = text.split("请以《") |
|||
|
|||
data_list = [] |
|||
chinese_keyword_data_list = [] |
|||
|
|||
for text_dan in tqdm(text_list): |
|||
# print(text_dan) |
|||
try: |
|||
title_prompt, zhaiyao = text_dan.split("**************") |
|||
except: |
|||
continue |
|||
result_biaoti_list = re.findall(pantten_title, title_prompt) |
|||
try: |
|||
result_biaoti_list[0] |
|||
except: |
|||
print(title_prompt) |
|||
continue |
|||
|
|||
title = str(result_biaoti_list[0]).strip("\n") |
|||
zhaiyao = str(zhaiyao).strip("\n") |
|||
|
|||
data_list.append(prompt.format(zhaiyao)) |
|||
chinese_keyword_data_list.append(chinese_keyword_prompt.format(zhaiyao)) |
|||
|
|||
|
|||
import random |
|||
|
|||
random.shuffle(data_list) |
|||
|
|||
with open("./data/zhaiyao_to_/zhaiyao_fanyi_prompt.txt", mode="w", encoding="utf-8") as f: |
|||
for i in data_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
random.shuffle(chinese_keyword_data_list) |
|||
|
|||
with open("./data/zhaiyao_to_/zhaiyao_chinese_keyword_prompt.txt", mode="w", encoding="utf-8") as f: |
|||
for i in chinese_keyword_data_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
|
|||
|
|||
# for lable in table_of_contents: |
|||
# text_len = len(paper_text) |
|||
# dan_nerlable = [text_len, text_len + len(lable[0]), lable[1]] |
|||
# nerlable_list.append(dan_nerlable) |
|||
# paper_text += lable[0] |
|||
# paper_text += "@" |
|||
# |
|||
# paper_dan = {"text": paper_text, "label": nerlable_list} |
|||
# |
|||
# ner_lable.append(str(table_of_contents)) |
|||
# text_zong.append(paper_dan) |
|||
# |
|||
# with open("../data/train.txt", mode="w", encoding="utf-8") as f: |
|||
# for i in text_zong: |
|||
# f.write(json.dumps(i, ensure_ascii=False)) |
|||
# f.write("\n") |
|||
# |
|||
# |
|||
# with open("../data/train_lable.txt", mode="w") as f: |
|||
# for i in ner_lable: |
|||
# f.write(json.dumps(i, ensure_ascii=False)) |
|||
# f.write("\n") |
@ -0,0 +1,158 @@ |
|||
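# Redis worker that expands each generated title and research direction into six prompt types (outline, topic background, short summary, literature review, background and significance, research content) and dispatches them to the ChatGPT API. |
|||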
import time |
|||
|
|||
from tqdm import tqdm |
|||
import random |
|||
import requests |
|||
import json |
|||
import threading |
|||
from threading import Thread |
|||
import redis |
|||
import re |
|||
|
|||
def is_contains_chinese(strs): |
|||
for _char in strs: |
|||
if '\u4e00' <= _char <= '\u9fa5': |
|||
return True |
|||
return False |
|||
|
|||
lock = threading.RLock() |
|||
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=11, password='Zhicheng123*') |
|||
redis_ = redis.Redis(connection_pool=pool, decode_responses=True) |
|||
|
|||
with open("api_key.txt", "r",) as f: |
|||
a = f.read() |
|||
a = a.split("\n") |
|||
|
|||
pantten_title = "《(.*)》" |
|||
redis_key_name_openaikey_list = "openaikey_list" |
|||
redis_zirenwu = "redis_zirenwu" |
|||
|
|||
api_key_list = [] |
|||
for i in a: |
|||
api_key_list.append(str(i.split("----")[-1])) |
|||
|
|||
for i in api_key_list: |
|||
redis_.rpush(redis_key_name_openaikey_list, i) |
|||
|
|||
lock = threading.RLock() |
|||
|
|||
prompt_dict = { |
|||
"mulu_prompt": "以“{}”为论文的生成方向,为论文题目为“{}”生成目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题", |
|||
"beijing_prompt": "以“{}”为论文题目,以“{}”为论文的生成方向,写一段题目来源的背景,要求字数在200字以内", |
|||
"zongjie_prompt": "以“{}”为论文题目,以“{}”为论文的生成方向,写一个论文简短总结,要求在300字以内", |
|||
"zongshu_prompt": "以《{}》为课题,以“{}”为论文的生成方向,请写出这篇论文的国内外研究状况综述,字数在800字左右", |
|||
"yanjiubeijingyiyi_prompt": "请分别写出以《{}》为课题的研究背景和意义,字数不少于1000字", |
|||
"jianjie_prompt": "请帮我生成《{}》为题目的研究内容,包括整体简介和分最少三个方面总结" |
|||
} |
|||
|
|||
with open("./data/paper_prompt_title_3/title_zhuyaoneirong_prompt_data.txt", encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
text_list = text.split("\n") |
|||
|
|||
title_list = [] |
|||
for i in text_list: |
|||
if "**************" in i: |
|||
title_list.append(i.split("**************")[1]) |
|||
|
|||
random.shuffle(title_list) |
|||
|
|||
print(len(title_list)) |
|||
|
|||
zirenwu_list = [] |
|||
|
|||
|
|||
for text in title_list: |
|||
bool_ = is_contains_chinese(text) |
|||
if not bool_: |
|||
continue |
|||
if "》:" not in text: |
|||
continue |
|||
text = text.strip("\"").strip("“").strip("”") |
|||
result_biaoti_list = re.findall(pantten_title, text) |
|||
try: |
|||
title = result_biaoti_list[0] |
|||
except: |
|||
continue |
|||
hexinnrirong = text.split("》:")[1] |
|||
for prompt in prompt_dict: |
|||
zirenwu_list.append((prompt, str(prompt_dict[prompt]).format(title, hexinnrirong))) |
|||
|
|||
|
|||
for i in zirenwu_list: |
|||
redis_.rpush(redis_zirenwu, str(i)) |
|||
|
|||
|
|||
def request_api_chatgpt(api_key, task_type, prompt): |
|||
global api_key_list |
|||
global zirenwu_list |
|||
try: |
|||
OPENAI_API_KEY = api_key |
|||
url = "https://api.openai.com/v1/chat/completions" |
|||
headers = { |
|||
"Content-Type": "application/json", |
|||
"Authorization": f"Bearer {OPENAI_API_KEY}" |
|||
} |
|||
data = { |
|||
"model": "gpt-3.5-turbo", |
|||
"messages": [ |
|||
{"role": "user", "content": prompt}, |
|||
], |
|||
"temperature": 0.5 |
|||
} |
|||
response = requests.post(url, |
|||
headers=headers, |
|||
data=json.dumps(data), |
|||
timeout=240) |
|||
|
|||
res = response.json() |
|||
text = res["choices"][0]["message"]["content"] |
|||
lock.acquire() |
|||
# api_key_list.append(api_key) |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
|
|||
with open("/home/majiahui/mulu_ner/data/paper_prompt_title_hexin_3/title_{}_data.txt".format(task_type), mode="a") as f: |
|||
f.write("@@@@@@@@@@@@@@@@@@@@@@@") |
|||
f.write(prompt) |
|||
f.write("**************") |
|||
f.write(text) |
|||
f.write("\n") |
|||
lock.release() |
|||
|
|||
except: |
|||
print("task_type_bad", task_type) |
|||
print("api_key_bad", api_key) |
|||
time.sleep(5) |
|||
lock.acquire() |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
redis_.rpush(redis_zirenwu, str((task_type, prompt))) |
|||
lock.release() |
|||
|
|||
|
|||
if __name__ == '__main__': |
|||
while True: |
|||
if redis_.llen(redis_zirenwu) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0: |
|||
lock.acquire() |
|||
api_key = redis_.lpop(redis_key_name_openaikey_list) |
|||
api_key = api_key.decode('UTF-8') |
|||
dan_zirenwu = redis_.lpop(redis_zirenwu) |
|||
dan_zirenwu = dan_zirenwu.decode('UTF-8') |
|||
lock.release() |
|||
# dan_zirenwu = zirenwu_list.pop(0) |
|||
dan_zirenwu = eval(dan_zirenwu) |
|||
task_type, prompt = dan_zirenwu[0], dan_zirenwu[1] |
|||
t = Thread(target=request_api_chatgpt, args=(api_key, task_type, prompt)) |
|||
t.start() |
|||
elif redis_.llen(redis_key_name_openaikey_list) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
else: |
|||
time.sleep(1) |
|||
continue |
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,155 @@ |
|||
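# Same worker reduced to three prompt types: outline, research background and significance, and research content. |
|||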
import time |
|||
|
|||
from tqdm import tqdm |
|||
import random |
|||
import requests |
|||
import json |
|||
import threading |
|||
from threading import Thread |
|||
import redis |
|||
import re |
|||
|
|||
def is_contains_chinese(strs): |
|||
for _char in strs: |
|||
if '\u4e00' <= _char <= '\u9fa5': |
|||
return True |
|||
return False |
|||
|
|||
lock = threading.RLock() |
|||
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=11, password='Zhicheng123*') |
|||
redis_ = redis.Redis(connection_pool=pool, decode_responses=True) |
|||
|
|||
with open("api_key.txt", "r",) as f: |
|||
a = f.read() |
|||
a = a.split("\n") |
|||
|
|||
pantten_title = "《(.*)》" |
|||
redis_key_name_openaikey_list = "openaikey_list" |
|||
redis_zirenwu = "redis_zirenwu" |
|||
|
|||
api_key_list = [] |
|||
for i in a: |
|||
api_key_list.append(str(i.split("----")[-1])) |
|||
|
|||
for i in api_key_list: |
|||
redis_.rpush(redis_key_name_openaikey_list, i) |
|||
|
|||
lock = threading.RLock() |
|||
|
|||
prompt_dict = { |
|||
"mulu_prompt": "论文题目为“{}”,以“{}”为论文的生成方向,为论文生成目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题", |
|||
"yanjiubeijingyiyi_prompt": "请分别写出以《{}》为课题,以“{}”为论文的生成方向,生成论文的研究背景和意义,字数不少于1000字", |
|||
"jianjie_prompt": "请帮我生成《{}》为题目,以“{}”为论文的生成方向,生成论文研究内容,包括整体简介和分最少三个方面总结" |
|||
} |
|||
|
|||
with open("./data/paper_prompt_title_3/title_zhuyaoneirong_prompt_data.txt", encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
text_list = text.split("\n") |
|||
|
|||
title_list = [] |
|||
for i in text_list: |
|||
if "**************" in i: |
|||
title_list.append(i.split("**************")[1]) |
|||
|
|||
random.shuffle(title_list) |
|||
|
|||
print(len(title_list)) |
|||
|
|||
zirenwu_list = [] |
|||
|
|||
|
|||
for text in title_list: |
|||
bool_ = is_contains_chinese(text) |
|||
if not bool_: |
|||
continue |
|||
if "》:" not in text: |
|||
continue |
|||
text = text.strip("\"").strip("“").strip("”") |
|||
result_biaoti_list = re.findall(pantten_title, text) |
|||
try: |
|||
title = result_biaoti_list[0] |
|||
except: |
|||
continue |
|||
hexinnrirong = text.split("》:")[1] |
|||
for prompt in prompt_dict: |
|||
zirenwu_list.append((prompt, str(prompt_dict[prompt]).format(title, hexinnrirong))) |
|||
|
|||
|
|||
for i in zirenwu_list: |
|||
redis_.rpush(redis_zirenwu, str(i)) |
|||
|
|||
|
|||
def request_api_chatgpt(api_key, task_type, prompt): |
|||
global api_key_list |
|||
global zirenwu_list |
|||
try: |
|||
OPENAI_API_KEY = api_key |
|||
url = "https://api.openai.com/v1/chat/completions" |
|||
headers = { |
|||
"Content-Type": "application/json", |
|||
"Authorization": f"Bearer {OPENAI_API_KEY}" |
|||
} |
|||
data = { |
|||
"model": "gpt-3.5-turbo", |
|||
"messages": [ |
|||
{"role": "user", "content": prompt}, |
|||
], |
|||
"temperature": 0.5 |
|||
} |
|||
response = requests.post(url, |
|||
headers=headers, |
|||
data=json.dumps(data), |
|||
timeout=240) |
|||
|
|||
res = response.json() |
|||
text = res["choices"][0]["message"]["content"] |
|||
lock.acquire() |
|||
# api_key_list.append(api_key) |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
|
|||
with open("/home/majiahui/mulu_ner/data/paper_prompt_title_hexin_3/title_{}_data.txt".format(task_type), mode="a") as f: |
|||
f.write("@@@@@@@@@@@@@@@@@@@@@@@") |
|||
f.write(prompt) |
|||
f.write("**************") |
|||
f.write(text) |
|||
f.write("\n") |
|||
lock.release() |
|||
|
|||
except: |
|||
print("task_type_bad", task_type) |
|||
print("api_key_bad", api_key) |
|||
time.sleep(5) |
|||
lock.acquire() |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
redis_.rpush(redis_zirenwu, str((task_type, prompt))) |
|||
lock.release() |
|||
|
|||
|
|||
if __name__ == '__main__': |
|||
while True: |
|||
if redis_.llen(redis_zirenwu) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0: |
|||
lock.acquire() |
|||
api_key = redis_.lpop(redis_key_name_openaikey_list) |
|||
api_key = api_key.decode('UTF-8') |
|||
dan_zirenwu = redis_.lpop(redis_zirenwu) |
|||
dan_zirenwu = dan_zirenwu.decode('UTF-8') |
|||
lock.release() |
|||
# dan_zirenwu = zirenwu_list.pop(0) |
|||
dan_zirenwu = eval(dan_zirenwu) |
|||
task_type, prompt = dan_zirenwu[0], dan_zirenwu[1] |
|||
t = Thread(target=request_api_chatgpt, args=(api_key, task_type, prompt)) |
|||
t.start() |
|||
elif redis_.llen(redis_key_name_openaikey_list) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
else: |
|||
time.sleep(1) |
|||
continue |
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,16 @@ |
|||
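# Slices entries 10000-40000 out of small_title_prompt_2.txt into a separate task file. |
|||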
import json |
|||
|
|||
data = [] |
|||
with open("data/title_mulu_to_/small_title_prompt_2.txt", encoding="utf-8") as f: |
|||
texts = f.read() |
|||
texts_list = texts.split("\n") |
|||
for i in texts_list: |
|||
data_dan = i.strip("\"").strip("\n").strip("\"").strip("\"") |
|||
data.append(data_dan) |
|||
|
|||
|
|||
data_new = data[10000:40000] |
|||
with open("./data/title_mulu_to_/small_title_prompt_2_10000_40000.txt", mode="w", encoding="utf-8") as f: |
|||
for i in data_new: |
|||
f.write(i) |
|||
f.write("\n") |
@ -0,0 +1,23 @@ |
|||
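# Merges the small-title and shuffled-outline training sets into one shuffled train_paper.json. |
|||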
import json |
|||
from pathlib import Path |
|||
|
|||
data = [] |
|||
|
|||
with open('data/small_title_train.json', encoding="utf-8") as fp: |
|||
lines = fp.readlines() |
|||
for i in lines: |
|||
data.append(json.loads(i)) |
|||
|
|||
|
|||
with open('data/mulu_prompt_shuffle.json', encoding="utf-8") as fp: |
|||
lines = fp.readlines() |
|||
for i in lines: |
|||
data.append(json.loads(i)) |
|||
|
|||
import random |
|||
random.shuffle(data) |
|||
|
|||
with open("data/train_paper.json", mode="w", encoding="utf-8") as f: |
|||
for i in data: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
@ -0,0 +1,17 @@ |
|||
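# Scratch pad of the prompt templates used across these scripts. |
|||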
first_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的大标题“{}”的内容补充完整,补充内容字数在{}字左右" |
|||
small_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的小标题“{}”的内容补充完整,补充内容字数在{}字左右" |
|||
references_prompt = "论文题目是“{}”,目录是“{}”,请为这篇论文生成15篇左右的参考文献,要求其中有有中文参考文献不低于12篇,英文参考文献不低于2篇" |
|||
prompt = "请把“{}”这几个关键字翻译成英文" |
|||
prompt = "请把“{}”这段文字翻译成英文" |
|||
chinese_keyword_prompt = "请为“{}”这段论文摘要生成3-5个关键字,使用阿拉伯数字作为序号标注,例如“1.xxx \n2.xxx \n3.xxx \n4.xxx \n5.xxx \n”" |
|||
|
|||
|
|||
zhuyaoneirong_prompt" = “《{}》:研制一款基于单片机的多功能充电控制器,包括硬件和软件设计。最终成果是研制一台可对多种类型蓄电池充电的控制器实物,并以一个特定蓄电池充电为例,验证所设计控制器的可用性”,以上面话术为标准。根据论文题目为“我国护理硕士专业学位研究生课程体系的现状研究”生成这种格式的一段话,要求100个字左右", |
|||
mulu_prompt" = 为论文题目“{}”生成目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题", |
|||
zhaiyao_prompt" = 请以《{}》为题目生成论文摘要,要求生成的字数在600字左右", |
|||
beijing_prompt" = 以“{}”为论文题目,写一段题目来源的背景,要求字数在200字以内", |
|||
zongjie_prompt" = 以“{}”为论文题目,写一个论文简短总结,要求在300字以内", |
|||
zongshu_prompt" = 请写出以《{}》为课题的国内外研究状况综述,字数在800字左右", |
|||
yanjiubeijingyiyi_prompt" = 请分别写出以《{}》为课题的研究背景和意义,字数不少于1000字", |
|||
jianjie_prompt" = 请帮我生成《{}》为题目的研究内容,包括整体简介和分最少三个方面总结" |
|||
|
@ -0,0 +1,32 @@ |
|||
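# Concatenates the per-uuid zhaiyao result files into a single title_mulu_zhaiyao_data.txt. |
|||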
import os |
|||
from tqdm import tqdm |
|||
import re |
|||
|
|||
|
|||
patten = "目录是“(.*)”,请把其中的" |
|||
p0 = "@@@@@@@@@@@@@@@@@@" |
|||
p1 = "补充内容字数在1500字左右" |
|||
p2 = "**************" |
|||
data_path_list = [] |
|||
for root,dirs,files in os.walk(r"./data/paper_prompt_title_3_2/zhaiyao_prompt"): |
|||
for file in files: |
|||
# collect the file path |
|||
data_path_list.append(os.path.join(root,file)) |
|||
|
|||
print(data_path_list) |
|||
|
|||
jishu = 0 |
|||
data_str = "" |
|||
for i in tqdm(data_path_list): |
|||
dayin = False |
|||
with open(i, encoding="utf-8") as f: |
|||
data_dan = f.read() |
|||
data_str += data_dan |
|||
data_str += "\n" |
|||
|
|||
|
|||
print(jishu) |
|||
print(data_str) |
|||
with open("./data/paper_prompt_title_3_1/title_mulu_zhaiyao_data.txt", mode="w", encoding="utf-8") as f: |
|||
f.write(data_str) |
|||
|
@ -0,0 +1,108 @@ |
|||
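# Cleans the collected prompt/query/response data (drops non-Chinese responses, rewrites "生成方向" to "研究方向", counts sections that add an unwanted summary/conclusion heading) and writes a 90/10 train/dev split in instruction format for LLaMA fine-tuning. |
|||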
import os |
|||
import re |
|||
import random |
|||
import json |
|||
from tqdm import tqdm |
|||
|
|||
RE_CHINA_NUMS = "[一二三四五六七八九].?.?总结|[1-9].?.?总结|[一二三四五六七八九].?.?结论|[1-9].?.?结论" |
|||
RE_CHINA_TITLE = "请把其中的小标题“(.*?)”的内容补充完整|请把其中的大标题“(.*?)”的内容补充完整" |
|||
|
|||
data_tongji = { |
|||
"0-600": 0, |
|||
"600-1500": 0, |
|||
"1500-": 0, |
|||
} |
|||
# print("这段文字翻译成英文"\n'") |
|||
data_tongji_prompt = [] |
|||
|
|||
def is_contains_chinese(strs): |
|||
for _char in strs: |
|||
if '\u4e00' <= _char <= '\u9fa5': |
|||
return True |
|||
return False |
|||
|
|||
data_list = [] |
|||
|
|||
jishu = 0 |
|||
|
|||
with open("data/chatglm_paper_data_2_prompt.txt", encoding="utf-8") as f: |
|||
for i in tqdm(f): |
|||
data_dan = eval(i) |
|||
zishu_query = len(data_dan["query"]) |
|||
zishu_response = len(data_dan["response"]) |
|||
|
|||
prompt = str(data_dan["prompt"]).replace("\\n", "\n") |
|||
query = data_dan["query"].replace("\\n", "\n") |
|||
response = data_dan["response"].replace("\\n", "\n") |
|||
|
|||
if prompt == "翻译摘要#": |
|||
zishu_summary = len(response.split(" ")) |
|||
elif prompt == "翻译关键词#": |
|||
zishu_summary = len(response.split(" ")) |
|||
else: |
|||
bool_ = is_contains_chinese(response) |
|||
if not bool_: |
|||
print(data_dan) |
|||
continue |
|||
|
|||
if "生成方向" in query: |
|||
query = query.replace("生成方向","研究方向") |
|||
if "生成方向" in response: |
|||
response = response.replace("生成方向", "研究方向") |
|||
|
|||
if prompt == "生成论文小标题内容#": |
|||
query_re = re.findall(RE_CHINA_TITLE, query) |
|||
# findall returns (small_title, first_title) group tuples; join the groups to recover the matched title text |
|||
title_text = "".join(query_re[0]) if query_re else "" |
|||
if "总结" not in title_text and "结论" not in title_text: |
|||
response_re = re.findall(RE_CHINA_NUMS, response) |
|||
if response_re: |
|||
print(response) |
|||
print("==========================================================================================") |
|||
jishu += 1 |
|||
|
|||
if prompt[-1] != "\n": |
|||
prompt += "\n" |
|||
if query[-1] != "\n": |
|||
query += "\n" |
|||
query = "问:" + query + "答:\n" |
|||
|
|||
if len(query) < 700 and len(response) < 1400: |
|||
data_list.append({ |
|||
"instruction": prompt, |
|||
"input": query, |
|||
"output": response |
|||
}) |
|||
# if zishu_summary < 600: |
|||
# data_tongji["0-600"] += 1 |
|||
# if 600 < zishu_summary < 1500: |
|||
# data_tongji["600-1500"] += 1 |
|||
# if 1500 < zishu_summary: |
|||
# data_tongji["1500-"] += 1 |
|||
# data_tongji_prompt.append([data_dan['summary'], zishu_summary]) |
|||
# else: |
|||
# train_list.append(i) |
|||
|
|||
|
|||
# for i in data_tongji_prompt: |
|||
# print(i) |
|||
# |
|||
|
|||
# random.shuffle(data_list) |
|||
# |
|||
train_nums = int(len(data_list) * 0.9) |
|||
dev_nums = int(len(data_list) * 0.1) |
|||
# |
|||
random.shuffle(data_list) |
|||
print(train_nums) |
|||
train_list = data_list[:train_nums] |
|||
dev_list = data_list[train_nums:] |
|||
with open("./data/chatglm_train_3_prompt_llama.json", mode="w", encoding="utf-8") as f: |
|||
f.write(json.dumps(train_list, ensure_ascii=False, indent=2)) |
|||
|
|||
with open("./data/chatglm_dev_3_prompt_llama.json", mode="w", encoding="utf-8") as f: |
|||
f.write(json.dumps(dev_list, ensure_ascii=False, indent=2)) |
|||
# for i in data_tongji_prompt: |
|||
# print(i) |
|||
# |
|||
# print(data_tongji) |
|||
|
|||
print(jishu) |
@ -0,0 +1,91 @@ |
|||
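# Same cleaning pass, but emits "Bob: ... / Alice: ..." text records for ChatRWKV with an 80/20 train/dev split. |
|||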
import os |
|||
import random |
|||
import json |
|||
from tqdm import tqdm |
|||
|
|||
data_tongji = { |
|||
"0-600": 0, |
|||
"600-1500": 0, |
|||
"1500-": 0, |
|||
} |
|||
# print("这段文字翻译成英文"\n'") |
|||
data_tongji_prompt = [] |
|||
|
|||
def is_contains_chinese(strs): |
|||
for _char in strs: |
|||
if '\u4e00' <= _char <= '\u9fa5': |
|||
return True |
|||
return False |
|||
|
|||
data_list = [] |
|||
|
|||
|
|||
with open("data/chatglm_paper_data_2_prompt.txt", encoding="utf-8") as f: |
|||
for i in tqdm(f): |
|||
data_dan = eval(i) |
|||
zishu_query = len(data_dan["query"]) |
|||
zishu_response = len(data_dan["response"]) |
|||
|
|||
query = data_dan["query"] |
|||
response = data_dan["response"] |
|||
prompt = data_dan["prompt"] |
|||
|
|||
|
|||
if prompt == "翻译摘要#": |
|||
zishu_summary = len(data_dan["response"].split(" ")) |
|||
elif prompt == "翻译关键词#": |
|||
zishu_summary = len(data_dan["response"].split(" ")) |
|||
else: |
|||
bool_ = is_contains_chinese(data_dan["response"]) |
|||
if not bool_: |
|||
print(data_dan) |
|||
continue |
|||
|
|||
if "生成方向" in query: |
|||
data_dan["query"] = query.replace("生成方向","研究方向") |
|||
if "生成方向" in response: |
|||
data_dan["response"] = response.replace("生成方向", "研究方向") |
|||
|
|||
if zishu_query < 700 and zishu_response < 1400: |
|||
data_dan_dict = { |
|||
"text" :"Bob: " + data_dan["query"] + "\n\nAlice: "+ data_dan["response"] |
|||
} |
|||
data_list.append(json.dumps(data_dan_dict, ensure_ascii=False)) |
|||
# if zishu_summary < 600: |
|||
# data_tongji["0-600"] += 1 |
|||
# if 600 < zishu_summary < 1500: |
|||
# data_tongji["600-1500"] += 1 |
|||
# if 1500 < zishu_summary: |
|||
# data_tongji["1500-"] += 1 |
|||
# data_tongji_prompt.append([data_dan['summary'], zishu_summary]) |
|||
# else: |
|||
# train_list.append(i) |
|||
|
|||
|
|||
# for i in data_tongji_prompt: |
|||
# print(i) |
|||
# |
|||
|
|||
# random.shuffle(data_list) |
|||
# |
|||
train_nums = int(len(data_list) * 0.8) |
|||
dev_nums = int(len(data_list) * 0.2) |
|||
# |
|||
random.shuffle(data_list) |
|||
print(train_nums) |
|||
train_list = data_list[:train_nums] |
|||
dev_list = data_list[train_nums:] |
|||
with open("./data/chatglm_train_3_chatrwkv.jsonl", mode="w", encoding="utf-8") as f: |
|||
for i in train_list: |
|||
f.write(i) |
|||
f.write("\n") |
|||
|
|||
with open("./data/chatglm_dev_3_chatrwkv.jsonl", mode="w", encoding="utf-8") as f: |
|||
for i in dev_list: |
|||
f.write(i) |
|||
f.write("\n") |
|||
|
|||
# for i in data_tongji_prompt: |
|||
# print(i) |
|||
# |
|||
# print(data_tongji) |
@ -0,0 +1,33 @@ |
|||
import json |
|||
|
|||
# { |
|||
# '生成课题的研究背景和意义#': 1851, |
|||
# '生成论文小标题内容#': 8316, |
|||
# '生成目录#': 1975, |
|||
# '生成论文摘要#': 958, |
|||
# '生成6点本篇论文应完成的主要内容#': 881, |
|||
# '生成研究内容#': 2014, |
|||
# '生成关键字#': 850, |
|||
# '翻译关键词#': 980, |
|||
# '生成论文简短总结#': 2055, |
|||
# '生成论文来源的背景#': 2003, |
|||
# '生成课题的国内外研究状况综述#': 1915, |
|||
# '翻译摘要#': 199 |
|||
# } |
|||
path = "data/chatglm_dev_3_prompt.json" |
|||
with open(path, encoding="utf-8") as f: |
|||
data = f.readlines() |
|||
|
|||
data_type = {} |
|||
|
|||
for i in data: |
|||
data_dan = json.loads(i)  # one JSON object per line |
|||
# if data_dan["prompt"] not in data_type: |
|||
# data_type[data_dan["prompt"]] = 1 |
|||
# else: |
|||
# data_type[data_dan["prompt"]] += 1 |
|||
|
|||
if data_dan["prompt"] == "生成论文小标题内容#": |
|||
print(i) |
|||
|
|||
print(data_type)  # stays empty while the counting block above is commented out |
@ -0,0 +1,14 @@ |
|||
#-*- coding:utf-8 -*- |
|||
|
|||
import re |
|||
|
|||
|
|||
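# Quick regex sanity check: pull the quoted sub-heading out of a full prompt string |
|||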
a = "[一二三四五六七八九].?.?总结|[1-9].?.?总结" |
|||
|
|||
b = "1.2.1 总结 adsadadadadadadadadadadadadadad" |
|||
|
|||
c = "请把其中的小标题“(.*?)”的内容补充完整" |
|||
|
|||
d = "问:论文题目是“《子不语》精怪故事研究”,目录是“一、引言\n1.1 研究背景\n1.2 研究意义\n1.3 研究方法\n\n二、《子不语》精怪故事概述\n2.1 《子不语》的作者和成书背景\n2.2 《子不语》中的精怪故事类型\n2.3 《子不语》中的精怪故事特点\n\n三、《子不语》中的精怪故事主题\n3.1 爱情主题\n3.2 死亡主题\n3.3 婚姻主题\n3.4 人性主题\n\n四、《子不语》中的精怪故事人物形象\n4.1 神仙形象\n4.2 鬼怪形象\n4.3 人物形象\n\n五、《子不语》中的精怪故事情节分析\n5.1 情节的变化和转折\n5.2 情节的发展和衔接\n5.3 情节的意义和价值\n\n六、《子不语》中的精怪故事艺术特色\n6.1 语言艺术特色\n6.2 形象艺术特色\n6.3 结构艺术特色\n\n七、结论\n7.1 研究成果总结\n7.2 研究不足和展望\n\n参考文献”,请把其中的小标题“5.1 情节的变化和转折”的内容补充完整,补充内容字数在1000字左右\n答:\n" |
|||
|
|||
print(re.findall(c, d)) |
@ -0,0 +1,38 @@ |
|||
import requests |
|||
import json |
|||
|
|||
|
|||
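# Smoke test: send one prompt to the OpenAI chat completions endpoint and print the reply |
|||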
def request_api_chatgpt(api_key, prompt): |
|||
OPENAI_API_KEY = api_key |
|||
url = "https://api.openai.com/v1/chat/completions" |
|||
headers = { |
|||
"Content-Type": "application/json", |
|||
"Authorization": f"Bearer {OPENAI_API_KEY}" |
|||
} |
|||
data = { |
|||
"model": "gpt-3.5-turbo", |
|||
"messages": [ |
|||
{"role": "user", "content": prompt}, |
|||
], |
|||
"temperature": 0.5 |
|||
} |
|||
response = requests.post(url, |
|||
headers=headers, |
|||
data=json.dumps(data), |
|||
timeout=240) |
|||
|
|||
res = response.json() |
|||
text = res["choices"][0]["message"]["content"] |
|||
print(text) |
|||
|
|||
api = "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" |
|||
# prompt = "为论文题目“基于单片机的多功能充电控制器设计”生成目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题" |
|||
|
|||
# prompt = "为论文题目“基于单片机的多功能充电控制器设计”生成目录,以“研制一款基于单片机的多功能充电控制器,包括硬件和软件设计。最终成 |
|||
# 果是研制一台可对多种类型蓄电池充电的控制器实物,并以一个特定蓄电池充电为例,验证所设计控制器的可用性”为论文的生成方向,要求只有一级 |
|||
# 标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题" |
|||
|
|||
# 基于单片机的多功能充电控制器设计”生成目录,以“研制一款基于单片机的多功能充电控制器,包括硬件和软件设计。最终成果是研制一台可对多种类型蓄电池充电的控制器实物,并以一个特定蓄电池充电为例,验证所设计控制器的可用性 |
|||
|
|||
prompt = "请以《基于单片机的多功能充电控制器设计》为题目,以“研制一款基于单片机的多功能充电控制器,包括硬件和软件设计。最终成果是研制一台可对多种类型蓄电池充电的控制器实物,并以一个特定蓄电池充电为例,验证所设计控制器的可用性”为论文的生成方向,生成论文摘要,要求生成的字数在600字左右" |
|||
request_api_chatgpt(api, prompt) |
@ -0,0 +1,159 @@ |
|||
|
|||
import os |
|||
import json |
|||
import re |
|||
import math |
|||
import numpy as np |
|||
from tqdm import tqdm |
|||
|
|||
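# Collect content/summary pairs from the raw dump files and write them to chatglm_paper_data_2.txt |
|||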
def is_contains_chinese(strs): |
|||
for _char in strs: |
|||
if '\u4e00' <= _char <= '\u9fa5': |
|||
return True |
|||
return False |
|||
# pantten_second_biaoti = '[2二ⅡⅠ][、.]\s{0,}?[\u4e00-\u9fa5]+' |
|||
|
|||
lable_data_amount = { |
|||
"title_beijing_prompt_data.txt": {"num_token": 5000, "prompt": "生成论文来源的背景#"}, |
|||
"title_jianjie_prompt_data.txt": {"num_token": 5000, "prompt": "生成研究内容#"}, |
|||
"title_mulu_prompt_data.txt": {"num_token": 5000, "prompt": "生成目录#"}, |
|||
"title_yanjiubeijingyiyi_prompt_data.txt": {"num_token": 5000, "prompt": "生成课题的研究背景和意义#"}, |
|||
"title_zongjie_prompt_data.txt": {"num_token": 5000, "prompt": "生成论文简短总结#"}, |
|||
"title_zongshu_prompt_data.txt": {"num_token": 5000, "prompt": "生成课题的国内外研究状况综述#"}, |
|||
"jianjie_task_book_prompt_data.txt": {"num_token": 5000, "prompt": "生成6点本篇论文应完成的主要内容#"}, |
|||
"title_mulu_references_prompt_data.txt": {"num_token": 1, "prompt": "生成参考文献#"}, |
|||
"title_mulu_small_title_prompt_shuffle_data.txt": {"num_token": 18730, "prompt": "生成论文小标题内容#"}, |
|||
"title_mulu_zhaiyao_data.txt": {"num_token": 5000, "prompt": "生成论文摘要#"}, |
|||
"zhaiyao_chinese_keyword_prompt_data.txt": {"num_token": 5000, "prompt": "生成关键字#"}, |
|||
"zhaiyao_fanyi_prompt_data.txt": {"num_token": 5000, "prompt": "翻译摘要#"}, |
|||
"chinese_keyword_en_prompt_data.txt": {"num_token": 5000, "prompt": "翻译关键词#"}, |
|||
"title_hexin_beijing_prompt_data.txt": {"num_token": 4971, "prompt": "生成论文来源的背景#"}, |
|||
"title_hexin_jianjie_prompt_data.txt": {"num_token": 4903, "prompt": "生成研究内容#"}, |
|||
"title_hexin_mulu_prompt_data.txt": {"num_token": 4954, "prompt": "生成目录#"}, |
|||
"title_hexin_yanjiubeijingyiyi_prompt_data.txt": {"num_token": 4902, "prompt": "生成课题的研究背景和意义#"}, |
|||
"title_hexin_zongjie_prompt_data.txt": {"num_token": 4971, "prompt": "生成论文简短总结#"}, |
|||
"title_hexin_zongshu_prompt_data.txt": {"num_token": 4671, "prompt": "生成课题的国内外研究状况综述#"} |
|||
} |
|||
|
|||
re_file = { |
|||
"title_beijing_prompt_data.txt": "\n以“", |
|||
"title_jianjie_prompt_data.txt": "\n请帮我生成《", |
|||
"title_mulu_prompt_data.txt": "\n为论文题目“", |
|||
"title_yanjiubeijingyiyi_prompt_data.txt": "\n请分别写出以《", |
|||
"title_zongjie_prompt_data.txt": "\n以“", |
|||
"title_zongshu_prompt_data.txt": "\n请写出以《", |
|||
"jianjie_task_book_prompt_data.txt": "\n\"请根据题目为《", |
|||
"title_mulu_references_prompt_data.txt": "\n\"论文题目是“", |
|||
"zhaiyao_chinese_keyword_prompt_data.txt": "\n\"请为“", |
|||
"zhaiyao_fanyi_prompt_data.txt": "\n\"请把“", |
|||
"chinese_keyword_en_prompt_data.txt": "\n\"请把“", |
|||
"title_mulu_zhaiyao_data.txt": "@@@@@@@@@@@@@@@@@@", |
|||
"title_mulu_small_title_prompt_shuffle_data.txt": "@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_beijing_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_jianjie_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_mulu_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_yanjiubeijingyiyi_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_zongjie_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_zongshu_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@" |
|||
} |
|||
|
|||
split_teshu = [ |
|||
"title_mulu_zhaiyao_data.txt", |
|||
"title_mulu_small_title_prompt_shuffle_data.txt", |
|||
"title_hexin_beijing_prompt_data.txt", |
|||
"title_hexin_jianjie_prompt_data.txt", |
|||
"title_hexin_mulu_prompt_data.txt", |
|||
"title_hexin_yanjiubeijingyiyi_prompt_data.txt", |
|||
"title_hexin_zongjie_prompt_data.txt", |
|||
"title_hexin_zongshu_prompt_data.txt" |
|||
] |
|||
|
|||
path_list = [] |
|||
file = "./data/paper_prompt_title_3" |
|||
for root, dirs, files in os.walk(file): |
|||
for file in files: |
|||
path = os.path.join(root, file) |
|||
path_list.append(path) |
|||
|
|||
file = "./data/paper_prompt_title_3_1" |
|||
for root, dirs, files in os.walk(file): |
|||
for file in files: |
|||
path = os.path.join(root, file) |
|||
path_list.append(path) |
|||
|
|||
file = "./data/paper_prompt_title_3_1_1" |
|||
for root, dirs, files in os.walk(file): |
|||
for file in files: |
|||
path = os.path.join(root, file) |
|||
path_list.append(path) |
|||
|
|||
file = "./data/paper_prompt_title_hexin_3" |
|||
for root, dirs, files in os.walk(file): |
|||
for file in files: |
|||
path = os.path.join(root, file) |
|||
path_list.append(path) |
|||
|
|||
|
|||
text_list_new = [] |
|||
|
|||
tongji = {} |
|||
|
|||
|
|||
for path in path_list: |
|||
task_name = path.split("\\")[-1] |
|||
if task_name in re_file: |
|||
spilt_dan = re_file[task_name] |
|||
else: |
|||
continue |
|||
|
|||
train_data_amount = lable_data_amount[task_name] |
|||
|
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
text_list = text.split(spilt_dan) |
|||
index = 1 |
|||
while True: |
|||
if index >= train_data_amount: |
|||
break |
|||
data_dan = text_list[index] |
|||
if "**************" in data_dan: |
|||
# if task_name == "title_jianjie_prompt_data.txt": |
|||
# content, summary = data_dan.split("**************") |
|||
# bool_ = is_contains_chinese(summary) |
|||
# if bool_ == False: |
|||
# index += 1 |
|||
# continue |
|||
if task_name not in split_teshu:  # re-attach the split marker (minus its leading newline) |
|||
data_dan = spilt_dan[1:] + data_dan |
|||
text_list_new.append(data_dan) |
|||
index += 1 |
|||
if task_name not in tongji: |
|||
tongji[task_name] = 1 |
|||
else: |
|||
tongji[task_name] += 1 |
|||
else: |
|||
index += 4  # separator missing: skip past the malformed record |
|||
print(data_dan) |
|||
|
|||
# train_list.append({"content": str(title_p), "summary": str(b)}) |
|||
|
|||
train_list = [] |
|||
for text in text_list_new: |
|||
content, summary = text.split("**************") |
|||
train_list.append( |
|||
{"content": str(content).strip("\"").strip("\n").strip("\""), "summary": str(summary)} |
|||
) |
|||
|
|||
import random |
|||
random.shuffle(train_list) |
|||
|
|||
|
|||
for i in tongji: |
|||
print(i, tongji[i]) |
|||
with open("./data/chatglm_paper_data_2.txt", mode="w", encoding="utf-8") as f: |
|||
for i in train_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
@ -0,0 +1,165 @@ |
|||
|
|||
import os |
|||
import json |
|||
import re |
|||
import math |
|||
import numpy as np |
|||
from tqdm import tqdm |
|||
|
|||
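# Same collection script as above, but also records each task's prompt tag, writing query/response/prompt triples |
|||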
def is_contains_chinese(strs): |
|||
for _char in strs: |
|||
if '\u4e00' <= _char <= '\u9fa5': |
|||
return True |
|||
return False |
|||
# pantten_second_biaoti = '[2二ⅡⅠ][、.]\s{0,}?[\u4e00-\u9fa5]+' |
|||
|
|||
lable_data_amount = { |
|||
"title_beijing_prompt_data.txt": {"num_token": 5000, "prompt": "生成论文来源的背景#"}, |
|||
"title_jianjie_prompt_data.txt": {"num_token": 5000, "prompt": "生成研究内容#"}, |
|||
"title_mulu_prompt_data.txt": {"num_token": 5000, "prompt": "生成目录#"}, |
|||
"title_yanjiubeijingyiyi_prompt_data.txt": {"num_token": 5000, "prompt": "生成课题的研究背景和意义#"}, |
|||
"title_zongjie_prompt_data.txt": {"num_token": 5000, "prompt": "生成论文简短总结#"}, |
|||
"title_zongshu_prompt_data.txt": {"num_token": 5000, "prompt": "生成课题的国内外研究状况综述#"}, |
|||
"jianjie_task_book_prompt_data.txt": {"num_token": 5000, "prompt": "生成6点本篇论文应完成的主要内容#"}, |
|||
"title_mulu_references_prompt_data.txt": {"num_token": 1, "prompt": "生成参考文献#"}, |
|||
"title_mulu_small_title_prompt_shuffle_data.txt": {"num_token": -1, "prompt": "生成论文小标题内容#"}, |
|||
"title_mulu_zhaiyao_data.txt": {"num_token": 5000, "prompt": "生成论文摘要#"}, |
|||
"zhaiyao_chinese_keyword_prompt_data.txt": {"num_token": 5000, "prompt": "生成关键字#"}, |
|||
"zhaiyao_fanyi_prompt_data.txt": {"num_token": 5000, "prompt": "翻译摘要#"}, |
|||
"chinese_keyword_en_prompt_data.txt": {"num_token": 5000, "prompt": "翻译关键词#"}, |
|||
"title_hexin_beijing_prompt_data.txt": {"num_token": 4971, "prompt": "生成论文来源的背景#"}, |
|||
"title_hexin_jianjie_prompt_data.txt": {"num_token": 4903, "prompt": "生成研究内容#"}, |
|||
"title_hexin_mulu_prompt_data.txt": {"num_token": 4954, "prompt": "生成目录#"}, |
|||
"title_hexin_yanjiubeijingyiyi_prompt_data.txt": {"num_token": 4902, "prompt": "生成课题的研究背景和意义#"}, |
|||
"title_hexin_zongjie_prompt_data.txt": {"num_token": 4971, "prompt": "生成论文简短总结#"}, |
|||
"title_hexin_zongshu_prompt_data.txt": {"num_token": 4671, "prompt": "生成课题的国内外研究状况综述#"} |
|||
} |
|||
|
|||
re_file = { |
|||
"title_beijing_prompt_data.txt": "\n以“", |
|||
"title_jianjie_prompt_data.txt": "\n请帮我生成《", |
|||
"title_mulu_prompt_data.txt": "\n为论文题目“", |
|||
"title_yanjiubeijingyiyi_prompt_data.txt": "\n请分别写出以《", |
|||
"title_zongjie_prompt_data.txt": "\n以“", |
|||
"title_zongshu_prompt_data.txt": "\n请写出以《", |
|||
"jianjie_task_book_prompt_data.txt": "\n\"请根据题目为《", |
|||
"title_mulu_references_prompt_data.txt": "\n\"论文题目是“", |
|||
"zhaiyao_chinese_keyword_prompt_data.txt": "\n\"请为“", |
|||
"zhaiyao_fanyi_prompt_data.txt": "\n\"请把“", |
|||
"chinese_keyword_en_prompt_data.txt": "\n\"请把“", |
|||
"title_mulu_zhaiyao_data.txt": "@@@@@@@@@@@@@@@@@@", |
|||
"title_mulu_small_title_prompt_shuffle_data.txt": "@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_beijing_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_jianjie_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_mulu_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_yanjiubeijingyiyi_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_zongjie_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@", |
|||
"title_hexin_zongshu_prompt_data.txt": "@@@@@@@@@@@@@@@@@@@@@@@" |
|||
} |
|||
|
|||
split_teshu = [ |
|||
"title_mulu_zhaiyao_data.txt", |
|||
"title_mulu_small_title_prompt_shuffle_data.txt", |
|||
"title_hexin_beijing_prompt_data.txt", |
|||
"title_hexin_jianjie_prompt_data.txt", |
|||
"title_hexin_mulu_prompt_data.txt", |
|||
"title_hexin_yanjiubeijingyiyi_prompt_data.txt", |
|||
"title_hexin_zongjie_prompt_data.txt", |
|||
"title_hexin_zongshu_prompt_data.txt" |
|||
] |
|||
|
|||
path_list = [] |
|||
file = "./data/paper_prompt_title_3" |
|||
for root, dirs, files in os.walk(file): |
|||
for file in files: |
|||
path = os.path.join(root, file) |
|||
path_list.append(path) |
|||
|
|||
file = "./data/paper_prompt_title_3_1" |
|||
for root, dirs, files in os.walk(file): |
|||
for file in files: |
|||
path = os.path.join(root, file) |
|||
path_list.append(path) |
|||
|
|||
file = "./data/paper_prompt_title_3_1_1" |
|||
for root, dirs, files in os.walk(file): |
|||
for file in files: |
|||
path = os.path.join(root, file) |
|||
path_list.append(path) |
|||
|
|||
file = "./data/paper_prompt_title_hexin_3" |
|||
for root, dirs, files in os.walk(file): |
|||
for file in files: |
|||
path = os.path.join(root, file) |
|||
path_list.append(path) |
|||
|
|||
|
|||
text_list_new = [] |
|||
|
|||
tongji = {} |
|||
|
|||
|
|||
for path in path_list: |
|||
task_name = path.split("\\")[-1] |
|||
if task_name in re_file: |
|||
spilt_dan = re_file[task_name] |
|||
else: |
|||
continue |
|||
|
|||
train_data_amount_dict = lable_data_amount[task_name] |
|||
train_data_amount = train_data_amount_dict["num_token"] |
|||
|
|||
prompt = train_data_amount_dict["prompt"] |
|||
|
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
text_list = text.split(spilt_dan) |
|||
index = 1 |
|||
|
|||
if train_data_amount == -1: |
|||
train_data_amount = len(text_list) -1 |
|||
while True: |
|||
if index >= train_data_amount: |
|||
break |
|||
data_dan = text_list[index] |
|||
if "**************" in data_dan: |
|||
# if task_name == "title_jianjie_prompt_data.txt": |
|||
# content, summary = data_dan.split("**************") |
|||
# bool_ = is_contains_chinese(summary) |
|||
# if bool_ == False: |
|||
# index += 1 |
|||
# continue |
|||
if task_name not in split_teshu:  # re-attach the split marker (minus its leading newline) |
|||
data_dan = spilt_dan[1:] + data_dan |
|||
text_list_new.append((data_dan, prompt)) |
|||
index += 1 |
|||
if task_name not in tongji: |
|||
tongji[task_name] = 1 |
|||
else: |
|||
tongji[task_name] += 1 |
|||
else: |
|||
index += 4  # separator missing: skip past the malformed record |
|||
print(data_dan) |
|||
|
|||
# train_list.append({"content": str(title_p), "summary": str(b)}) |
|||
|
|||
train_list = [] |
|||
for text, prompt in text_list_new: |
|||
content, summary = text.split("**************") |
|||
train_list.append( |
|||
{"query": str(content).strip("\"").strip("\n").strip("\""), "response": str(summary), "prompt": prompt} |
|||
) |
|||
|
|||
import random |
|||
random.shuffle(train_list) |
|||
|
|||
|
|||
for i in tongji: |
|||
print(i, tongji[i]) |
|||
with open("./data/chatglm_paper_data_2_prompt.txt", mode="w", encoding="utf-8") as f: |
|||
for i in train_list: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
@ -0,0 +1,127 @@ |
|||
import time |
|||
|
|||
from tqdm import tqdm |
|||
import random |
|||
import requests |
|||
import json |
|||
import threading |
|||
from threading import Thread |
|||
import redis |
|||
|
|||
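# Task farm: pop prompts from a redis queue and fan them out to the OpenAI API across a pool of keys |
|||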
lock = threading.RLock() |
|||
pool = redis.ConnectionPool(host='104.244.90.248', port=63179, max_connections=50, db=10, password='REPLACE_ME')  # placeholder: keep real credentials out of source control |
|||
redis_ = redis.Redis(connection_pool=pool, decode_responses=True) |
|||
|
|||
with open("api_key.txt", "r",) as f: |
|||
a = f.read() |
|||
a = a.split("\n") |
|||
|
|||
redis_key_name_openaikey_list = "openaikey_list" |
|||
redis_zirenwu = "redis_zirenwu" |
|||
|
|||
api_key_list = [] |
|||
for i in a: |
|||
api_key_list.append(str(i.split("----")[-1])) |
|||
|
|||
for i in api_key_list: |
|||
redis_.rpush(redis_key_name_openaikey_list, i) |
|||
|
|||
|
|||
|
|||
zhuyaoneirong_prompt = "“《基于单片机的多功能充电控制器设计》:研制一款基于单片机的多功能充电控制器,包括硬件和软件设计。最终成果是" \ |
|||
"研制一台可对多种类型蓄电池充电的控制器实物,并以一个特定蓄电池充电为例,验证所设计控制器的可用性”," \ |
|||
"以上面话术为标准。根据论文题目为《{}》生成这种格式的一段话,要求200个字以内", |
|||
|
|||
|
|||
with open("./data/题目2.txt", encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
text_list = text.split("\n") |
|||
|
|||
|
|||
random.shuffle(text_list) |
|||
text_list = text_list[:6000] |
|||
title_list = [] |
|||
for i in text_list: |
|||
title_list.append(i.split("@@@@@")[0]) |
|||
|
|||
random.shuffle(title_list) |
|||
|
|||
print(len(title_list)) |
|||
|
|||
zirenwu_list = [] |
|||
|
|||
for title in title_list: |
|||
zirenwu_list.append(("zhuyaoneirong_prompt", str(zhuyaoneirong_prompt).format(title))) |
|||
|
|||
for i in zirenwu_list: |
|||
redis_.rpush(redis_zirenwu, str(i)) |
|||
|
|||
|
|||
def request_api_chatgpt(api_key, task_type, prompt): |
|||
try: |
|||
OPENAI_API_KEY = api_key |
|||
url = "https://api.openai.com/v1/chat/completions" |
|||
headers = { |
|||
"Content-Type": "application/json", |
|||
"Authorization": f"Bearer {OPENAI_API_KEY}" |
|||
} |
|||
data = { |
|||
"model": "gpt-3.5-turbo", |
|||
"messages": [ |
|||
{"role": "user", "content": prompt}, |
|||
], |
|||
"temperature": 0.5 |
|||
} |
|||
response = requests.post(url, |
|||
headers=headers, |
|||
data=json.dumps(data), |
|||
timeout=240) |
|||
|
|||
res = response.json() |
|||
text = res["choices"][0]["message"]["content"] |
|||
lock.acquire() |
|||
# api_key_list.append(api_key) |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
|
|||
with open("/home/majiahui/mulu_ner/data/paper_prompt_title_3/title_{}_data.txt".format(task_type), mode="a") as f: |
|||
f.write(prompt) |
|||
f.write("**************") |
|||
f.write(text) |
|||
f.write("\n") |
|||
lock.release() |
|||
|
|||
except Exception:  # request failed: requeue the key and the task after a short pause |
|||
print("task_type_bad", task_type) |
|||
print("api_key_bad", api_key) |
|||
time.sleep(5) |
|||
lock.acquire() |
|||
redis_.rpush(redis_key_name_openaikey_list, api_key) |
|||
redis_.rpush(redis_zirenwu, str((task_type, prompt))) |
|||
lock.release() |
|||
|
|||
|
|||
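# Main loop: pair an available API key with a pending task and handle each request on its own thread |
|||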
if __name__ == '__main__': |
|||
while True: |
|||
if redis_.llen(redis_zirenwu) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
elif redis_.llen(redis_zirenwu) != 0 and redis_.llen(redis_key_name_openaikey_list) != 0: |
|||
lock.acquire() |
|||
api_key = redis_.lpop(redis_key_name_openaikey_list) |
|||
api_key = api_key.decode('UTF-8') |
|||
dan_zirenwu = redis_.lpop(redis_zirenwu) |
|||
dan_zirenwu = dan_zirenwu.decode('UTF-8') |
|||
lock.release() |
|||
# dan_zirenwu = zirenwu_list.pop(0) |
|||
dan_zirenwu = eval(dan_zirenwu) |
|||
task_type, prompt = dan_zirenwu[0], dan_zirenwu[1] |
|||
t = Thread(target=request_api_chatgpt, args=(api_key, task_type, prompt)) |
|||
t.start() |
|||
elif redis_.llen(redis_key_name_openaikey_list) == 0: |
|||
time.sleep(1) |
|||
continue |
|||
else: |
|||
time.sleep(1) |
|||
continue |
@ -0,0 +1,74 @@ |
|||
import json |
|||
import re |
|||
|
|||
|
|||
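# Rebuild sub-heading completion prompts from the raw dump, keeping only pairs short enough for training |
|||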
pantten_mulu = "目录是“(.*?)”,请把其中" |
|||
pantten_title = "“(.*?)”,目录是" |
|||
pantten_small_title = "请把其中的小标题“(.*?)”的内容补充完整" |
|||
pantten_big_title = "请把其中的大标题“(.*?)”的内容补充完整" |
|||
pantten_zishu = "的内容补充完整,补充内容字数在(.*?)字左右" |
|||
|
|||
|
|||
with open("data/prompt_small_gen.txt", encoding="utf-8") as f: |
|||
content = f.read() |
|||
|
|||
content_list = content.split("\"论文题目是") |
|||
content_list = content_list[1:] |
|||
content_list = [i.strip("\n") for i in content_list] |
|||
|
|||
train = [] |
|||
print(len(content_list)) |
|||
for i in content_list: |
|||
result_biaoti_list = re.findall(pantten_mulu, i) |
|||
try: |
|||
result_biaoti_list[0] |
|||
except: |
|||
print(i) |
|||
continue |
|||
if result_biaoti_list[0] != "": |
|||
mulu_list = str(result_biaoti_list[0]).split("\\n") |
|||
mulu_list = [i.strip() for i in mulu_list if i != ""] |
|||
mulu = "@".join(mulu_list) |
|||
else: |
|||
continue |
|||
result_biaoti_list = re.findall(pantten_title, i) |
|||
if result_biaoti_list[0] != "": |
|||
title = result_biaoti_list[0] |
|||
else: |
|||
continue |
|||
result_biaoti_small_list = re.findall(pantten_small_title, i) |
|||
result_biaoti_big_list = re.findall(pantten_big_title, i) |
|||
if result_biaoti_small_list != []: |
|||
small_title = result_biaoti_small_list[0] |
|||
result_biaoti_list = re.findall(pantten_zishu, i) |
|||
if result_biaoti_list[0] != "": |
|||
zishu = result_biaoti_list[0] |
|||
else: |
|||
continue |
|||
small_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的小标题“{}”的内容补充完整,补充内容字数在{}字左右" |
|||
neirong = i.split("**************")[1] |
|||
a = small_title_prompt.format(title, mulu, small_title, zishu) |
|||
if len(str(a)) + len(str(neirong)) < 2048: |
|||
train.append({"content": str(a), "summary": str(neirong)}) |
|||
elif result_biaoti_big_list != []: |
|||
big_title = result_biaoti_big_list[0] |
|||
result_biaoti_list = re.findall(pantten_zishu, i) |
|||
if result_biaoti_list[0] != "": |
|||
zishu = result_biaoti_list[0] |
|||
else: |
|||
continue |
|||
big_title_prompt = "论文题目是“{}”,目录是“{}”,请把其中的大标题“{}”的内容补充完整,补充内容字数在{}字左右"  # big-title variant of the template |
|||
neirong = i.split("**************")[1] |
|||
a = big_title_prompt.format(title, mulu, big_title, zishu) |
|||
if len(str(neirong)) + len(str(a)) < 2048: |
|||
train.append({"content": str(a), "summary": str(neirong)}) |
|||
else: |
|||
continue |
|||
|
|||
with open("data/small_title_train.json", "w", encoding="utf-8") as f: |
|||
for i in train: |
|||
f.write(json.dumps(i, ensure_ascii=False)) |
|||
f.write("\n") |
|||
|
|||
|
|||
|
@ -0,0 +1,11 @@ |
|||
|
|||
|
|||
path = "data/title.txt" |
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
|
|||
|
|||
|
|||
text_list = text.split("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n") |
|||
print(len(text_list)) |
@ -0,0 +1,6 @@ |
|||
path = "data/title.txt" |
|||
with open(path, encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
text_list = text.split("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n") |
|||
print(len(text_list)) |
@ -0,0 +1,11 @@ |
|||
# Wardrobe section |
|||
a = (888 * 24)  # wardrobe included in the package |
|||
print(a) |
|||
|
|||
# Cabinet section, assuming base cabinets, countertop and wall cabinets each exceed 3 meters |
|||
d = (3*999 + 3*999 + 3*999) |
|||
|
|||
e = d + 4999 |
|||
|
|||
# 24 m of custom wardrobe + 6 m countertop, 6 m base cabinets, 4 m wall cabinets, minus the 楼控 part |
|||
print(a + e) |
@ -0,0 +1,83 @@ |
|||
import os |
|||
import random |
|||
import json |
|||
from tqdm import tqdm |
|||
|
|||
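# Filter length-bounded content/summary pairs and split them 90/10 into train/dev files |
|||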
data_tongji = { |
|||
"0-600": 0, |
|||
"600-1500": 0, |
|||
"1500-": 0, |
|||
} |
|||
# print("这段文字翻译成英文"\n'") |
|||
data_tongji_prompt = [] |
|||
|
|||
def is_contains_chinese(strs): |
|||
for _char in strs: |
|||
if '\u4e00' <= _char <= '\u9fa5': |
|||
return True |
|||
return False |
|||
|
|||
data_list = [] |
|||
|
|||
|
|||
with open("data/chatglm_paper_data_2.txt", encoding="utf-8") as f: |
|||
for i in tqdm(f): |
|||
data_dan = json.loads(i)  # each line was written with json.dumps |
|||
zishu_content = len(data_dan["content"]) |
|||
zishu_summary = len(data_dan["summary"]) |
|||
prompt = data_dan["content"] |
|||
summary = data_dan["summary"] |
|||
|
|||
|
|||
if "这段文字翻译成英文" in prompt: |
|||
zishu_summary = len(data_dan['summary'].split(" ")) |
|||
elif "这几个关键字翻译成英文" in prompt: |
|||
zishu_summary = len(data_dan['summary'].split(" ")) |
|||
else: |
|||
bool_ = is_contains_chinese(data_dan["summary"]) |
|||
if bool_ == False: |
|||
print(data_dan) |
|||
continue |
|||
|
|||
if "生成方向" in prompt: |
|||
data_dan["content"] = prompt.replace("生成方向","研究方向") |
|||
if "生成方向" in summary: |
|||
data_dan["summary"] = summary.replace("生成方向", "研究方向") |
|||
|
|||
if zishu_content < 900 and zishu_summary < 1900: |
|||
data_list.append(json.dumps(data_dan, ensure_ascii=False)) |
|||
# if zishu_summary < 600: |
|||
# data_tongji["0-600"] += 1 |
|||
# if 600 < zishu_summary < 1500: |
|||
# data_tongji["600-1500"] += 1 |
|||
# if 1500 < zishu_summary: |
|||
# data_tongji["1500-"] += 1 |
|||
# data_tongji_prompt.append([data_dan['summary'], zishu_summary]) |
|||
# else: |
|||
# train_list.append(i) |
|||
|
|||
|
|||
# for i in data_tongji_prompt: |
|||
# print(i) |
|||
# |
|||
|
|||
# random.shuffle(data_list) |
|||
# |
|||
train_nums = int(len(data_list) * 0.9) |
|||
dev_nums = int(len(data_list) * 0.1) |
|||
# |
|||
random.shuffle(data_list) |
|||
train_list = data_list[:train_nums] |
|||
dev_list = data_list[train_nums:] |
|||
with open("./data/chatglm_train_3.json", mode="w", encoding="utf-8") as f: |
|||
for i in train_list: |
|||
f.write(i) |
|||
|
|||
with open("./data/chatglm_dev_3.json", mode="w", encoding="utf-8") as f: |
|||
for i in dev_list: |
|||
f.write(i) |
|||
|
|||
# for i in data_tongji_prompt: |
|||
# print(i) |
|||
# |
|||
# print(data_tongji) |
@ -0,0 +1,89 @@ |
|||
import os |
|||
import random |
|||
import json |
|||
from tqdm import tqdm |
|||
|
|||
|
|||
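# Same filtering as above for the query/response/prompt data, split 80/20 into train/dev |
|||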
data_tongji = { |
|||
"0-600": 0, |
|||
"600-1500": 0, |
|||
"1500-": 0, |
|||
} |
|||
# print("这段文字翻译成英文"\n'") |
|||
data_tongji_prompt = [] |
|||
|
|||
def is_contains_chinese(strs): |
|||
for _char in strs: |
|||
if '\u4e00' <= _char <= '\u9fa5': |
|||
return True |
|||
return False |
|||
|
|||
data_list = [] |
|||
|
|||
|
|||
with open("data/chatglm_paper_data_2_prompt.txt", encoding="utf-8") as f: |
|||
for i in tqdm(f): |
|||
data_dan = json.loads(i)  # each line was written with json.dumps |
|||
zishu_query = len(data_dan["query"]) |
|||
zishu_response = len(data_dan["response"]) |
|||
|
|||
query = data_dan["query"] |
|||
response = data_dan["response"] |
|||
prompt = data_dan["prompt"] |
|||
|
|||
|
|||
if prompt == "翻译摘要#": |
|||
zishu_summary = len(data_dan["response"].split(" ")) |
|||
elif prompt == "翻译关键词#": |
|||
zishu_summary = len(data_dan["response"].split(" ")) |
|||
else: |
|||
bool_ = is_contains_chinese(data_dan["response"]) |
|||
if bool_ == False: |
|||
print(data_dan) |
|||
continue |
|||
|
|||
if "生成方向" in query: |
|||
data_dan["query"] = query.replace("生成方向","研究方向") |
|||
if "生成方向" in response: |
|||
data_dan["response"] = response.replace("生成方向", "研究方向") |
|||
|
|||
if zishu_query < 700 and zishu_response < 1400: |
|||
data_list.append(json.dumps(data_dan, ensure_ascii=False)) |
|||
# if zishu_summary < 600: |
|||
# data_tongji["0-600"] += 1 |
|||
# if 600 < zishu_summary < 1500: |
|||
# data_tongji["600-1500"] += 1 |
|||
# if 1500 < zishu_summary: |
|||
# data_tongji["1500-"] += 1 |
|||
# data_tongji_prompt.append([data_dan['summary'], zishu_summary]) |
|||
# else: |
|||
# train_list.append(i) |
|||
|
|||
|
|||
# for i in data_tongji_prompt: |
|||
# print(i) |
|||
# |
|||
|
|||
# random.shuffle(data_list) |
|||
# |
|||
train_nums = int(len(data_list) * 0.8) |
|||
dev_nums = int(len(data_list) * 0.2) |
|||
# |
|||
random.shuffle(data_list) |
|||
print(train_nums) |
|||
train_list = data_list[:train_nums] |
|||
dev_list = data_list[train_nums:] |
|||
with open("./data/chatglm_train_3_prompt.json", mode="w", encoding="utf-8") as f: |
|||
for i in train_list: |
|||
f.write(i) |
|||
f.write("\n") |
|||
|
|||
with open("./data/chatglm_dev_3_prompt.json", mode="w", encoding="utf-8") as f: |
|||
for i in dev_list: |
|||
f.write(i) |
|||
f.write("\n") |
|||
|
|||
# for i in data_tongji_prompt: |
|||
# print(i) |
|||
# |
|||
# print(data_tongji) |
@ -0,0 +1,41 @@ |
|||
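# Deduplicate: drop titles from 题目4.txt that already appear in 题目2.txt or 题目3.txt |
|||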
title_list = [] |
|||
|
|||
|
|||
with open("./data/题目3.txt", encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
text_list = text.split("\n") |
|||
|
|||
|
|||
for i in text_list: |
|||
title_list.append(i.split("@@@@@")[0]) |
|||
|
|||
with open("./data/题目2.txt", encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
text_list = text.split("\n") |
|||
|
|||
for i in text_list: |
|||
title_list.append(i.split("@@@@@")[0]) |
|||
|
|||
print(title_list) |
|||
|
|||
with open("./data/题目4.txt", encoding="utf-8") as f: |
|||
text = f.read() |
|||
|
|||
text_list = text.split("\n") |
|||
|
|||
|
|||
title_list_new = [] |
|||
for i in text_list: |
|||
if i.split("@@@@@")[0] in title_list: |
|||
continue |
|||
else: |
|||
title_list_new.append(i) |
|||
|
|||
print(len(title_list_new)) |
|||
|
|||
with open("./data/题目4_new.txt", mode="w",encoding="utf-8") as f: |
|||
for i in title_list_new: |
|||
f.write(i) |
|||
f.write("\n") |