|
|
@ -5,13 +5,17 @@ import re |
|
|
|
# Regular expressions (kept as plain pattern strings) used to classify the
# lines of a generated outline. They are written as raw strings so that
# sequences such as \s, \. and \d reach the regex engine untouched instead
# of relying on Python leaving unknown string escapes alone.
# NOTE(review): the names look like pinyin ("dabiaoti" = main heading,
# "xiaobiaoti" = sub-heading, "weijinci" = banned word) -- confirm.

# Chapter-style heading: "第<number>章 <title>" or "<number> <title>", where
# <number> is made of ASCII digits and/or the Chinese numerals 一..十.
pantten_dabiaoti = r'^第([0-9一二三四五六七八九十]{1,})?章\s{1,}?(.*)|^([0-9一二三四五六七八九十]{1,}?)\s{1,}?(.*)'

# Dotted numeric heading such as "1.1 ..." up to "1.1.1.1 ...".
pantten_xiaobiaoti = r'^[0-9](\.[0-9]\d*){1,3}\s{1,}?.*$'

# "<n>、<title>" where <n> is one of 3-9 / 三..九.
pantten_dabiaoti_shai = r'^([3-9三四五六七八九])、(.*)'

# "<a>.<b> ..." or "<a>.<b>.<c> ..." where every component is in 3-9.
pantten_xiaobiaoti_shai = r'^[3-9](\.[3-9]){1,2}\s{1,}?.*$'

# "<n>、<title>" where <n> is 八 or 九.
pantten_dabiaoti_ = r'^([八九])、(.*)'

# Dotted heading whose sub-components are all 9, e.g. "1.9 ..." / "1.9.9 ...".
pantten_xiaobiaoti_1 = r'^[1-9](\.[9]){1,2}\s{1,}?.*$'

# Dotted heading with a sub-component of two or more digits, e.g. "1.12 ...".
pantten_xiaobiaoti_2 = r'^[1-9](\.[1-9]{2,}?){1,2}\s{1,}?.*$'

# Dotted heading with three or more sub-components, e.g. "1.1.1.1 ...".
pantten_xiaobiaoti_3 = r'^[1-9](\.[1-8]){3,}\s{1,}?.*$'

# First-level heading "<n>、<title>" with <n> in 1-9 / 一..九.
pantten_dabiaoti_geshu = r'^([1-9一二三四五六七八九])、(.*)'

# Second-level heading with exactly one dotted component, e.g. "1.1 ...".
pantten_xiaobiaoti_geshu = r'^[1-9](\.[1-9]){1}\s{1,}?.*$'

# Substrings to look for in outline entries.
pantten_weijinci_list = ["论文结构"]
|
|
|
|
|
|
|
def contains_chinese(text): |
|
|
|
# 检查是否包含中文字符 |
|
|
@ -59,13 +63,19 @@ def mulu_ulit(mulu): |
|
|
|
continue |
|
|
|
|
|
|
|
if return_bool == True: |
|
|
|
for weijinci in pantten_weijinci_list: |
|
|
|
for i in mulu_new: |
|
|
|
if weijinci in i: |
|
|
|
return_bool = False |
|
|
|
break |
|
|
|
|
|
|
|
if return_bool == True: |
|
|
|
for i in mulu_new: |
|
|
|
dabiaoti = re.findall(pantten_dabiaoti_, i) |
|
|
|
xiaobiaoti_1 = re.findall(pantten_xiaobiaoti_1, i) |
|
|
|
xiaobiaoti_2 = re.findall(pantten_xiaobiaoti_2, i) |
|
|
|
xiaobiaoti_3 = re.findall(pantten_xiaobiaoti_3, i) |
|
|
|
|
|
|
|
if list(set(dabiaoti)| set(xiaobiaoti_1)| set(xiaobiaoti_2)| set(xiaobiaoti_3)) != []: |
|
|
|
if list(set(xiaobiaoti_1)| set(xiaobiaoti_2)| set(xiaobiaoti_3)) != []: |
|
|
|
return_bool = False |
|
|
|
break |
|
|
|
|
|
|
@ -94,32 +104,56 @@ def mulu_ulit(mulu): |
|
|
|
if chinese_bool == False: |
|
|
|
return_bool = False |
|
|
|
|
|
|
|
return [return_bool, mulu_new] |
|
|
|
dabiaoti_geshu_jishu = 0 |
|
|
|
xiaobiaoti_min = 0 |
|
|
|
xiaobiaoti_max = 0 |
|
|
|
if return_bool == True: |
|
|
|
dabiaoti_geshu_jishu = 0 |
|
|
|
|
|
|
|
xiaobiaoti_geshu_jishu_list = [] |
|
|
|
for i in mulu_new: |
|
|
|
dabiaoti_geshu = re.findall(pantten_dabiaoti_geshu, i) |
|
|
|
if dabiaoti_geshu != []: |
|
|
|
dabiaoti_geshu_jishu += 1 |
|
|
|
xiaobiaoti_geshu_jishu_list.append(0) |
|
|
|
|
|
|
|
xiaobiaoti_geshu = re.findall(pantten_xiaobiaoti_geshu, i) |
|
|
|
if xiaobiaoti_geshu != []: |
|
|
|
xiaobiaoti_geshu_jishu_list[-1] += 1 |
|
|
|
xiaobiaoti_min = min(xiaobiaoti_geshu_jishu_list) |
|
|
|
xiaobiaoti_max = max(xiaobiaoti_geshu_jishu_list) |
|
|
|
|
|
|
|
return [return_bool, mulu_new, dabiaoti_geshu_jishu, xiaobiaoti_min, xiaobiaoti_max] |
|
|
|
|
|
|
|
|
|
|
|
# Instruction templates (Chinese) asking for a three-level thesis outline.
# Each "{}" is a str.format placeholder; the first one is always the title.

# Fixed requirement: at least 7 first-level headings, each with at least 3
# second-level headings. Placeholder: title.
prompt = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题不少于7个;每个一级标题至少包含3个二级标题;三级标题个数不限制"

# Exact number of first-level headings plus a min-max range of second-level
# headings. Placeholders: title, heading count, range minimum, range maximum.
prompt_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成{}个;每个一级标题包含{}-{}个二级标题;三级标题个数不限制"

# Exact number of first-level headings and one exact number of second-level
# headings. Placeholders: title, heading count, second-level count.
prompt_not_min_max = "为论文题目“{}”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成{}个;每个一级标题包含{}个二级标题;三级标题个数不限制"

# Accumulates the generated (prompt, outline) pairs.
mulu_zong = []
|
|
|
|
|
|
|
|
|
|
|
def shengcheng_prompt(title, mulu_list, dabiaoti_geshu_jishu, xiaobiaoti_min, xiaobiaoti_max):
    """Build the (instruction, outline) pair for one thesis title.

    Args:
        title: Thesis title substituted into the instruction template.
        mulu_list: Outline entries (strings); joined with newlines.
        dabiaoti_geshu_jishu: Number of first-level headings to request.
        xiaobiaoti_min: Smallest number of second-level headings per
            first-level heading.
        xiaobiaoti_max: Largest number of second-level headings per
            first-level heading.

    Returns:
        A ``(mulu_prompt, mulu)`` tuple: the formatted instruction text and
        the newline-joined outline.
    """
    if xiaobiaoti_min != xiaobiaoti_max:
        # The bounds differ, so ask for a "min-max" range.
        mulu_prompt = prompt_min_max.format(
            title, dabiaoti_geshu_jishu, xiaobiaoti_min, xiaobiaoti_max
        )
    else:
        # Both bounds are equal, so ask for that single count.
        mulu_prompt = prompt_not_min_max.format(
            title, dabiaoti_geshu_jishu, xiaobiaoti_min
        )

    # The outline text is the same in both branches, so build it once.
    mulu = "\n".join(mulu_list)
    return mulu_prompt, mulu
|
|
|
|
|
|
|
|
|
|
|
# Read the input file one JSON document per line, validate each record's
# outline with mulu_ulit, and collect a (prompt, outline) pair for every
# record that passes.
with open("data/data.json", encoding="utf-8") as f:
    for i in f:
        a = json.loads(i)
        try:
            # The last field of a record is itself a JSON-encoded outline.
            (
                return_bool,
                mulu_new,
                dabiaoti_geshu_jishu,
                xiaobiaoti_min,
                xiaobiaoti_max,
            ) = mulu_ulit(json.loads(a[-1]))
            if return_bool:
                mulu_zong.append(
                    shengcheng_prompt(
                        a[0],
                        mulu_new,
                        dabiaoti_geshu_jishu,
                        xiaobiaoti_min,
                        xiaobiaoti_max,
                    )
                )
        except Exception:
            # Best effort: a record that cannot be processed is skipped.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            continue

print(len(mulu_zong))

# ensure_ascii=False keeps the Chinese text readable in the output file.
with open("data/mulu_prompt_1.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(mulu_zong, ensure_ascii=False, indent=2))