diff --git a/articles_directory_predict.py b/articles_directory_predict.py index 050f870..a794d69 100644 --- a/articles_directory_predict.py +++ b/articles_directory_predict.py @@ -22,7 +22,7 @@ def get_host_ip(): chatgpt_url_predict = "http://{}:12000/predict".format(str(get_host_ip())) chatgpt_url_search = "http://{}:12000/search".format(str(get_host_ip())) - +prompt = "<|role|>user<|says|>{}<|end|>\n<|role|>assistant<|says|>" app = Flask(__name__) app.config["JSON_AS_ASCII"] = False @@ -56,8 +56,21 @@ def dialog_line_parse(url, text): def request_api_chatgpt(prompt): + ''' + { + "content": "<|role|>user<|says|>任务:生成目录\n为论文题目“基于“六经”伏邪理论治疗过敏性紫癜性肾炎初探”生成中文目录,要求只有一级标题,二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx一级标题生成5个;每个一级标题包含4-7个二级标题<|end|>\n<|role|>assistant<|says|>", + "model": "openbuddy-qwen2.5llamaify-7b_train_11_prompt_mistral_gpt_xiaobiaot_real_paper_2", + "top_p": 1, + "temperature": 0 + } + :param prompt: + :return: + ''' data = { - "texts": prompt + "content": prompt, + "model": "gpt-4-turbo", + "top_p": 0.95, + "temperature": 0.9 } response = requests.post( chatgpt_url_predict, @@ -151,7 +164,7 @@ def get_multiple_urls(urls): def articles_directory(): text = request.json["texts"] # 获取用户query中的文本 例如"I love you" nums = request.json["nums"] - + text = prompt.format(text) nums = int(nums) input_data = [] diff --git a/articles_directory_predict_word.py b/articles_directory_predict_word.py index d84b871..77ac9f4 100644 --- a/articles_directory_predict_word.py +++ b/articles_directory_predict_word.py @@ -44,11 +44,11 @@ chatgpt_url_predict = "http://{}:12000/predict".format(str(get_host_ip())) chatgpt_url_search = "http://{}:12000/search".format(str(get_host_ip())) prompt = { - "mulu_title_Level_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题\n\nAssistant:", - "mulu_title_Level_2_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 
xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题\n\nAssistant:", - "mulu_title_Level_3": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中\n\nAssistant:", - "mulu_title_Level_3_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中\n\nAssistant:", - "mulu_title_Level_3_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好\n\nAssistant:" + "mulu_title_Level_2": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题<|end|>\n<|role|>assistant<|says|>", + "mulu_title_Level_2_1": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题<|end|>\n<|role|>assistant<|says|>", + "mulu_title_Level_3": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中<|end|>\n<|role|>assistant<|says|>", + "mulu_title_Level_3_1": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中<|end|>\n<|role|>assistant<|says|>", + "mulu_title_Level_3_2": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好<|end|>\n<|role|>assistant<|says|>" } nums_word_dict = { @@ -73,29 +73,36 @@ nums_word_dict = { "10000": { "prompt": prompt["mulu_title_Level_2_1"], "title_1_nums": "6", - "title_2_nums": ["3", "5"], - "title_small_nums": ["6", "3", "5"], + "title_2_nums": ["2", "5"], + "title_small_nums": ["6", "2", "5"], }, "15000": { "prompt": prompt["mulu_title_Level_3_1"], "title_1_nums": "5", - "title_2_nums": ["3", 
"5"], - "title_small_nums": ["5", "3", "5"], + "title_2_nums": ["2", "3"], + "title_small_nums": ["5", "2", "3"], }, "20000": { - "prompt": prompt["mulu_title_Level_3_2"], + "prompt": prompt["mulu_title_Level_3_1"], + "title_1_nums": "6", + "title_2_nums": ["2", "4"], + "title_small_nums": ["6", "2", "4"], + }, + "30000": { + "prompt": prompt["mulu_title_Level_3_1"], "title_1_nums": "7", "title_2_nums": ["3", "5"], "title_small_nums": ["7", "3", "5"], }, - "30000": { + "50000": { "prompt": prompt["mulu_title_Level_3_2"], "title_1_nums": "8", - "title_2_nums": ["5", "8"], - "title_small_nums": ["8", "5", "8"], + "title_2_nums": ["3", "8"], + "title_small_nums": ["8", "3", "8"], }, } + app = Flask(__name__) app.config["JSON_AS_ASCII"] = False @@ -128,8 +135,19 @@ def dialog_line_parse(url, text): def request_api_chatgpt(prompt): + ''' + content = request.json["content"] # 获取用户query中的文本 例如"I love you" + model = request.json["model"] + top_p = request.json["top_p"] + temperature = request.json["temperature"] + :param prompt: + :return: + ''' data = { - "texts": prompt + "content": prompt, + "model": "gpt-4-turbo", + "top_p": 0.95, + "temperature": 0.9 } response = requests.post( chatgpt_url_predict, diff --git a/articles_directory_predict_word_online.py b/articles_directory_predict_word_online.py index 6056c55..de68306 100644 --- a/articles_directory_predict_word_online.py +++ b/articles_directory_predict_word_online.py @@ -44,25 +44,25 @@ chatgpt_url_predict = "http://{}:12001/predict".format(str(get_host_ip())) chatgpt_url_search = "http://{}:12001/search".format(str(get_host_ip())) prompt = { - "mulu_title_Level_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题\n\nAssistant:", - "mulu_title_Level_2_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题\n\nAssistant:", - "mulu_title_Level_3": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 
例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中\n\nAssistant:", - "mulu_title_Level_3_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中\n\nAssistant:", - "mulu_title_Level_3_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好\n\nAssistant:" + "mulu_title_Level_2": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题<|end|>\n<|role|>assistant<|says|>", + "mulu_title_Level_2_1": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题<|end|>\n<|role|>assistant<|says|>", + "mulu_title_Level_3": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中<|end|>\n<|role|>assistant<|says|>", + "mulu_title_Level_3_1": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中<|end|>\n<|role|>assistant<|says|>", + "mulu_title_Level_3_2": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好<|end|>\n<|role|>assistant<|says|>" } nums_word_dict = { "3000": { - "prompt": prompt["mulu_title_Level_2_1"], + "prompt": prompt["mulu_title_Level_2"], "title_1_nums": "3", - "title_2_nums": ["0", "2"], - "title_small_nums": ["3", "0", "2"] + "title_2_nums": ["2"], + "title_small_nums": ["3", "2"] }, "5000": { "prompt": prompt["mulu_title_Level_2_1"], "title_1_nums": "3", - "title_2_nums": ["2","3"], - "title_small_nums": ["3", "2","3"] + "title_2_nums": ["2","4"], + "title_small_nums": ["3", "2","4"] }, "8000": { "prompt": 
prompt["mulu_title_Level_2_1"], @@ -73,26 +73,32 @@ nums_word_dict = { "10000": { "prompt": prompt["mulu_title_Level_2_1"], "title_1_nums": "6", - "title_2_nums": ["3", "5"], - "title_small_nums": ["6", "3", "5"], + "title_2_nums": ["2", "5"], + "title_small_nums": ["6", "2", "5"], }, "15000": { "prompt": prompt["mulu_title_Level_3_1"], - "title_1_nums": "6", - "title_2_nums": ["2", "5"], - "title_small_nums": ["6", "2", "5"], + "title_1_nums": "5", + "title_2_nums": ["2", "3"], + "title_small_nums": ["5", "2", "3"], }, "20000": { - "prompt": prompt["mulu_title_Level_3_2"], + "prompt": prompt["mulu_title_Level_3_1"], + "title_1_nums": "6", + "title_2_nums": ["2", "4"], + "title_small_nums": ["6", "2", "4"], + }, + "30000": { + "prompt": prompt["mulu_title_Level_3_1"], "title_1_nums": "7", "title_2_nums": ["3", "5"], "title_small_nums": ["7", "3", "5"], }, - "30000": { + "50000": { "prompt": prompt["mulu_title_Level_3_2"], "title_1_nums": "8", - "title_2_nums": ["5", "8"], - "title_small_nums": ["8", "5", "8"], + "title_2_nums": ["3", "8"], + "title_small_nums": ["8", "3", "8"], }, } @@ -128,8 +134,19 @@ def dialog_line_parse(url, text): def request_api_chatgpt(prompt): + ''' + content = request.json["content"] # 获取用户query中的文本 例如"I love you" + model = request.json["model"] + top_p = request.json["top_p"] + temperature = request.json["temperature"] + :param prompt: + :return: + ''' data = { - "texts": prompt + "content": prompt, + "model": "gpt-4-turbo", + "top_p": 0.95, + "temperature": 0.9 } response = requests.post( chatgpt_url_predict, diff --git a/articles_directory_predict_word_table_formula_kongzhi.py b/articles_directory_predict_word_table_formula_kongzhi.py new file mode 100644 index 0000000..70ebe8a --- /dev/null +++ b/articles_directory_predict_word_table_formula_kongzhi.py @@ -0,0 +1,487 @@ +import os +import json +import re +from flask import Flask, jsonify +from flask import request +import time +import concurrent.futures +import requests +import socket 
class log:
    """Minimal file logger writing timestamped lines to a per-day file."""

    def __init__(self):
        pass

    @staticmethod
    def log(*args, **kwargs):
        """Append one timestamped line to ``log_file/access-YYYY-MM-DD.log``.

        All positional and keyword arguments are forwarded to ``print``
        after a ``YYYY/MM/DD-HH:MM:SS`` timestamp.
        """
        now = time.localtime(int(time.time()))
        stamp = time.strftime('%Y/%m/%d-%H:%M:%S', now)
        day = time.strftime('%Y-%m-%d', now)
        log_file = 'log_file/access-%s' % day + ".log"
        # Create the directory on first use; append mode creates the file
        # itself when it does not exist yet.
        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        with open(log_file, 'a', encoding='utf-8') as f:
            print(stamp, *args, file=f, **kwargs)


def get_host_ip():
    """
    Return the local IP address used for outbound traffic.

    A datagram socket is connected to 8.8.8.8:80 and the socket's own
    address is read back.

    :return: ip
    """
    # The context manager closes the socket even when connect() fails and
    # avoids referencing an unbound name if socket creation itself raises.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(('8.8.8.8', 80))
        return s.getsockname()[0]


# Resolve the host address once; both service URLs share it.
_HOST_IP = str(get_host_ip())
chatgpt_url_predict = "http://{}:12000/predict".format(_HOST_IP)
chatgpt_url_search = "http://{}:12000/search".format(_HOST_IP)

# Catalogue-generation prompt templates (%-formatted with the title followed
# by the heading counts stored in nums_word_dict[...]["title_small_nums"]).
prompt = {
    "mulu_title_Level_2": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题<|end|>\n<|role|>assistant<|says|>",
    "mulu_title_Level_2_1": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题<|end|>\n<|role|>assistant<|says|>",
    "mulu_title_Level_3": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中<|end|>\n<|role|>assistant<|says|>",
    "mulu_title_Level_3_1": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中<|end|>\n<|role|>assistant<|says|>",
    "mulu_title_Level_3_2": "<|role|>user<|says|>为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好<|end|>\n<|role|>assistant<|says|>"
}

# Fragments assembled by biaogegongshi_kongzhi_prompt() into the prompt that
# asks which headings should carry tables / formulas / pictures / code.
prompt_biaogekongzhi = {
    "prompt_1":"背景:我是一名博士生,我想写一篇论文。论文题目《{}》,目录是“{}”,我现在需要论文中有{},请根据这些信息告诉我",
    "prompt_1_form":"哪些小标题生成的内容需要包含表格",
    "prompt_1_formula":"哪些小标题生成的内容需要包含公式",
    "prompt_1_picture":"哪些小标题生成的内容需要包含图片",
    "prompt_1_code":"哪些小标题生成的内容需要包含代码",
    "prompt2":"可以在一个小标题同时出现{}组合",
    "prompt_2_form":"需要生成表格的小标题,请用“《小标题》- form”符号标记出来",
    "prompt_2_formula":"需要生成公式的小标题,请用“《小标题》- formula”符号标记出来",
    "prompt_2_picture":"需要生成图片的小标题,请用“《小标题》- picture”符号标记出来",
    "prompt_2_code":"需要生成代码的小标题,请用“《小标题》- code”符号标记出来",
    "prompt3_multiple_tags":"如果有组合,请用&连接,例如“《小标题》- {}”,书名号内小标题必须是二级标题或者三级标题,例如“《3.4.1 xxx》- {}”,生成请严格最遵守要求的格式生成",
    "prompt3_dan_tags":"书名号内小标题必须是二级标题或者三级标题,例如“《3.4.1 xxx》- {}”,生成请严格最遵守要求的格式生成"
}

# Chinese label -> English tag used in the model's "《title》- tag" markers.
lable_to_enlable = {
    "表格": "form",
    "公式": "formula",
    "图片": "picture",
    "代码": "code"
}

# Request id (as sent in "table_and_formulas") -> Chinese label.
id_to_lable = {
    "1": "表格",
    "2": "公式",
    "3": "图片",
    "4": "代码"
}

# Word-count tier -> prompt template plus the heading counts substituted
# into it.
nums_word_dict = {
    "3000": {
        "prompt": prompt["mulu_title_Level_2"],
        "title_1_nums": "3",
        "title_2_nums": ["2"],
        "title_small_nums": ["3", "2"]
    },
    "5000": {
        "prompt": prompt["mulu_title_Level_2_1"],
        "title_1_nums": "3",
        "title_2_nums": ["2","4"],
        "title_small_nums": ["3", "2","4"]
    },
    "8000": {
        "prompt": prompt["mulu_title_Level_2_1"],
        "title_1_nums": "4",
        "title_2_nums": ["2", "4"],
        "title_small_nums": ["4", "2", "4"],
    },
    "10000": {
        "prompt": prompt["mulu_title_Level_2_1"],
        "title_1_nums": "6",
        "title_2_nums": ["2", "5"],
        "title_small_nums": ["6", "2", "5"],
    },
    "15000": {
        "prompt": prompt["mulu_title_Level_3_1"],
        "title_1_nums": "5",
        "title_2_nums": ["2", "3"],
        "title_small_nums": ["5", "2", "3"],
    },
    "20000": {
        "prompt": prompt["mulu_title_Level_3_1"],
        "title_1_nums": "6",
        "title_2_nums": ["2", "4"],
        "title_small_nums": ["6", "2", "4"],
    },
    "30000": {
        "prompt": prompt["mulu_title_Level_3_1"],
        "title_1_nums": "7",
        "title_2_nums": ["3", "5"],
        "title_small_nums": ["7", "3", "5"],
    },
    "50000": {
        "prompt": prompt["mulu_title_Level_3_2"],
        "title_1_nums": "8",
        "title_2_nums": ["3", "8"],
        "title_small_nums": ["8", "3", "8"],
    },
}


app = Flask(__name__)
app.config["JSON_AS_ASCII"] = False


def dialog_line_parse(url, text):
    """
    POST ``text`` as JSON to ``url`` and return the decoded JSON reply.

    :param url: model service url
    :param text: JSON-serialisable payload
    :return: decoded response on HTTP 200; otherwise the failure and the
        payload are printed and an empty list is returned
    """
    response = requests.post(
        url,
        json=text,
        timeout=1000
    )
    if response.status_code == 200:
        return response.json()

    print("【{}】 Failed to get a proper response from remote "
          "server. Status Code: {}. Response: {}"
          "".format(url, response.status_code, response.text))
    print(text)
    return []


def request_api_chatgpt(prompt):
    '''
    Submit one prompt to the predict service (``chatgpt_url_predict``).

    The request body carries ``content``, ``model``, ``top_p`` and
    ``temperature``.

    :param prompt: full prompt text, sent as ``content``
    :return: decoded JSON reply on HTTP 200, otherwise ``{}`` after
        printing the failure
    '''
    data = {
        "content": prompt,
        "model": "gpt-4-turbo",
        "top_p": 0.95,
        "temperature": 0.9
    }
    response = requests.post(
        chatgpt_url_predict,
        json=data,
        timeout=100000
    )
    if response.status_code == 200:
        return response.json()

    print("Failed to get a proper response from remote "
          "server. Status Code: {}. Response: {}"
          "".format(response.status_code, response.text))
    return {}


def uuid_search(uuid):
    """
    Query the search service (``chatgpt_url_search``) for a task id.

    :param uuid: task id, sent as ``{"id": uuid}``
    :return: decoded JSON reply on HTTP 200, otherwise ``{}`` after
        printing the failure
    """
    data = {
        "id": uuid
    }
    response = requests.post(
        chatgpt_url_search,
        json=data,
        timeout=100000
    )
    if response.status_code == 200:
        return response.json()

    print("Failed to get a proper response from remote "
          "server. Status Code: {}. Response: {}"
          "".format(response.status_code, response.text))
    return {}


def uuid_search_mp(results):
    """
    Poll the search service until every submitted task has a result.

    :param results: replies from request_api_chatgpt(); each must expose the
        task id at ``["texts"]["id"]``
    :return: list of result texts, in the same order as ``results``
    """
    results_list = [""] * len(results)
    # Track unfinished tasks by index so finished ones are not queried again
    # and an empty-but-successful text cannot keep the loop alive forever.
    pending = set(range(len(results)))
    while pending:
        for i in sorted(pending):
            result = uuid_search(results[i]["texts"]["id"])
            # uuid_search() returns {} on HTTP failure; .get() keeps polling
            # in that case instead of raising KeyError.
            if result.get("code") == 200:
                results_list[i] = result["text"]
                pending.discard(i)
        if pending:
            time.sleep(3)
    return results_list


def get_multiple_urls(urls):
    """
    Submit all prompts concurrently, then wait for all of their results.

    :param urls: sequence of ``[index, prompt]`` pairs; only the prompt
        (element 1) is used
    :return: list of generated texts, one per input pair, in input order
    """
    input_values = [item[1] for item in urls]
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map preserves input order.
        submitted = list(executor.map(request_api_chatgpt, input_values))

    # Polling is a single sequential call; no second thread pool is needed.
    return list(uuid_search_mp(submitted))


pattern_mulu = "目录是“(.*)”,我现在需要论文中"
# Level-1 heading: Chinese numeral followed by "、" or ".".
pantten_biaoti_0 = r'^[一二三四五六七八九][、.]\s{0,}?[\u4e00-\u9fa5a-zA-Z]+'
# Level-2 heading: "d.d" followed by whitespace.
pantten_biaoti_1 = r"^\d+\.\d+\s{1,}?.*"
# Heading quoted in 《...》 inside the model's answer.
pantten_biaoti_shibie = r"《(.*)》"
# Dotted section number such as 1.1 or 3.4.1.
pantten_biaoti_xuhao = r"(\d+(?:\.\d+)+)"


split_str_list = ["\\n", "\n"]


def _split_mulu(mulu):
    """Split a catalogue on the first separator of split_str_list it contains."""
    sp = "\n"
    for sp in split_str_list:
        if sp in mulu:
            break
    # When no separator is present ``sp`` is the last candidate, so the
    # split yields the whole text as a single element.
    return mulu.split(sp)


def ulit_mulu(mulu):
    """
    Return the catalogue lines that have no child heading.

    A level-1 heading is kept only when it is the last line or is followed
    by another level-1 heading. A level-2 heading is kept only when it is
    the last line or is followed by a level-1 or level-2 heading. Every
    other line is kept as is.

    :param mulu: catalogue text
    :return: list of stripped lines
    """
    lines = [line.strip() for line in _split_mulu(mulu)]
    bioati_list = []
    for i, title_small in enumerate(lines):
        title_small_next = lines[i + 1] if i + 1 < len(lines) else None
        if re.search(pantten_biaoti_0, title_small):
            if title_small_next is None or re.search(pantten_biaoti_0, title_small_next):
                bioati_list.append(title_small)
        elif re.search(pantten_biaoti_1, title_small):
            if (title_small_next is None
                    or re.search(pantten_biaoti_0, title_small_next)
                    or re.search(pantten_biaoti_1, title_small_next)):
                bioati_list.append(title_small)
        else:
            bioati_list.append(title_small)
    return bioati_list


def panduankongzhi(content):
    """
    Detect which content tags appear in a marked heading line.

    :param content: one line of the model's answer
    :return: list of ids in detection order (2=formula, 1=form, 3=picture,
        4=code)
    """
    kongzhi_list = []
    # "formula" is checked and stripped first because it contains "form".
    for tag, tag_id in (("formula", 2), ("form", 1), ("picture", 3), ("code", 4)):
        if tag in content:
            content = str(content).replace(tag, "")
            kongzhi_list.append(tag_id)
    return kongzhi_list


def mulu_kongzhi(data):
    """
    Merge the model's tag markers into each catalogue.

    :param data: list of ``[catalogue_text, marker_text]`` pairs
    :return: one list per pair holding the catalogue's non-blank lines; a
        line that received a marker is replaced by
        ``[title, marker_line, tag_ids]``
    """
    data_new = []
    for item in data:
        mulu = item[0]
        mulu_list = ulit_mulu(mulu)
        mulu_list_quan = [i for i in _split_mulu(mulu) if str(i).strip(" ") != ""]
        output_list_new = [i for i in str(item[1]).split("\n") if str(i).strip(" ") != ""]

        # Parallel lists: quoted title and the full answer line it came from.
        mulu_xiaobiaoti_index = []
        mulu_xiaobiaoti_list = []
        not_mulu_xiaobiaoti_list = []
        for line in output_list_new:
            xiaobiaoti_re = re.findall(pantten_biaoti_shibie, line, re.DOTALL)
            if xiaobiaoti_re:
                xiaobiaoti = xiaobiaoti_re[0]
                mulu_xiaobiaoti_index.append(xiaobiaoti)
                mulu_xiaobiaoti_list.append(line)
                if xiaobiaoti not in mulu_list:
                    not_mulu_xiaobiaoti_list.append(xiaobiaoti)

        # A marker placed on a heading that has children is moved to the next
        # catalogue line, or dropped when that line is already marked.
        for xiaobiaoti in not_mulu_xiaobiaoti_list:
            try:
                xiaobiaoti_index = mulu_list_quan.index(xiaobiaoti)
                next_title = mulu_list_quan[xiaobiaoti_index + 1]
                index_ = mulu_xiaobiaoti_index.index(xiaobiaoti)
            except (ValueError, IndexError):
                # Title not in the catalogue, no following line, or the
                # marker was already consumed by an earlier iteration.
                continue
            if next_title in mulu_xiaobiaoti_index:
                mulu_xiaobiaoti_index.pop(index_)
                mulu_xiaobiaoti_list.pop(index_)
            else:
                mulu_xiaobiaoti_index[index_] = next_title
                mulu_xiaobiaoti_list[index_] = mulu_xiaobiaoti_list[index_].replace(
                    mulu_list_quan[xiaobiaoti_index], next_title)

        # Keep only markers whose title carries a dotted section number;
        # a later duplicate title overwrites the earlier line.
        mulu_xiaobiaoti_dict = {}
        for xiaobiaoti, line in zip(mulu_xiaobiaoti_index, mulu_xiaobiaoti_list):
            if re.findall(pantten_biaoti_xuhao, xiaobiaoti):
                mulu_xiaobiaoti_dict[xiaobiaoti] = line

        for xiaobiaoti, line in mulu_xiaobiaoti_dict.items():
            try:
                index_title = mulu_list_quan.index(xiaobiaoti)
            except ValueError:
                continue
            mulu_list_quan[index_title] = [xiaobiaoti, line, panduankongzhi(line)]
        data_new.append(mulu_list_quan)
    return data_new


def biaogegongshi_kongzhi_prompt(title, mulu_list, table_and_formulas_list):
    """
    Build, for each catalogue, the prompt asking which headings need
    tables / formulas / pictures / code.

    :param title: paper title
    :param mulu_list: generated catalogue texts
    :param table_and_formulas_list: requested content ids (keys of
        id_to_lable); unknown entries are ignored
    :return: one prompt per catalogue, or an empty list when no known id
        was requested
    """
    # The label lists depend only on the requested ids, so build them once.
    selected = [i for i in id_to_lable if i in table_and_formulas_list]
    if not selected:
        return []

    lable = [id_to_lable[i] for i in selected]
    lable_en = [lable_to_enlable[name] for name in lable]
    prompt_1_kongzhi = [prompt_biaogekongzhi["prompt_1_" + en] for en in lable_en]
    # Marking instructions come from the prompt_2_* entries for every tag,
    # including picture and code.
    prompt_2_kongzhi = [prompt_biaogekongzhi["prompt_2_" + en] for en in lable_en]

    prompt_list = []
    for mulu in mulu_list:
        head = prompt_biaogekongzhi["prompt_1"].format(title, mulu, "、".join(lable))
        if len(selected) == 1:
            parts = [
                head,
                ",".join(prompt_1_kongzhi),
                ",".join(prompt_2_kongzhi),
                prompt_biaogekongzhi["prompt3_dan_tags"].format("&".join(lable_en)),
            ]
        else:
            parts = [
                head,
                ",".join(prompt_1_kongzhi),
                prompt_biaogekongzhi["prompt2"].format("、".join(lable)),
                ",".join(prompt_2_kongzhi),
                prompt_biaogekongzhi["prompt3_multiple_tags"].format("&".join(lable_en), "&".join(lable_en)),
            ]
        prompt_list.append(",".join(parts))
    return prompt_list


@app.route("/articles_directory", methods=["POST"])
def articles_directory():
    """
    Generate catalogues for a title and tag their headings.

    Expected JSON fields: ``title``, ``nums_catalogue`` (how many catalogues
    to generate), ``nums_word`` (a key of nums_word_dict) and
    ``table_and_formulas`` (comma-separated ids from id_to_lable).

    :return: JSON ``{"code": 200, "data": [...]}``; ``code`` 400 when
        ``nums_word`` is not a supported tier
    """
    payload = request.json
    title = payload["title"]
    nums_catalogue = int(payload["nums_catalogue"])
    # Accept the tier as a JSON number as well as a string.
    nums_word = str(payload["nums_word"])
    table_and_formulas_list = [
        tag.strip() for tag in str(payload["table_and_formulas"]).split(",")
    ]

    if nums_word not in nums_word_dict:
        return jsonify({
            "code": 400,
            "data": [],
            "message": "unsupported nums_word: {}".format(nums_word)
        }), 400

    word_conf = nums_word_dict[nums_word]
    keyword = tuple([title] + word_conf["title_small_nums"])
    mulu_prompt = word_conf["prompt"] % keyword
    print(mulu_prompt)

    mulu_list = get_multiple_urls([[i, mulu_prompt] for i in range(nums_catalogue)])
    print("mulu_list", mulu_list)

    prompt_kongzhi_list = biaogegongshi_kongzhi_prompt(title, mulu_list, table_and_formulas_list)
    if prompt_kongzhi_list:
        biaogegongshi_kongzhi_res = get_multiple_urls(
            [[i, p] for i, p in enumerate(prompt_kongzhi_list)])
    else:
        # Nothing to tag: still return the plain catalogues.
        biaogegongshi_kongzhi_res = [""] * len(mulu_list)
    print("biaogegongshi_kongzhi_res", biaogegongshi_kongzhi_res)

    results = mulu_kongzhi([[i, j] for i, j in zip(mulu_list, biaogegongshi_kongzhi_res)])
    data = {
        "code": 200,
        "data": results
    }
    return jsonify(data)


if __name__ == "__main__":
    app.run(debug=False, host='0.0.0.0', port=18003)