From 197b758b13829b054d1401ed7a79e8906726b9ee Mon Sep 17 00:00:00 2001
From: "majiahui@haimaqingfan.com"
Date: Thu, 27 Jun 2024 11:55:46 +0800
Subject: [PATCH] Add word-count control to the directory-generation project
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .idea/deployment.xml                      |  87 ++++++++++-
 .idea/generate_articles_directory.iml     |   2 +-
 articles_directory_predict.py             |   2 +-
 articles_directory_predict_word.py        | 240 ++++++++++++++++++++++
 articles_directory_predict_word_online.py | 240 ++++++++++++++++++++++
 gunicorn_config_word.py                   |  21 +++
 gunicorn_config_word_online.py            |  21 +++
 run_api_gunicorn_word.sh                  |   1 +
 run_api_gunicorn_word_online.sh           |   1 +
 9 files changed, 612 insertions(+), 3 deletions(-)
 create mode 100644 articles_directory_predict_word.py
 create mode 100644 articles_directory_predict_word_online.py
 create mode 100644 gunicorn_config_word.py
 create mode 100644 gunicorn_config_word_online.py
 create mode 100644 run_api_gunicorn_word.sh
 create mode 100644 run_api_gunicorn_word_online.sh

diff --git a/.idea/deployment.xml b/.idea/deployment.xml
index 53a02f7..94f98bd 100644
--- a/.idea/deployment.xml
+++ b/.idea/deployment.xml
@@ -1,6 +1,6 @@
@@ -86,6 +86,20 @@
@@ -142,6 +156,27 @@
@@ -156,6 +191,13 @@
@@ -793,6 +835,48 @@
@@ -801,5 +885,6 @@
diff --git a/.idea/generate_articles_directory.iml b/.idea/generate_articles_directory.iml
index 37901ac..12ca792 100644
--- a/.idea/generate_articles_directory.iml
+++ b/.idea/generate_articles_directory.iml
@@ -5,7 +5,7 @@
diff --git a/articles_directory_predict.py b/articles_directory_predict.py
index c3d4380..050f870 100644
--- a/articles_directory_predict.py
+++ b/articles_directory_predict.py
@@ -164,4 +164,4 @@ def articles_directory():
     return jsonify(results)  # return the results
 
 if __name__ == "__main__":
-    app.run(debug=False, host='0.0.0.0', port=18000)
\ No newline at end of file
+    app.run(debug=False, host='0.0.0.0', port=18000)
diff --git a/articles_directory_predict_word.py b/articles_directory_predict_word.py
new file mode 100644
index 0000000..86d6a0a
--- /dev/null
+++ b/articles_directory_predict_word.py
@@ -0,0 +1,240 @@
+from flask import Flask, jsonify
+from flask import request
+import time
+import concurrent.futures
+import requests
+import socket
+import os
+
+
+class log:
+    def __init__(self):
+        pass
+
+    def log(*args, **kwargs):
+        format = '%Y/%m/%d-%H:%M:%S'
+        format_h = '%Y-%m-%d'
+        value = time.localtime(int(time.time()))
+        dt = time.strftime(format, value)
+        dt_log_file = time.strftime(format_h, value)
+        log_file = 'log_file/access-%s' % dt_log_file + ".log"
+        if not os.path.exists(log_file):
+            with open(os.path.join(log_file), 'w', encoding='utf-8') as f:
+                print(dt, *args, file=f, **kwargs)
+        else:
+            with open(os.path.join(log_file), 'a+', encoding='utf-8') as f:
+                print(dt, *args, file=f, **kwargs)
+
+
+def get_host_ip():
+    """
+    Look up the IP address of the local machine.
+    :return: ip
+    """
+    try:
+        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+        s.connect(('8.8.8.8', 80))
+        ip = s.getsockname()[0]
+    finally:
+        s.close()
+
+    return ip
+
+
+chatgpt_url_predict = "http://{}:12000/predict".format(str(get_host_ip()))
+chatgpt_url_search = "http://{}:12000/search".format(str(get_host_ip()))
+
+prompt = {
"mulu_title_Level_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题\n\nAssistant:", + "mulu_title_Level_2_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题\n\nAssistant:", + "mulu_title_Level_3": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中\n\nAssistant:", + "mulu_title_Level_3_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中\n\nAssistant:", + "mulu_title_Level_3_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好\n\nAssistant:" +} + +nums_word_dict = { + "3000": { + "prompt": prompt["mulu_title_Level_2"], + "title_1_nums": "3", + "title_2_nums": ["2"], + "title_small_nums": ["3", "2"] + }, + "5000": { + "prompt": prompt["mulu_title_Level_2_1"], + "title_1_nums": "3", + "title_2_nums": ["2","3"], + "title_small_nums": ["3", "2","3"] + }, + "8000": { + "prompt": prompt["mulu_title_Level_2_1"], + "title_1_nums": "4", + "title_2_nums": ["2", "4"], + "title_small_nums": ["4", "2", "4"], + }, + "10000": { + "prompt": prompt["mulu_title_Level_2_1"], + "title_1_nums": "6", + "title_2_nums": ["3", "5"], + "title_small_nums": ["6", "3", "5"], + }, + "15000": { + "prompt": prompt["mulu_title_Level_3_1"], + "title_1_nums": "5", + "title_2_nums": ["3", "5"], + "title_small_nums": ["5", "3", "5"], + }, + "20000": { + "prompt": prompt["mulu_title_Level_3_2"], + "title_1_nums": "7", + "title_2_nums": ["3", "5"], + "title_small_nums": ["7", "3", "5"], + }, + "30000": { + "prompt": prompt["mulu_title_Level_3_2"], + "title_1_nums": "8", + "title_2_nums": ["5", "8"], + "title_small_nums": ["8", "5", "8"], + }, +} + +app = Flask(__name__) +app.config["JSON_AS_ASCII"] = False + +def dialog_line_parse(url, text): + """ + 将数据输入模型进行分析并输出结果 + :param url: 模型url + :param text: 进入模型的数据 + :return: 模型返回结果 + """ + + response = requests.post( + url, + json=text, + timeout=1000 + ) + if response.status_code == 200: + return response.json() + else: + # logger.error( + # "【{}】 Failed to get a proper response from remote " + # "server. Status Code: {}. Response: {}" + # "".format(url, response.status_code, response.text) + # ) + print("【{}】 Failed to get a proper response from remote " + "server. Status Code: {}. Response: {}" + "".format(url, response.status_code, response.text)) + print(text) + return [] + + +def request_api_chatgpt(prompt): + data = { + "texts": prompt + } + response = requests.post( + chatgpt_url_predict, + json=data, + timeout=100000 + ) + if response.status_code == 200: + return response.json() + else: + # logger.error( + # "【{}】 Failed to get a proper response from remote " + # "server. Status Code: {}. Response: {}" + # "".format(url, response.status_code, response.text) + # ) + print("Failed to get a proper response from remote " + "server. Status Code: {}. Response: {}" + "".format(response.status_code, response.text)) + return {} + + +def uuid_search(uuid): + data = { + "id": uuid + } + response = requests.post( + chatgpt_url_search, + json=data, + timeout=100000 + ) + if response.status_code == 200: + return response.json() + else: + # logger.error( + # "【{}】 Failed to get a proper response from remote " + # "server. Status Code: {}. 
+
+app = Flask(__name__)
+app.config["JSON_AS_ASCII"] = False
+
+
+def dialog_line_parse(url, text):
+    """
+    Send a payload to the model service and return its result.
+    :param url: model service url
+    :param text: payload passed to the model
+    :return: parsed model response
+    """
+    response = requests.post(
+        url,
+        json=text,
+        timeout=1000
+    )
+    if response.status_code == 200:
+        return response.json()
+    else:
+        print("【{}】 Failed to get a proper response from remote "
+              "server. Status Code: {}. Response: {}"
+              "".format(url, response.status_code, response.text))
+        print(text)
+        return []
+
+
+def request_api_chatgpt(prompt):
+    data = {
+        "texts": prompt
+    }
+    response = requests.post(
+        chatgpt_url_predict,
+        json=data,
+        timeout=100000
+    )
+    if response.status_code == 200:
+        return response.json()
+    else:
+        print("Failed to get a proper response from remote "
+              "server. Status Code: {}. Response: {}"
+              "".format(response.status_code, response.text))
+        return {}
+
+
+def uuid_search(uuid):
+    data = {
+        "id": uuid
+    }
+    response = requests.post(
+        chatgpt_url_search,
+        json=data,
+        timeout=100000
+    )
+    if response.status_code == 200:
+        return response.json()
+    else:
+        print("Failed to get a proper response from remote "
+              "server. Status Code: {}. Response: {}"
+              "".format(response.status_code, response.text))
+        return {}
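+
+
+# Note on the flow below (descriptive comments only, inferred from how the
+# fields are accessed in this file): request_api_chatgpt() submits a prompt to
+# /predict and the response carries a task id under ["texts"]["id"];
+# uuid_search_mp() then polls /search with each id every 3 seconds until every
+# task reports code == 200, collecting the generated text from ["text"].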
"User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中\n\nAssistant:", + "mulu_title_Level_3_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中\n\nAssistant:", + "mulu_title_Level_3_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好\n\nAssistant:" +} + +nums_word_dict = { + "3000": { + "prompt": prompt["mulu_title_Level_2"], + "title_1_nums": "3", + "title_2_nums": ["2"], + "title_small_nums": ["3", "2"] + }, + "5000": { + "prompt": prompt["mulu_title_Level_2_1"], + "title_1_nums": "3", + "title_2_nums": ["2","3"], + "title_small_nums": ["3", "2","3"] + }, + "8000": { + "prompt": prompt["mulu_title_Level_2_1"], + "title_1_nums": "4", + "title_2_nums": ["2", "4"], + "title_small_nums": ["4", "2", "4"], + }, + "10000": { + "prompt": prompt["mulu_title_Level_2_1"], + "title_1_nums": "6", + "title_2_nums": ["3", "5"], + "title_small_nums": ["6", "3", "5"], + }, + "15000": { + "prompt": prompt["mulu_title_Level_3_1"], + "title_1_nums": "5", + "title_2_nums": ["3", "5"], + "title_small_nums": ["5", "3", "5"], + }, + "20000": { + "prompt": prompt["mulu_title_Level_3_2"], + "title_1_nums": "7", + "title_2_nums": ["3", "5"], + "title_small_nums": ["7", "3", "5"], + }, + "30000": { + "prompt": prompt["mulu_title_Level_3_2"], + "title_1_nums": "8", + "title_2_nums": ["5", "8"], + "title_small_nums": ["8", "5", "8"], + }, +} + +app = Flask(__name__) +app.config["JSON_AS_ASCII"] = False + +def dialog_line_parse(url, text): + """ + 将数据输入模型进行分析并输出结果 + :param url: 模型url + :param text: 进入模型的数据 + :return: 模型返回结果 + """ + + response = requests.post( + url, + json=text, + timeout=1000 + ) + if response.status_code == 200: + return response.json() + else: + # logger.error( + # "【{}】 Failed to get a proper response from remote " + # "server. Status Code: {}. Response: {}" + # "".format(url, response.status_code, response.text) + # ) + print("【{}】 Failed to get a proper response from remote " + "server. Status Code: {}. Response: {}" + "".format(url, response.status_code, response.text)) + print(text) + return [] + + +def request_api_chatgpt(prompt): + data = { + "texts": prompt + } + response = requests.post( + chatgpt_url_predict, + json=data, + timeout=100000 + ) + if response.status_code == 200: + return response.json() + else: + # logger.error( + # "【{}】 Failed to get a proper response from remote " + # "server. Status Code: {}. Response: {}" + # "".format(url, response.status_code, response.text) + # ) + print("Failed to get a proper response from remote " + "server. Status Code: {}. Response: {}" + "".format(response.status_code, response.text)) + return {} + + +def uuid_search(uuid): + data = { + "id": uuid + } + response = requests.post( + chatgpt_url_search, + json=data, + timeout=100000 + ) + if response.status_code == 200: + return response.json() + else: + # logger.error( + # "【{}】 Failed to get a proper response from remote " + # "server. Status Code: {}. Response: {}" + # "".format(url, response.status_code, response.text) + # ) + print("Failed to get a proper response from remote " + "server. Status Code: {}. 
+
+
+def uuid_search_mp(results):
+    results_list = [""] * len(results)
+    while True:
+        tiaochu_bool = True
+
+        for i in results_list:
+            if i == "":
+                tiaochu_bool = False
+                break
+
+        if tiaochu_bool:
+            break
+
+        for i in range(len(results)):
+            uuid = results[i]["texts"]["id"]
+
+            result = uuid_search(uuid)
+            if result["code"] == 200:
+                results_list[i] = result["text"]
+        time.sleep(3)
+    return results_list
+
+
+def get_multiple_urls(urls):
+    input_values = []
+
+    for i in urls:
+        input_values.append(i[1])
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # submit all prompts to /predict concurrently via map
+        results = list(executor.map(request_api_chatgpt, input_values))
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # poll /search for the whole batch via map
+        results = list(executor.map(uuid_search_mp, [results]))
+
+    return_list = []
+    for i in results[0]:
+        data = {
+            "code": 200,
+            "data": i
+        }
+        return_list.append(data)
+    return return_list
+
+
+@app.route("/articles_directory", methods=["POST"])
+def articles_directory():
+    title = request.json["title"]  # paper title from the user request
+    nums_catalogue = request.json["nums_catalogue"]
+    nums_word = request.json["nums_word"]  # requested word count, e.g. "5000"
+    nums_catalogue = int(nums_catalogue)
+    keyword = tuple([title] + nums_word_dict[nums_word]["title_small_nums"])
+    prompt = nums_word_dict[nums_word]["prompt"] % keyword
+    print(prompt)
+    input_data = []
+    for i in range(nums_catalogue):
+        input_data.append([i, prompt])
+
+    results = get_multiple_urls(input_data)
+    log.log('text:{},'.format(prompt))
+    return jsonify(results)  # return the results
+
+
+if __name__ == "__main__":
+    app.run(debug=False, host='0.0.0.0', port=18002)
diff --git a/gunicorn_config_word.py b/gunicorn_config_word.py
new file mode 100644
index 0000000..b19db78
--- /dev/null
+++ b/gunicorn_config_word.py
@@ -0,0 +1,21 @@
+# number of parallel worker processes
+workers = 2
+# bind address and internal port (18001 here; change as needed)
+bind = '0.0.0.0:18001'
+
+loglevel = 'debug'
+
+worker_class = "gevent"
+# run as a daemon (the service keeps running after the launching session closes)
+daemon = True
+# timeout of 120s (the gunicorn default is 30s); adjust as needed
+timeout = 120
+# paths for the access log and the error log
+accesslog = './logs/access_word.log'
+errorlog = './logs/error_word.log'
+# access_log_format = '%(h) - %(t)s - %(u)s - %(s)s %(H)s'
+# errorlog = '-'  # log to stdout
+
+
+# maximum number of concurrent connections per worker
+worker_connections = 20000
diff --git a/gunicorn_config_word_online.py b/gunicorn_config_word_online.py
new file mode 100644
index 0000000..d69be7b
--- /dev/null
+++ b/gunicorn_config_word_online.py
@@ -0,0 +1,21 @@
+# number of parallel worker processes
+workers = 2
+# bind address and internal port (18002 here; change as needed)
+bind = '0.0.0.0:18002'
+
+loglevel = 'debug'
+
+worker_class = "gevent"
+# run as a daemon (the service keeps running after the launching session closes)
+daemon = True
+# timeout of 120s (the gunicorn default is 30s); adjust as needed
+timeout = 120
+# paths for the access log and the error log
+accesslog = './logs/access_word_online.log'
+errorlog = './logs/error_word_online.log'
+# access_log_format = '%(h) - %(t)s - %(u)s - %(s)s %(H)s'
+# errorlog = '-'  # log to stdout
+
+
+# maximum number of concurrent connections per worker
+worker_connections = 20000
diff --git a/run_api_gunicorn_word.sh b/run_api_gunicorn_word.sh
new file mode 100644
index 0000000..b35abd7
--- /dev/null
+++ b/run_api_gunicorn_word.sh
@@ -0,0 +1 @@
+gunicorn articles_directory_predict_word:app -c gunicorn_config_word.py
diff --git a/run_api_gunicorn_word_online.sh b/run_api_gunicorn_word_online.sh
new file mode 100644
index 0000000..9cdbf74
--- /dev/null
+++ b/run_api_gunicorn_word_online.sh
@@ -0,0 +1 @@
+gunicorn articles_directory_predict_word_online:app -c gunicorn_config_word_online.py
\ No newline at end of file
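
Usage sketch (illustrative only; the endpoint, port, and field names are taken
from articles_directory_predict_word.py and gunicorn_config_word.py above, the
title value is a placeholder, and the upstream model service on port 12000 is
assumed to be running):

    import requests

    resp = requests.post(
        "http://127.0.0.1:18001/articles_directory",
        json={
            "title": "Example paper title",  # paper title to outline
            "nums_catalogue": "2",           # number of candidate directories
            "nums_word": "5000",             # must be a key of nums_word_dict
        },
    )
    print(resp.json())  # list of {"code": 200, "data": "<generated directory>"}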