Browse Source

生成目录项目增加字数控制

master
majiahui@haimaqingfan.com 11 months ago
parent
commit
197b758b13
  1. 87
      .idea/deployment.xml
  2. 2
      .idea/generate_articles_directory.iml
  3. 2
      articles_directory_predict.py
  4. 240
      articles_directory_predict_word.py
  5. 240
      articles_directory_predict_word_online.py
  6. 21
      gunicorn_config_word.py
  7. 21
      gunicorn_config_word_online.py
  8. 1
      run_api_gunicorn_word.sh
  9. 1
      run_api_gunicorn_word_online.sh

87
.idea/deployment.xml

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<component name="PublishConfigData" autoUpload="Always" serverName="majiahui@192.168.31.149:22 (3)" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="majiahui@104.244.90.248:28385">
<serverdata>
@ -86,6 +86,20 @@
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.145:22 (13)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.145:22 (14)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.145:22 (2)">
<serverdata>
<mappings>
@ -142,6 +156,27 @@
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.149:22">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.149:22 (2)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.149:22 (3)">
<serverdata>
<mappings>
<mapping deploy="/home/majiahui/project/generate_articles_directory" local="$PROJECT_DIR$" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22">
<serverdata>
<mappings>
@ -156,6 +191,13 @@
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 (100)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 (11)">
<serverdata>
<mappings>
@ -793,6 +835,48 @@
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 (94)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 (95)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 (96)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 (97)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 (98)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 (99)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="majiahui@192.168.31.74:22 password">
<serverdata>
<mappings>
@ -801,5 +885,6 @@
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ALWAYS" />
</component>
</project>

2
.idea/generate_articles_directory.iml

@ -5,7 +5,7 @@
</component>
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="Python 3.8 (ldm)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TemplatesService">

2
articles_directory_predict.py

@ -164,4 +164,4 @@ def articles_directory():
return jsonify(results) # 返回结果
if __name__ == "__main__":
app.run(debug=False, host='0.0.0.0', port=18000)
app.run(debug=False, host='0.0.0.0', port=18000)

240
articles_directory_predict_word.py

@ -0,0 +1,240 @@
from flask import Flask, jsonify
from flask import request
import time
import concurrent.futures
import requests
import socket
import os
class log:
    """Minimal file logger that appends timestamped lines to a per-day file."""

    def __init__(self):
        pass

    @staticmethod
    def log(*args, **kwargs):
        """Append one timestamped line to ``log_file/access-YYYY-MM-DD.log``.

        :param args: values written after the timestamp, rendered as ``print`` renders them
        :param kwargs: extra keyword arguments forwarded to ``print`` (e.g. ``sep``, ``end``)
        """
        now = time.localtime(int(time.time()))
        stamp = time.strftime('%Y/%m/%d-%H:%M:%S', now)
        day = time.strftime('%Y-%m-%d', now)
        log_file = 'log_file/access-%s' % day + ".log"
        # Create the directory on first use; a missing "log_file/" directory
        # used to make every call fail with FileNotFoundError.
        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        # Append mode creates the file when it is absent, so a separate
        # "does the file exist" branch is unnecessary.
        with open(log_file, 'a', encoding='utf-8') as f:
            print(stamp, *args, file=f, **kwargs)
def get_host_ip():
    """
    Look up the local IP address of this machine.

    Connects a UDP socket towards 8.8.8.8:80 and reads back the local
    address the socket was bound to.

    :return: ip
    :raises OSError: propagated unchanged from the socket calls
    """
    # The ``with`` block closes the socket on every path. The previous
    # ``finally: s.close()`` raised NameError (hiding the real error) when
    # creating the socket itself failed, because ``s`` was never bound.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(('8.8.8.8', 80))
        return s.getsockname()[0]
# Resolve the local address once and reuse it for both endpoints
# (it used to be looked up separately for each URL).
_host_ip = str(get_host_ip())
chatgpt_url_predict = "http://{}:12000/predict".format(_host_ip)
chatgpt_url_search = "http://{}:12000/search".format(_host_ip)
# Prompt templates, keyed by name. Each value is a %-format string: the first
# %s receives the paper title and the remaining %s receive heading counts
# (see how articles_directory fills them from nums_word_dict).
# "mulu_title_Level_3" is not referenced by nums_word_dict in this file.
prompt = {
"mulu_title_Level_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题\n\nAssistant:",
"mulu_title_Level_2_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题\n\nAssistant:",
"mulu_title_Level_3": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中\n\nAssistant:",
"mulu_title_Level_3_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中\n\nAssistant:",
"mulu_title_Level_3_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好\n\nAssistant:"
}
# Presets per target word count, keyed by the "nums_word" string sent in the
# request body.
#   "prompt"           - the template used for that word count
#   "title_small_nums" - values substituted into the template after the title;
#                        its length must equal the number of %s placeholders
#                        that follow the title in the chosen template
#   "title_1_nums" / "title_2_nums" - not read anywhere in the visible code;
#                        they repeat the values held in "title_small_nums"
nums_word_dict = {
"3000": {
"prompt": prompt["mulu_title_Level_2"],
"title_1_nums": "3",
"title_2_nums": ["2"],
"title_small_nums": ["3", "2"]
},
"5000": {
"prompt": prompt["mulu_title_Level_2_1"],
"title_1_nums": "3",
"title_2_nums": ["2","3"],
"title_small_nums": ["3", "2","3"]
},
"8000": {
"prompt": prompt["mulu_title_Level_2_1"],
"title_1_nums": "4",
"title_2_nums": ["2", "4"],
"title_small_nums": ["4", "2", "4"],
},
"10000": {
"prompt": prompt["mulu_title_Level_2_1"],
"title_1_nums": "6",
"title_2_nums": ["3", "5"],
"title_small_nums": ["6", "3", "5"],
},
"15000": {
"prompt": prompt["mulu_title_Level_3_1"],
"title_1_nums": "5",
"title_2_nums": ["3", "5"],
"title_small_nums": ["5", "3", "5"],
},
"20000": {
"prompt": prompt["mulu_title_Level_3_2"],
"title_1_nums": "7",
"title_2_nums": ["3", "5"],
"title_small_nums": ["7", "3", "5"],
},
"30000": {
"prompt": prompt["mulu_title_Level_3_2"],
"title_1_nums": "8",
"title_2_nums": ["5", "8"],
"title_small_nums": ["8", "5", "8"],
},
}
# Flask application object; this is the ``app`` the gunicorn launcher loads.
app = Flask(__name__)
# Intended to keep non-ASCII characters unescaped in JSON responses.
# NOTE(review): newer Flask releases reportedly ignore JSON_AS_ASCII in favour
# of ``app.json.ensure_ascii`` - confirm against the deployed Flask version.
app.config["JSON_AS_ASCII"] = False
def dialog_line_parse(url, text):
    """
    Post data to the model service and return its result.

    :param url: model url
    :param text: JSON-serialisable payload sent to the model
    :return: decoded JSON body on HTTP 200, otherwise an empty list
    """
    response = requests.post(
        url,
        json=text,
        timeout=1000
    )
    if response.status_code == 200:
        return response.json()

    # The opening bracket around the url was missing from this message.
    print("【{}】 Failed to get a proper response from remote "
          "server. Status Code: {}. Response: {}"
          "".format(url, response.status_code, response.text))
    print(text)
    return []
def request_api_chatgpt(prompt):
    """
    Submit one prompt to the predict endpoint.

    :param prompt: prompt text, posted under the "texts" key
    :return: decoded JSON body on HTTP 200, otherwise an empty dict
    """
    reply = requests.post(
        chatgpt_url_predict,
        json={"texts": prompt},
        timeout=100000
    )
    if reply.status_code != 200:
        print("Failed to get a proper response from remote "
              "server. Status Code: {}. Response: {}"
              "".format(reply.status_code, reply.text))
        return {}
    return reply.json()
def uuid_search(uuid):
    """
    Query the search endpoint for the job identified by ``uuid``.

    :param uuid: job id, posted under the "id" key
    :return: decoded JSON body on HTTP 200, otherwise an empty dict
    """
    reply = requests.post(
        chatgpt_url_search,
        json={"id": uuid},
        timeout=100000
    )
    if reply.status_code != 200:
        print("Failed to get a proper response from remote "
              "server. Status Code: {}. Response: {}"
              "".format(reply.status_code, reply.text))
        return {}
    return reply.json()
def uuid_search_mp(results):
results_list = [""] * len(results)
while True:
tiaochu_bool = True
for i in results_list:
if i == "":
tiaochu_bool = False
break
if tiaochu_bool == True:
break
for i in range(len(results)):
uuid = results[i]["texts"]["id"]
result = uuid_search(uuid)
if result["code"] == 200:
results_list[i] = result["text"]
time.sleep(3)
return results_list
def get_multiple_urls(urls):
    """
    Submit every prompt concurrently, then wait for all generated texts.

    :param urls: sequence of ``[index, prompt]`` pairs; only the prompt
        (element 1) is used
    :return: list of ``{"code": 200, "data": text}`` dicts, one per input,
        in input order
    """
    prompts = [item[1] for item in urls]
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # map() returns the responses in the same order as the prompts.
        submitted = list(executor.map(request_api_chatgpt, prompts))
    # Polling is a single sequential task, so it is called directly; wrapping
    # it in a second one-task thread pool only added overhead.
    texts = uuid_search_mp(submitted)
    return [{"code": 200, "data": text} for text in texts]
@app.route("/articles_directory", methods=["POST"])
def articles_directory():
    """
    Generate several candidate tables of contents for a paper title.

    Reads a JSON body with ``title`` (paper title), ``nums_catalogue`` (how
    many candidates to generate) and ``nums_word`` (a key of ``nums_word_dict``).

    :return: JSON list built by ``get_multiple_urls``; a JSON error with HTTP
        400 when ``nums_word`` is unsupported or ``nums_catalogue`` is not an
        integer
    """
    payload = request.json
    title = payload["title"]
    # Clients may send the word count as a JSON number; the presets are keyed
    # by string, so normalise before the lookup.
    nums_word = str(payload["nums_word"])
    preset = nums_word_dict.get(nums_word)
    if preset is None:
        return jsonify({"code": 400, "msg": "unsupported nums_word: {}".format(nums_word)}), 400
    try:
        nums_catalogue = int(payload["nums_catalogue"])
    except (TypeError, ValueError):
        return jsonify({"code": 400, "msg": "nums_catalogue must be an integer"}), 400

    # Title first, then the heading counts, in template placeholder order.
    filled_prompt = preset["prompt"] % tuple([title] + preset["title_small_nums"])
    print(filled_prompt)
    # The same prompt is submitted once per requested candidate.
    input_data = [[i, filled_prompt] for i in range(nums_catalogue)]
    results = get_multiple_urls(input_data)
    log.log('text:{},'.format(filled_prompt))
    return jsonify(results)
if __name__ == "__main__":
    # Direct-run entry point; uses the same port the gunicorn config binds.
    app.run(host='0.0.0.0', port=18001, debug=False)

240
articles_directory_predict_word_online.py

@ -0,0 +1,240 @@
from flask import Flask, jsonify
from flask import request
import time
import concurrent.futures
import requests
import socket
import os
class log:
    """Minimal file logger that appends timestamped lines to a per-day file."""

    def __init__(self):
        pass

    @staticmethod
    def log(*args, **kwargs):
        """Append one timestamped line to ``log_file/access-YYYY-MM-DD.log``.

        :param args: values written after the timestamp, rendered as ``print`` renders them
        :param kwargs: extra keyword arguments forwarded to ``print`` (e.g. ``sep``, ``end``)
        """
        now = time.localtime(int(time.time()))
        stamp = time.strftime('%Y/%m/%d-%H:%M:%S', now)
        day = time.strftime('%Y-%m-%d', now)
        log_file = 'log_file/access-%s' % day + ".log"
        # Create the directory on first use; a missing "log_file/" directory
        # used to make every call fail with FileNotFoundError.
        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        # Append mode creates the file when it is absent, so a separate
        # "does the file exist" branch is unnecessary.
        with open(log_file, 'a', encoding='utf-8') as f:
            print(stamp, *args, file=f, **kwargs)
def get_host_ip():
    """
    Look up the local IP address of this machine.

    Connects a UDP socket towards 8.8.8.8:80 and reads back the local
    address the socket was bound to.

    :return: ip
    :raises OSError: propagated unchanged from the socket calls
    """
    # The ``with`` block closes the socket on every path. The previous
    # ``finally: s.close()`` raised NameError (hiding the real error) when
    # creating the socket itself failed, because ``s`` was never bound.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(('8.8.8.8', 80))
        return s.getsockname()[0]
# Resolve the local address once and reuse it for both endpoints
# (it used to be looked up separately for each URL).
_host_ip = str(get_host_ip())
chatgpt_url_predict = "http://{}:12001/predict".format(_host_ip)
chatgpt_url_search = "http://{}:12001/search".format(_host_ip)
# Prompt templates, keyed by name. Each value is a %-format string: the first
# %s receives the paper title and the remaining %s receive heading counts
# (see how articles_directory fills them from nums_word_dict).
# "mulu_title_Level_3" is not referenced by nums_word_dict in this file.
prompt = {
"mulu_title_Level_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题\n\nAssistant:",
"mulu_title_Level_2_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题和二级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题\n\nAssistant:",
"mulu_title_Level_3": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s个二级标题;三级标题个数适中\n\nAssistant:",
"mulu_title_Level_3_1": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数适中\n\nAssistant:",
"mulu_title_Level_3_2": "User:为论文题目“%s”生成中文目录,要求只有一级标题,二级标题和三级标题,一级标题使用中文数字 例如一、xxx;二级标题使用阿拉伯数字 例如1.1 xxx;三级标题使用阿拉伯数字 例如1.1.2 xxx;一级标题生成%s个;每个一级标题包含%s-%s个二级标题;三级标题个数越多越好\n\nAssistant:"
}
# Presets per target word count, keyed by the "nums_word" string sent in the
# request body.
#   "prompt"           - the template used for that word count
#   "title_small_nums" - values substituted into the template after the title;
#                        its length must equal the number of %s placeholders
#                        that follow the title in the chosen template
#   "title_1_nums" / "title_2_nums" - not read anywhere in the visible code;
#                        they repeat the values held in "title_small_nums"
nums_word_dict = {
"3000": {
"prompt": prompt["mulu_title_Level_2"],
"title_1_nums": "3",
"title_2_nums": ["2"],
"title_small_nums": ["3", "2"]
},
"5000": {
"prompt": prompt["mulu_title_Level_2_1"],
"title_1_nums": "3",
"title_2_nums": ["2","3"],
"title_small_nums": ["3", "2","3"]
},
"8000": {
"prompt": prompt["mulu_title_Level_2_1"],
"title_1_nums": "4",
"title_2_nums": ["2", "4"],
"title_small_nums": ["4", "2", "4"],
},
"10000": {
"prompt": prompt["mulu_title_Level_2_1"],
"title_1_nums": "6",
"title_2_nums": ["3", "5"],
"title_small_nums": ["6", "3", "5"],
},
"15000": {
"prompt": prompt["mulu_title_Level_3_1"],
"title_1_nums": "5",
"title_2_nums": ["3", "5"],
"title_small_nums": ["5", "3", "5"],
},
"20000": {
"prompt": prompt["mulu_title_Level_3_2"],
"title_1_nums": "7",
"title_2_nums": ["3", "5"],
"title_small_nums": ["7", "3", "5"],
},
"30000": {
"prompt": prompt["mulu_title_Level_3_2"],
"title_1_nums": "8",
"title_2_nums": ["5", "8"],
"title_small_nums": ["8", "5", "8"],
},
}
# Flask application object; this is the ``app`` the gunicorn launcher loads.
app = Flask(__name__)
# Intended to keep non-ASCII characters unescaped in JSON responses.
# NOTE(review): newer Flask releases reportedly ignore JSON_AS_ASCII in favour
# of ``app.json.ensure_ascii`` - confirm against the deployed Flask version.
app.config["JSON_AS_ASCII"] = False
def dialog_line_parse(url, text):
    """
    Post data to the model service and return its result.

    :param url: model url
    :param text: JSON-serialisable payload sent to the model
    :return: decoded JSON body on HTTP 200, otherwise an empty list
    """
    response = requests.post(
        url,
        json=text,
        timeout=1000
    )
    if response.status_code == 200:
        return response.json()

    # The opening bracket around the url was missing from this message.
    print("【{}】 Failed to get a proper response from remote "
          "server. Status Code: {}. Response: {}"
          "".format(url, response.status_code, response.text))
    print(text)
    return []
def request_api_chatgpt(prompt):
    """
    Submit one prompt to the predict endpoint.

    :param prompt: prompt text, posted under the "texts" key
    :return: decoded JSON body on HTTP 200, otherwise an empty dict
    """
    reply = requests.post(
        chatgpt_url_predict,
        json={"texts": prompt},
        timeout=100000
    )
    if reply.status_code != 200:
        print("Failed to get a proper response from remote "
              "server. Status Code: {}. Response: {}"
              "".format(reply.status_code, reply.text))
        return {}
    return reply.json()
def uuid_search(uuid):
    """
    Query the search endpoint for the job identified by ``uuid``.

    :param uuid: job id, posted under the "id" key
    :return: decoded JSON body on HTTP 200, otherwise an empty dict
    """
    reply = requests.post(
        chatgpt_url_search,
        json={"id": uuid},
        timeout=100000
    )
    if reply.status_code != 200:
        print("Failed to get a proper response from remote "
              "server. Status Code: {}. Response: {}"
              "".format(reply.status_code, reply.text))
        return {}
    return reply.json()
def uuid_search_mp(results):
results_list = [""] * len(results)
while True:
tiaochu_bool = True
for i in results_list:
if i == "":
tiaochu_bool = False
break
if tiaochu_bool == True:
break
for i in range(len(results)):
uuid = results[i]["texts"]["id"]
result = uuid_search(uuid)
if result["code"] == 200:
results_list[i] = result["text"]
time.sleep(3)
return results_list
def get_multiple_urls(urls):
    """
    Submit every prompt concurrently, then wait for all generated texts.

    :param urls: sequence of ``[index, prompt]`` pairs; only the prompt
        (element 1) is used
    :return: list of ``{"code": 200, "data": text}`` dicts, one per input,
        in input order
    """
    prompts = [item[1] for item in urls]
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # map() returns the responses in the same order as the prompts.
        submitted = list(executor.map(request_api_chatgpt, prompts))
    # Polling is a single sequential task, so it is called directly; wrapping
    # it in a second one-task thread pool only added overhead.
    texts = uuid_search_mp(submitted)
    return [{"code": 200, "data": text} for text in texts]
@app.route("/articles_directory", methods=["POST"])
def articles_directory():
    """
    Generate several candidate tables of contents for a paper title.

    Reads a JSON body with ``title`` (paper title), ``nums_catalogue`` (how
    many candidates to generate) and ``nums_word`` (a key of ``nums_word_dict``).

    :return: JSON list built by ``get_multiple_urls``; a JSON error with HTTP
        400 when ``nums_word`` is unsupported or ``nums_catalogue`` is not an
        integer
    """
    payload = request.json
    title = payload["title"]
    # Clients may send the word count as a JSON number; the presets are keyed
    # by string, so normalise before the lookup.
    nums_word = str(payload["nums_word"])
    preset = nums_word_dict.get(nums_word)
    if preset is None:
        return jsonify({"code": 400, "msg": "unsupported nums_word: {}".format(nums_word)}), 400
    try:
        nums_catalogue = int(payload["nums_catalogue"])
    except (TypeError, ValueError):
        return jsonify({"code": 400, "msg": "nums_catalogue must be an integer"}), 400

    # Title first, then the heading counts, in template placeholder order.
    filled_prompt = preset["prompt"] % tuple([title] + preset["title_small_nums"])
    print(filled_prompt)
    # The same prompt is submitted once per requested candidate.
    input_data = [[i, filled_prompt] for i in range(nums_catalogue)]
    results = get_multiple_urls(input_data)
    log.log('text:{},'.format(filled_prompt))
    return jsonify(results)
if __name__ == "__main__":
    # Direct-run entry point; uses the same port the gunicorn config binds.
    app.run(host='0.0.0.0', port=18002, debug=False)

21
gunicorn_config_word.py

@ -0,0 +1,21 @@
# Number of worker processes.
workers = 2
# Listen address: port 18001 on all interfaces (change as needed).
bind = '0.0.0.0:18001'
loglevel = 'debug'
worker_class = "gevent"
# Run as a daemon (the server keeps running after the launching connection closes).
daemon = True
# Worker timeout of 120s (the default is 30s); adjust to your needs.
timeout = 120
# Paths of the access log and the error log.
# NOTE(review): "acess" looks like a typo for "access"; left untouched because
# renaming it changes the log file path.
accesslog = './logs/acess_word.log'
errorlog = './logs/error_word.log'
# access_log_format = '%(h) - %(t)s - %(u)s - %(s)s %(H)s'
# errorlog = '-' # log to standard output
# Maximum number of simultaneous connections.
worker_connections = 20000

21
gunicorn_config_word_online.py

@ -0,0 +1,21 @@
# Number of worker processes.
workers = 2
# Listen address: port 18002 on all interfaces (change as needed).
bind = '0.0.0.0:18002'
loglevel = 'debug'
worker_class = "gevent"
# Run as a daemon (the server keeps running after the launching connection closes).
daemon = True
# Worker timeout of 120s (the default is 30s); adjust to your needs.
timeout = 120
# Paths of the access log and the error log.
# NOTE(review): "acess" looks like a typo for "access"; left untouched because
# renaming it changes the log file path.
accesslog = './logs/acess_word_online.log'
errorlog = './logs/error_word_online.log'
# access_log_format = '%(h) - %(t)s - %(u)s - %(s)s %(H)s'
# errorlog = '-' # log to standard output
# Maximum number of simultaneous connections.
worker_connections = 20000

1
run_api_gunicorn_word.sh

@ -0,0 +1 @@
# Serve the Flask app from articles_directory_predict_word.py under gunicorn,
# configured by gunicorn_config_word.py.
gunicorn articles_directory_predict_word:app -c gunicorn_config_word.py

1
run_api_gunicorn_word_online.sh

@ -0,0 +1 @@
# Serve the Flask app from articles_directory_predict_word_online.py under
# gunicorn, configured by gunicorn_config_word_online.py.
gunicorn articles_directory_predict_word_online:app -c gunicorn_config_word_online.py
Loading…
Cancel
Save