|
@ -10,6 +10,7 @@ import time |
|
|
import re |
|
|
import re |
|
|
import logging |
|
|
import logging |
|
|
import concurrent.futures |
|
|
import concurrent.futures |
|
|
|
|
|
import socket |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
logging.basicConfig(level=logging.DEBUG, # 控制台打印的日志级别 |
|
|
logging.basicConfig(level=logging.DEBUG, # 控制台打印的日志级别 |
|
@ -41,7 +42,50 @@ pantten_biaoti_0 = '^[1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ] |
|
|
pantten_biaoti_1 = '^第[一二三四五六七八九]章\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|
|
pantten_biaoti_1 = '^第[一二三四五六七八九]章\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|
|
pantten_biaoti_2 = '^[0-9.]+\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|
|
pantten_biaoti_2 = '^[0-9.]+\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|
|
pantten_biaoti_3 = '^[((][1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][)_)][、.]{0,}?\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|
|
pantten_biaoti_3 = '^[((][1-9一二三四五六七八九ⅠⅡⅢⅣⅤⅥⅦⅧⅨ][)_)][、.]{0,}?\s{0,}?[\u4e00-\u9fa5a-zA-Z]+' |
|
|
chatgpt_url = "http://192.168.31.145:12001/predict" |
|
|
|
|
|
|
|
|
def get_host_ip():
    """
    Look up the IP address of this machine.

    A UDP socket is connected towards 8.8.8.8:80 and the local address the
    operating system bound for that socket is returned.

    :return: ip, the first element of the socket's local address
    :raises OSError: if the socket cannot be created or connected
    """
    # The context manager closes the socket on every path. The former
    # try/finally called s.close() even when socket.socket() itself had
    # failed, which raised UnboundLocalError and hid the real error.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(('8.8.8.8', 80))
        return s.getsockname()[0]
|
|
|
|
|
|
|
|
|
|
|
# Both service endpoints are addressed through this machine's own IP on
# port 12000. Resolve the address once so the two URLs always agree and the
# lookup socket is opened a single time at import.
_host_ip = str(get_host_ip())
chatgpt_url_predict = "http://{}:12000/predict".format(_host_ip)
chatgpt_url_search = "http://{}:12000/search".format(_host_ip)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def smtp_f(name):
    """
    Send a fixed alert e-mail through smtp.qq.com over SSL.

    The message body is a constant alert text; ``name`` is used as the
    subject. Delivery problems are reported on stdout and are not raised,
    so calling this from an error path cannot itself fail the caller.

    :param name: subject line of the alert mail
    :return: None
    """
    import smtplib
    from email.mime.text import MIMEText
    from email.header import Header

    sender = '838878981@qq.com'  # sending mailbox
    receivers = ['838878981@qq.com']  # receiving mailbox
    # NOTE(review): this credential is hard-coded in source control; it
    # should be moved to configuration/environment and rotated.
    auth_code = "jfqtutaiwrtdbcge"  # SMTP authorization code

    message = MIMEText('降重项目出错,紧急', 'plain', 'utf-8')
    message['From'] = Header("Sender<%s>" % sender)  # sender header
    message['To'] = Header("Receiver<%s>" % receivers[0])  # recipient header
    message['Subject'] = Header(name, 'utf-8')

    try:
        # The with-block releases the connection even when login/sendmail
        # raise; previously server.close() was skipped on any failure.
        with smtplib.SMTP_SSL('smtp.qq.com', 465) as server:
            server.login(sender, auth_code)
            server.sendmail(sender, receivers, message.as_string())
            print("邮件发送成功")
    except (smtplib.SMTPException, OSError):
        # OSError is named explicitly so it is clear that connection-level
        # failures (DNS, refused connection, TLS) are handled here as well.
        print("Error: 无法发送邮件")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class log: |
|
|
class log: |
|
@ -63,6 +107,7 @@ class log: |
|
|
print(dt, *args, file=f, **kwargs) |
|
|
print(dt, *args, file=f, **kwargs) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def dialog_line_parse(url, text): |
|
|
def dialog_line_parse(url, text): |
|
|
""" |
|
|
""" |
|
|
将数据输入模型进行分析并输出结果 |
|
|
将数据输入模型进行分析并输出结果 |
|
@ -140,18 +185,109 @@ def chulichangju_1(text, snetence_id, chulipangban_return_list, short_num): |
|
|
return chulipangban_return_list |
|
|
return chulipangban_return_list |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def get_multiple_urls(urls): |
|
|
|
|
|
# with concurrent.futures.ThreadPoolExecutor() as executor: |
|
|
|
|
|
# future_to_url = {executor.submit(dialog_line_parse, url[1], url[2]): url for url in urls} |
|
|
|
|
|
# |
|
|
|
|
|
# |
|
|
|
|
|
# results = [] |
|
|
|
|
|
# for future in concurrent.futures.as_completed(future_to_url): |
|
|
|
|
|
# url = future_to_url[future] |
|
|
|
|
|
# try: |
|
|
|
|
|
# data = future.result() |
|
|
|
|
|
# results.append((url, data)) |
|
|
|
|
|
# except Exception as e: |
|
|
|
|
|
# results.append((url, f"Error: {str(e)}")) |
|
|
|
|
|
# return results |
|
|
|
|
|
|
|
|
|
|
|
def request_api_chatgpt(prompt):
    """
    POST ``prompt`` to the predict endpoint and return the decoded reply.

    :param prompt: value sent under the "texts" key of the JSON payload
    :return: the parsed JSON body when the server answers 200; otherwise an
        empty dict, after printing the status code and response text
    """
    payload = {"texts": prompt}
    reply = requests.post(chatgpt_url_predict, json=payload, timeout=100000)

    if reply.status_code != 200:
        # Report the failure on stdout and hand back an empty result.
        failure_report = ("Failed to get a proper response from remote "
                          "server. Status Code: {}. Response: {}"
                          "").format(reply.status_code, reply.text)
        print(failure_report)
        return {}

    return reply.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def uuid_search(uuid):
    """
    POST a task id to the search endpoint and return the decoded reply.

    :param uuid: value sent under the "id" key of the JSON payload
    :return: the parsed JSON body when the server answers 200; otherwise an
        empty dict, after printing the status code and response text
    """
    payload = {"id": uuid}
    reply = requests.post(chatgpt_url_search, json=payload, timeout=100000)

    if reply.status_code != 200:
        # Report the failure on stdout and hand back an empty result.
        failure_report = ("Failed to get a proper response from remote "
                          "server. Status Code: {}. Response: {}"
                          "").format(reply.status_code, reply.text)
        print(failure_report)
        return {}

    return reply.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def uuid_search_mp(results):
    """
    Poll the search endpoint until every submitted task has a result text.

    Each round queries only the tasks that are still unfinished and then,
    if any remain, waits 3 seconds before the next round.

    :param results: sequence of submit responses; the task id of each entry
        is read from ``entry["texts"]["id"]``
    :return: list of result texts, position-aligned with ``results``
    :raises KeyError: if an entry lacks ``["texts"]["id"]`` or a reply with
        code 200 has no ``"text"`` key
    """
    results_list = [""] * len(results)
    # An empty string marks a slot that has not been filled yet.
    pending = list(range(len(results)))

    while pending:
        for i in pending:
            result = uuid_search(results[i]["texts"]["id"])
            # uuid_search returns {} when the HTTP call fails; .get keeps
            # such a round from raising KeyError so the task is retried.
            if result.get("code") == 200:
                results_list[i] = result["text"]

        # Finished tasks are dropped so they are not queried again.
        pending = [i for i in pending if results_list[i] == ""]
        if pending:
            time.sleep(3)

    return results_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_multiple_urls(urls):
    """
    Submit all prompts concurrently, wait for their results, and pair each
    result with the input entry it belongs to.

    :param urls: list of entries whose element 1 is the prompt passed to
        ``request_api_chatgpt`` (the caller builds ``[index, text]`` items)
    :return: list of ``[entry, result_text]`` pairs in input order
    """
    if not urls:
        # Nothing to submit; skip creating a thread pool.
        return []

    input_values = [entry[1] for entry in urls]

    # Submit every prompt in parallel; map keeps the replies in input order.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        submitted = list(executor.map(request_api_chatgpt, input_values))

    # Polling is one blocking call over the whole batch, so it is invoked
    # directly instead of through a thread pool holding a single task.
    texts = uuid_search_mp(submitted)

    return [[entry, text] for entry, text in zip(urls, texts)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chulipangban_test_1(snetence_id, text): |
|
|
def chulipangban_test_1(snetence_id, text): |
|
@ -312,13 +448,12 @@ def post_sentence_ulit(sentence, text_info): |
|
|
# sentence = sentence[:-1] |
|
|
# sentence = sentence[:-1] |
|
|
|
|
|
|
|
|
sentence = sentence.strip("\n") |
|
|
sentence = sentence.strip("\n") |
|
|
if sentence[-1] != text_info[0][-1]: |
|
|
# if sentence[-1] != text_info[0][-1]: |
|
|
if is_chinese(text_info[0][-1]) == True: |
|
|
# if is_chinese(text_info[0][-1]) == True: |
|
|
if is_chinese(sentence[-1]) != True: |
|
|
# if is_chinese(sentence[-1]) != True: |
|
|
sentence = sentence[:-1] |
|
|
# sentence = sentence[:-1] |
|
|
else: |
|
|
# else: |
|
|
sentence = sentence[:-1] + text_info[0][-1] |
|
|
# sentence = sentence[:-1] + text_info[0][-1] |
|
|
|
|
|
|
|
|
else: |
|
|
else: |
|
|
sentence = text_info[0] |
|
|
sentence = text_info[0] |
|
|
return sentence |
|
|
return sentence |
|
@ -335,9 +470,9 @@ def pre_sentence_ulit(sentence): |
|
|
if len(sentence) > 9: |
|
|
if len(sentence) > 9: |
|
|
|
|
|
|
|
|
if sentence[-1] != "。": |
|
|
if sentence[-1] != "。": |
|
|
text = f"User:改写下面这段文字,要求意思接近但是改动幅度比较大,字数只能多不能少,短句前后词跟上下句衔接不能有错误,并且如果结尾有标点符号,标点不能改变,如果结尾没有标点符号,不能擅自添加标点符号:\n{sentence}\nAssistant:" |
|
|
text = f"User: 改写下面半这句话,要求意思接近但是改动幅度比较大,字数只能多不能少,短句前后词跟上下句衔接不能有错误:\n{sentence}\nAssistant:" |
|
|
else: |
|
|
else: |
|
|
text = f"User:改写下面这句话,要求意思接近但是改动幅度比较大,字数只能多不能少:\n{sentence}\nAssistant:" |
|
|
text = f"User: 改写下面这句话,要求意思接近但是改动幅度比较大,字数只能多不能少:\n{sentence}\nAssistant:" |
|
|
|
|
|
|
|
|
else: |
|
|
else: |
|
|
text = f"下面词不做任何变化:\n{sentence}" |
|
|
text = f"下面词不做任何变化:\n{sentence}" |
|
@ -422,14 +557,15 @@ def main(texts: dict): |
|
|
|
|
|
|
|
|
input_data = [] |
|
|
input_data = [] |
|
|
for i in range(len(text_sentence)): |
|
|
for i in range(len(text_sentence)): |
|
|
input_data.append([i, chatgpt_url, {"texts": text_sentence[i]}]) |
|
|
# input_data.append([i, chatgpt_url, {"texts": text_sentence[i]}]) |
|
|
|
|
|
input_data.append([i, text_sentence[i]]) |
|
|
|
|
|
|
|
|
results = get_multiple_urls(input_data) |
|
|
results = get_multiple_urls(input_data) |
|
|
|
|
|
|
|
|
generated_text_list = [""] * len(input_data) |
|
|
generated_text_list = [""] * len(input_data) |
|
|
for url, result in results: |
|
|
for url, result in results: |
|
|
# print(f"Result for {url}: {result}") |
|
|
# print(f"Result for {url}: {result}") |
|
|
generated_text_list[url[0]] = result["data"] |
|
|
generated_text_list[url[0]] = result |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for i in range(len(generated_text_list)): |
|
|
for i in range(len(generated_text_list)): |
|
@ -506,6 +642,7 @@ def classify(): # 调用模型,设置最大batch_size |
|
|
if texts_list != []: |
|
|
if texts_list != []: |
|
|
return_text = {"texts": texts_list, "probabilities": None, "status_code": 200} |
|
|
return_text = {"texts": texts_list, "probabilities": None, "status_code": 200} |
|
|
else: |
|
|
else: |
|
|
|
|
|
smtp_f("drop_weight_rewrite_increase") |
|
|
return_text = {"texts": texts_list, "probabilities": None, "status_code": 400} |
|
|
return_text = {"texts": texts_list, "probabilities": None, "status_code": 400} |
|
|
|
|
|
|
|
|
load_result_path = "./new_data_logs/{}.json".format(query_id) |
|
|
load_result_path = "./new_data_logs/{}.json".format(query_id) |
|
|