Compare commits

...

8 Commits

Author SHA1 Message Date
majiahui@haimaqingfan.com 84d7d00bd0 更新算法 2 weeks ago
majiahui@haimaqingfan.com 6fa529d312 更新检测模型 1 month ago
majiahui@haimaqingfan.com 69df67d318 更新检测模型 2 months ago
majiahui@haimaqingfan.com a6038cc55d 更新检测模型 2 months ago
majiahui@haimaqingfan.com 3049530f05 更新检测模型 3 months ago
majiahui@haimaqingfan.com 820a3367ce 更新检测模型,增加边界条件 12 months ago
majiahui@haimaqingfan.com 7650bf7f3e 更新检测模型,增加边界条件 1 year ago
majiahui@haimaqingfan.com 3b069e746c 更新检测模型 1 year ago
  1. 155
      chatgpt_detector_model_predict.py
  2. 70
      flask_api.py
  3. 2
      flask_chatgpt-detector_predict_redis_search.py

155
chatgpt_detector_model_predict.py

File diff suppressed because one or more lines are too long

70
flask_api.py

@ -20,15 +20,65 @@ import uuid
import time
import json
import docx2txt
import re
from datetime import datetime
# Redis connection pool for a local instance on port 63179 (logical DB 13).
# NOTE(review): the password is hard-coded in source; consider loading it from
# configuration or the environment instead.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
# Rebinds `pool`; when both assignments are present this later one (DB 12) is
# the pool actually handed to the client below.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
# Client bound to the pool above.
# NOTE(review): presumably `decode_responses` has no effect when an explicit
# `connection_pool` is supplied (the pool's own settings apply) - confirm
# against the redis-py docs and set it on the pool if str replies are expected.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
# Redis key names. `db_key_query` is the list that request descriptors
# ({"id", "path"} JSON) are pushed onto with RPUSH later in this file.
db_key_query = 'query'
# Usage of the next two keys is not visible in this excerpt; presumably they
# track in-progress and known query ids - verify against the rest of the file.
db_key_querying = 'querying'
db_key_queryset = 'queryset'
# Batch size constant; its consumer is not visible in this excerpt.
batch_size = 32
# Matches a quoted span: "...", '...' or “...” (non-greedy, single line).
RE_DIALOG = re.compile(r"\".*?\"|\'.*?\'|“.*?”")


def get_dialogs_index(line: str):
    """Find quoted dialog spans in *line* and classify every character index.

    :param line: text to scan.
    :return: a 3-tuple ``(dialogs_text, dialogs_index, other_index)`` where
        ``dialogs_text`` is the list of matched quoted substrings (quotes
        included), ``dialogs_index`` is the list of character positions that
        fall inside any match (in ascending order), and ``other_index`` is the
        list of all remaining positions of ``line``.
    """
    dialogs_text = []
    dialogs_index = []
    # One scan yields both the matched text and its span; the pattern has no
    # groups, so match.group() is exactly what re.findall would have returned.
    for match in RE_DIALOG.finditer(line):
        dialogs_text.append(match.group())
        dialogs_index.extend(range(match.start(), match.end()))

    # Set membership keeps the complement computation linear in len(line)
    # instead of scanning the index list once per character.
    inside_dialog = set(dialogs_index)
    other_index = [i for i in range(len(line)) if i not in inside_dialog]
    return dialogs_text, dialogs_index, other_index
def chulichangju_1(text, chulipangban_return_list):
    """Split an over-long text into pieces of at most 500 characters.

    Each round takes the first 500 characters of the remaining text and looks
    backwards for the last delimiter character that is not inside a quoted
    dialog span (as reported by ``get_dialogs_index`` for the remaining text).
    If one is found, the piece ends right after that delimiter and the rest of
    the 500-character window is carried over; otherwise the whole window is
    emitted as one piece.

    Implemented as a loop rather than recursion so that a very long line
    cannot exhaust the interpreter's recursion limit.

    :param text: the text to split.
    :param chulipangban_return_list: list that receives the pieces; it is
        appended to in place.
    :return: the same ``chulipangban_return_list`` object, with the pieces of
        ``text`` appended in order.
    """
    # NOTE(review): the delimiter list holds only the empty string, which no
    # single character equals, so the delimiter branch below never fires and
    # the text is effectively cut into fixed 500-character chunks. Confirm
    # whether sentence punctuation was meant to be listed here.
    fuhao = [""]
    max_len = 500

    remaining = text
    while True:
        window = remaining[:max_len]
        overflow = remaining[max_len:]
        if overflow == "":
            # Everything left fits in one piece (also covers empty input).
            chulipangban_return_list.append(window)
            return chulipangban_return_list

        # Quoted spans are recomputed on the remaining text each round,
        # matching what each recursive call of the original did.
        _, dialogs_index, _ = get_dialogs_index(remaining)
        quoted_positions = set(dialogs_index)

        piece = ""
        for i in range(len(window) - 1, -1, -1):
            if window[i] in fuhao and i not in quoted_positions:
                piece = window[:i + 1]
                chulipangban_return_list.append(piece)
                # Carry the part of the window after the delimiter over to
                # the next round (empty when the delimiter is the last char).
                overflow = window[i + 1:] + overflow
                break

        if piece == "":
            # No usable delimiter: emit the whole window as-is.
            chulipangban_return_list.append(window)

        remaining = overflow
def ulit_request_file(file):
file_name = file.filename
@ -45,8 +95,16 @@ def ulit_request_file(file):
# elif file_name.split(".")[-1] == "docx":
# content = docx2txt.process(file_name_save)
content_list = [i for i in content.split("\n")]
content_list = [i for i in content.split("\n") if i!= ""]
print(content_list)
content_list_new = []
for sen in content_list:
if len(sen) < 500:
content_list_new.append(sen)
else:
content_list_new.extend(chulichangju_1(sen, []))
return content_list
@ -88,7 +146,13 @@ def handle_query_predict():
# 绑定文本和query id
# recall_10(id_, title, abst_zh, content)
load_request_path = './request_data_logs/{}.json'.format(id_)
date_str = datetime.now().strftime("%Y-%m-%d")
dir_path = "./request_data_logs/{}".format(date_str)
# 检查并创建目录(如果不存在)
os.makedirs(dir_path, exist_ok=True)
load_request_path = dir_path + '/{}.json'.format(id_)
# load_request_path = './request_data_logs/{}.json'.format(id_)
with open(load_request_path, 'w', encoding='utf8') as f2: # ensure_ascii=False才能输入中文,否则是Unicode字符 indent=2 JSON数据的缩进,美观
json.dump(d, f2, ensure_ascii=False, indent=4)
redis_.rpush(db_key_query, json.dumps({"id": id_, "path": load_request_path})) # 加入redis

2
flask_chatgpt-detector_predict_redis_search.py

@ -28,7 +28,7 @@ from threading import Thread
import time
# Flask application object for this service.
app = flask.Flask(__name__)
# Redis connection pool for a local instance on port 63179 (logical DB 13).
# NOTE(review): the password is hard-coded in source; consider loading it from
# configuration or the environment instead.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
# Rebinds `pool`; when both assignments are present this later one (DB 12) is
# the pool actually handed to the client below.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
# Client bound to the pool above.
# NOTE(review): presumably `decode_responses` has no effect when an explicit
# `connection_pool` is supplied - confirm against the redis-py docs.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

Loading…
Cancel
Save