Browse Source

更新检测模型,增加边界条件

dev_2
majiahui@haimaqingfan.com 10 months ago
parent
commit
7650bf7f3e
  1. 23
      chatgpt_detector_model_predict.py
  2. 59
      flask_api.py
  3. 2
      flask_chatgpt-detector_predict_redis_search.py

23
chatgpt_detector_model_predict.py

@ -22,14 +22,17 @@ import json
import docx2txt
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
db_key_query = 'query'
db_key_querying = 'querying'
db_key_queryset = 'queryset'
batch_size = 32
# model_name = "AIGC_detector_zhv2"
model_name = "drop_aigc_model_2"
# model_name = "drop_aigc_model_3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name).cpu()
@ -56,10 +59,22 @@ def model_preidct(text):
output = torch.sigmoid(output[0]).tolist()
print(output)
if model_name == "drop_aigc_model_2":
return_list = {
"humen": output[0][1],
"robot": output[0][0]
}
elif model_name == "AIGC_detector_zhv2":
return_list = {
"humen": output[0][0],
"robot": output[0][1]
}
else:
return_list = {
"humen": output[0][0],
"robot": output[0][1]
}
return return_list
@ -93,12 +108,12 @@ def main(content_list: list):
gpt_score_list.append(res["robot"])
sim_word += len(content_list[i])
gpt_content.append(
"<em class=\"similar\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
elif 0.9 > res["robot"] > 0.5:
"<em class=\"similar\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
elif 0.9 >= res["robot"] > 0.5:
gpt_score_list.append(res["robot"])
sim_word_5_9 += len(content_list[i])
gpt_content.append(
"<em class=\"color-gold\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
"<em class=\"color-gold\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
else:
gpt_score_list.append(0)
gpt_content.append(content_list[i] + "\n")

59
flask_api.py

@ -20,15 +20,64 @@ import uuid
import time
import json
import docx2txt
import re
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
db_key_query = 'query'
db_key_querying = 'querying'
db_key_queryset = 'queryset'
batch_size = 32
# Matches a quoted span: "..." or '...' or “...” (non-greedy, so each opening
# quote pairs with the nearest closing quote of the same kind).
RE_DIALOG = re.compile(r"\".*?\"|\'.*?\'|“.*?”")


def get_dialogs_index(line: str):
    """Locate quoted dialogue in *line*.

    :param line: text to scan
    :return: a 3-tuple ``(dialogs_text, dialogs_index, other_index)`` where
        ``dialogs_text`` is the list of matched quoted spans (quotes included),
        ``dialogs_index`` is the list of character positions covered by those
        spans, in ascending order, and
        ``other_index`` is the list of every remaining position in *line*.
    """
    dialogs_text = []
    dialogs_index = []
    # Single pass over the matches: collect both the text and the covered
    # positions instead of scanning the line twice (finditer + findall).
    for match in RE_DIALOG.finditer(line):
        dialogs_text.append(match.group())
        dialogs_index.extend(range(match.start(), match.end()))

    # Set membership keeps the complement computation linear in len(line);
    # testing against the list made it quadratic for dialogue-heavy lines.
    dialog_positions = set(dialogs_index)
    other_index = [i for i in range(len(line)) if i not in dialog_positions]
    return dialogs_text, dialogs_index, other_index
def chulichangju_1(text, chulipangban_return_list, max_len=500, fuhao=()):
    """Split an over-long line into chunks of at most *max_len* characters.

    Chunks are appended to *chulipangban_return_list* in order, and that same
    list is returned. Text that already fits (including the empty string) is
    appended unchanged as a single chunk.

    :param text: the line to split
    :param chulipangban_return_list: list that receives the chunks (mutated)
    :param max_len: maximum chunk length; must be >= 1
    :param fuhao: characters after which a chunk may end. When non-empty, each
        chunk is cut just after the last such character inside the current
        window that is not within a quoted dialogue span (as reported by
        ``get_dialogs_index`` on the text remaining from the chunk start).
        If no such character exists the chunk is cut at exactly *max_len*.
    :return: *chulipangban_return_list*
    :raises ValueError: if *max_len* is smaller than 1

    NOTE(review): the previous implementation hard-coded ``fuhao = [""]``.
    A single character never equals the empty string, so the delimiter scan
    was unreachable and every chunk was a hard cut at 500 characters. The
    default ``fuhao=()`` keeps exactly that output; pass real punctuation
    (e.g. ``fuhao=("。", "!", "?")``) to get sentence-aware splitting —
    confirm the intended set with the author.
    """
    if max_len < 1:
        raise ValueError("max_len must be >= 1")

    # Iterate with an offset instead of recursing once per chunk: the
    # recursive form hit RecursionError on very long lines.
    start = 0
    total = len(text)
    while total - start > max_len:
        window_end = start + max_len
        cut = window_end  # fallback: hard cut at the window boundary

        if fuhao:
            dialog_positions = None
            # Scan backwards so the chunk is as long as possible.
            for pos in range(window_end - 1, start - 1, -1):
                if text[pos] not in fuhao:
                    continue
                if dialog_positions is None:
                    # Computed lazily, and relative to the chunk start so
                    # quote pairing restarts at each chunk boundary.
                    _, in_dialog, _ = get_dialogs_index(text[start:])
                    dialog_positions = set(in_dialog)
                if pos - start not in dialog_positions:
                    cut = pos + 1  # keep the delimiter with this chunk
                    break

        chulipangban_return_list.append(text[start:cut])
        start = cut

    # Whatever is left fits in one chunk (may be the whole text, or "").
    chulipangban_return_list.append(text[start:])
    return chulipangban_return_list
def ulit_request_file(file):
file_name = file.filename
@ -47,6 +96,14 @@ def ulit_request_file(file):
content_list = [i for i in content.split("\n")]
print(content_list)
content_list_new = []
for sen in content_list:
if len(sen) < 500:
content_list_new.append(sen)
else:
content_list_new.extend(chulichangju_1(sen, []))
return content_list

2
flask_chatgpt-detector_predict_redis_search.py

@ -28,7 +28,7 @@ from threading import Thread
import time
app = flask.Flask(__name__)
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

Loading…
Cancel
Save