Compare commits

...

2 Commits

Author SHA1 Message Date
majiahui@haimaqingfan.com 69df67d318 更新检测模型 1 month ago
majiahui@haimaqingfan.com a6038cc55d 更新检测模型 1 month ago
  1. 17
      chatgpt_detector_model_predict.py

17
chatgpt_detector_model_predict.py

@@ -20,6 +20,7 @@ import uuid
import time import time
import json import json
import docx2txt import docx2txt
from datetime import datetime
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*") pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
@@ -32,7 +33,8 @@ batch_size = 32
# model_name = "AIGC_detector_zhv2" # model_name = "AIGC_detector_zhv2"
# model_name = "drop_aigc_model_2" # model_name = "drop_aigc_model_2"
# model_name = "drop_aigc_model_3" # model_name = "drop_aigc_model_3"
model_name = "/home/majiahui/project/models-llm/aigc_check_10" # model_name = "/home/majiahui/project/models-llm/aigc_check_10"
model_name = "/home/majiahui/project/models-llm/weipu_aigc_512_3"
tokenizer = AutoTokenizer.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -124,14 +126,14 @@ def main(content_list: list):
reference_bool = is_reference_sentence(content_list[i]) reference_bool = is_reference_sentence(content_list[i])
if reference_bool == False: if reference_bool == False:
if res["robot"] > 0.8: if res["robot"] > 0.9:
for _ in range(len(content_list[i])): for _ in range(len(content_list[i])):
gpt_score_list.append(res["robot"]) gpt_score_list.append(res["robot"])
gpt_score_sentence_list.append(res["robot"]) gpt_score_sentence_list.append(res["robot"])
sim_word += len(content_list[i]) sim_word += len(content_list[i])
gpt_content.append( gpt_content.append(
"<em class=\"similar\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>") "<em class=\"similar\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
elif 0.8 >= res["robot"] > 0.3: elif 0.9 >= res["robot"] > 0.5:
for _ in range(len(content_list[i])): for _ in range(len(content_list[i])):
gpt_score_list.append(res["robot"]) gpt_score_list.append(res["robot"])
gpt_score_sentence_list.append(res["robot"]) gpt_score_sentence_list.append(res["robot"])
@@ -208,7 +210,14 @@ def classify(): # 调用模型,设置最大batch_size
} }
return_text = {"resilt": resilt, "probabilities": None, "status_code": 200} return_text = {"resilt": resilt, "probabilities": None, "status_code": 200}
load_result_path = "./new_data_logs/{}.json".format(queue_uuid)
# 查询增加日期
date_str = datetime.now().strftime("%Y-%m-%d")
dir_path = "./new_data_logs/{}/".format(date_str)
# 检查并创建目录(如果不存在)
os.makedirs(dir_path, exist_ok=True)
load_result_path = dir_path + '{}.json'.format(id_)
# load_result_path = "./new_data_logs/{}.json".format(query_id)
print("query_id: ", queue_uuid) print("query_id: ", queue_uuid)
print("load_result_path: ", load_result_path) print("load_result_path: ", load_result_path)

Loading…
Cancel
Save