diff --git a/chatgpt_detector_model_predict.py b/chatgpt_detector_model_predict.py
index c3a3f05..8b69dda 100644
--- a/chatgpt_detector_model_predict.py
+++ b/chatgpt_detector_model_predict.py
@@ -22,14 +22,17 @@ import json
import docx2txt
-pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
+# Redis connection pool for the server on localhost:63179, logical database 12.
+# NOTE(review): the password is hard-coded in source; consider loading it from configuration.
+pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
+# NOTE(review): decode_responses is passed to Redis() while the pool above was built without it;
+# confirm against the redis-py docs that it takes effect when connection_pool is supplied.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
+# Presumably Redis key names and a batch size; their use is not visible in this hunk.
db_key_query = 'query'
db_key_querying = 'querying'
db_key_queryset = 'queryset'
batch_size = 32
+# Model name handed to from_pretrained below. model_preidct also branches on this value to
+# decide which output index is reported as "humen" and which as "robot"; the commented-out
+# assignments are alternative model names.
+# model_name = "AIGC_detector_zhv2"
model_name = "drop_aigc_model_2"
+# model_name = "drop_aigc_model_3"
+
+# Load the tokenizer and the sequence-classification model for model_name; .cpu() is applied to the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name).cpu()
@@ -56,10 +59,22 @@ def model_preidct(text):
output = torch.sigmoid(output[0]).tolist()
print(output)
- return_list = {
- "humen": output[0][0],
- "robot": output[0][1]
- }
+ if model_name == "drop_aigc_model_2":
+ return_list = {
+ "humen": output[0][1],
+ "robot": output[0][0]
+ }
+ elif model_name == "AIGC_detector_zhv2":
+ return_list = {
+ "humen": output[0][0],
+ "robot": output[0][1]
+ }
+ else:
+ return_list = {
+ "humen": output[0][0],
+ "robot": output[0][1]
+ }
+
return return_list
@@ -93,12 +108,12 @@ def main(content_list: list):
gpt_score_list.append(res["robot"])
sim_word += len(content_list[i])
gpt_content.append(
- "".format(str(i)) + content_list[i] + "。\n" + "")
- elif 0.9 > res["robot"] > 0.5:
+ "".format(str(i)) + content_list[i] + "\n" + "")
+ elif 0.9 >= res["robot"] > 0.5:
gpt_score_list.append(res["robot"])
sim_word_5_9 += len(content_list[i])
gpt_content.append(
- "".format(str(i)) + content_list[i] + "。\n" + "")
+ "".format(str(i)) + content_list[i] + "\n" + "")
else:
gpt_score_list.append(0)
gpt_content.append(content_list[i] + "\n")
diff --git a/flask_api.py b/flask_api.py
index 3c85041..b40613b 100644
--- a/flask_api.py
+++ b/flask_api.py
@@ -20,15 +20,64 @@ import uuid
import time
import json
import docx2txt
+import re
-pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
+# Redis connection pool for the server on localhost:63179, logical database 12.
+# NOTE(review): the password is hard-coded in source; consider loading it from configuration.
+pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
+# NOTE(review): decode_responses is passed to Redis() while the pool above was built without it;
+# confirm against the redis-py docs that it takes effect when connection_pool is supplied.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
+# Presumably Redis key names and a batch size; their use is not visible in this hunk.
db_key_query = 'query'
db_key_querying = 'querying'
db_key_queryset = 'queryset'
batch_size = 32
+# Matches the shortest span enclosed in straight double quotes, straight single quotes,
+# or Chinese curly double quotes; "." does not match a newline because no DOTALL flag is set.
+RE_DIALOG = re.compile(r"\".*?\"|\'.*?\'|“.*?”")
+
+def get_dialogs_index(line: str):
+    """
+    Locate quoted dialog spans in a line of text.
+
+    A dialog is any non-overlapping match of RE_DIALOG in ``line``.
+
+    :param line: text to scan
+    :return: a 3-tuple ``(dialogs_text, dialogs_index, other_index)``
+        dialogs_text: list of the matched dialog strings, in order of appearance
+        dialogs_index: ascending list of every character index inside a match
+        other_index: ascending list of every character index outside all matches
+    """
+    dialogs_text = []
+    dialogs_index = []
+    # One finditer pass yields both the text and the span of each match.
+    # RE_DIALOG has no capture groups, so group() is exactly what findall returned.
+    for dialog in RE_DIALOG.finditer(line):
+        dialogs_text.append(dialog.group())
+        dialogs_index.extend(range(dialog.start(), dialog.end()))
+    # Set membership keeps this linear in len(line) instead of rescanning
+    # the index list once per character.
+    dialog_positions = set(dialogs_index)
+    other_index = [i for i in range(len(line)) if i not in dialog_positions]
+
+    return dialogs_text, dialogs_index, other_index
+
+
+def chulichangju_1(text, chulipangban_return_list):
+    """
+    Split ``text`` into consecutive pieces of at most 500 characters.
+
+    Each piece ends at the last "。" inside the current 500-character window
+    that does not fall within a quoted dialog (as reported by
+    get_dialogs_index on the remaining text). If the window contains no such
+    "。", it is cut at exactly 500 characters. The final piece is whatever
+    remains once 500 characters or fewer are left, so an empty ``text``
+    yields a single empty piece.
+
+    Implemented as a loop rather than one recursive call per piece, so very
+    long inputs cannot exhaust the interpreter's recursion limit.
+
+    :param text: text to split
+    :param chulipangban_return_list: list the pieces are appended to (mutated in place)
+    :return: the same list object, with the pieces appended in order
+    """
+    fuhao = ("。",)
+    window = 500
+    remaining = text
+    while True:
+        head = remaining[:window]
+        tail = remaining[window:]
+        if tail == "":
+            # Nothing beyond this window: emit it as the last piece.
+            chulipangban_return_list.append(head)
+            return chulipangban_return_list
+
+        # Dialog positions are recomputed on the remaining text for every
+        # piece, because a hard cut can change how the quotes pair up.
+        _, dialogs_index, _ = get_dialogs_index(remaining)
+        dialog_positions = set(dialogs_index)
+
+        # Scan backwards for the last sentence end that is not inside a dialog.
+        cut = -1
+        for i in range(len(head) - 1, -1, -1):
+            if head[i] in fuhao and i not in dialog_positions:
+                cut = i
+                break
+
+        if cut == -1:
+            # No usable sentence end: hard cut at the window boundary.
+            chulipangban_return_list.append(head)
+            remaining = tail
+        else:
+            # Keep the "。" with its sentence and carry the rest forward.
+            chulipangban_return_list.append(head[:cut + 1])
+            remaining = remaining[cut + 1:]
def ulit_request_file(file):
file_name = file.filename
@@ -47,6 +96,14 @@ def ulit_request_file(file):
content_list = [i for i in content.split("\n")]
print(content_list)
+
+ content_list_new = []
+ for sen in content_list:
+ if len(sen) < 500:
+ content_list_new.append(sen)
+ else:
+ content_list_new.extend(chulichangju_1(sen, []))
+
return content_list
diff --git a/flask_chatgpt-detector_predict_redis_search.py b/flask_chatgpt-detector_predict_redis_search.py
index cd38788..76f6668 100644
--- a/flask_chatgpt-detector_predict_redis_search.py
+++ b/flask_chatgpt-detector_predict_redis_search.py
@@ -28,7 +28,7 @@ from threading import Thread
import time
+# Flask application object for this module.
app = flask.Flask(__name__)
-pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
+# Redis connection pool for the server on localhost:63179, logical database 12.
+# NOTE(review): the password is hard-coded in source; consider loading it from configuration.
+pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
+# NOTE(review): decode_responses is passed to Redis() while the pool above was built without it;
+# confirm against the redis-py docs that it takes effect when connection_pool is supplied.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)