From 7650bf7f3eec5d2ff59aea7d7aa66ab0bcfa670b Mon Sep 17 00:00:00 2001
From: "majiahui@haimaqingfan.com" <majiahui@haimaqingfan.com>
Date: Thu, 1 Aug 2024 15:33:22 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=A3=80=E6=B5=8B=E6=A8=A1?=
 =?UTF-8?q?=E5=9E=8B=EF=BC=8C=E5=A2=9E=E5=8A=A0=E8=BE=B9=E7=95=8C=E6=9D=A1?=
 =?UTF-8?q?=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 chatgpt_detector_model_predict.py              | 31 ++++++++++----
 flask_api.py                                   | 59 +++++++++++++++++++++++++-
 flask_chatgpt-detector_predict_redis_search.py |  2 +-
 3 files changed, 82 insertions(+), 10 deletions(-)
diff --git a/chatgpt_detector_model_predict.py b/chatgpt_detector_model_predict.py
index c3a3f05..8b69dda 100644
--- a/chatgpt_detector_model_predict.py
+++ b/chatgpt_detector_model_predict.py
@@ -22,14 +22,17 @@ import json
 import docx2txt
 
 
-pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
+pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
 redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
 
 db_key_query = 'query'
 db_key_querying = 'querying'
 db_key_queryset = 'queryset'
 batch_size = 32
+# model_name = "AIGC_detector_zhv2"
 model_name = "drop_aigc_model_2"
+# model_name = "drop_aigc_model_3"
+
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name).cpu()
@@ -56,10 +59,22 @@ def model_preidct(text):
     output = torch.sigmoid(output[0]).tolist()
     print(output)
 
-    return_list = {
-        "humen": output[0][0],
-        "robot": output[0][1]
-    }
+    if model_name == "drop_aigc_model_2":
+        return_list = {
+            "humen": output[0][1],
+            "robot": output[0][0]
+        }
+    elif model_name == "AIGC_detector_zhv2":
+        return_list = {
+            "humen": output[0][0],
+            "robot": output[0][1]
+        }
+    else:
+        return_list = {
+            "humen": output[0][0],
+            "robot": output[0][1]
+        }
+
     return return_list
 
 
@@ -93,12 +108,12 @@ def main(content_list: list):
             gpt_score_list.append(res["robot"])
             sim_word += len(content_list[i])
             gpt_content.append(
-                "<em class=\"similar\" id='score_{}'>".format(str(i)) + content_list[i] + "。\n" + "</em>")
-        elif 0.9 > res["robot"] > 0.5:
+                "<em class=\"similar\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
+        elif 0.9 >= res["robot"] > 0.5:
             gpt_score_list.append(res["robot"])
             sim_word_5_9 += len(content_list[i])
             gpt_content.append(
-                "<em class=\"color-gold\" id='score_{}'>".format(str(i)) + content_list[i] + "。\n" + "</em>")
+                "<em class=\"color-gold\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
         else:
             gpt_score_list.append(0)
             gpt_content.append(content_list[i] + "\n")
diff --git a/flask_api.py b/flask_api.py
index 3c85041..b40613b 100644
--- a/flask_api.py
+++ b/flask_api.py
@@ -20,15 +20,64 @@ import uuid
 import time
 import json
 import docx2txt
+import re
 
 
-pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
+pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
 redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
 
 db_key_query = 'query'
 db_key_querying = 'querying'
 db_key_queryset = 'queryset'
 batch_size = 32
+RE_DIALOG = re.compile(r"\".*?\"|\'.*?\'|“.*?”")
+
+def get_dialogs_index(line: str):
+    """
+    Locate quoted dialog spans in ``line`` using the RE_DIALOG pattern.
+    :param line: text to scan
+    :return: dialogs_text: matched dialog strings, in order of appearance
+            dialogs_index: every character index covered by a dialog
+            other_index: every character index not covered by a dialog
+    """
+    dialogs_text = []
+    dialogs_index = []
+    for dialog in RE_DIALOG.finditer(line):  # one scan gives text and span
+        dialogs_text.append(dialog.group())
+        dialogs_index.extend(range(dialog.start(), dialog.end()))
+    covered = set(dialogs_index)  # O(1) lookups; list lookups were quadratic
+    other_index = [i for i in range(len(line)) if i not in covered]
+
+    return dialogs_text, dialogs_index, other_index
+
+
+def chulichangju_1(text, chulipangban_return_list):
+    """
+    Split an over-long line into chunks of at most 500 characters.
+    Each chunk ends at the last "。" inside its 500-character window that
+    is not part of a quoted dialog; if no such "。" exists the window is
+    cut at 500 characters.
+    :param text: the line to split
+    :param chulipangban_return_list: list the chunks are appended to
+    :return: chulipangban_return_list, extended in place
+    """
+    fuhao = ["。"]
+    # Iterate instead of recursing once per chunk, so very long lines
+    # cannot exceed the interpreter's recursion limit.
+    while True:
+        text_1 = text[:500]
+        if text[500:] == "":
+            chulipangban_return_list.append(text_1)
+            return chulipangban_return_list
+        # Set membership keeps the backward scan linear in the window size.
+        dialogs_index = set(get_dialogs_index(text)[1])
+        cut = len(text_1)  # fallback: hard cut at the window boundary
+        for i in range(len(text_1) - 1, -1, -1):
+            if text_1[i] in fuhao and i not in dialogs_index:
+                cut = i + 1  # keep the "。" with the chunk it terminates
+                break
+        chulipangban_return_list.append(text_1[:cut])
+        text = text[cut:]
 
 def ulit_request_file(file):
     file_name = file.filename
@@ -47,6 +96,14 @@ def ulit_request_file(file):
 
     content_list = [i for i in content.split("\n")]
     print(content_list)
+
+    content_list_new = []  # lines re-split so that none exceeds 500 characters
+    for sen in content_list:
+        if len(sen) < 500:
+            content_list_new.append(sen)
+        else:
+            content_list_new.extend(chulichangju_1(sen, []))
+    content_list = content_list_new  # without this the split result was discarded
     return content_list
 
 
diff --git a/flask_chatgpt-detector_predict_redis_search.py b/flask_chatgpt-detector_predict_redis_search.py
index cd38788..76f6668 100644
--- a/flask_chatgpt-detector_predict_redis_search.py
+++ b/flask_chatgpt-detector_predict_redis_search.py
@@ -28,7 +28,7 @@ from threading import Thread
 import time
 
 app = flask.Flask(__name__)
-pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
+pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
 redis_ = redis.Redis(connection_pool=pool, decode_responses=True)