From 23ed82e6fa72b2500da9ea8e55b6716000306109 Mon Sep 17 00:00:00 2001
From: "majiahui@haimaqingfan.com" <majiahui@haimaqingfan.com>
Date: Fri, 14 Jun 2024 18:30:16 +0800
Subject: [PATCH] =?UTF-8?q?=E6=AE=B5=E8=90=BD=E6=A3=80=E6=B5=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 chatgpt_detector_model_predict.py | 50 +++++++++++++--------------------------
 flask_api.py                      |  2 +-
 2 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/chatgpt_detector_model_predict.py b/chatgpt_detector_model_predict.py
index dbe9789..0242104 100644
--- a/chatgpt_detector_model_predict.py
+++ b/chatgpt_detector_model_predict.py
@@ -29,10 +29,6 @@ db_key_query = 'query'
 db_key_querying = 'querying'
 db_key_queryset = 'queryset'
 batch_size = 32
-# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-# tokenizer = AutoTokenizer.from_pretrained("chatgpt-detector-roberta-chinese")
-# model = AutoModelForSequenceClassification.from_pretrained("chatgpt-detector-roberta-chinese").cuda()
-# model = AutoModelForSequenceClassification.from_pretrained("chatgpt-detector-roberta-chinese").cpu()
 
 model_name = "AIGC_detector_zhv2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -84,42 +80,28 @@ def main(content_list: list):
     sim_word = 0
     sim_word_5_9 = 0
     total_words = 0
-    print(content_list)
     total_paragraph = len(content_list)
-
-    for i in range(0, len(content_list), 3):
-        if i + 2 <= len(content_list)-1:
-            sen_nums = 3
-            content_str = "。".join([content_list[i], content_list[i+1], content_list[i+2]])
-        elif i + 1 <= len(content_list)-1:
-            sen_nums = 2
-            content_str = "。".join([content_list[i], content_list[i + 1]])
-        else:
-            sen_nums = 1
-            content_str = content_list[i]
-        total_words += len(content_str)
-        res = model_preidct(content_str)
+    for i in range(len(content_list)):
+        total_words += len(content_list[i])
+        res = model_preidct(content_list[i])
         # return_list = {
         #     "humen": output[0][0],
         #     "robot": output[0][1]
         # }
         if res["robot"] > 0.9:
-            for ci in range(sen_nums):
-                gpt_score_list.append(res["robot"])
-                sim_word += len(content_list[i + ci])
-                gpt_content.append(
-                    "".format(str(i + ci)) + content_list[i + ci] + "。\n" + "")
+            gpt_score_list.append(res["robot"])
+            sim_word += len(content_list[i])
+            gpt_content.append(
+                "".format(str(i)) + content_list[i] + "。\n" + "")
         elif 0.9 > res["robot"] > 0.5:
-            for ci in range(sen_nums):
-                gpt_score_list.append(res["robot"])
-                sim_word_5_9 += len(content_list[i + ci])
-                gpt_content.append(
-                    "".format(str(i + ci)) + content_list[i + ci] + "。\n" + "")
+            gpt_score_list.append(res["robot"])
+            sim_word_5_9 += len(content_list[i])
+            gpt_content.append(
+                "".format(str(i)) + content_list[i] + "。\n" + "")
         else:
-            for ci in range(sen_nums):
-                gpt_score_list.append(0)
-                gpt_content.append(content_list[i + ci] + "。\n")
+            gpt_score_list.append(0)
+            gpt_content.append(content_list[i] + "。\n")
 
     return_list["gpt_content"] = "".join(gpt_content)
     return_list["gpt_score_list"] = str(gpt_score_list)
@@ -132,6 +114,7 @@ def main(content_list: list):
 
 def classify():  # 调用模型,设置最大batch_size
     while True:
+        try:
         if redis_.llen(db_key_query) == 0:  # 若队列中没有元素就继续获取
             time.sleep(3)
             continue
@@ -189,9 +172,10 @@ def classify():  # 调用模型,设置最大batch_size
             json.dump(return_text, f2, ensure_ascii=False, indent=4)
         redis_.set(queue_uuid, load_result_path, 86400)
         redis_.srem(db_key_querying, queue_uuid)
-
+        except:
+            continue
 
 if __name__ == '__main__':
     t = Thread(target=classify)
-    t.start()
\ No newline at end of file
+    t.start()
diff --git a/flask_api.py b/flask_api.py
index 7656f63..3c85041 100644
--- a/flask_api.py
+++ b/flask_api.py
@@ -45,7 +45,7 @@ def ulit_request_file(file):
     # elif file_name.split(".")[-1] == "docx":
     #     content = docx2txt.process(file_name_save)
 
-    content_list = [i.strip("\n") for i in content.split("。")]
+    content_list = [i for i in content.split("\n")]
     print(content_list)
     return content_list
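
For reference, below is a minimal sketch (not part of the patch) of the paragraph-level flow the change moves to: ulit_request_file now splits the uploaded text on newlines, and main() scores each paragraph on its own instead of joining sentences in groups of three. The predict_fn parameter here is a stand-in for the module's model_preidct and is only assumed to return a dict with a "robot" probability, as in the patched code.

from typing import Callable, Dict, List


def split_paragraphs(content: str) -> List[str]:
    # Mirrors the patched ulit_request_file: one list element per newline-delimited paragraph.
    return [i for i in content.split("\n")]


def score_paragraphs(content_list: List[str],
                     predict_fn: Callable[[str], Dict[str, float]]) -> List[float]:
    # Mirrors the patched main(): each paragraph is scored individually with the
    # same 0.9 / 0.5 thresholds; paragraphs scoring 0.5 or below are recorded as 0.
    gpt_score_list = []
    for paragraph in content_list:
        res = predict_fn(paragraph)
        if res["robot"] > 0.9:
            gpt_score_list.append(res["robot"])
        elif 0.9 > res["robot"] > 0.5:
            gpt_score_list.append(res["robot"])
        else:
            gpt_score_list.append(0)
    return gpt_score_list


if __name__ == "__main__":
    # Stub classifier for illustration; the service itself loads the AIGC_detector_zhv2 model.
    stub = lambda text: {"humen": 0.2, "robot": 0.8}
    print(score_paragraphs(split_paragraphs("第一段\n第二段"), stub))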