更新算法

更新检测模型
4 changed files with 203 additions and 30 deletions
--- a/chatgpt_detector_model_predict.py
+++ b/chatgpt_detector_model_predict.py
--- a/flask_api.py
+++ b/flask_api.py
@@ -20,15 +20,65 @@ import uuid
 import time
 import json
 import docx2txt
+import re
+from datetime import datetime


-pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
+pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
 redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

 db_key_query = 'query'
 db_key_querying = 'querying'
 db_key_queryset = 'queryset'
 batch_size = 32
+RE_DIALOG = re.compile(r"\".*?\"|\'.*?\'|“.*?”")  # non-greedy match of one span enclosed in "...", '...' or “...”
+
+def get_dialogs_index(line: str):
+    """
+    Locate quoted dialogue spans in a line of text.
+
+    A dialogue is any span matched by ``RE_DIALOG``.
+
+    :param line: text to scan
+    :return: a 3-tuple ``(dialogs_text, dialogs_index, other_index)``:
+             dialogs_text  - the matched dialogue strings, in order of appearance;
+             dialogs_index - every character index covered by a dialogue;
+             other_index   - every character index of ``line`` not covered by one.
+    """
+    dialogs_text = []
+    dialogs_index = []
+    # A single pass over the matches yields both the text and the covered indexes.
+    for dialog in RE_DIALOG.finditer(line):
+        dialogs_text.append(dialog.group())
+        dialogs_index.extend(range(dialog.start(), dialog.end()))
+    # Set membership keeps the complement computation linear in len(line).
+    covered = set(dialogs_index)
+    other_index = [i for i in range(len(line)) if i not in covered]
+
+    return dialogs_text, dialogs_index, other_index
+
+
+def chulichangju_1(text, chulipangban_return_list):
+    """
+    Split a long text into chunks of at most 500 characters.
+
+    Each chunk is cut right after the last "。" found in the first 500
+    characters of the remaining text, skipping any "。" that lies inside a
+    dialogue span reported by ``get_dialogs_index``. When no usable "。" exists,
+    the chunk is simply the first 500 characters. The final piece (500
+    characters or fewer) is appended as-is.
+
+    :param text: text to split
+    :param chulipangban_return_list: list the chunks are appended to (mutated in place)
+    :return: the same list object, with the new chunks appended
+    """
+    fuhao = ("。",)
+    max_len = 500
+    remaining = text
+    # Iterative on purpose: one recursive call per chunk can exceed the
+    # interpreter recursion limit on a very long line.
+    while True:
+        head = remaining[:max_len]
+        tail = remaining[max_len:]
+        if tail == "":
+            chulipangban_return_list.append(head)
+            return chulipangban_return_list
+
+        # Dialogue spans are recomputed on the remaining text so that the
+        # indexes line up with `head`.
+        _, dialogs_index, _ = get_dialogs_index(remaining)
+        in_dialog = set(dialogs_index)
+
+        # Scan backwards for the last sentence end outside any dialogue.
+        for i in range(len(head) - 1, -1, -1):
+            if head[i] in fuhao and i not in in_dialog:
+                chulipangban_return_list.append(head[:i + 1])
+                # Whatever followed the cut is carried over to the next chunk.
+                tail = head[i + 1:] + tail
+                break
+        else:
+            # No usable sentence end: hard cut at max_len characters.
+            chulipangban_return_list.append(head)
+
+        remaining = tail

 def ulit_request_file(file):
    file_name = file.filename
@@ -45,8 +95,16 @@ def ulit_request_file(file):
    # elif file_name.split(".")[-1] == "docx":
    #     content = docx2txt.process(file_name_save)

-    content_list = [i for i in content.split("\n")]
+    content_list = [i for i in content.split("\n") if i!= ""]
    print(content_list)
+
+    content_list_new = []
+    for sen in content_list:
+        if len(sen) < 500:
+            content_list_new.append(sen)
+        else:
+            content_list_new.extend(chulichangju_1(sen, []))
+
    return content_list


@@ -88,7 +146,13 @@ def handle_query_predict():
    # 绑定文本和query id
    # recall_10(id_, title, abst_zh, content)

-    load_request_path = './request_data_logs/{}.json'.format(id_)
+    date_str = datetime.now().strftime("%Y-%m-%d")
+    dir_path = "./request_data_logs/{}".format(date_str)
+    # 检查并创建目录（如果不存在）
+    os.makedirs(dir_path, exist_ok=True)
+    load_request_path = dir_path + '/{}.json'.format(id_)
+    # load_request_path = './request_data_logs/{}.json'.format(id_)
+
    with open(load_request_path, 'w', encoding='utf8') as f2: # ensure_ascii=False才能输入中文，否则是Unicode字符 indent=2 JSON数据的缩进，美观
        json.dump(d, f2, ensure_ascii=False, indent=4)
    redis_.rpush(db_key_query, json.dumps({"id": id_, "path": load_request_path}))  # 加入redis
--- a/flask_chatgpt-detector_predict_redis_search.py
+++ b/flask_chatgpt-detector_predict_redis_search.py
@@ -28,7 +28,7 @@ from threading import Thread
 import time

 app = flask.Flask(__name__)
-pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=13, password="zhicheng123*")
+pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
 redis_ = redis.Redis(connection_pool=pool, decode_responses=True)
Author	SHA1	Message	Date
majiahui@haimaqingfan.com	84d7d00bd0	更新算法	4 months ago
majiahui@haimaqingfan.com	6fa529d312	更新检测模型	5 months ago
majiahui@haimaqingfan.com	69df67d318	更新检测模型	5 months ago
majiahui@haimaqingfan.com	a6038cc55d	更新检测模型	5 months ago
majiahui@haimaqingfan.com	3049530f05	更新检测模型	7 months ago
majiahui@haimaqingfan.com	820a3367ce	更新检测模型，增加边界条件	1 year ago
majiahui@haimaqingfan.com	7650bf7f3e	更新检测模型，增加边界条件	1 year ago
majiahui@haimaqingfan.com	3b069e746c	更新检测模型	1 year ago