Browse Source

段落检测

master
majiahui@haimaqingfan.com 11 months ago
parent
commit
23ed82e6fa
  1. 38
      chatgpt_detector_model_predict.py
  2. 2
      flask_api.py

38
chatgpt_detector_model_predict.py

@ -29,10 +29,6 @@ db_key_query = 'query'
db_key_querying = 'querying' db_key_querying = 'querying'
db_key_queryset = 'queryset' db_key_queryset = 'queryset'
batch_size = 32 batch_size = 32
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# tokenizer = AutoTokenizer.from_pretrained("chatgpt-detector-roberta-chinese")
# model = AutoModelForSequenceClassification.from_pretrained("chatgpt-detector-roberta-chinese").cuda()
# model = AutoModelForSequenceClassification.from_pretrained("chatgpt-detector-roberta-chinese").cpu()
model_name = "AIGC_detector_zhv2" model_name = "AIGC_detector_zhv2"
tokenizer = AutoTokenizer.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name)
@ -84,42 +80,28 @@ def main(content_list: list):
sim_word = 0 sim_word = 0
sim_word_5_9 = 0 sim_word_5_9 = 0
total_words = 0 total_words = 0
print(content_list)
total_paragraph = len(content_list) total_paragraph = len(content_list)
for i in range(len(content_list)):
for i in range(0, len(content_list), 3): total_words += len(content_list[i])
if i + 2 <= len(content_list)-1: res = model_preidct(content_list[i])
sen_nums = 3
content_str = "".join([content_list[i], content_list[i+1], content_list[i+2]])
elif i + 1 <= len(content_list)-1:
sen_nums = 2
content_str = "".join([content_list[i], content_list[i + 1]])
else:
sen_nums = 1
content_str = content_list[i]
total_words += len(content_str)
res = model_preidct(content_str)
# return_list = { # return_list = {
# "humen": output[0][0], # "humen": output[0][0],
# "robot": output[0][1] # "robot": output[0][1]
# } # }
if res["robot"] > 0.9: if res["robot"] > 0.9:
for ci in range(sen_nums):
gpt_score_list.append(res["robot"]) gpt_score_list.append(res["robot"])
sim_word += len(content_list[i + ci]) sim_word += len(content_list[i])
gpt_content.append( gpt_content.append(
"<em class=\"similar\" id='score_{}'>".format(str(i + ci)) + content_list[i + ci] + "\n" + "</em>") "<em class=\"similar\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
elif 0.9 > res["robot"] > 0.5: elif 0.9 > res["robot"] > 0.5:
for ci in range(sen_nums):
gpt_score_list.append(res["robot"]) gpt_score_list.append(res["robot"])
sim_word_5_9 += len(content_list[i + ci]) sim_word_5_9 += len(content_list[i])
gpt_content.append( gpt_content.append(
"<em class=\"color-gold\" id='score_{}'>".format(str(i + ci)) + content_list[i + ci] + "\n" + "</em>") "<em class=\"color-gold\" id='score_{}'>".format(str(i)) + content_list[i] + "\n" + "</em>")
else: else:
for ci in range(sen_nums):
gpt_score_list.append(0) gpt_score_list.append(0)
gpt_content.append(content_list[i + ci] + "\n") gpt_content.append(content_list[i] + "\n")
return_list["gpt_content"] = "".join(gpt_content) return_list["gpt_content"] = "".join(gpt_content)
return_list["gpt_score_list"] = str(gpt_score_list) return_list["gpt_score_list"] = str(gpt_score_list)
@ -132,6 +114,7 @@ def main(content_list: list):
def classify(): # 调用模型,设置最大batch_size def classify(): # 调用模型,设置最大batch_size
while True: while True:
try:
if redis_.llen(db_key_query) == 0: # 若队列中没有元素就继续获取 if redis_.llen(db_key_query) == 0: # 若队列中没有元素就继续获取
time.sleep(3) time.sleep(3)
continue continue
@ -189,7 +172,8 @@ def classify(): # 调用模型,设置最大batch_size
json.dump(return_text, f2, ensure_ascii=False, indent=4) json.dump(return_text, f2, ensure_ascii=False, indent=4)
redis_.set(queue_uuid, load_result_path, 86400) redis_.set(queue_uuid, load_result_path, 86400)
redis_.srem(db_key_querying, queue_uuid) redis_.srem(db_key_querying, queue_uuid)
except:
continue
if __name__ == '__main__': if __name__ == '__main__':

2
flask_api.py

@ -45,7 +45,7 @@ def ulit_request_file(file):
# elif file_name.split(".")[-1] == "docx": # elif file_name.split(".")[-1] == "docx":
# content = docx2txt.process(file_name_save) # content = docx2txt.process(file_name_save)
content_list = [i.strip("\n") for i in content.split("")] content_list = [i for i in content.split("\n")]
print(content_list) print(content_list)
return content_list return content_list

Loading…
Cancel
Save