|
|
# Redis keys used to coordinate detection jobs between the web layer and
# the classify() worker loop.
db_key_query = 'query'          # pending-job queue (list; worker pops from here)
db_key_querying = 'querying'    # ids of jobs currently in flight (set)
db_key_queryset = 'queryset'    # bookkeeping for submitted job ids

# Maximum number of texts scored per model forward pass.
batch_size = 32

# NOTE(review): earlier GPU/CPU wiring kept for reference; this deployment
# loads the detector via from_pretrained(model_name) below — confirm the
# target device before re-enabling any of these.
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# tokenizer = AutoTokenizer.from_pretrained("chatgpt-detector-roberta-chinese")
# model = AutoModelForSequenceClassification.from_pretrained("chatgpt-detector-roberta-chinese").cuda()
# model = AutoModelForSequenceClassification.from_pretrained("chatgpt-detector-roberta-chinese").cpu()

# Local path / hub id of the AIGC (AI-generated content) detector checkpoint.
model_name = "AIGC_detector_zhv2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
@ -84,42 +80,28 @@ def main(content_list: list): |
|
|
|
sim_word = 0 |
|
|
|
sim_word_5_9 = 0 |
|
|
|
total_words = 0 |
|
|
|
print(content_list) |
|
|
|
total_paragraph = len(content_list) |
|
|
|
|
|
|
|
|
|
|
|
for i in range(0, len(content_list), 3): |
|
|
|
if i + 2 <= len(content_list)-1: |
|
|
|
sen_nums = 3 |
|
|
|
content_str = "。".join([content_list[i], content_list[i+1], content_list[i+2]]) |
|
|
|
elif i + 1 <= len(content_list)-1: |
|
|
|
sen_nums = 2 |
|
|
|
content_str = "。".join([content_list[i], content_list[i + 1]]) |
|
|
|
else: |
|
|
|
sen_nums = 1 |
|
|
|
content_str = content_list[i] |
|
|
|
total_words += len(content_str) |
|
|
|
res = model_preidct(content_str) |
|
|
|
for i in range(len(content_list)): |
|
|
|
total_words += len(content_list[i]) |
|
|
|
res = model_preidct(content_list[i]) |
|
|
|
# return_list = { |
|
|
|
# "humen": output[0][0], |
|
|
|
# "robot": output[0][1] |
|
|
|
# } |
|
|
|
if res["robot"] > 0.9: |
|
|
|
for ci in range(sen_nums): |
|
|
|
gpt_score_list.append(res["robot"]) |
|
|
|
sim_word += len(content_list[i + ci]) |
|
|
|
gpt_content.append( |
|
|
|
"<em class=\"similar\" id='score_{}'>".format(str(i + ci)) + content_list[i + ci] + "。\n" + "</em>") |
|
|
|
gpt_score_list.append(res["robot"]) |
|
|
|
sim_word += len(content_list[i]) |
|
|
|
gpt_content.append( |
|
|
|
"<em class=\"similar\" id='score_{}'>".format(str(i)) + content_list[i] + "。\n" + "</em>") |
|
|
|
elif 0.9 > res["robot"] > 0.5: |
|
|
|
for ci in range(sen_nums): |
|
|
|
gpt_score_list.append(res["robot"]) |
|
|
|
sim_word_5_9 += len(content_list[i + ci]) |
|
|
|
gpt_content.append( |
|
|
|
"<em class=\"color-gold\" id='score_{}'>".format(str(i + ci)) + content_list[i + ci] + "。\n" + "</em>") |
|
|
|
gpt_score_list.append(res["robot"]) |
|
|
|
sim_word_5_9 += len(content_list[i]) |
|
|
|
gpt_content.append( |
|
|
|
"<em class=\"color-gold\" id='score_{}'>".format(str(i)) + content_list[i] + "。\n" + "</em>") |
|
|
|
else: |
|
|
|
for ci in range(sen_nums): |
|
|
|
gpt_score_list.append(0) |
|
|
|
gpt_content.append(content_list[i + ci] + "。\n") |
|
|
|
gpt_score_list.append(0) |
|
|
|
gpt_content.append(content_list[i] + "。\n") |
|
|
|
|
|
|
|
return_list["gpt_content"] = "".join(gpt_content) |
|
|
|
return_list["gpt_score_list"] = str(gpt_score_list) |
|
|
@ -132,6 +114,7 @@ def main(content_list: list): |
|
|
|
|
|
|
|
def classify(): # 调用模型,设置最大batch_size |
|
|
|
while True: |
|
|
|
try: |
|
|
|
if redis_.llen(db_key_query) == 0: # 若队列中没有元素就继续获取 |
|
|
|
time.sleep(3) |
|
|
|
continue |
|
|
@ -189,9 +172,10 @@ def classify(): # 调用模型,设置最大batch_size |
|
|
|
json.dump(return_text, f2, ensure_ascii=False, indent=4) |
|
|
|
redis_.set(queue_uuid, load_result_path, 86400) |
|
|
|
redis_.srem(db_key_querying, queue_uuid) |
|
|
|
|
|
|
|
except: |
|
|
|
continue |
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the Redis queue consumer on a background thread.
    # Bug fix: the original called t.start() twice — threading.Thread.start()
    # raises RuntimeError when invoked a second time on the same Thread object,
    # which would crash the main thread immediately after startup.
    t = Thread(target=classify)
    t.start()
    # classify() loops forever; join keeps the main thread alive alongside it.
    t.join()
|
|
|