Browse Source

修改反向标红标注bug

master
majiahui@haimaqingfan.com 2 years ago
parent
commit
ca94067e1b
  1. 135
      flask_check_bert_test.py

135
flask_check_bert_test.py

@ -172,6 +172,9 @@ def original_text_contrast_func(data_sentence_dan, paper_dict):
paper_dict[i[0]][ paper_dict[i[0]][
3]) # text_original, bert_text, bert_text_pre 3]) # text_original, bert_text, bert_text_pre
if sentence_0_bool == False or sentence_1_bool == False:
continue
start_dan = sentence_0_dan_red.index("<red>") start_dan = sentence_0_dan_red.index("<red>")
end_dan = sentence_0_dan_red.index("</red>") - len("<red>") end_dan = sentence_0_dan_red.index("</red>") - len("<red>")
@ -180,9 +183,6 @@ def original_text_contrast_func(data_sentence_dan, paper_dict):
if end_dan > end: if end_dan > end:
end = end_dan end = end_dan
if sentence_0_bool == False or sentence_1_bool == False:
continue
similar_content_dan["content"] = sentence_1_dan_red similar_content_dan["content"] = sentence_1_dan_red
similar_content_dan["title"] = i[3]["title"] similar_content_dan["title"] = i[3]["title"]
similar_content_dan["author"] = i[3]["author"] similar_content_dan["author"] = i[3]["author"]
@ -752,81 +752,85 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre):
:param bert_text_pre: :param bert_text_pre:
:return: :return:
''' '''
try:
fuhao = ["\n"]
up_pointer = 0
down_pointer = 0
fuhao = ["\n"] pointer_list = []
up_pointer = 0
down_pointer = 0
pointer_list = []
if len(bert_text_pre) > len(bert_text): if len(bert_text_pre) > len(bert_text):
return False, "" return False, ""
while True: while True:
if down_pointer >= len(bert_text_pre): if down_pointer >= len(bert_text_pre):
break
elif down_pointer == len(bert_text_pre) - 1:
if bert_text[up_pointer] == bert_text_pre[down_pointer]:
pointer_list.append(up_pointer)
break break
else: elif down_pointer == len(bert_text_pre) - 1:
up_pointer += 1 if bert_text[up_pointer] == bert_text_pre[down_pointer]:
down_pointer = 0
pointer_list = []
elif bert_text[up_pointer] in fuhao:
up_pointer += 1
else:
if bert_text[up_pointer] == bert_text_pre[down_pointer]:
pointer_list.append(up_pointer)
up_pointer += 1
down_pointer += 1
else:
if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]":
up_pointer += 1
down_pointer += 5
pointer_list.append(up_pointer)
elif is_english_char(bert_text_pre[down_pointer]) == True:
up_pointer += 1
down_pointer += 1
pointer_list.append(up_pointer) pointer_list.append(up_pointer)
break
else: else:
up_pointer += 1 up_pointer += 1
down_pointer = 0 down_pointer = 0
pointer_list = [] pointer_list = []
start = pointer_list[0] elif bert_text[up_pointer] in fuhao:
end = pointer_list[-1] up_pointer += 1
bert_text_list = list(bert_text)
bert_text_list.insert(start, "<red>")
bert_text_list.insert(end + 2, "</red>")
text_original_list = list(text_original)
up = 0
down = 0
while True:
if up == len(text_original_list):
break
if text_original_list[up] == bert_text_list[down]:
up += 1
down += 1
else:
if bert_text_list[down] == "<red>":
down += 1
elif bert_text_list[down] == "</red>":
down += 1
else: else:
bert_text_list.insert(down, text_original_list[up]) if bert_text[up_pointer] == bert_text_pre[down_pointer]:
pointer_list.append(up_pointer)
up_pointer += 1
down_pointer += 1
else:
if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]":
up_pointer += 1
down_pointer += 5
pointer_list.append(up_pointer)
elif is_english_char(bert_text_pre[down_pointer]) == True:
up_pointer += 1
down_pointer += 1
pointer_list.append(up_pointer)
else:
up_pointer += 1
down_pointer = 0
pointer_list = []
start = pointer_list[0]
end = pointer_list[-1]
bert_text_list = list(bert_text)
bert_text_list.insert(start, "<red>")
bert_text_list.insert(end + 2, "</red>")
text_original_list = list(text_original)
up = 0
down = 0
while True:
if up == len(text_original_list):
break
if text_original_list[up] == bert_text_list[down]:
up += 1 up += 1
down += 1 down += 1
bert_text = "".join(bert_text_list) else:
return True, bert_text if bert_text_list[down] == "<red>":
down += 1
elif bert_text_list[down] == "</red>":
down += 1
else:
bert_text_list.insert(down, text_original_list[up])
up += 1
down += 1
bert_text = "".join(bert_text_list)
return True, bert_text
except:
print("句子标红报错")
print(text_original, bert_text, bert_text_pre)
return False, ""
def biaohong_bert_predict(sentence_0_list, sentence_1_list): def biaohong_bert_predict(sentence_0_list, sentence_1_list):
@ -927,7 +931,7 @@ def ulit_recall_paper(recall_data_list_dict):
# return data # return data
data = [] data = []
for i in list(recall_data_list_dict.items()): for i in list(recall_data_list_dict.items())[:5]:
data_one = processing_one_text(i[0]) data_one = processing_one_text(i[0])
data.extend(data_one) data.extend(data_one)
@ -948,7 +952,7 @@ def recall_10(queue_uuid, title, abst_zh, content):
"content": content "content": content
} }
print(request_json) print(request_json)
da = dialog_line_parse("http://192.168.31.145:50004/check", request_json) dialog_line_parse("http://192.168.31.145:50004/check", request_json)
@ -1126,6 +1130,7 @@ def classify_accurate_check():
recall_data_list_dict = json.loads(query_recall_dict["data"]) recall_data_list_dict = json.loads(query_recall_dict["data"])
recall_data_list = ulit_recall_paper(recall_data_list_dict) recall_data_list = ulit_recall_paper(recall_data_list_dict)
data_dict_path = redis_.get(query_recall_uuid + "_request_check") data_dict_path = redis_.get(query_recall_uuid + "_request_check")
print(data_dict_path)
with open(data_dict_path, encoding='utf8') as f: with open(data_dict_path, encoding='utf8') as f:
data_dict = json.loads(f.read()) data_dict = json.loads(f.read())

Loading…
Cancel
Save