|
|
@ -172,6 +172,9 @@ def original_text_contrast_func(data_sentence_dan, paper_dict): |
|
|
|
paper_dict[i[0]][ |
|
|
|
3]) # text_original, bert_text, bert_text_pre |
|
|
|
|
|
|
|
if sentence_0_bool == False or sentence_1_bool == False: |
|
|
|
continue |
|
|
|
|
|
|
|
start_dan = sentence_0_dan_red.index("<red>") |
|
|
|
end_dan = sentence_0_dan_red.index("</red>") - len("<red>") |
|
|
|
|
|
|
@ -180,9 +183,6 @@ def original_text_contrast_func(data_sentence_dan, paper_dict): |
|
|
|
if end_dan > end: |
|
|
|
end = end_dan |
|
|
|
|
|
|
|
if sentence_0_bool == False or sentence_1_bool == False: |
|
|
|
continue |
|
|
|
|
|
|
|
similar_content_dan["content"] = sentence_1_dan_red |
|
|
|
similar_content_dan["title"] = i[3]["title"] |
|
|
|
similar_content_dan["author"] = i[3]["author"] |
|
|
@ -752,81 +752,85 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre): |
|
|
|
:param bert_text_pre: |
|
|
|
:return: |
|
|
|
''' |
|
|
|
try: |
|
|
|
fuhao = ["\n"] |
|
|
|
up_pointer = 0 |
|
|
|
down_pointer = 0 |
|
|
|
|
|
|
|
fuhao = ["\n"] |
|
|
|
up_pointer = 0 |
|
|
|
down_pointer = 0 |
|
|
|
|
|
|
|
pointer_list = [] |
|
|
|
pointer_list = [] |
|
|
|
|
|
|
|
if len(bert_text_pre) > len(bert_text): |
|
|
|
return False, "" |
|
|
|
if len(bert_text_pre) > len(bert_text): |
|
|
|
return False, "" |
|
|
|
|
|
|
|
while True: |
|
|
|
if down_pointer >= len(bert_text_pre): |
|
|
|
break |
|
|
|
elif down_pointer == len(bert_text_pre) - 1: |
|
|
|
if bert_text[up_pointer] == bert_text_pre[down_pointer]: |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
while True: |
|
|
|
if down_pointer >= len(bert_text_pre): |
|
|
|
break |
|
|
|
else: |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer = 0 |
|
|
|
pointer_list = [] |
|
|
|
|
|
|
|
elif bert_text[up_pointer] in fuhao: |
|
|
|
up_pointer += 1 |
|
|
|
|
|
|
|
else: |
|
|
|
if bert_text[up_pointer] == bert_text_pre[down_pointer]: |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 1 |
|
|
|
else: |
|
|
|
if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]": |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 5 |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
elif is_english_char(bert_text_pre[down_pointer]) == True: |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 1 |
|
|
|
elif down_pointer == len(bert_text_pre) - 1: |
|
|
|
if bert_text[up_pointer] == bert_text_pre[down_pointer]: |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
break |
|
|
|
else: |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer = 0 |
|
|
|
pointer_list = [] |
|
|
|
|
|
|
|
start = pointer_list[0] |
|
|
|
end = pointer_list[-1] |
|
|
|
bert_text_list = list(bert_text) |
|
|
|
bert_text_list.insert(start, "<red>") |
|
|
|
bert_text_list.insert(end + 2, "</red>") |
|
|
|
|
|
|
|
text_original_list = list(text_original) |
|
|
|
|
|
|
|
up = 0 |
|
|
|
down = 0 |
|
|
|
|
|
|
|
while True: |
|
|
|
if up == len(text_original_list): |
|
|
|
break |
|
|
|
|
|
|
|
if text_original_list[up] == bert_text_list[down]: |
|
|
|
up += 1 |
|
|
|
down += 1 |
|
|
|
elif bert_text[up_pointer] in fuhao: |
|
|
|
up_pointer += 1 |
|
|
|
|
|
|
|
else: |
|
|
|
if bert_text_list[down] == "<red>": |
|
|
|
down += 1 |
|
|
|
elif bert_text_list[down] == "</red>": |
|
|
|
down += 1 |
|
|
|
else: |
|
|
|
bert_text_list.insert(down, text_original_list[up]) |
|
|
|
if bert_text[up_pointer] == bert_text_pre[down_pointer]: |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 1 |
|
|
|
else: |
|
|
|
if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]": |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 5 |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
elif is_english_char(bert_text_pre[down_pointer]) == True: |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 1 |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
else: |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer = 0 |
|
|
|
pointer_list = [] |
|
|
|
|
|
|
|
start = pointer_list[0] |
|
|
|
end = pointer_list[-1] |
|
|
|
bert_text_list = list(bert_text) |
|
|
|
bert_text_list.insert(start, "<red>") |
|
|
|
bert_text_list.insert(end + 2, "</red>") |
|
|
|
|
|
|
|
text_original_list = list(text_original) |
|
|
|
|
|
|
|
up = 0 |
|
|
|
down = 0 |
|
|
|
|
|
|
|
while True: |
|
|
|
if up == len(text_original_list): |
|
|
|
break |
|
|
|
|
|
|
|
if text_original_list[up] == bert_text_list[down]: |
|
|
|
up += 1 |
|
|
|
down += 1 |
|
|
|
|
|
|
|
bert_text = "".join(bert_text_list) |
|
|
|
return True, bert_text |
|
|
|
else: |
|
|
|
if bert_text_list[down] == "<red>": |
|
|
|
down += 1 |
|
|
|
elif bert_text_list[down] == "</red>": |
|
|
|
down += 1 |
|
|
|
else: |
|
|
|
bert_text_list.insert(down, text_original_list[up]) |
|
|
|
up += 1 |
|
|
|
down += 1 |
|
|
|
|
|
|
|
bert_text = "".join(bert_text_list) |
|
|
|
return True, bert_text |
|
|
|
except: |
|
|
|
print("句子标红报错") |
|
|
|
print(text_original, bert_text, bert_text_pre) |
|
|
|
return False, "" |
|
|
|
|
|
|
|
|
|
|
|
def biaohong_bert_predict(sentence_0_list, sentence_1_list): |
|
|
@ -927,7 +931,7 @@ def ulit_recall_paper(recall_data_list_dict): |
|
|
|
# return data |
|
|
|
|
|
|
|
data = [] |
|
|
|
for i in list(recall_data_list_dict.items()): |
|
|
|
for i in list(recall_data_list_dict.items())[:5]: |
|
|
|
data_one = processing_one_text(i[0]) |
|
|
|
data.extend(data_one) |
|
|
|
|
|
|
@ -948,7 +952,7 @@ def recall_10(queue_uuid, title, abst_zh, content): |
|
|
|
"content": content |
|
|
|
} |
|
|
|
print(request_json) |
|
|
|
da = dialog_line_parse("http://192.168.31.145:50004/check", request_json) |
|
|
|
dialog_line_parse("http://192.168.31.145:50004/check", request_json) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -1126,6 +1130,7 @@ def classify_accurate_check(): |
|
|
|
recall_data_list_dict = json.loads(query_recall_dict["data"]) |
|
|
|
recall_data_list = ulit_recall_paper(recall_data_list_dict) |
|
|
|
data_dict_path = redis_.get(query_recall_uuid + "_request_check") |
|
|
|
print(data_dict_path) |
|
|
|
with open(data_dict_path, encoding='utf8') as f: |
|
|
|
data_dict = json.loads(f.read()) |
|
|
|
|
|
|
|