Browse Source

修改反向标红标注bug

master
majiahui@haimaqingfan.com 2 years ago
parent
commit
366be0e0f4
  1. 68
      flask_check_bert_test.py

68
flask_check_bert_test.py

@ -34,6 +34,12 @@ rouge = Rouge()
rouge_model = Rouge_w()
rouge_l_model = Rouge_l()
def jaccard_similarity(s1, s2):
    """Return the Jaccard similarity of two token collections.

    Both arguments are converted to sets, so duplicate tokens and token
    order are ignored. The score is ``|s1 & s2| / |s1 | s2|`` and lies in
    the range [0.0, 1.0].

    :param s1: iterable of hashable tokens (e.g. a list of words or a str).
    :param s2: iterable of hashable tokens to compare against ``s1``.
    :return: similarity as a float; 0.0 when both inputs are empty.
    """
    set1 = set(s1)
    set2 = set(s2)
    union = set1 | set2
    # Two empty inputs have an empty union; return 0.0 rather than raising
    # ZeroDivisionError so that empty content is never scored as a match.
    if not union:
        return 0.0
    intersection = set1 & set2
    return len(intersection) / len(union)
def bert_check(text, recall_data_list):
'''
@ -77,7 +83,8 @@ def rouge_value_self(data_1, data_2):
for sen_1, sen_2 in zip(data_1, data_2):
sen_1 = sen_1.split(" ")
sen_2 = sen_2.split(" ")
rouge_l_score = rouge_l_model.score(sen_1, sen_2)
# rouge_l_score = rouge_l_model.score(sen_1, sen_2)
rouge_l_score = jaccard_similarity(sen_1, sen_2)
rouge_l_list.append(rouge_l_score)
return "", "", rouge_l_list
@ -104,8 +111,6 @@ def rouge_pre(text, df_train_nuoche):
def rouge_pre_m(text, df_train_nuoche):
return_list = []
index_rouge_list = []
@ -165,12 +170,12 @@ def original_text_contrast_func(data_sentence_dan, paper_dict):
}
sentence_0_bool, sentence_0_dan_red = original_text_marked_red(i[1], paper_dict[i[0]][0],
paper_dict[i[0]][
1]) # text_original, bert_text, bert_text_pre
paper_dict[i[0]][4][0],
paper_dict[i[0]][4][1]) # text_original, bert_text, bert_text_pre
sentence_1_bool, sentence_1_dan_red = original_text_marked_red(i[2], paper_dict[i[0]][2],
paper_dict[i[0]][
3]) # text_original, bert_text, bert_text_pre
paper_dict[i[0]][4][0],
paper_dict[i[0]][4][1]) # text_original, bert_text, bert_text_pre
if sentence_0_bool == False or sentence_1_bool == False:
continue
@ -571,7 +576,7 @@ def accurate_check_rouge(
# rouge算法
for i in range(len(data_zong)):
for j in range(len(data_zong[i])):
if data_zong[i][j][1] > 0.47:
if data_zong[i][j][1] > 0.35:
bool_check_sentense.append([i, data_zong[i][j][0]])
biao_red = biaohong(bool_check_sentense, data_zong,
@ -744,7 +749,7 @@ def is_english_char(char):
return 32 <= code <= 126
def original_text_marked_red(text_original, bert_text, bert_text_pre):
def original_text_marked_red(text_original, bert_text, start, end):
'''
把原文标红字段找到
:param text_original:
@ -759,45 +764,6 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre):
pointer_list = []
if len(bert_text_pre) > len(bert_text):
return False, ""
while True:
if down_pointer >= len(bert_text_pre):
break
elif down_pointer == len(bert_text_pre) - 1:
if bert_text[up_pointer] == bert_text_pre[down_pointer]:
pointer_list.append(up_pointer)
break
else:
up_pointer += 1
down_pointer = 0
pointer_list = []
elif bert_text[up_pointer] in fuhao:
up_pointer += 1
else:
if bert_text[up_pointer] == bert_text_pre[down_pointer]:
pointer_list.append(up_pointer)
up_pointer += 1
down_pointer += 1
else:
if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]":
up_pointer += 1
down_pointer += 5
pointer_list.append(up_pointer)
elif is_english_char(bert_text_pre[down_pointer]) == True:
up_pointer += 1
down_pointer += 1
pointer_list.append(up_pointer)
else:
up_pointer += 1
down_pointer = 0
pointer_list = []
start = pointer_list[0]
end = pointer_list[-1]
bert_text_list = list(bert_text)
bert_text_list.insert(start, "<red>")
bert_text_list.insert(end + 2, "</red>")
@ -829,7 +795,7 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre):
return True, bert_text
except:
print("句子标红报错")
print(text_original, bert_text, bert_text_pre)
print(text_original, bert_text)
return False, ""
@ -931,7 +897,7 @@ def ulit_recall_paper(recall_data_list_dict):
# return data
data = []
for i in list(recall_data_list_dict.items())[:5]:
for i in list(recall_data_list_dict.items()):
data_one = processing_one_text(i[0])
data.extend(data_one)
@ -952,7 +918,7 @@ def recall_10(queue_uuid, title, abst_zh, content):
"content": content
}
print(request_json)
dialog_line_parse("http://192.168.31.145:50004/check", request_json)
dialog_line_parse("http://192.168.31.145:50004/check1", request_json)

Loading…
Cancel
Save