Browse Source

修改反向标红标注bug

master
majiahui@haimaqingfan.com 2 years ago
parent
commit
366be0e0f4
  1. 68
      flask_check_bert_test.py

68
flask_check_bert_test.py

@ -34,6 +34,12 @@ rouge = Rouge()
rouge_model = Rouge_w() rouge_model = Rouge_w()
rouge_l_model = Rouge_l() rouge_l_model = Rouge_l()
def jaccard_similarity(s1, s2):
    """Return the Jaccard similarity of two iterables of hashable items.

    Both inputs are converted to sets, so duplicates and ordering are
    ignored. The score is ``len(intersection) / len(union)``.

    :param s1: first iterable of hashable items (e.g. a list of tokens)
    :param s2: second iterable of hashable items
    :return: float in ``[0.0, 1.0]``; ``0.0`` when both inputs are empty
    """
    set1 = set(s1)
    set2 = set(s2)
    union = set1 | set2
    # Two empty inputs give an empty union; report "no similarity" rather
    # than raising ZeroDivisionError.
    if not union:
        return 0.0
    intersection = set1 & set2
    return len(intersection) / len(union)
def bert_check(text, recall_data_list): def bert_check(text, recall_data_list):
''' '''
@ -77,7 +83,8 @@ def rouge_value_self(data_1, data_2):
for sen_1, sen_2 in zip(data_1, data_2): for sen_1, sen_2 in zip(data_1, data_2):
sen_1 = sen_1.split(" ") sen_1 = sen_1.split(" ")
sen_2 = sen_2.split(" ") sen_2 = sen_2.split(" ")
rouge_l_score = rouge_l_model.score(sen_1, sen_2) # rouge_l_score = rouge_l_model.score(sen_1, sen_2)
rouge_l_score = jaccard_similarity(sen_1, sen_2)
rouge_l_list.append(rouge_l_score) rouge_l_list.append(rouge_l_score)
return "", "", rouge_l_list return "", "", rouge_l_list
@ -104,8 +111,6 @@ def rouge_pre(text, df_train_nuoche):
def rouge_pre_m(text, df_train_nuoche): def rouge_pre_m(text, df_train_nuoche):
return_list = [] return_list = []
index_rouge_list = [] index_rouge_list = []
@ -165,12 +170,12 @@ def original_text_contrast_func(data_sentence_dan, paper_dict):
} }
sentence_0_bool, sentence_0_dan_red = original_text_marked_red(i[1], paper_dict[i[0]][0], sentence_0_bool, sentence_0_dan_red = original_text_marked_red(i[1], paper_dict[i[0]][0],
paper_dict[i[0]][ paper_dict[i[0]][4][0],
1]) # text_original, bert_text, bert_text_pre paper_dict[i[0]][4][1]) # text_original, bert_text, bert_text_pre
sentence_1_bool, sentence_1_dan_red = original_text_marked_red(i[2], paper_dict[i[0]][2], sentence_1_bool, sentence_1_dan_red = original_text_marked_red(i[2], paper_dict[i[0]][2],
paper_dict[i[0]][ paper_dict[i[0]][4][0],
3]) # text_original, bert_text, bert_text_pre paper_dict[i[0]][4][1]) # text_original, bert_text, bert_text_pre
if sentence_0_bool == False or sentence_1_bool == False: if sentence_0_bool == False or sentence_1_bool == False:
continue continue
@ -571,7 +576,7 @@ def accurate_check_rouge(
# rouge算法 # rouge算法
for i in range(len(data_zong)): for i in range(len(data_zong)):
for j in range(len(data_zong[i])): for j in range(len(data_zong[i])):
if data_zong[i][j][1] > 0.47: if data_zong[i][j][1] > 0.35:
bool_check_sentense.append([i, data_zong[i][j][0]]) bool_check_sentense.append([i, data_zong[i][j][0]])
biao_red = biaohong(bool_check_sentense, data_zong, biao_red = biaohong(bool_check_sentense, data_zong,
@ -744,7 +749,7 @@ def is_english_char(char):
return 32 <= code <= 126 return 32 <= code <= 126
def original_text_marked_red(text_original, bert_text, bert_text_pre): def original_text_marked_red(text_original, bert_text, start, end):
''' '''
把原文标红字段找到 把原文标红字段找到
:param text_original: :param text_original:
@ -759,45 +764,6 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre):
pointer_list = [] pointer_list = []
if len(bert_text_pre) > len(bert_text):
return False, ""
while True:
if down_pointer >= len(bert_text_pre):
break
elif down_pointer == len(bert_text_pre) - 1:
if bert_text[up_pointer] == bert_text_pre[down_pointer]:
pointer_list.append(up_pointer)
break
else:
up_pointer += 1
down_pointer = 0
pointer_list = []
elif bert_text[up_pointer] in fuhao:
up_pointer += 1
else:
if bert_text[up_pointer] == bert_text_pre[down_pointer]:
pointer_list.append(up_pointer)
up_pointer += 1
down_pointer += 1
else:
if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]":
up_pointer += 1
down_pointer += 5
pointer_list.append(up_pointer)
elif is_english_char(bert_text_pre[down_pointer]) == True:
up_pointer += 1
down_pointer += 1
pointer_list.append(up_pointer)
else:
up_pointer += 1
down_pointer = 0
pointer_list = []
start = pointer_list[0]
end = pointer_list[-1]
bert_text_list = list(bert_text) bert_text_list = list(bert_text)
bert_text_list.insert(start, "<red>") bert_text_list.insert(start, "<red>")
bert_text_list.insert(end + 2, "</red>") bert_text_list.insert(end + 2, "</red>")
@ -829,7 +795,7 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre):
return True, bert_text return True, bert_text
except: except:
print("句子标红报错") print("句子标红报错")
print(text_original, bert_text, bert_text_pre) print(text_original, bert_text)
return False, "" return False, ""
@ -931,7 +897,7 @@ def ulit_recall_paper(recall_data_list_dict):
# return data # return data
data = [] data = []
for i in list(recall_data_list_dict.items())[:5]: for i in list(recall_data_list_dict.items()):
data_one = processing_one_text(i[0]) data_one = processing_one_text(i[0])
data.extend(data_one) data.extend(data_one)
@ -952,7 +918,7 @@ def recall_10(queue_uuid, title, abst_zh, content):
"content": content "content": content
} }
print(request_json) print(request_json)
dialog_line_parse("http://192.168.31.145:50004/check", request_json) dialog_line_parse("http://192.168.31.145:50004/check1", request_json)

Loading…
Cancel
Save