|
|
@ -34,6 +34,12 @@ rouge = Rouge() |
|
|
|
rouge_model = Rouge_w() |
|
|
|
rouge_l_model = Rouge_l() |
|
|
|
|
|
|
|
def jaccard_similarity(s1, s2):
    '''
    Compute the Jaccard similarity of two collections of hashable items.

    Both arguments are converted to sets, so duplicates and ordering are
    ignored. The score is ``len(intersection) / len(union)``.

    :param s1: first iterable of hashable items (e.g. a list of tokens)
    :param s2: second iterable of hashable items
    :return: float in [0.0, 1.0]; 0.0 when both inputs are empty
    '''
    set1 = set(s1)
    set2 = set(s2)
    union = set1 | set2
    # Two empty inputs give an empty union; report "no similarity" rather
    # than raising ZeroDivisionError.
    if not union:
        return 0.0
    return len(set1 & set2) / len(union)
|
|
|
|
|
|
|
def bert_check(text, recall_data_list): |
|
|
|
''' |
|
|
@ -77,7 +83,8 @@ def rouge_value_self(data_1, data_2): |
|
|
|
for sen_1, sen_2 in zip(data_1, data_2): |
|
|
|
sen_1 = sen_1.split(" ") |
|
|
|
sen_2 = sen_2.split(" ") |
|
|
|
rouge_l_score = rouge_l_model.score(sen_1, sen_2) |
|
|
|
# rouge_l_score = rouge_l_model.score(sen_1, sen_2) |
|
|
|
rouge_l_score = jaccard_similarity(sen_1, sen_2) |
|
|
|
rouge_l_list.append(rouge_l_score) |
|
|
|
|
|
|
|
return "", "", rouge_l_list |
|
|
@ -104,8 +111,6 @@ def rouge_pre(text, df_train_nuoche): |
|
|
|
|
|
|
|
def rouge_pre_m(text, df_train_nuoche): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return_list = [] |
|
|
|
index_rouge_list = [] |
|
|
|
|
|
|
@ -165,12 +170,12 @@ def original_text_contrast_func(data_sentence_dan, paper_dict): |
|
|
|
} |
|
|
|
|
|
|
|
sentence_0_bool, sentence_0_dan_red = original_text_marked_red(i[1], paper_dict[i[0]][0], |
|
|
|
paper_dict[i[0]][ |
|
|
|
1]) # text_original, bert_text, bert_text_pre |
|
|
|
paper_dict[i[0]][4][0], |
|
|
|
paper_dict[i[0]][4][1]) # text_original, bert_text, bert_text_pre |
|
|
|
|
|
|
|
sentence_1_bool, sentence_1_dan_red = original_text_marked_red(i[2], paper_dict[i[0]][2], |
|
|
|
paper_dict[i[0]][ |
|
|
|
3]) # text_original, bert_text, bert_text_pre |
|
|
|
paper_dict[i[0]][4][0], |
|
|
|
paper_dict[i[0]][4][1]) # text_original, bert_text, bert_text_pre |
|
|
|
|
|
|
|
if sentence_0_bool == False or sentence_1_bool == False: |
|
|
|
continue |
|
|
@ -571,7 +576,7 @@ def accurate_check_rouge( |
|
|
|
# rouge算法 |
|
|
|
for i in range(len(data_zong)): |
|
|
|
for j in range(len(data_zong[i])): |
|
|
|
if data_zong[i][j][1] > 0.47: |
|
|
|
if data_zong[i][j][1] > 0.35: |
|
|
|
bool_check_sentense.append([i, data_zong[i][j][0]]) |
|
|
|
|
|
|
|
biao_red = biaohong(bool_check_sentense, data_zong, |
|
|
@ -744,7 +749,7 @@ def is_english_char(char): |
|
|
|
return 32 <= code <= 126 |
|
|
|
|
|
|
|
|
|
|
|
def original_text_marked_red(text_original, bert_text, bert_text_pre): |
|
|
|
def original_text_marked_red(text_original, bert_text, start, end): |
|
|
|
''' |
|
|
|
把原文标红字段找到 |
|
|
|
:param text_original: |
|
|
@ -759,45 +764,6 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre): |
|
|
|
|
|
|
|
pointer_list = [] |
|
|
|
|
|
|
|
if len(bert_text_pre) > len(bert_text): |
|
|
|
return False, "" |
|
|
|
|
|
|
|
while True: |
|
|
|
if down_pointer >= len(bert_text_pre): |
|
|
|
break |
|
|
|
elif down_pointer == len(bert_text_pre) - 1: |
|
|
|
if bert_text[up_pointer] == bert_text_pre[down_pointer]: |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
break |
|
|
|
else: |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer = 0 |
|
|
|
pointer_list = [] |
|
|
|
|
|
|
|
elif bert_text[up_pointer] in fuhao: |
|
|
|
up_pointer += 1 |
|
|
|
|
|
|
|
else: |
|
|
|
if bert_text[up_pointer] == bert_text_pre[down_pointer]: |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 1 |
|
|
|
else: |
|
|
|
if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]": |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 5 |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
elif is_english_char(bert_text_pre[down_pointer]) == True: |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer += 1 |
|
|
|
pointer_list.append(up_pointer) |
|
|
|
else: |
|
|
|
up_pointer += 1 |
|
|
|
down_pointer = 0 |
|
|
|
pointer_list = [] |
|
|
|
|
|
|
|
start = pointer_list[0] |
|
|
|
end = pointer_list[-1] |
|
|
|
bert_text_list = list(bert_text) |
|
|
|
bert_text_list.insert(start, "<red>") |
|
|
|
bert_text_list.insert(end + 2, "</red>") |
|
|
@ -829,7 +795,7 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre): |
|
|
|
return True, bert_text |
|
|
|
except: |
|
|
|
print("句子标红报错") |
|
|
|
print(text_original, bert_text, bert_text_pre) |
|
|
|
print(text_original, bert_text) |
|
|
|
return False, "" |
|
|
|
|
|
|
|
|
|
|
@ -931,7 +897,7 @@ def ulit_recall_paper(recall_data_list_dict): |
|
|
|
# return data |
|
|
|
|
|
|
|
data = [] |
|
|
|
for i in list(recall_data_list_dict.items())[:5]: |
|
|
|
for i in list(recall_data_list_dict.items()): |
|
|
|
data_one = processing_one_text(i[0]) |
|
|
|
data.extend(data_one) |
|
|
|
|
|
|
@ -952,7 +918,7 @@ def recall_10(queue_uuid, title, abst_zh, content): |
|
|
|
"content": content |
|
|
|
} |
|
|
|
print(request_json) |
|
|
|
dialog_line_parse("http://192.168.31.145:50004/check", request_json) |
|
|
|
dialog_line_parse("http://192.168.31.145:50004/check1", request_json) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|