From 366be0e0f4168722f249d278163bfdb9291ca19f Mon Sep 17 00:00:00 2001 From: "majiahui@haimaqingfan.com" <majiahui@haimaqingfan.com> Date: Sun, 10 Sep 2023 16:31:31 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=8F=8D=E5=90=91=E6=A0=87?= =?UTF-8?q?=E7=BA=A2=E6=A0=87=E6=B3=A8bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_check_bert_test.py | 68 ++++++++++++------------------------------------ 1 file changed, 17 insertions(+), 51 deletions(-) diff --git a/flask_check_bert_test.py b/flask_check_bert_test.py index f853612..9c9bd8e 100644 --- a/flask_check_bert_test.py +++ b/flask_check_bert_test.py @@ -34,6 +34,12 @@ rouge = Rouge() rouge_model = Rouge_w() rouge_l_model = Rouge_l() +def jaccard_similarity(s1, s2): + set1 = set(s1) + set2 = set(s2) + intersection = set1 & set2 + union = set1 | set2 + return len(intersection) / len(union) def bert_check(text, recall_data_list): ''' @@ -77,7 +83,8 @@ def rouge_value_self(data_1, data_2): for sen_1, sen_2 in zip(data_1, data_2): sen_1 = sen_1.split(" ") sen_2 = sen_2.split(" ") - rouge_l_score = rouge_l_model.score(sen_1, sen_2) + # rouge_l_score = rouge_l_model.score(sen_1, sen_2) + rouge_l_score = jaccard_similarity(sen_1, sen_2) rouge_l_list.append(rouge_l_score) return "", "", rouge_l_list @@ -104,8 +111,6 @@ def rouge_pre(text, df_train_nuoche): def rouge_pre_m(text, df_train_nuoche): - - return_list = [] index_rouge_list = [] @@ -165,12 +170,12 @@ def original_text_contrast_func(data_sentence_dan, paper_dict): } sentence_0_bool, sentence_0_dan_red = original_text_marked_red(i[1], paper_dict[i[0]][0], - paper_dict[i[0]][ - 1]) # text_original, bert_text, bert_text_pre + paper_dict[i[0]][4][0], + paper_dict[i[0]][4][1]) # text_original, bert_text, bert_text_pre sentence_1_bool, sentence_1_dan_red = original_text_marked_red(i[2], paper_dict[i[0]][2], - paper_dict[i[0]][ - 3]) # text_original, bert_text, bert_text_pre + paper_dict[i[0]][4][0], + paper_dict[i[0]][4][1]) # text_original, bert_text, bert_text_pre if sentence_0_bool == False or sentence_1_bool == False: continue @@ -571,7 +576,7 @@ def accurate_check_rouge( # rouge算法 for i in range(len(data_zong)): for j in range(len(data_zong[i])): - if data_zong[i][j][1] > 0.47: + if data_zong[i][j][1] > 0.35: bool_check_sentense.append([i, data_zong[i][j][0]]) biao_red = biaohong(bool_check_sentense, data_zong, @@ -744,7 +749,7 @@ def is_english_char(char): return 32 <= code <= 126 -def original_text_marked_red(text_original, bert_text, bert_text_pre): +def original_text_marked_red(text_original, bert_text, start, end): ''' 把原文标红字段找到 :param text_original: @@ -759,45 +764,6 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre): pointer_list = [] - if len(bert_text_pre) > len(bert_text): - return False, "" - - while True: - if down_pointer >= len(bert_text_pre): - break - elif down_pointer == len(bert_text_pre) - 1: - if bert_text[up_pointer] == bert_text_pre[down_pointer]: - pointer_list.append(up_pointer) - break - else: - up_pointer += 1 - down_pointer = 0 - pointer_list = [] - - elif bert_text[up_pointer] in fuhao: - up_pointer += 1 - - else: - if bert_text[up_pointer] == bert_text_pre[down_pointer]: - pointer_list.append(up_pointer) - up_pointer += 1 - down_pointer += 1 - else: - if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]": - up_pointer += 1 - down_pointer += 5 - pointer_list.append(up_pointer) - elif is_english_char(bert_text_pre[down_pointer]) == True: - up_pointer += 1 - down_pointer += 1 - pointer_list.append(up_pointer) - else: - up_pointer += 1 - down_pointer = 0 - pointer_list = [] - - start = pointer_list[0] - end = pointer_list[-1] bert_text_list = list(bert_text) bert_text_list.insert(start, "<red>") bert_text_list.insert(end + 2, "</red>") @@ -829,7 +795,7 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre): return True, bert_text except: print("句子标红报错") - print(text_original, bert_text, bert_text_pre) + print(text_original, bert_text) return False, "" @@ -931,7 +897,7 @@ def ulit_recall_paper(recall_data_list_dict): # return data data = [] - for i in list(recall_data_list_dict.items())[:5]: + for i in list(recall_data_list_dict.items()): data_one = processing_one_text(i[0]) data.extend(data_one) @@ -952,7 +918,7 @@ def recall_10(queue_uuid, title, abst_zh, content): "content": content } print(request_json) - dialog_line_parse("http://192.168.31.145:50004/check", request_json) + dialog_line_parse("http://192.168.31.145:50004/check1", request_json)