diff --git a/flask_check_bert_test.py b/flask_check_bert_test.py index 263e3ba..f853612 100644 --- a/flask_check_bert_test.py +++ b/flask_check_bert_test.py @@ -172,6 +172,9 @@ def original_text_contrast_func(data_sentence_dan, paper_dict): paper_dict[i[0]][ 3]) # text_original, bert_text, bert_text_pre + if sentence_0_bool == False or sentence_1_bool == False: + continue + start_dan = sentence_0_dan_red.index("") end_dan = sentence_0_dan_red.index("") - len("") @@ -180,9 +183,6 @@ def original_text_contrast_func(data_sentence_dan, paper_dict): if end_dan > end: end = end_dan - if sentence_0_bool == False or sentence_1_bool == False: - continue - similar_content_dan["content"] = sentence_1_dan_red similar_content_dan["title"] = i[3]["title"] similar_content_dan["author"] = i[3]["author"] @@ -752,81 +752,85 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre): :param bert_text_pre: :return: ''' + try: + fuhao = ["\n"] + up_pointer = 0 + down_pointer = 0 - fuhao = ["\n"] - up_pointer = 0 - down_pointer = 0 - - pointer_list = [] + pointer_list = [] - if len(bert_text_pre) > len(bert_text): - return False, "" + if len(bert_text_pre) > len(bert_text): + return False, "" - while True: - if down_pointer >= len(bert_text_pre): - break - elif down_pointer == len(bert_text_pre) - 1: - if bert_text[up_pointer] == bert_text_pre[down_pointer]: - pointer_list.append(up_pointer) + while True: + if down_pointer >= len(bert_text_pre): break - else: - up_pointer += 1 - down_pointer = 0 - pointer_list = [] - - elif bert_text[up_pointer] in fuhao: - up_pointer += 1 - - else: - if bert_text[up_pointer] == bert_text_pre[down_pointer]: - pointer_list.append(up_pointer) - up_pointer += 1 - down_pointer += 1 - else: - if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]": - up_pointer += 1 - down_pointer += 5 - pointer_list.append(up_pointer) - elif is_english_char(bert_text_pre[down_pointer]) == True: - up_pointer += 1 - down_pointer += 1 + elif down_pointer == len(bert_text_pre) - 1: + if bert_text[up_pointer] == bert_text_pre[down_pointer]: pointer_list.append(up_pointer) + break else: up_pointer += 1 down_pointer = 0 pointer_list = [] - start = pointer_list[0] - end = pointer_list[-1] - bert_text_list = list(bert_text) - bert_text_list.insert(start, "") - bert_text_list.insert(end + 2, "") - - text_original_list = list(text_original) - - up = 0 - down = 0 - - while True: - if up == len(text_original_list): - break - - if text_original_list[up] == bert_text_list[down]: - up += 1 - down += 1 + elif bert_text[up_pointer] in fuhao: + up_pointer += 1 - else: - if bert_text_list[down] == "": - down += 1 - elif bert_text_list[down] == "": - down += 1 else: - bert_text_list.insert(down, text_original_list[up]) + if bert_text[up_pointer] == bert_text_pre[down_pointer]: + pointer_list.append(up_pointer) + up_pointer += 1 + down_pointer += 1 + else: + if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]": + up_pointer += 1 + down_pointer += 5 + pointer_list.append(up_pointer) + elif is_english_char(bert_text_pre[down_pointer]) == True: + up_pointer += 1 + down_pointer += 1 + pointer_list.append(up_pointer) + else: + up_pointer += 1 + down_pointer = 0 + pointer_list = [] + + start = pointer_list[0] + end = pointer_list[-1] + bert_text_list = list(bert_text) + bert_text_list.insert(start, "") + bert_text_list.insert(end + 2, "") + + text_original_list = list(text_original) + + up = 0 + down = 0 + + while True: + if up == len(text_original_list): + break + + if text_original_list[up] == bert_text_list[down]: up += 1 down += 1 - bert_text = "".join(bert_text_list) - return True, bert_text + else: + if bert_text_list[down] == "": + down += 1 + elif bert_text_list[down] == "": + down += 1 + else: + bert_text_list.insert(down, text_original_list[up]) + up += 1 + down += 1 + + bert_text = "".join(bert_text_list) + return True, bert_text + except: + print("句子标红报错") + print(text_original, bert_text, bert_text_pre) + return False, "" def biaohong_bert_predict(sentence_0_list, sentence_1_list): @@ -927,7 +931,7 @@ def ulit_recall_paper(recall_data_list_dict): # return data data = [] - for i in list(recall_data_list_dict.items()): + for i in list(recall_data_list_dict.items())[:5]: data_one = processing_one_text(i[0]) data.extend(data_one) @@ -948,7 +952,7 @@ def recall_10(queue_uuid, title, abst_zh, content): "content": content } print(request_json) - da = dialog_line_parse("http://192.168.31.145:50004/check", request_json) + dialog_line_parse("http://192.168.31.145:50004/check", request_json) @@ -1126,6 +1130,7 @@ def classify_accurate_check(): recall_data_list_dict = json.loads(query_recall_dict["data"]) recall_data_list = ulit_recall_paper(recall_data_list_dict) data_dict_path = redis_.get(query_recall_uuid + "_request_check") + print(data_dict_path) with open(data_dict_path, encoding='utf8') as f: data_dict = json.loads(f.read())