diff --git a/flask_check_bert_test.py b/flask_check_bert_test.py
index 263e3ba..f853612 100644
--- a/flask_check_bert_test.py
+++ b/flask_check_bert_test.py
@@ -172,6 +172,9 @@ def original_text_contrast_func(data_sentence_dan, paper_dict):
paper_dict[i[0]][
3]) # text_original, bert_text, bert_text_pre
+ if sentence_0_bool == False or sentence_1_bool == False:
+ continue
+
start_dan = sentence_0_dan_red.index("")
end_dan = sentence_0_dan_red.index("") - len("")
@@ -180,9 +183,6 @@ def original_text_contrast_func(data_sentence_dan, paper_dict):
if end_dan > end:
end = end_dan
- if sentence_0_bool == False or sentence_1_bool == False:
- continue
-
similar_content_dan["content"] = sentence_1_dan_red
similar_content_dan["title"] = i[3]["title"]
similar_content_dan["author"] = i[3]["author"]
@@ -752,81 +752,85 @@ def original_text_marked_red(text_original, bert_text, bert_text_pre):
:param bert_text_pre:
:return:
'''
+ try:
+ fuhao = ["\n"]
+ up_pointer = 0
+ down_pointer = 0
- fuhao = ["\n"]
- up_pointer = 0
- down_pointer = 0
-
- pointer_list = []
+ pointer_list = []
- if len(bert_text_pre) > len(bert_text):
- return False, ""
+ if len(bert_text_pre) > len(bert_text):
+ return False, ""
- while True:
- if down_pointer >= len(bert_text_pre):
- break
- elif down_pointer == len(bert_text_pre) - 1:
- if bert_text[up_pointer] == bert_text_pre[down_pointer]:
- pointer_list.append(up_pointer)
+ while True:
+ if down_pointer >= len(bert_text_pre):
break
- else:
- up_pointer += 1
- down_pointer = 0
- pointer_list = []
-
- elif bert_text[up_pointer] in fuhao:
- up_pointer += 1
-
- else:
- if bert_text[up_pointer] == bert_text_pre[down_pointer]:
- pointer_list.append(up_pointer)
- up_pointer += 1
- down_pointer += 1
- else:
- if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]":
- up_pointer += 1
- down_pointer += 5
- pointer_list.append(up_pointer)
- elif is_english_char(bert_text_pre[down_pointer]) == True:
- up_pointer += 1
- down_pointer += 1
+ elif down_pointer == len(bert_text_pre) - 1:
+ if bert_text[up_pointer] == bert_text_pre[down_pointer]:
pointer_list.append(up_pointer)
+ break
else:
up_pointer += 1
down_pointer = 0
pointer_list = []
- start = pointer_list[0]
- end = pointer_list[-1]
- bert_text_list = list(bert_text)
- bert_text_list.insert(start, "")
- bert_text_list.insert(end + 2, "")
-
- text_original_list = list(text_original)
-
- up = 0
- down = 0
-
- while True:
- if up == len(text_original_list):
- break
-
- if text_original_list[up] == bert_text_list[down]:
- up += 1
- down += 1
+ elif bert_text[up_pointer] in fuhao:
+ up_pointer += 1
- else:
- if bert_text_list[down] == "":
- down += 1
- elif bert_text_list[down] == "":
- down += 1
else:
- bert_text_list.insert(down, text_original_list[up])
+ if bert_text[up_pointer] == bert_text_pre[down_pointer]:
+ pointer_list.append(up_pointer)
+ up_pointer += 1
+ down_pointer += 1
+ else:
+ if bert_text_pre[down_pointer:down_pointer + 5] == "[UNK]":
+ up_pointer += 1
+ down_pointer += 5
+ pointer_list.append(up_pointer)
+ elif is_english_char(bert_text_pre[down_pointer]) == True:
+ up_pointer += 1
+ down_pointer += 1
+ pointer_list.append(up_pointer)
+ else:
+ up_pointer += 1
+ down_pointer = 0
+ pointer_list = []
+
+ start = pointer_list[0]
+ end = pointer_list[-1]
+ bert_text_list = list(bert_text)
+ bert_text_list.insert(start, "")
+ bert_text_list.insert(end + 2, "")
+
+ text_original_list = list(text_original)
+
+ up = 0
+ down = 0
+
+ while True:
+ if up == len(text_original_list):
+ break
+
+ if text_original_list[up] == bert_text_list[down]:
up += 1
down += 1
- bert_text = "".join(bert_text_list)
- return True, bert_text
+ else:
+ if bert_text_list[down] == "":
+ down += 1
+ elif bert_text_list[down] == "":
+ down += 1
+ else:
+ bert_text_list.insert(down, text_original_list[up])
+ up += 1
+ down += 1
+
+ bert_text = "".join(bert_text_list)
+ return True, bert_text
+ except:
+ print("句子标红报错")
+ print(text_original, bert_text, bert_text_pre)
+ return False, ""
def biaohong_bert_predict(sentence_0_list, sentence_1_list):
@@ -927,7 +931,7 @@ def ulit_recall_paper(recall_data_list_dict):
# return data
data = []
- for i in list(recall_data_list_dict.items()):
+ for i in list(recall_data_list_dict.items())[:5]:
data_one = processing_one_text(i[0])
data.extend(data_one)
@@ -948,7 +952,7 @@ def recall_10(queue_uuid, title, abst_zh, content):
"content": content
}
print(request_json)
- da = dialog_line_parse("http://192.168.31.145:50004/check", request_json)
+ dialog_line_parse("http://192.168.31.145:50004/check", request_json)
@@ -1126,6 +1130,7 @@ def classify_accurate_check():
recall_data_list_dict = json.loads(query_recall_dict["data"])
recall_data_list = ulit_recall_paper(recall_data_list_dict)
data_dict_path = redis_.get(query_recall_uuid + "_request_check")
+ print(data_dict_path)
with open(data_dict_path, encoding='utf8') as f:
data_dict = json.loads(f.read())