Browse Source

完整版 v1.0:增加多条、多段文本匹配机制

master
majiahui@haimaqingfan.com 2 years ago
parent
commit
f2b8f1e092
  1. 69
      flask_check_bert_test.py

69
flask_check_bert_test.py

@ -451,7 +451,7 @@ def section_details_func(data_section_dan, paper_dict, num_words, centent_list):
original_text_list.append(original_text_contrast_dan["original_text"])
original_text = "".join(original_text_list)
repeat_rate = (repeat_words / section_words)* 100
repeat_rate = (repeat_words / section_words) * 100
repeat_rate = str(round(repeat_rate, 1)) + "%"
repeat_quote_info = repeat_quote_info_func(original_text_contrast, section_words)
@ -675,6 +675,7 @@ def accurate_check_rouge(
print("bool_check_sentense", bool_check_sentense)
print("找出相似的句子序号完成")
# print("data_zong", data_zong)
biao_red = biaohong(bool_check_sentense, data_zong,
recall_data_list) # [[[[0, 1, 2], [479, 480, 481]],[[0, 1, 2], [471, 472, 473]]], [[[3, 4, 5], [481, 482, 483]], [[3, 4, 5], [461, 462, 463]]]]
@ -836,6 +837,11 @@ def biaohong(bool_check_sentense, data_zong, df_train_nuoche):
tiaochu = True
break
# if bool_check_sentense[i][0][0] + 1 >= len(data_zong):
# if bool_check_sentense[]
# bool_check_sentense[i][0][0] + 1 = bool_check_sentense[i + 1][0][0]
# break
for j in bool_check_sentense[i]:
if j[1] + 1 >= len(df_train_nuoche):
tiaochu = True
@ -844,6 +850,21 @@ def biaohong(bool_check_sentense, data_zong, df_train_nuoche):
if tiaochu == True:
break
# elif bool_check_sentense[i-1][0][0] == start:
# biao_red_dan = []
# for j in range(len(bool_check_sentense[i-1])): # bool_check_sentense: [[[1, 223],[1, 226], [1, 562]],[[2, 243],[2, 226], [2, 561]]]
# biao_red_dan.append([[bool_check_sentense[i-1][j][0], bool_check_sentense[i-1][j][0]+ 1, bool_check_sentense[i-1][j][0] + 2],
# [bool_check_sentense[i-1][j][1] - 1, bool_check_sentense[i-1][j][1], bool_check_sentense[i+1][j][1] + 1]])
# biao_red.append(biao_red_dan)
#
# elif bool_check_sentense[i+1][0][0] == end:
# biao_red_dan = []
# for j in range(len(bool_check_sentense[i+1])): # bool_check_sentense: [[[1, 223],[1, 226], [1, 562]],[[2, 243],[2, 226], [2, 561]]]
# biao_red_dan.append([[bool_check_sentense[i+1][j][0]-2, bool_check_sentense[i+1][j][0]-1, bool_check_sentense[i+1][j][0]],
# [bool_check_sentense[i+1][j][1] - 1, bool_check_sentense[i+1][j][1], bool_check_sentense[i+1][j][1] + 1]])
# biao_red.append(biao_red_dan)
elif bool_check_sentense[i][0][0] - 1 == start:
i += 1
continue
@ -854,23 +875,43 @@ def biaohong(bool_check_sentense, data_zong, df_train_nuoche):
i += 1
continue
else:
biaohongset = set()
biao_red_dan = []
for j in range(len(bool_check_sentense[i])): # bool_check_sentense: [[[1, 223],[1, 226], [1, 562]],[[2, 243],[2, 226], [2, 561]]]
# biao_red_dan.append([bool_check_sentense[i][j][0] - 1, bool_check_sentense[i][j][1] - 1])
# biao_red_dan.append([bool_check_sentense[i][j][0], bool_check_sentense[i][j][1]])
# biao_red_dan.append([bool_check_sentense[i][j][0] + 1, bool_check_sentense[i][j][1] + 1])
# biao_red.append([[bool_check_sentense[i][0] - 1, bool_check_sentense[i][0], bool_check_sentense[i][0] + 1],
# [bool_check_sentense[i][1] - 1, bool_check_sentense[i][1], bool_check_sentense[i][1] + 1]])
# start = bool_check_sentense[i][0] - 1
# end = bool_check_sentense[i][0] + 1
# i += 1
# print("i:{}, j:{}".format(i, j), )
# print(bool_check_sentense)
# print([bool_check_sentense[i][j][0] - 1, bool_check_sentense[i][j][0], bool_check_sentense[i][j][0] + 1])
biao_red_dan.append([[bool_check_sentense[i][j][0] - 1, bool_check_sentense[i][j][0], bool_check_sentense[i][j][0] + 1],
[bool_check_sentense[i][j][1] - 1, bool_check_sentense[i][j][1], bool_check_sentense[i][j][1] + 1]])
if bool_check_sentense[i][j][1] not in biaohongset:
biao_red_dan.append([[bool_check_sentense[i][j][0] - 1, bool_check_sentense[i][j][0], bool_check_sentense[i][j][0] + 1],
[bool_check_sentense[i][j][1] - 1, bool_check_sentense[i][j][1], bool_check_sentense[i][j][1] + 1]])
biaohongset.add(bool_check_sentense[i][j][1] - 1)
biaohongset.add(bool_check_sentense[i][j][1])
biaohongset.add(bool_check_sentense[i][j][1] + 1)
else:
continue
start = bool_check_sentense[i][0][0] - 1
end = bool_check_sentense[i][0][0] + 1
if bool_check_sentense[i-1][0][0] == start:
for j in range(len(bool_check_sentense[i-1])): # bool_check_sentense: [[[1, 223],[1, 226], [1, 562]],[[2, 243],[2, 226], [2, 561]]]
if bool_check_sentense[i - 1][j][1] not in biaohongset:
biao_red_dan.append([[bool_check_sentense[i-1][j][0], bool_check_sentense[i-1][j][0] + 1, bool_check_sentense[i-1][j][0] + 2],
[bool_check_sentense[i-1][j][1] - 1, bool_check_sentense[i-1][j][1], bool_check_sentense[i-1][j][1] + 1]])
biaohongset.add(bool_check_sentense[i-1][j][1] - 1)
biaohongset.add(bool_check_sentense[i-1][j][1])
biaohongset.add(bool_check_sentense[i-1][j][1] + 1)
else:
continue
if bool_check_sentense[i+1][0][0] == end:
for j in range(len(bool_check_sentense[i+1])): # bool_check_sentense: [[[1, 223],[1, 226], [1, 562]],[[2, 243],[2, 226], [2, 561]]]
if bool_check_sentense[i + 1][j][1] not in biaohongset:
biao_red_dan.append([[bool_check_sentense[i+1][j][0]-2, bool_check_sentense[i+1][j][0]-1, bool_check_sentense[i+1][j][0]],
[bool_check_sentense[i+1][j][1] - 1, bool_check_sentense[i+1][j][1], bool_check_sentense[i+1][j][1] + 1]])
biaohongset.add(bool_check_sentense[i+1][j][1] - 1)
biaohongset.add(bool_check_sentense[i+1][j][1])
biaohongset.add(bool_check_sentense[i+1][j][1] + 1)
else:
continue
i += 1
biao_red.append(biao_red_dan)

Loading…
Cancel
Save