Browse Source

放弃使用短句更改,改为长句更改

master
majiahui@haimaqingfan.com 1 month ago
parent
commit
e12a62b8a8
  1. 58
      flask_drop_rewrite_request.py

58
flask_drop_rewrite_request.py

@@ -317,65 +317,9 @@ def get_multiple_urls(text_info):
def chulipangban_test_1(snetence_id, text):
# Builds and returns `sentence_batch_list`, a list whose entries have the
# form [text_piece, snetence_id, 0].
#
# NOTE(review): this block is shown as rendered in a commit-diff view. All
# indentation is gone and removed/added lines appear interleaved without
# +/- markers, so the nesting of the statements below cannot be read off
# this text. Confirm the structure against the repository.
#
# Quote handling
dialogs_text, dialogs_index, other_index = get_dialogs_index(text)
# For each entry, split `text` on it and re-join with the same entry.
# NOTE(review): split followed by join on the same non-empty separator
# reproduces the input string, so as written this leaves `text` unchanged.
for dialogs_text_dan in dialogs_text:
text_dan_list = text.split(dialogs_text_dan)
text = dialogs_text_dan.join(text_dan_list)
# text_new_str = "".join(text_new)
# Pick the sentence separator and the length threshold from the result of
# has_chinese(text) (presumably a "contains Chinese characters" check;
# the helper is not visible here).
if has_chinese(text) == False:
spilt_word = ". "
spilt_sen_len = 1e9
is_chinese = False
else:
# NOTE(review): the separator is an empty string as shown; str.split("")
# raises ValueError. The original literal may have been lost when this
# page was captured -- confirm against the repository.
spilt_word = ""
spilt_sen_len = 120
is_chinese = True
# Holds the processed entries
sentence_batch_list = []
if is_chinese == False:
# Non-Chinese path: run the state machine over an EnSequence wrapping the
# text, then record every item of m_input.sentence_list() as an entry.
__long_machine_en = StateMachine(long_cuter_en(max_len=25, min_len=3))
m_input = EnSequence(text)
__long_machine_en.run(m_input)
for v in m_input.sentence_list():
sentence_batch_list.append([v, snetence_id, 0])
else:
# Chinese path: split on the separator chosen above.
sentence_list = text.split(spilt_word)
# sentence_list_new = []
# for i in sentence_list:
# if i != "":
# sentence_list_new.append(i)
# sentence_list = sentence_list_new
# NOTE(review): sentence_batch_length is accumulated below but never read
# within this function.
sentence_batch_length = 0
# Every piece except the last was followed by a separator in the input,
# so the separator is appended back onto it.
for sentence in sentence_list[:-1]:
if len(sentence) < spilt_sen_len:
sentence_batch_length += len(sentence)
sentence_batch_list.append([sentence + spilt_word, snetence_id, 0])
# sentence_pre = autotitle.gen_synonyms_short(sentence)
# return_list.append(sentence_pre)
else:
# Pieces at or above the threshold are handed to chulichangju_1; its
# result is used as a list of entries whose element 0 is a string
# (presumably the same [text, id, 0] shape -- confirm in that helper).
sentence_split_list = chulichangju_1(sentence, snetence_id, [], 0)
for sentence_short in sentence_split_list[:-1]:
sentence_batch_list.append(sentence_short)
# Restore the separator on the final sub-piece only.
sentence_split_list[-1][0] = sentence_split_list[-1][0] + spilt_word
sentence_batch_list.append(sentence_split_list[-1])
# The last piece had no trailing separator; it is skipped when empty and
# otherwise handled like the others, without re-appending the separator.
if sentence_list[-1] != "":
if len(sentence_list[-1]) < spilt_sen_len:
sentence_batch_length += len(sentence_list[-1])
sentence_batch_list.append([sentence_list[-1], snetence_id, 0])
# sentence_pre = autotitle.gen_synonyms_short(sentence)
# return_list.append(sentence_pre)
else:
sentence_split_list = chulichangju_1(sentence_list[-1], snetence_id, [], 0)
for sentence_short in sentence_split_list:
sentence_batch_list.append(sentence_short)
# Appends the whole text as a single entry.
# NOTE(review): this looks like the line added by this commit in place of
# the splitting logic above, rather than a statement that runs after it --
# confirm against the repository.
sentence_batch_list.append([text, snetence_id, 0])
return sentence_batch_list

Loading…
Cancel
Save