|
|
@ -20,15 +20,64 @@ import uuid |
|
|
|
import time |
|
|
|
import json |
|
|
|
import docx2txt |
|
|
|
import re |
|
|
|
|
|
|
|
|
|
|
|
# Redis connection shared by the whole module.
# A second, immediately-overwritten `pool` assignment (db=13) used to precede
# this one; only the db=12 pool was ever used, so the dead one is dropped.
# NOTE(review): the password is hard-coded in source; consider loading it from
# configuration or the environment instead.
pool = redis.ConnectionPool(host='localhost', port=63179, max_connections=100, db=12, password="zhicheng123*")
# NOTE(review): `decode_responses` is passed to `Redis` together with an
# explicit `connection_pool`; redis-py appears to take connection options from
# the pool in that case, so this flag may have no effect -- confirm before
# relying on str (rather than bytes) replies.
redis_ = redis.Redis(connection_pool=pool, decode_responses=True)

# Names of the Redis keys used by this module.
db_key_query = 'query'
db_key_querying = 'querying'
db_key_queryset = 'queryset'

batch_size = 32
|
|
|
# Matches a quoted dialog: "...", '...' or “...” (non-greedy, single line).
RE_DIALOG = re.compile(r"\".*?\"|\'.*?\'|“.*?”")


def get_dialogs_index(line: str):
    """Locate quoted dialogs in *line* and classify every character position.

    :param line: text to scan.
    :return: a 3-tuple ``(dialogs_text, dialogs_index, other_index)`` where
        ``dialogs_text`` is the list of matched dialog strings (quotes
        included), ``dialogs_index`` is the ascending list of character
        positions covered by a dialog, and ``other_index`` is the ascending
        list of all remaining positions of ``line``.
    """
    dialogs_text = []
    dialogs_index = []
    # One pass over the matches yields both the text and the covered span.
    for match in RE_DIALOG.finditer(line):
        dialogs_text.append(match.group())
        dialogs_index.extend(range(match.start(), match.end()))

    # Set membership keeps the complement computation linear in len(line).
    in_dialog = set(dialogs_index)
    other_index = [i for i in range(len(line)) if i not in in_dialog]

    return dialogs_text, dialogs_index, other_index
|
|
|
|
|
|
|
|
|
|
|
def chulichangju_1(text, chulipangban_return_list, max_len=500):
    """Split an over-long text into chunks of at most ``max_len`` characters.

    The text is consumed from left to right in windows of ``max_len``
    characters. Each chunk ends right after the last full stop ("。") of its
    window that does not lie inside a quoted dialog (as matched by
    ``RE_DIALOG``). If the window contains no such full stop, the whole
    window is emitted as a hard cut. The final remainder (``max_len``
    characters or fewer) is emitted as is, so an empty ``text`` yields a
    single empty chunk.

    :param text: text to split.
    :param chulipangban_return_list: list that receives the chunks; it is
        extended in place.
    :param max_len: maximum chunk length in characters (default 500).
    :return: ``chulipangban_return_list`` (the same list object).
    :raises ValueError: if ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be a positive integer")

    sentence_enders = ("。",)
    total = len(text)
    pos = 0

    # A loop instead of one recursive call per chunk: very long paragraphs
    # can no longer exhaust the interpreter's recursion limit.
    while total - pos > max_len:
        window_end = pos + max_len

        # Dialogs are detected starting at the current chunk start, so quote
        # pairing is evaluated on the remaining text only. The scan is lazy
        # and stops at the first dialog that begins beyond the window.
        protected = set()
        for match in RE_DIALOG.finditer(text, pos):
            if match.start() >= window_end:
                break
            protected.update(range(match.start(), min(match.end(), window_end)))

        # Search backwards for the last sentence end outside any dialog.
        cut = -1
        for i in range(window_end - 1, pos - 1, -1):
            if text[i] in sentence_enders and i not in protected:
                cut = i
                break

        # No usable sentence end: fall back to a hard cut at the window edge.
        next_pos = cut + 1 if cut != -1 else window_end
        chulipangban_return_list.append(text[pos:next_pos])
        pos = next_pos

    chulipangban_return_list.append(text[pos:])
    return chulipangban_return_list
|
|
|
|
|
|
|
def ulit_request_file(file): |
|
|
|
file_name = file.filename |
|
|
@ -47,6 +96,14 @@ def ulit_request_file(file): |
|
|
|
|
|
|
|
content_list = [i for i in content.split("\n")] |
|
|
|
print(content_list) |
|
|
|
|
|
|
|
content_list_new = [] |
|
|
|
for sen in content_list: |
|
|
|
if len(sen) < 500: |
|
|
|
content_list_new.append(sen) |
|
|
|
else: |
|
|
|
content_list_new.extend(chulichangju_1(sen, [])) |
|
|
|
|
|
|
|
return content_list |
|
|
|
|
|
|
|
|
|
|
|