You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
2.0 KiB
94 lines
2.0 KiB
![]()
2 years ago
|
import unicodedata
|
||
|
def is_chinese(char):
    """Return True when the Unicode name of *char* contains ``'CJK'``.

    Args:
        char: A single-character string.

    Returns:
        bool: True if ``'CJK'`` appears in the character's Unicode name,
        False otherwise (including characters that have no name at all).
    """
    # unicodedata.name() raises ValueError for code points without a name
    # (e.g. control characters such as "\n"); the '' default turns that
    # case into a plain "not CJK" answer instead of a crash.
    return 'CJK' in unicodedata.name(char, '')
|
||
|
|
||
|
|
||
|
|
||
|
# Procedural walkthrough of the mask-and-refill idea:
#   1. copy non-CJK characters into per-position slots, leaving CJK
#      positions empty and recording each run of consecutive CJK characters;
#   2. join every run into a string;
#   3. flatten the run strings back into characters and write them into the
#      empty slots in order.
text = "ab1我们12是一个"

# One slot per input character; CJK positions stay "" until refilled below.
slots = [""] * len(text)

# Each run of consecutive CJK characters is stored as [(index, char), ...].
cjk_runs = []
in_cjk_run = False
for pos, ch in enumerate(text):
    if is_chinese(ch):
        if in_cjk_run:
            cjk_runs[-1].append((pos, ch))
        else:
            cjk_runs.append([(pos, ch)])
        in_cjk_run = True
    else:
        slots[pos] = ch
        in_cjk_run = False
print(cjk_runs)
print(slots)

# Drop the indices and keep one string per run.
run_strings = ["".join(ch for _, ch in run) for run in cjk_runs]
print(run_strings)

# Flatten the run strings into the individual characters used for refilling.
refill_chars = list("".join(run_strings))
print(refill_chars)

# Consume the refill characters front to back; an iterator avoids the
# O(n) cost of list.pop(0) on every step.
pending = iter(refill_chars)
for pos, slot in enumerate(slots):
    if slot == "":
        replacement = next(pending)
        print(replacement)
        slots[pos] = replacement
print(slots)
|
||
|
|
||
|
class SentenceUlit:
    """Split a sentence into kept characters and CJK runs, then refill the runs.

    On construction the sentence is scanned once:

    * ``sentence_list`` gets one entry per input character; non-CJK
      characters are copied through and CJK positions are left as ``""``.
    * ``sentence_batch`` collects each run of consecutive CJK characters as
      one string, in order of appearance.

    ``inf_ulit`` later writes replacement characters back into the empty
    positions of ``sentence_list``.
    """

    def __init__(self, sentence):
        """Store *sentence* and immediately run the pre-processing scan.

        Args:
            sentence: The input string to split.
        """
        self.sentence = sentence
        # "" marks a position that still has to be filled by inf_ulit().
        self.sentence_list = [""] * len(sentence)
        # True while the previously scanned character was CJK.
        self.last_post = False
        self.sentence_batch = []
        # Replacement text accumulated by inf_ulit().
        self.inf_sentence_batch_str = ""
        self.pre_ulit()

    def is_chinese(self, char):
        """Return True when the Unicode name of *char* contains ``'CJK'``.

        Characters without a Unicode name (e.g. control characters) are
        reported as not CJK instead of raising ``ValueError``.
        """
        return 'CJK' in unicodedata.name(char, '')

    def pre_ulit(self):
        """Scan ``self.sentence`` and fill ``sentence_list`` / ``sentence_batch``.

        Non-CJK characters are copied to the same index of ``sentence_list``.
        Each CJK character either starts a new entry in ``sentence_batch`` or
        extends the last one when it directly follows another CJK character.
        """
        for index, char in enumerate(self.sentence):
            # Use the method, not a module-level function, so the class does
            # not depend on a global helper being defined.
            if not self.is_chinese(char):
                self.sentence_list[index] = char
                self.last_post = False
            elif self.last_post:
                self.sentence_batch[-1] += char
            else:
                self.sentence_batch.append(char)
                self.last_post = True

    def inf_ulit(self, sen):
        """Write replacement characters into the empty slots of ``sentence_list``.

        The strings in *sen* are concatenated (appended to
        ``inf_sentence_batch_str``) and the resulting characters are placed,
        in order, into every position of ``sentence_list`` that is ``""``.
        Characters that are not needed remain in
        ``inf_sentence_batch_srt_list`` afterwards.

        Args:
            sen: Iterable of strings supplying the replacement characters.

        Raises:
            IndexError: If there are fewer replacement characters than empty
                slots. Slots filled before the shortage stay filled.
        """
        self.inf_sentence_batch_str += "".join(sen)
        self.inf_sentence_batch_srt_list = list(self.inf_sentence_batch_str)

        # Walk the characters with an iterator instead of list.pop(0), which
        # shifts the whole list on every call.
        pending = iter(self.inf_sentence_batch_srt_list)
        for index, slot in enumerate(self.sentence_list):
            if slot != "":
                continue
            try:
                self.sentence_list[index] = next(pending)
            except StopIteration:
                # Everything was consumed; mirror that in the public list.
                self.inf_sentence_batch_srt_list = []
                raise IndexError(
                    "not enough replacement characters to fill sentence_list"
                ) from None
        # Keep only the characters that were not consumed.
        self.inf_sentence_batch_srt_list = list(pending)
|
||
|
|
||
|
|
||
|
# Demo: split a mixed ASCII/CJK sentence and show both views of the result,
# first the collected CJK runs, then the per-character list with empty
# placeholders at the CJK positions.
sen = SentenceUlit("ab1我们12是一个")

for demo_view in (sen.sentence_batch, sen.sentence_list):
    print(demo_view)
|