纠错任务
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
2.0 KiB

import unicodedata
def is_chinese(char):
    """Return True if *char* is a CJK character, judged by its Unicode name.

    A character is treated as Chinese when its Unicode character name
    contains the substring ``"CJK"`` (for example
    ``CJK UNIFIED IDEOGRAPH-6211``).

    Characters that have no Unicode name, such as newline, tab and other
    control characters, are reported as non-Chinese rather than raising
    ``ValueError``.

    Args:
        char: A single-character string.

    Returns:
        ``True`` if the character's Unicode name contains ``"CJK"``,
        otherwise ``False``.

    Raises:
        TypeError: If *char* is not a single-character string (raised by
            ``unicodedata.name``).
    """
    # The "" default is returned for unnamed code points; without it
    # unicodedata.name raises ValueError on e.g. control characters.
    return "CJK" in unicodedata.name(char, "")
# Demo: separate a mixed string into its non-Chinese characters (kept at
# their original positions) and runs of consecutive Chinese characters,
# then write the Chinese characters back into the positions they came from.
a = "ab1我们12是一个"
b = [""] * len(a)  # one slot per character; Chinese positions stay ""
last_post = False  # True while the previous character was Chinese
c = []  # one list of (index, char) pairs per run of Chinese characters
for pos, ch in enumerate(a):
    if not is_chinese(ch):
        b[pos] = ch
        last_post = False
        continue
    if last_post:
        # Still inside the current run of Chinese characters.
        c[-1].append((pos, ch))
    else:
        # A new run of Chinese characters starts here.
        c.append([(pos, ch)])
    last_post = True
print(c)
print(b)

# Collapse every run into a plain string, dropping the indices.
d = ["".join(pair[1] for pair in run) for run in c]
print(d)

# Flatten the runs back into a single queue of characters.
e = d
f = "".join(e)
f_list = list(f)
print(f_list)

# Refill the empty slots, in order, from the front of the queue.
for pos, slot in enumerate(b):
    if slot != "":
        continue
    zi = f_list.pop(0)
    print(zi)
    b[pos] = zi
print(b)
class SentenceUlit:
    """Split a sentence into non-Chinese characters and runs of Chinese text.

    On construction the sentence is scanned once by ``pre_ulit``:

    * ``sentence_list`` has one entry per character of ``sentence``.
      Non-Chinese characters are stored at their own index; positions that
      held a Chinese character are left as ``""`` placeholders.
    * ``sentence_batch`` holds each maximal run of consecutive Chinese
      characters as one string, in order of appearance.

    ``inf_ulit`` later writes replacement characters back into the
    placeholder positions of ``sentence_list``.
    """

    def __init__(self, sentence):
        """Store *sentence* and split it immediately.

        Args:
            sentence: The string to split.
        """
        self.sentence = sentence
        self.sentence_list = [""] * len(sentence)
        # True while the most recently scanned character was Chinese.
        self.last_post = False
        self.sentence_batch = []
        # Fill-back state is created up front so both attributes exist even
        # if inf_ulit is never called. The "srt" spelling is kept because it
        # is the attribute name existing callers may read.
        self.inf_sentence_batch_str = ""
        self.inf_sentence_batch_srt_list = []
        self.pre_ulit()

    def is_chinese(self, char):
        """Return True if *char*'s Unicode name contains ``"CJK"``.

        Characters without a Unicode name (newline, tab, other control
        characters) are reported as non-Chinese instead of raising
        ``ValueError``.

        Args:
            char: A single-character string.

        Returns:
            ``True`` for a CJK-named character, otherwise ``False``.
        """
        # "" is the fallback name for unnamed code points.
        return "CJK" in unicodedata.name(char, "")

    def pre_ulit(self):
        """Scan ``self.sentence`` and fill ``sentence_list``/``sentence_batch``.

        Non-Chinese characters are copied into ``sentence_list`` at their
        index. Chinese characters are appended to the current run in
        ``sentence_batch``, starting a new run whenever the previous
        character was not Chinese.
        """
        for index, char in enumerate(self.sentence):
            # Use the method rather than a module-level helper so the class
            # does not depend on a global of the same name being defined.
            if self.is_chinese(char):
                if self.last_post:
                    self.sentence_batch[-1] += char
                else:
                    self.sentence_batch.append(char)
                self.last_post = True
            else:
                self.sentence_list[index] = char
                self.last_post = False

    def inf_ulit(self, sen):
        """Write replacement characters into the empty slots of ``sentence_list``.

        The strings in *sen* are concatenated and their characters are
        assigned, in order, to the ``""`` placeholders in ``sentence_list``.
        Afterwards ``inf_sentence_batch_str`` holds the concatenated text of
        this call and ``inf_sentence_batch_srt_list`` holds the characters
        that were not needed.

        Args:
            sen: An iterable of strings (for example the corrected versions
                of ``sentence_batch``).

        Raises:
            IndexError: If *sen* supplies fewer characters than there are
                empty slots. Nothing is modified in that case.
        """
        # Build the text from this call only; accumulating across calls would
        # mix in characters from an earlier (possibly failed) call.
        text = "".join(sen)
        chars = list(text)
        empty_slots = [
            index for index, slot in enumerate(self.sentence_list) if slot == ""
        ]
        # Validate before writing so a short input cannot leave
        # sentence_list half filled.
        if len(chars) < len(empty_slots):
            raise IndexError(
                f"need {len(empty_slots)} replacement characters, "
                f"got {len(chars)}"
            )
        for index, char in zip(empty_slots, chars):
            self.sentence_list[index] = char
        self.inf_sentence_batch_str = text
        # Slice once instead of popping from the front for every slot.
        self.inf_sentence_batch_srt_list = chars[len(empty_slots):]
# Demo: run the class-based splitter on the sample sentence and show the
# Chinese runs followed by the per-character list (Chinese slots are "").
sen = SentenceUlit("ab1我们12是一个")
print(sen.sentence_batch, sen.sentence_list, sep="\n")