You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
40 lines
890 B
40 lines
890 B
![]()
2 years ago
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
"""
|
||
|
@Time : 2022/12/20 16:20
|
||
|
@Author :
|
||
|
@FileName:
|
||
|
@Software:
|
||
|
@Describe:
|
||
|
"""
|
||
|
import json
from collections import Counter

import pandas as pd
from tqdm import tqdm
|
||
|
|
||
|
|
||
|
# Count every non-Chinese character that appears in the spreadsheet's cells
# and dump the frequencies, most frequent first, to ../data/fuhao.json.

# Input workbook; each row becomes one list of cell values.
path = "../data/论文_yy_小说.xlsx"
df_list = pd.read_excel(path).values.tolist()

# Frequency of each character that falls outside U+4E00..U+9FFF
# (the range this script treats as "Chinese character").
fuhao = Counter()
for row in tqdm(df_list):
    for cell in row:
        # Skip missing cells before stringifying: besides NaN, pd.isna also
        # catches NaT/None, whose text forms ("NaT", "None") would otherwise
        # be counted as ordinary characters.
        if pd.isna(cell):
            continue
        text = str(cell)
        # Kept from the original: a cell whose text is literally "nan" is
        # treated as missing.
        if text == "nan":
            continue
        fuhao.update(ch for ch in text if not (u'\u4e00' <= ch <= u'\u9fff'))

# most_common() sorts by count, descending; characters with equal counts
# stay in first-encountered order, matching a stable sorted(..., reverse=True).
test_1 = fuhao.most_common()
fuhao_new = dict(test_1)

# ensure_ascii=False keeps the symbols readable in the output file.
json_data = json.dumps(fuhao_new, ensure_ascii=False, indent=2)
with open('../data/fuhao.json', 'w', encoding="utf-8") as f_six:
    f_six.write(json_data)
|