普通版降重
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

40 lines
890 B

# -*- coding: utf-8 -*-
"""
@Time : 2022/12/20 16:20
@Author :
@FileName:
@Software:
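@Describe: Count every character outside the CJK range U+4E00-U+9FFF found in the spreadsheet cells and write the frequencies, most frequent first, to ../data/fuhao.json.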
"""
import json
from collections import Counter

import pandas as pd
from tqdm import tqdm
# Input workbook; every cell of every row is scanned.
path = "../data/论文_yy_小说.xlsx"
df_list = pd.read_excel(path).values.tolist()

# Tally each character that falls outside the CJK Unified Ideographs range
# U+4E00..U+9FFF, i.e. everything that is not a common Chinese character.
fuhao = Counter()
for row in tqdm(df_list):
    for cell in row:
        text = str(cell)
        # Skip cells whose string form is "nan" (how a pandas NaN / empty
        # cell stringifies). NOTE(review): a cell holding the literal text
        # "nan" is skipped as well -- confirm this is acceptable.
        if text == "nan":
            continue
        fuhao.update(ch for ch in text if not (u'\u4e00' <= ch <= u'\u9fff'))

# Re-key by descending count. most_common() uses a stable sort, so characters
# with equal counts stay in first-seen order.
fuhao_new = dict(fuhao.most_common())

# ensure_ascii=False keeps the characters human-readable in the output file.
json_data = json.dumps(fuhao_new, ensure_ascii=False, indent=2)
with open('../data/fuhao.json', 'w', encoding="utf-8") as f_six:
    f_six.write(json_data)