# -*- coding: utf-8 -*-
"""
@Time    : 2022/12/20 16:20
@Author  :
@FileName:
@Software:
@Describe: Count how often each non-Chinese character occurs in the cells of
           an Excel sheet and save the counts, most frequent first, as JSON.
"""
import json
from collections import Counter

import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic, so fall back to plain iteration
    # when tqdm is not installed instead of failing at import time.
    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged (no progress bar available)."""
        return iterable

INPUT_PATH = "../data/论文_yy_小说.xlsx"
OUTPUT_PATH = '../data/fuhao.json'


def _is_chinese(ch):
    """Return True if *ch* lies in the range U+4E00..U+9FFF."""
    return '\u4e00' <= ch <= '\u9fff'


def count_non_chinese_chars(rows):
    """Count every character outside U+4E00..U+9FFF in a table of cells.

    Args:
        rows: Iterable of rows, each row an iterable of cell values. Every
            non-missing cell is converted with ``str()`` before counting, so
            numbers contribute their digits and punctuation.

    Returns:
        A ``collections.Counter`` mapping each character to its number of
        occurrences, with keys in first-seen order.
    """
    counts = Counter()
    for row in rows:
        for cell in row:
            # Use a real missing-value test rather than comparing the text to
            # "nan": that comparison let NaT/None through, so the letters of
            # "NaT" / "None" were counted as if they were data.
            if pd.isna(cell):
                continue
            counts.update(ch for ch in str(cell) if not _is_chinese(ch))
    return counts


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read the Excel sheet, count its non-Chinese characters, write JSON.

    Args:
        input_path: Path of the Excel file to read.
        output_path: Path of the JSON file to write (UTF-8, overwritten).
    """
    rows = pd.read_excel(input_path).values.tolist()
    counts = count_non_chinese_chars(tqdm(rows))

    # most_common() sorts by count in descending order; characters with equal
    # counts keep their first-seen order, and the dict preserves that order
    # when serialized.
    ordered = dict(counts.most_common())

    json_data = json.dumps(ordered, ensure_ascii=False, indent=2)
    with open(output_path, 'w', encoding="utf-8") as out_file:
        out_file.write(json_data)


if __name__ == "__main__":
    main()