# -*- coding: utf-8 -*-

"""
@Time    :  2022/12/20 16:20
@Author  : 
@FileName: 
@Software: 
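@Describe: Count every character outside U+4E00..U+9FFF in the cells of
           ../data/论文_yy_小说.xlsx and write the counts, most frequent
           first, to ../data/fuhao.json.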
"""
import json
from collections import Counter

import pandas as pd
from tqdm import tqdm


# Input workbook: every cell of the first sheet is scanned, row by row.
path = "../data/论文_yy_小说.xlsx"
df_list = pd.read_excel(path).values.tolist()

# Frequency of every character that is NOT in the CJK Unified Ideographs
# range U+4E00..U+9FFF (presumably "fuhao" = symbols/punctuation; letters and
# digits fall outside that range too, so they are counted as well).
fuhao = Counter()
for row in tqdm(df_list):
    for cell in row:
        # Skip missing cells. pd.isna covers NaN, None, NaT and pd.NA, whereas
        # comparing str(cell) to "nan" only caught float NaN and let the
        # letters of "None" / "NaT" / "<NA>" pollute the counts.
        if pd.isna(cell):
            continue
        # Non-string cells (numbers, timestamps) are counted via their str()
        # form, as before.
        fuhao.update(
            ch for ch in str(cell) if not (u'\u4e00' <= ch <= u'\u9fff')
        )

# most_common() sorts by count, descending; characters with equal counts keep
# first-seen order, so the resulting dict is ordered most frequent first.
fuhao_new = dict(fuhao.most_common())

# ensure_ascii=False keeps non-ASCII characters readable in the output file.
with open('../data/fuhao.json', 'w', encoding="utf-8") as f_six:
    json.dump(fuhao_new, f_six, ensure_ascii=False, indent=2)