You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
927 B
31 lines
927 B
import json
|
|
from pypinyin import pinyin, Style
|
|
import pandas as pd
|
|
|
|
|
|
def hanzi_to_pinyin(hanzi):
    """Convert Chinese text into a single concatenated pinyin string.

    Args:
        hanzi: Text to convert; passed unchanged to ``pypinyin.pinyin``.

    Returns:
        str: The first reading of every entry returned by ``pinyin``,
        joined together with no separator.
    """
    # Style.NORMAL produces plain pinyin WITHOUT tone marks or tone numbers.
    # heteronym=False asks for a single reading per entry, so taking index 0
    # of each inner list is sufficient.
    pinyin_list = pinyin(hanzi, style=Style.NORMAL, heteronym=False)

    # Join the per-entry readings into one string.
    return ''.join(reading[0] for reading in pinyin_list)
|
|
|
|
|
|
if __name__ == '__main__':
    # Load the classification spreadsheet as a list of row lists.
    df_list = pd.read_excel("论文种类分类表1.xls").values.tolist()
    print(df_list)

    # Maps each distinct value found in the second column to its pinyin.
    erji_dict = {}
    for row in df_list:
        name = row[1]
        # Skip missing cells. pd.isna recognises NaN/None/NaT/pd.NA directly,
        # instead of relying on the value's string form being "nan".
        if pd.isna(name):
            continue
        # Convert each distinct name only once.
        if name not in erji_dict:
            erji_dict[name] = hanzi_to_pinyin(name)

    print(erji_dict)
    print(len(erji_dict))

    # ensure_ascii=False keeps the Chinese keys readable in the output file.
    with open("discipline_types.json", "w", encoding="utf-8") as f:
        json.dump(erji_dict, f, ensure_ascii=False, indent=2)
|