参考文献生成项目,使用faiss实现
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

31 lines
927 B

import json
from pypinyin import pinyin, Style
import pandas as pd
def hanzi_to_pinyin(hanzi):
    """Convert Chinese text into a single concatenated pinyin string.

    The conversion requests ``Style.NORMAL`` (plain pinyin without tone
    marks) and ``heteronym=False`` (one reading per item). The intermediate
    list returned by ``pinyin`` is printed for inspection before the
    readings are joined with no separator.

    Args:
        hanzi: The text to convert.

    Returns:
        The first reading of every item, concatenated into one string.
    """
    syllable_groups = pinyin(hanzi, heteronym=False, style=Style.NORMAL)
    print(syllable_groups)
    # Each item is a list of readings; only the first one is used.
    first_readings = (group[0] for group in syllable_groups)
    return ''.join(first_readings)
if __name__ == '__main__':
    # Load the classification sheet as a plain list of rows.
    rows = pd.read_excel("论文种类分类表1.xls").values.tolist()
    print(rows)

    # Map every distinct value found in the second column to its pinyin form.
    erji_dict = {}
    for row in rows:
        name = row[1]
        # Skip missing cells. pd.isna checks for NaN directly instead of
        # comparing the cell's string form against "nan".
        if pd.isna(name):
            continue
        # Convert each distinct name only once.
        if name not in erji_dict:
            erji_dict[name] = hanzi_to_pinyin(name)
    print(erji_dict)
    print(len(erji_dict))

    # ensure_ascii=False writes the Chinese keys as-is rather than as
    # \uXXXX escapes.
    with open("discipline_types.json", "w", encoding="utf-8") as f:
        json.dump(erji_dict, f, ensure_ascii=False, indent=2)