普通版降重
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

43 lines
1.1 KiB

# -*- coding: utf-8 -*-
"""
@Time : 2022/12/20 17:56
@Author :
@FileName:
@Software:
@Describe:
"""
import pandas as pd
from tqdm import tqdm
import json
# Input workbook, given relative to the directory the script is run from.
path = "../data/论文_yy_小说_1.xlsx"

# Read the workbook into a DataFrame, then flatten it into a plain list of
# row lists (one inner list per spreadsheet row).
df_list = (
    pd.read_excel(io=path)
    .values
    .tolist()
)
def sentence_do(source, result):
    """Validate and clean one (source, result) text pair.

    Both arguments are converted with ``str()`` first. The pair is rejected
    when either string equals ``"nan"`` (presumably the stringified form of an
    empty spreadsheet cell -- confirm against the caller) or when either
    string is longer than 120 characters. The length is measured *before*
    any characters are stripped.

    Returns:
        A ``(keep, source, result)`` tuple. When ``keep`` is ``False`` the two
        strings are returned as converted, without cleaning. When ``keep`` is
        ``True`` the listed characters have been removed from both strings.
    """
    src_text, res_text = str(source), str(result)

    has_missing = "nan" in (src_text, res_text)
    too_long = max(len(src_text), len(res_text)) > 120
    if has_missing or too_long:
        return False, src_text, res_text

    # Strip the same characters, in the same order, from both strings.
    # NOTE(review): the third entry is an empty string in this copy of the
    # file, which makes that replace a no-op; it may originally have been an
    # invisible character -- confirm against the upstream file.
    for unwanted in ("\t", " ", "", "‚"):
        src_text = src_text.replace(unwanted, "")
        res_text = res_text.replace(unwanted, "")
    return True, src_text, res_text
# Keep only the rows that sentence_do accepts, storing the cleaned strings.
# Only the first two columns of each input row are used.
df_list_new = []
for row in df_list:
    # The flag is named `keep` so the builtin `bool` is not rebound at
    # module scope.
    keep, source, result = sentence_do(row[0], row[1])
    if keep:
        df_list_new.append([source, result])

# Write the surviving pairs to a new workbook under the two column headers.
df = pd.DataFrame(df_list_new, columns=["原文", "yy降重"])
# index=False: do not write the DataFrame's row index as an extra column.
df.to_excel("../data/论文_yy_小说_3.xlsx", index=False)