# -*- coding: utf-8 -*-
"""
@Time    : 2022/12/20 17:56
@Author  :
@FileName:
@Software:
@Describe: Filter and clean sentence pairs stored in a CSV file.

Reads the input CSV, takes the first two columns of every row as a
(source, result) pair, drops pairs that are missing or too long, strips a
fixed set of characters from the remaining pairs and writes them to a new
CSV file.
"""
import json

import pandas as pd
from tqdm import tqdm

path = "../data/论文_yy_小说_1.csv"
df_list = pd.read_csv(path).values.tolist()


def _clean(text):
    """Return *text* with the unwanted characters removed."""
    # NOTE(review): the third replace target appears to be an empty string in
    # this view, which makes that call a no-op - confirm whether an invisible
    # character was intended there.
    return text.replace("\t", "").replace(" ", "").replace("", "").replace("‚", "")


def sentence_do(source, result, max_len=120):
    """Validate and clean one (source, result) sentence pair.

    Both values are converted with ``str()`` first, so a missing cell (float
    NaN) becomes the string ``"nan"``.

    Args:
        source: Value of the first column of a row.
        result: Value of the second column of a row.
        max_len: Maximum allowed length of either string. The length is
            checked before any character is stripped.

    Returns:
        A tuple ``(ok, source, result)``. ``ok`` is ``False`` when either
        value is ``"nan"`` or longer than ``max_len``; the strings are then
        returned uncleaned. Otherwise ``ok`` is ``True`` and both strings
        are returned with the unwanted characters removed.
    """
    source = str(source)
    result = str(result)

    if source == "nan" or result == "nan":
        return False, source, result
    if len(source) > max_len or len(result) > max_len:
        return False, source, result

    return True, _clean(source), _clean(result)


# Keep only the pairs accepted by sentence_do, in their cleaned form.
df_list_new = []
for row in df_list:
    # The first column is the source text, the second is the result text.
    ok, source, result = sentence_do(row[0], row[1])
    if ok:
        df_list_new.append([source, result])

df = pd.DataFrame(df_list_new, columns=["原文", "yy降重"])
# index=False keeps the row index out of the output file.
df.to_csv("../data/论文_yy_小说_3.csv", index=False)