# -*- coding: utf-8 -*-

"""
@Time    :  2022/12/20 17:56
@Author  : 
@FileName: 
@Software: 
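@Describe: Filter the sentence pairs of an Excel sheet (drop pairs with a missing value or a sentence longer than 120 characters, strip tab/space characters) and save the remaining pairs to a new Excel file.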
"""
import pandas as pd
from tqdm import tqdm
import json


# Input workbook, given relative to the current working directory.
path = "../data/论文_yy_小说_1.xlsx"

# Load the sheet, then flatten it into a plain list of row lists
# (one inner list per spreadsheet row).
_raw_df = pd.read_excel(path)
df_list = _raw_df.values.tolist()


def sentence_do(source, result):
    """Validate and clean one pair of texts.

    Both values are converted with ``str()`` first. The pair is rejected when
    either string equals ``"nan"`` (which is what ``str()`` yields for a float
    NaN) or when either string is longer than 120 characters. The length is
    measured before any characters are removed.

    Args:
        source: First text of the pair; any value accepted by ``str()``.
        result: Second text of the pair; any value accepted by ``str()``.

    Returns:
        A 3-tuple ``(ok, source, result)``. When ``ok`` is False the two
        strings are the unmodified ``str()`` conversions; when it is True the
        unwanted substrings have been removed from both of them.
    """
    pair = [str(source), str(result)]

    # Guard clauses: missing values first, then over-long sentences.
    if "nan" in pair:
        return False, pair[0], pair[1]
    if max(len(pair[0]), len(pair[1])) > 120:
        return False, pair[0], pair[1]

    # Substrings removed from both texts, applied in this order.
    # NOTE(review): the third entry displays as an empty string, which makes
    # that replace a no-op; it may have held an invisible character -- confirm.
    unwanted = ("\t", " ", "", "‚")
    cleaned = []
    for text in pair:
        for piece in unwanted:
            text = text.replace(piece, "")
        cleaned.append(text)
    return True, cleaned[0], cleaned[1]


# Keep only the pairs that sentence_do accepts, stored in their cleaned form.
df_list_new = []
for row in df_list:
    # Only the first two columns of each row are used.
    # The flag is named `keep` rather than `bool` so the builtin `bool`
    # is not shadowed at module level.
    keep, source, result = sentence_do(row[0], row[1])
    if keep:
        df_list_new.append([source, result])

# Write the surviving pairs to a new workbook; index=False leaves the
# DataFrame row index out of the sheet.
df = pd.DataFrame(df_list_new, columns=["原文", "yy降重"])
df.to_excel("../data/论文_yy_小说_3.xlsx", index=False)