You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
	
	
		
		
			
	
	
		
			
				
					
						
							|  |  |  | # -*- coding: utf-8 -*- | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | @Time    :  2023/2/27 18:24 | 
					
						
							|  |  |  | @Author  : | 
					
						
							|  |  |  | @FileName: | 
					
						
							|  |  |  | @Software: | 
					
						
							|  |  |  | @Describe: | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | import pandas as pd | 
					
						
							|  |  |  | import difflib | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # file = "../data/11篇汇总txt_new_predict_t5.txt" | 
					
						
							|  |  |  | file = "../data/11篇汇总txt_new_predict_t5_0724.txt" | 
					
						
							|  |  |  | try: | 
					
						
							|  |  |  |     with open(file, 'r', encoding="utf-8") as f: | 
					
						
							|  |  |  |         lines = [x.strip() for x in f if x.strip() != ''] | 
					
						
							|  |  |  | except: | 
					
						
							|  |  |  |     with open(file, 'r', encoding="gbk") as f: | 
					
						
							|  |  |  |         lines = [x.strip() for x in f if x.strip() != ''] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | data_new = [] | 
					
						
							|  |  |  | for i in lines: | 
					
						
							|  |  |  |     data_dan = i.split("\t") | 
					
						
							|  |  |  |     if len(data_dan) != 2: | 
					
						
							|  |  |  |         continue | 
					
						
							|  |  |  |     data_1 = data_dan[0] | 
					
						
							|  |  |  |     data_2 = data_dan[1] | 
					
						
							|  |  |  |     str_sim_value = difflib.SequenceMatcher(None, data_1, data_2).quick_ratio() | 
					
						
							|  |  |  |     data_new.append(data_dan + [str_sim_value]) | 
					
						
							|  |  |  | print(data_new) | 
					
						
							|  |  |  | data_new = sorted(data_new, key= lambda x:x[2], reverse=True) | 
					
						
							|  |  |  | df = pd.DataFrame(data_new) | 
					
						
							|  |  |  | df.to_excel("../data/11篇_t5_strsim_0724.xlsx", index=None) |