
15 changed files with 574 additions and 0 deletions
@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/9 18:36
@Author :
@FileName:
@Software:
@Describe:
"""
# Chinese named entity recognition with a CRF
# Dataset: http://s3.bmio.net/kashgari/china-people-daily-ner-corpus.tar.gz
# Measured F1: 96.18% on the validation set, 95.35% on the test set
# (The three lines above were carried over from a bert4keras NER example; this script
# actually performs embedding-based duplicate checking.)
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
import numpy as np
from numpy.linalg import norm
import pandas as pd

from src.basemodel import ClassifyModel

# a = [[1, 3, 2], [2, 2, 1]]
# print(cosine_similarity(a))


def cos_sim(a, b):
    """Cosine similarity between two 1-D vectors."""
    A = np.array(a)
    B = np.array(b)
    cosine = np.dot(A, B) / (norm(A) * norm(B))
    return cosine


if __name__ == '__main__':
    maxlen = 512
    batch_size = 32
    # BERT configuration
    config_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json'
    checkpoint_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt'
    dict_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/vocab.txt'

    label_vec_path = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/save_x.npy"
    b = np.load(label_vec_path)
    df_train_nuoche = pd.read_csv("data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv", encoding="utf-8").values.tolist()

    classifymodel = ClassifyModel(config_path, checkpoint_path, dict_path, is_train=False, load_weights_path=None)

    # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # while True:
    #     text = input("Please enter a sentence: ")
    #     data = classifymodel.data_generator([text], batch_size)
    #     token, segment = data[0][0], data[1][0]
    #     content_cls = classifymodel.predict(token, segment)
    #     content_cls = content_cls.reshape(-1)
    #     print(content_cls.shape)
    #
    #     index_list = []
    #     for vec in b:
    #         cos_value = cos_sim(content_cls, vec)
    #         index_list.append(cos_value)
    #
    #     re1 = [(i[0], i[1]) for i in sorted(list(enumerate(index_list)), key=lambda x: x[1], reverse=True)]
    #
    #     for i in range(0, 10):
    #         print(re1[i])
    #         print(df_train_nuoche[re1[i][0]])
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    path_txt = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究.txt"
    path_excel = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_2.xlsx"
    f = open(path_txt, encoding="utf-8")
    content = f.read()
    f.close()

    data_zong = []
    content_list = content.split("\n")
    for text in content_list:
        if text[:5] == "*****":
            continue
        dan_data = [text]
        data = classifymodel.data_generator([text], batch_size)
        token, segment = data[0][0], data[1][0]
        content_cls = classifymodel.predict(token, segment)
        content_cls = content_cls.reshape(-1)

        index_list = []
        for vec in b:
            cos_value = cos_sim(content_cls, vec)
            index_list.append(cos_value)

        re1 = [(i[0], i[1]) for i in sorted(list(enumerate(index_list)), key=lambda x: x[1], reverse=True)]

        for i in range(0, 10):
            dan_data.append(re1[i][1])
            dan_data.append(df_train_nuoche[re1[i][0]][0])
            filename = df_train_nuoche[re1[i][0]][1].split("\\")[-1]
            dan_data.append(filename)
        data_zong.append(dan_data)
    pd.DataFrame(data_zong).to_excel(path_excel, index=None)
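

# Illustrative sketch (not part of the original script): the loop above scores each
# sentence against every stored vector in a Python loop. Assuming `b` is the (N, 768)
# matrix loaded from save_x.npy, the same top-10 ranking can be computed in one
# vectorised NumPy call; `topk_cosine` is a hypothetical helper name.
def topk_cosine(query_vec, matrix, k=10):
    """Return [(row_index, cosine), ...] for the k most similar rows of `matrix`."""
    q = query_vec / norm(query_vec)
    m = matrix / norm(matrix, axis=1, keepdims=True)
    sims = m @ q                    # cosine similarity against every row at once
    top = np.argsort(-sims)[:k]     # indices of the k largest similarities
    return [(int(i), float(sims[i])) for i in top]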
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/7 14:31
@Author :
@FileName:
@Software:
@Describe:
"""
import numpy as np
import faiss

d = 768                     # vector dimension
nb = 1000000                # number of vectors in the index
nq = 5                      # number of query vectors
np.random.seed(1234)
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.   # vectors to be indexed
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

print("0", xb)
print("1", xq)


index = faiss.IndexFlatL2(d)
print(index.is_trained)     # True: a flat index needs no training, vectors can simply be added
index.add(xb)               # add the database vectors to the index
print(index.ntotal)

k = 4                       # K of top-K
D, I = index.search(xq, k)  # xq are the queries; I holds each query's top-K neighbour indices, D the corresponding distances

print(D)
print(I)
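

# Illustrative sketch (not part of the original demo): the duplicate-check scripts in
# this repo rank by cosine similarity, whereas IndexFlatL2 above ranks by Euclidean
# distance. Assuming the same xb/xq/k as above, cosine search can be done with an
# inner-product index over L2-normalised copies of the vectors.
xb_cos = xb.copy()
xq_cos = xq.copy()
faiss.normalize_L2(xb_cos)       # in-place normalisation, so inner product == cosine
faiss.normalize_L2(xq_cos)
index_ip = faiss.IndexFlatIP(d)  # exact inner-product index
index_ip.add(xb_cos)
D_cos, I_cos = index_ip.search(xq_cos, k)
print(D_cos)                     # cosine similarities of each query's top-k neighbours
print(I_cos)                     # and their row indices in xb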
@@ -0,0 +1,16 @@
# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.


def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    print_hi('PyCharm')

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/14 17:52
@Author :
@FileName:
@Software:
@Describe:
"""
from rouge import Rouge
import pandas as pd
from tqdm import tqdm

rouge = Rouge()


def rouge_value(data_1, data_2):
    # space-join the characters so ROUGE treats each Chinese character as a token
    data_1 = ' '.join(data_1)
    data_2 = ' '.join(data_2)
    scores = rouge.get_scores(hyps=[data_1], refs=[data_2])
    rouge_1 = scores[0]['rouge-1']['f']
    rouge_2 = scores[0]['rouge-2']['f']
    rouge_l = scores[0]['rouge-l']['f']
    # rouge_w = scores[0]['rouge-w']['f']
    # rouge_s = scores[0]['rouge-s']['f']

    return rouge_1, rouge_2, rouge_l


if __name__ == '__main__':

    df_train_nuoche = pd.read_csv("data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv", encoding="utf-8").values.tolist()

    path_txt = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究.txt"
    path_excel = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_2_rouge.xlsx"
    f = open(path_txt, encoding="utf-8")
    content = f.read()
    f.close()

    data_zong = []
    content_list = content.split("\n")
    for text in tqdm(content_list):
        dan_data = [text]
        index_list = []
        if text[:5] == "*****":
            continue

        for data_dan in df_train_nuoche:
            rouge_1, rouge_2, rouge_l = rouge_value(text, data_dan[0])
            index_list.append(rouge_l)

        re1 = [(i[0], i[1]) for i in sorted(list(enumerate(index_list)), key=lambda x: x[1], reverse=True)]

        for i in range(0, 10):
            dan_data.append(re1[i][1])
            dan_data.append(df_train_nuoche[re1[i][0]][0])
            filename = df_train_nuoche[re1[i][0]][1].split("\\")[-1]
            dan_data.append(filename)
        data_zong.append(dan_data)
    pd.DataFrame(data_zong).to_excel(path_excel, index=None)
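

# Illustrative sketch (not part of the original script): because rouge_value() space-joins
# its inputs, Chinese text is scored character by character. A hypothetical helper for
# ranking one sentence against a list of candidate sentences could look like this.
def rank_by_rouge_l(sentence, candidates, top_k=10):
    """Return [(candidate_index, rouge_l_f1), ...] sorted best-first."""
    scored = [(idx, rouge_value(sentence, cand)[2]) for idx, cand in enumerate(candidates)]
    return sorted(scored, key=lambda x: x[1], reverse=True)[:top_k]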
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/13 10:15
@Author :
@FileName:
@Software:
@Describe:
"""
from bert4keras.backend import keras, K
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer
from bert4keras.optimizers import Adam
from bert4keras.snippets import sequence_padding, DataGenerator
from bert4keras.snippets import open
from bert4keras.layers import ConditionalRandomField
from keras.layers import Dense
from keras.models import Model
from tqdm import tqdm
import json
from keras.layers import *


class ClassifyModel:
    def __init__(self, config_path, checkpoint_path, dict_path, is_train, load_weights_path=None):
        self.config_path = config_path
        self.checkpoint_path = checkpoint_path
        self.dict_path = dict_path
        self.is_train = is_train
        self.load_weights_path = load_weights_path
        self.model = self.create_model(self.is_train, self.load_weights_path)
        self.tokenizer = Tokenizer(self.dict_path, do_lower_case=True)
        self.maxlen = 256

    def create_model(self, is_train, load_weights_path):
        bert = build_transformer_model(
            config_path=self.config_path,
            checkpoint_path=self.checkpoint_path,
            return_keras_model=False,
        )
        # take the [CLS] vector as the sentence embedding
        output = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.model.output)
        model = keras.models.Model(bert.model.input, output)
        # only load fine-tuned weights at inference time, and only if a path was given
        if not is_train and load_weights_path is not None:
            model.load_weights(load_weights_path)
        return model

    def predict(self, token_ids, segment_ids):
        return self.model.predict([token_ids, segment_ids])

    def data_generator(self, texts, batch_size):
        batch_token_ids = []
        batch_segment_ids = []
        batch_dan_token_ids = []
        batch_dan_segment_ids = []
        for id_, text in enumerate(texts):
            token_ids, segment_ids = self.tokenizer.encode(text, maxlen=self.maxlen)
            batch_dan_token_ids.append(token_ids)
            batch_dan_segment_ids.append(segment_ids)
            # flush a padded batch whenever it is full or the last text has been reached
            if len(batch_dan_token_ids) == batch_size or id_ == len(texts) - 1:
                batch_dan_token_ids = sequence_padding(batch_dan_token_ids)
                batch_dan_segment_ids = sequence_padding(batch_dan_segment_ids)
                batch_token_ids.append(batch_dan_token_ids)
                batch_segment_ids.append(batch_dan_segment_ids)
                batch_dan_token_ids, batch_dan_segment_ids = [], []
        return batch_token_ids, batch_segment_ids
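

# Illustrative usage sketch (not part of the original module); the checkpoint paths
# below follow the other scripts in this repository and are assumptions.
if __name__ == '__main__':
    config_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json'
    checkpoint_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt'
    dict_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/vocab.txt'

    model = ClassifyModel(config_path, checkpoint_path, dict_path,
                          is_train=True, load_weights_path=None)
    tokens, segments = model.data_generator(["我们有个好朋友"], batch_size=32)
    cls_vecs = model.predict(tokens[0], segments[0])
    print(cls_vecs.shape)  # expected: (1, 768) -- one CLS vector per input sentence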
@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/13 10:38
@Author :
@FileName:
@Software:
@Describe:
"""
import os

import pandas as pd

file = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt_new'
file_csv = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv'

path_list = []
data = []


for root, dirs, files in os.walk(file):
    for name in files:
        path = os.path.join(root, name)
        path_list.append(path)

print(path_list)
for path in path_list:
    with open(path, encoding="gbk") as f:
        text = f.read()

    text_list = text.split("@@@@@@@@@@")
    text_zhengwen = text_list[-1]
    text_zhengwen_list = text_zhengwen.split("\n")
    for sentence in text_zhengwen_list:
        if sentence != "":
            data.append([sentence, path])

pd.DataFrame(data, columns=["sentence", "path"]).to_csv(file_csv, index=None)
@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/10 18:53
@Author :
@FileName:
@Software:
@Describe:
"""
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# import pickle
# import redis
# from redis import ConnectionPool
# app = Flask(__name__)
import numpy as np
import pandas as pd

import json
from keras.layers import *
from tqdm import tqdm
import time
from src.basemodel import ClassifyModel


if __name__ == '__main__':
    maxlen = 256
    batch_size = 32
    # BERT configuration
    config_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_config.json'
    checkpoint_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/bert_model.ckpt'
    dict_path = 'chinese_roberta_wwm_ext_L-12_H-768_A-12/vocab.txt'

    texts = ["我们有个好朋友"] * 34
    print(texts)
    classifymodel = ClassifyModel(config_path, checkpoint_path, dict_path, is_train=False, load_weights_path=None)
    # data = classifymodel.data_generator(texts, batch_size)
    # for token, segment in zip(data[0], data[1]):
    #     print(classifymodel.predict(token, segment).shape)

    df_train_nuoche = pd.read_csv("data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重.csv", encoding="utf-8")
    Data = []
    for data_dan in df_train_nuoche.values.tolist():
        Data.append(data_dan[0])
    print(Data[0])
    print(len(Data))

    data = classifymodel.data_generator(Data, batch_size)

    print(len(data[0][-1]))
    # print(type(train_generator))
    # d = next(train_generator)
    # print(d)
    a1 = np.empty((0, 768), dtype="float32")  # accumulator for the 768-dim CLS embeddings
    for token, segment in zip(data[0], data[1]):
        a2 = classifymodel.predict(token, segment)
        a1 = np.concatenate([a1, a2])

    print(a1.shape)
    np.save('data/10235513_大型商业建筑人员疏散设计研究_沈福禹/save_x', a1)
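
    # Illustrative check (not part of the original script): the saved matrix can be
    # reloaded and should contain one 768-dim row per sentence in 查重.csv.
    saved = np.load('data/10235513_大型商业建筑人员疏散设计研究_沈福禹/save_x.npy')
    print(saved.shape, len(Data))  # the two row counts are expected to match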
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/14 19:01
@Author :
@FileName:
@Software:
@Describe:
"""
from gensim.models.word2vec import LineSentence
import numpy as np
from tqdm import tqdm

path = "word2vec_model/word2vec.txt"


def iter_word(word, txt_path):
    """Look up a word vector by iterating over the text-format word2vec file."""
    vec = 0
    iter1 = LineSentence(open(txt_path, 'r', encoding='utf-8'))
    for i, v in tqdm(enumerate(iter1)):
        if i == 0:
            continue  # skip the header line (vocabulary size and dimension)
        if word == v[0]:  # the first token of each line is the word itself
            vec = np.array([float(j) for j in v[1:]])
            break
    return vec


word = "公共"
print(iter_word(word, path))
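

# Illustrative alternative sketch (not part of the original script): if
# word2vec_model/word2vec.txt is a standard text-format word2vec file (the header line
# skipped above holds the vocabulary size and dimension), gensim can load it once and
# look words up directly instead of re-scanning the file for every query.
from gensim.models import KeyedVectors

kv = KeyedVectors.load_word2vec_format(path, binary=False)
if word in kv:
    print(kv[word])  # the same vector iter_word() finds by scanning the file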
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/15 11:39
@Author :
@FileName:
@Software:
@Describe:
"""
import pandas as pd
import difflib


path_txt = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究.txt"
path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文.csv"
path_csv_sim = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文对照.csv"
f = open(path_txt, encoding="utf-8")
content = f.read()
f.close()

data = []
content_text_list = content.split("\n")
content_csv_list = pd.read_csv(path_csv).values.tolist()
for dan_yuan in content_csv_list:
    str_sim_text = "##"
    for dan_lable in content_text_list:
        str_sim_value = difflib.SequenceMatcher(None, dan_yuan[0], dan_lable).quick_ratio()
        if str_sim_value >= 0.95:
            str_sim_text = dan_lable
            break
    data.append([dan_yuan[0], str_sim_text])

pd.DataFrame(data).to_csv(path_csv_sim, index=None)
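

# Illustrative note (not part of the original script): quick_ratio() only compares
# character multisets, so it is an upper bound on ratio() -- two sentences containing
# the same characters in a different order still score 1.0. A stricter pass could
# re-check candidate matches with ratio():
sm = difflib.SequenceMatcher(None, "人员疏散设计", "设计疏散人员")
print(sm.quick_ratio(), sm.ratio())  # quick_ratio() >= ratio() for reordered text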
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/10 17:45
@Author :
@FileName:
@Software:
@Describe:
"""
import os


file = './data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt'

path_list = []

for root, dirs, files in os.walk(file):
    for name in files:
        path = os.path.join(root, name)
        path_list.append(path)


for path in path_list:
    with open(path, encoding="utf-8") as f:
        text = f.read()
    path_dan_list = path.split("\\")
    root_path = path_dan_list[0]
    file_path = path_dan_list[1]
    root_pathdan_list = root_path.split("/")
    root_pathdan_list = root_pathdan_list[:-1]
    print(root_pathdan_list)

    text_list = text.split("@@@@@@@@@@")
    text_zhengwen = text_list[-1]
    text_list = [i.lstrip("\n") for i in text_list[:-1]]
    print(text_list)
    # normalise the body text: strip whitespace, drop line breaks, then break after every full stop
    text_zhengwen = text_zhengwen.strip().replace("\n", "").replace(" ", "").replace("。", "。\n")
    text_list = text_list + [text_zhengwen]
    text_str = "@@@@@@@@@@".join(text_list)
    path_new = "/".join(root_pathdan_list + ["查重txt_new", file_path])
    with open(path_new, "w") as f:
        f.write(text_str)
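

# Illustrative note (not part of the original script): the writes above assume that the
# 查重txt_new directory already exists next to 查重txt; it could be created up front with
# something like:
#
#     os.makedirs("./data/10235513_大型商业建筑人员疏散设计研究_沈福禹/查重txt_new", exist_ok=True)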
@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/15 10:38
@Author :
@FileName:
@Software:
@Describe:
"""
import os
import docx
import pandas as pd


def read_docx(rawpath):  # read a .docx file and split it into sentences
    data = []
    data_new = []
    document = docx.Document(rawpath)

    # collect the text of every paragraph
    all_paragraphs = document.paragraphs
    for paragraph in all_paragraphs:
        data.append(paragraph.text)
    # for data_dan in data:
    #     if data_dan == "":
    #         continue
    #     else:
    #         data_list = str(data_dan).split("。")
    #         for data_dan_short in data_list:
    #             if data_dan_short == "":
    #                 continue
    #             data_new.append(data_dan_short)
    data = [dan for dan in data if dan != ""]
    data = "".join(data)
    data_list = str(data).split("。")
    data_new = [dan + "。" for dan in data_list if dan != ""]
    return data_new


if __name__ == '__main__':
    pathls = r"E:\pycharm_workspace\duplicate_check\data\10235513_大型商业建筑人员疏散设计研究_沈福禹\10235513_沈福禹_大型商业建筑人员疏散设计研究.docx"
    path_csv = "data/10235513_大型商业建筑人员疏散设计研究_沈福禹/大型商业建筑人员疏散设计研究_全文.csv"
    data = read_docx(pathls)
    data = [[i] for i in data]
    pd.DataFrame(data).to_csv(path_csv, index=None)
@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-

"""
@Time : 2023/3/9 15:34
@Author :
@FileName:
@Software:
@Describe:
"""
import pdfplumber
import pandas as pd


path = "./data/新建文件夹/13977991/全文对照.pdf"
# with pdfplumber.open(path) as pdf:
#     first_page = pdf.pages[0]
#     # extract the text as one string, line breaks included (breaks follow the PDF layout, not real paragraphs)
#     print(first_page.extract_text())
#     # extract every table on the page; extract_table() returns a single table instead
#     for table in first_page.extract_tables():
#         # each table is a nested list; converting it to a DataFrame makes it easier to inspect and analyse
#         df = pd.DataFrame(table[1:], columns=table[0])
#         print(df)


with pdfplumber.open(path) as pdf:
    content = ''
    for i in range(len(pdf.pages)):
        # read page i+1 of the PDF
        page = pdf.pages[i]

        # page.extract_text() returns the page text; dropping the last line removes the page number at the bottom
        page_content = '\n'.join(page.extract_text().split('\n')[:-1])
        content = content + page_content

print(content)


with pdfplumber.open(path) as pdf:
    first_page = pdf.pages[3]
    tables = first_page.extract_tables()
    for table in tables:
        df = pd.DataFrame(table)
        # use the first row as the header:
        # df = pd.DataFrame(table[1:], columns=table[0])

        print(df)