排版识别标题级别和正文
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

22 lines
509 B

from transformers import (
AutoConfig,
AutoModelForTokenClassification,
AutoTokenizer,
DataCollatorForTokenClassification,
HfArgumentParser,
PretrainedConfig,
PreTrainedTokenizerFast,
Trainer,
TrainingArguments,
set_seed,
BertTokenizer,
BertTokenizerFast,
BertModel
)
# Build the module-level tokenizer from a checkpoint stored at an absolute
# path on the local filesystem (machine-specific; adjust when running
# elsewhere).
tokenizer = BertTokenizerFast.from_pretrained(
    "/home/majiahui/project/models-llm/chinese-bigbird-wwm-base-4096",
    # Do not execute custom code shipped alongside the checkpoint.
    trust_remote_code=False,
    # NOTE(review): `revision` presumably only matters for Hub downloads and
    # `use_fast` for AutoTokenizer-style dispatch; both are kept as passed in
    # the original call -- confirm before removing either.
    revision="main",
    use_fast=True,
)