You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
751 B
34 lines
751 B
![]()
2 years ago
```json
{
  "gradient_accumulation_steps": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "prescale_gradients": false,
  "zero_allow_untested_optimizer": true,
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": "auto",
      "eps": "auto",
      "betas": "auto",
      "weight_decay": "auto"
    }
  },
  "tensorboard": {
    "enabled": true,
    "output_path": "logs/",
    "job_name": "openbuddy_llama-7b-pt"
  },
  "zero_optimization": {
    "stage": 3,
    "contiguous_gradients": false,
    "allgather_bucket_size": 3e8,
    "reduce_bucket_size": 3e8,
    "overlap_comm": true,
    "reduce_scatter": true
  },
  "steps_per_print": 16,
  "gradient_clipping": 1.0,
  "wall_clock_breakdown": true,
  "bf16": {
    "enabled": true
  }
}
```