{
  "gradient_accumulation_steps": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "prescale_gradients": false,
  "zero_allow_untested_optimizer": true,
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": "auto",
      "eps": "auto",
      "betas": "auto",
      "weight_decay": "auto"
    }
  },
  "tensorboard": {
    "enabled": true,
    "output_path": "logs/",
    "job_name": "openbuddy_llama-7b-pt"
  },
  "zero_optimization": {
    "stage": 3,
    "contiguous_gradients": false,
    "allgather_bucket_size": 3e8,
    "reduce_bucket_size": 3e8,
    "overlap_comm": true,
    "reduce_scatter": true
  },
  "steps_per_print": 16,
  "gradient_clipping": 1.0,
  "wall_clock_breakdown": true,
  "bf16": {
    "enabled": true
  }
}