#!/usr/bin/env bash
# Launch supervised fine-tuning (SFT) of openbuddy-llama-7b-v1.4 with DeepSpeed
# on 3 GPUs, continuing from a previous freeze-tuned checkpoint.
#
# Requires: deepspeed on PATH, deepspeed_v2.json in the CWD, and the model,
# dataset, and checkpoint paths below to exist on this machine.
set -euo pipefail

# NOTE(review): the original file had the continuations collapsed to `\ `
# (backslash-space), which would prepend a literal space to every option word
# and break argument parsing; restored to backslash-newline here.
CUDA_VISIBLE_DEVICES=0,1,2 deepspeed --num_gpus=3 src/train_sft.py \
  --deepspeed deepspeed_v2.json \
  --model_name_or_path /home/majiahui/models-LLM/openbuddy-llama-7b-v1.4-fp16/ \
  --do_train \
  --dataset paper_data_v3_prompt \
  --dataset_dir /home/majiahui/project2/LLaMA-Efficient-Tuning/data \
  --finetuning_type freeze \
  --max_source_length 700 \
  --max_target_length 1400 \
  --output_dir path_to_sft_checkpoint_paper_prompt_freeze_deepspeed \
  --overwrite_cache \
  --per_device_train_batch_size 2 \
  --gradient_accumulation_steps 1 \
  --lr_scheduler_type cosine \
  --logging_steps 10 \
  --save_steps 4000 \
  --learning_rate 1e-5 \
  --num_train_epochs 3.0 \
  --plot_loss \
  --fp16 \
  --checkpoint_dir /home/majiahui/project2/LLaMA-Efficient-Tuning/path_to_sft_checkpoint_paper_prompt_freeze/checkpoint-104000