#!/usr/bin/env bash
# Fine-tune meta-llama/CodeLlama-7b-Instruct-hf with LoRA (dim 32, bf16)
# using DeepSpeed ZeRO stage 3.
#
# Requires: `deepspeed` on PATH, a DeepSpeed-Chat style `main.py` in the
# current directory, the local JSON dataset under ./local/jsonfile, and
# write access to the scratch dir used for --data_output_path.
#
# Output: checkpoints under $OUTPUT; combined stdout+stderr in
# $OUTPUT/training.log.
set -euo pipefail   # pipefail: without it, `tee` would mask a deepspeed failure

readonly OUTPUT=./output_codellama_7b_lora_bf16
readonly ZERO_STAGE=3

mkdir -p "$OUTPUT"

# 2>&1 before the pipe: training progress is typically written to stderr,
# so capture both streams in the log.
deepspeed main.py \
  --data_path local/jsonfile \
  --data_split "10,0,0" \
  --data_output_path /scratch/huijaean/garbage \
  --model_name_or_path meta-llama/CodeLlama-7b-Instruct-hf \
  --per_device_train_batch_size 8 \
  --per_device_eval_batch_size 8 \
  --max_seq_len 512 \
  --learning_rate 5e-5 \
  --num_train_epochs 1 \
  --gradient_accumulation_steps 32 \
  --lr_scheduler_type cosine \
  --num_warmup_steps 0 \
  --seed 1234 \
  --gradient_checkpointing \
  --dtype bf16 \
  --zero_stage "$ZERO_STAGE" \
  --deepspeed \
  --lora_dim 32 \
  --lora_module_name "layers." \
  --output_dir "$OUTPUT" \
  2>&1 | tee "$OUTPUT/training.log"