---
# Fine-tuning configuration: Qwen2.5-7B-Instruct on the safety-reasoning
# 4-step dataset. Consumed by the training script; ${...} placeholders in
# save_dir are interpolated by the config loader (presumably OmegaConf-style
# — TODO confirm against the trainer).

# --- Model ---
model_id: Qwen/Qwen2.5-7B-Instruct
model_family: qwen2.5

# LoRA adapter hyperparameters.
# NOTE(review): r: 0 with alpha: 128 looks like LoRA is disabled (rank 0,
# i.e. full fine-tune) — confirm how the trainer interprets r == 0.
LoRA:
  r: 0
  alpha: 128
  dropout: 0.05

used_policy: true
used_strategy: true

# Delimiter tokens between reasoning steps / before the answer.
# Quoted: the value is the literal one-character string ",".
step_token: ','
answer_token: ','

loss_type: grad_ascent

# Which sub-modules receive gradient updates.
tune_vision_tower: false
tune_mm_projector: true
tune_language_model: true

# --- Data ---
data_path: ./dataset/safety_reasoning_v2_4step.json
split: with_strategy_policy_4step

# --- Training ---
batch_size: 2
gradient_accumulation_steps: 8
max_grad_norm: 1.0
num_epochs: 3
max_length: 3072

# Quoted so YAML passes the ${...} placeholders through verbatim as one string.
save_dir: 'models/final_ft_${num_epochs}_epochs_lr${lr}_${model_family}_${split}'
save_steps: 165

lr: 1.0e-05
weight_decay: 0.01
seed: 233
workers: 4
lr_scheduler_type: cosine
warmup_ratio: 0.0
# -1 presumably means "derive step count from num_epochs" — verify in trainer.
max_train_steps: -1

# --- Logging / checkpointing ---
report_to: wandb
# Empty string: start training from scratch (no checkpoint to resume).
resume_from_checkpoint: ''