#!/bin/bash

# You can use 2B instead of 7B
MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
# MODEL_NAME="Qwen/Qwen2-VL-2B-Instruct"
# MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"

# Effective global batch = per-device batch x num devices x grad accum steps.
# With these defaults, GRAD_ACCUM_STEPS = 256 / (32 * 8) = 1.
GLOBAL_BATCH_SIZE=256
BATCH_PER_DEVICE=32
NUM_DEVICES=8
GRAD_ACCUM_STEPS=$((GLOBAL_BATCH_SIZE / (BATCH_PER_DEVICE * NUM_DEVICES)))

export PYTHONPATH=src:$PYTHONPATH

# Full fine-tune: vision tower, LLM, and merger are all trainable
# (freeze_* False, tune_merger True), each with its own learning rate.
deepspeed src/training/train.py \
    --use_liger True \
    --deepspeed scripts/zero2_offload.json \
    --model_id $MODEL_NAME \
    --data_path /home/world_model/EVA/train_data_v4/dataset_stage0_600k_v1.json \
    --image_folder /home/world_model/ \
    --remove_unused_columns False \
    --freeze_vision_tower False \
    --freeze_llm False \
    --tune_merger True \
    --bf16 True \
    --fp16 False \
    --disable_flash_attn2 False \
    --output_dir output/fft_qwen2vl \
    --num_train_epochs 1 \
    --per_device_train_batch_size $BATCH_PER_DEVICE \
    --gradient_accumulation_steps $GRAD_ACCUM_STEPS \
    --image_min_pixels $((512 * 28 * 28)) \
    --image_max_pixels $((1280 * 28 * 28)) \
    --learning_rate 1e-5 \
    --merger_lr 1e-5 \
    --vision_lr 2e-6 \
    --weight_decay 0.1 \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --gradient_checkpointing True \
    --report_to tensorboard \
    --lazy_preprocess True \
    --save_strategy "steps" \
    --save_steps 200 \
    --save_total_limit 10 \
    --dataloader_num_workers 4
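
# NOTE: the launch above points DeepSpeed at scripts/zero2_offload.json, which
# ships with the repo. For reference, a minimal ZeRO stage-2 config with CPU
# optimizer offload typically looks like the commented sketch below. The field
# values here are assumptions based on DeepSpeed's documented config schema,
# not the repo's actual file; uncomment only if you need to recreate it.
#
# cat > scripts/zero2_offload.json <<'EOF'
# {
#     "bf16": { "enabled": "auto" },
#     "zero_optimization": {
#         "stage": 2,
#         "offload_optimizer": { "device": "cpu", "pin_memory": true },
#         "overlap_comm": true,
#         "contiguous_gradients": true
#     },
#     "gradient_accumulation_steps": "auto",
#     "gradient_clipping": "auto",
#     "train_batch_size": "auto",
#     "train_micro_batch_size_per_gpu": "auto"
# }
# EOF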