litwell committed
Commit ae27c7b · verified · 1 Parent(s): c0ad777

Upload models/scripts/finetune_qwen2_5.sh with huggingface_hub

Files changed (1)
  1. models/scripts/finetune_qwen2_5.sh +49 -0
models/scripts/finetune_qwen2_5.sh ADDED
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# You can use 2B instead of 7B
+# MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
+# MODEL_NAME="Qwen/Qwen2-VL-2B-Instruct"
+MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
+# MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
+
+GLOBAL_BATCH_SIZE=128
+BATCH_PER_DEVICE=4
+NUM_DEVICES=8
+GRAD_ACCUM_STEPS=$((GLOBAL_BATCH_SIZE / (BATCH_PER_DEVICE * NUM_DEVICES)))
+
+export PYTHONPATH=src:$PYTHONPATH
+
+deepspeed src/training/train.py \
+    --use_liger True \
+    --deepspeed scripts/zero2_offload.json \
+    --model_id $MODEL_NAME \
+    --data_path /home/world_model/EVA/train_data_v4/dataset_stage0_600k.json \
+    --image_folder /home/world_model/ \
+    --remove_unused_columns False \
+    --freeze_vision_tower False \
+    --freeze_llm False \
+    --tune_merger True \
+    --bf16 True \
+    --fp16 False \
+    --disable_flash_attn2 False \
+    --output_dir output/fft_qwen2_5vl \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size $BATCH_PER_DEVICE \
+    --gradient_accumulation_steps $GRAD_ACCUM_STEPS \
+    --image_min_pixels $((512 * 28 * 28)) \
+    --image_max_pixels $((1280 * 28 * 28)) \
+    --learning_rate 1e-5 \
+    --merger_lr 1e-5 \
+    --vision_lr 2e-6 \
+    --weight_decay 0.1 \
+    --warmup_ratio 0.03 \
+    --lr_scheduler_type "cosine" \
+    --logging_steps 1 \
+    --tf32 True \
+    --gradient_checkpointing True \
+    --report_to tensorboard \
+    --lazy_preprocess True \
+    --save_strategy "steps" \
+    --save_steps 200 \
+    --save_total_limit 10 \
+    --dataloader_num_workers 4
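
As a quick sanity check on the batch-size arithmetic in the uploaded script (assuming the committed values of 128 / 4 / 8 are left unchanged), the derived accumulation steps and the reconstructed global batch work out as:

    echo $((128 / (4 * 8)))   # GRAD_ACCUM_STEPS = 4
    echo $((4 * 8 * 4))       # per-device batch * devices * accum steps = 128 (the global batch)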
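The --image_min_pixels and --image_max_pixels arguments are written as multiples of 28 * 28, which in the Qwen2-VL / Qwen2.5-VL processors corresponds to the pixel footprint of one merged visual token (14x14 patches merged 2x2), so the script effectively bounds each image at roughly 512 to 1280 visual tokens. The raw pixel values, for reference:

    echo $((512 * 28 * 28))    # image_min_pixels  = 401408
    echo $((1280 * 28 * 28))   # image_max_pixels  = 1003520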