#!/bin/bash
#
# Launch configuration for LoRA fine-tuning of Qwen/Qwen2-VL-7B-Instruct
# with DeepSpeed ZeRO-3 on 6 GPUs (physical devices 2-7).
#
# The effective global batch satisfies:
#   GLOBAL_BATCH_SIZE = BATCH_PER_DEVICE * NUM_DEVICES * GRAD_ACCUM_STEPS

set -euo pipefail

MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"

# Pin the job to 6 of the GPUs and make the project sources importable.
# ${PYTHONPATH:-} avoids an "unbound variable" error under `set -u`
# when PYTHONPATH is not already set in the environment.
export CUDA_VISIBLE_DEVICES=2,3,4,5,6,7
export PYTHONPATH=src:${PYTHONPATH:-}

GLOBAL_BATCH_SIZE=192
BATCH_PER_DEVICE=32
NUM_DEVICES=6

# Fail fast instead of silently truncating: integer division would
# otherwise hide a global batch that is not evenly divisible.
if (( GLOBAL_BATCH_SIZE % (BATCH_PER_DEVICE * NUM_DEVICES) != 0 )); then
  echo "error: GLOBAL_BATCH_SIZE ($GLOBAL_BATCH_SIZE) must be divisible by" \
       "BATCH_PER_DEVICE * NUM_DEVICES ($((BATCH_PER_DEVICE * NUM_DEVICES)))" >&2
  exit 1
fi
GRAD_ACCUM_STEPS=$((GLOBAL_BATCH_SIZE / (BATCH_PER_DEVICE * NUM_DEVICES)))
# Launch training under DeepSpeed ZeRO-3 (scripts/zero3.json).
# LoRA (rank 64, alpha 64, dropout 0.05) is applied to both the language
# model and the vision tower; base weights, the vision tower, and the
# merger stay frozen (freeze_llm/freeze_vision_tower True, tune_merger
# False), so only adapter weights are trained.
# All expansions are quoted so values with spaces cannot word-split.
deepspeed src/training/train.py \
    --use_liger True \
    --lora_enable True \
    --vision_lora True \
    --use_dora False \
    --lora_namespan_exclude "['lm_head', 'embed_tokens']" \
    --lora_rank 64 \
    --lora_alpha 64 \
    --lora_dropout 0.05 \
    --num_lora_modules -1 \
    --deepspeed scripts/zero3.json \
    --model_id "$MODEL_NAME" \
    --data_path /home/world_model/EVA/train_data_v4/dataset_stage0_600k_v1.json \
    --image_folder /home/world_model/ \
    --remove_unused_columns False \
    --freeze_vision_tower True \
    --freeze_llm True \
    --tune_merger False \
    --bf16 True \
    --fp16 False \
    --disable_flash_attn2 False \
    --output_dir output/qwen2vl_lora_vision_gpu6 \
    --num_train_epochs 1 \
    --per_device_train_batch_size "$BATCH_PER_DEVICE" \
    --gradient_accumulation_steps "$GRAD_ACCUM_STEPS" \
    --image_min_pixels $((256 * 28 * 28)) \
    --image_max_pixels $((1280 * 28 * 28)) \
    --learning_rate 2e-4 \
    --weight_decay 0.1 \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --gradient_checkpointing True \
    --report_to tensorboard \
    --lazy_preprocess True \
    --save_strategy "steps" \
    --save_steps 200 \
    --save_total_limit 10 \
    --dataloader_num_workers 4