# videollava_train.sh — added in this change (+49 lines).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# videollava_train.sh — LoRA fine-tuning of Video-LLaVA-7B with DeepSpeed.
#
# Launches videollava/train/train_mem.py on local GPUs 2-7 with ZeRO-2
# CPU offload, training LoRA adapters (r=128, alpha=256) on top of
# Vicuna-7B v1.5 using the LanguageBind image and video towers.
# Runs fully offline (HF_DATASETS_OFFLINE / TRANSFORMERS_OFFLINE), so all
# models and datasets must already exist at the paths below.
set -euo pipefail

# NOTE(review): JSON_FOLDER is assigned but never referenced below —
# confirm whether it is still needed before removing it.
JSON_FOLDER="llava_all_image_video/ft_json"
IMAGE_FOLDER="/global_data/sft_intern/lh/czr_video/VideoLLaMA2/datasets/videollava_sft"
VIDEO_FOLDER="/global_data/sft_intern/lh/czr_video/VideoLLaMA2/datasets/videollava_sft"

# Fail fast if the repo checkout is missing instead of launching the
# trainer from whatever the current working directory happens to be.
cd /global_data/sft_intern/lh/czr_video/Video-LLaVA-main || exit 1

# Pretrained multimodal projector weights:
# https://huggingface.co/LanguageBind/Video-LLaVA-Pretrain-7B/tree/main
# --pretrain_mm_mlp_adapter ./checkpoints/videollava-7b-pretrain/mm_projector.bin
HF_DATASETS_OFFLINE=1 TRANSFORMERS_OFFLINE=1 deepspeed --include localhost:2,3,4,5,6,7 videollava/train/train_mem.py \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed ./scripts/zero2_offload.json \
    --model_name_or_path /global_data/sft_intern/lh/huggingface_models/vicuna-7b-v1.5 \
    --version v1 \
    --data_path /global_data/sft_intern/lh/czr_video/VideoLLaMA2/datasets/videollava_sft/top_k_extraction2/50852_12_5.json \
    --image_folder "${IMAGE_FOLDER}" \
    --image_tower /global_data/sft_intern/lh/czr_video/Video-LLaVA-main/checkpoints/LanguageBind_Image \
    --video_folder "${VIDEO_FOLDER}" \
    --video_tower /global_data/sft_intern/lh/czr_video/Video-LLaVA-main/checkpoints/LanguageBind_Video_merge \
    --mm_projector_type mlp2x_gelu \
    --pretrain_mm_mlp_adapter ./checkpoints/videollava-7b-pretrain/mm_projector.bin \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir ./checkpoints/pacs_plus_videollava-7b-lora-12_5 \
    --num_train_epochs 1 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 --tokenizer_model_max_length 3072 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to tensorboard \
    --cache_dir "./cache_dir"