#!/bin/bash
#
# Launches the "ola_audio_8_8gpu" training run: starts
# $PROJECT/ola/train/train.py through torchrun with 8 processes on a single
# node, using the DeepSpeed config at $PROJECT/scripts/zero2.json.
#
# Usage: run this script directly; it takes no arguments.
# All paths below are absolute and machine-specific; edit them before reuse.

set -euo pipefail

# Settings exported into the environment of the training processes.
# Their meaning is defined by whatever code reads them (not visible here).
export LOWRES_RESIZE=384x32
export VIDEO_RESIZE="0x32"
export HIGHRES_BASE="0x32"
export MAXRES=1536
export MINRES=0
export VIDEO_MAXRES=448
export VIDEO_MINRES=288
export PAD2STRIDE=1
export FORCE_NO_DOWNSAMPLE=1

# NOTE(review): "/path/to/Ola" looks like an unfilled placeholder; presumably
# it should be the same checkout as $PROJECT below -- confirm before relying
# on it. ${PYTHONPATH:-} keeps the original value (including the trailing
# colon when PYTHONPATH is unset) while staying safe under `set -u`.
export PYTHONPATH="/path/to/Ola:${PYTHONPATH:-}"

EXP_NAME="ola_audio_8_8gpu"
DATA='/data1/cxy/plm-v/modeling/data/audio_test.json'
CHECKPOINT='/data1/cxy/plm-v/modeling/plm_internvl3_5_ola'

# Single-node rendezvous settings for torchrun. These are assigned
# unconditionally, so any values inherited from the environment are ignored.
nnode=1
nrank=0
MASTER_ADDR=localhost
MASTER_PORT=12324
PROJECT=/data1/cxy/plm-v/modeling/Ola
# Passed through as the literal string "null".
VISION_TOWER=null

# Log the rendezvous settings actually used for this launch (printed after
# assignment so the values are the effective ones).
printf 'MASTER_ADDR=%s\n' "$MASTER_ADDR"
printf 'nnode=%s\n' "$nnode"
printf 'nrank=%s\n' "$nrank"

# Arguments forwarded verbatim to train.py, in their original order.
train_args=(
  --deepspeed "$PROJECT/scripts/zero2.json"
  --run_name "$EXP_NAME"
  --model_name_or_path "$CHECKPOINT"
  --vision_tower "$VISION_TOWER"
  --mm_projector_type ola_internvl
  --mm_vision_select_layer -1
  --mm_use_im_patch_token False
  --tune_speech_adapter True
  --version plm_v
  --data_path "$DATA"
  --bf16 True
  --output_dir "/data1/cxy/plm-v/modeling/ckpt/$EXP_NAME"
  --sample_independently True
  --fix_speech_encoder True
  --freeze_mm_vision_tower True
  --speech_encoder_type "dual"
  --speech_encoder_hidden_size 2048
  --speech_encoder_ds_rate 10
  --num_train_epochs 10
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 1
  --gradient_accumulation_steps 1
  --save_strategy "steps"
  --save_steps 100
  --save_total_limit 100
  --learning_rate 1e-5
  --weight_decay 0.01
  --warmup_ratio 0.01
  --min_lr_ratio 0.1
  --max_grad_norm 5.0
  --lr_scheduler_type "cosine"
  --logging_steps 1
  --tf32 True
  --disable_tqdm False
  --dataloader_pin_memory False
  --model_max_length 16384
  --gradient_checkpointing True
  --dataloader_num_workers 8
  --frames_upbound 64
  --report_to none
)

# The script's exit status is torchrun's exit status.
torchrun \
  --nproc_per_node 8 \
  --nnodes="$nnode" \
  --node_rank="$nrank" \
  --master_addr="$MASTER_ADDR" \
  --master_port="$MASTER_PORT" \
  "$PROJECT/ola/train/train.py" \
  "${train_args[@]}"