# File: llm_cp2/examples/train_full/qwen2_5omni_full_sft_haptic.yaml
# Last commit (csuhan, b0c0df0, verified): "Upload folder using huggingface_hub"
### model
# Base checkpoint and per-input pixel budgets for the multimodal inputs.
# Relative path to the Qwen2.5-Omni 3B checkpoint; presumably resolved against
# the directory the trainer is launched from -- confirm.
model_name_or_path: ckpts/Qwen2.5-Omni-3B
# Pixel budget for images (307200 = 640 x 480).
image_max_pixels: 307200
# Pixel budget for video (65536 = 256 x 256); presumably applied per frame --
# confirm against the data loader.
video_max_pixels: 65536
# Allows custom model code shipped with the checkpoint to be executed when it
# is loaded; only appropriate for checkpoints from a trusted source.
trust_remote_code: true
### method
# Training procedure: supervised fine-tuning (sft) with full-parameter updates.
# Note that `freeze_vision_tower: true` further down in this file still
# excludes the vision tower from training.
stage: sft
do_train: true
finetuning_type: full
# Path to the DeepSpeed config; judging by the file name this selects ZeRO
# stage 2. The trailing comment lists the alternative configs available.
deepspeed: examples/deepspeed/ds_z2_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
### dataset
# Dataset selection and preprocessing. Commented-out `key: value` lines below
# are disabled alternatives kept for quick switching.
# dataset: identity, mllm_audio_demo, mllm_demo, mllm_video_audio_demo
# dataset: scannet_det_train_4frames, scan2cap_train_32frames, scanrefer_train_32frames
# Active dataset; presumably must match an entry in the project's dataset
# registry -- confirm.
dataset: hapticcap
# streaming: true
# Chat template used to format samples for this model family.
template: qwen2_omni
# Sequence-length cutoff; presumably counted in tokens, with longer samples
# truncated -- confirm.
cutoff_len: 8192
# max_samples: 1000
# max_steps: 2000
# false: presumably reuses an existing preprocessed-dataset cache instead of
# rebuilding it -- clear the cache manually after changing the data.
overwrite_cache: false
# preprocessing_batch_size: 64
# Worker counts for dataset preprocessing and for the training data loader.
preprocessing_num_workers: 8
dataloader_num_workers: 16
# buffer_size: 8
### output
# Where results are written and how often progress is logged and checkpointed.
# Run directory for checkpoints and logs; the `sft_v2` suffix keeps this run
# separate from other runs under the same parent directory.
output_dir: saves/qwen2_omni-3b_haptic/full/sft_v2
# Log every optimizer step.
logging_steps: 1
# Write a checkpoint every 5000 steps; a run shorter than that presumably only
# produces the final save -- confirm.
save_steps: 5000
# Presumably saves a loss-curve plot at the end of training -- confirm.
plot_loss: true
# false: do not overwrite the contents of an existing output_dir.
overwrite_output_dir: false
# false: checkpoints also keep optimizer/scheduler state, which is what makes
# resuming from them possible.
save_only_model: false
### train
# Optimization hyperparameters.
# Samples per optimizer step on each device = 1 x 4 = 4
# (per-device batch size x gradient accumulation steps).
per_device_train_batch_size: 1
gradient_accumulation_steps: 4
# Keeps the vision tower's weights fixed even though finetuning_type is full.
freeze_vision_tower: true
# Written with a decimal point and a signed exponent so that YAML 1.1 loaders
# parse it as a float rather than a string.
learning_rate: 1.0e-5
num_train_epochs: 1.0
# Cosine learning-rate schedule, with warmup over the first 10% of training steps.
lr_scheduler_type: cosine
warmup_ratio: 0.1
# Train in bfloat16; requires hardware with bf16 support.
bf16: true
# Timeout for distributed operations (in seconds per the Hugging Face
# TrainingArguments docs); set very large, presumably so long preprocessing
# on one rank does not abort the job -- confirm.
ddp_timeout: 180000000
# null: no checkpoint path given, so presumably training starts from the base
# model -- confirm.
resume_from_checkpoint: null
# Send training metrics to TensorBoard.
report_to: tensorboard