#!/bin/bash
#
# Runs ../omni_speech/train/train_mem.py under the deepspeed launcher with
# LoRA enabled (--lora_enable True) and the zero2.json DeepSpeed config.
#
# Usage: <this script> [extra train_mem.py arguments...]
#
# The training script, zero2.json, the cache directory and the output
# directory are all given as relative paths, so invoke this from the directory
# that contains zero2.json. Any extra command-line arguments are appended
# after the built-in flags.

# Abort on command failure, on use of an unset variable, and on failure of any
# pipeline stage, so a misspelled variable never reaches the trainer as an
# empty argument.
set -euo pipefail

# Checkpoint passed as --model_name_or_path.
readonly MODEL_PATH=/data1/speech/anhnmt2/Speech2Speech/LLaMA-Omni/models/llm/Qwen2.5-3B-Instruct
# Checkpoint passed as --speech_encoder.
readonly SPEECH_ENCODER=/data1/speech/anhnmt2/Speech2Speech/LLaMA-Omni/models/speech_encoder/whisper-medium
# Projector weights passed as --pretrain_speech_projector.
readonly SPEECH_ADAPTER=/data1/speech/anhnmt2/Speech2Speech/half-streaming-speech-nlp/checkpoints/omni_whisper-medium_Qwen2.5-3B_pretrained-asr/speech_projector.bin
# Value passed as --version.
readonly PROMPT_VERSION=qwen
# Training and dev JSONL files.
readonly DATA_PATH=/data1/speech/anhnmt2/dataset/s2s/english/qna/train_tmp.jsonl
readonly DEV_PATH=/data1/speech/anhnmt2/dataset/s2s/english/qna/dev_tmp.jsonl
# Relative to the current working directory.
readonly CACHE_DIR="../output/cached_sft"

# Flags are collected in an array so every value is passed as exactly one
# word, regardless of spaces or glob characters in the paths above.
train_args=(
  --deepspeed zero2.json
  --lora_enable True
  --model_name_or_path "$MODEL_PATH"
  --version "$PROMPT_VERSION"
  --data_path "$DATA_PATH"
  --dev_path "$DEV_PATH"
  --cache_dir "$CACHE_DIR"
  --speech_encoder "$SPEECH_ENCODER"
  --mel_size 80
  --speech_encoder_hidden_size 1024
  --speech_encoder_type whisper
  --pretrain_speech_projector "$SPEECH_ADAPTER"
  --bf16 True
  --output_dir ../checkpoints/omni_whisper-medium_Qwen2.5-3B_pretrained-sft-lora
  --num_train_epochs 18
  --per_device_train_batch_size 2
  --per_device_eval_batch_size 1
  --gradient_accumulation_steps 4
  --evaluation_strategy "steps"
  --save_strategy "steps"
  --eval_steps 1000
  --save_steps 1000
  --save_total_limit 1
  --learning_rate 2e-5
  --optim adamw_torch
  --weight_decay 0.
  --warmup_ratio 0.03
  --logging_steps 1
  --tf32 True
  --model_max_length 2048
  --gradient_checkpointing True
  --dataloader_num_workers 8
)

# "$@" forwards caller-supplied extra arguments; with none given, the
# invocation consists of exactly the flags listed above.
deepspeed ../omni_speech/train/train_mem.py "${train_args[@]}" "$@"