#!/bin/bash
#
# Launches train_minicpmo.py through the `deepspeed` launcher for an ASR
# fine-tuning run of MiniCPM-o.
#
# All relative paths below (zero2.json, ../omni_speech, ../checkpoints) are
# resolved against the current working directory, so run this script from the
# directory that contains zero2.json.

set -euo pipefail

# Multi-node settings (currently unused; kept for reference).
# GPUS_PER_NODE=8
# NNODES=1
# NODE_RANK=0
# MASTER_ADDR=localhost
# MASTER_PORT=6001

# Checkpoint to start from and the tokenizer that goes with it.
# Hub alternatives: openbmb/MiniCPM-V-2, openbmb/MiniCPM-Llama3-V-2_5,
# openbmb/MiniCPM-V-2_6.
MODEL="/data1/speech/anhnmt2/Speech2Speech/half-streaming-speech-nlp/checkpoints/minicpmo_sft_asr"
TOKENIZER_PATH="/data1/speech/anhnmt2/Speech2Speech/half-streaming-speech-nlp/omni_speech/model/minicpmo/MiniCPM-o-2_6"

# ATTENTION: specify the path to your training data, which should be a JSON
# file consisting of a list of conversations.
# See the fine-tuning section of the README for more information.
DATA="/data1/speech/anhnmt2/dataset/s2s/minicpmo/asr/train_asr_mixed_500k.jsonl"
EVAL_DATA="/data1/speech/anhnmt2/dataset/s2s/minicpmo/asr/dev_asr_mixed.jsonl"

# LLM_TYPE must match the base model:
#   openbmb/MiniCPM-V-2                               -> minicpm
#   openbmb/MiniCPM-Llama3-V-2_5                      -> llama3
#   openbmb/MiniCPM-o-2_6 or openbmb/MiniCPM-V-2_6    -> qwen
LLM_TYPE="qwen"

# For multi-image SFT, set MODEL_MAX_Length=4096.
MODEL_MAX_Length=2048

# Launcher arguments for a torchrun-style multi-node start (currently unused).
# DISTRIBUTED_ARGS="
#     --nproc_per_node $GPUS_PER_NODE \
#     --nnodes $NNODES \
#     --node_rank $NODE_RANK \
#     --master_addr $MASTER_ADDR \
#     --master_port $MASTER_PORT
# "

# Arguments are collected in an array so every value is passed as exactly one
# word (no word-splitting or globbing) and no line-continuation backslashes
# are needed.
train_args=(
  --deepspeed zero2.json

  # Model, tokenizer and data.
  --model_name_or_path "$MODEL"
  --tokenizer_path "$TOKENIZER_PATH"
  --llm_type "$LLM_TYPE"
  --data_path "$DATA"
  --eval_data_path "$EVAL_DATA"
  --remove_unused_columns false
  --label_names "labels"
  --prediction_loss_only false
  --model_max_length "$MODEL_MAX_Length"

  # Precision and memory.
  --bf16 true
  --tf32 True
  --gradient_checkpointing true

  # What to run and which parts to tune (speech on, LLM off).
  --do_train
  --do_eval
  --tune_speech true
  --tune_llm false

  # Batching: per-device batch of 1 with 4 gradient-accumulation steps.
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 1
  --gradient_accumulation_steps 4

  # Optimizer and schedule.
  --num_train_epochs 2
  --learning_rate 1e-5
  --max_grad_norm 20.
  --weight_decay 0.
  --warmup_ratio 0.03
  --lr_scheduler_type "cosine"

  # Logging, evaluation and checkpointing cadence.
  --logging_strategy "steps"
  --logging_steps 1
  --evaluation_strategy "steps"
  --eval_steps 2000
  --save_strategy "steps"
  --save_steps 5000
  --save_total_limit 1

  # NOTE(review): when this script is run from a direct subdirectory of the
  # repository root, this path resolves to the same directory as MODEL --
  # confirm that writing checkpoints into the source checkpoint directory is
  # intended.
  --output_dir ../checkpoints/minicpmo_sft_asr
)

deepspeed ../omni_speech/train/train_minicpmo.py "${train_args[@]}"