# --- Distributed launch settings (unused: deepspeed auto-detects local GPUs) ---
# GPUS_PER_NODE=8
# NNODES=1
# NODE_RANK=0
# MASTER_ADDR=localhost
# MASTER_PORT=6001

# Checkpoint to continue fine-tuning from.
MODEL="/data1/speech/anhnmt2/Speech2Speech/half-streaming-speech-nlp/checkpoints/minicpmo_sft_asr"
# Tokenizer directory.
# Alternatives: openbmb/MiniCPM-V-2, openbmb/MiniCPM-Llama3-V-2_5, openbmb/MiniCPM-V-2_6
TOKENIZER_PATH="/data1/speech/anhnmt2/Speech2Speech/half-streaming-speech-nlp/omni_speech/model/minicpmo/MiniCPM-o-2_6"

# ATTENTION: specify the path to your training data, which should be a json file
# consisting of a list of conversations.
# See the section for finetuning in README for more information.
DATA="/data1/speech/anhnmt2/dataset/s2s/minicpmo/asr/train_asr_mixed_500k.jsonl"
EVAL_DATA="/data1/speech/anhnmt2/dataset/s2s/minicpmo/asr/dev_asr_mixed.jsonl"

# LLM backbone type:
#   minicpm -> openbmb/MiniCPM-V-2
#   llama3  -> openbmb/MiniCPM-Llama3-V-2_5
#   qwen    -> openbmb/MiniCPM-o-2_6 or openbmb/MiniCPM-V-2_6
LLM_TYPE="qwen"

# Maximum sequence length; for multi-image SFT set MODEL_MAX_Length=4096.
MODEL_MAX_Length=2048

# Explicit torchrun-style launch args (unused; kept for reference):
# DISTRIBUTED_ARGS="
#     --nproc_per_node $GPUS_PER_NODE \
#     --nnodes $NNODES \
#     --node_rank $NODE_RANK \
#     --master_addr $MASTER_ADDR \
#     --master_port $MASTER_PORT
# "
# Launch ASR SFT with DeepSpeed ZeRO-2.
# The speech modules are trained (--tune_speech true) while the LLM is kept
# frozen (--tune_llm false). All $VAR expansions are quoted so paths with
# spaces cannot be word-split (SC2086).
deepspeed ../omni_speech/train/train_minicpmo.py \
    --deepspeed zero2.json \
    --model_name_or_path "$MODEL" \
    --tokenizer_path "$TOKENIZER_PATH" \
    --llm_type "$LLM_TYPE" \
    --data_path "$DATA" \
    --eval_data_path "$EVAL_DATA" \
    --remove_unused_columns false \
    --label_names "labels" \
    --prediction_loss_only false \
    --bf16 true \
    --do_train \
    --do_eval \
    --tune_speech true \
    --tune_llm false \
    --model_max_length "$MODEL_MAX_Length" \
    --eval_steps 2000 \
    --output_dir ../checkpoints/minicpmo_sft_asr \
    --num_train_epochs 2 \
    --logging_strategy "steps" \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 4 \
    --evaluation_strategy "steps" \
    --save_strategy "steps" \
    --save_steps 5000 \
    --save_total_limit 1 \
    --learning_rate 1e-5 \
    --max_grad_norm 20. \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --gradient_checkpointing true