# --- Distributed launch settings (unused: deepspeed auto-detects local GPUs) ---
# GPUS_PER_NODE=8
# NNODES=1
# NODE_RANK=0
# MASTER_ADDR=localhost
# MASTER_PORT=6001

# Checkpoint to continue fine-tuning from.
MODEL="/data1/speech/anhnmt2/Speech2Speech/half-streaming-speech-nlp/checkpoints/minicpmo_sft_asr"
# Tokenizer directory.
# Alternatives: openbmb/MiniCPM-V-2, openbmb/MiniCPM-Llama3-V-2_5, openbmb/MiniCPM-V-2_6
TOKENIZER_PATH="/data1/speech/anhnmt2/Speech2Speech/half-streaming-speech-nlp/omni_speech/model/minicpmo/MiniCPM-o-2_6"

# ATTENTION: specify the path to your training data, which should be a json file
# consisting of a list of conversations.
# See the section for finetuning in README for more information.
DATA="/data1/speech/anhnmt2/dataset/s2s/minicpmo/asr/train_asr_mixed_500k.jsonl"
EVAL_DATA="/data1/speech/anhnmt2/dataset/s2s/minicpmo/asr/dev_asr_mixed.jsonl"

# LLM backbone type:
#   minicpm -> openbmb/MiniCPM-V-2
#   llama3  -> openbmb/MiniCPM-Llama3-V-2_5
#   qwen    -> openbmb/MiniCPM-o-2_6 or openbmb/MiniCPM-V-2_6
LLM_TYPE="qwen"

# Maximum sequence length; for multi-image SFT set MODEL_MAX_Length=4096.
MODEL_MAX_Length=2048

# Explicit torchrun-style launch args (unused; kept for reference):
# DISTRIBUTED_ARGS="
#     --nproc_per_node $GPUS_PER_NODE \
#     --nnodes $NNODES \
#     --node_rank $NODE_RANK \
#     --master_addr $MASTER_ADDR \
#     --master_port $MASTER_PORT
# "
# Launch ASR SFT with DeepSpeed ZeRO-2.
# The speech modules are trained (--tune_speech true) while the LLM is kept
# frozen (--tune_llm false). All $VAR expansions are quoted so paths with
# spaces cannot be word-split (SC2086).
deepspeed ../omni_speech/train/train_minicpmo.py \
    --deepspeed zero2.json \
    --model_name_or_path "$MODEL" \
    --tokenizer_path "$TOKENIZER_PATH" \
    --llm_type "$LLM_TYPE" \
    --data_path "$DATA" \
    --eval_data_path "$EVAL_DATA" \
    --remove_unused_columns false \
    --label_names "labels" \
    --prediction_loss_only false \
    --bf16 true \
    --do_train \
    --do_eval \
    --tune_speech true \
    --tune_llm false \
    --model_max_length "$MODEL_MAX_Length" \
    --eval_steps 2000 \
    --output_dir ../checkpoints/minicpmo_sft_asr \
    --num_train_epochs 2 \
    --logging_strategy "steps" \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 4 \
    --evaluation_strategy "steps" \
    --save_strategy "steps" \
    --save_steps 5000 \
    --save_total_limit 1 \
    --learning_rate 1e-5 \
    --max_grad_norm 20. \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --gradient_checkpointing true