Upload 2 files
Browse files- config.yaml +100 -0
- finetune_ep500.sh +71 -0
config.yaml
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fine-tuning configuration for iic/SenseVoiceSmall via FunASR.
# NOTE(review): nesting reconstructed — the scraped copy had lost all YAML
# indentation, which would make FunASR read every key as top-level. The
# grouping below follows the stock SenseVoiceSmall config; confirm against
# the pretrained model's own config.yaml.

encoder: SenseVoiceEncoderSmall
encoder_conf:
  output_size: 512
  attention_heads: 4
  linear_units: 2048
  num_blocks: 50
  tp_blocks: 20
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: pe
  pos_enc_class: SinusoidalPositionEncoder
  normalize_before: true
  kernel_size: 11
  sanm_shfit: 0
  selfattention_layer_type: sanm

model: SenseVoiceSmall
model_conf:
  length_normalized_loss: true
  sos: 1
  eos: 2
  ignore_id: -1

tokenizer: SentencepiecesTokenizer
tokenizer_conf:
  bpemodel: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model
  unk_symbol: <unk>
  split_with_space: true

frontend: WavFrontend
frontend_conf:
  fs: 16000
  window: hamming
  n_mels: 80
  frame_length: 25
  frame_shift: 10
  # Low-frame-rate stacking: 7 frames stacked, stride 6.
  lfr_m: 7
  lfr_n: 6
  cmvn_file: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/am.mvn

dataset: SenseVoiceCTCDataset
dataset_conf:
  index_ds: IndexDSJsonl
  batch_sampler: BatchSampler
  data_split_num: 1
  # Token-based batching: batch_size is a token budget, not an utterance count.
  batch_type: token
  batch_size: 1200
  max_token_length: 2000
  min_token_length: 60
  max_source_length: 2000
  min_source_length: 60
  max_target_length: 200
  min_target_length: 0
  shuffle: true
  num_workers: 2
  sos: 1
  eos: 2
  IndexDSJsonl: IndexDSJsonl
  retry: 20
  sort_size: 1024

train_conf:
  accum_grad: 1
  grad_clip: 5
  max_epoch: 500
  keep_nbest_models: 1
  avg_nbest_model: 0
  log_interval: 1
  resume: true
  # Validate and checkpoint every 2000 steps.
  validate_interval: 2000
  save_checkpoint_interval: 2000
  use_deepspeed: false
  # Ignored while use_deepspeed is false.
  deepspeed_config: /home/ubuntu/work/SenseVoice/deepspeed_conf/ds_stage1.json

optim: adamw
optim_conf:
  lr: 0.0002
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000

specaug: SpecAugLFR
specaug_conf:
  apply_time_warp: false
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  freq_mask_width_range:
  - 0
  - 30
  lfr_rate: 6
  num_freq_mask: 1
  apply_time_mask: true
  time_mask_width_range:
  - 0
  - 12
  num_time_mask: 1

# Pretrained weights and base config to initialize fine-tuning from.
init_param: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/model.pt
config: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/config.yaml
is_training: true
trust_remote_code: true
train_data_set_list: /home/ubuntu/work/SenseVoice/dataset/train_split.jsonl
valid_data_set_list: /home/ubuntu/work/SenseVoice/dataset/val.jsonl
output_dir: ./outputs_ep500
model_path: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall
# NOTE(review): device is cpu while the launch script exports
# CUDA_VISIBLE_DEVICES="0" — confirm which one is intended.
device: cpu
finetune_ep500.sh
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env bash
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
#
# Launch a 500-epoch fine-tune of SenseVoiceSmall with FunASR's train_ds.py
# via torchrun. All stdout/stderr is redirected to ${output_dir}/log.txt.
#
# Fixes vs. original: added a bash shebang (the script relies on the bash-only
# `&>` redirection; under POSIX sh `cmd &> file` backgrounds the command and
# truncates the file, losing the log), replaced legacy backticks with $(...),
# and quoted path expansions.

workspace=$(pwd)

# Which GPU(s) to train or finetune on; gpu_num is derived from the
# comma-separated device list.
export CUDA_VISIBLE_DEVICES="0"
gpu_num=$(echo "${CUDA_VISIBLE_DEVICES}" | awk -F "," '{print NF}')

# model_name from model_hub, or model_dir in local path

## option 1, download model automatically
model_name_or_model_dir="iic/SenseVoiceSmall"

## option 2, download model by git
#local_path_root=${workspace}/modelscope_models
#mkdir -p ${local_path_root}/${model_name_or_model_dir}
#git clone https://www.modelscope.cn/${model_name_or_model_dir}.git ${local_path_root}/${model_name_or_model_dir}
#model_name_or_model_dir=${local_path_root}/${model_name_or_model_dir}

# Data: jsonl manifests for train / validation splits.
train_data="/home/ubuntu/work/SenseVoice/dataset/train_split.jsonl"
val_data="/home/ubuntu/work/SenseVoice/dataset/val.jsonl"

# Experiment output dir.
output_dir="./outputs_ep500"
log_file="${output_dir}/log.txt"

# Only used if ++train_conf.use_deepspeed is flipped to true below.
deepspeed_config="${workspace}/deepspeed_conf/ds_stage1.json"

mkdir -p "${output_dir}"
echo "log_file: ${log_file}"

# torchrun rendezvous settings; env vars override the single-node defaults.
DISTRIBUTED_ARGS="
  --nnodes ${WORLD_SIZE:-1} \
  --nproc_per_node $gpu_num \
  --node_rank ${RANK:-0} \
  --master_addr ${MASTER_ADDR:-127.0.0.1} \
  --master_port ${MASTER_PORT:-26669}
"

echo $DISTRIBUTED_ARGS

# funasr trainer path
train_tool="/home/ubuntu/work/SenseVoice/FunASR/funasr/bin/train_ds.py"
echo "Using funasr trainer: ${train_tool}"

# `++key=value` are FunASR/hydra-style overrides on top of the model's config.
torchrun $DISTRIBUTED_ARGS \
  "${train_tool}" \
  ++model="${model_name_or_model_dir}" \
  ++trust_remote_code=true \
  ++train_data_set_list="${train_data}" \
  ++valid_data_set_list="${val_data}" \
  ++dataset_conf.data_split_num=1 \
  ++dataset_conf.batch_sampler="BatchSampler" \
  ++dataset_conf.batch_size=1200 \
  ++dataset_conf.sort_size=1024 \
  ++dataset_conf.batch_type="token" \
  ++dataset_conf.num_workers=2 \
  ++train_conf.max_epoch=500 \
  ++train_conf.log_interval=1 \
  ++train_conf.resume=true \
  ++train_conf.validate_interval=2000 \
  ++train_conf.save_checkpoint_interval=2000 \
  ++train_conf.keep_nbest_models=1 \
  ++train_conf.avg_nbest_model=0 \
  ++train_conf.use_deepspeed=false \
  ++train_conf.deepspeed_config="${deepspeed_config}" \
  ++optim_conf.lr=0.0002 \
  ++output_dir="${output_dir}" &> "${log_file}"