namin72 committed on
Commit
d73aad3
·
verified ·
1 Parent(s): 2c0793e

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yaml +100 -0
  2. finetune_ep500.sh +71 -0
config.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Fine-tuning configuration for SenseVoiceSmall (FunASR), 500-epoch run.
# Reconstructed from a diff rendering: diff markers and display line numbers
# removed, block-mapping indentation restored so the file parses as YAML.

encoder: SenseVoiceEncoderSmall
encoder_conf:
  output_size: 512
  attention_heads: 4
  linear_units: 2048
  num_blocks: 50
  tp_blocks: 20
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: pe
  pos_enc_class: SinusoidalPositionEncoder
  normalize_before: true
  kernel_size: 11
  sanm_shfit: 0          # NOTE(review): spelling as in upstream FunASR config — do not "fix"; the trainer reads this exact key
  selfattention_layer_type: sanm

model: SenseVoiceSmall
model_conf:
  length_normalized_loss: true
  sos: 1
  eos: 2
  ignore_id: -1

tokenizer: SentencepiecesTokenizer
tokenizer_conf:
  bpemodel: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model
  unk_symbol: <unk>
  split_with_space: true

frontend: WavFrontend
frontend_conf:
  fs: 16000
  window: hamming
  n_mels: 80
  frame_length: 25
  frame_shift: 10
  lfr_m: 7
  lfr_n: 6
  cmvn_file: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/am.mvn

dataset: SenseVoiceCTCDataset
dataset_conf:
  index_ds: IndexDSJsonl
  batch_sampler: BatchSampler
  data_split_num: 1
  batch_type: token      # batch_size is counted in tokens, not utterances
  batch_size: 1200
  max_token_length: 2000
  min_token_length: 60
  max_source_length: 2000
  min_source_length: 60
  max_target_length: 200
  min_target_length: 0
  shuffle: true
  num_workers: 2
  sos: 1
  eos: 2
  IndexDSJsonl: IndexDSJsonl
  retry: 20
  sort_size: 1024

train_conf:
  accum_grad: 1
  grad_clip: 5
  max_epoch: 500
  keep_nbest_models: 1
  avg_nbest_model: 0
  log_interval: 1
  resume: true
  validate_interval: 2000
  save_checkpoint_interval: 2000
  use_deepspeed: false   # deepspeed_config below is ignored while this is false
  deepspeed_config: /home/ubuntu/work/SenseVoice/deepspeed_conf/ds_stage1.json

optim: adamw
optim_conf:
  lr: 0.0002
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000

specaug: SpecAugLFR
specaug_conf:
  apply_time_warp: false
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  freq_mask_width_range:
  - 0
  - 30
  lfr_rate: 6
  num_freq_mask: 1
  apply_time_mask: true
  time_mask_width_range:
  - 0
  - 12
  num_time_mask: 1

# Initialization / run-level settings
init_param: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/model.pt
config: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall/config.yaml
is_training: true
trust_remote_code: true
train_data_set_list: /home/ubuntu/work/SenseVoice/dataset/train_split.jsonl
valid_data_set_list: /home/ubuntu/work/SenseVoice/dataset/val.jsonl
output_dir: ./outputs_ep500
model_path: /home/ubuntu/.cache/modelscope/hub/models/iic/SenseVoiceSmall
device: cpu              # NOTE(review): CPU training for a 500-epoch run will be very slow — confirm this is intentional
finetune_ep500.sh ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)
#
# Launch a 500-epoch fine-tune of SenseVoiceSmall with FunASR's distributed
# trainer (train_ds.py) via torchrun. All stdout/stderr goes to
# ${output_dir}/log.txt. The '&>' redirect below is bash-only, hence the
# explicit bash shebang (the original file had none).

workspace=$(pwd)

# which gpu to train or finetune
export CUDA_VISIBLE_DEVICES="0"
# gpu_num = number of comma-separated entries in CUDA_VISIBLE_DEVICES
gpu_num=$(echo "$CUDA_VISIBLE_DEVICES" | awk -F "," '{print NF}')

# model_name from model_hub, or model_dir in local path

## option 1, download model automatically
model_name_or_model_dir="iic/SenseVoiceSmall"

## option 2, download model by git
#local_path_root=${workspace}/modelscope_models
#mkdir -p ${local_path_root}/${model_name_or_model_dir}
#git clone https://www.modelscope.cn/${model_name_or_model_dir}.git ${local_path_root}/${model_name_or_model_dir}
#model_name_or_model_dir=${local_path_root}/${model_name_or_model_dir}


# data dir, which contains: train.json, val.json
train_data="/home/ubuntu/work/SenseVoice/dataset/train_split.jsonl"
val_data="/home/ubuntu/work/SenseVoice/dataset/val.jsonl"

# exp output dir
output_dir="./outputs_ep500"
log_file="${output_dir}/log.txt"

deepspeed_config="${workspace}/deepspeed_conf/ds_stage1.json"

# create the output dir before redirecting the log into it
mkdir -p "${output_dir}"
echo "log_file: ${log_file}"

# torchrun rendezvous settings; WORLD_SIZE / RANK / MASTER_ADDR / MASTER_PORT
# may be injected by a cluster scheduler, otherwise single-node defaults apply.
DISTRIBUTED_ARGS="
  --nnodes ${WORLD_SIZE:-1} \
  --nproc_per_node $gpu_num \
  --node_rank ${RANK:-0} \
  --master_addr ${MASTER_ADDR:-127.0.0.1} \
  --master_port ${MASTER_PORT:-26669}
"

echo $DISTRIBUTED_ARGS

# funasr trainer path
train_tool="/home/ubuntu/work/SenseVoice/FunASR/funasr/bin/train_ds.py"
echo "Using funasr trainer: ${train_tool}"

# '++key=value' is the hydra/OmegaConf-style override syntax FunASR's trainer
# accepts; these mirror the values in config.yaml for this run.
# $DISTRIBUTED_ARGS is intentionally unquoted: word splitting is required so
# each flag becomes a separate argv entry.
torchrun $DISTRIBUTED_ARGS \
  "${train_tool}" \
  ++model="${model_name_or_model_dir}" \
  ++trust_remote_code=true \
  ++train_data_set_list="${train_data}" \
  ++valid_data_set_list="${val_data}" \
  ++dataset_conf.data_split_num=1 \
  ++dataset_conf.batch_sampler="BatchSampler" \
  ++dataset_conf.batch_size=1200 \
  ++dataset_conf.sort_size=1024 \
  ++dataset_conf.batch_type="token" \
  ++dataset_conf.num_workers=2 \
  ++train_conf.max_epoch=500 \
  ++train_conf.log_interval=1 \
  ++train_conf.resume=true \
  ++train_conf.validate_interval=2000 \
  ++train_conf.save_checkpoint_interval=2000 \
  ++train_conf.keep_nbest_models=1 \
  ++train_conf.avg_nbest_model=0 \
  ++train_conf.use_deepspeed=false \
  ++train_conf.deepspeed_config="${deepspeed_config}" \
  ++optim_conf.lr=0.0002 \
  ++output_dir="${output_dir}" &> "${log_file}"