update finetune script
Browse files
327M-uni-v2-dual-domain-mvq/finetune_rnnt_300m.sh
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
#
# Runs zipformer_finetune/finetune_asr.py (initialised from a pretrained
# encoder checkpoint) and then zipformer_finetune/decode.py on the result.
# NOTE(review): every path below is relative, so this presumably has to be
# launched from the recipe directory that holds zipformer_finetune/ and data/
# -- confirm.

# Fail fast: abort on the first failing command, unset variable, or failed
# pipeline stage, so decoding is never attempted after a failed training run.
set -euo pipefail

# ${PYTHONPATH:-} keeps this working under `set -u` when PYTHONPATH is unset;
# the resulting value is the same as the unguarded form.
export PYTHONPATH="./../../../:${PYTHONPATH:-}"

# data related
use_librispeech=1   # passed as --use-librispeech
full_libri=0        # passed as --full-libri

causal=0            # passed as --causal
lr=0.045            # passed as --base-lr

# finetune checkpoint
do_finetune=1       # passed as --do-finetune
finetune_ckpt=zipformer_audio_encoder/exp-316M-uniform-v2-zipformer-out-ds-2-lh-large-giga-xl-voxpopuli-1-as-full-x2-all-audio-w2v2-mask-p-0.65-l-10-cha-mask-p-0.25-l-20-musan-p-0.5-min-snr-10-multi-mvq-wavlm-all-wavlm-large-cb16-1.0-dasheng-cb8-0.1-md300/iter-496000-avg-4.pt

# Loss selection and downsampling; these are shared by training and decoding.
use_ctc=0           # passed as --use-ctc
use_transducer=1    # passed as --use-transducer
output_ds=2         # passed as --output-downsampling-factor
post_output_ds=1    # passed as --post-encoder-downsampling-factor

# Encoder freezing (passed as --freeze-encoder / --freeze-encoder-steps).
freeze_encoder=0
freeze_encoder_steps=2000
# Alternative setting, kept for reference:
# freeze_encoder=1
# freeze_encoder_steps=-1
encoder_lr_scale=0.05   # passed as --encoder-lr-scale

md=1000             # passed as --max-duration to the training run only

# Experiment directory, used by both the training and the decoding commands.
exp_dir=zipformer_finetune/exp-finetune-rnnt-327M-multi-mvq-out-ds-2

echo "$exp_dir"

# Fine-tuning run, launched with torchrun using 2 processes on this node.
# The arguments are collected in an array so every expansion is quoted
# (no word splitting or globbing) and the groups can carry comments.
train_args=(
  --num-epochs 30
  --use-fp16 1
  --start-epoch 1
  --use-librispeech "$use_librispeech" --full-libri "$full_libri"
  --exp-dir "$exp_dir"
  --manifest-dir data/fbank
  --bpe-model data/lang_bpe_500/bpe.model
  --base-lr "$lr"
  --use-ctc "$use_ctc" --use-transducer "$use_transducer"

  # Initialise encoder_embed and encoder from the pretrained checkpoint.
  --do-finetune "$do_finetune" --init-modules "encoder_embed,encoder" --finetune-ckpt "$finetune_ckpt"
  --freeze-encoder "$freeze_encoder" --freeze-encoder-steps "$freeze_encoder_steps"
  --encoder-lr-scale "$encoder_lr_scale"
  --causal "$causal"

  # Encoder architecture: one comma-separated value per encoder stack
  # (7 stacks). The decode command passes the same values.
  --downsampling-factor 1,2,4,8,4,2,1
  --num-encoder-layers 1,2,2,3,1,1,1
  --feedforward-dim 3072,3072,3072,3072,3072,3072,3072
  --encoder-dim 1024,1024,1024,1024,1024,1024,1024
  --encoder-unmasked-dim 512,512,512,512,512,512,512
  --cnn-module-kernel 31,31,15,15,15,31,31
  --num-heads 8,8,8,8,8,8,8
  --output-downsampling-factor "$output_ds"
  --post-encoder-downsampling-factor "$post_output_ds"

  --on-the-fly-feats 1
  --max-duration "$md"
)

torchrun --nproc_per_node=2 --master_port=19291 \
  zipformer_finetune/finetune_asr.py "${train_args[@]}"

# Decode the fine-tuned model once per decoding method. The epoch and avg
# loops each hold a single value; they are kept as loops so more values can be
# swept by extending the lists.
# NOTE(review): training is configured for 30 epochs but only epoch 23 with
# avg 8 is decoded -- presumably chosen deliberately; confirm.
# NOTE(review): decoding reads data/fbank_librispeech while training reads
# data/fbank -- confirm this difference is intended.
for m in greedy_search modified_beam_search; do
  for epoch in 23; do
    for avg in 8; do
      # Quoted argument array: no word splitting or globbing on expansions.
      decode_args=(
        --epoch "$epoch"
        --avg "$avg"
        --manifest-dir data/fbank_librispeech
        --bpe-model data/lang_bpe_500/bpe.model
        --use-averaged-model 1

        # Same architecture values as the training command.
        --downsampling-factor 1,2,4,8,4,2,1
        --num-encoder-layers 1,2,2,3,1,1,1
        --feedforward-dim 3072,3072,3072,3072,3072,3072,3072
        --encoder-dim 1024,1024,1024,1024,1024,1024,1024
        --encoder-unmasked-dim 512,512,512,512,512,512,512
        --cnn-module-kernel 31,31,15,15,15,31,31
        --num-heads 8,8,8,8,8,8,8
        --use-ctc "$use_ctc" --use-transducer "$use_transducer"
        --output-downsampling-factor "$output_ds"
        --post-encoder-downsampling-factor "$post_output_ds"

        --on-the-fly-feats 1
        --exp-dir "$exp_dir"
        --decoding-method "$m"
        --max-duration 1000
      )
      python zipformer_finetune/decode.py "${decode_args[@]}"
    done
  done
done

# Optional checkpoint cleanup (disabled). If re-enabled, keep the ${exp_dir:?}
# guard so an empty exp_dir can never expand to /*.pt:
# rm -- "${exp_dir:?}"/*.pt

echo "Done"