| model: | |
| _target_: modules.gaudio.sv_encoder.SenseVoiceSmall | |
| specaug: | |
| _target_: modules.gaudio.sv_encoder.SpecAugLFR | |
| apply_freq_mask: true | |
| apply_time_mask: true | |
| apply_time_warp: false | |
| freq_mask_width_range: [0, 30] | |
| lfr_rate: 6 | |
| num_freq_mask: 1 | |
| num_time_mask: 1 | |
| time_mask_width_range: [0, 12] | |
| time_warp_mode: bicubic | |
| time_warp_window: 5 | |
| encoder: | |
| _target_: modules.gaudio.sv_encoder.SenseVoiceEncoderSmall | |
| attention_dropout_rate: 0.1 | |
| attention_heads: 4 | |
| dropout_rate: 0.1 | |
| kernel_size: 11 | |
| linear_units: 2048 | |
| normalize_before: true | |
| num_blocks: 50 | |
| output_size: 512 | |
| sanm_shfit: 0 | |
| tp_blocks: 20 | |
| input_size: 560 | |
| length_normalized_loss: true | |
| input_size: 560 | |
| vocab_size: 25055 | |
| sos: 1 | |
| eos: 2 | |
| ignore_id: -1 |