| The Wav2vec2.0-xlsr-53 model was fine-tuned on the Common Voice Russian dataset | |
| Configs (yaml) | |
| checkpoint: | |
| save_interval: 1000 | |
| save_interval_updates: 1000 | |
| keep_interval_updates: 1 | |
| no_epoch_checkpoints: true | |
| best_checkpoint_metric: wer | |
| task: | |
| _name: audio_finetuning | |
| normalize: true | |
| labels: phn | |
| dataset: | |
| num_workers: 6 | |
| max_tokens: 800000 | |
| skip_invalid_size_inputs_valid_test: true | |
| valid_subset: valid | |
| distributed_training: | |
| ddp_backend: legacy_ddp | |
| distributed_world_size: 4 | |
| criterion: | |
| _name: ctc | |
| zero_infinity: true | |
| optimization: | |
| max_update: 25000 | |
| lr: [0.00001] | |
| sentence_avg: true | |
| update_freq: [4] | |
| optimizer: | |
| _name: adam | |
| adam_betas: (0.9, 0.98) | |
| adam_eps: 1e-8 | |
| lr_scheduler: | |
| _name: tri_stage | |
| phase_ratio: [0.1, 0.4, 0.5] | |
| final_lr_scale: 0.05 | |
| model: | |
| _name: wav2vec_ctc | |
| apply_mask: true | |
| mask_prob: 0.5 | |
| mask_channel_prob: 0.1 | |
| mask_channel_length: 64 | |
| layerdrop: 0.1 | |
| activation_dropout: 0.1 | |
| feature_grad_mult: 0.0 | |
| freeze_finetune_updates: 0 |