| [ | |
| 9216, | |
| 41, | |
| { | |
| "a_upsample_ratio": 1, | |
| "accum_grad": 2, | |
| "adim": 768, | |
| "aheads": 12, | |
| "apply_uttmvn": true, | |
| "aux_lsm_weight": 0.0, | |
| "backend": "pytorch", | |
| "badim": 320, | |
| "batch_bins": 0, | |
| "batch_count": "auto", | |
| "batch_frames_in": 0, | |
| "batch_frames_inout": 0, | |
| "batch_frames_out": 0, | |
| "bdropout_rate": 0.0, | |
| "beam_size": 4, | |
| "blayers": 2, | |
| "bnmask": 2, | |
| "bprojs": 300, | |
| "btype": "blstmp", | |
| "bunits": 300, | |
| "cnn_module_kernel": 31, | |
| "config2": null, | |
| "config3": null, | |
| "context_residual": false, | |
| "criterion": "acc", | |
| "ctc_type": "warpctc", | |
| "ctc_weight": 0.3, | |
| "debugmode": 1, | |
| "dec_init": null, | |
| "dec_init_mods": [ | |
| "att.", | |
| " dec." | |
| ], | |
| "dict": "data/lang_1char/units.txt", | |
| "dlayers": 6, | |
| "dropout_rate": 0.1, | |
| "dunits": 3072, | |
| "early_stop_criterion": "validation/main/acc", | |
| "elayers": 12, | |
| "enc_init": null, | |
| "enc_init_mods": [ | |
| "enc.enc." | |
| ], | |
| "eps": 1e-08, | |
| "eps_decay": 0.01, | |
| "eunits": 3072, | |
| "fbank_fmax": null, | |
| "fbank_fmin": 0.0, | |
| "fbank_fs": 16000, | |
| "grad_clip": 5.0, | |
| "grad_noise": false, | |
| "labels_type": "unigram5000", | |
| "lm_weight": 0.1, | |
| "lsm_weight": 0.1, | |
| "macaron_style": 1, | |
| "maxlen_in": 220, | |
| "maxlen_out": 220, | |
| "maxlenratio": 0.0, | |
| "minibatches": 0, | |
| "minlenratio": 0.0, | |
| "model_module": "espnet.nets.pytorch_backend.e2e_asr_transformer_multitask_dual:E2E", | |
| "mtl_custom_worker_l1_weight": 0.0, | |
| "mtl_custom_worker_length_normalized_loss": 0, | |
| "mtl_custom_worker_mlp_hdim": 256, | |
| "mtl_custom_worker_mlp_nlayers": 2, | |
| "mtl_custom_worker_mlp_nonlin_end": 0, | |
| "mtl_custom_worker_mlp_nonlin_type": "relu", | |
| "mtl_custom_worker_name": "patrickvonplaten/wav2vec2-base", | |
| "mtl_custom_worker_task_type": "", | |
| "mtl_custom_worker_tgt_type": "projected_quantized_states", | |
| "mtl_kl_weight": 0.0, | |
| "mtl_kl_weight_2": 0.0, | |
| "mtl_l1_weight": 0.4, | |
| "mtl_l1_weight_2": 0.4, | |
| "mtl_length_normalized_loss": 1, | |
| "mtl_length_normalized_loss_2": 1, | |
| "mtl_mlp_hdim": 256, | |
| "mtl_mlp_hdim_2": 256, | |
| "mtl_mlp_nlayers": 1, | |
| "mtl_mlp_nlayers_2": 1, | |
| "mtl_mlp_nonlin_end": 0, | |
| "mtl_mlp_nonlin_end_2": 0, | |
| "mtl_mlp_nonlin_type": "relu", | |
| "mtl_mlp_nonlin_type_2": "relu", | |
| "mtl_task_layer": "conformer6", | |
| "mtl_task_type": "l1", | |
| "mtl_task_type_2": "l1", | |
| "mtl_worker_source": "conv1d_lrs3_v04_lrs2", | |
| "mtl_worker_source_2": "conv3d_lrs3_v04_lrs2_dual", | |
| "mtlalpha": 0.1, | |
| "n_iter_processes": 12, | |
| "n_mels": 80, | |
| "nbest": 1, | |
| "ngpu": 1, | |
| "num_encs": 1, | |
| "num_input": 2, | |
| "num_save_attention": 3, | |
| "num_spkrs": 1, | |
| "opt": "noam", | |
| "patience": 0, | |
| "penalty": 0.0, | |
| "preprocess_conf": null, | |
| "pretrain_dataset": "lrs2_full_dual_ignore", | |
| "raw_max_freq_width": 150, | |
| "raw_max_speed_rate": 1.1, | |
| "raw_max_time_width": 0.4, | |
| "raw_min_speed_rate": 0.9, | |
| "raw_n_freq_mask": 2, | |
| "raw_n_time_mask": 2, | |
| "raw_speech_do_normalize": false, | |
| "ref_channel": -1, | |
| "rel_pos_type": "latest", | |
| "relu_type": "swish", | |
| "report_cer": false, | |
| "report_interval_iters": 100, | |
| "report_wer": false, | |
| "rnnlm": null, | |
| "rnnlm_conf": null, | |
| "save_interval_iters": 0, | |
| "seed": 1, | |
| "sortagrad": 0, | |
| "specaug_max_freq_width": 30, | |
| "specaug_max_time_warp": 5, | |
| "specaug_max_time_width": 40, | |
| "specaug_n_freq_mask": 2, | |
| "specaug_n_time_mask": 2, | |
| "sr_interp_mode": "nearest", | |
| "sr_interp_scale_factor": 1.0, | |
| "stats_file": null, | |
| "sym_blank": "<blank>", | |
| "sym_space": "<space>", | |
| "threshold": 0.0001, | |
| "train_dtype": "float32", | |
| "transformer_attn_dropout_rate": 0.1, | |
| "transformer_encoder_attn_layer_type": "rel_mha", | |
| "transformer_init": "pytorch", | |
| "transformer_input_layer": "conv3d", | |
| "transformer_length_normalized_loss": 0, | |
| "transformer_warmup_steps": 25000, | |
| "use_beamformer": true, | |
| "use_cnn_module": 1, | |
| "use_dnn_mask_for_wpe": false, | |
| "use_freqmask": false, | |
| "use_frontend": false, | |
| "use_noiseaug": false, | |
| "use_specaug": false, | |
| "use_speedaug": false, | |
| "use_timemask": false, | |
| "use_v_adaptive_timemask": true, | |
| "use_v_cutout": false, | |
| "use_v_timemask": false, | |
| "use_wpe": false, | |
| "uttmvn_norm_means": true, | |
| "uttmvn_norm_vars": false, | |
| "v_cutout_max_hole_length": 22, | |
| "v_cutout_n_holes": 1, | |
| "v_raw_max_time_width": 0.4, | |
| "v_raw_n_time_mask": 1, | |
| "v_timemask_replace_with_zero": false, | |
| "v_timemask_stride": 1.0, | |
| "verbose": 0, | |
| "wavaugments": null, | |
| "wdropout_rate": 0.0, | |
| "weight_decay": 0.0, | |
| "wlayers": 2, | |
| "wpe_delay": 3, | |
| "wpe_taps": 5, | |
| "wprojs": 300, | |
| "wtype": "blstmp", | |
| "wunits": 300, | |
| "zero_triu": false | |
| } | |
| ] |