diff --git a/data_respin/bh/nlsyms.txt b/data_respin/bh/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..01e79c32a8c99c557f0757da7cb6d65b3414466d
--- /dev/null
+++ b/data_respin/bh/nlsyms.txt
@@ -0,0 +1,3 @@
+1
+2
+3
diff --git a/data_respin/bn/nlsyms.txt b/data_respin/bn/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a1218a1024a212bb3db30becd860315f9f3ac52
--- /dev/null
+++ b/data_respin/bn/nlsyms.txt
@@ -0,0 +1,5 @@
+1
+2
+3
+4
+5
diff --git a/data_respin/ch/nlsyms.txt b/data_respin/ch/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94ebaf900161394059478fd88aec30e59092a1d7
--- /dev/null
+++ b/data_respin/ch/nlsyms.txt
@@ -0,0 +1,4 @@
+1
+2
+3
+4
diff --git a/data_respin/hi/nlsyms.txt b/data_respin/hi/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a1218a1024a212bb3db30becd860315f9f3ac52
--- /dev/null
+++ b/data_respin/hi/nlsyms.txt
@@ -0,0 +1,5 @@
+1
+2
+3
+4
+5
diff --git a/data_respin/kn/nlsyms.txt b/data_respin/kn/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a1218a1024a212bb3db30becd860315f9f3ac52
--- /dev/null
+++ b/data_respin/kn/nlsyms.txt
@@ -0,0 +1,5 @@
+1
+2
+3
+4
+5
diff --git a/data_respin/mg/nlsyms.txt b/data_respin/mg/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94ebaf900161394059478fd88aec30e59092a1d7
--- /dev/null
+++ b/data_respin/mg/nlsyms.txt
@@ -0,0 +1,4 @@
+1
+2
+3
+4
diff --git a/data_respin/mr/nlsyms.txt b/data_respin/mr/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94ebaf900161394059478fd88aec30e59092a1d7
--- /dev/null
+++ b/data_respin/mr/nlsyms.txt
@@ -0,0 +1,4 @@
+1
+2
+3
+4
diff --git a/data_respin/mt/nlsyms.txt b/data_respin/mt/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94ebaf900161394059478fd88aec30e59092a1d7
--- /dev/null
+++ b/data_respin/mt/nlsyms.txt
@@ -0,0 +1,4 @@
+1
+2
+3
+4
diff --git a/data_respin/te/nlsyms.txt b/data_respin/te/nlsyms.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94ebaf900161394059478fd88aec30e59092a1d7
--- /dev/null
+++ b/data_respin/te/nlsyms.txt
@@ -0,0 +1,4 @@
+1
+2
+3
+4
diff --git a/exp_small/exp_bh/README.md b/exp_small/exp_bh/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4a7f1ba5d38b7bed4e13f2e678052afe235051bc
--- /dev/null
+++ b/exp_small/exp_bh/README.md
@@ -0,0 +1,405 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: bh
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using the respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:31:02 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+  - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_bh|2220|22453|85.9|13.4|0.7|1.1|15.2|73.2|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_bh|2220|104745|96.8|2.1|1.1|1.2|4.4|73.2|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+The full training configuration is reproduced below.
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+  - acc
+  - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_bh/asr_stats_raw_bh_char_sp/train/speech_shape
+- exp_small/exp_bh/asr_stats_raw_bh_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_bh/asr_stats_raw_bh_char_sp/valid/speech_shape
+- exp_small/exp_bh/asr_stats_raw_bh_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/bh/raw/train_bh_sp/wav.scp
+  - speech
+  - sound
+- - dump/bh/raw/train_bh_sp/text
+  - text
+  - text
+valid_data_path_and_name_and_type:
+- - dump/bh/raw/dev_bh/wav.scp
+  - speech
+  - sound
+- - dump/bh/raw/dev_bh/text
+  - text
+  - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- े
+- क
+- र
+- ल
+- स
+- न
+- म
+- त
+- ी
+- ि
+- ह
+- ब
+- ्
+- प
+- व
+- ज
+- ं
+- ो
+- द
+- ख
+- य
+- ग
+- ट
+- ु
+- अ
+- ई
+- इ
+- च
+- भ
+- आ
+- ू
+- उ
+- ए
+- श
+- ै
+- ध
+- ड
+- फ
+- ड़
+- ौ
+- .
+- छ
+- ण
+- ष
+- थ
+- ओ
+- ढ़
+- घ
+- ठ
+- ॉ
+- ृ
+- ढ
+- ऑ
+- ँ
+- ऊ
+- ऋ
+- औ
+- झ
+- ज़
+- फ़
+- ऐ
+- ञ
+- ऽ
+- ख़
+- क़
+- ़
+- ः
+- ॅ
+- ऱ
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```BibTex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..29432e31344892186a8e3fe1dead866bf574b4bb
--- /dev/null
+++ b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:31:02 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+  - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_bh|2220|22453|85.9|13.4|0.7|1.1|15.2|73.2|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_bh|2220|104745|96.8|2.1|1.1|1.2|4.4|73.2|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c42ee80bdb7b7f9f8b1e310d612c1c4bff7c3ed1
--- /dev/null
+++ b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,304 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+  - acc
+  - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_bh/asr_stats_raw_bh_char_sp/train/speech_shape
+- exp_small/exp_bh/asr_stats_raw_bh_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_bh/asr_stats_raw_bh_char_sp/valid/speech_shape
+- exp_small/exp_bh/asr_stats_raw_bh_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/bh/raw/train_bh_sp/wav.scp
+  - speech
+  - sound
+- - dump/bh/raw/train_bh_sp/text
+  - text
+  - text
+valid_data_path_and_name_and_type:
+- - dump/bh/raw/dev_bh/wav.scp
+  - speech
+  - sound
+- - dump/bh/raw/dev_bh/text
+  - text
+  - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- े
+- क
+- र
+- ल
+- स
+- न
+- म
+- त
+- ी
+- ि
+- ह
+- ब
+- ्
+- प
+- व
+- ज
+- ं
+- ो
+- द
+- ख
+- य
+- ग
+- ट
+- ु
+- अ
+- ई
+- इ
+- च
+- भ
+- आ
+- ू
+- उ
+- ए
+- श
+- ै
+- ध
+- ड
+- फ
+- ड़
+- ौ
+- .
+- छ
+- ण
+- ष
+- थ
+- ओ
+- ढ़
+- घ
+- ठ
+- ॉ
+- ृ
+- ढ
+- ऑ
+- ँ
+- ऊ
+- ऋ
+- औ
+- झ
+- ज़
+- फ़
+- ऐ
+- ञ
+- ऽ
+- ख़
+- क़
+- ़
+- ः
+- ॅ
+- ऱ
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc22c60ab4cbf3935986a5c6a147917f4e8cb5cc
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..40fd1461dcc4be23aa2044f2765381bd0babeb63
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc59263d59a6701554d4a5ce6ee97237eae220b0
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..33d9ebfa5934023b842edf43f3b4ffb2eb4ed1b2
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..b81631543c9df8fe6cb495a10c59e30554838552
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..a5e18032c65dd1d6fb4f824d537bfc53b41a05ff
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..312541f6d1ab2648d316caa3d54d4cd5c3bb6540
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..7ec0636e69cdb9c21e172502aeb675a16eea29fe
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..c152b6e98daeef099ebbe5696e9ba367e9f705a1
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..a7af94bc4b5beb29b3ea04870bf32902759e9d61
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..6f0fac44a8f46b0364360513015e6df8db00f9d7
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..fbc890f02e1f270d6708a171480231d515167e95
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..3a39b83490bd817b24edfca5aa4beff696d0912d
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..6a2382085022615669e2661b8a686fbaa7f4878a
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..0a710a8f7f3e91a8be08adbf358270fc236e6a90
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..c99ba5d9d1a0a392074b5f22b2e14dd2ea2f45b9
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..d1c5e28d40cdad9bf2d270f1a9a2143b02882475
Binary files /dev/null and b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6da7f4ce30fdae34445cb63c812d7c0284276e9c
--- /dev/null
+++ b/exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fd89d459111cb4a487d4809ee4278936f8a8eeec09385890a3a4ac45df5eabe
+size 112628010
diff --git a/exp_small/exp_bh/meta.yaml b/exp_small/exp_bh/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..288a5f5427a15e62a608635e3ff2e16e743bd325
--- /dev/null
+++ b/exp_small/exp_bh/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748120464.687666
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_bh/asr_bh_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
diff --git a/exp_small/exp_bn/README.md b/exp_small/exp_bn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..adabbdbc89b38ddc395a8d4317081cf5a4da2c6c
--- /dev/null
+++ b/exp_small/exp_bn/README.md
@@ -0,0 +1,399 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: bn
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using the respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:31:23 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+  - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_bn|2174|20534|86.3|12.6|1.1|1.2|15.0|65.7|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_bn|2174|114101|97.1|1.6|1.2|1.2|4.1|65.7|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+The full training configuration is reproduced below.
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+  - acc
+  - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_bn/asr_stats_raw_bn_char_sp/train/speech_shape
+- exp_small/exp_bn/asr_stats_raw_bn_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_bn/asr_stats_raw_bn_char_sp/valid/speech_shape
+- exp_small/exp_bn/asr_stats_raw_bn_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/bn/raw/train_bn_sp/wav.scp
+  - speech
+  - sound
+- - dump/bn/raw/train_bn_sp/text
+  - text
+  - text
+valid_data_path_and_name_and_type:
+- - dump/bn/raw/dev_bn/wav.scp
+  - speech
+  - sound
+- - dump/bn/raw/dev_bn/text
+  - text
+  - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- া
+- ে
+- র
+- ক
+- ্
+- ি
+- ন
+- ব
+- ল
+- য
+- ম
+- স
+- ত
+- প
+- ট
+- য়
+- হ
+- ু
+- দ
+- ো
+- জ
+- ই
+- গ
+- চ
+- ছ
+- শ
+- আ
+- থ
+- ভ
+- এ
+- ষ
+- ধ
+- ী
+- উ
+- ফ
+- খ
+- ড
+- অ
+- ং
+- ও
+- ড়
+- ণ
+- ঙ
+- ঁ
+- ৃ
+- .
+- ঠ
+- ৈ
+- ূ
+- ৎ
+- ঞ
+- ঘ
+- ঋ
+- ঝ
+- ৌ
+- ঢ
+- ়
+- ঢ়
+- ঃ
+- ঊ
+- ঐ
+- ঔ
+- ঈ
+- ৠ
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```BibTex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..5443480e2fc4f35be2ef9d81d111f5e32b2e25eb
--- /dev/null
+++ b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:31:23 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_bn|2174|20534|86.3|12.6|1.1|1.2|15.0|65.7|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_bn|2174|114101|97.1|1.6|1.2|1.2|4.1|65.7|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1419716fd885920738c116fc7c1ee849eb74e46
--- /dev/null
+++ b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,298 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_bn/asr_stats_raw_bn_char_sp/train/speech_shape
+- exp_small/exp_bn/asr_stats_raw_bn_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_bn/asr_stats_raw_bn_char_sp/valid/speech_shape
+- exp_small/exp_bn/asr_stats_raw_bn_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/bn/raw/train_bn_sp/wav.scp
+ - speech
+ - sound
+- - dump/bn/raw/train_bn_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/bn/raw/dev_bn/wav.scp
+ - speech
+ - sound
+- - dump/bn/raw/dev_bn/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- া
+- ে
+- র
+- ক
+- ্
+- ি
+- ন
+- ব
+- ল
+- য
+- ম
+- স
+- ত
+- প
+- ট
+- য়
+- হ
+- ু
+- দ
+- ো
+- জ
+- ই
+- গ
+- চ
+- ছ
+- শ
+- আ
+- থ
+- ভ
+- এ
+- ষ
+- ধ
+- ী
+- উ
+- ফ
+- খ
+- ড
+- অ
+- ং
+- ও
+- ড়
+- ণ
+- ঙ
+- ঁ
+- ৃ
+- .
+- ঠ
+- ৈ
+- ূ
+- ৎ
+- ঞ
+- ঘ
+- ঋ
+- ঝ
+- ৌ
+- ঢ
+- ়
+- ঢ়
+- ঃ
+- ঊ
+- ঐ
+- ঔ
+- ঈ
+- ৠ
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..c761fbe1a4309c233b24d8cd38d16543fba7fbd5
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..94c807bae89326eaf580a4eac61973e5fe23032d
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..9aafd9120f877993d5f5ff68d611c9e735fb838f
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..699b37544ffd0dd1fe482621b16ce0c7bfc07359
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..d2d0e4b9388766a7180a30f0a3081b00b9a4911d
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..d791e037051df025937d1d85f2af2f7aa0ef9509
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..09524877336acf572b7df4c5baea5cdd617e8f1c
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..59010b2dc657cae10c879645d6f7d0558a0adf7d
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..cadabe11c0f7b67bcd1039b59b7dd1b8857f6ca1
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..0e12f4361516bbf9f589bfc99f3efd5cbe59e064
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc3acc2d53d8e956d9a16fae0c64d3eaaa347570
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..a5cbd3dcf98f9513d77f01d6049e33c34fd6281a
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..b029de85095e0969451e2904f8b15fe26c3dc86e
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..496ffa5921abf7e0f8c9087267850423f3ef10d8
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..21eaaf53111319a33c77fc648f9ba7b82a7ef183
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..cf37d9cd34f1e8dcb42f50ea99bbdf0f3d6886da
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..b5eb8ccd9e15d50ddb672c31a066a77e0b76e9b5
Binary files /dev/null and b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..610f235d38ff660978ffcdf57e9b51bc8707bb45
--- /dev/null
+++ b/exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:467a3d2b8252e0530c63dbc92ce05e8d96a104907690b7f43b618d82d0806530
+size 112609578
diff --git a/exp_small/exp_bn/meta.yaml b/exp_small/exp_bn/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90ecdedf038e0a51058b482a5885978271ccdb51
--- /dev/null
+++ b/exp_small/exp_bn/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748120485.2726
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_bn/asr_bn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
diff --git a/exp_small/exp_ch/README.md b/exp_small/exp_ch/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bceb479e831397cf9188a3c7373890a3e402bc9a
--- /dev/null
+++ b/exp_small/exp_ch/README.md
@@ -0,0 +1,403 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: hne
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using the respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:31:44 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_ch|2234|27969|89.9|9.6|0.5|0.5|10.6|67.5|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_ch|2234|120476|97.7|1.4|0.9|0.8|3.1|67.5|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+expand
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_ch/asr_stats_raw_ch_char_sp/train/speech_shape
+- exp_small/exp_ch/asr_stats_raw_ch_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_ch/asr_stats_raw_ch_char_sp/valid/speech_shape
+- exp_small/exp_ch/asr_stats_raw_ch_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/ch/raw/train_ch_sp/wav.scp
+ - speech
+ - sound
+- - dump/ch/raw/train_ch_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/ch/raw/dev_ch/wav.scp
+ - speech
+ - sound
+- - dump/ch/raw/dev_ch/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- े
+- ा
+- क
+- र
+- न
+- ल
+- म
+- ह
+- स
+- ब
+- ो
+- ी
+- ि
+- त
+- ज
+- प
+- ं
+- थ
+- य
+- ग
+- द
+- व
+- ख
+- इ
+- ्
+- ु
+- अ
+- ट
+- च
+- ू
+- ए
+- उ
+- भ
+- घ
+- फ
+- आ
+- ड़
+- ध
+- ओ
+- ई
+- ड
+- छ
+- .
+- ँ
+- ै
+- ठ
+- ौ
+- झ
+- ढ़
+- श
+- ढ
+- ण
+- ऊ
+- ॉ
+- ऑ
+- ष
+- ऋ
+- ृ
+- ऐ
+- औ
+- फ़
+- ज़
+- ॅ
+- ः
+- क़
+- ख़
+- ञ
+- ़
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```BibTex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..97ebfed36379cbd0192a32b216b297c5dd66c61e
--- /dev/null
+++ b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:31:44 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_ch|2234|27969|89.9|9.6|0.5|0.5|10.6|67.5|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_ch|2234|120476|97.7|1.4|0.9|0.8|3.1|67.5|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..164c54c514159cf6f19a2f04bdd962f51681b6e2
--- /dev/null
+++ b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,302 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_ch/asr_stats_raw_ch_char_sp/train/speech_shape
+- exp_small/exp_ch/asr_stats_raw_ch_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_ch/asr_stats_raw_ch_char_sp/valid/speech_shape
+- exp_small/exp_ch/asr_stats_raw_ch_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/ch/raw/train_ch_sp/wav.scp
+ - speech
+ - sound
+- - dump/ch/raw/train_ch_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/ch/raw/dev_ch/wav.scp
+ - speech
+ - sound
+- - dump/ch/raw/dev_ch/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- े
+- ा
+- क
+- र
+- न
+- ल
+- म
+- ह
+- स
+- ब
+- ो
+- ी
+- ि
+- त
+- ज
+- प
+- ं
+- थ
+- य
+- ग
+- द
+- व
+- ख
+- इ
+- ्
+- ु
+- अ
+- ट
+- च
+- ू
+- ए
+- उ
+- भ
+- घ
+- फ
+- आ
+- ड़
+- ध
+- ओ
+- ई
+- ड
+- छ
+- .
+- ँ
+- ै
+- ठ
+- ौ
+- झ
+- ढ़
+- श
+- ढ
+- ण
+- ऊ
+- ॉ
+- ऑ
+- ष
+- ऋ
+- ृ
+- ऐ
+- औ
+- फ़
+- ज़
+- ॅ
+- ः
+- क़
+- ख़
+- ञ
+- ़
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..bfbf084b5c303f273307444359cf0dd20168d664
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..1b094e48f1a29ad262dac6aa5a89ab614c73d042
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..848ffbb2d15326a984c3dbd07c3d82bdba0789f2
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..a9abec601b57e3dafaa2c96414aea294da05cd40
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..8a093ef42413b9d7a2e4650edf2c4c75f1fda416
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..31b49bf044b74f35166e3af0e654457417b56f52
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..7c60ccde4d6b2d28ac7d96c6b06df7a0c335206a
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..4c0dfde15aed9ac245bc7f83ba81e9ee7c1c0d55
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..cf6014d74ff10b27a435333ea008ca92d248f41f
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..1c0e2dbd7a4d9d976ec4726169ac37167fe337b7
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..44996e5ca0f8c68524bdf1aed0837e6294f31831
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..712645eff6127fd9482bd78e5c1c17fcbd5171c6
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..1744bd0c2e066fbb9629485e030f736bb8a84728
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..027caa52a656255e509e1cc7513840c7b21b69ab
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..9161b26b2378a41744d3802d6285c0ac42455cc0
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..e6d25269beae8ee3ca94a4e0a1b06087fa3d2358
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..7b2b74114e9174431c33ecb0b392d7b4a672ff38
Binary files /dev/null and b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e9ef22231446dec3bee3a1a764042e1ac46e8c90
--- /dev/null
+++ b/exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d390cb7554a32f765a545a74b322ba57f3f419944ba4e2e673fbe9a2252df35
+size 112621866
diff --git a/exp_small/exp_ch/meta.yaml b/exp_small/exp_ch/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..79f85afef90066535b36341cd3ed87957441d4f2
--- /dev/null
+++ b/exp_small/exp_ch/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748120505.893883
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_ch/asr_ch_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
diff --git a/exp_small/exp_hi/README.md b/exp_small/exp_hi/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..98ecfb80925b74d98cdd4edc7bdd456077505da6
--- /dev/null
+++ b/exp_small/exp_hi/README.md
@@ -0,0 +1,407 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: hi
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using the respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:32:04 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_hi|2288|24958|90.9|8.6|0.5|0.9|9.9|55.8|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_hi|2288|121598|97.6|1.6|0.8|0.8|3.1|55.8|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+The full training configuration is listed below.
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_hi/asr_stats_raw_hi_char_sp/train/speech_shape
+- exp_small/exp_hi/asr_stats_raw_hi_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_hi/asr_stats_raw_hi_char_sp/valid/speech_shape
+- exp_small/exp_hi/asr_stats_raw_hi_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/hi/raw/train_hi_sp/wav.scp
+ - speech
+ - sound
+- - dump/hi/raw/train_hi_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/hi/raw/dev_hi/wav.scp
+ - speech
+ - sound
+- - dump/hi/raw/dev_hi/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- क
+- े
+- र
+- ्
+- त
+- स
+- ी
+- न
+- ह
+- ं
+- ि
+- म
+- ो
+- प
+- ै
+- ल
+- य
+- ज
+- ब
+- व
+- द
+- ग
+- ु
+- ट
+- ए
+- ू
+- श
+- च
+- भ
+- अ
+- ख
+- आ
+- ध
+- ड
+- फ
+- उ
+- ण
+- ई
+- ष
+- इ
+- थ
+- ौ
+- ड़
+- .
+- छ
+- औ
+- ॉ
+- ृ
+- ँ
+- झ
+- ऋ
+- घ
+- ओ
+- ढ़
+- ठ
+- ज़
+- ऑ
+- ऊ
+- ऐ
+- ञ
+- ़
+- फ़
+- ढ
+- ः
+- ख़
+- क़
+- ग़
+- ङ
+- ॅ
+- ऍ
+- ॠ
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```BibTex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..d9b1ec533a7c45e95b9cbf25af798acede7d73c5
--- /dev/null
+++ b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:32:04 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_hi|2288|24958|90.9|8.6|0.5|0.9|9.9|55.8|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_hi|2288|121598|97.6|1.6|0.8|0.8|3.1|55.8|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa7c07027e7334c2fb8744d3a49eda525dae5a56
--- /dev/null
+++ b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,306 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_hi/asr_stats_raw_hi_char_sp/train/speech_shape
+- exp_small/exp_hi/asr_stats_raw_hi_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_hi/asr_stats_raw_hi_char_sp/valid/speech_shape
+- exp_small/exp_hi/asr_stats_raw_hi_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/hi/raw/train_hi_sp/wav.scp
+ - speech
+ - sound
+- - dump/hi/raw/train_hi_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/hi/raw/dev_hi/wav.scp
+ - speech
+ - sound
+- - dump/hi/raw/dev_hi/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- क
+- े
+- र
+- ्
+- त
+- स
+- ी
+- न
+- ह
+- ं
+- ि
+- म
+- ो
+- प
+- ै
+- ल
+- य
+- ज
+- ब
+- व
+- द
+- ग
+- ु
+- ट
+- ए
+- ू
+- श
+- च
+- भ
+- अ
+- ख
+- आ
+- ध
+- ड
+- फ
+- उ
+- ण
+- ई
+- ष
+- इ
+- थ
+- ौ
+- ड़
+- .
+- छ
+- औ
+- ॉ
+- ृ
+- ँ
+- झ
+- ऋ
+- घ
+- ओ
+- ढ़
+- ठ
+- ज़
+- ऑ
+- ऊ
+- ऐ
+- ञ
+- ़
+- फ़
+- ढ
+- ः
+- ख़
+- क़
+- ग़
+- ङ
+- ॅ
+- ऍ
+- ॠ
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..5c0ef73d558be2399067bf5a4f2ea62302da5392
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..4c8036b12af6348cc35ffef657b3f24a9218090b
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..fbf788d83c6481ea93ee70d379678b8e2e0939f3
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e7c2b4dc07ae034b22acfdc01252285fdb731dd
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..169ae08d82771c6d63353abf3c38f4cac6681668
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..89526f703af4b81ad523c1bf38ebc8e50173fd64
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..e6b4017a8f72723c39880dd9ee8e5136586ac751
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..8e7e2cc9ea859d67480cc1a200c4f703cf334083
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..746aa2f896c0103b75daa8992166cbfa39f2ce2e
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..694494cd3f31e8240ad979d7e23bb83bbfae5645
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..d0ac1b132ca6a08b38aecfde073281de16143304
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..570a93a70151a6f4b5fbfdd8161d1ee2fcba9955
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..c9f642024c8051477bee8c112b214e76b25d3469
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..a880d202e71140f5c856f7fb37a0545528ed0a5f
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9ab20dac983b9a9f67f413907c759c12023d356
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..9c13238cfc4c34d6e59a552030f7d57030220a11
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..6963553e15222cf2b01dd5cf6e00e355f5561b3a
Binary files /dev/null and b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2e8d30b66164d54693abc3909820c6005aefc66a
--- /dev/null
+++ b/exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed3a266a4fc66a62b01b7dbf24fa03a91928caa1da09db3ad226509befe7a106
+size 112634154
diff --git a/exp_small/exp_hi/meta.yaml b/exp_small/exp_hi/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdf9ef1318f7bc80c34d07d1e43c36451f447d23
--- /dev/null
+++ b/exp_small/exp_hi/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748120526.738138
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_hi/asr_hi_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
diff --git a/exp_small/exp_kn/README.md b/exp_small/exp_kn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..50b449d5ea3a82b71bd5988f3d7a5d6b5a522042
--- /dev/null
+++ b/exp_small/exp_kn/README.md
@@ -0,0 +1,400 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: kn
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:32:26 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_kn|2161|17676|77.6|20.8|1.6|2.1|24.5|73.5|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_kn|2161|126552|97.0|1.7|1.4|1.6|4.6|73.5|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+Full training configuration (`config.yaml`):
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_kn/asr_stats_raw_kn_char_sp/train/speech_shape
+- exp_small/exp_kn/asr_stats_raw_kn_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_kn/asr_stats_raw_kn_char_sp/valid/speech_shape
+- exp_small/exp_kn/asr_stats_raw_kn_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/kn/raw/train_kn_sp/wav.scp
+ - speech
+ - sound
+- - dump/kn/raw/train_kn_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/kn/raw/dev_kn/wav.scp
+ - speech
+ - sound
+- - dump/kn/raw/dev_kn/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ್
+- ಿ
+- ಾ
+- ರ
+- ು
+- ನ
+- ಕ
+- ತ
+- ದ
+- ೆ
+- ಗ
+- ಸ
+- ಲ
+- ವ
+- ಯ
+- ಂ
+- ಮ
+- ಬ
+- ಳ
+- ಡ
+- ಟ
+- ಹ
+- ಪ
+- ೇ
+- ಅ
+- ೊ
+- ಣ
+- ೋ
+- ಜ
+- ಇ
+- ೂ
+- ಷ
+- ಚ
+- ೀ
+- ಎ
+- ಆ
+- ಶ
+- ೈ
+- ಧ
+- ಒ
+- ಭ
+- .
+- ಉ
+- ಫ
+- ಥ
+- ಖ
+- ೃ
+- ೌ
+- ಏ
+- ಐ
+- ಈ
+- ಠ
+- ಘ
+- ಛ
+- ಓ
+- ಔ
+- ಞ
+- ಊ
+- ಋ
+- ೕ
+- ಢ
+- ಃ
+- ಝ
+- ೖ
+- ೯
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```BibTex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..af7951cb73872344f7ddd53242c0adc91cb7015e
--- /dev/null
+++ b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:32:26 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_kn|2161|17676|77.6|20.8|1.6|2.1|24.5|73.5|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_kn|2161|126552|97.0|1.7|1.4|1.6|4.6|73.5|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdebe2ab6834c3f0cc595c3ade7b75eb02c5e650
--- /dev/null
+++ b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,299 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_kn/asr_stats_raw_kn_char_sp/train/speech_shape
+- exp_small/exp_kn/asr_stats_raw_kn_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_kn/asr_stats_raw_kn_char_sp/valid/speech_shape
+- exp_small/exp_kn/asr_stats_raw_kn_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/kn/raw/train_kn_sp/wav.scp
+ - speech
+ - sound
+- - dump/kn/raw/train_kn_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/kn/raw/dev_kn/wav.scp
+ - speech
+ - sound
+- - dump/kn/raw/dev_kn/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ್
+- ಿ
+- ಾ
+- ರ
+- ು
+- ನ
+- ಕ
+- ತ
+- ದ
+- ೆ
+- ಗ
+- ಸ
+- ಲ
+- ವ
+- ಯ
+- ಂ
+- ಮ
+- ಬ
+- ಳ
+- ಡ
+- ಟ
+- ಹ
+- ಪ
+- ೇ
+- ಅ
+- ೊ
+- ಣ
+- ೋ
+- ಜ
+- ಇ
+- ೂ
+- ಷ
+- ಚ
+- ೀ
+- ಎ
+- ಆ
+- ಶ
+- ೈ
+- ಧ
+- ಒ
+- ಭ
+- .
+- ಉ
+- ಫ
+- ಥ
+- ಖ
+- ೃ
+- ೌ
+- ಏ
+- ಐ
+- ಈ
+- ಠ
+- ಘ
+- ಛ
+- ಓ
+- ಔ
+- ಞ
+- ಊ
+- ಋ
+- ೕ
+- ಢ
+- ಃ
+- ಝ
+- ೖ
+- ೯
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..404ab7423b5f6ba9145b7f648908eff36c46ecdd
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..380c5e3d75a8568bc1cc5ff422d9506fc17c9280
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..e364b9e2e5e120f9a1fe2932d458d2e8e0855211
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..1c5440092d9d068cd8d86f9af644b2ee97488874
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..b17832fb8a9cb57375cb58db2cab7b0a5fdbc6ff
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..f7dec6912950fc4812a9933ece7756226be2ab7a
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..5d50993d6634745f3644404154baf49814456b4e
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e66b751a8b717cf81b70eab5da1f7dfa28bd439
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..4cbe3226b95822cf699cf98ed50e0965d84636d3
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..97e54ed5f68b3d91289d2260b2728be2aff653b3
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..79653393d7eb0dd369163bd6f8a40940bbeb002d
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..58d6fbbd506c37e07579b6655c12e53b8f92d7a7
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..4c95273c182c35c95fc33db28759ea315f3e06d3
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..1686d4073754dda7739b49fc4e5bc4b26469e000
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..b62670f84553dbf3ce9937c22f61257aa6534415
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..2715f1b09549822dad5fca85ea5382c41bbd7d1f
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..fe4e66889f6fb6ea6afd35248ca3d2b072170a8e
Binary files /dev/null and b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..63876ae7ede45a0d2595193062b461d4c77b0819
--- /dev/null
+++ b/exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b582860d8b57d7b3c87ecabdde6d7ade849c21f5bca2a9c6e31f9c2fe0f35994
+size 112612650
diff --git a/exp_small/exp_kn/meta.yaml b/exp_small/exp_kn/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ced4626c0e42f79fdd6a1f3477521f2bb2babbfa
--- /dev/null
+++ b/exp_small/exp_kn/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748124996.523497
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_kn/asr_kn_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
diff --git a/exp_small/exp_mg/README.md b/exp_small/exp_mg/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b08e3a56b2a16a1c06e6ed97677fdd8be2d6133e
--- /dev/null
+++ b/exp_small/exp_mg/README.md
@@ -0,0 +1,406 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: mag
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:32:46 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mg|2193|22217|81.5|17.6|0.9|1.8|20.4|82.2|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mg|2193|105161|95.6|3.1|1.3|1.6|6.0|82.2|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+Full training configuration (`config.yaml`):
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_mg/asr_stats_raw_mg_char_sp/train/speech_shape
+- exp_small/exp_mg/asr_stats_raw_mg_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_mg/asr_stats_raw_mg_char_sp/valid/speech_shape
+- exp_small/exp_mg/asr_stats_raw_mg_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/mg/raw/train_mg_sp/wav.scp
+ - speech
+ - sound
+- - dump/mg/raw/train_mg_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/mg/raw/dev_mg/wav.scp
+ - speech
+ - sound
+- - dump/mg/raw/dev_mg/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- क
+- े
+- र
+- ्
+- स
+- ल
+- न
+- ह
+- त
+- म
+- ि
+- ी
+- प
+- ो
+- ब
+- य
+- ं
+- व
+- ज
+- द
+- ग
+- इ
+- ट
+- ु
+- ई
+- ै
+- ख
+- च
+- छ
+- ू
+- श
+- भ
+- अ
+- आ
+- ध
+- ए
+- ड
+- उ
+- फ
+- ष
+- ण
+- थ
+- ड़
+- ौ
+- .
+- ऽ
+- ृ
+- ॉ
+- औ
+- ढ़
+- घ
+- ठ
+- ँ
+- ओ
+- ऋ
+- ऑ
+- ऊ
+- झ
+- ज़
+- ढ
+- ऐ
+- फ़
+- ञ
+- ः
+- ख़
+- क़
+- ङ
+- ग़
+- ऍ
+- ॅ
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```BibTex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..7155c46bb2735ac572ae7cda24505b6a88067f73
--- /dev/null
+++ b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:32:46 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mg|2193|22217|81.5|17.6|0.9|1.8|20.4|82.2|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mg|2193|105161|95.6|3.1|1.3|1.6|6.0|82.2|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d76be829e5acdd299fa4870cd35bf4bd1964f03e
--- /dev/null
+++ b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,305 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_mg/asr_stats_raw_mg_char_sp/train/speech_shape
+- exp_small/exp_mg/asr_stats_raw_mg_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_mg/asr_stats_raw_mg_char_sp/valid/speech_shape
+- exp_small/exp_mg/asr_stats_raw_mg_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/mg/raw/train_mg_sp/wav.scp
+ - speech
+ - sound
+- - dump/mg/raw/train_mg_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/mg/raw/dev_mg/wav.scp
+ - speech
+ - sound
+- - dump/mg/raw/dev_mg/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- क
+- े
+- र
+- ्
+- स
+- ल
+- न
+- ह
+- त
+- म
+- ि
+- ी
+- प
+- ो
+- ब
+- य
+- ं
+- व
+- ज
+- द
+- ग
+- इ
+- ट
+- ु
+- ई
+- ै
+- ख
+- च
+- छ
+- ू
+- श
+- भ
+- अ
+- आ
+- ध
+- ए
+- ड
+- उ
+- फ
+- ष
+- ण
+- थ
+- ड़
+- ौ
+- .
+- ऽ
+- ृ
+- ॉ
+- औ
+- ढ़
+- घ
+- ठ
+- ँ
+- ओ
+- ऋ
+- ऑ
+- ऊ
+- झ
+- ज़
+- ढ
+- ऐ
+- फ़
+- ञ
+- ः
+- ख़
+- क़
+- ङ
+- ग़
+- ऍ
+- ॅ
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..a636cb7beeb95eaa1131f8c7bf2fa89076997153
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..4553580d9a207542eb8045db21712ed976ee6a88
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..63c11621f6658b26ef0ecb54c9712026d04291ae
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..1fb4126990ab5432b39e04d169f7a0d57883ba98
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..b17832fb8a9cb57375cb58db2cab7b0a5fdbc6ff
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..36af10f66129f167bb2bc623cde101dee6f9872e
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..05b8d531220bbf8e2d90f3803b482b9265be8fcc
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..d25cf17651c8070879fc0cafa0b80f0cba72763f
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..49f200cdc05d8dab4efa9968298e5bbfd1ab5324
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..6e80d0336f4fdc3f4f99135ed7702fb4af7b1b22
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..26b26416c7f8d880d7b990ef4cb6e837ce7917eb
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..80ec6708e26177481a9478731eabcb33fc0e4c96
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..b6a82d2cb5e76448e7cc9245bbeba7f352f6ce5d
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..6c710a2f158b9a717c784df90ee3c3df4fd3a33b
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..d8bb04294da9e38d85db76de2e7ee94b7fd1d196
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..6e7c58d345e72767884db3a104c603a18cf030bb
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..1e1f03905865d525918f57e57e464dc969e7c8ab
Binary files /dev/null and b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bd91ab36763ec3f1f3855d117357552c3ba7c2e4
--- /dev/null
+++ b/exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9d0342d8e5a783c57d583c3e3737dc9711225d1aba758db3cac9698c81d2645
+size 112631082
diff --git a/exp_small/exp_mg/meta.yaml b/exp_small/exp_mg/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b97ba0229dbb4cac4ecbf0c373dfcb159ce61dd7
--- /dev/null
+++ b/exp_small/exp_mg/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748120568.092736
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_mg/asr_mg_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
diff --git a/exp_small/exp_mr/README.md b/exp_small/exp_mr/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a9a2947ee8f203951a8ee411b930e6e28995cd87
--- /dev/null
+++ b/exp_small/exp_mr/README.md
@@ -0,0 +1,404 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: mr
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using the respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:33:06 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mr|2170|17526|86.7|12.2|1.1|1.1|14.5|57.9|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mr|2170|108873|97.8|1.4|0.8|0.9|3.1|57.9|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+expand
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_mr/asr_stats_raw_mr_char_sp/train/speech_shape
+- exp_small/exp_mr/asr_stats_raw_mr_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_mr/asr_stats_raw_mr_char_sp/valid/speech_shape
+- exp_small/exp_mr/asr_stats_raw_mr_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/mr/raw/train_mr_sp/wav.scp
+ - speech
+ - sound
+- - dump/mr/raw/train_mr_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/mr/raw/dev_mr/wav.scp
+ - speech
+ - sound
+- - dump/mr/raw/dev_mr/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- ्
+- र
+- त
+- क
+- े
+- य
+- ी
+- स
+- न
+- व
+- ल
+- म
+- ि
+- प
+- च
+- ं
+- ह
+- ो
+- ज
+- ण
+- द
+- आ
+- ग
+- श
+- ब
+- ु
+- ट
+- ू
+- ड
+- ध
+- अ
+- ख
+- ठ
+- ळ
+- भ
+- ष
+- फ
+- उ
+- ए
+- थ
+- .
+- घ
+- झ
+- ँ
+- ै
+- ई
+- ढ
+- इ
+- ॉ
+- ऊ
+- ॅ
+- ृ
+- ऑ
+- ऱ
+- ओ
+- ौ
+- छ
+- ञ
+- औ
+- ॲ
+- ः
+- ऐ
+- ऍ
+- ऋ
+- ़
+- ':'
+- ड़
+- फ़
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```BibTex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..fc92c324c8070c96df9b05f3e74904985d76b43b
--- /dev/null
+++ b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:33:06 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mr|2170|17526|86.7|12.2|1.1|1.1|14.5|57.9|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mr|2170|108873|97.8|1.4|0.8|0.9|3.1|57.9|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3bf4668186b3c43b9ecff1432f3249f2dfc9620
--- /dev/null
+++ b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,303 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_mr/asr_stats_raw_mr_char_sp/train/speech_shape
+- exp_small/exp_mr/asr_stats_raw_mr_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_mr/asr_stats_raw_mr_char_sp/valid/speech_shape
+- exp_small/exp_mr/asr_stats_raw_mr_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/mr/raw/train_mr_sp/wav.scp
+ - speech
+ - sound
+- - dump/mr/raw/train_mr_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/mr/raw/dev_mr/wav.scp
+ - speech
+ - sound
+- - dump/mr/raw/dev_mr/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- ्
+- र
+- त
+- क
+- े
+- य
+- ी
+- स
+- न
+- व
+- ल
+- म
+- ि
+- प
+- च
+- ं
+- ह
+- ो
+- ज
+- ण
+- द
+- आ
+- ग
+- श
+- ब
+- ु
+- ट
+- ू
+- ड
+- ध
+- अ
+- ख
+- ठ
+- ळ
+- भ
+- ष
+- फ
+- उ
+- ए
+- थ
+- .
+- घ
+- झ
+- ँ
+- ै
+- ई
+- ढ
+- इ
+- ॉ
+- ऊ
+- ॅ
+- ृ
+- ऑ
+- ऱ
+- ओ
+- ौ
+- छ
+- ञ
+- औ
+- ॲ
+- ः
+- ऐ
+- ऍ
+- ऋ
+- ़
+- ':'
+- ड़
+- फ़
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..cc5b2ce43052b59baf99f2484f3aac853bf9908a
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..f8db215f4c8d465ca7afdf6bf2ed710360961ad6
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..c4f2797022bf774763c4ee97a9c87dcc6cf28786
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..0acb6cbc7799996cf994fb18be4a0833bd6b195c
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..b17832fb8a9cb57375cb58db2cab7b0a5fdbc6ff
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..7578383cfb7879d6feae0507e7c89a4cf4e4f1b7
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..9da5227d2cbff13011688245058cc25fb71794b3
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..3c3f72e7deee0bb4eb4d18d8eabf9127b2d13671
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..f0a0fe70a42531c96082bb8ef674472db9f1038e
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..1651a35f36659f404abfe7860e5e859d72854ab8
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..72b7a93e26b77ddd3225483fd847b664f3db3bf2
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..998722098dab63a884f4838453422a381017a746
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..31ae7789ef22db0a099e21a8dadad9fb265b86d7
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..1b3f78f633044decf897e7f3354a26ff8be51926
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..47d6dae6c135dca568ce77a15f00b983d34d5d18
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ded9f41cde4517616e2d906323bd017c1e6f530
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..531a2c555e64a3cd88943d8aabf40281b624fde1
Binary files /dev/null and b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..316d2e4d9ed27283b793467aadedc55111f3c0ea
--- /dev/null
+++ b/exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:142cdd748248259de4addba7ff2b17b2b576a3cc7aed1ea6f399bf0cc6a560eb
+size 112624938
diff --git a/exp_small/exp_mr/meta.yaml b/exp_small/exp_mr/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a343d6f6cd70353b600ba98492da0ca70940f2b2
--- /dev/null
+++ b/exp_small/exp_mr/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748120588.150078
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_mr/asr_mr_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
diff --git a/exp_small/exp_mt/README.md b/exp_small/exp_mt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f7d10a0173642c0edbd08ba072f6a384987ef004
--- /dev/null
+++ b/exp_small/exp_mt/README.md
@@ -0,0 +1,405 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: mai
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using the respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:33:26 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mt|2172|22835|83.1|16.2|0.7|1.0|17.9|78.2|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mt|2172|107571|96.2|2.6|1.2|1.2|5.0|78.2|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+expand
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e12_mactrue_edrop0.0_ddrop0.0.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 4
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 10
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 10
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 4
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 12000000
+valid_batch_bins: null
+train_shape_file:
+- exp_small/exp_mt/asr_stats_raw_mt_char_sp/train/speech_shape
+- exp_small/exp_mt/asr_stats_raw_mt_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_mt/asr_stats_raw_mt_char_sp/valid/speech_shape
+- exp_small/exp_mt/asr_stats_raw_mt_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/mt/raw/train_mt_sp/wav.scp
+ - speech
+ - sound
+- - dump/mt/raw/train_mt_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/mt/raw/dev_mt/wav.scp
+ - speech
+ - sound
+- - dump/mt/raw/dev_mt/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- क
+- र
+- े
+- ्
+- ल
+- स
+- न
+- ि
+- त
+- म
+- ै
+- ी
+- य
+- प
+- ो
+- ब
+- छ
+- ह
+- ज
+- ं
+- व
+- द
+- ग
+- ु
+- ट
+- इ
+- अ
+- भ
+- ख
+- आ
+- श
+- च
+- ए
+- ू
+- ध
+- उ
+- ण
+- ँ
+- ष
+- फ
+- ड
+- थ
+- ड़
+- .
+- ई
+- ृ
+- ौ
+- ॅ
+- ओ
+- ऋ
+- घ
+- ढ़
+- ठ
+- ॉ
+- ऽ
+- ऑ
+- झ
+- ऊ
+- औ
+- ञ
+- ढ
+- ः
+- ऐ
+- फ़
+- ज़
+- ॠ
+- ख़
+- क़
+- ङ
+- ग़
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 12
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202402'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```bibtex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..5614c6164c85cf60ece3994b94da219fe6862e06
--- /dev/null
+++ b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:33:26 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mt|2172|22835|83.1|16.2|0.7|1.0|17.9|78.2|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_mt|2172|107571|96.2|2.6|1.2|1.2|5.0|78.2|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..133b5df8c68031a0aa8584bc2fd151cf97f49b0f
--- /dev/null
+++ b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,304 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e12_mactrue_edrop0.0_ddrop0.0.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 4
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 10
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 10
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 4
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 12000000
+valid_batch_bins: null
+train_shape_file:
+- exp_small/exp_mt/asr_stats_raw_mt_char_sp/train/speech_shape
+- exp_small/exp_mt/asr_stats_raw_mt_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_mt/asr_stats_raw_mt_char_sp/valid/speech_shape
+- exp_small/exp_mt/asr_stats_raw_mt_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/mt/raw/train_mt_sp/wav.scp
+ - speech
+ - sound
+- - dump/mt/raw/train_mt_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/mt/raw/dev_mt/wav.scp
+ - speech
+ - sound
+- - dump/mt/raw/dev_mt/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ा
+- क
+- र
+- े
+- ्
+- ल
+- स
+- न
+- ि
+- त
+- म
+- ै
+- ी
+- य
+- प
+- ो
+- ब
+- छ
+- ह
+- ज
+- ं
+- व
+- द
+- ग
+- ु
+- ट
+- इ
+- अ
+- भ
+- ख
+- आ
+- श
+- च
+- ए
+- ू
+- ध
+- उ
+- ण
+- ँ
+- ष
+- फ
+- ड
+- थ
+- ड़
+- .
+- ई
+- ृ
+- ौ
+- ॅ
+- ओ
+- ऋ
+- घ
+- ढ़
+- ठ
+- ॉ
+- ऽ
+- ऑ
+- झ
+- ऊ
+- औ
+- ञ
+- ढ
+- ः
+- ऐ
+- फ़
+- ज़
+- ॠ
+- ख़
+- क़
+- ङ
+- ग़
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 12
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202402'
+distributed: false
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..460c129ca5b750ffd7c92e6f0a17639bc278dcc3
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..b0bacf4137d8261a805cbdb16665bc3d42d16af7
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..0b2f641a2b5dc9d93a476f2763139d122a604bfd
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..6e0ab7725bee495a752761ffdc435eec3fcbb2ea
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..d401ae9382d0d4d9bd4df70ff89e4031088552da
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..c13d2e4b0300fc1edc6d58b5e45a1b824ff38af2
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..ef8b2dfb2439662b08aca95573e331b11e7bcfbc
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9c4988b8a5bdb240f32c7a912f4f054a50c420f
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..add98ff1836b8222f70398ce8316b54d8e7f3fe4
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..fa04675541cd6bf6af3fab670abb78ab05cf3686
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa4e2cbc9a41a9176296acd6894894f33872f4a6
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..006414d983fb264a9cc34ab9893a5578287a8d24
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..90a7484646c772feedfa74d8692f0306e056d481
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..9edeaad2d808bbd5ba5ed4100ea8b3a228129ceb
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..f0ab54d792e5a369b15804dcaafeac0f35ff8ad6
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..139426d9fb6d2825d7fc41c7605f422988a91347
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..083223a47969bf2d021bd1c4de421fd8e07069a8
Binary files /dev/null and b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_10best.pth b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_10best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e758ef684d45b6a8493ba36239d2b4564e23ab98
--- /dev/null
+++ b/exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_10best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58aa3e35f6fe4c1981fc160bd3d8a1ff0c12fbe64794fca806c5a597f9565be1
+size 139056430
diff --git a/exp_small/exp_mt/meta.yaml b/exp_small/exp_mt/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8c70b4a0d65850f93eea3ba9a8842d87de374c14
--- /dev/null
+++ b/exp_small/exp_mt/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_10best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748120608.234658
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_mt/asr_mt_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
diff --git a/exp_small/exp_te/README.md b/exp_small/exp_te/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a8ccbe91407709b5ca89a18a20b2eca286eb02ef
--- /dev/null
+++ b/exp_small/exp_te/README.md
@@ -0,0 +1,399 @@
+---
+tags:
+- espnet
+- audio
+- automatic-speech-recognition
+language: te
+datasets:
+- respin_small
+license: cc-by-4.0
+---
+
+## ESPnet2 ASR model
+
+### `SpireLab/spire_respin_baselines_espnet`
+
+This model was trained by wtc7 using the respin_small recipe in [espnet](https://github.com/espnet/espnet/).
+
+### Demo: How to use in ESPnet2
+
+Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+if you haven't done that already.
+
+```bash
+cd espnet
+
+pip install -e .
+cd egs2/respin_small/asr1
+./run.sh --skip_data_prep false --skip_train true --download_model SpireLab/spire_respin_baselines_espnet
+```
+
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:33:47 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_te|2226|17825|80.6|17.2|2.2|2.2|21.6|72.5|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_te|2226|125985|97.1|1.8|1.2|1.2|4.1|72.5|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+
+## ASR config
+
+The full training configuration is reproduced below.
+
+```
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_te/asr_stats_raw_te_char_sp/train/speech_shape
+- exp_small/exp_te/asr_stats_raw_te_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_te/asr_stats_raw_te_char_sp/valid/speech_shape
+- exp_small/exp_te/asr_stats_raw_te_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/te/raw/train_te_sp/wav.scp
+ - speech
+ - sound
+- - dump/te/raw/train_te_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/te/raw/dev_te/wav.scp
+ - speech
+ - sound
+- - dump/te/raw/dev_te/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ్
+- ు
+- ా
+- ి
+- న
+- ల
+- ం
+- క
+- ర
+- ప
+- త
+- వ
+- ట
+- స
+- ే
+- య
+- డ
+- ద
+- మ
+- చ
+- ో
+- గ
+- ె
+- బ
+- ీ
+- అ
+- ొ
+- ఎ
+- ూ
+- జ
+- ై
+- ఉ
+- ధ
+- ఇ
+- ఆ
+- ష
+- భ
+- శ
+- ఏ
+- ళ
+- ఫ
+- ణ
+- .
+- హ
+- థ
+- ఒ
+- ఖ
+- ఈ
+- ౌ
+- ఐ
+- ృ
+- ఓ
+- ఊ
+- ఋ
+- ఛ
+- ఘ
+- ఠ
+- ఔ
+- ఱ
+- ఢ
+- ఞ
+- ః
+- ౖ
+- ౦
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
+```
+
+
+
+
+
+### Citing ESPnet
+
+```BibTex
+@inproceedings{watanabe2018espnet,
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
+ year={2018},
+ booktitle={Proceedings of Interspeech},
+ pages={2207--2211},
+ doi={10.21437/Interspeech.2018-1456},
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+}
+
+
+
+
+
+
+```
+
+or arXiv:
+
+```bibtex
+@misc{watanabe2018espnet,
+ title={ESPnet: End-to-End Speech Processing Toolkit},
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+ year={2018},
+ eprint={1804.00015},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL}
+}
+```
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
new file mode 100644
index 0000000000000000000000000000000000000000..5dc08967c4c4b45e5a445f2595c5e4586d9d92d9
--- /dev/null
+++ b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/RESULTS.md
@@ -0,0 +1,27 @@
+
+# RESULTS
+## Environments
+- date: `Sun May 25 02:33:47 IST 2025`
+- python version: `3.8.10 (default, Mar 18 2025, 20:04:55) [GCC 9.4.0]`
+- espnet version: `espnet 202412`
+- pytorch version: `pytorch 2.3.0+cu121`
+- Git hash: `0fe7b8581fbc68841eb48776f052aa9a5989108c`
+ - Commit date: `Tue Jan 14 20:06:15 2025 -0500`
+
+## exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_te|2226|17825|80.6|17.2|2.2|2.2|21.6|72.5|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|decode_lid_asr_model_valid.acc.ave/test_te|2226|125985|97.1|1.8|1.2|1.2|4.1|72.5|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..43b85ee202b6e8510de04dccb89dcb4a6e2fece6
--- /dev/null
+++ b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml
@@ -0,0 +1,298 @@
+config: conf/tuning/train_asr_e_branchformer_size256_mlp1024_linear1024_e8_mactrue_bs6M_gacc1.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1
+ngpu: 1
+seed: 2022
+num_workers: 8
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+use_deepspeed: false
+deepspeed_config: null
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+use_tf32: false
+collect_stats: false
+write_collected_feats: false
+max_epoch: 70
+patience: 5
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: true
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_adapter: false
+adapter: lora
+save_strategy: all
+adapter_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 6000000
+valid_batch_bins: null
+category_sample_size: 10
+train_shape_file:
+- exp_small/exp_te/asr_stats_raw_te_char_sp/train/speech_shape
+- exp_small/exp_te/asr_stats_raw_te_char_sp/train/text_shape.char
+valid_shape_file:
+- exp_small/exp_te/asr_stats_raw_te_char_sp/valid/speech_shape
+- exp_small/exp_te/asr_stats_raw_te_char_sp/valid/text_shape.char
+batch_type: numel
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+chunk_max_abs_length: null
+chunk_discard_short_samples: true
+train_data_path_and_name_and_type:
+- - dump/te/raw/train_te_sp/wav.scp
+ - speech
+ - sound
+- - dump/te/raw/train_te_sp/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - dump/te/raw/dev_te/wav.scp
+ - speech
+ - sound
+- - dump/te/raw/dev_te/text
+ - text
+ - text
+multi_task_dataset: false
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+ lr: 0.002
+ weight_decay: 1.0e-06
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 15000
+token_list:
+- <blank>
+- <unk>
+- <space>
+- ్
+- ు
+- ా
+- ి
+- న
+- ల
+- ం
+- క
+- ర
+- ప
+- త
+- వ
+- ట
+- స
+- ే
+- య
+- డ
+- ద
+- మ
+- చ
+- ో
+- గ
+- ె
+- బ
+- ీ
+- అ
+- ొ
+- ఎ
+- ూ
+- జ
+- ై
+- ఉ
+- ధ
+- ఇ
+- ఆ
+- ష
+- భ
+- శ
+- ఏ
+- ళ
+- ఫ
+- ణ
+- .
+- హ
+- థ
+- ఒ
+- ఖ
+- ఈ
+- ౌ
+- ఐ
+- ృ
+- ఓ
+- ఊ
+- ఋ
+- ఛ
+- ఘ
+- ఠ
+- ఔ
+- ఱ
+- ఢ
+- ఞ
+- ః
+- ౖ
+- ౦
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: null
+ zero_infinity: true
+ brctc_risk_strategy: exp
+ brctc_group_strategy: end
+ brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+ n_fft: 512
+ win_length: 400
+ hop_length: 160
+ fs: 16k
+specaug: specaug
+specaug_conf:
+ apply_time_warp: true
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 27
+ num_freq_mask: 2
+ apply_time_mask: true
+ time_mask_width_ratio_range:
+ - 0.0
+ - 0.05
+ num_time_mask: 5
+normalize: utterance_mvn
+normalize_conf: {}
+model: espnet
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1
+ length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: e_branchformer
+encoder_conf:
+ output_size: 256
+ attention_heads: 4
+ attention_layer_type: rel_selfattn
+ pos_enc_layer_type: rel_pos
+ rel_pos_type: latest
+ cgmlp_linear_units: 1024
+ cgmlp_conv_kernel: 31
+ use_linear_after_conv: false
+ gate_activation: identity
+ num_blocks: 8
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: conv2d2
+ layer_drop_rate: 0.0
+ linear_units: 1024
+ positionwise_layer_type: linear
+ use_ffn: true
+ macaron_ffn: true
+ merge_conv_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ layer_drop_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202409'
+distributed: false
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..668c2f7bf46bcfae5c59bb0f80f51be82a5adb56
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/acc.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..a7cf18f0c6cb7c4725ee7ae31d0114ca3919a058
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/backward_time.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png
new file mode 100644
index 0000000000000000000000000000000000000000..a731d3f1c176f8fdd42c4a0a67d063e0bb82fcdc
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..cc7e5ea1e8e26b463e23c53111116f10cb3f38aa
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/cer_ctc.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png
new file mode 100644
index 0000000000000000000000000000000000000000..8828a83c8359b9f04dff2b42974592b988de5f7c
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/clip.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..6c6dd4fe482e155656c5a81d1776bfc3e97948a2
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/forward_time.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png
new file mode 100644
index 0000000000000000000000000000000000000000..e0f201b13b70d6f39b710d14d448af7267eb30e8
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/gpu_max_cached_mem_GB.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3ab65a0aa6d19b0052c94788a88afe8ad3c7bdf
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/grad_norm.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..b149260ad21f773b0bd3f5bef9e43dfd6c8d4622
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/iter_time.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..4168ac17894040ab3110dd16dece0d853745b5e1
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png
new file mode 100644
index 0000000000000000000000000000000000000000..07cc79c141db9e70427604feed85fb21d2b0ab39
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_att.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png
new file mode 100644
index 0000000000000000000000000000000000000000..8ab909cfa53dc6b7a3bcab7eabb46dece0e647cf
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_ctc.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png
new file mode 100644
index 0000000000000000000000000000000000000000..4fae61d05b4f94a722f33f61fe8bb8033ff69758
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/loss_scale.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png
new file mode 100644
index 0000000000000000000000000000000000000000..074469f1174e617af3996152763be6b1a485a6c1
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim0_lr0.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..898b02185c1603f0aeaffd0b0ebe35d54d504489
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/optim_step_time.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png
new file mode 100644
index 0000000000000000000000000000000000000000..b40e50d4c4081e5f18666546c5ceab770830b81b
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/train_time.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png
new file mode 100644
index 0000000000000000000000000000000000000000..79e723aa7fde327f9db87d9138677d4bae755aaa
Binary files /dev/null and b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/images/wer.png differ
diff --git a/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9b86175b63a0271c9f9cb0257ccefa2d323d224c
--- /dev/null
+++ b/exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06215983b817c2cf5e803b343f335d8368701638c469cd86a8488af053c44b15
+size 112609578
diff --git a/exp_small/exp_te/meta.yaml b/exp_small/exp_te/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4092428d6ef5c97b8906df5243a6574ed5bbcd1d
--- /dev/null
+++ b/exp_small/exp_te/meta.yaml
@@ -0,0 +1,8 @@
+espnet: '202412'
+files:
+ asr_model_file: exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/valid.acc.ave_5best.pth
+python: "3.8.10 (default, Mar 18 2025, 20:04:55) \n[GCC 9.4.0]"
+timestamp: 1748120629.364632
+torch: 2.3.0+cu121
+yaml_files:
+ asr_train_config: exp_small/exp_te/asr_te_ebf_size256_mlp1024_lin1024_e8_mactrue_bs6M_gacc1/config.yaml