Ubuntu committed on
Commit
d4862a5
·
1 Parent(s): 1445b61

Update model

Browse files
Files changed (38) hide show
  1. README.md +356 -3
  2. data/en_token_list/bpe_unigram30/bpe.model +3 -0
  3. exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz +3 -0
  4. exp/asr_train_asr_transformer_raw_en_bpe30_sp/RESULTS.md +49 -0
  5. exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml +234 -0
  6. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/acc.png +0 -0
  7. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/backward_time.png +0 -0
  8. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer.png +0 -0
  9. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer_ctc.png +0 -0
  10. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/clip.png +0 -0
  11. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/forward_time.png +0 -0
  12. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/gpu_max_cached_mem_GB.png +0 -0
  13. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/grad_norm.png +0 -0
  14. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/iter_time.png +0 -0
  15. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss.png +0 -0
  16. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_att.png +0 -0
  17. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_ctc.png +0 -0
  18. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_scale.png +0 -0
  19. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim0_lr0.png +0 -0
  20. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim_step_time.png +0 -0
  21. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/train_time.png +0 -0
  22. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/wer.png +0 -0
  23. exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth +3 -0
  24. exp/lm_train_lm_en_bpe30/34epoch.pth +3 -0
  25. exp/lm_train_lm_en_bpe30/config.yaml +170 -0
  26. exp/lm_train_lm_en_bpe30/images/backward_time.png +0 -0
  27. exp/lm_train_lm_en_bpe30/images/clip.png +0 -0
  28. exp/lm_train_lm_en_bpe30/images/forward_time.png +0 -0
  29. exp/lm_train_lm_en_bpe30/images/gpu_max_cached_mem_GB.png +0 -0
  30. exp/lm_train_lm_en_bpe30/images/grad_norm.png +0 -0
  31. exp/lm_train_lm_en_bpe30/images/iter_time.png +0 -0
  32. exp/lm_train_lm_en_bpe30/images/loss.png +0 -0
  33. exp/lm_train_lm_en_bpe30/images/loss_scale.png +0 -0
  34. exp/lm_train_lm_en_bpe30/images/optim0_lr0.png +0 -0
  35. exp/lm_train_lm_en_bpe30/images/optim_step_time.png +0 -0
  36. exp/lm_train_lm_en_bpe30/images/train_time.png +0 -0
  37. exp/lm_train_lm_en_bpe30/perplexity_test/ppl +1 -0
  38. meta.yaml +10 -0
README.md CHANGED
@@ -1,3 +1,356 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - an4
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `jestillore/an4-asr1`
15
+
16
+ This model was trained by jestillore using the an4 recipe in [ESPnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout main
26
+ pip install -e .
27
+ cd egs2/an4/asr1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model jestillore/an4-asr1
29
+ ```
30
+
31
+ <!-- [INFO] /home/ubuntu/espnet/egs2/an4/asr1/../../../tools/activate_python.sh is not present -->
32
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
33
+ # RESULTS
34
+ ## Environments
35
+ - date: `Tue May 27 14:02:02 UTC 2025`
36
+ - python version: `3.10.17 (main, May 27 2025, 08:52:46) [GCC 13.3.0]`
37
+ - espnet version: `espnet 202503`
38
+ - pytorch version: `pytorch 2.7.0+cu126`
39
+ - Git hash: `1efdaa835178b0ce5034904e29f89f8fc7e0a358`
40
+ - Commit date: `Thu May 22 12:09:45 2025 -0400`
41
+
42
+ ## exp/asr_train_asr_transformer_raw_en_bpe30_sp
43
+ ### WER
44
+
45
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
46
+ |---|---|---|---|---|---|---|---|---|
47
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|773|92.9|5.0|2.1|0.5|7.6|33.8|
48
+
49
+ ### CER
50
+
51
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
+ |---|---|---|---|---|---|---|---|---|
53
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2565|96.5|0.9|2.7|0.7|4.2|33.8|
54
+
55
+ ### TER
56
+
57
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
58
+ |---|---|---|---|---|---|---|---|---|
59
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2695|96.6|0.9|2.5|0.7|4.0|33.8|
60
+
61
+ ## exp/asr_train_asr_transformer_raw_en_bpe30_sp/decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave
62
+ ### WER
63
+
64
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
65
+ |---|---|---|---|---|---|---|---|---|
66
+ |org/train_dev|100|591|87.8|8.3|3.9|0.2|12.4|46.0|
67
+
68
+ ### CER
69
+
70
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
71
+ |---|---|---|---|---|---|---|---|---|
72
+ |org/train_dev|100|1915|91.9|2.7|5.5|0.1|8.3|46.0|
73
+
74
+ ### TER
75
+
76
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
77
+ |---|---|---|---|---|---|---|---|---|
78
+ |org/train_dev|100|2015|92.3|2.5|5.2|0.1|7.8|46.0|
79
+
80
+ ## ASR config
81
+
82
+ <details><summary>expand</summary>
83
+
84
+ ```
85
+ config: conf/train_asr_transformer.yaml
86
+ print_config: false
87
+ log_level: INFO
88
+ drop_last_iter: false
89
+ dry_run: false
90
+ iterator_type: sequence
91
+ valid_iterator_type: null
92
+ output_dir: exp/asr_train_asr_transformer_raw_en_bpe30_sp
93
+ ngpu: 1
94
+ seed: 0
95
+ num_workers: 1
96
+ num_att_plot: 3
97
+ dist_backend: nccl
98
+ dist_init_method: env://
99
+ dist_world_size: null
100
+ dist_rank: null
101
+ local_rank: 0
102
+ dist_master_addr: null
103
+ dist_master_port: null
104
+ dist_launcher: null
105
+ multiprocessing_distributed: false
106
+ unused_parameters: false
107
+ sharded_ddp: false
108
+ use_deepspeed: false
109
+ deepspeed_config: null
110
+ gradient_as_bucket_view: true
111
+ ddp_comm_hook: null
112
+ cudnn_enabled: true
113
+ cudnn_benchmark: false
114
+ cudnn_deterministic: true
115
+ use_tf32: false
116
+ collect_stats: false
117
+ write_collected_feats: false
118
+ max_epoch: 200
119
+ patience: null
120
+ val_scheduler_criterion:
121
+ - valid
122
+ - loss
123
+ early_stopping_criterion:
124
+ - valid
125
+ - loss
126
+ - min
127
+ best_model_criterion:
128
+ - - valid
129
+ - acc
130
+ - max
131
+ keep_nbest_models: 10
132
+ nbest_averaging_interval: 0
133
+ grad_clip: 5.0
134
+ grad_clip_type: 2.0
135
+ grad_noise: false
136
+ accum_grad: 1
137
+ no_forward_run: false
138
+ resume: true
139
+ train_dtype: float32
140
+ use_amp: false
141
+ log_interval: null
142
+ use_matplotlib: true
143
+ use_tensorboard: true
144
+ create_graph_in_tensorboard: false
145
+ use_wandb: false
146
+ wandb_project: null
147
+ wandb_id: null
148
+ wandb_entity: null
149
+ wandb_name: null
150
+ wandb_model_log_interval: -1
151
+ detect_anomaly: false
152
+ use_adapter: false
153
+ adapter: lora
154
+ save_strategy: all
155
+ adapter_conf: {}
156
+ pretrain_path: null
157
+ init_param: []
158
+ ignore_init_mismatch: false
159
+ freeze_param: []
160
+ num_iters_per_epoch: null
161
+ batch_size: 64
162
+ valid_batch_size: null
163
+ batch_bins: 1000000
164
+ valid_batch_bins: null
165
+ category_sample_size: 10
166
+ train_shape_file:
167
+ - exp/asr_stats_raw_en_bpe30_sp/train/speech_shape
168
+ - exp/asr_stats_raw_en_bpe30_sp/train/text_shape.bpe
169
+ valid_shape_file:
170
+ - exp/asr_stats_raw_en_bpe30_sp/valid/speech_shape
171
+ - exp/asr_stats_raw_en_bpe30_sp/valid/text_shape.bpe
172
+ batch_type: folded
173
+ valid_batch_type: null
174
+ fold_length:
175
+ - 80000
176
+ - 150
177
+ sort_in_batch: descending
178
+ shuffle_within_batch: false
179
+ sort_batch: descending
180
+ multiple_iterator: false
181
+ chunk_length: 500
182
+ chunk_shift_ratio: 0.5
183
+ num_cache_chunks: 1024
184
+ chunk_excluded_key_prefixes: []
185
+ chunk_default_fs: null
186
+ chunk_max_abs_length: null
187
+ chunk_discard_short_samples: true
188
+ train_data_path_and_name_and_type:
189
+ - - dump/raw/train_nodev_sp/wav.scp
190
+ - speech
191
+ - sound
192
+ - - dump/raw/train_nodev_sp/text
193
+ - text
194
+ - text
195
+ valid_data_path_and_name_and_type:
196
+ - - dump/raw/train_dev/wav.scp
197
+ - speech
198
+ - sound
199
+ - - dump/raw/train_dev/text
200
+ - text
201
+ - text
202
+ multi_task_dataset: false
203
+ allow_variable_data_keys: false
204
+ max_cache_size: 0.0
205
+ max_cache_fd: 32
206
+ allow_multi_rates: false
207
+ valid_max_cache_size: null
208
+ exclude_weight_decay: false
209
+ exclude_weight_decay_conf: {}
210
+ optim: adam
211
+ optim_conf:
212
+ lr: 0.001
213
+ scheduler: warmuplr
214
+ scheduler_conf:
215
+ warmup_steps: 2500
216
+ token_list:
217
+ - <blank>
218
+ - <unk>
219
+ - ▁
220
+ - T
221
+ - E
222
+ - R
223
+ - O
224
+ - A
225
+ - Y
226
+ - H
227
+ - U
228
+ - S
229
+ - I
230
+ - F
231
+ - B
232
+ - L
233
+ - P
234
+ - D
235
+ - G
236
+ - M
237
+ - C
238
+ - V
239
+ - X
240
+ - J
241
+ - K
242
+ - Z
243
+ - W
244
+ - N
245
+ - Q
246
+ - <sos/eos>
247
+ init: xavier_uniform
248
+ input_size: null
249
+ ctc_conf:
250
+ dropout_rate: 0.0
251
+ ctc_type: builtin
252
+ reduce: true
253
+ ignore_nan_grad: null
254
+ zero_infinity: true
255
+ brctc_risk_strategy: exp
256
+ brctc_group_strategy: end
257
+ brctc_risk_factor: 0.0
258
+ joint_net_conf: null
259
+ use_preprocessor: true
260
+ use_lang_prompt: false
261
+ use_nlp_prompt: false
262
+ token_type: bpe
263
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
264
+ non_linguistic_symbols: null
265
+ cleaner: null
266
+ g2p: null
267
+ speech_volume_normalize: null
268
+ rir_scp: null
269
+ rir_apply_prob: 1.0
270
+ noise_scp: null
271
+ noise_apply_prob: 1.0
272
+ noise_db_range: '13_15'
273
+ short_noise_thres: 0.5
274
+ aux_ctc_tasks: []
275
+ frontend: default
276
+ frontend_conf:
277
+ fs: 16k
278
+ specaug: null
279
+ specaug_conf: {}
280
+ normalize: global_mvn
281
+ normalize_conf:
282
+ stats_file: exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz
283
+ model: espnet
284
+ model_conf:
285
+ ctc_weight: 0.3
286
+ lsm_weight: 0.1
287
+ length_normalized_loss: false
288
+ preencoder: null
289
+ preencoder_conf: {}
290
+ encoder: transformer
291
+ encoder_conf:
292
+ output_size: 256
293
+ attention_heads: 4
294
+ linear_units: 2048
295
+ num_blocks: 12
296
+ dropout_rate: 0.1
297
+ positional_dropout_rate: 0.1
298
+ attention_dropout_rate: 0.0
299
+ input_layer: conv2d
300
+ normalize_before: true
301
+ postencoder: null
302
+ postencoder_conf: {}
303
+ decoder: transformer
304
+ decoder_conf:
305
+ attention_heads: 4
306
+ linear_units: 2048
307
+ num_blocks: 6
308
+ dropout_rate: 0.1
309
+ positional_dropout_rate: 0.1
310
+ self_attention_dropout_rate: 0.0
311
+ src_attention_dropout_rate: 0.0
312
+ preprocessor: default
313
+ preprocessor_conf: {}
314
+ required:
315
+ - output_dir
316
+ - token_list
317
+ version: '202503'
318
+ distributed: false
319
+ ```
320
+
321
+ </details>
322
+
323
+
324
+
325
+ ### Citing ESPnet
326
+
327
+ ```bibtex
328
+ @inproceedings{watanabe2018espnet,
329
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
330
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
331
+ year={2018},
332
+ booktitle={Proceedings of Interspeech},
333
+ pages={2207--2211},
334
+ doi={10.21437/Interspeech.2018-1456},
335
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
336
+ }
337
+
338
+
339
+
340
+
341
+
342
+
343
+ ```
344
+
345
+ or arXiv:
346
+
347
+ ```bibtex
348
+ @misc{watanabe2018espnet,
349
+ title={ESPnet: End-to-End Speech Processing Toolkit},
350
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
351
+ year={2018},
352
+ eprint={1804.00015},
353
+ archivePrefix={arXiv},
354
+ primaryClass={cs.CL}
355
+ }
356
+ ```
data/en_token_list/bpe_unigram30/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a9870c3948cfd1b1b94e6c40cd9e64e1474d009773899c62223945fd869dc5b
3
+ size 237994
exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f31adca1b43e0bcb93e086743ceeecdedd4ad405eb7fb6cc550e37358595fe45
3
+ size 1402
exp/asr_train_asr_transformer_raw_en_bpe30_sp/RESULTS.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- [INFO] /home/ubuntu/espnet/egs2/an4/asr1/../../../tools/activate_python.sh is not present -->
2
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
3
+ # RESULTS
4
+ ## Environments
5
+ - date: `Tue May 27 14:02:02 UTC 2025`
6
+ - python version: `3.10.17 (main, May 27 2025, 08:52:46) [GCC 13.3.0]`
7
+ - espnet version: `espnet 202503`
8
+ - pytorch version: `pytorch 2.7.0+cu126`
9
+ - Git hash: `1efdaa835178b0ce5034904e29f89f8fc7e0a358`
10
+ - Commit date: `Thu May 22 12:09:45 2025 -0400`
11
+
12
+ ## exp/asr_train_asr_transformer_raw_en_bpe30_sp
13
+ ### WER
14
+
15
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
16
+ |---|---|---|---|---|---|---|---|---|
17
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|773|92.9|5.0|2.1|0.5|7.6|33.8|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2565|96.5|0.9|2.7|0.7|4.2|33.8|
24
+
25
+ ### TER
26
+
27
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
28
+ |---|---|---|---|---|---|---|---|---|
29
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2695|96.6|0.9|2.5|0.7|4.0|33.8|
30
+
31
+ ## exp/asr_train_asr_transformer_raw_en_bpe30_sp/decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave
32
+ ### WER
33
+
34
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
35
+ |---|---|---|---|---|---|---|---|---|
36
+ |org/train_dev|100|591|87.8|8.3|3.9|0.2|12.4|46.0|
37
+
38
+ ### CER
39
+
40
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
41
+ |---|---|---|---|---|---|---|---|---|
42
+ |org/train_dev|100|1915|91.9|2.7|5.5|0.1|8.3|46.0|
43
+
44
+ ### TER
45
+
46
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
47
+ |---|---|---|---|---|---|---|---|---|
48
+ |org/train_dev|100|2015|92.3|2.5|5.2|0.1|7.8|46.0|
49
+
exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr_transformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/asr_train_asr_transformer_raw_en_bpe30_sp
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 200
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - acc
46
+ - max
47
+ keep_nbest_models: 10
48
+ nbest_averaging_interval: 0
49
+ grad_clip: 5.0
50
+ grad_clip_type: 2.0
51
+ grad_noise: false
52
+ accum_grad: 1
53
+ no_forward_run: false
54
+ resume: true
55
+ train_dtype: float32
56
+ use_amp: false
57
+ log_interval: null
58
+ use_matplotlib: true
59
+ use_tensorboard: true
60
+ create_graph_in_tensorboard: false
61
+ use_wandb: false
62
+ wandb_project: null
63
+ wandb_id: null
64
+ wandb_entity: null
65
+ wandb_name: null
66
+ wandb_model_log_interval: -1
67
+ detect_anomaly: false
68
+ use_adapter: false
69
+ adapter: lora
70
+ save_strategy: all
71
+ adapter_conf: {}
72
+ pretrain_path: null
73
+ init_param: []
74
+ ignore_init_mismatch: false
75
+ freeze_param: []
76
+ num_iters_per_epoch: null
77
+ batch_size: 64
78
+ valid_batch_size: null
79
+ batch_bins: 1000000
80
+ valid_batch_bins: null
81
+ category_sample_size: 10
82
+ train_shape_file:
83
+ - exp/asr_stats_raw_en_bpe30_sp/train/speech_shape
84
+ - exp/asr_stats_raw_en_bpe30_sp/train/text_shape.bpe
85
+ valid_shape_file:
86
+ - exp/asr_stats_raw_en_bpe30_sp/valid/speech_shape
87
+ - exp/asr_stats_raw_en_bpe30_sp/valid/text_shape.bpe
88
+ batch_type: folded
89
+ valid_batch_type: null
90
+ fold_length:
91
+ - 80000
92
+ - 150
93
+ sort_in_batch: descending
94
+ shuffle_within_batch: false
95
+ sort_batch: descending
96
+ multiple_iterator: false
97
+ chunk_length: 500
98
+ chunk_shift_ratio: 0.5
99
+ num_cache_chunks: 1024
100
+ chunk_excluded_key_prefixes: []
101
+ chunk_default_fs: null
102
+ chunk_max_abs_length: null
103
+ chunk_discard_short_samples: true
104
+ train_data_path_and_name_and_type:
105
+ - - dump/raw/train_nodev_sp/wav.scp
106
+ - speech
107
+ - sound
108
+ - - dump/raw/train_nodev_sp/text
109
+ - text
110
+ - text
111
+ valid_data_path_and_name_and_type:
112
+ - - dump/raw/train_dev/wav.scp
113
+ - speech
114
+ - sound
115
+ - - dump/raw/train_dev/text
116
+ - text
117
+ - text
118
+ multi_task_dataset: false
119
+ allow_variable_data_keys: false
120
+ max_cache_size: 0.0
121
+ max_cache_fd: 32
122
+ allow_multi_rates: false
123
+ valid_max_cache_size: null
124
+ exclude_weight_decay: false
125
+ exclude_weight_decay_conf: {}
126
+ optim: adam
127
+ optim_conf:
128
+ lr: 0.001
129
+ scheduler: warmuplr
130
+ scheduler_conf:
131
+ warmup_steps: 2500
132
+ token_list:
133
+ - <blank>
134
+ - <unk>
135
+ - ▁
136
+ - T
137
+ - E
138
+ - R
139
+ - O
140
+ - A
141
+ - Y
142
+ - H
143
+ - U
144
+ - S
145
+ - I
146
+ - F
147
+ - B
148
+ - L
149
+ - P
150
+ - D
151
+ - G
152
+ - M
153
+ - C
154
+ - V
155
+ - X
156
+ - J
157
+ - K
158
+ - Z
159
+ - W
160
+ - N
161
+ - Q
162
+ - <sos/eos>
163
+ init: xavier_uniform
164
+ input_size: null
165
+ ctc_conf:
166
+ dropout_rate: 0.0
167
+ ctc_type: builtin
168
+ reduce: true
169
+ ignore_nan_grad: null
170
+ zero_infinity: true
171
+ brctc_risk_strategy: exp
172
+ brctc_group_strategy: end
173
+ brctc_risk_factor: 0.0
174
+ joint_net_conf: null
175
+ use_preprocessor: true
176
+ use_lang_prompt: false
177
+ use_nlp_prompt: false
178
+ token_type: bpe
179
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
180
+ non_linguistic_symbols: null
181
+ cleaner: null
182
+ g2p: null
183
+ speech_volume_normalize: null
184
+ rir_scp: null
185
+ rir_apply_prob: 1.0
186
+ noise_scp: null
187
+ noise_apply_prob: 1.0
188
+ noise_db_range: '13_15'
189
+ short_noise_thres: 0.5
190
+ aux_ctc_tasks: []
191
+ frontend: default
192
+ frontend_conf:
193
+ fs: 16k
194
+ specaug: null
195
+ specaug_conf: {}
196
+ normalize: global_mvn
197
+ normalize_conf:
198
+ stats_file: exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz
199
+ model: espnet
200
+ model_conf:
201
+ ctc_weight: 0.3
202
+ lsm_weight: 0.1
203
+ length_normalized_loss: false
204
+ preencoder: null
205
+ preencoder_conf: {}
206
+ encoder: transformer
207
+ encoder_conf:
208
+ output_size: 256
209
+ attention_heads: 4
210
+ linear_units: 2048
211
+ num_blocks: 12
212
+ dropout_rate: 0.1
213
+ positional_dropout_rate: 0.1
214
+ attention_dropout_rate: 0.0
215
+ input_layer: conv2d
216
+ normalize_before: true
217
+ postencoder: null
218
+ postencoder_conf: {}
219
+ decoder: transformer
220
+ decoder_conf:
221
+ attention_heads: 4
222
+ linear_units: 2048
223
+ num_blocks: 6
224
+ dropout_rate: 0.1
225
+ positional_dropout_rate: 0.1
226
+ self_attention_dropout_rate: 0.0
227
+ src_attention_dropout_rate: 0.0
228
+ preprocessor: default
229
+ preprocessor_conf: {}
230
+ required:
231
+ - output_dir
232
+ - token_list
233
+ version: '202503'
234
+ distributed: false
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/acc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/backward_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer_ctc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/clip.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/forward_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/grad_norm.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/iter_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_att.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_ctc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_scale.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim0_lr0.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim_step_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/train_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/wer.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ded281fee5a52d8146b7e7ac827f4bcc0e414007d445322d0a4525d2e76a2910
3
+ size 108687879
exp/lm_train_lm_en_bpe30/34epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e811709c83ca5da3ed21a5a8185db65977f116e87c9ad3afd2b2ae200b8ee46
3
+ size 27240733
exp/lm_train_lm_en_bpe30/config.yaml ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_lm.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/lm_train_lm_en_bpe30
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 40
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - loss
46
+ - min
47
+ keep_nbest_models: 1
48
+ nbest_averaging_interval: 0
49
+ grad_clip: 5.0
50
+ grad_clip_type: 2.0
51
+ grad_noise: false
52
+ accum_grad: 1
53
+ no_forward_run: false
54
+ resume: true
55
+ train_dtype: float32
56
+ use_amp: false
57
+ log_interval: null
58
+ use_matplotlib: true
59
+ use_tensorboard: true
60
+ create_graph_in_tensorboard: false
61
+ use_wandb: false
62
+ wandb_project: null
63
+ wandb_id: null
64
+ wandb_entity: null
65
+ wandb_name: null
66
+ wandb_model_log_interval: -1
67
+ detect_anomaly: false
68
+ use_adapter: false
69
+ adapter: lora
70
+ save_strategy: all
71
+ adapter_conf: {}
72
+ pretrain_path: null
73
+ init_param: []
74
+ ignore_init_mismatch: false
75
+ freeze_param: []
76
+ num_iters_per_epoch: null
77
+ batch_size: 256
78
+ valid_batch_size: null
79
+ batch_bins: 1000000
80
+ valid_batch_bins: null
81
+ category_sample_size: 10
82
+ train_shape_file:
83
+ - exp/lm_stats_en_bpe30/train/text_shape.bpe
84
+ valid_shape_file:
85
+ - exp/lm_stats_en_bpe30/valid/text_shape.bpe
86
+ batch_type: folded
87
+ valid_batch_type: null
88
+ fold_length:
89
+ - 150
90
+ sort_in_batch: descending
91
+ shuffle_within_batch: false
92
+ sort_batch: descending
93
+ multiple_iterator: false
94
+ chunk_length: 500
95
+ chunk_shift_ratio: 0.5
96
+ num_cache_chunks: 1024
97
+ chunk_excluded_key_prefixes: []
98
+ chunk_default_fs: null
99
+ chunk_max_abs_length: null
100
+ chunk_discard_short_samples: true
101
+ train_data_path_and_name_and_type:
102
+ - - dump/raw/lm_train.txt
103
+ - text
104
+ - text
105
+ valid_data_path_and_name_and_type:
106
+ - - dump/raw/org/train_dev/text
107
+ - text
108
+ - text
109
+ multi_task_dataset: false
110
+ allow_variable_data_keys: false
111
+ max_cache_size: 0.0
112
+ max_cache_fd: 32
113
+ allow_multi_rates: false
114
+ valid_max_cache_size: null
115
+ exclude_weight_decay: false
116
+ exclude_weight_decay_conf: {}
117
+ optim: adam
118
+ optim_conf:
119
+ lr: 0.1
120
+ scheduler: null
121
+ scheduler_conf: {}
122
+ token_list:
123
+ - <blank>
124
+ - <unk>
125
+ - ▁
126
+ - T
127
+ - E
128
+ - R
129
+ - O
130
+ - A
131
+ - Y
132
+ - H
133
+ - U
134
+ - S
135
+ - I
136
+ - F
137
+ - B
138
+ - L
139
+ - P
140
+ - D
141
+ - G
142
+ - M
143
+ - C
144
+ - V
145
+ - X
146
+ - J
147
+ - K
148
+ - Z
149
+ - W
150
+ - N
151
+ - Q
152
+ - <sos/eos>
153
+ init: null
154
+ use_preprocessor: true
155
+ token_type: bpe
156
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
157
+ non_linguistic_symbols: null
158
+ cleaner: null
159
+ g2p: null
160
+ lm: seq_rnn
161
+ lm_conf:
162
+ unit: 650
163
+ nlayers: 2
164
+ model: lm
165
+ model_conf: {}
166
+ required:
167
+ - output_dir
168
+ - token_list
169
+ version: '202503'
170
+ distributed: false
exp/lm_train_lm_en_bpe30/images/backward_time.png ADDED
exp/lm_train_lm_en_bpe30/images/clip.png ADDED
exp/lm_train_lm_en_bpe30/images/forward_time.png ADDED
exp/lm_train_lm_en_bpe30/images/gpu_max_cached_mem_GB.png ADDED
exp/lm_train_lm_en_bpe30/images/grad_norm.png ADDED
exp/lm_train_lm_en_bpe30/images/iter_time.png ADDED
exp/lm_train_lm_en_bpe30/images/loss.png ADDED
exp/lm_train_lm_en_bpe30/images/loss_scale.png ADDED
exp/lm_train_lm_en_bpe30/images/optim0_lr0.png ADDED
exp/lm_train_lm_en_bpe30/images/optim_step_time.png ADDED
exp/lm_train_lm_en_bpe30/images/train_time.png ADDED
exp/lm_train_lm_en_bpe30/perplexity_test/ppl ADDED
@@ -0,0 +1 @@
 
 
1
+ 5.18038911498181
meta.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ espnet: '202503'
2
+ files:
3
+ asr_model_file: exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth
4
+ lm_file: exp/lm_train_lm_en_bpe30/34epoch.pth
5
+ python: 3.10.17 (main, May 27 2025, 08:52:46) [GCC 13.3.0]
6
+ timestamp: 1748356644.688333
7
+ torch: 2.7.0+cu126
8
+ yaml_files:
9
+ asr_train_config: exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml
10
+ lm_train_config: exp/lm_train_lm_en_bpe30/config.yaml