Jialu Li committed on
Commit
c1b4c86
·
1 Parent(s): 36b99a0

add all exp files

Browse files
Files changed (21) hide show
  1. data/token_list/bpe_unigram150/bpe.model +3 -0
  2. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/RESULTS.md +35 -0
  3. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/config.yaml +364 -0
  4. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/acc.png +0 -0
  5. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/backward_time.png +0 -0
  6. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/cer.png +0 -0
  7. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/cer_ctc.png +0 -0
  8. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/clip.png +0 -0
  9. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/forward_time.png +0 -0
  10. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/gpu_max_cached_mem_GB.png +0 -0
  11. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/grad_norm.png +0 -0
  12. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/iter_time.png +0 -0
  13. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/loss.png +0 -0
  14. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/loss_att.png +0 -0
  15. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/loss_ctc.png +0 -0
  16. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/loss_scale.png +0 -0
  17. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/optim0_lr0.png +0 -0
  18. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/optim_step_time.png +0 -0
  19. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/train_time.png +0 -0
  20. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/wer.png +0 -0
  21. exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/valid.loss.ave_5best.pth +3 -0
data/token_list/bpe_unigram150/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0d67432adee6143fd0c0122606ee8b32712a544976bfbc159feeec2e7840193
3
+ size 239511
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/RESULTS.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Tue Mar 4 22:56:38 EST 2025`
5
+ - python version: `3.9.21 (main, Dec 11 2024, 16:24:11) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202412`
7
+ - pytorch version: `pytorch 2.2.0+cu121`
8
+ - Git hash: `b57dd0418ab2f7a03a21239f70decb9837cb2b0f`
9
+ - Commit date: `Wed Feb 5 07:19:24 2025 -0500`
10
+
11
+ ## exp_new/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/decode_asr_ctc_lm_lm_train_lm_en_bpe150_valid.loss.ave_asr_model_valid.loss.ave
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |test/Hidalgo|324|3760|9.9|57.4|32.7|8.0|98.1|100.0|
17
+ |test/Tequila|870|9337|14.0|62.5|23.4|8.8|94.8|100.0|
18
+ |test/Zacatlan|1836|16318|21.9|56.6|21.5|5.1|83.2|99.6|
19
+
20
+ ### CER
21
+
22
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
23
+ |---|---|---|---|---|---|---|---|---|
24
+ |test/Hidalgo|324|23998|62.4|13.0|24.6|15.3|52.9|100.0|
25
+ |test/Tequila|870|65418|70.4|11.6|18.0|14.2|43.8|100.0|
26
+ |test/Zacatlan|1836|108155|79.9|7.6|12.5|9.4|29.5|99.6|
27
+
28
+ ### TER
29
+
30
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
31
+ |---|---|---|---|---|---|---|---|---|
32
+ |test/Hidalgo|324|13202|48.2|27.2|24.6|15.9|67.7|100.0|
33
+ |test/Tequila|870|37224|55.5|25.3|19.3|13.8|58.3|100.0|
34
+ |test/Zacatlan|1836|61871|66.2|19.4|14.4|8.4|42.2|99.6|
35
+
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/config.yaml ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr_s3prl_single_lr1e-3.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp_new/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 4
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 15
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - loss
46
+ - min
47
+ keep_nbest_models: 5
48
+ nbest_averaging_interval: 0
49
+ grad_clip: 5.0
50
+ grad_clip_type: 2.0
51
+ grad_noise: false
52
+ accum_grad: 4
53
+ no_forward_run: false
54
+ resume: true
55
+ train_dtype: float32
56
+ use_amp: false
57
+ log_interval: null
58
+ use_matplotlib: true
59
+ use_tensorboard: true
60
+ create_graph_in_tensorboard: false
61
+ use_wandb: false
62
+ wandb_project: null
63
+ wandb_id: null
64
+ wandb_entity: null
65
+ wandb_name: null
66
+ wandb_model_log_interval: -1
67
+ detect_anomaly: false
68
+ use_adapter: false
69
+ adapter: lora
70
+ save_strategy: all
71
+ adapter_conf: {}
72
+ pretrain_path: null
73
+ init_param: []
74
+ ignore_init_mismatch: false
75
+ freeze_param:
76
+ - frontend.upstream
77
+ num_iters_per_epoch: 500
78
+ batch_size: 8
79
+ valid_batch_size: null
80
+ batch_bins: 1000000
81
+ valid_batch_bins: null
82
+ category_sample_size: 10
83
+ train_shape_file:
84
+ - exp_new/asr_stats_raw_en_bpe150_sp/train/speech_shape
85
+ - exp_new/asr_stats_raw_en_bpe150_sp/train/text_shape.bpe
86
+ valid_shape_file:
87
+ - exp_new/asr_stats_raw_en_bpe150_sp/valid/speech_shape
88
+ - exp_new/asr_stats_raw_en_bpe150_sp/valid/text_shape.bpe
89
+ batch_type: sorted
90
+ valid_batch_type: null
91
+ fold_length:
92
+ - 80000
93
+ - 150
94
+ sort_in_batch: descending
95
+ shuffle_within_batch: false
96
+ sort_batch: descending
97
+ multiple_iterator: false
98
+ chunk_length: 500
99
+ chunk_shift_ratio: 0.5
100
+ num_cache_chunks: 1024
101
+ chunk_excluded_key_prefixes: []
102
+ chunk_default_fs: null
103
+ chunk_max_abs_length: null
104
+ chunk_discard_short_samples: true
105
+ train_data_path_and_name_and_type:
106
+ - - dump/raw/train_sp/wav.scp
107
+ - speech
108
+ - sound
109
+ - - dump/raw/train_sp/text
110
+ - text
111
+ - text
112
+ valid_data_path_and_name_and_type:
113
+ - - dump/raw/dev/wav.scp
114
+ - speech
115
+ - sound
116
+ - - dump/raw/dev/text
117
+ - text
118
+ - text
119
+ multi_task_dataset: false
120
+ allow_variable_data_keys: false
121
+ max_cache_size: 0.0
122
+ max_cache_fd: 32
123
+ allow_multi_rates: false
124
+ valid_max_cache_size: null
125
+ exclude_weight_decay: false
126
+ exclude_weight_decay_conf: {}
127
+ optim: adam
128
+ optim_conf:
129
+ lr: 0.001
130
+ weight_decay: 1.0e-06
131
+ scheduler: null
132
+ scheduler_conf: {}
133
+ token_list:
134
+ - <blank>
135
+ - <unk>
136
+ - ':'
137
+ - N
138
+ - H
139
+ - ▁
140
+ - K
141
+ - A
142
+ - KA
143
+ - ▁O
144
+ - WA
145
+ - ▁I
146
+ - KI
147
+ - S
148
+ - TI
149
+ - ▁A
150
+ - MO
151
+ - L
152
+ - MA
153
+ - TLA
154
+ - NI
155
+ - O
156
+ - ▁KI
157
+ - ▁N
158
+ - WI
159
+ - CHI
160
+ - PA
161
+ - ▁NO
162
+ - CH
163
+ - KO
164
+ - X
165
+ - ▁SE
166
+ - LI
167
+ - TO
168
+ - ▁TLA
169
+ - ▁WA
170
+ - TL
171
+ - I
172
+ - TSI
173
+ - TA
174
+ - E
175
+ - ▁NE
176
+ - MI
177
+ - ▁KE
178
+ - 'NO'
179
+ - TE
180
+ - ▁NI
181
+ - YA
182
+ - ▁KA
183
+ - ▁MA
184
+ - ▁TI
185
+ - NE
186
+ - ''''
187
+ - C
188
+ - ▁MO
189
+ - NA
190
+ - KEH
191
+ - ▁POS
192
+ - ▁YA
193
+ - ▁TLE
194
+ - LA
195
+ - ▁YO
196
+ - PO
197
+ - T
198
+ - R
199
+ - U
200
+ - LIA
201
+ - ▁TE
202
+ - W
203
+ - ▁KEH
204
+ - ▁TO
205
+ - TS
206
+ - ▁YEH
207
+ - MEH
208
+ - ▁NEH
209
+ - TOK
210
+ - ▁OMPA
211
+ - YO
212
+ - SI
213
+ - PI
214
+ - ▁NIKA
215
+ - LO
216
+ - KE
217
+ - B
218
+ - D
219
+ - LE
220
+ - PE
221
+ - XA
222
+ - XI
223
+ - ▁KO
224
+ - M
225
+ - SE
226
+ - ME
227
+ - ▁MM
228
+ - ▁PERO
229
+ - ▁OHKÓ
230
+ - ▁NIK
231
+ - ▁SI
232
+ - MPA
233
+ - Í
234
+ - WE
235
+ - ▁SAN
236
+ - ▁TOS
237
+ - ▁TIK
238
+ - TLE
239
+ - ▁NIKI
240
+ - YI
241
+ - ▁PARA
242
+ - ▁PA
243
+ - WEH
244
+ - ▁DE
245
+ - ▁NOCHI
246
+ - TLI
247
+ - P
248
+ - TSO
249
+ - HKEH
250
+ - ▁SA
251
+ - ▁TIKI
252
+ - ▁E
253
+ - RO
254
+ - TSA
255
+ - ▁IHKÓ
256
+ - ▁B
257
+ - ▁KWA
258
+ - PIA
259
+ - SO
260
+ - Á
261
+ - RA
262
+ - V
263
+ - ▁YE
264
+ - ▁YEHWA
265
+ - ▁AHÁ
266
+ - YE
267
+ - Z
268
+ - J
269
+ - Ó
270
+ - G
271
+ - É
272
+ - Y
273
+ - '-'
274
+ - ̈
275
+ - ¡
276
+ - Ú
277
+ - Ñ
278
+ - F
279
+ - Q
280
+ - Ü
281
+ - ¿
282
+ - Ï
283
+ - <sos/eos>
284
+ init: null
285
+ input_size: null
286
+ ctc_conf:
287
+ dropout_rate: 0.0
288
+ ctc_type: builtin
289
+ reduce: true
290
+ ignore_nan_grad: null
291
+ zero_infinity: true
292
+ brctc_risk_strategy: exp
293
+ brctc_group_strategy: end
294
+ brctc_risk_factor: 0.0
295
+ joint_net_conf: null
296
+ use_preprocessor: true
297
+ use_lang_prompt: false
298
+ use_nlp_prompt: false
299
+ token_type: bpe
300
+ bpemodel: data/en_token_list/bpe_unigram150/bpe.model
301
+ non_linguistic_symbols: null
302
+ cleaner: null
303
+ g2p: null
304
+ speech_volume_normalize: null
305
+ rir_scp: null
306
+ rir_apply_prob: 1.0
307
+ noise_scp: null
308
+ noise_apply_prob: 1.0
309
+ noise_db_range: '13_15'
310
+ short_noise_thres: 0.5
311
+ aux_ctc_tasks: []
312
+ frontend: s3prl
313
+ frontend_conf:
314
+ frontend_conf:
315
+ upstream: hubert_large_ll60k
316
+ download_dir: ./hub
317
+ multilayer_feature: true
318
+ fs: 16k
319
+ specaug: specaug
320
+ specaug_conf:
321
+ apply_time_warp: true
322
+ time_warp_window: 5
323
+ time_warp_mode: bicubic
324
+ apply_freq_mask: true
325
+ freq_mask_width_range:
326
+ - 0
327
+ - 27
328
+ num_freq_mask: 2
329
+ apply_time_mask: true
330
+ time_mask_width_ratio_range:
331
+ - 0.0
332
+ - 0.05
333
+ num_time_mask: 10
334
+ normalize: utterance_mvn
335
+ normalize_conf: {}
336
+ model: espnet
337
+ model_conf:
338
+ ctc_weight: 1.0
339
+ preencoder: linear
340
+ preencoder_conf:
341
+ input_size: 1024
342
+ output_size: 80
343
+ encoder: transformer
344
+ encoder_conf:
345
+ output_size: 256
346
+ attention_heads: 8
347
+ linear_units: 1024
348
+ num_blocks: 2
349
+ dropout_rate: 0.1
350
+ positional_dropout_rate: 0.1
351
+ attention_dropout_rate: 0.1
352
+ input_layer: conv2d2
353
+ normalize_before: true
354
+ postencoder: null
355
+ postencoder_conf: {}
356
+ decoder: null
357
+ decoder_conf: {}
358
+ preprocessor: default
359
+ preprocessor_conf: {}
360
+ required:
361
+ - output_dir
362
+ - token_list
363
+ version: '202412'
364
+ distributed: false
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/acc.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/backward_time.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/cer.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/cer_ctc.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/clip.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/forward_time.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/grad_norm.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/iter_time.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/loss.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/loss_att.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/loss_ctc.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/loss_scale.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/optim0_lr0.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/optim_step_time.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/train_time.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/images/wer.png ADDED
exp/asr_train_asr_s3prl_single_lr1e-3_raw_en_bpe150_sp/valid.loss.ave_5best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31d5303f7c423f1e870dd74a6d4b09a13d70e0acf3f7863035226934eabaef3e
3
+ size 1285514315