popcornell committed on
Commit
4205e68
·
1 Parent(s): 2abc64d

Update model

Browse files
Files changed (21) hide show
  1. README.md +779 -0
  2. data/en_token_list/bpe_unigram500/bpe.model +3 -0
  3. data/nlsyms.txt +0 -0
  4. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/RESULTS.md +10 -0
  5. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/config.yaml +701 -0
  6. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/acc.png +0 -0
  7. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/backward_time.png +0 -0
  8. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/cer.png +0 -0
  9. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/cer_ctc.png +0 -0
  10. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/forward_time.png +0 -0
  11. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/gpu_max_cached_mem_GB.png +0 -0
  12. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/iter_time.png +0 -0
  13. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/loss.png +0 -0
  14. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/loss_att.png +0 -0
  15. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/loss_ctc.png +0 -0
  16. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/optim0_lr0.png +0 -0
  17. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/optim_step_time.png +0 -0
  18. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/train_time.png +0 -0
  19. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/wer.png +0 -0
  20. exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/valid.acc.ave_5best.pth +3 -0
  21. meta.yaml +8 -0
README.md CHANGED
@@ -1,3 +1,782 @@
1
  ---
 
 
 
 
 
 
 
2
  license: cc-by-4.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - chime7_task1
9
  license: cc-by-4.0
10
  ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `popcornell/chime7_task1_asr1_baseline`
15
+
16
+ This model was trained by popcornell using the chime7_task1 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 8fee771ea66f53a4b5e66c47159f7548c5efacee
26
+ pip install -e .
27
+ cd egs2/chime7_task1/asr1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model popcornell/chime7_task1_asr1_baseline
29
+ ```
30
+
31
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Wed Feb 8 23:41:28 UTC 2023`
35
+ - python version: `3.9.2 (default, Mar 3 2021, 20:02:32) [GCC 7.3.0]`
36
+ - espnet version: `espnet 202301`
37
+ - pytorch version: `pytorch 1.13.1+cu116`
38
+ - Git hash: ``
39
+ - Commit date: ``
40
+
41
+ ## ASR config
42
+
43
+ <details><summary>expand</summary>
44
+
45
+ ```
46
+ config: conf/tuning/train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k.yaml
47
+ print_config: false
48
+ log_level: INFO
49
+ dry_run: false
50
+ iterator_type: sequence
51
+ output_dir: exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp
52
+ ngpu: 1
53
+ seed: 0
54
+ num_workers: 1
55
+ num_att_plot: 3
56
+ dist_backend: nccl
57
+ dist_init_method: env://
58
+ dist_world_size: 5
59
+ dist_rank: 0
60
+ local_rank: 0
61
+ dist_master_addr: localhost
62
+ dist_master_port: 44341
63
+ dist_launcher: null
64
+ multiprocessing_distributed: true
65
+ unused_parameters: true
66
+ sharded_ddp: false
67
+ cudnn_enabled: true
68
+ cudnn_benchmark: false
69
+ cudnn_deterministic: true
70
+ collect_stats: false
71
+ write_collected_feats: false
72
+ max_epoch: 8
73
+ patience: 4
74
+ val_scheduler_criterion:
75
+ - valid
76
+ - loss
77
+ early_stopping_criterion:
78
+ - valid
79
+ - loss
80
+ - min
81
+ best_model_criterion:
82
+ - - valid
83
+ - acc
84
+ - max
85
+ keep_nbest_models: 5
86
+ nbest_averaging_interval: 0
87
+ grad_clip: 5
88
+ grad_clip_type: 2.0
89
+ grad_noise: false
90
+ accum_grad: 1
91
+ no_forward_run: false
92
+ resume: true
93
+ train_dtype: float32
94
+ use_amp: false
95
+ log_interval: null
96
+ use_matplotlib: true
97
+ use_tensorboard: true
98
+ create_graph_in_tensorboard: false
99
+ use_wandb: false
100
+ wandb_project: null
101
+ wandb_id: null
102
+ wandb_entity: null
103
+ wandb_name: null
104
+ wandb_model_log_interval: -1
105
+ detect_anomaly: false
106
+ pretrain_path: null
107
+ init_param: []
108
+ ignore_init_mismatch: false
109
+ freeze_param:
110
+ - frontend.upstream
111
+ num_iters_per_epoch: null
112
+ batch_size: 640
113
+ valid_batch_size: null
114
+ batch_bins: 1000000
115
+ valid_batch_bins: null
116
+ train_shape_file:
117
+ - exp/asr_stats_raw_en_bpe500_sp/train/speech_shape
118
+ - exp/asr_stats_raw_en_bpe500_sp/train/text_shape.bpe
119
+ valid_shape_file:
120
+ - exp/asr_stats_raw_en_bpe500_sp/valid/speech_shape
121
+ - exp/asr_stats_raw_en_bpe500_sp/valid/text_shape.bpe
122
+ batch_type: folded
123
+ valid_batch_type: null
124
+ fold_length:
125
+ - 80000
126
+ - 150
127
+ sort_in_batch: descending
128
+ sort_batch: descending
129
+ multiple_iterator: false
130
+ chunk_length: 500
131
+ chunk_shift_ratio: 0.5
132
+ num_cache_chunks: 1024
133
+ train_data_path_and_name_and_type:
134
+ - - dump/raw/kaldi/train_all_mdm_ihm_rvb_gss_sp/wav.scp
135
+ - speech
136
+ - sound
137
+ - - dump/raw/kaldi/train_all_mdm_ihm_rvb_gss_sp/text
138
+ - text
139
+ - text
140
+ valid_data_path_and_name_and_type:
141
+ - - dump/raw/kaldi/chime6/dev/gss/wav.scp
142
+ - speech
143
+ - sound
144
+ - - dump/raw/kaldi/chime6/dev/gss/text
145
+ - text
146
+ - text
147
+ allow_variable_data_keys: false
148
+ max_cache_size: 0.0
149
+ max_cache_fd: 32
150
+ valid_max_cache_size: null
151
+ exclude_weight_decay: false
152
+ exclude_weight_decay_conf: {}
153
+ optim: adam
154
+ optim_conf:
155
+ lr: 0.0005
156
+ scheduler: warmuplr
157
+ scheduler_conf:
158
+ warmup_steps: 8000
159
+ token_list:
160
+ - <blank>
161
+ - <unk>
162
+ - s
163
+ - ''''
164
+ - ▁i
165
+ - t
166
+ - ▁it
167
+ - ▁a
168
+ - e
169
+ - ▁you
170
+ - ▁the
171
+ - ▁like
172
+ - ▁yeah
173
+ - a
174
+ - d
175
+ - ▁and
176
+ - m
177
+ - ▁that
178
+ - ▁to
179
+ - n
180
+ - i
181
+ - y
182
+ - ing
183
+ - o
184
+ - u
185
+ - ▁so
186
+ - p
187
+ - ▁of
188
+ - ▁in
189
+ - re
190
+ - ▁was
191
+ - c
192
+ - r
193
+ - ▁just
194
+ - er
195
+ - ▁know
196
+ - ▁oh
197
+ - ed
198
+ - ▁but
199
+ - ▁ummm
200
+ - ▁we
201
+ - l
202
+ - ▁no
203
+ - ▁they
204
+ - ▁have
205
+ - ▁do
206
+ - g
207
+ - ▁he
208
+ - k
209
+ - ll
210
+ - ▁uhhh
211
+ - ▁don
212
+ - ▁for
213
+ - h
214
+ - ▁what
215
+ - ▁be
216
+ - ar
217
+ - ▁is
218
+ - ▁there
219
+ - '-'
220
+ - ▁s
221
+ - ▁this
222
+ - in
223
+ - b
224
+ - ▁
225
+ - en
226
+ - ▁on
227
+ - ▁p
228
+ - ▁can
229
+ - al
230
+ - ▁not
231
+ - w
232
+ - ▁my
233
+ - ▁one
234
+ - ic
235
+ - f
236
+ - ▁or
237
+ - ▁really
238
+ - ▁go
239
+ - ▁right
240
+ - ▁me
241
+ - an
242
+ - ▁w
243
+ - or
244
+ - le
245
+ - ▁f
246
+ - ▁think
247
+ - ▁okay
248
+ - ▁all
249
+ - ▁then
250
+ - ▁with
251
+ - ▁are
252
+ - ▁get
253
+ - it
254
+ - ▁t
255
+ - ▁st
256
+ - ve
257
+ - ▁hmmm
258
+ - ▁g
259
+ - ▁if
260
+ - ce
261
+ - 'on'
262
+ - ▁she
263
+ - ▁good
264
+ - ▁e
265
+ - es
266
+ - ▁well
267
+ - v
268
+ - ▁re
269
+ - th
270
+ - ter
271
+ - ch
272
+ - ▁out
273
+ - ▁up
274
+ - ly
275
+ - ▁b
276
+ - ▁ma
277
+ - il
278
+ - ▁would
279
+ - ▁at
280
+ - ▁want
281
+ - ▁mean
282
+ - ▁ch
283
+ - ▁your
284
+ - ▁people
285
+ - ur
286
+ - ▁how
287
+ - ▁k
288
+ - ▁co
289
+ - ▁about
290
+ - ▁tr
291
+ - ▁ba
292
+ - ▁kind
293
+ - ▁when
294
+ - ▁mi
295
+ - ▁because
296
+ - ro
297
+ - ▁had
298
+ - ▁ho
299
+ - ▁gonna
300
+ - ▁time
301
+ - ▁more
302
+ - ▁got
303
+ - ▁some
304
+ - ▁two
305
+ - ▁did
306
+ - ▁see
307
+ - ▁now
308
+ - ▁pa
309
+ - ra
310
+ - ▁de
311
+ - ▁lot
312
+ - ▁actually
313
+ - ▁o
314
+ - ▁too
315
+ - ate
316
+ - ▁here
317
+ - ▁cuz
318
+ - ▁sp
319
+ - ▁where
320
+ - ▁going
321
+ - ▁j
322
+ - ▁from
323
+ - ▁bo
324
+ - ▁them
325
+ - ▁bu
326
+ - ▁put
327
+ - ▁thing
328
+ - ng
329
+ - ▁were
330
+ - ▁n
331
+ - ▁sh
332
+ - ▁work
333
+ - el
334
+ - ▁something
335
+ - ▁se
336
+ - ▁say
337
+ - ke
338
+ - ow
339
+ - ▁ca
340
+ - ▁fa
341
+ - ▁need
342
+ - sh
343
+ - ▁di
344
+ - ▁po
345
+ - ▁make
346
+ - la
347
+ - ▁br
348
+ - ▁v
349
+ - ▁an
350
+ - ▁who
351
+ - ion
352
+ - ▁y
353
+ - ▁look
354
+ - ▁didn
355
+ - ▁could
356
+ - ▁little
357
+ - ver
358
+ - ▁c
359
+ - ▁mo
360
+ - ▁much
361
+ - ▁very
362
+ - ir
363
+ - ▁sa
364
+ - ▁play
365
+ - ▁pretty
366
+ - ▁been
367
+ - ▁d
368
+ - ▁other
369
+ - ▁year
370
+ - and
371
+ - ▁mm
372
+ - ▁stuff
373
+ - ▁dr
374
+ - ▁why
375
+ - ▁con
376
+ - ▁su
377
+ - ▁back
378
+ - ▁ex
379
+ - ting
380
+ - ▁take
381
+ - ▁li
382
+ - ▁even
383
+ - ▁should
384
+ - ▁her
385
+ - ally
386
+ - lo
387
+ - ation
388
+ - ▁way
389
+ - ▁guess
390
+ - ▁has
391
+ - z
392
+ - ▁three
393
+ - ry
394
+ - ▁ha
395
+ - ies
396
+ - is
397
+ - x
398
+ - ▁ro
399
+ - ▁yes
400
+ - ▁th
401
+ - ▁use
402
+ - ▁down
403
+ - ous
404
+ - ▁over
405
+ - ▁probably
406
+ - ▁guys
407
+ - ▁maybe
408
+ - ▁still
409
+ - ▁cr
410
+ - ▁which
411
+ - ▁nice
412
+ - und
413
+ - ▁sure
414
+ - ▁l
415
+ - ▁off
416
+ - ▁la
417
+ - ▁cu
418
+ - est
419
+ - ▁any
420
+ - ▁fi
421
+ - ▁these
422
+ - ▁ra
423
+ - ▁went
424
+ - ▁things
425
+ - ment
426
+ - ▁doing
427
+ - ▁day
428
+ - ▁un
429
+ - ▁lo
430
+ - ▁da
431
+ - ▁only
432
+ - igh
433
+ - ▁come
434
+ - ▁big
435
+ - ▁those
436
+ - ▁wanna
437
+ - ▁bit
438
+ - ▁never
439
+ - ▁us
440
+ - ol
441
+ - ▁though
442
+ - ▁first
443
+ - ive
444
+ - ▁their
445
+ - ▁let
446
+ - ▁start
447
+ - ▁his
448
+ - ▁four
449
+ - ▁le
450
+ - ▁eat
451
+ - ist
452
+ - ▁school
453
+ - us
454
+ - ▁into
455
+ - ▁yep
456
+ - uck
457
+ - ▁than
458
+ - ▁him
459
+ - ▁hi
460
+ - ▁also
461
+ - ▁five
462
+ - side
463
+ - ▁new
464
+ - ▁comp
465
+ - ▁cool
466
+ - ▁talk
467
+ - ▁said
468
+ - ▁pro
469
+ - ▁r
470
+ - ▁always
471
+ - ▁ri
472
+ - ▁cl
473
+ - ▁long
474
+ - able
475
+ - ▁sc
476
+ - ▁gra
477
+ - ▁by
478
+ - ▁friend
479
+ - age
480
+ - ▁different
481
+ - ▁live
482
+ - ▁doesn
483
+ - ▁place
484
+ - ▁sorry
485
+ - ▁will
486
+ - ▁feel
487
+ - ▁does
488
+ - ▁part
489
+ - ▁wait
490
+ - ▁six
491
+ - ▁watch
492
+ - ▁anything
493
+ - ▁man
494
+ - ▁our
495
+ - ▁car
496
+ - ▁huh
497
+ - ▁whatever
498
+ - ▁last
499
+ - ▁give
500
+ - ▁ten
501
+ - ▁before
502
+ - ▁thought
503
+ - ▁after
504
+ - ▁game
505
+ - ▁card
506
+ - ▁fl
507
+ - ▁every
508
+ - cause
509
+ - ▁same
510
+ - ▁around
511
+ - ▁cook
512
+ - ▁week
513
+ - ▁hu
514
+ - ▁everything
515
+ - ▁fine
516
+ - ▁many
517
+ - ▁qu
518
+ - ▁read
519
+ - ▁tea
520
+ - ough
521
+ - ance
522
+ - ▁turn
523
+ - ▁wow
524
+ - ▁fun
525
+ - ▁hard
526
+ - ▁great
527
+ - ▁love
528
+ - ▁remember
529
+ - ▁twenty
530
+ - ▁whole
531
+ - ▁happen
532
+ - ▁seven
533
+ - ▁keep
534
+ - ▁food
535
+ - ▁most
536
+ - j
537
+ - ▁might
538
+ - ▁thank
539
+ - ▁move
540
+ - ▁job
541
+ - ▁eight
542
+ - ▁mu
543
+ - ▁sort
544
+ - ▁better
545
+ - port
546
+ - ▁another
547
+ - ful
548
+ - ▁point
549
+ - ▁show
550
+ - ▁again
551
+ - ▁high
552
+ - ize
553
+ - ▁house
554
+ - ▁home
555
+ - ▁person
556
+ - ▁old
557
+ - ▁end
558
+ - ▁through
559
+ - ▁pick
560
+ - ▁else
561
+ - ▁guy
562
+ - ▁app
563
+ - ▁find
564
+ - ▁nine
565
+ - ▁hand
566
+ - ▁kid
567
+ - ▁interesting
568
+ - ▁city
569
+ - ▁called
570
+ - ▁tell
571
+ - ▁half
572
+ - ▁name
573
+ - ▁definitely
574
+ - ▁made
575
+ - ▁exactly
576
+ - ▁came
577
+ - ▁wood
578
+ - ▁funny
579
+ - ▁basically
580
+ - ▁count
581
+ - ▁usually
582
+ - ▁help
583
+ - ▁someone
584
+ - ▁already
585
+ - ▁dunno
586
+ - ▁enough
587
+ - ction
588
+ - ▁own
589
+ - ▁weird
590
+ - ▁next
591
+ - ▁hundred
592
+ - ▁small
593
+ - ▁money
594
+ - ▁couple
595
+ - ▁while
596
+ - ▁close
597
+ - ▁movie
598
+ - ▁sometimes
599
+ - ▁everyone
600
+ - ▁away
601
+ - ▁true
602
+ - ▁super
603
+ - ▁cheese
604
+ - ▁class
605
+ - ▁night
606
+ - ▁life
607
+ - ▁leave
608
+ - ▁plan
609
+ - ▁water
610
+ - ▁left
611
+ - ▁thirty
612
+ - ▁family
613
+ - ▁phone
614
+ - ▁build
615
+ - ▁room
616
+ - ▁month
617
+ - ▁open
618
+ - ▁idea
619
+ - ▁second
620
+ - ▁dude
621
+ - ▁music
622
+ - ▁each
623
+ - ▁learn
624
+ - ▁girl
625
+ - ▁together
626
+ - ▁under
627
+ - ▁run
628
+ - ▁chicken
629
+ - ▁having
630
+ - ▁either
631
+ - ▁almost
632
+ - ▁crazy
633
+ - ▁book
634
+ - ▁sauce
635
+ - ▁supposed
636
+ - ▁course
637
+ - ▁speak
638
+ - ▁awesome
639
+ - ▁anyway
640
+ - ▁throw
641
+ - ▁finish
642
+ - ▁world
643
+ - ▁reason
644
+ - ▁check
645
+ - ▁least
646
+ - ▁parents
647
+ - ▁everybody
648
+ - ▁change
649
+ - '&'
650
+ - ä
651
+ - '#'
652
+ - ñ
653
+ - â
654
+ - é
655
+ - ü
656
+ - ']'
657
+ - q
658
+ - î
659
+ - <sos/eos>
660
+ init: xavier_uniform
661
+ input_size: null
662
+ ctc_conf:
663
+ dropout_rate: 0.0
664
+ ctc_type: builtin
665
+ reduce: true
666
+ ignore_nan_grad: null
667
+ zero_infinity: true
668
+ joint_net_conf: null
669
+ use_preprocessor: true
670
+ token_type: bpe
671
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
672
+ non_linguistic_symbols: data/nlsyms.txt
673
+ cleaner: null
674
+ g2p: null
675
+ speech_volume_normalize: null
676
+ rir_scp: null
677
+ rir_apply_prob: 1.0
678
+ noise_scp: null
679
+ noise_apply_prob: 1.0
680
+ noise_db_range: '13_15'
681
+ short_noise_thres: 0.5
682
+ aux_ctc_tasks: []
683
+ frontend: s3prl
684
+ frontend_conf:
685
+ frontend_conf:
686
+ upstream: wavlm_large
687
+ download_dir: ./hub
688
+ multilayer_feature: true
689
+ fs: 16k
690
+ specaug: specaug
691
+ specaug_conf:
692
+ apply_time_warp: false
693
+ time_warp_window: 5
694
+ time_warp_mode: bicubic
695
+ apply_freq_mask: false
696
+ freq_mask_width_range:
697
+ - 0
698
+ - 150
699
+ num_freq_mask: 4
700
+ apply_time_mask: true
701
+ time_mask_width_ratio_range:
702
+ - 0.0
703
+ - 0.15
704
+ num_time_mask: 3
705
+ normalize: utterance_mvn
706
+ normalize_conf: {}
707
+ model: espnet
708
+ model_conf:
709
+ ctc_weight: 0.3
710
+ lsm_weight: 0.1
711
+ length_normalized_loss: false
712
+ extract_feats_in_collect_stats: false
713
+ preencoder: linear
714
+ preencoder_conf:
715
+ input_size: 1024
716
+ output_size: 128
717
+ dropout: 0.2
718
+ encoder: transformer
719
+ encoder_conf:
720
+ output_size: 256
721
+ attention_heads: 4
722
+ linear_units: 2048
723
+ num_blocks: 12
724
+ dropout_rate: 0.1
725
+ attention_dropout_rate: 0.0
726
+ input_layer: conv2d2
727
+ normalize_before: true
728
+ postencoder: null
729
+ postencoder_conf: {}
730
+ decoder: transformer
731
+ decoder_conf:
732
+ input_layer: embed
733
+ attention_heads: 4
734
+ linear_units: 2048
735
+ num_blocks: 6
736
+ dropout_rate: 0.1
737
+ positional_dropout_rate: 0.0
738
+ self_attention_dropout_rate: 0.0
739
+ src_attention_dropout_rate: 0.0
740
+ preprocessor: default
741
+ preprocessor_conf: {}
742
+ required:
743
+ - output_dir
744
+ - token_list
745
+ version: '202301'
746
+ distributed: true
747
+ ```
748
+
749
+ </details>
750
+
751
+
752
+
753
+ ### Citing ESPnet
754
+
755
+ ```bibtex
756
+ @inproceedings{watanabe2018espnet,
757
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
758
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
759
+ year={2018},
760
+ booktitle={Proceedings of Interspeech},
761
+ pages={2207--2211},
762
+ doi={10.21437/Interspeech.2018-1456},
763
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
764
+ }
765
+
766
+
767
+
768
+
769
+ ```
770
+
771
+ or arXiv:
772
+
773
+ ```bibtex
774
+ @misc{watanabe2018espnet,
775
+ title={ESPnet: End-to-End Speech Processing Toolkit},
776
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
777
+ year={2018},
778
+ eprint={1804.00015},
779
+ archivePrefix={arXiv},
780
+ primaryClass={cs.CL}
781
+ }
782
+ ```
data/en_token_list/bpe_unigram500/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d76f699b72690d3ec4dea6613e405554e9d544fa343edd22a79a4d04eeba1e90
3
+ size 245153
data/nlsyms.txt ADDED
File without changes
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/RESULTS.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Feb 8 23:41:28 UTC 2023`
5
+ - python version: `3.9.2 (default, Mar 3 2021, 20:02:32) [GCC 7.3.0]`
6
+ - espnet version: `espnet 202301`
7
+ - pytorch version: `pytorch 1.13.1+cu116`
8
+ - Git hash: ``
9
+ - Commit date: ``
10
+
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/config.yaml ADDED
@@ -0,0 +1,701 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 5
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 44341
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: true
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 8
28
+ patience: 4
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ keep_nbest_models: 5
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param:
65
+ - frontend.upstream
66
+ num_iters_per_epoch: null
67
+ batch_size: 640
68
+ valid_batch_size: null
69
+ batch_bins: 1000000
70
+ valid_batch_bins: null
71
+ train_shape_file:
72
+ - exp/asr_stats_raw_en_bpe500_sp/train/speech_shape
73
+ - exp/asr_stats_raw_en_bpe500_sp/train/text_shape.bpe
74
+ valid_shape_file:
75
+ - exp/asr_stats_raw_en_bpe500_sp/valid/speech_shape
76
+ - exp/asr_stats_raw_en_bpe500_sp/valid/text_shape.bpe
77
+ batch_type: folded
78
+ valid_batch_type: null
79
+ fold_length:
80
+ - 80000
81
+ - 150
82
+ sort_in_batch: descending
83
+ sort_batch: descending
84
+ multiple_iterator: false
85
+ chunk_length: 500
86
+ chunk_shift_ratio: 0.5
87
+ num_cache_chunks: 1024
88
+ train_data_path_and_name_and_type:
89
+ - - dump/raw/kaldi/train_all_mdm_ihm_rvb_gss_sp/wav.scp
90
+ - speech
91
+ - sound
92
+ - - dump/raw/kaldi/train_all_mdm_ihm_rvb_gss_sp/text
93
+ - text
94
+ - text
95
+ valid_data_path_and_name_and_type:
96
+ - - dump/raw/kaldi/chime6/dev/gss/wav.scp
97
+ - speech
98
+ - sound
99
+ - - dump/raw/kaldi/chime6/dev/gss/text
100
+ - text
101
+ - text
102
+ allow_variable_data_keys: false
103
+ max_cache_size: 0.0
104
+ max_cache_fd: 32
105
+ valid_max_cache_size: null
106
+ exclude_weight_decay: false
107
+ exclude_weight_decay_conf: {}
108
+ optim: adam
109
+ optim_conf:
110
+ lr: 0.0005
111
+ scheduler: warmuplr
112
+ scheduler_conf:
113
+ warmup_steps: 8000
114
+ token_list:
115
+ - <blank>
116
+ - <unk>
117
+ - s
118
+ - ''''
119
+ - ▁i
120
+ - t
121
+ - ▁it
122
+ - ▁a
123
+ - e
124
+ - ▁you
125
+ - ▁the
126
+ - ▁like
127
+ - ▁yeah
128
+ - a
129
+ - d
130
+ - ▁and
131
+ - m
132
+ - ▁that
133
+ - ▁to
134
+ - n
135
+ - i
136
+ - y
137
+ - ing
138
+ - o
139
+ - u
140
+ - ▁so
141
+ - p
142
+ - ▁of
143
+ - ▁in
144
+ - re
145
+ - ▁was
146
+ - c
147
+ - r
148
+ - ▁just
149
+ - er
150
+ - ▁know
151
+ - ▁oh
152
+ - ed
153
+ - ▁but
154
+ - ▁ummm
155
+ - ▁we
156
+ - l
157
+ - ▁no
158
+ - ▁they
159
+ - ▁have
160
+ - ▁do
161
+ - g
162
+ - ▁he
163
+ - k
164
+ - ll
165
+ - ▁uhhh
166
+ - ▁don
167
+ - ▁for
168
+ - h
169
+ - ▁what
170
+ - ▁be
171
+ - ar
172
+ - ▁is
173
+ - ▁there
174
+ - '-'
175
+ - ▁s
176
+ - ▁this
177
+ - in
178
+ - b
179
+ - ▁
180
+ - en
181
+ - ▁on
182
+ - ▁p
183
+ - ▁can
184
+ - al
185
+ - ▁not
186
+ - w
187
+ - ▁my
188
+ - ▁one
189
+ - ic
190
+ - f
191
+ - ▁or
192
+ - ▁really
193
+ - ▁go
194
+ - ▁right
195
+ - ▁me
196
+ - an
197
+ - ▁w
198
+ - or
199
+ - le
200
+ - ▁f
201
+ - ▁think
202
+ - ▁okay
203
+ - ▁all
204
+ - ▁then
205
+ - ▁with
206
+ - ▁are
207
+ - ▁get
208
+ - it
209
+ - ▁t
210
+ - ▁st
211
+ - ve
212
+ - ▁hmmm
213
+ - ▁g
214
+ - ▁if
215
+ - ce
216
+ - 'on'
217
+ - ▁she
218
+ - ▁good
219
+ - ▁e
220
+ - es
221
+ - ▁well
222
+ - v
223
+ - ▁re
224
+ - th
225
+ - ter
226
+ - ch
227
+ - ▁out
228
+ - ▁up
229
+ - ly
230
+ - ▁b
231
+ - ▁ma
232
+ - il
233
+ - ▁would
234
+ - ▁at
235
+ - ▁want
236
+ - ▁mean
237
+ - ▁ch
238
+ - ▁your
239
+ - ▁people
240
+ - ur
241
+ - ▁how
242
+ - ▁k
243
+ - ▁co
244
+ - ▁about
245
+ - ▁tr
246
+ - ▁ba
247
+ - ▁kind
248
+ - ▁when
249
+ - ▁mi
250
+ - ▁because
251
+ - ro
252
+ - ▁had
253
+ - ▁ho
254
+ - ▁gonna
255
+ - ▁time
256
+ - ▁more
257
+ - ▁got
258
+ - ▁some
259
+ - ▁two
260
+ - ▁did
261
+ - ▁see
262
+ - ▁now
263
+ - ▁pa
264
+ - ra
265
+ - ▁de
266
+ - ▁lot
267
+ - ▁actually
268
+ - ▁o
269
+ - ▁too
270
+ - ate
271
+ - ▁here
272
+ - ▁cuz
273
+ - ▁sp
274
+ - ▁where
275
+ - ▁going
276
+ - ▁j
277
+ - ▁from
278
+ - ▁bo
279
+ - ▁them
280
+ - ▁bu
281
+ - ▁put
282
+ - ▁thing
283
+ - ng
284
+ - ▁were
285
+ - ▁n
286
+ - ▁sh
287
+ - ▁work
288
+ - el
289
+ - ▁something
290
+ - ▁se
291
+ - ▁say
292
+ - ke
293
+ - ow
294
+ - ▁ca
295
+ - ▁fa
296
+ - ▁need
297
+ - sh
298
+ - ▁di
299
+ - ▁po
300
+ - ▁make
301
+ - la
302
+ - ▁br
303
+ - ▁v
304
+ - ▁an
305
+ - ▁who
306
+ - ion
307
+ - ▁y
308
+ - ▁look
309
+ - ▁didn
310
+ - ▁could
311
+ - ▁little
312
+ - ver
313
+ - ▁c
314
+ - ▁mo
315
+ - ▁much
316
+ - ▁very
317
+ - ir
318
+ - ▁sa
319
+ - ▁play
320
+ - ▁pretty
321
+ - ▁been
322
+ - ▁d
323
+ - ▁other
324
+ - ▁year
325
+ - and
326
+ - ▁mm
327
+ - ▁stuff
328
+ - ▁dr
329
+ - ▁why
330
+ - ▁con
331
+ - ▁su
332
+ - ▁back
333
+ - ▁ex
334
+ - ting
335
+ - ▁take
336
+ - ▁li
337
+ - ▁even
338
+ - ▁should
339
+ - ▁her
340
+ - ally
341
+ - lo
342
+ - ation
343
+ - ▁way
344
+ - ▁guess
345
+ - ▁has
346
+ - z
347
+ - ▁three
348
+ - ry
349
+ - ▁ha
350
+ - ies
351
+ - is
352
+ - x
353
+ - ▁ro
354
+ - ▁yes
355
+ - ▁th
356
+ - ▁use
357
+ - ▁down
358
+ - ous
359
+ - ▁over
360
+ - ▁probably
361
+ - ▁guys
362
+ - ▁maybe
363
+ - ▁still
364
+ - ▁cr
365
+ - ▁which
366
+ - ▁nice
367
+ - und
368
+ - ▁sure
369
+ - ▁l
370
+ - ▁off
371
+ - ▁la
372
+ - ▁cu
373
+ - est
374
+ - ▁any
375
+ - ▁fi
376
+ - ▁these
377
+ - ▁ra
378
+ - ▁went
379
+ - ▁things
380
+ - ment
381
+ - ▁doing
382
+ - ▁day
383
+ - ▁un
384
+ - ▁lo
385
+ - ▁da
386
+ - ▁only
387
+ - igh
388
+ - ▁come
389
+ - ▁big
390
+ - ▁those
391
+ - ▁wanna
392
+ - ▁bit
393
+ - ▁never
394
+ - ▁us
395
+ - ol
396
+ - ▁though
397
+ - ▁first
398
+ - ive
399
+ - ▁their
400
+ - ▁let
401
+ - ▁start
402
+ - ▁his
403
+ - ▁four
404
+ - ▁le
405
+ - ▁eat
406
+ - ist
407
+ - ▁school
408
+ - us
409
+ - ▁into
410
+ - ▁yep
411
+ - uck
412
+ - ▁than
413
+ - ▁him
414
+ - ▁hi
415
+ - ▁also
416
+ - ▁five
417
+ - side
418
+ - ▁new
419
+ - ▁comp
420
+ - ▁cool
421
+ - ▁talk
422
+ - ▁said
423
+ - ▁pro
424
+ - ▁r
425
+ - ▁always
426
+ - ▁ri
427
+ - ▁cl
428
+ - ▁long
429
+ - able
430
+ - ▁sc
431
+ - ▁gra
432
+ - ▁by
433
+ - ▁friend
434
+ - age
435
+ - ▁different
436
+ - ▁live
437
+ - ▁doesn
438
+ - ▁place
439
+ - ▁sorry
440
+ - ▁will
441
+ - ▁feel
442
+ - ▁does
443
+ - ▁part
444
+ - ▁wait
445
+ - ▁six
446
+ - ▁watch
447
+ - ▁anything
448
+ - ▁man
449
+ - ▁our
450
+ - ▁car
451
+ - ▁huh
452
+ - ▁whatever
453
+ - ▁last
454
+ - ▁give
455
+ - ▁ten
456
+ - ▁before
457
+ - ▁thought
458
+ - ▁after
459
+ - ▁game
460
+ - ▁card
461
+ - ▁fl
462
+ - ▁every
463
+ - cause
464
+ - ▁same
465
+ - ▁around
466
+ - ▁cook
467
+ - ▁week
468
+ - ▁hu
469
+ - ▁everything
470
+ - ▁fine
471
+ - ▁many
472
+ - ▁qu
473
+ - ▁read
474
+ - ▁tea
475
+ - ough
476
+ - ance
477
+ - ▁turn
478
+ - ▁wow
479
+ - ▁fun
480
+ - ▁hard
481
+ - ▁great
482
+ - ▁love
483
+ - ▁remember
484
+ - ▁twenty
485
+ - ▁whole
486
+ - ▁happen
487
+ - ▁seven
488
+ - ▁keep
489
+ - ▁food
490
+ - ▁most
491
+ - j
492
+ - ▁might
493
+ - ▁thank
494
+ - ▁move
495
+ - ▁job
496
+ - ▁eight
497
+ - ▁mu
498
+ - ▁sort
499
+ - ▁better
500
+ - port
501
+ - ▁another
502
+ - ful
503
+ - ▁point
504
+ - ▁show
505
+ - ▁again
506
+ - ▁high
507
+ - ize
508
+ - ▁house
509
+ - ▁home
510
+ - ▁person
511
+ - ▁old
512
+ - ▁end
513
+ - ▁through
514
+ - ▁pick
515
+ - ▁else
516
+ - ▁guy
517
+ - ▁app
518
+ - ▁find
519
+ - ▁nine
520
+ - ▁hand
521
+ - ▁kid
522
+ - ▁interesting
523
+ - ▁city
524
+ - ▁called
525
+ - ▁tell
526
+ - ▁half
527
+ - ▁name
528
+ - ▁definitely
529
+ - ▁made
530
+ - ▁exactly
531
+ - ▁came
532
+ - ▁wood
533
+ - ▁funny
534
+ - ▁basically
535
+ - ▁count
536
+ - ▁usually
537
+ - ▁help
538
+ - ▁someone
539
+ - ▁already
540
+ - ▁dunno
541
+ - ▁enough
542
+ - ction
543
+ - ▁own
544
+ - ▁weird
545
+ - ▁next
546
+ - ▁hundred
547
+ - ▁small
548
+ - ▁money
549
+ - ▁couple
550
+ - ▁while
551
+ - ▁close
552
+ - ▁movie
553
+ - ▁sometimes
554
+ - ▁everyone
555
+ - ▁away
556
+ - ▁true
557
+ - ▁super
558
+ - ▁cheese
559
+ - ▁class
560
+ - ▁night
561
+ - ▁life
562
+ - ▁leave
563
+ - ▁plan
564
+ - ▁water
565
+ - ▁left
566
+ - ▁thirty
567
+ - ▁family
568
+ - ▁phone
569
+ - ▁build
570
+ - ▁room
571
+ - ▁month
572
+ - ▁open
573
+ - ▁idea
574
+ - ▁second
575
+ - ▁dude
576
+ - ▁music
577
+ - ▁each
578
+ - ▁learn
579
+ - ▁girl
580
+ - ▁together
581
+ - ▁under
582
+ - ▁run
583
+ - ▁chicken
584
+ - ▁having
585
+ - ▁either
586
+ - ▁almost
587
+ - ▁crazy
588
+ - ▁book
589
+ - ▁sauce
590
+ - ▁supposed
591
+ - ▁course
592
+ - ▁speak
593
+ - ▁awesome
594
+ - ▁anyway
595
+ - ▁throw
596
+ - ▁finish
597
+ - ▁world
598
+ - ▁reason
599
+ - ▁check
600
+ - ▁least
601
+ - ▁parents
602
+ - ▁everybody
603
+ - ▁change
604
+ - '&'
605
+ - ä
606
+ - '#'
607
+ - ñ
608
+ - â
609
+ - é
610
+ - ü
611
+ - ']'
612
+ - q
613
+ - î
614
+ - <sos/eos>
615
+ init: xavier_uniform
616
+ input_size: null
617
+ ctc_conf:
618
+ dropout_rate: 0.0
619
+ ctc_type: builtin
620
+ reduce: true
621
+ ignore_nan_grad: null
622
+ zero_infinity: true
623
+ joint_net_conf: null
624
+ use_preprocessor: true
625
+ token_type: bpe
626
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
627
+ non_linguistic_symbols: data/nlsyms.txt
628
+ cleaner: null
629
+ g2p: null
630
+ speech_volume_normalize: null
631
+ rir_scp: null
632
+ rir_apply_prob: 1.0
633
+ noise_scp: null
634
+ noise_apply_prob: 1.0
635
+ noise_db_range: '13_15'
636
+ short_noise_thres: 0.5
637
+ aux_ctc_tasks: []
638
+ frontend: s3prl
639
+ frontend_conf:
640
+ frontend_conf:
641
+ upstream: wavlm_large
642
+ download_dir: ./hub
643
+ multilayer_feature: true
644
+ fs: 16k
645
+ specaug: specaug
646
+ specaug_conf:
647
+ apply_time_warp: false
648
+ time_warp_window: 5
649
+ time_warp_mode: bicubic
650
+ apply_freq_mask: false
651
+ freq_mask_width_range:
652
+ - 0
653
+ - 150
654
+ num_freq_mask: 4
655
+ apply_time_mask: true
656
+ time_mask_width_ratio_range:
657
+ - 0.0
658
+ - 0.15
659
+ num_time_mask: 3
660
+ normalize: utterance_mvn
661
+ normalize_conf: {}
662
+ model: espnet
663
+ model_conf:
664
+ ctc_weight: 0.3
665
+ lsm_weight: 0.1
666
+ length_normalized_loss: false
667
+ extract_feats_in_collect_stats: false
668
+ preencoder: linear
669
+ preencoder_conf:
670
+ input_size: 1024
671
+ output_size: 128
672
+ dropout: 0.2
673
+ encoder: transformer
674
+ encoder_conf:
675
+ output_size: 256
676
+ attention_heads: 4
677
+ linear_units: 2048
678
+ num_blocks: 12
679
+ dropout_rate: 0.1
680
+ attention_dropout_rate: 0.0
681
+ input_layer: conv2d2
682
+ normalize_before: true
683
+ postencoder: null
684
+ postencoder_conf: {}
685
+ decoder: transformer
686
+ decoder_conf:
687
+ input_layer: embed
688
+ attention_heads: 4
689
+ linear_units: 2048
690
+ num_blocks: 6
691
+ dropout_rate: 0.1
692
+ positional_dropout_rate: 0.0
693
+ self_attention_dropout_rate: 0.0
694
+ src_attention_dropout_rate: 0.0
695
+ preprocessor: default
696
+ preprocessor_conf: {}
697
+ required:
698
+ - output_dir
699
+ - token_list
700
+ version: '202301'
701
+ distributed: true
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/acc.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/backward_time.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/cer.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/cer_ctc.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/forward_time.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/iter_time.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/loss.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/loss_att.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/loss_ctc.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/optim0_lr0.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/optim_step_time.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/train_time.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/images/wer.png ADDED
exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/valid.acc.ave_5best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef025dd9cc97fe7bbce1c7f13e13ce1446c4ebc6103cfcc3add3381b87b5b913
3
+ size 1383608373
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202301'
2
+ files:
3
+ asr_model_file: exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/valid.acc.ave_5best.pth
4
+ python: "3.9.2 (default, Mar 3 2021, 20:02:32) \n[GCC 7.3.0]"
5
+ timestamp: 1675899805.515879
6
+ torch: 1.13.1+cu116
7
+ yaml_files:
8
+ asr_train_config: exp/asr_train_asr_transformer_wavlm_lr1e-4_specaugm_accum1_preenc128_warmup20k_raw_en_bpe500_batch_size640_scheduler_confwarmup_steps8000_max_epoch8_optim_conflr0.000500000000_sp/config.yaml