longdnk commited on
Commit
f53ff1c
·
verified ·
1 Parent(s): c9d4e41

Upload 10 files

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fde7d702f16adb04894390615fe7257ea7e432cc8c618b145e21e24d96027064
3
  size 377851056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82a05a7df138ff182263e2e6f0ba1f5e82f9b1a190e63085de22c42fd8931237
3
  size 377851056
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:440455cfc382222c8f82d640f542694776d5509f7f69dc1794bb575bc34757b6
3
  size 722217338
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f05c926aab3b5559ee01ae30be37a775d29c2720683d1d6df7d301632c7e60a
3
  size 722217338
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd5bb1d1b8e8892cbeae3b90dba5cfe5683bab00ce5b9e82d9cf9347195d22c8
3
  size 377899102
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1a522037a4b89dbbfead54522650100d82562cbd01c086bc5e3b1c8b42ddc55
3
  size 377899102
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02d1956f4d55df5967abc0a1cd9710622902be7319dcd00020acbd5d7eca165b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1e731ad63e753d284193b80f173a2ff1be45026693bc2996a76b495f4e45667
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a30cfca4f11f8b61f9e23428a9d301331ae17ea3f0a4bfb786fddd335aaca9a2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b9e3258117db71c9cd1d1b5432f9dc26b0468be84ff12e8519902ace3644fea
3
  size 1064
trainer_state.json CHANGED
@@ -1,578 +1,198 @@
1
  {
2
- "best_metric": 0.0727572962641716,
3
- "best_model_checkpoint": "./wav2vec2-base-demo/checkpoint-10500",
4
- "epoch": 30.0,
5
  "eval_steps": 500,
6
- "global_step": 14430,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0395010395010396,
13
- "grad_norm": 2.8997974395751953,
14
  "learning_rate": 5e-05,
15
- "loss": 0.3549,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0395010395010396,
20
- "eval_cer": 0.04969298673090043,
21
- "eval_loss": 0.1506405621767044,
22
- "eval_mer": 0.1328236435214241,
23
- "eval_runtime": 33.659,
24
- "eval_samples_per_second": 45.664,
25
- "eval_steps_per_second": 5.734,
26
- "eval_wer": 0.13357048597165644,
27
- "eval_wil": 0.23588589232550194,
28
- "eval_wip": 0.764114107674498,
29
  "step": 500
30
  },
31
  {
32
  "epoch": 2.079002079002079,
33
- "grad_norm": 3.4246842861175537,
34
- "learning_rate": 4.820890165111271e-05,
35
- "loss": 0.3256,
36
  "step": 1000
37
  },
38
  {
39
  "epoch": 2.079002079002079,
40
- "eval_cer": 0.04905250553459295,
41
- "eval_loss": 0.14544856548309326,
42
- "eval_mer": 0.13181039406053682,
43
- "eval_runtime": 33.3911,
44
- "eval_samples_per_second": 46.03,
45
- "eval_steps_per_second": 5.78,
46
- "eval_wer": 0.13242297320557692,
47
- "eval_wil": 0.23395019111086257,
48
- "eval_wip": 0.7660498088891374,
49
  "step": 1000
50
  },
51
  {
52
  "epoch": 3.1185031185031185,
53
- "grad_norm": 1.9387176036834717,
54
- "learning_rate": 4.641780330222542e-05,
55
- "loss": 0.3255,
56
  "step": 1500
57
  },
58
  {
59
  "epoch": 3.1185031185031185,
60
- "eval_cer": 0.04656019827070077,
61
- "eval_loss": 0.13236825168132782,
62
- "eval_mer": 0.12753556939603453,
63
- "eval_runtime": 33.3664,
64
- "eval_samples_per_second": 46.064,
65
- "eval_steps_per_second": 5.784,
66
- "eval_wer": 0.12806242469447474,
67
- "eval_wil": 0.22709355878713633,
68
- "eval_wip": 0.7729064412128637,
69
  "step": 1500
70
  },
71
  {
72
  "epoch": 4.158004158004158,
73
- "grad_norm": 1.5572136640548706,
74
- "learning_rate": 4.4626704953338125e-05,
75
- "loss": 0.2866,
76
  "step": 2000
77
  },
78
  {
79
  "epoch": 4.158004158004158,
80
- "eval_cer": 0.04639311621949012,
81
- "eval_loss": 0.12307066470384598,
82
- "eval_mer": 0.12514292247884748,
83
- "eval_runtime": 33.3438,
84
- "eval_samples_per_second": 46.096,
85
- "eval_steps_per_second": 5.788,
86
- "eval_wer": 0.12559527224740374,
87
- "eval_wil": 0.22333348147081955,
88
- "eval_wip": 0.7766665185291804,
89
  "step": 2000
90
  },
91
  {
92
  "epoch": 5.197505197505198,
93
- "grad_norm": 2.1455116271972656,
94
- "learning_rate": 4.283201722900216e-05,
95
- "loss": 0.2922,
96
  "step": 2500
97
  },
98
  {
99
  "epoch": 5.197505197505198,
100
- "eval_cer": 0.04445774912630011,
101
- "eval_loss": 0.1228480413556099,
102
- "eval_mer": 0.12246998284734134,
103
- "eval_runtime": 33.4324,
104
- "eval_samples_per_second": 45.973,
105
- "eval_steps_per_second": 5.773,
106
- "eval_wer": 0.12289861724711687,
107
- "eval_wil": 0.2199313920357091,
108
- "eval_wip": 0.7800686079642909,
109
  "step": 2500
110
  },
111
  {
112
  "epoch": 6.237006237006237,
113
- "grad_norm": 1.340448260307312,
114
- "learning_rate": 4.104091888011487e-05,
115
- "loss": 0.2675,
116
  "step": 3000
117
  },
118
  {
119
  "epoch": 6.237006237006237,
120
- "eval_cer": 0.04433243758789212,
121
- "eval_loss": 0.1106928363442421,
122
- "eval_mer": 0.12154127601189114,
123
- "eval_runtime": 33.3844,
124
- "eval_samples_per_second": 46.039,
125
- "eval_steps_per_second": 5.781,
126
- "eval_wer": 0.12198060703425326,
127
- "eval_wil": 0.21841637697011862,
128
- "eval_wip": 0.7815836230298814,
129
  "step": 3000
130
  },
131
  {
132
  "epoch": 7.276507276507276,
133
- "grad_norm": 2.0157642364501953,
134
- "learning_rate": 3.924982053122757e-05,
135
- "loss": 0.2506,
136
  "step": 3500
137
  },
138
  {
139
  "epoch": 7.276507276507276,
140
- "eval_cer": 0.04295401066540427,
141
- "eval_loss": 0.1074899286031723,
142
- "eval_mer": 0.1207804096578556,
143
- "eval_runtime": 33.3924,
144
- "eval_samples_per_second": 46.028,
145
- "eval_steps_per_second": 5.78,
146
- "eval_wer": 0.12111997245969361,
147
- "eval_wil": 0.2170950962978776,
148
- "eval_wip": 0.7829049037021224,
149
  "step": 3500
150
  },
151
  {
152
  "epoch": 8.316008316008316,
153
- "grad_norm": 1.8766114711761475,
154
- "learning_rate": 3.7458722182340276e-05,
155
- "loss": 0.2537,
156
  "step": 4000
157
  },
158
  {
159
  "epoch": 8.316008316008316,
160
- "eval_cer": 0.04140850169170577,
161
- "eval_loss": 0.10360275208950043,
162
- "eval_mer": 0.11801029159519726,
163
- "eval_runtime": 33.396,
164
- "eval_samples_per_second": 46.023,
165
- "eval_steps_per_second": 5.779,
166
- "eval_wer": 0.11842331745940674,
167
- "eval_wil": 0.21248246079158462,
168
- "eval_wip": 0.7875175392084154,
169
  "step": 4000
170
  },
171
  {
172
  "epoch": 9.355509355509355,
173
- "grad_norm": 1.7823630571365356,
174
- "learning_rate": 3.566403445800431e-05,
175
- "loss": 0.2473,
176
  "step": 4500
177
  },
178
  {
179
  "epoch": 9.355509355509355,
180
- "eval_cer": 0.04121357263196001,
181
- "eval_loss": 0.10312958061695099,
182
- "eval_mer": 0.11691058769723302,
183
- "eval_runtime": 33.7868,
184
- "eval_samples_per_second": 45.491,
185
- "eval_steps_per_second": 5.712,
186
- "eval_wer": 0.11733318033163119,
187
- "eval_wil": 0.21024574000779772,
188
- "eval_wip": 0.7897542599922023,
189
  "step": 4500
190
- },
191
- {
192
- "epoch": 10.395010395010395,
193
- "grad_norm": 3.1602580547332764,
194
- "learning_rate": 3.3869346733668345e-05,
195
- "loss": 0.2455,
196
- "step": 5000
197
- },
198
- {
199
- "epoch": 10.395010395010395,
200
- "eval_cer": 0.04086548502527116,
201
- "eval_loss": 0.09843221306800842,
202
- "eval_mer": 0.11552759508149843,
203
- "eval_runtime": 33.4361,
204
- "eval_samples_per_second": 45.968,
205
- "eval_steps_per_second": 5.772,
206
- "eval_wer": 0.11589878937403178,
207
- "eval_wil": 0.20822140556647772,
208
- "eval_wip": 0.7917785944335223,
209
- "step": 5000
210
- },
211
- {
212
- "epoch": 11.434511434511435,
213
- "grad_norm": 1.959326148033142,
214
- "learning_rate": 3.207465900933237e-05,
215
- "loss": 0.2322,
216
- "step": 5500
217
- },
218
- {
219
- "epoch": 11.434511434511435,
220
- "eval_cer": 0.03993261023934504,
221
- "eval_loss": 0.09337513148784637,
222
- "eval_mer": 0.11474472585901321,
223
- "eval_runtime": 33.5875,
224
- "eval_samples_per_second": 45.761,
225
- "eval_steps_per_second": 5.746,
226
- "eval_wer": 0.1151529060760801,
227
- "eval_wil": 0.2070981915905874,
228
- "eval_wip": 0.7929018084094126,
229
- "step": 5500
230
- },
231
- {
232
- "epoch": 12.474012474012474,
233
- "grad_norm": 1.514904499053955,
234
- "learning_rate": 3.027997128499641e-05,
235
- "loss": 0.251,
236
- "step": 6000
237
- },
238
- {
239
- "epoch": 12.474012474012474,
240
- "eval_cer": 0.03943136408571309,
241
- "eval_loss": 0.09094855934381485,
242
- "eval_mer": 0.11248355921541717,
243
- "eval_runtime": 33.5995,
244
- "eval_samples_per_second": 45.745,
245
- "eval_steps_per_second": 5.744,
246
- "eval_wer": 0.11285788054392105,
247
- "eval_wil": 0.20317729770906068,
248
- "eval_wip": 0.7968227022909393,
249
- "step": 6000
250
- },
251
- {
252
- "epoch": 13.513513513513514,
253
- "grad_norm": 3.5788846015930176,
254
- "learning_rate": 2.848528356066045e-05,
255
- "loss": 0.2243,
256
- "step": 6500
257
- },
258
- {
259
- "epoch": 13.513513513513514,
260
- "eval_cer": 0.03856810682112474,
261
- "eval_loss": 0.09059835970401764,
262
- "eval_mer": 0.11314599668059291,
263
- "eval_runtime": 33.4711,
264
- "eval_samples_per_second": 45.92,
265
- "eval_steps_per_second": 5.766,
266
- "eval_wer": 0.11343163692696082,
267
- "eval_wil": 0.20495219335271586,
268
- "eval_wip": 0.7950478066472841,
269
- "step": 6500
270
- },
271
- {
272
- "epoch": 14.553014553014552,
273
- "grad_norm": 2.408891201019287,
274
- "learning_rate": 2.6690595836324484e-05,
275
- "loss": 0.2186,
276
- "step": 7000
277
- },
278
- {
279
- "epoch": 14.553014553014552,
280
- "eval_cer": 0.0384288717784492,
281
- "eval_loss": 0.08281438052654266,
282
- "eval_mer": 0.11403860473108425,
283
- "eval_runtime": 33.5039,
284
- "eval_samples_per_second": 45.875,
285
- "eval_steps_per_second": 5.761,
286
- "eval_wer": 0.11423489586321647,
287
- "eval_wil": 0.20612773409748175,
288
- "eval_wip": 0.7938722659025182,
289
- "step": 7000
290
- },
291
- {
292
- "epoch": 15.592515592515593,
293
- "grad_norm": 1.2376307249069214,
294
- "learning_rate": 2.4903086862885857e-05,
295
- "loss": 0.2146,
296
- "step": 7500
297
- },
298
- {
299
- "epoch": 15.592515592515593,
300
- "eval_cer": 0.03816432519736567,
301
- "eval_loss": 0.08768957108259201,
302
- "eval_mer": 0.11135895676046671,
303
- "eval_runtime": 33.5042,
304
- "eval_samples_per_second": 45.875,
305
- "eval_steps_per_second": 5.76,
306
- "eval_wer": 0.11171036777784153,
307
- "eval_wil": 0.20133862836387018,
308
- "eval_wip": 0.7986613716361298,
309
- "step": 7500
310
- },
311
- {
312
- "epoch": 16.632016632016633,
313
- "grad_norm": 1.0698959827423096,
314
- "learning_rate": 2.3108399138549895e-05,
315
- "loss": 0.2076,
316
- "step": 8000
317
- },
318
- {
319
- "epoch": 16.632016632016633,
320
- "eval_cer": 0.03784408459921193,
321
- "eval_loss": 0.08166921883821487,
322
- "eval_mer": 0.11109204278931412,
323
- "eval_runtime": 33.5384,
324
- "eval_samples_per_second": 45.828,
325
- "eval_steps_per_second": 5.755,
326
- "eval_wer": 0.11142348958632164,
327
- "eval_wil": 0.20099477953828748,
328
- "eval_wip": 0.7990052204617125,
329
- "step": 8000
330
- },
331
- {
332
- "epoch": 17.671517671517673,
333
- "grad_norm": 2.2200756072998047,
334
- "learning_rate": 2.13173007896626e-05,
335
- "loss": 0.2017,
336
- "step": 8500
337
- },
338
- {
339
- "epoch": 17.671517671517673,
340
- "eval_cer": 0.03691120981328581,
341
- "eval_loss": 0.07834411412477493,
342
- "eval_mer": 0.11015056964561745,
343
- "eval_runtime": 33.5299,
344
- "eval_samples_per_second": 45.84,
345
- "eval_steps_per_second": 5.756,
346
- "eval_wer": 0.11039072809685008,
347
- "eval_wil": 0.20021436062866027,
348
- "eval_wip": 0.7997856393713397,
349
- "step": 8500
350
- },
351
- {
352
- "epoch": 18.71101871101871,
353
- "grad_norm": 3.6737000942230225,
354
- "learning_rate": 1.9526202440775307e-05,
355
- "loss": 0.199,
356
- "step": 9000
357
- },
358
- {
359
- "epoch": 18.71101871101871,
360
- "eval_cer": 0.037259297419974656,
361
- "eval_loss": 0.07848495990037918,
362
- "eval_mer": 0.1108757870635375,
363
- "eval_runtime": 33.4848,
364
- "eval_samples_per_second": 45.901,
365
- "eval_steps_per_second": 5.764,
366
- "eval_wer": 0.11113661139480177,
367
- "eval_wil": 0.20059739778337193,
368
- "eval_wip": 0.7994026022166281,
369
- "step": 9000
370
- },
371
- {
372
- "epoch": 19.75051975051975,
373
- "grad_norm": 2.476435899734497,
374
- "learning_rate": 1.773151471643934e-05,
375
- "loss": 0.1983,
376
- "step": 9500
377
- },
378
- {
379
- "epoch": 19.75051975051975,
380
- "eval_cer": 0.037482073488255524,
381
- "eval_loss": 0.08044513314962387,
382
- "eval_mer": 0.11039295315449293,
383
- "eval_runtime": 33.5182,
384
- "eval_samples_per_second": 45.856,
385
- "eval_steps_per_second": 5.758,
386
- "eval_wer": 0.11073498192667393,
387
- "eval_wil": 0.19978514503968547,
388
- "eval_wip": 0.8002148549603145,
389
- "step": 9500
390
- },
391
- {
392
- "epoch": 20.79002079002079,
393
- "grad_norm": 1.4843103885650635,
394
- "learning_rate": 1.5940416367552046e-05,
395
- "loss": 0.1931,
396
- "step": 10000
397
- },
398
- {
399
- "epoch": 20.79002079002079,
400
- "eval_cer": 0.03720360340290444,
401
- "eval_loss": 0.07748907804489136,
402
- "eval_mer": 0.11006811287276057,
403
- "eval_runtime": 33.4846,
404
- "eval_samples_per_second": 45.902,
405
- "eval_steps_per_second": 5.764,
406
- "eval_wer": 0.1103333524585461,
407
- "eval_wil": 0.19979205874359685,
408
- "eval_wip": 0.8002079412564032,
409
- "step": 10000
410
- },
411
- {
412
- "epoch": 21.82952182952183,
413
- "grad_norm": 7.590743064880371,
414
- "learning_rate": 1.4156496769562097e-05,
415
- "loss": 0.1723,
416
- "step": 10500
417
- },
418
- {
419
- "epoch": 21.82952182952183,
420
- "eval_cer": 0.03642388716392141,
421
- "eval_loss": 0.0727572962641716,
422
- "eval_mer": 0.10935531890530173,
423
- "eval_runtime": 33.5546,
424
- "eval_samples_per_second": 45.806,
425
- "eval_steps_per_second": 5.752,
426
- "eval_wer": 0.1095874691605944,
427
- "eval_wil": 0.1990145874796534,
428
- "eval_wip": 0.8009854125203466,
429
- "step": 10500
430
- },
431
- {
432
- "epoch": 22.86902286902287,
433
- "grad_norm": 1.670258641242981,
434
- "learning_rate": 1.2365398420674803e-05,
435
- "loss": 0.1996,
436
- "step": 11000
437
- },
438
- {
439
- "epoch": 22.86902286902287,
440
- "eval_cer": 0.03600618203589479,
441
- "eval_loss": 0.07854931801557541,
442
- "eval_mer": 0.10902427851580394,
443
- "eval_runtime": 33.6003,
444
- "eval_samples_per_second": 45.744,
445
- "eval_steps_per_second": 5.744,
446
- "eval_wer": 0.10924321533077055,
447
- "eval_wil": 0.19832511876306813,
448
- "eval_wip": 0.8016748812369319,
449
- "step": 11000
450
- },
451
- {
452
- "epoch": 23.908523908523907,
453
- "grad_norm": 1.4487839937210083,
454
- "learning_rate": 1.0577889447236182e-05,
455
- "loss": 0.1753,
456
- "step": 11500
457
- },
458
- {
459
- "epoch": 23.908523908523907,
460
- "eval_cer": 0.03652135169379429,
461
- "eval_loss": 0.08054520934820175,
462
- "eval_mer": 0.10793342103757936,
463
- "eval_runtime": 33.5332,
464
- "eval_samples_per_second": 45.835,
465
- "eval_steps_per_second": 5.755,
466
- "eval_wer": 0.10826782947960296,
467
- "eval_wil": 0.19572523190378577,
468
- "eval_wip": 0.8042747680962142,
469
- "step": 11500
470
- },
471
- {
472
- "epoch": 24.948024948024948,
473
- "grad_norm": 1.3358678817749023,
474
- "learning_rate": 8.783201722900215e-06,
475
- "loss": 0.1781,
476
- "step": 12000
477
- },
478
- {
479
- "epoch": 24.948024948024948,
480
- "eval_cer": 0.03600618203589479,
481
- "eval_loss": 0.07418987154960632,
482
- "eval_mer": 0.10750042908633217,
483
- "eval_runtime": 33.4349,
484
- "eval_samples_per_second": 45.97,
485
- "eval_steps_per_second": 5.772,
486
- "eval_wer": 0.10780882437317116,
487
- "eval_wil": 0.19526624761364586,
488
- "eval_wip": 0.8047337523863541,
489
- "step": 12000
490
- },
491
- {
492
- "epoch": 25.987525987525988,
493
- "grad_norm": 2.8383772373199463,
494
- "learning_rate": 6.98851399856425e-06,
495
- "loss": 0.1828,
496
- "step": 12500
497
- },
498
- {
499
- "epoch": 25.987525987525988,
500
- "eval_cer": 0.03568594143774105,
501
- "eval_loss": 0.07407771795988083,
502
- "eval_mer": 0.10735757228743201,
503
- "eval_runtime": 33.4962,
504
- "eval_samples_per_second": 45.886,
505
- "eval_steps_per_second": 5.762,
506
- "eval_wer": 0.10757932181995525,
507
- "eval_wil": 0.19555581909035935,
508
- "eval_wip": 0.8044441809096406,
509
- "step": 12500
510
- },
511
- {
512
- "epoch": 27.027027027027028,
513
- "grad_norm": 2.1374740600585938,
514
- "learning_rate": 5.197415649676956e-06,
515
- "loss": 0.1871,
516
- "step": 13000
517
- },
518
- {
519
- "epoch": 27.027027027027028,
520
- "eval_cer": 0.03571378844627616,
521
- "eval_loss": 0.0738675594329834,
522
- "eval_mer": 0.10779711472406686,
523
- "eval_runtime": 33.5044,
524
- "eval_samples_per_second": 45.875,
525
- "eval_steps_per_second": 5.76,
526
- "eval_wer": 0.10803832692638705,
527
- "eval_wil": 0.1961644777961592,
528
- "eval_wip": 0.8038355222038408,
529
- "step": 13000
530
- },
531
- {
532
- "epoch": 28.066528066528065,
533
- "grad_norm": 1.8452061414718628,
534
- "learning_rate": 3.409906676238335e-06,
535
- "loss": 0.1686,
536
- "step": 13500
537
- },
538
- {
539
- "epoch": 28.066528066528065,
540
- "eval_cer": 0.03556062989933306,
541
- "eval_loss": 0.0737072303891182,
542
- "eval_mer": 0.10779711472406686,
543
- "eval_runtime": 33.5159,
544
- "eval_samples_per_second": 45.859,
545
- "eval_steps_per_second": 5.758,
546
- "eval_wer": 0.10803832692638705,
547
- "eval_wil": 0.1961644777961592,
548
- "eval_wip": 0.8038355222038408,
549
- "step": 13500
550
- },
551
- {
552
- "epoch": 29.106029106029105,
553
- "grad_norm": 2.0490431785583496,
554
- "learning_rate": 1.615218951902369e-06,
555
- "loss": 0.1801,
556
- "step": 14000
557
- },
558
- {
559
- "epoch": 29.106029106029105,
560
- "eval_cer": 0.035518859386530405,
561
- "eval_loss": 0.07359343022108078,
562
- "eval_mer": 0.10764385914686515,
563
- "eval_runtime": 33.4499,
564
- "eval_samples_per_second": 45.949,
565
- "eval_steps_per_second": 5.77,
566
- "eval_wer": 0.10786620001147512,
567
- "eval_wil": 0.19607173646103337,
568
- "eval_wip": 0.8039282635389666,
569
- "step": 14000
570
  }
571
  ],
572
  "logging_steps": 500,
573
- "max_steps": 14430,
574
  "num_input_tokens_seen": 0,
575
- "num_train_epochs": 30,
576
  "save_steps": 500,
577
  "stateful_callbacks": {
578
  "TrainerControl": {
@@ -586,7 +206,7 @@
586
  "attributes": {}
587
  }
588
  },
589
- "total_flos": 9.233840952562297e+18,
590
  "train_batch_size": 16,
591
  "trial_name": null,
592
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.06431299448013306,
3
+ "best_model_checkpoint": "./wav2vec2-base-demo/checkpoint-3500",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 4810,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0395010395010396,
13
+ "grad_norm": 2.656402349472046,
14
  "learning_rate": 5e-05,
15
+ "loss": 0.163,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0395010395010396,
20
+ "eval_cer": 0.042475895792468986,
21
+ "eval_loss": 0.0863703265786171,
22
+ "eval_mer": 0.11671012356875637,
23
+ "eval_runtime": 37.5343,
24
+ "eval_samples_per_second": 40.949,
25
+ "eval_steps_per_second": 5.142,
26
+ "eval_wer": 0.11718171987934665,
27
+ "eval_wil": 0.20816339596717104,
28
+ "eval_wip": 0.791836604032829,
29
  "step": 500
30
  },
31
  {
32
  "epoch": 2.079002079002079,
33
+ "grad_norm": 0.8406820297241211,
34
+ "learning_rate": 4.4199535962877034e-05,
35
+ "loss": 0.1635,
36
  "step": 1000
37
  },
38
  {
39
  "epoch": 2.079002079002079,
40
+ "eval_cer": 0.04208912340801724,
41
+ "eval_loss": 0.07893521338701248,
42
+ "eval_mer": 0.11592479329482387,
43
+ "eval_runtime": 37.2156,
44
+ "eval_samples_per_second": 41.3,
45
+ "eval_steps_per_second": 5.186,
46
+ "eval_wer": 0.11649877639292015,
47
+ "eval_wil": 0.20577031582271066,
48
+ "eval_wip": 0.7942296841772893,
49
  "step": 1000
50
  },
51
  {
52
  "epoch": 3.1185031185031185,
53
+ "grad_norm": 0.7278923392295837,
54
+ "learning_rate": 3.839907192575406e-05,
55
+ "loss": 0.172,
56
  "step": 1500
57
  },
58
  {
59
  "epoch": 3.1185031185031185,
60
+ "eval_cer": 0.04258640218802663,
61
+ "eval_loss": 0.09011566638946533,
62
+ "eval_mer": 0.11518887129608686,
63
+ "eval_runtime": 37.2272,
64
+ "eval_samples_per_second": 41.287,
65
+ "eval_steps_per_second": 5.184,
66
+ "eval_wer": 0.11592965682089806,
67
+ "eval_wil": 0.20315250387257455,
68
+ "eval_wip": 0.7968474961274254,
69
  "step": 1500
70
  },
71
  {
72
  "epoch": 4.158004158004158,
73
+ "grad_norm": 1.2081284523010254,
74
+ "learning_rate": 3.261020881670534e-05,
75
+ "loss": 0.148,
76
  "step": 2000
77
  },
78
  {
79
  "epoch": 4.158004158004158,
80
+ "eval_cer": 0.04066635356521259,
81
+ "eval_loss": 0.07975054532289505,
82
+ "eval_mer": 0.11372148450800136,
83
+ "eval_runtime": 37.1346,
84
+ "eval_samples_per_second": 41.39,
85
+ "eval_steps_per_second": 5.197,
86
+ "eval_wer": 0.1140515622332252,
87
+ "eval_wil": 0.2032785755642632,
88
+ "eval_wip": 0.7967214244357368,
89
  "step": 2000
90
  },
91
  {
92
  "epoch": 5.197505197505198,
93
+ "grad_norm": 1.7628921270370483,
94
+ "learning_rate": 2.6809744779582367e-05,
95
+ "loss": 0.1526,
96
  "step": 2500
97
  },
98
  {
99
  "epoch": 5.197505197505198,
100
+ "eval_cer": 0.04003094179075614,
101
+ "eval_loss": 0.07782892137765884,
102
+ "eval_mer": 0.11206896551724138,
103
+ "eval_runtime": 37.1954,
104
+ "eval_samples_per_second": 41.322,
105
+ "eval_steps_per_second": 5.189,
106
+ "eval_wer": 0.11245802743156337,
107
+ "eval_wil": 0.2010933536308993,
108
+ "eval_wip": 0.7989066463691007,
109
  "step": 2500
110
  },
111
  {
112
  "epoch": 6.237006237006237,
113
+ "grad_norm": 1.1964457035064697,
114
+ "learning_rate": 2.1020881670533645e-05,
115
+ "loss": 0.1412,
116
  "step": 3000
117
  },
118
  {
119
  "epoch": 6.237006237006237,
120
+ "eval_cer": 0.03990662209575379,
121
+ "eval_loss": 0.0688575804233551,
122
+ "eval_mer": 0.11312191524366029,
123
+ "eval_runtime": 37.255,
124
+ "eval_samples_per_second": 41.256,
125
+ "eval_steps_per_second": 5.181,
126
+ "eval_wer": 0.11348244266120312,
127
+ "eval_wil": 0.20307383068892626,
128
+ "eval_wip": 0.7969261693110737,
129
  "step": 3000
130
  },
131
  {
132
  "epoch": 7.276507276507276,
133
+ "grad_norm": 1.3279238939285278,
134
+ "learning_rate": 1.523201856148492e-05,
135
+ "loss": 0.1277,
136
  "step": 3500
137
  },
138
  {
139
  "epoch": 7.276507276507276,
140
+ "eval_cer": 0.03918833052462911,
141
+ "eval_loss": 0.06431299448013306,
142
+ "eval_mer": 0.1120195310282178,
143
+ "eval_runtime": 37.2025,
144
+ "eval_samples_per_second": 41.314,
145
+ "eval_steps_per_second": 5.188,
146
+ "eval_wer": 0.11228729155995675,
147
+ "eval_wil": 0.20144489838528856,
148
+ "eval_wip": 0.7985551016147114,
149
  "step": 3500
150
  },
151
  {
152
  "epoch": 8.316008316008316,
153
+ "grad_norm": 1.6859196424484253,
154
+ "learning_rate": 9.443155452436194e-06,
155
+ "loss": 0.1376,
156
  "step": 4000
157
  },
158
  {
159
  "epoch": 8.316008316008316,
160
+ "eval_cer": 0.039409343315744395,
161
+ "eval_loss": 0.06848356872797012,
162
+ "eval_mer": 0.11210711448995801,
163
+ "eval_runtime": 37.256,
164
+ "eval_samples_per_second": 41.255,
165
+ "eval_steps_per_second": 5.18,
166
+ "eval_wer": 0.11245802743156337,
167
+ "eval_wil": 0.20074433269163805,
168
+ "eval_wip": 0.799255667308362,
169
  "step": 4000
170
  },
171
  {
172
  "epoch": 9.355509355509355,
173
+ "grad_norm": 1.5659629106521606,
174
+ "learning_rate": 3.642691415313225e-06,
175
+ "loss": 0.131,
176
  "step": 4500
177
  },
178
  {
179
  "epoch": 9.355509355509355,
180
+ "eval_cer": 0.038787744840732656,
181
+ "eval_loss": 0.06696277111768723,
182
+ "eval_mer": 0.11123091765507065,
183
+ "eval_runtime": 37.5745,
184
+ "eval_samples_per_second": 40.905,
185
+ "eval_steps_per_second": 5.136,
186
+ "eval_wer": 0.11154743611632804,
187
+ "eval_wil": 0.19943676811944389,
188
+ "eval_wip": 0.8005632318805561,
189
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  }
191
  ],
192
  "logging_steps": 500,
193
+ "max_steps": 4810,
194
  "num_input_tokens_seen": 0,
195
+ "num_train_epochs": 10,
196
  "save_steps": 500,
197
  "stateful_callbacks": {
198
  "TrainerControl": {
 
206
  "attributes": {}
207
  }
208
  },
209
+ "total_flos": 3.07474151587841e+18,
210
  "train_batch_size": 16,
211
  "trial_name": null,
212
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8aa0ba76070730e78a2e05d51cda9e43beced2b5572af45c731b10724fafd32
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3091a22b40a081428e5812146fe9612f2921413ec1d4139f6c30058ff99d057
3
  size 5176