carmi commited on
Commit
63facbc
·
verified ·
1 Parent(s): 27e07d4

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -1149
trainer_state.json DELETED
@@ -1,1149 +0,0 @@
1
- {
2
- "best_metric": 13.76934528961673,
3
- "best_model_checkpoint": "/speechbrain/data/whis/whisper-medium-ar-aug30-cont3/checkpoint-3600",
4
- "epoch": 0.7889546351084813,
5
- "eval_steps": 300,
6
- "global_step": 3600,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.005478851632697786,
13
- "grad_norm": 0.5829852819442749,
14
- "learning_rate": 2.5000000000000004e-07,
15
- "loss": 0.1288,
16
- "step": 25
17
- },
18
- {
19
- "epoch": 0.010957703265395573,
20
- "grad_norm": 0.5360992550849915,
21
- "learning_rate": 5.000000000000001e-07,
22
- "loss": 0.13,
23
- "step": 50
24
- },
25
- {
26
- "epoch": 0.01643655489809336,
27
- "grad_norm": 0.5283806324005127,
28
- "learning_rate": 7.5e-07,
29
- "loss": 0.1272,
30
- "step": 75
31
- },
32
- {
33
- "epoch": 0.021915406530791146,
34
- "grad_norm": 0.5019442439079285,
35
- "learning_rate": 1.0000000000000002e-06,
36
- "loss": 0.1282,
37
- "step": 100
38
- },
39
- {
40
- "epoch": 0.027394258163488932,
41
- "grad_norm": 0.4962950050830841,
42
- "learning_rate": 1.25e-06,
43
- "loss": 0.1225,
44
- "step": 125
45
- },
46
- {
47
- "epoch": 0.03287310979618672,
48
- "grad_norm": 0.4953068792819977,
49
- "learning_rate": 1.5e-06,
50
- "loss": 0.1201,
51
- "step": 150
52
- },
53
- {
54
- "epoch": 0.03835196142888451,
55
- "grad_norm": 0.5007308125495911,
56
- "learning_rate": 1.75e-06,
57
- "loss": 0.1216,
58
- "step": 175
59
- },
60
- {
61
- "epoch": 0.04383081306158229,
62
- "grad_norm": 0.5254662036895752,
63
- "learning_rate": 2.0000000000000003e-06,
64
- "loss": 0.1179,
65
- "step": 200
66
- },
67
- {
68
- "epoch": 0.04930966469428008,
69
- "grad_norm": 0.45517662167549133,
70
- "learning_rate": 2.25e-06,
71
- "loss": 0.1171,
72
- "step": 225
73
- },
74
- {
75
- "epoch": 0.054788516326977864,
76
- "grad_norm": 0.4700426459312439,
77
- "learning_rate": 2.5e-06,
78
- "loss": 0.1135,
79
- "step": 250
80
- },
81
- {
82
- "epoch": 0.060267367959675654,
83
- "grad_norm": 0.48102590441703796,
84
- "learning_rate": 2.7500000000000004e-06,
85
- "loss": 0.1108,
86
- "step": 275
87
- },
88
- {
89
- "epoch": 0.06574621959237344,
90
- "grad_norm": 0.46885138750076294,
91
- "learning_rate": 3e-06,
92
- "loss": 0.111,
93
- "step": 300
94
- },
95
- {
96
- "epoch": 0.06574621959237344,
97
- "eval_loss": 0.10784981399774551,
98
- "eval_runtime": 1329.228,
99
- "eval_samples_per_second": 2.505,
100
- "eval_steps_per_second": 0.053,
101
- "eval_wer": 13.824164156539604,
102
- "step": 300
103
- },
104
- {
105
- "epoch": 0.07122507122507123,
106
- "grad_norm": 0.5266237854957581,
107
- "learning_rate": 3.2500000000000002e-06,
108
- "loss": 0.1135,
109
- "step": 325
110
- },
111
- {
112
- "epoch": 0.07670392285776902,
113
- "grad_norm": 0.5073336958885193,
114
- "learning_rate": 3.5e-06,
115
- "loss": 0.1102,
116
- "step": 350
117
- },
118
- {
119
- "epoch": 0.08218277449046679,
120
- "grad_norm": 0.4480571150779724,
121
- "learning_rate": 3.7500000000000005e-06,
122
- "loss": 0.1083,
123
- "step": 375
124
- },
125
- {
126
- "epoch": 0.08766162612316458,
127
- "grad_norm": 0.5027902126312256,
128
- "learning_rate": 4.000000000000001e-06,
129
- "loss": 0.1056,
130
- "step": 400
131
- },
132
- {
133
- "epoch": 0.09314047775586237,
134
- "grad_norm": 0.5120052695274353,
135
- "learning_rate": 4.25e-06,
136
- "loss": 0.1039,
137
- "step": 425
138
- },
139
- {
140
- "epoch": 0.09861932938856016,
141
- "grad_norm": 0.48518097400665283,
142
- "learning_rate": 4.5e-06,
143
- "loss": 0.1043,
144
- "step": 450
145
- },
146
- {
147
- "epoch": 0.10409818102125794,
148
- "grad_norm": 0.5022168159484863,
149
- "learning_rate": 4.75e-06,
150
- "loss": 0.1037,
151
- "step": 475
152
- },
153
- {
154
- "epoch": 0.10957703265395573,
155
- "grad_norm": 0.4528847932815552,
156
- "learning_rate": 5e-06,
157
- "loss": 0.0985,
158
- "step": 500
159
- },
160
- {
161
- "epoch": 0.11505588428665352,
162
- "grad_norm": 0.49148285388946533,
163
- "learning_rate": 4.9986227412957256e-06,
164
- "loss": 0.1004,
165
- "step": 525
166
- },
167
- {
168
- "epoch": 0.12053473591935131,
169
- "grad_norm": 0.4991222321987152,
170
- "learning_rate": 4.99724548259145e-06,
171
- "loss": 0.1,
172
- "step": 550
173
- },
174
- {
175
- "epoch": 0.12601358755204908,
176
- "grad_norm": 0.4580934941768646,
177
- "learning_rate": 4.995868223887175e-06,
178
- "loss": 0.0961,
179
- "step": 575
180
- },
181
- {
182
- "epoch": 0.13149243918474687,
183
- "grad_norm": 0.4897126853466034,
184
- "learning_rate": 4.9944909651829e-06,
185
- "loss": 0.1,
186
- "step": 600
187
- },
188
- {
189
- "epoch": 0.13149243918474687,
190
- "eval_loss": 0.1092229038476944,
191
- "eval_runtime": 1332.663,
192
- "eval_samples_per_second": 2.499,
193
- "eval_steps_per_second": 0.053,
194
- "eval_wer": 13.810892220337223,
195
- "step": 600
196
- },
197
- {
198
- "epoch": 0.13697129081744466,
199
- "grad_norm": 0.47739720344543457,
200
- "learning_rate": 4.993113706478625e-06,
201
- "loss": 0.1049,
202
- "step": 625
203
- },
204
- {
205
- "epoch": 0.14245014245014245,
206
- "grad_norm": 0.5223620533943176,
207
- "learning_rate": 4.9917364477743505e-06,
208
- "loss": 0.1001,
209
- "step": 650
210
- },
211
- {
212
- "epoch": 0.14792899408284024,
213
- "grad_norm": 0.4523641765117645,
214
- "learning_rate": 4.990359189070076e-06,
215
- "loss": 0.0986,
216
- "step": 675
217
- },
218
- {
219
- "epoch": 0.15340784571553803,
220
- "grad_norm": 0.47216111421585083,
221
- "learning_rate": 4.988981930365801e-06,
222
- "loss": 0.1015,
223
- "step": 700
224
- },
225
- {
226
- "epoch": 0.15888669734823582,
227
- "grad_norm": 0.5023489594459534,
228
- "learning_rate": 4.987604671661525e-06,
229
- "loss": 0.0987,
230
- "step": 725
231
- },
232
- {
233
- "epoch": 0.16436554898093358,
234
- "grad_norm": 0.5276343822479248,
235
- "learning_rate": 4.98622741295725e-06,
236
- "loss": 0.1003,
237
- "step": 750
238
- },
239
- {
240
- "epoch": 0.16984440061363137,
241
- "grad_norm": 0.49953222274780273,
242
- "learning_rate": 4.9848501542529754e-06,
243
- "loss": 0.0983,
244
- "step": 775
245
- },
246
- {
247
- "epoch": 0.17532325224632916,
248
- "grad_norm": 0.45781460404396057,
249
- "learning_rate": 4.983472895548701e-06,
250
- "loss": 0.0978,
251
- "step": 800
252
- },
253
- {
254
- "epoch": 0.18080210387902695,
255
- "grad_norm": 0.4533933997154236,
256
- "learning_rate": 4.982095636844425e-06,
257
- "loss": 0.0984,
258
- "step": 825
259
- },
260
- {
261
- "epoch": 0.18628095551172474,
262
- "grad_norm": 0.5115811824798584,
263
- "learning_rate": 4.98071837814015e-06,
264
- "loss": 0.097,
265
- "step": 850
266
- },
267
- {
268
- "epoch": 0.19175980714442253,
269
- "grad_norm": 0.5033650994300842,
270
- "learning_rate": 4.979341119435875e-06,
271
- "loss": 0.0985,
272
- "step": 875
273
- },
274
- {
275
- "epoch": 0.19723865877712032,
276
- "grad_norm": 0.4879961609840393,
277
- "learning_rate": 4.9779638607316e-06,
278
- "loss": 0.0968,
279
- "step": 900
280
- },
281
- {
282
- "epoch": 0.19723865877712032,
283
- "eval_loss": 0.10996146500110626,
284
- "eval_runtime": 1382.8033,
285
- "eval_samples_per_second": 2.408,
286
- "eval_steps_per_second": 0.051,
287
- "eval_wer": 13.83570497062863,
288
- "step": 900
289
- },
290
- {
291
- "epoch": 0.2027175104098181,
292
- "grad_norm": 0.4946504831314087,
293
- "learning_rate": 4.9765866020273255e-06,
294
- "loss": 0.0975,
295
- "step": 925
296
- },
297
- {
298
- "epoch": 0.20819636204251588,
299
- "grad_norm": 0.5116554498672485,
300
- "learning_rate": 4.975209343323051e-06,
301
- "loss": 0.0973,
302
- "step": 950
303
- },
304
- {
305
- "epoch": 0.21367521367521367,
306
- "grad_norm": 0.5216028094291687,
307
- "learning_rate": 4.973832084618776e-06,
308
- "loss": 0.097,
309
- "step": 975
310
- },
311
- {
312
- "epoch": 0.21915406530791146,
313
- "grad_norm": 0.5032294392585754,
314
- "learning_rate": 4.9724548259145e-06,
315
- "loss": 0.0909,
316
- "step": 1000
317
- },
318
- {
319
- "epoch": 0.22463291694060925,
320
- "grad_norm": 0.526467502117157,
321
- "learning_rate": 4.971077567210225e-06,
322
- "loss": 0.0969,
323
- "step": 1025
324
- },
325
- {
326
- "epoch": 0.23011176857330704,
327
- "grad_norm": 0.488610178232193,
328
- "learning_rate": 4.9697003085059505e-06,
329
- "loss": 0.098,
330
- "step": 1050
331
- },
332
- {
333
- "epoch": 0.23559062020600482,
334
- "grad_norm": 0.47755196690559387,
335
- "learning_rate": 4.968323049801675e-06,
336
- "loss": 0.0931,
337
- "step": 1075
338
- },
339
- {
340
- "epoch": 0.24106947183870261,
341
- "grad_norm": 0.5348175168037415,
342
- "learning_rate": 4.9669457910974e-06,
343
- "loss": 0.0952,
344
- "step": 1100
345
- },
346
- {
347
- "epoch": 0.2465483234714004,
348
- "grad_norm": 0.48804572224617004,
349
- "learning_rate": 4.965568532393125e-06,
350
- "loss": 0.0954,
351
- "step": 1125
352
- },
353
- {
354
- "epoch": 0.25202717510409817,
355
- "grad_norm": 0.48517024517059326,
356
- "learning_rate": 4.96419127368885e-06,
357
- "loss": 0.0958,
358
- "step": 1150
359
- },
360
- {
361
- "epoch": 0.25750602673679596,
362
- "grad_norm": 0.5918833017349243,
363
- "learning_rate": 4.9628140149845745e-06,
364
- "loss": 0.0958,
365
- "step": 1175
366
- },
367
- {
368
- "epoch": 0.26298487836949375,
369
- "grad_norm": 0.5274895429611206,
370
- "learning_rate": 4.9614367562803e-06,
371
- "loss": 0.0967,
372
- "step": 1200
373
- },
374
- {
375
- "epoch": 0.26298487836949375,
376
- "eval_loss": 0.11020273715257645,
377
- "eval_runtime": 1336.5505,
378
- "eval_samples_per_second": 2.491,
379
- "eval_steps_per_second": 0.052,
380
- "eval_wer": 14.059019723251279,
381
- "step": 1200
382
- },
383
- {
384
- "epoch": 0.26846373000219154,
385
- "grad_norm": 0.4892116189002991,
386
- "learning_rate": 4.960059497576025e-06,
387
- "loss": 0.0929,
388
- "step": 1225
389
- },
390
- {
391
- "epoch": 0.2739425816348893,
392
- "grad_norm": 0.4998278319835663,
393
- "learning_rate": 4.95868223887175e-06,
394
- "loss": 0.0948,
395
- "step": 1250
396
- },
397
- {
398
- "epoch": 0.2794214332675871,
399
- "grad_norm": 0.5273219347000122,
400
- "learning_rate": 4.957304980167475e-06,
401
- "loss": 0.0907,
402
- "step": 1275
403
- },
404
- {
405
- "epoch": 0.2849002849002849,
406
- "grad_norm": 0.47056299448013306,
407
- "learning_rate": 4.9559277214632e-06,
408
- "loss": 0.091,
409
- "step": 1300
410
- },
411
- {
412
- "epoch": 0.2903791365329827,
413
- "grad_norm": 0.4882357716560364,
414
- "learning_rate": 4.9545504627589255e-06,
415
- "loss": 0.0941,
416
- "step": 1325
417
- },
418
- {
419
- "epoch": 0.2958579881656805,
420
- "grad_norm": 0.5165619850158691,
421
- "learning_rate": 4.95317320405465e-06,
422
- "loss": 0.0921,
423
- "step": 1350
424
- },
425
- {
426
- "epoch": 0.3013368397983783,
427
- "grad_norm": 0.4642132520675659,
428
- "learning_rate": 4.951795945350375e-06,
429
- "loss": 0.0914,
430
- "step": 1375
431
- },
432
- {
433
- "epoch": 0.30681569143107607,
434
- "grad_norm": 0.5326189398765564,
435
- "learning_rate": 4.9504186866461e-06,
436
- "loss": 0.0959,
437
- "step": 1400
438
- },
439
- {
440
- "epoch": 0.31229454306377386,
441
- "grad_norm": 0.44957414269447327,
442
- "learning_rate": 4.949041427941824e-06,
443
- "loss": 0.0871,
444
- "step": 1425
445
- },
446
- {
447
- "epoch": 0.31777339469647164,
448
- "grad_norm": 0.4865795373916626,
449
- "learning_rate": 4.9476641692375496e-06,
450
- "loss": 0.0891,
451
- "step": 1450
452
- },
453
- {
454
- "epoch": 0.3232522463291694,
455
- "grad_norm": 0.49055206775665283,
456
- "learning_rate": 4.946286910533275e-06,
457
- "loss": 0.0953,
458
- "step": 1475
459
- },
460
- {
461
- "epoch": 0.32873109796186717,
462
- "grad_norm": 0.49437183141708374,
463
- "learning_rate": 4.944909651829e-06,
464
- "loss": 0.0896,
465
- "step": 1500
466
- },
467
- {
468
- "epoch": 0.32873109796186717,
469
- "eval_loss": 0.11099947988986969,
470
- "eval_runtime": 1334.6828,
471
- "eval_samples_per_second": 2.495,
472
- "eval_steps_per_second": 0.052,
473
- "eval_wer": 13.836282011333079,
474
- "step": 1500
475
- },
476
- {
477
- "epoch": 0.33420994959456496,
478
- "grad_norm": 0.5322751998901367,
479
- "learning_rate": 4.943532393124725e-06,
480
- "loss": 0.0929,
481
- "step": 1525
482
- },
483
- {
484
- "epoch": 0.33968880122726275,
485
- "grad_norm": 0.5024107098579407,
486
- "learning_rate": 4.94215513442045e-06,
487
- "loss": 0.0922,
488
- "step": 1550
489
- },
490
- {
491
- "epoch": 0.34516765285996054,
492
- "grad_norm": 0.4347039759159088,
493
- "learning_rate": 4.940777875716175e-06,
494
- "loss": 0.0908,
495
- "step": 1575
496
- },
497
- {
498
- "epoch": 0.35064650449265833,
499
- "grad_norm": 0.5164802074432373,
500
- "learning_rate": 4.9394006170119e-06,
501
- "loss": 0.0939,
502
- "step": 1600
503
- },
504
- {
505
- "epoch": 0.3561253561253561,
506
- "grad_norm": 0.4986899793148041,
507
- "learning_rate": 4.938023358307625e-06,
508
- "loss": 0.0883,
509
- "step": 1625
510
- },
511
- {
512
- "epoch": 0.3616042077580539,
513
- "grad_norm": 0.5192301869392395,
514
- "learning_rate": 4.93664609960335e-06,
515
- "loss": 0.0915,
516
- "step": 1650
517
- },
518
- {
519
- "epoch": 0.3670830593907517,
520
- "grad_norm": 0.5347697734832764,
521
- "learning_rate": 4.935268840899075e-06,
522
- "loss": 0.0884,
523
- "step": 1675
524
- },
525
- {
526
- "epoch": 0.3725619110234495,
527
- "grad_norm": 0.47178414463996887,
528
- "learning_rate": 4.9338915821947994e-06,
529
- "loss": 0.0922,
530
- "step": 1700
531
- },
532
- {
533
- "epoch": 0.3780407626561473,
534
- "grad_norm": 0.4868011772632599,
535
- "learning_rate": 4.932514323490525e-06,
536
- "loss": 0.0925,
537
- "step": 1725
538
- },
539
- {
540
- "epoch": 0.38351961428884507,
541
- "grad_norm": 0.491805762052536,
542
- "learning_rate": 4.93113706478625e-06,
543
- "loss": 0.091,
544
- "step": 1750
545
- },
546
- {
547
- "epoch": 0.38899846592154286,
548
- "grad_norm": 0.5111169219017029,
549
- "learning_rate": 4.929759806081975e-06,
550
- "loss": 0.0888,
551
- "step": 1775
552
- },
553
- {
554
- "epoch": 0.39447731755424065,
555
- "grad_norm": 0.4957449436187744,
556
- "learning_rate": 4.9283825473777e-06,
557
- "loss": 0.0907,
558
- "step": 1800
559
- },
560
- {
561
- "epoch": 0.39447731755424065,
562
- "eval_loss": 0.11147266626358032,
563
- "eval_runtime": 1330.9042,
564
- "eval_samples_per_second": 2.502,
565
- "eval_steps_per_second": 0.053,
566
- "eval_wer": 14.133457974125493,
567
- "step": 1800
568
- },
569
- {
570
- "epoch": 0.39995616918693844,
571
- "grad_norm": 0.46782732009887695,
572
- "learning_rate": 4.927005288673425e-06,
573
- "loss": 0.0882,
574
- "step": 1825
575
- },
576
- {
577
- "epoch": 0.4054350208196362,
578
- "grad_norm": 0.4959644079208374,
579
- "learning_rate": 4.92562802996915e-06,
580
- "loss": 0.0925,
581
- "step": 1850
582
- },
583
- {
584
- "epoch": 0.410913872452334,
585
- "grad_norm": 0.4934210479259491,
586
- "learning_rate": 4.924250771264875e-06,
587
- "loss": 0.0901,
588
- "step": 1875
589
- },
590
- {
591
- "epoch": 0.41639272408503175,
592
- "grad_norm": 0.520613968372345,
593
- "learning_rate": 4.9228735125606e-06,
594
- "loss": 0.0893,
595
- "step": 1900
596
- },
597
- {
598
- "epoch": 0.42187157571772954,
599
- "grad_norm": 0.48207858204841614,
600
- "learning_rate": 4.921496253856325e-06,
601
- "loss": 0.0918,
602
- "step": 1925
603
- },
604
- {
605
- "epoch": 0.42735042735042733,
606
- "grad_norm": 0.5212067365646362,
607
- "learning_rate": 4.920118995152049e-06,
608
- "loss": 0.0915,
609
- "step": 1950
610
- },
611
- {
612
- "epoch": 0.4328292789831251,
613
- "grad_norm": 0.4570591449737549,
614
- "learning_rate": 4.9187417364477744e-06,
615
- "loss": 0.0879,
616
- "step": 1975
617
- },
618
- {
619
- "epoch": 0.4383081306158229,
620
- "grad_norm": 0.5075387954711914,
621
- "learning_rate": 4.9173644777435e-06,
622
- "loss": 0.0921,
623
- "step": 2000
624
- },
625
- {
626
- "epoch": 0.4437869822485207,
627
- "grad_norm": 0.4904765784740448,
628
- "learning_rate": 4.915987219039225e-06,
629
- "loss": 0.0892,
630
- "step": 2025
631
- },
632
- {
633
- "epoch": 0.4492658338812185,
634
- "grad_norm": 0.4949191212654114,
635
- "learning_rate": 4.91460996033495e-06,
636
- "loss": 0.0909,
637
- "step": 2050
638
- },
639
- {
640
- "epoch": 0.4547446855139163,
641
- "grad_norm": 0.5112493634223938,
642
- "learning_rate": 4.913232701630675e-06,
643
- "loss": 0.089,
644
- "step": 2075
645
- },
646
- {
647
- "epoch": 0.46022353714661407,
648
- "grad_norm": 0.47857844829559326,
649
- "learning_rate": 4.9118554429264e-06,
650
- "loss": 0.0901,
651
- "step": 2100
652
- },
653
- {
654
- "epoch": 0.46022353714661407,
655
- "eval_loss": 0.112032450735569,
656
- "eval_runtime": 1387.0176,
657
- "eval_samples_per_second": 2.401,
658
- "eval_steps_per_second": 0.05,
659
- "eval_wer": 13.988043716603768,
660
- "step": 2100
661
- },
662
- {
663
- "epoch": 0.46570238877931186,
664
- "grad_norm": 0.4768081307411194,
665
- "learning_rate": 4.9104781842221245e-06,
666
- "loss": 0.0874,
667
- "step": 2125
668
- },
669
- {
670
- "epoch": 0.47118124041200965,
671
- "grad_norm": 0.4740845859050751,
672
- "learning_rate": 4.90910092551785e-06,
673
- "loss": 0.0885,
674
- "step": 2150
675
- },
676
- {
677
- "epoch": 0.47666009204470744,
678
- "grad_norm": 0.4519156813621521,
679
- "learning_rate": 4.907723666813575e-06,
680
- "loss": 0.0867,
681
- "step": 2175
682
- },
683
- {
684
- "epoch": 0.48213894367740523,
685
- "grad_norm": 0.5068197250366211,
686
- "learning_rate": 4.9063464081093e-06,
687
- "loss": 0.0878,
688
- "step": 2200
689
- },
690
- {
691
- "epoch": 0.487617795310103,
692
- "grad_norm": 0.49033084511756897,
693
- "learning_rate": 4.904969149405024e-06,
694
- "loss": 0.0862,
695
- "step": 2225
696
- },
697
- {
698
- "epoch": 0.4930966469428008,
699
- "grad_norm": 0.4625925123691559,
700
- "learning_rate": 4.9035918907007495e-06,
701
- "loss": 0.0866,
702
- "step": 2250
703
- },
704
- {
705
- "epoch": 0.4985754985754986,
706
- "grad_norm": 0.5056318640708923,
707
- "learning_rate": 4.902214631996475e-06,
708
- "loss": 0.086,
709
- "step": 2275
710
- },
711
- {
712
- "epoch": 0.5040543502081963,
713
- "grad_norm": 0.46904438734054565,
714
- "learning_rate": 4.9008373732922e-06,
715
- "loss": 0.0836,
716
- "step": 2300
717
- },
718
- {
719
- "epoch": 0.5095332018408941,
720
- "grad_norm": 0.5033324360847473,
721
- "learning_rate": 4.899460114587924e-06,
722
- "loss": 0.0879,
723
- "step": 2325
724
- },
725
- {
726
- "epoch": 0.5150120534735919,
727
- "grad_norm": 0.5081333518028259,
728
- "learning_rate": 4.898082855883649e-06,
729
- "loss": 0.0867,
730
- "step": 2350
731
- },
732
- {
733
- "epoch": 0.5204909051062897,
734
- "grad_norm": 0.44954633712768555,
735
- "learning_rate": 4.896705597179374e-06,
736
- "loss": 0.0859,
737
- "step": 2375
738
- },
739
- {
740
- "epoch": 0.5259697567389875,
741
- "grad_norm": 0.5036991238594055,
742
- "learning_rate": 4.8953283384750996e-06,
743
- "loss": 0.0823,
744
- "step": 2400
745
- },
746
- {
747
- "epoch": 0.5259697567389875,
748
- "eval_loss": 0.11307456344366074,
749
- "eval_runtime": 1331.1273,
750
- "eval_samples_per_second": 2.502,
751
- "eval_steps_per_second": 0.053,
752
- "eval_wer": 13.977079943219195,
753
- "step": 2400
754
- },
755
- {
756
- "epoch": 0.5314486083716853,
757
- "grad_norm": 0.48715198040008545,
758
- "learning_rate": 4.893951079770825e-06,
759
- "loss": 0.0853,
760
- "step": 2425
761
- },
762
- {
763
- "epoch": 0.5369274600043831,
764
- "grad_norm": 0.5139690041542053,
765
- "learning_rate": 4.89257382106655e-06,
766
- "loss": 0.0874,
767
- "step": 2450
768
- },
769
- {
770
- "epoch": 0.5424063116370809,
771
- "grad_norm": 0.49623942375183105,
772
- "learning_rate": 4.891196562362275e-06,
773
- "loss": 0.0893,
774
- "step": 2475
775
- },
776
- {
777
- "epoch": 0.5478851632697787,
778
- "grad_norm": 0.5240609645843506,
779
- "learning_rate": 4.889819303657999e-06,
780
- "loss": 0.0857,
781
- "step": 2500
782
- },
783
- {
784
- "epoch": 0.5533640149024764,
785
- "grad_norm": 0.5464821457862854,
786
- "learning_rate": 4.8884420449537245e-06,
787
- "loss": 0.0858,
788
- "step": 2525
789
- },
790
- {
791
- "epoch": 0.5588428665351742,
792
- "grad_norm": 0.49569082260131836,
793
- "learning_rate": 4.88706478624945e-06,
794
- "loss": 0.085,
795
- "step": 2550
796
- },
797
- {
798
- "epoch": 0.564321718167872,
799
- "grad_norm": 0.5617781281471252,
800
- "learning_rate": 4.885687527545174e-06,
801
- "loss": 0.0861,
802
- "step": 2575
803
- },
804
- {
805
- "epoch": 0.5698005698005698,
806
- "grad_norm": 0.538022518157959,
807
- "learning_rate": 4.884310268840899e-06,
808
- "loss": 0.0868,
809
- "step": 2600
810
- },
811
- {
812
- "epoch": 0.5752794214332676,
813
- "grad_norm": 0.4421217143535614,
814
- "learning_rate": 4.882933010136624e-06,
815
- "loss": 0.085,
816
- "step": 2625
817
- },
818
- {
819
- "epoch": 0.5807582730659654,
820
- "grad_norm": 0.4933975040912628,
821
- "learning_rate": 4.881555751432349e-06,
822
- "loss": 0.0836,
823
- "step": 2650
824
- },
825
- {
826
- "epoch": 0.5862371246986632,
827
- "grad_norm": 0.5269121527671814,
828
- "learning_rate": 4.880178492728075e-06,
829
- "loss": 0.0855,
830
- "step": 2675
831
- },
832
- {
833
- "epoch": 0.591715976331361,
834
- "grad_norm": 0.49818453192710876,
835
- "learning_rate": 4.8788012340238e-06,
836
- "loss": 0.0818,
837
- "step": 2700
838
- },
839
- {
840
- "epoch": 0.591715976331361,
841
- "eval_loss": 0.11333612352609634,
842
- "eval_runtime": 1354.7808,
843
- "eval_samples_per_second": 2.458,
844
- "eval_steps_per_second": 0.052,
845
- "eval_wer": 14.018049833235235,
846
- "step": 2700
847
- },
848
- {
849
- "epoch": 0.5971948279640588,
850
- "grad_norm": 0.5359761714935303,
851
- "learning_rate": 4.877423975319525e-06,
852
- "loss": 0.0803,
853
- "step": 2725
854
- },
855
- {
856
- "epoch": 0.6026736795967566,
857
- "grad_norm": 0.5219433903694153,
858
- "learning_rate": 4.876046716615249e-06,
859
- "loss": 0.0835,
860
- "step": 2750
861
- },
862
- {
863
- "epoch": 0.6081525312294543,
864
- "grad_norm": 0.4877767562866211,
865
- "learning_rate": 4.874669457910974e-06,
866
- "loss": 0.0827,
867
- "step": 2775
868
- },
869
- {
870
- "epoch": 0.6136313828621521,
871
- "grad_norm": 0.47034549713134766,
872
- "learning_rate": 4.8732921992066995e-06,
873
- "loss": 0.0803,
874
- "step": 2800
875
- },
876
- {
877
- "epoch": 0.6191102344948499,
878
- "grad_norm": 0.5331267714500427,
879
- "learning_rate": 4.871914940502425e-06,
880
- "loss": 0.0827,
881
- "step": 2825
882
- },
883
- {
884
- "epoch": 0.6245890861275477,
885
- "grad_norm": 0.5360026955604553,
886
- "learning_rate": 4.870537681798149e-06,
887
- "loss": 0.0848,
888
- "step": 2850
889
- },
890
- {
891
- "epoch": 0.6300679377602455,
892
- "grad_norm": 0.5023711323738098,
893
- "learning_rate": 4.869160423093874e-06,
894
- "loss": 0.0816,
895
- "step": 2875
896
- },
897
- {
898
- "epoch": 0.6355467893929433,
899
- "grad_norm": 0.43558841943740845,
900
- "learning_rate": 4.867783164389599e-06,
901
- "loss": 0.0823,
902
- "step": 2900
903
- },
904
- {
905
- "epoch": 0.6410256410256411,
906
- "grad_norm": 0.52950519323349,
907
- "learning_rate": 4.8664059056853244e-06,
908
- "loss": 0.0832,
909
- "step": 2925
910
- },
911
- {
912
- "epoch": 0.6465044926583388,
913
- "grad_norm": 0.49947696924209595,
914
- "learning_rate": 4.86502864698105e-06,
915
- "loss": 0.0825,
916
- "step": 2950
917
- },
918
- {
919
- "epoch": 0.6519833442910365,
920
- "grad_norm": 0.4842943549156189,
921
- "learning_rate": 4.863651388276775e-06,
922
- "loss": 0.0821,
923
- "step": 2975
924
- },
925
- {
926
- "epoch": 0.6574621959237343,
927
- "grad_norm": 0.6517378091812134,
928
- "learning_rate": 4.8622741295725e-06,
929
- "loss": 0.0945,
930
- "step": 3000
931
- },
932
- {
933
- "epoch": 0.6574621959237343,
934
- "eval_loss": 0.11205233633518219,
935
- "eval_runtime": 1330.2589,
936
- "eval_samples_per_second": 2.503,
937
- "eval_steps_per_second": 0.053,
938
- "eval_wer": 13.976502902514742,
939
- "step": 3000
940
- },
941
- {
942
- "epoch": 0.6629410475564321,
943
- "grad_norm": 0.5949074029922485,
944
- "learning_rate": 4.860896870868224e-06,
945
- "loss": 0.1253,
946
- "step": 3025
947
- },
948
- {
949
- "epoch": 0.6684198991891299,
950
- "grad_norm": 0.5628743171691895,
951
- "learning_rate": 4.859519612163949e-06,
952
- "loss": 0.1232,
953
- "step": 3050
954
- },
955
- {
956
- "epoch": 0.6738987508218277,
957
- "grad_norm": 0.5839057564735413,
958
- "learning_rate": 4.8581423534596745e-06,
959
- "loss": 0.1228,
960
- "step": 3075
961
- },
962
- {
963
- "epoch": 0.6793776024545255,
964
- "grad_norm": 0.5640609860420227,
965
- "learning_rate": 4.856765094755399e-06,
966
- "loss": 0.1181,
967
- "step": 3100
968
- },
969
- {
970
- "epoch": 0.6848564540872233,
971
- "grad_norm": 0.6778563261032104,
972
- "learning_rate": 4.855387836051124e-06,
973
- "loss": 0.1208,
974
- "step": 3125
975
- },
976
- {
977
- "epoch": 0.6903353057199211,
978
- "grad_norm": 0.603071928024292,
979
- "learning_rate": 4.854010577346849e-06,
980
- "loss": 0.1193,
981
- "step": 3150
982
- },
983
- {
984
- "epoch": 0.6958141573526189,
985
- "grad_norm": 0.6698121428489685,
986
- "learning_rate": 4.852633318642574e-06,
987
- "loss": 0.1209,
988
- "step": 3175
989
- },
990
- {
991
- "epoch": 0.7012930089853167,
992
- "grad_norm": 0.5631791353225708,
993
- "learning_rate": 4.8512560599382995e-06,
994
- "loss": 0.1223,
995
- "step": 3200
996
- },
997
- {
998
- "epoch": 0.7067718606180144,
999
- "grad_norm": 0.5904573798179626,
1000
- "learning_rate": 4.849878801234025e-06,
1001
- "loss": 0.119,
1002
- "step": 3225
1003
- },
1004
- {
1005
- "epoch": 0.7122507122507122,
1006
- "grad_norm": 0.6524720788002014,
1007
- "learning_rate": 4.84850154252975e-06,
1008
- "loss": 0.1194,
1009
- "step": 3250
1010
- },
1011
- {
1012
- "epoch": 0.71772956388341,
1013
- "grad_norm": 0.6679468154907227,
1014
- "learning_rate": 4.847124283825474e-06,
1015
- "loss": 0.1224,
1016
- "step": 3275
1017
- },
1018
- {
1019
- "epoch": 0.7232084155161078,
1020
- "grad_norm": 0.5249156951904297,
1021
- "learning_rate": 4.845747025121199e-06,
1022
- "loss": 0.1193,
1023
- "step": 3300
1024
- },
1025
- {
1026
- "epoch": 0.7232084155161078,
1027
- "eval_loss": 0.10731059312820435,
1028
- "eval_runtime": 1334.5691,
1029
- "eval_samples_per_second": 2.495,
1030
- "eval_steps_per_second": 0.052,
1031
- "eval_wer": 13.910143221502844,
1032
- "step": 3300
1033
- },
1034
- {
1035
- "epoch": 0.7286872671488056,
1036
- "grad_norm": 0.6170015931129456,
1037
- "learning_rate": 4.844369766416924e-06,
1038
- "loss": 0.118,
1039
- "step": 3325
1040
- },
1041
- {
1042
- "epoch": 0.7341661187815034,
1043
- "grad_norm": 0.622870922088623,
1044
- "learning_rate": 4.8429925077126496e-06,
1045
- "loss": 0.1213,
1046
- "step": 3350
1047
- },
1048
- {
1049
- "epoch": 0.7396449704142012,
1050
- "grad_norm": 0.6220366358757019,
1051
- "learning_rate": 4.841615249008374e-06,
1052
- "loss": 0.1199,
1053
- "step": 3375
1054
- },
1055
- {
1056
- "epoch": 0.745123822046899,
1057
- "grad_norm": 0.6058914661407471,
1058
- "learning_rate": 4.840237990304099e-06,
1059
- "loss": 0.1177,
1060
- "step": 3400
1061
- },
1062
- {
1063
- "epoch": 0.7506026736795968,
1064
- "grad_norm": 0.618475079536438,
1065
- "learning_rate": 4.838860731599824e-06,
1066
- "loss": 0.1193,
1067
- "step": 3425
1068
- },
1069
- {
1070
- "epoch": 0.7560815253122946,
1071
- "grad_norm": 0.6013332009315491,
1072
- "learning_rate": 4.8374834728955485e-06,
1073
- "loss": 0.1185,
1074
- "step": 3450
1075
- },
1076
- {
1077
- "epoch": 0.7615603769449923,
1078
- "grad_norm": 0.5604269504547119,
1079
- "learning_rate": 4.836106214191274e-06,
1080
- "loss": 0.1169,
1081
- "step": 3475
1082
- },
1083
- {
1084
- "epoch": 0.7670392285776901,
1085
- "grad_norm": 0.5583498477935791,
1086
- "learning_rate": 4.834728955486999e-06,
1087
- "loss": 0.1205,
1088
- "step": 3500
1089
- },
1090
- {
1091
- "epoch": 0.7725180802103879,
1092
- "grad_norm": 0.5525631904602051,
1093
- "learning_rate": 4.833351696782724e-06,
1094
- "loss": 0.1192,
1095
- "step": 3525
1096
- },
1097
- {
1098
- "epoch": 0.7779969318430857,
1099
- "grad_norm": 0.5905235409736633,
1100
- "learning_rate": 4.831974438078449e-06,
1101
- "loss": 0.1192,
1102
- "step": 3550
1103
- },
1104
- {
1105
- "epoch": 0.7834757834757835,
1106
- "grad_norm": 0.5701056122779846,
1107
- "learning_rate": 4.830597179374174e-06,
1108
- "loss": 0.1189,
1109
- "step": 3575
1110
- },
1111
- {
1112
- "epoch": 0.7889546351084813,
1113
- "grad_norm": 0.5544924139976501,
1114
- "learning_rate": 4.829219920669899e-06,
1115
- "loss": 0.1179,
1116
- "step": 3600
1117
- },
1118
- {
1119
- "epoch": 0.7889546351084813,
1120
- "eval_loss": 0.10721833258867264,
1121
- "eval_runtime": 1334.3717,
1122
- "eval_samples_per_second": 2.496,
1123
- "eval_steps_per_second": 0.052,
1124
- "eval_wer": 13.76934528961673,
1125
- "step": 3600
1126
- }
1127
- ],
1128
- "logging_steps": 25,
1129
- "max_steps": 91260,
1130
- "num_input_tokens_seen": 0,
1131
- "num_train_epochs": 20,
1132
- "save_steps": 300,
1133
- "stateful_callbacks": {
1134
- "TrainerControl": {
1135
- "args": {
1136
- "should_epoch_stop": false,
1137
- "should_evaluate": false,
1138
- "should_log": false,
1139
- "should_save": true,
1140
- "should_training_stop": false
1141
- },
1142
- "attributes": {}
1143
- }
1144
- },
1145
- "total_flos": 3.527210695946341e+20,
1146
- "train_batch_size": 32,
1147
- "trial_name": null,
1148
- "trial_params": null
1149
- }