Baselhany commited on
Commit
fdcb566
·
verified ·
1 Parent(s): a17103f

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openai/whisper-tiny",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
1
  {
2
+ "_name_or_path": "Baselhany/Graduation_Project_Whisper_tiny_fine_tune_Quran",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a784561adf3515711b588dde9815b789359f9285ae4e16b60d15b896766a77f4
3
  size 151061672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b46e8196e00199074adc216fecc9513e28674d93785a33ee77eabc1cd8a07cd9
3
  size 151061672
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df2320e15026300ace7b1c40ff27c182f4c1e04edc644e47d1675df673ba4606
3
  size 297616186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dad509a51d888adea1ba97f4ed317aab1316125f8f2d85181dbff53f6e12bb5
3
  size 297616186
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80a0dbe0c127c8a83022b1dc5a596bef8c3c53bfe2d22a4b2beaa04fdf84b650
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a0053688c21d4b223e60fe14000ea3131b6f050b39ef3f20662920444f38ad
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4ca5e30a66f9df41733f277a6e1d9acf5f7fe7b8ba8596acf4f37f13e3b3294
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:959811f2b632ac28cb0d8d27e9678f9580865cbedd9c0e82696e795f9bb8c37e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,646 +1,43 @@
1
  {
2
- "best_metric": 29.268641081801732,
3
- "best_model_checkpoint": "./Whisper_tiny_fine_tune_Quran/checkpoint-2669",
4
- "epoch": 19.8768,
5
  "eval_steps": 500,
6
- "global_step": 3120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.32,
13
- "grad_norm": 297566.90625,
14
  "learning_rate": 1e-05,
15
- "loss": 2.7962,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.64,
20
- "grad_norm": 237295.046875,
21
  "learning_rate": 2e-05,
22
- "loss": 0.1442,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.96,
27
- "grad_norm": 337374.4375,
28
  "learning_rate": 3e-05,
29
- "loss": 0.0908,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 1.0,
34
- "eval_cer": 51.62764895151448,
35
- "eval_loss": 0.04871196299791336,
36
- "eval_runtime": 827.6898,
37
- "eval_samples_per_second": 2.415,
38
- "eval_steps_per_second": 0.151,
39
- "eval_wer": 133.72535948957244,
40
  "step": 157
41
- },
42
- {
43
- "epoch": 1.2752,
44
- "grad_norm": 183927.1875,
45
- "learning_rate": 4e-05,
46
- "loss": 0.0662,
47
- "step": 200
48
- },
49
- {
50
- "epoch": 1.5952,
51
- "grad_norm": 234198.296875,
52
- "learning_rate": 5e-05,
53
- "loss": 0.0577,
54
- "step": 250
55
- },
56
- {
57
- "epoch": 1.9152,
58
- "grad_norm": 209207.671875,
59
- "learning_rate": 6e-05,
60
- "loss": 0.0559,
61
- "step": 300
62
- },
63
- {
64
- "epoch": 2.0,
65
- "eval_cer": 56.4423610340619,
66
- "eval_loss": 0.037598446011543274,
67
- "eval_runtime": 691.9923,
68
- "eval_samples_per_second": 2.889,
69
- "eval_steps_per_second": 0.181,
70
- "eval_wer": 116.80792305494714,
71
- "step": 314
72
- },
73
- {
74
- "epoch": 2.2304,
75
- "grad_norm": 127075.0234375,
76
- "learning_rate": 7e-05,
77
- "loss": 0.0456,
78
- "step": 350
79
- },
80
- {
81
- "epoch": 2.5504,
82
- "grad_norm": 112266.140625,
83
- "learning_rate": 8e-05,
84
- "loss": 0.0389,
85
- "step": 400
86
- },
87
- {
88
- "epoch": 2.8704,
89
- "grad_norm": 123542.2421875,
90
- "learning_rate": 9e-05,
91
- "loss": 0.0399,
92
- "step": 450
93
- },
94
- {
95
- "epoch": 3.0,
96
- "eval_cer": 29.94396982136913,
97
- "eval_loss": 0.03602377325296402,
98
- "eval_runtime": 643.0305,
99
- "eval_samples_per_second": 3.109,
100
- "eval_steps_per_second": 0.194,
101
- "eval_wer": 72.37405961337015,
102
- "step": 471
103
- },
104
- {
105
- "epoch": 3.1856,
106
- "grad_norm": 129181.1328125,
107
- "learning_rate": 0.0001,
108
- "loss": 0.0326,
109
- "step": 500
110
- },
111
- {
112
- "epoch": 3.5056000000000003,
113
- "grad_norm": 195835.4375,
114
- "learning_rate": 9.991016468658499e-05,
115
- "loss": 0.0327,
116
- "step": 550
117
- },
118
- {
119
- "epoch": 3.8256,
120
- "grad_norm": 177487.765625,
121
- "learning_rate": 9.964098156168142e-05,
122
- "loss": 0.0322,
123
- "step": 600
124
- },
125
- {
126
- "epoch": 4.0,
127
- "eval_cer": 18.302452013757904,
128
- "eval_loss": 0.033060286194086075,
129
- "eval_runtime": 627.0494,
130
- "eval_samples_per_second": 3.188,
131
- "eval_steps_per_second": 0.199,
132
- "eval_wer": 46.46700314255785,
133
- "step": 628
134
- },
135
- {
136
- "epoch": 4.1408,
137
- "grad_norm": 113981.9140625,
138
- "learning_rate": 9.919341791130496e-05,
139
- "loss": 0.0279,
140
- "step": 650
141
- },
142
- {
143
- "epoch": 4.4608,
144
- "grad_norm": 98992.5859375,
145
- "learning_rate": 9.85690820162878e-05,
146
- "loss": 0.0234,
147
- "step": 700
148
- },
149
- {
150
- "epoch": 4.7808,
151
- "grad_norm": 119039.2421875,
152
- "learning_rate": 9.777021737306214e-05,
153
- "loss": 0.0238,
154
- "step": 750
155
- },
156
- {
157
- "epoch": 5.0,
158
- "eval_cer": 33.07777654499057,
159
- "eval_loss": 0.03270672634243965,
160
- "eval_runtime": 643.9344,
161
- "eval_samples_per_second": 3.104,
162
- "eval_steps_per_second": 0.194,
163
- "eval_wer": 69.15531854109133,
164
- "step": 785
165
- },
166
- {
167
- "epoch": 5.096,
168
- "grad_norm": 73677.765625,
169
- "learning_rate": 9.6799694631852e-05,
170
- "loss": 0.0214,
171
- "step": 800
172
- },
173
- {
174
- "epoch": 5.416,
175
- "grad_norm": 103333.7421875,
176
- "learning_rate": 9.56610012812427e-05,
177
- "loss": 0.0168,
178
- "step": 850
179
- },
180
- {
181
- "epoch": 5.736,
182
- "grad_norm": 99932.6171875,
183
- "learning_rate": 9.435822911619564e-05,
184
- "loss": 0.0187,
185
- "step": 900
186
- },
187
- {
188
- "epoch": 6.0,
189
- "eval_cer": 21.48008432264507,
190
- "eval_loss": 0.03448895364999771,
191
- "eval_runtime": 624.9105,
192
- "eval_samples_per_second": 3.199,
193
- "eval_steps_per_second": 0.2,
194
- "eval_wer": 51.385582325492805,
195
- "step": 942
196
- },
197
- {
198
- "epoch": 6.0512,
199
- "grad_norm": 88239.1171875,
200
- "learning_rate": 9.289605953454107e-05,
201
- "loss": 0.0159,
202
- "step": 950
203
- },
204
- {
205
- "epoch": 6.3712,
206
- "grad_norm": 66374.265625,
207
- "learning_rate": 9.127974671478432e-05,
208
- "loss": 0.012,
209
- "step": 1000
210
- },
211
- {
212
- "epoch": 6.6912,
213
- "grad_norm": 64054.11328125,
214
- "learning_rate": 8.951509873567499e-05,
215
- "loss": 0.0125,
216
- "step": 1050
217
- },
218
- {
219
- "epoch": 7.0,
220
- "eval_cer": 12.522467546876733,
221
- "eval_loss": 0.03343856334686279,
222
- "eval_runtime": 610.1269,
223
- "eval_samples_per_second": 3.276,
224
- "eval_steps_per_second": 0.205,
225
- "eval_wer": 35.02523569183887,
226
- "step": 1099
227
- },
228
- {
229
- "epoch": 7.0064,
230
- "grad_norm": 62407.08203125,
231
- "learning_rate": 8.760845670538387e-05,
232
- "loss": 0.0129,
233
- "step": 1100
234
- },
235
- {
236
- "epoch": 7.3264,
237
- "grad_norm": 99814.7421875,
238
- "learning_rate": 8.556667197528543e-05,
239
- "loss": 0.0083,
240
- "step": 1150
241
- },
242
- {
243
- "epoch": 7.6464,
244
- "grad_norm": 82683.46875,
245
- "learning_rate": 8.339708152022585e-05,
246
- "loss": 0.008,
247
- "step": 1200
248
- },
249
- {
250
- "epoch": 7.9664,
251
- "grad_norm": 60897.98046875,
252
- "learning_rate": 8.110748157374565e-05,
253
- "loss": 0.0085,
254
- "step": 1250
255
- },
256
- {
257
- "epoch": 8.0,
258
- "eval_cer": 17.333296349717074,
259
- "eval_loss": 0.03527013212442398,
260
- "eval_runtime": 621.4593,
261
- "eval_samples_per_second": 3.217,
262
- "eval_steps_per_second": 0.201,
263
- "eval_wer": 44.2386439386725,
264
- "step": 1256
265
- },
266
- {
267
- "epoch": 8.2816,
268
- "grad_norm": 69180.1875,
269
- "learning_rate": 7.870609961299627e-05,
270
- "loss": 0.0057,
271
- "step": 1300
272
- },
273
- {
274
- "epoch": 8.6016,
275
- "grad_norm": 75431.734375,
276
- "learning_rate": 7.620156479402066e-05,
277
- "loss": 0.0053,
278
- "step": 1350
279
- },
280
- {
281
- "epoch": 8.9216,
282
- "grad_norm": 41871.2890625,
283
- "learning_rate": 7.360287694363566e-05,
284
- "loss": 0.0054,
285
- "step": 1400
286
- },
287
- {
288
- "epoch": 9.0,
289
- "eval_cer": 11.132808165982471,
290
- "eval_loss": 0.037653908133506775,
291
- "eval_runtime": 611.4108,
292
- "eval_samples_per_second": 3.269,
293
- "eval_steps_per_second": 0.204,
294
- "eval_wer": 32.14455766117513,
295
- "step": 1413
296
- },
297
- {
298
- "epoch": 9.2368,
299
- "grad_norm": 72383.8515625,
300
- "learning_rate": 7.091937421934157e-05,
301
- "loss": 0.0039,
302
- "step": 1450
303
- },
304
- {
305
- "epoch": 9.556799999999999,
306
- "grad_norm": 58724.45703125,
307
- "learning_rate": 6.816069955346985e-05,
308
- "loss": 0.0036,
309
- "step": 1500
310
- },
311
- {
312
- "epoch": 9.8768,
313
- "grad_norm": 55592.59765625,
314
- "learning_rate": 6.533676600214928e-05,
315
- "loss": 0.0031,
316
- "step": 1550
317
- },
318
- {
319
- "epoch": 10.0,
320
- "eval_cer": 16.999334294907356,
321
- "eval_loss": 0.038220491260290146,
322
- "eval_runtime": 634.3141,
323
- "eval_samples_per_second": 3.151,
324
- "eval_steps_per_second": 0.197,
325
- "eval_wer": 44.54813827254547,
326
- "step": 1570
327
- },
328
- {
329
- "epoch": 10.192,
330
- "grad_norm": 35171.7109375,
331
- "learning_rate": 6.245772112360568e-05,
332
- "loss": 0.0026,
333
- "step": 1600
334
- },
335
- {
336
- "epoch": 10.512,
337
- "grad_norm": 42814.4921875,
338
- "learning_rate": 5.953391051379904e-05,
339
- "loss": 0.0021,
340
- "step": 1650
341
- },
342
- {
343
- "epoch": 10.832,
344
- "grad_norm": 35447.25390625,
345
- "learning_rate": 5.6575840630429286e-05,
346
- "loss": 0.0022,
347
- "step": 1700
348
- },
349
- {
350
- "epoch": 11.0,
351
- "eval_cer": 13.609231110617998,
352
- "eval_loss": 0.039708610624074936,
353
- "eval_runtime": 620.4186,
354
- "eval_samples_per_second": 3.222,
355
- "eval_steps_per_second": 0.201,
356
- "eval_wer": 37.786877440243785,
357
- "step": 1727
358
- },
359
- {
360
- "epoch": 11.1472,
361
- "grad_norm": 55043.80078125,
362
- "learning_rate": 5.359414103889947e-05,
363
- "loss": 0.0017,
364
- "step": 1750
365
- },
366
- {
367
- "epoch": 11.4672,
368
- "grad_norm": 36882.04296875,
369
- "learning_rate": 5.059952621590216e-05,
370
- "loss": 0.0014,
371
- "step": 1800
372
- },
373
- {
374
- "epoch": 11.7872,
375
- "grad_norm": 36965.68359375,
376
- "learning_rate": 4.7602757047884595e-05,
377
- "loss": 0.0012,
378
- "step": 1850
379
- },
380
- {
381
- "epoch": 12.0,
382
- "eval_cer": 12.376012426495063,
383
- "eval_loss": 0.041336335241794586,
384
- "eval_runtime": 613.9261,
385
- "eval_samples_per_second": 3.256,
386
- "eval_steps_per_second": 0.204,
387
- "eval_wer": 35.02047424054852,
388
- "step": 1884
389
- },
390
- {
391
- "epoch": 12.1024,
392
- "grad_norm": 35653.84375,
393
- "learning_rate": 4.461460216274445e-05,
394
- "loss": 0.0011,
395
- "step": 1900
396
- },
397
- {
398
- "epoch": 12.4224,
399
- "grad_norm": 17012.79296875,
400
- "learning_rate": 4.1645799233707284e-05,
401
- "loss": 0.0008,
402
- "step": 1950
403
- },
404
- {
405
- "epoch": 12.7424,
406
- "grad_norm": 25617.26171875,
407
- "learning_rate": 3.870701639443698e-05,
408
- "loss": 0.0008,
409
- "step": 2000
410
- },
411
- {
412
- "epoch": 13.0,
413
- "eval_cer": 13.077221790746698,
414
- "eval_loss": 0.04063521698117256,
415
- "eval_runtime": 615.5521,
416
- "eval_samples_per_second": 3.247,
417
- "eval_steps_per_second": 0.203,
418
- "eval_wer": 33.51585563279688,
419
- "step": 2041
420
- },
421
- {
422
- "epoch": 13.0576,
423
- "grad_norm": 43133.0625,
424
- "learning_rate": 3.580881390403052e-05,
425
- "loss": 0.0007,
426
- "step": 2050
427
- },
428
- {
429
- "epoch": 13.3776,
430
- "grad_norm": 28860.919921875,
431
- "learning_rate": 3.296160619965056e-05,
432
- "loss": 0.0004,
433
- "step": 2100
434
- },
435
- {
436
- "epoch": 13.6976,
437
- "grad_norm": 27712.271484375,
438
- "learning_rate": 3.0175624473156316e-05,
439
- "loss": 0.0005,
440
- "step": 2150
441
- },
442
- {
443
- "epoch": 14.0,
444
- "eval_cer": 12.631754132919118,
445
- "eval_loss": 0.04265804588794708,
446
- "eval_runtime": 611.9643,
447
- "eval_samples_per_second": 3.267,
448
- "eval_steps_per_second": 0.204,
449
- "eval_wer": 33.94438624892867,
450
- "step": 2198
451
- },
452
- {
453
- "epoch": 14.0128,
454
- "grad_norm": 4747.3388671875,
455
- "learning_rate": 2.7460879906210487e-05,
456
- "loss": 0.0004,
457
- "step": 2200
458
- },
459
- {
460
- "epoch": 14.3328,
461
- "grad_norm": 10129.0400390625,
462
- "learning_rate": 2.482712769597363e-05,
463
- "loss": 0.0002,
464
- "step": 2250
465
- },
466
- {
467
- "epoch": 14.6528,
468
- "grad_norm": 17136.42578125,
469
- "learning_rate": 2.22838320006563e-05,
470
- "loss": 0.0002,
471
- "step": 2300
472
- },
473
- {
474
- "epoch": 14.9728,
475
- "grad_norm": 2899.75439453125,
476
- "learning_rate": 1.9840131930894333e-05,
477
- "loss": 0.0002,
478
- "step": 2350
479
- },
480
- {
481
- "epoch": 15.0,
482
- "eval_cer": 10.566958837235104,
483
- "eval_loss": 0.04313713312149048,
484
- "eval_runtime": 619.263,
485
- "eval_samples_per_second": 3.228,
486
- "eval_steps_per_second": 0.202,
487
- "eval_wer": 31.449385772783543,
488
- "step": 2355
489
- },
490
- {
491
- "epoch": 15.288,
492
- "grad_norm": 1868.322021484375,
493
- "learning_rate": 1.7504808709154104e-05,
494
- "loss": 0.0001,
495
- "step": 2400
496
- },
497
- {
498
- "epoch": 15.608,
499
- "grad_norm": 1876.469970703125,
500
- "learning_rate": 1.5286254115177623e-05,
501
- "loss": 0.0001,
502
- "step": 2450
503
- },
504
- {
505
- "epoch": 15.928,
506
- "grad_norm": 2221.869140625,
507
- "learning_rate": 1.3192440330856004e-05,
508
- "loss": 0.0001,
509
- "step": 2500
510
- },
511
- {
512
- "epoch": 16.0,
513
- "eval_cer": 12.524131809608344,
514
- "eval_loss": 0.043873097747564316,
515
- "eval_runtime": 616.7174,
516
- "eval_samples_per_second": 3.241,
517
- "eval_steps_per_second": 0.203,
518
- "eval_wer": 35.372821636034665,
519
- "step": 2512
520
- },
521
- {
522
- "epoch": 16.2432,
523
- "grad_norm": 693.1599731445312,
524
- "learning_rate": 1.123089129289117e-05,
525
- "loss": 0.0,
526
- "step": 2550
527
- },
528
- {
529
- "epoch": 16.5632,
530
- "grad_norm": 184.430908203125,
531
- "learning_rate": 9.408655656187282e-06,
532
- "loss": 0.0,
533
- "step": 2600
534
- },
535
- {
536
- "epoch": 16.8832,
537
- "grad_norm": 1887.3065185546875,
538
- "learning_rate": 7.732281465125907e-06,
539
- "loss": 0.0,
540
- "step": 2650
541
- },
542
- {
543
- "epoch": 17.0,
544
- "eval_cer": 10.107067569066903,
545
- "eval_loss": 0.04502426087856293,
546
- "eval_runtime": 611.9114,
547
- "eval_samples_per_second": 3.267,
548
- "eval_steps_per_second": 0.204,
549
- "eval_wer": 29.268641081801732,
550
- "step": 2669
551
- },
552
- {
553
- "epoch": 17.1984,
554
- "grad_norm": 270.3798828125,
555
- "learning_rate": 6.207792623741249e-06,
556
- "loss": 0.0,
557
- "step": 2700
558
- },
559
- {
560
- "epoch": 17.5184,
561
- "grad_norm": 287.86920166015625,
562
- "learning_rate": 4.840667249347824e-06,
563
- "loss": 0.0,
564
- "step": 2750
565
- },
566
- {
567
- "epoch": 17.8384,
568
- "grad_norm": 200.27944946289062,
569
- "learning_rate": 3.63581798740511e-06,
570
- "loss": 0.0,
571
- "step": 2800
572
- },
573
- {
574
- "epoch": 18.0,
575
- "eval_cer": 11.283146566071231,
576
- "eval_loss": 0.04671463742852211,
577
- "eval_runtime": 610.44,
578
- "eval_samples_per_second": 3.275,
579
- "eval_steps_per_second": 0.205,
580
- "eval_wer": 30.45900390439006,
581
- "step": 2826
582
- },
583
- {
584
- "epoch": 18.1536,
585
- "grad_norm": 53.246883392333984,
586
- "learning_rate": 2.59757435835567e-06,
587
- "loss": 0.0,
588
- "step": 2850
589
- },
590
- {
591
- "epoch": 18.4736,
592
- "grad_norm": 68.43990325927734,
593
- "learning_rate": 1.729667199872187e-06,
594
- "loss": 0.0,
595
- "step": 2900
596
- },
597
- {
598
- "epoch": 18.7936,
599
- "grad_norm": 63.084373474121094,
600
- "learning_rate": 1.0352152604185428e-06,
601
- "loss": 0.0,
602
- "step": 2950
603
- },
604
- {
605
- "epoch": 19.0,
606
- "eval_cer": 11.1267058692999,
607
- "eval_loss": 0.047411538660526276,
608
- "eval_runtime": 609.5688,
609
- "eval_samples_per_second": 3.279,
610
- "eval_steps_per_second": 0.205,
611
- "eval_wer": 30.335206170840873,
612
- "step": 2983
613
- },
614
- {
615
- "epoch": 19.1088,
616
- "grad_norm": 42.04766845703125,
617
- "learning_rate": 5.167139923000553e-07,
618
- "loss": 0.0,
619
- "step": 3000
620
- },
621
- {
622
- "epoch": 19.4288,
623
- "grad_norm": 56.3652458190918,
624
- "learning_rate": 1.760265844738096e-07,
625
- "loss": 0.0,
626
- "step": 3050
627
- },
628
- {
629
- "epoch": 19.7488,
630
- "grad_norm": 69.01725769042969,
631
- "learning_rate": 1.4377267342158274e-08,
632
- "loss": 0.0,
633
- "step": 3100
634
- },
635
- {
636
- "epoch": 19.8768,
637
- "eval_cer": 11.179407522467546,
638
- "eval_loss": 0.047550249844789505,
639
- "eval_runtime": 603.4051,
640
- "eval_samples_per_second": 3.313,
641
- "eval_steps_per_second": 0.207,
642
- "eval_wer": 30.378059232454053,
643
- "step": 3120
644
  }
645
  ],
646
  "logging_steps": 50,
@@ -655,12 +52,12 @@
655
  "should_evaluate": false,
656
  "should_log": false,
657
  "should_save": true,
658
- "should_training_stop": true
659
  },
660
  "attributes": {}
661
  }
662
  },
663
- "total_flos": 4.89344655753216e+18,
664
  "train_batch_size": 16,
665
  "trial_name": null,
666
  "trial_params": null
 
1
  {
2
+ "best_metric": 28.53061613179697,
3
+ "best_model_checkpoint": "./Whisper_tiny_fine_tune_Quran/checkpoint-157",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 157,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.32,
13
+ "grad_norm": 151793.484375,
14
  "learning_rate": 1e-05,
15
+ "loss": 0.0293,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.64,
20
+ "grad_norm": 88808.3671875,
21
  "learning_rate": 2e-05,
22
+ "loss": 0.025,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.96,
27
+ "grad_norm": 80433.359375,
28
  "learning_rate": 3e-05,
29
+ "loss": 0.0195,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 1.0,
34
+ "eval_cer": 9.66548319094641,
35
+ "eval_loss": 0.025752274319529533,
36
+ "eval_runtime": 658.049,
37
+ "eval_samples_per_second": 3.038,
38
+ "eval_steps_per_second": 0.19,
39
+ "eval_wer": 28.53061613179697,
40
  "step": 157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  }
42
  ],
43
  "logging_steps": 50,
 
52
  "should_evaluate": false,
53
  "should_log": false,
54
  "should_save": true,
55
+ "should_training_stop": false
56
  },
57
  "attributes": {}
58
  }
59
  },
60
+ "total_flos": 2.461888512e+17,
61
  "train_batch_size": 16,
62
  "trial_name": null,
63
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbf1baa24782173efebfe10f80eef23fe0325b67b31c3dc7f3dcf13d0ccaab5f
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:317f0bc90b9977ce1f17eccbf424721a3903a4332b9f78d27fa2e7877abd38d0
3
  size 5624