cpercivati committed on
Commit
79210d0
·
verified ·
1 Parent(s): 27a6f98

Upload 9 files

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openai/whisper-base.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
@@ -25,7 +25,12 @@
25
  "encoder_layerdrop": 0.0,
26
  "encoder_layers": 6,
27
  "eos_token_id": 50256,
28
- "forced_decoder_ids": null,
 
 
 
 
 
29
  "init_std": 0.02,
30
  "is_encoder_decoder": true,
31
  "mask_feature_length": 10,
@@ -43,10 +48,101 @@
43
  "num_mel_bins": 80,
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
- "suppress_tokens": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  "torch_dtype": "float32",
48
- "transformers_version": "4.38.2",
49
- "use_cache": false,
50
  "use_weighted_layer_sum": false,
51
  "vocab_size": 51864
52
  }
 
1
  {
2
+ "_name_or_path": "/kaggle/working/whisper-ft-2/checkpoint-600/model.safetensors",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
25
  "encoder_layerdrop": 0.0,
26
  "encoder_layers": 6,
27
  "eos_token_id": 50256,
28
+ "forced_decoder_ids": [
29
+ [
30
+ 1,
31
+ 50362
32
+ ]
33
+ ],
34
  "init_std": 0.02,
35
  "is_encoder_decoder": true,
36
  "mask_feature_length": 10,
 
48
  "num_mel_bins": 80,
49
  "pad_token_id": 50256,
50
  "scale_embedding": false,
51
+ "suppress_tokens": [
52
+ 1,
53
+ 2,
54
+ 7,
55
+ 8,
56
+ 9,
57
+ 10,
58
+ 14,
59
+ 25,
60
+ 26,
61
+ 27,
62
+ 28,
63
+ 29,
64
+ 31,
65
+ 58,
66
+ 59,
67
+ 60,
68
+ 61,
69
+ 62,
70
+ 63,
71
+ 90,
72
+ 91,
73
+ 92,
74
+ 93,
75
+ 357,
76
+ 366,
77
+ 438,
78
+ 532,
79
+ 685,
80
+ 705,
81
+ 796,
82
+ 930,
83
+ 1058,
84
+ 1220,
85
+ 1267,
86
+ 1279,
87
+ 1303,
88
+ 1343,
89
+ 1377,
90
+ 1391,
91
+ 1635,
92
+ 1782,
93
+ 1875,
94
+ 2162,
95
+ 2361,
96
+ 2488,
97
+ 3467,
98
+ 4008,
99
+ 4211,
100
+ 4600,
101
+ 4808,
102
+ 5299,
103
+ 5855,
104
+ 6329,
105
+ 7203,
106
+ 9609,
107
+ 9959,
108
+ 10563,
109
+ 10786,
110
+ 11420,
111
+ 11709,
112
+ 11907,
113
+ 13163,
114
+ 13697,
115
+ 13700,
116
+ 14808,
117
+ 15306,
118
+ 16410,
119
+ 16791,
120
+ 17992,
121
+ 19203,
122
+ 19510,
123
+ 20724,
124
+ 22305,
125
+ 22935,
126
+ 27007,
127
+ 30109,
128
+ 30420,
129
+ 33409,
130
+ 34949,
131
+ 40283,
132
+ 40493,
133
+ 40549,
134
+ 47282,
135
+ 49146,
136
+ 50257,
137
+ 50357,
138
+ 50358,
139
+ 50359,
140
+ 50360,
141
+ 50361
142
+ ],
143
  "torch_dtype": "float32",
144
+ "transformers_version": "4.42.3",
145
+ "use_cache": true,
146
  "use_weighted_layer_sum": false,
147
  "vocab_size": 51864
148
  }
generation_config.json CHANGED
@@ -1,26 +1,4 @@
1
  {
2
- "alignment_heads": [
3
- [
4
- 3,
5
- 3
6
- ],
7
- [
8
- 4,
9
- 7
10
- ],
11
- [
12
- 5,
13
- 1
14
- ],
15
- [
16
- 5,
17
- 5
18
- ],
19
- [
20
- 5,
21
- 7
22
- ]
23
- ],
24
  "begin_suppress_tokens": [
25
  220,
26
  50256
@@ -34,13 +12,8 @@
34
  50362
35
  ]
36
  ],
37
- "is_multilingual": false,
38
- "max_initial_timestamp_index": 50,
39
  "max_length": 448,
40
- "no_timestamps_token_id": 50362,
41
  "pad_token_id": 50256,
42
- "prev_sot_token_id": 50360,
43
- "return_timestamps": false,
44
  "suppress_tokens": [
45
  1,
46
  2,
@@ -133,5 +106,5 @@
133
  50360,
134
  50361
135
  ],
136
- "transformers_version": "4.38.2"
137
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "begin_suppress_tokens": [
3
  220,
4
  50256
 
12
  50362
13
  ]
14
  ],
 
 
15
  "max_length": 448,
 
16
  "pad_token_id": 50256,
 
 
17
  "suppress_tokens": [
18
  1,
19
  2,
 
106
  50360,
107
  50361
108
  ],
109
+ "transformers_version": "4.42.3"
110
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f26bf17bf8046342e89171c8323b425642f7e61c3ea8bf22ed7c5b1ceb274ca
3
  size 290401888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22aa2c1a7ff282ac49e8b7b35970dc582ab111868362fb5d325524b1e181646a
3
  size 290401888
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64ff846159375dd759f4b87e313376fc9c713e195386cd980e0c0cf7865dc6af
3
  size 574807418
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e858eb0107d3a7caf2bce9b6dcbae6286b6e0670211dd4e01d9772be12056a79
3
  size 574807418
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75e86984aae1f369afbae73f50a6df2db09bc057180ac42ff74f5d33ce72e723
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f603c8d0789e2771e72e17a35d09664975138faec34bc1a21798b82a6ebaa3
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:408b86bc95e437bcb37ef1818966a87d9013926bc3d5ae3d46a9b5ee7f360e3e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78f763231c165c2571b07da4d26c7965824aff598ab2cc82420929859dcb97f0
3
  size 1064
trainer_state.json CHANGED
@@ -1,1466 +1,882 @@
1
  {
2
- "best_metric": 39.414129810453765,
3
- "best_model_checkpoint": "./whisper-ft-2/checkpoint-5000",
4
- "epoch": 1.0,
5
- "eval_steps": 1000,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01,
13
- "grad_norm": 5.948551654815674,
14
- "learning_rate": 7.8125e-06,
15
- "loss": 0.1937,
16
- "step": 25
17
- },
18
- {
19
- "epoch": 0.01,
20
- "grad_norm": 5.579243183135986,
21
- "learning_rate": 9.96376811594203e-06,
22
- "loss": 0.1739,
23
- "step": 50
24
- },
25
- {
26
- "epoch": 0.01,
27
- "grad_norm": 4.500240325927734,
28
- "learning_rate": 9.913446054750404e-06,
29
- "loss": 0.1795,
30
- "step": 75
31
- },
32
- {
33
- "epoch": 0.02,
34
- "grad_norm": 2.341679811477661,
35
- "learning_rate": 9.863123993558778e-06,
36
- "loss": 0.1617,
37
- "step": 100
38
- },
39
- {
40
- "epoch": 0.03,
41
- "grad_norm": 5.624894618988037,
42
- "learning_rate": 9.81280193236715e-06,
43
- "loss": 0.1669,
44
- "step": 125
45
- },
46
- {
47
- "epoch": 0.03,
48
- "grad_norm": 6.136338710784912,
49
- "learning_rate": 9.762479871175523e-06,
50
- "loss": 0.1635,
51
- "step": 150
52
- },
53
- {
54
- "epoch": 0.04,
55
- "grad_norm": 4.855004787445068,
56
- "learning_rate": 9.712157809983898e-06,
57
- "loss": 0.1611,
58
- "step": 175
59
- },
60
- {
61
- "epoch": 0.04,
62
- "grad_norm": 6.826947212219238,
63
- "learning_rate": 9.66183574879227e-06,
64
- "loss": 0.1661,
65
- "step": 200
66
- },
67
- {
68
- "epoch": 0.04,
69
- "grad_norm": 4.076170921325684,
70
- "learning_rate": 9.611513687600645e-06,
71
- "loss": 0.1754,
72
- "step": 225
73
- },
74
- {
75
- "epoch": 0.05,
76
- "grad_norm": 7.027714729309082,
77
- "learning_rate": 9.56119162640902e-06,
78
- "loss": 0.2,
79
- "step": 250
80
- },
81
- {
82
- "epoch": 0.06,
83
- "grad_norm": 4.252183437347412,
84
- "learning_rate": 9.510869565217392e-06,
85
- "loss": 0.1827,
86
- "step": 275
87
  },
88
  {
89
- "epoch": 0.06,
90
- "grad_norm": 7.006402492523193,
91
- "learning_rate": 9.460547504025765e-06,
92
- "loss": 0.1794,
93
- "step": 300
94
- },
95
- {
96
- "epoch": 0.07,
97
- "grad_norm": 5.446494102478027,
98
- "learning_rate": 9.41022544283414e-06,
99
- "loss": 0.1949,
100
- "step": 325
101
- },
102
- {
103
- "epoch": 0.07,
104
- "grad_norm": 4.943406581878662,
105
- "learning_rate": 9.359903381642514e-06,
106
- "loss": 0.1732,
107
- "step": 350
108
- },
109
- {
110
- "epoch": 0.07,
111
- "grad_norm": 6.656269073486328,
112
- "learning_rate": 9.309581320450886e-06,
113
- "loss": 0.1742,
114
- "step": 375
115
- },
116
- {
117
- "epoch": 0.08,
118
- "grad_norm": 4.511006832122803,
119
- "learning_rate": 9.25925925925926e-06,
120
- "loss": 0.163,
121
- "step": 400
122
- },
123
- {
124
- "epoch": 0.09,
125
- "grad_norm": 4.760077953338623,
126
- "learning_rate": 9.208937198067634e-06,
127
- "loss": 0.1564,
128
- "step": 425
129
- },
130
- {
131
- "epoch": 0.09,
132
- "grad_norm": 5.462475299835205,
133
- "learning_rate": 9.160628019323673e-06,
134
- "loss": 0.1381,
135
- "step": 450
136
  },
137
  {
138
  "epoch": 0.1,
139
- "grad_norm": 6.01568603515625,
140
- "learning_rate": 9.110305958132047e-06,
141
- "loss": 0.1295,
142
- "step": 475
143
  },
144
  {
145
- "epoch": 0.1,
146
- "grad_norm": 3.843243360519409,
147
- "learning_rate": 9.05998389694042e-06,
148
- "loss": 0.1139,
149
- "step": 500
150
  },
151
  {
152
- "epoch": 0.1,
153
- "grad_norm": 3.413757085800171,
154
- "learning_rate": 9.009661835748792e-06,
155
- "loss": 0.1145,
156
- "step": 525
157
- },
158
- {
159
- "epoch": 0.11,
160
- "grad_norm": 5.756856918334961,
161
- "learning_rate": 8.959339774557167e-06,
162
- "loss": 0.1043,
163
- "step": 550
164
- },
165
- {
166
- "epoch": 0.12,
167
- "grad_norm": 4.615521430969238,
168
- "learning_rate": 8.90901771336554e-06,
169
- "loss": 0.1617,
170
- "step": 575
171
- },
172
- {
173
- "epoch": 0.12,
174
- "grad_norm": 7.172885894775391,
175
- "learning_rate": 8.858695652173914e-06,
176
- "loss": 0.166,
177
- "step": 600
178
- },
179
- {
180
- "epoch": 0.12,
181
- "grad_norm": 4.559473514556885,
182
- "learning_rate": 8.808373590982288e-06,
183
- "loss": 0.1274,
184
- "step": 625
185
- },
186
- {
187
- "epoch": 0.13,
188
- "grad_norm": 4.201319694519043,
189
- "learning_rate": 8.758051529790661e-06,
190
- "loss": 0.1382,
191
- "step": 650
192
- },
193
- {
194
- "epoch": 0.14,
195
- "grad_norm": 4.814964771270752,
196
- "learning_rate": 8.707729468599034e-06,
197
- "loss": 0.1448,
198
- "step": 675
199
- },
200
- {
201
- "epoch": 0.14,
202
- "grad_norm": 5.314846992492676,
203
- "learning_rate": 8.657407407407408e-06,
204
- "loss": 0.1513,
205
- "step": 700
206
- },
207
- {
208
- "epoch": 0.14,
209
- "grad_norm": 3.898376941680908,
210
- "learning_rate": 8.607085346215783e-06,
211
- "loss": 0.1634,
212
- "step": 725
213
- },
214
- {
215
- "epoch": 0.15,
216
- "grad_norm": 5.028552055358887,
217
- "learning_rate": 8.556763285024155e-06,
218
- "loss": 0.1584,
219
- "step": 750
220
- },
221
- {
222
- "epoch": 0.15,
223
- "grad_norm": 5.497000694274902,
224
- "learning_rate": 8.506441223832528e-06,
225
- "loss": 0.1642,
226
- "step": 775
227
- },
228
- {
229
- "epoch": 0.16,
230
- "grad_norm": 4.642214298248291,
231
- "learning_rate": 8.456119162640902e-06,
232
- "loss": 0.174,
233
- "step": 800
234
- },
235
- {
236
- "epoch": 0.17,
237
- "grad_norm": 4.980218887329102,
238
- "learning_rate": 8.405797101449275e-06,
239
- "loss": 0.1885,
240
- "step": 825
241
- },
242
- {
243
- "epoch": 0.17,
244
- "grad_norm": 4.9581618309021,
245
- "learning_rate": 8.35547504025765e-06,
246
- "loss": 0.1928,
247
- "step": 850
248
- },
249
- {
250
- "epoch": 0.17,
251
- "grad_norm": 4.621245861053467,
252
- "learning_rate": 8.305152979066024e-06,
253
- "loss": 0.1629,
254
- "step": 875
255
- },
256
- {
257
- "epoch": 0.18,
258
- "grad_norm": 4.990960121154785,
259
- "learning_rate": 8.254830917874397e-06,
260
- "loss": 0.206,
261
- "step": 900
262
- },
263
- {
264
- "epoch": 0.18,
265
- "grad_norm": 4.905004501342773,
266
- "learning_rate": 8.20450885668277e-06,
267
- "loss": 0.232,
268
- "step": 925
269
- },
270
- {
271
- "epoch": 0.19,
272
- "grad_norm": 6.192000865936279,
273
- "learning_rate": 8.154186795491144e-06,
274
- "loss": 0.2406,
275
- "step": 950
276
- },
277
- {
278
- "epoch": 0.2,
279
- "grad_norm": 4.241088390350342,
280
- "learning_rate": 8.103864734299518e-06,
281
- "loss": 0.206,
282
- "step": 975
283
- },
284
- {
285
- "epoch": 0.2,
286
- "grad_norm": 5.220916271209717,
287
- "learning_rate": 8.053542673107891e-06,
288
- "loss": 0.2176,
289
- "step": 1000
290
- },
291
- {
292
- "epoch": 0.2,
293
- "eval_loss": 0.3194602429866791,
294
- "eval_runtime": 281.282,
295
- "eval_samples_per_second": 3.555,
296
- "eval_steps_per_second": 0.444,
297
- "eval_wer": 61.69442848937392,
298
- "step": 1000
299
  },
300
  {
301
  "epoch": 0.2,
302
- "grad_norm": 4.905993461608887,
303
- "learning_rate": 8.003220611916265e-06,
304
- "loss": 0.2084,
305
- "step": 1025
306
- },
307
- {
308
- "epoch": 0.21,
309
- "grad_norm": 6.92851448059082,
310
- "learning_rate": 7.952898550724638e-06,
311
- "loss": 0.1915,
312
- "step": 1050
313
- },
314
- {
315
- "epoch": 0.21,
316
- "grad_norm": 6.4899725914001465,
317
- "learning_rate": 7.90257648953301e-06,
318
- "loss": 0.2366,
319
- "step": 1075
320
- },
321
- {
322
- "epoch": 0.22,
323
- "grad_norm": 6.207301139831543,
324
- "learning_rate": 7.852254428341385e-06,
325
- "loss": 0.2334,
326
- "step": 1100
327
- },
328
- {
329
- "epoch": 0.23,
330
- "grad_norm": 7.392577648162842,
331
- "learning_rate": 7.80193236714976e-06,
332
- "loss": 0.2388,
333
- "step": 1125
334
- },
335
- {
336
- "epoch": 0.23,
337
- "grad_norm": 4.038721084594727,
338
- "learning_rate": 7.751610305958132e-06,
339
- "loss": 0.2316,
340
- "step": 1150
341
- },
342
- {
343
- "epoch": 0.23,
344
- "grad_norm": 5.786244869232178,
345
- "learning_rate": 7.701288244766507e-06,
346
- "loss": 0.2238,
347
- "step": 1175
348
- },
349
- {
350
- "epoch": 0.24,
351
- "grad_norm": 7.598459243774414,
352
- "learning_rate": 7.65096618357488e-06,
353
- "loss": 0.217,
354
- "step": 1200
355
- },
356
- {
357
- "epoch": 0.24,
358
- "grad_norm": 8.268415451049805,
359
- "learning_rate": 7.600644122383254e-06,
360
- "loss": 0.2337,
361
- "step": 1225
362
- },
363
- {
364
- "epoch": 0.25,
365
- "grad_norm": 7.092071533203125,
366
- "learning_rate": 7.5503220611916275e-06,
367
- "loss": 0.2106,
368
- "step": 1250
369
- },
370
- {
371
- "epoch": 0.26,
372
- "grad_norm": 5.1447954177856445,
373
- "learning_rate": 7.500000000000001e-06,
374
- "loss": 0.2088,
375
- "step": 1275
376
- },
377
- {
378
- "epoch": 0.26,
379
- "grad_norm": 6.847914695739746,
380
- "learning_rate": 7.449677938808374e-06,
381
- "loss": 0.2151,
382
- "step": 1300
383
- },
384
- {
385
- "epoch": 0.27,
386
- "grad_norm": 5.531068801879883,
387
- "learning_rate": 7.399355877616747e-06,
388
- "loss": 0.2031,
389
- "step": 1325
390
- },
391
- {
392
- "epoch": 0.27,
393
- "grad_norm": 5.454577445983887,
394
- "learning_rate": 7.349033816425122e-06,
395
- "loss": 0.1933,
396
- "step": 1350
397
- },
398
- {
399
- "epoch": 0.28,
400
- "grad_norm": 5.625954627990723,
401
- "learning_rate": 7.298711755233495e-06,
402
- "loss": 0.2437,
403
- "step": 1375
404
- },
405
- {
406
- "epoch": 0.28,
407
- "grad_norm": 5.812635898590088,
408
- "learning_rate": 7.248389694041869e-06,
409
- "loss": 0.2787,
410
- "step": 1400
411
- },
412
- {
413
- "epoch": 0.28,
414
- "grad_norm": 6.263451099395752,
415
- "learning_rate": 7.1980676328502416e-06,
416
- "loss": 0.3031,
417
- "step": 1425
418
- },
419
- {
420
- "epoch": 0.29,
421
- "grad_norm": 5.698704242706299,
422
- "learning_rate": 7.147745571658615e-06,
423
- "loss": 0.2982,
424
- "step": 1450
425
  },
426
  {
427
- "epoch": 0.29,
428
- "grad_norm": 7.498880386352539,
429
- "learning_rate": 7.0974235104669895e-06,
430
- "loss": 0.2548,
431
- "step": 1475
432
  },
433
  {
434
- "epoch": 0.3,
435
- "grad_norm": 5.943820476531982,
436
- "learning_rate": 7.047101449275363e-06,
437
- "loss": 0.2153,
438
- "step": 1500
439
  },
440
  {
441
  "epoch": 0.3,
442
- "grad_norm": 7.872410297393799,
443
- "learning_rate": 6.996779388083737e-06,
444
- "loss": 0.2617,
445
- "step": 1525
446
- },
447
- {
448
- "epoch": 0.31,
449
- "grad_norm": 8.562033653259277,
450
- "learning_rate": 6.94645732689211e-06,
451
- "loss": 0.2733,
452
- "step": 1550
453
- },
454
- {
455
- "epoch": 0.32,
456
- "grad_norm": 6.5205888748168945,
457
- "learning_rate": 6.896135265700483e-06,
458
- "loss": 0.2294,
459
- "step": 1575
460
- },
461
- {
462
- "epoch": 0.32,
463
- "grad_norm": 5.271300792694092,
464
- "learning_rate": 6.845813204508857e-06,
465
- "loss": 0.2271,
466
- "step": 1600
467
- },
468
- {
469
- "epoch": 0.33,
470
- "grad_norm": 9.590472221374512,
471
- "learning_rate": 6.795491143317231e-06,
472
- "loss": 0.2366,
473
- "step": 1625
474
- },
475
- {
476
- "epoch": 0.33,
477
- "grad_norm": 5.548666000366211,
478
- "learning_rate": 6.7451690821256045e-06,
479
- "loss": 0.188,
480
- "step": 1650
481
- },
482
- {
483
- "epoch": 0.34,
484
- "grad_norm": 7.139528751373291,
485
- "learning_rate": 6.694847020933978e-06,
486
- "loss": 0.1934,
487
- "step": 1675
488
- },
489
- {
490
- "epoch": 0.34,
491
- "grad_norm": 7.057426929473877,
492
- "learning_rate": 6.644524959742351e-06,
493
- "loss": 0.2008,
494
- "step": 1700
495
- },
496
- {
497
- "epoch": 0.34,
498
- "grad_norm": 5.961209774017334,
499
- "learning_rate": 6.594202898550725e-06,
500
- "loss": 0.2003,
501
- "step": 1725
502
- },
503
- {
504
- "epoch": 0.35,
505
- "grad_norm": 6.465278625488281,
506
- "learning_rate": 6.543880837359099e-06,
507
- "loss": 0.1979,
508
- "step": 1750
509
- },
510
- {
511
- "epoch": 0.35,
512
- "grad_norm": 4.620067119598389,
513
- "learning_rate": 6.493558776167472e-06,
514
- "loss": 0.2111,
515
- "step": 1775
516
- },
517
- {
518
- "epoch": 0.36,
519
- "grad_norm": 6.613126754760742,
520
- "learning_rate": 6.443236714975846e-06,
521
- "loss": 0.2045,
522
- "step": 1800
523
- },
524
- {
525
- "epoch": 0.36,
526
- "grad_norm": 4.759819507598877,
527
- "learning_rate": 6.3929146537842194e-06,
528
- "loss": 0.1908,
529
- "step": 1825
530
- },
531
- {
532
- "epoch": 0.37,
533
- "grad_norm": 7.598659515380859,
534
- "learning_rate": 6.342592592592594e-06,
535
- "loss": 0.2148,
536
- "step": 1850
537
- },
538
- {
539
- "epoch": 0.38,
540
- "grad_norm": 7.476439952850342,
541
- "learning_rate": 6.2922705314009666e-06,
542
- "loss": 0.2213,
543
- "step": 1875
544
- },
545
- {
546
- "epoch": 0.38,
547
- "grad_norm": 5.949714183807373,
548
- "learning_rate": 6.24194847020934e-06,
549
- "loss": 0.2106,
550
- "step": 1900
551
- },
552
- {
553
- "epoch": 0.39,
554
- "grad_norm": 4.579805374145508,
555
- "learning_rate": 6.191626409017714e-06,
556
- "loss": 0.2128,
557
- "step": 1925
558
  },
559
  {
560
- "epoch": 0.39,
561
- "grad_norm": 6.055177211761475,
562
- "learning_rate": 6.141304347826087e-06,
563
- "loss": 0.203,
564
- "step": 1950
565
- },
566
- {
567
- "epoch": 0.4,
568
- "grad_norm": 4.678502559661865,
569
- "learning_rate": 6.090982286634462e-06,
570
- "loss": 0.2089,
571
- "step": 1975
572
  },
573
  {
574
- "epoch": 0.4,
575
- "grad_norm": 4.454530715942383,
576
- "learning_rate": 6.040660225442834e-06,
577
- "loss": 0.1811,
578
- "step": 2000
579
  },
580
  {
581
  "epoch": 0.4,
582
- "eval_loss": 0.28770312666893005,
583
- "eval_runtime": 287.8226,
584
- "eval_samples_per_second": 3.474,
585
- "eval_steps_per_second": 0.434,
586
- "eval_wer": 48.40896036760483,
587
- "step": 2000
588
- },
589
- {
590
- "epoch": 0.41,
591
- "grad_norm": 8.851235389709473,
592
- "learning_rate": 5.990338164251208e-06,
593
- "loss": 0.1833,
594
- "step": 2025
595
- },
596
- {
597
- "epoch": 0.41,
598
- "grad_norm": 7.610278606414795,
599
- "learning_rate": 5.9400161030595815e-06,
600
- "loss": 0.2345,
601
- "step": 2050
602
  },
603
  {
604
- "epoch": 0.41,
605
- "grad_norm": 6.303083419799805,
606
- "learning_rate": 5.889694041867955e-06,
607
- "loss": 0.2612,
608
- "step": 2075
609
  },
610
  {
611
- "epoch": 0.42,
612
- "grad_norm": 6.611576557159424,
613
- "learning_rate": 5.8393719806763295e-06,
614
- "loss": 0.2757,
615
- "step": 2100
616
  },
617
  {
618
- "epoch": 0.42,
619
- "grad_norm": 7.016587257385254,
620
- "learning_rate": 5.789049919484703e-06,
621
- "loss": 0.2339,
622
- "step": 2125
623
- },
624
- {
625
- "epoch": 0.43,
626
- "grad_norm": 5.895429611206055,
627
- "learning_rate": 5.738727858293076e-06,
628
- "loss": 0.2227,
629
- "step": 2150
630
- },
631
- {
632
- "epoch": 0.43,
633
- "grad_norm": 6.225393295288086,
634
- "learning_rate": 5.688405797101449e-06,
635
- "loss": 0.2254,
636
- "step": 2175
637
- },
638
- {
639
- "epoch": 0.44,
640
- "grad_norm": 4.9755401611328125,
641
- "learning_rate": 5.638083735909823e-06,
642
- "loss": 0.2268,
643
- "step": 2200
644
- },
645
- {
646
- "epoch": 0.45,
647
- "grad_norm": 4.857475757598877,
648
- "learning_rate": 5.587761674718197e-06,
649
- "loss": 0.2554,
650
- "step": 2225
651
- },
652
- {
653
- "epoch": 0.45,
654
- "grad_norm": 6.710099697113037,
655
- "learning_rate": 5.537439613526571e-06,
656
- "loss": 0.2674,
657
- "step": 2250
658
- },
659
- {
660
- "epoch": 0.46,
661
- "grad_norm": 6.126322269439697,
662
- "learning_rate": 5.4871175523349444e-06,
663
- "loss": 0.2679,
664
- "step": 2275
665
- },
666
- {
667
- "epoch": 0.46,
668
- "grad_norm": 8.071053504943848,
669
- "learning_rate": 5.436795491143317e-06,
670
- "loss": 0.2781,
671
- "step": 2300
672
- },
673
- {
674
- "epoch": 0.47,
675
- "grad_norm": 6.979907035827637,
676
- "learning_rate": 5.386473429951691e-06,
677
- "loss": 0.2821,
678
- "step": 2325
679
- },
680
- {
681
- "epoch": 0.47,
682
- "grad_norm": 7.387816429138184,
683
- "learning_rate": 5.336151368760065e-06,
684
- "loss": 0.3021,
685
- "step": 2350
686
- },
687
- {
688
- "epoch": 0.47,
689
- "grad_norm": 4.772555351257324,
690
- "learning_rate": 5.285829307568439e-06,
691
- "loss": 0.2713,
692
- "step": 2375
693
- },
694
- {
695
- "epoch": 0.48,
696
- "grad_norm": 6.187594890594482,
697
- "learning_rate": 5.235507246376812e-06,
698
- "loss": 0.2674,
699
- "step": 2400
700
  },
701
  {
702
- "epoch": 0.48,
703
- "grad_norm": 3.790215492248535,
704
- "learning_rate": 5.185185185185185e-06,
705
- "loss": 0.212,
706
- "step": 2425
707
  },
708
  {
709
- "epoch": 0.49,
710
- "grad_norm": 5.831188201904297,
711
- "learning_rate": 5.1348631239935585e-06,
712
- "loss": 0.1897,
713
- "step": 2450
714
  },
715
  {
716
- "epoch": 0.49,
717
- "grad_norm": 5.055106163024902,
718
- "learning_rate": 5.084541062801933e-06,
719
- "loss": 0.2233,
720
- "step": 2475
721
  },
722
  {
723
- "epoch": 0.5,
724
- "grad_norm": 5.827037811279297,
725
- "learning_rate": 5.0342190016103065e-06,
726
- "loss": 0.2439,
727
- "step": 2500
728
  },
729
  {
730
- "epoch": 0.51,
731
- "grad_norm": 5.658998966217041,
732
- "learning_rate": 4.985909822866345e-06,
733
- "loss": 0.2912,
734
- "step": 2525
735
  },
736
  {
737
- "epoch": 0.51,
738
- "grad_norm": 7.745667457580566,
739
- "learning_rate": 4.935587761674719e-06,
740
- "loss": 0.3364,
741
- "step": 2550
742
  },
743
  {
744
- "epoch": 0.52,
745
- "grad_norm": 7.70411491394043,
746
- "learning_rate": 4.885265700483092e-06,
747
- "loss": 0.299,
748
- "step": 2575
749
  },
750
  {
751
- "epoch": 0.52,
752
- "grad_norm": 4.991663932800293,
753
- "learning_rate": 4.834943639291465e-06,
754
- "loss": 0.225,
755
- "step": 2600
756
  },
757
  {
758
- "epoch": 0.53,
759
- "grad_norm": 4.773478031158447,
760
- "learning_rate": 4.78462157809984e-06,
761
- "loss": 0.2118,
762
- "step": 2625
763
  },
764
  {
765
- "epoch": 0.53,
766
- "grad_norm": 5.240381717681885,
767
- "learning_rate": 4.7342995169082125e-06,
768
- "loss": 0.2136,
769
- "step": 2650
770
  },
771
  {
772
- "epoch": 0.54,
773
- "grad_norm": 5.283033847808838,
774
- "learning_rate": 4.683977455716587e-06,
775
- "loss": 0.2146,
776
- "step": 2675
777
  },
778
  {
779
- "epoch": 0.54,
780
- "grad_norm": 5.568673610687256,
781
- "learning_rate": 4.6336553945249605e-06,
782
- "loss": 0.2033,
783
- "step": 2700
784
  },
785
  {
786
- "epoch": 0.55,
787
- "grad_norm": 6.020970821380615,
788
- "learning_rate": 4.583333333333333e-06,
789
- "loss": 0.1728,
790
- "step": 2725
791
  },
792
  {
793
- "epoch": 0.55,
794
- "grad_norm": 6.083353042602539,
795
- "learning_rate": 4.533011272141708e-06,
796
- "loss": 0.1626,
797
- "step": 2750
798
  },
799
  {
800
- "epoch": 0.56,
801
- "grad_norm": 5.078822135925293,
802
- "learning_rate": 4.482689210950081e-06,
803
- "loss": 0.178,
804
- "step": 2775
805
  },
806
  {
807
- "epoch": 0.56,
808
- "grad_norm": 4.692159175872803,
809
- "learning_rate": 4.432367149758455e-06,
810
- "loss": 0.1805,
811
- "step": 2800
812
  },
813
  {
814
- "epoch": 0.56,
815
- "grad_norm": 6.026794910430908,
816
- "learning_rate": 4.382045088566828e-06,
817
- "loss": 0.2099,
818
- "step": 2825
819
  },
820
  {
821
- "epoch": 0.57,
822
- "grad_norm": 5.062854290008545,
823
- "learning_rate": 4.331723027375201e-06,
824
- "loss": 0.2449,
825
- "step": 2850
826
  },
827
  {
828
- "epoch": 0.57,
829
- "grad_norm": 5.74497652053833,
830
- "learning_rate": 4.2814009661835754e-06,
831
- "loss": 0.2114,
832
- "step": 2875
833
  },
834
  {
835
- "epoch": 0.58,
836
- "grad_norm": 8.087077140808105,
837
- "learning_rate": 4.231078904991949e-06,
838
- "loss": 0.2078,
839
- "step": 2900
840
  },
841
  {
842
- "epoch": 0.58,
843
- "grad_norm": 4.35167121887207,
844
- "learning_rate": 4.1807568438003226e-06,
845
- "loss": 0.2068,
846
- "step": 2925
847
  },
848
  {
849
- "epoch": 0.59,
850
- "grad_norm": 4.306957721710205,
851
- "learning_rate": 4.130434782608696e-06,
852
- "loss": 0.1802,
853
- "step": 2950
854
  },
855
  {
856
- "epoch": 0.59,
857
- "grad_norm": 3.7093899250030518,
858
- "learning_rate": 4.08011272141707e-06,
859
- "loss": 0.1632,
860
- "step": 2975
861
  },
862
  {
863
- "epoch": 0.6,
864
- "grad_norm": 3.872870922088623,
865
- "learning_rate": 4.029790660225443e-06,
866
- "loss": 0.1556,
867
- "step": 3000
868
  },
869
  {
870
- "epoch": 0.6,
871
- "eval_loss": 0.246797576546669,
872
- "eval_runtime": 287.6255,
873
- "eval_samples_per_second": 3.477,
874
- "eval_steps_per_second": 0.435,
875
- "eval_wer": 42.12521539345204,
876
- "step": 3000
877
- },
878
- {
879
- "epoch": 0.6,
880
- "grad_norm": 5.306319713592529,
881
- "learning_rate": 3.979468599033817e-06,
882
- "loss": 0.1559,
883
- "step": 3025
884
  },
885
  {
886
- "epoch": 0.61,
887
- "grad_norm": 4.230405807495117,
888
- "learning_rate": 3.92914653784219e-06,
889
- "loss": 0.1847,
890
- "step": 3050
891
  },
892
  {
893
- "epoch": 0.61,
894
- "grad_norm": 5.295231819152832,
895
- "learning_rate": 3.878824476650564e-06,
896
- "loss": 0.2108,
897
- "step": 3075
898
  },
899
  {
900
- "epoch": 0.62,
901
- "grad_norm": 8.07589054107666,
902
- "learning_rate": 3.8285024154589375e-06,
903
- "loss": 0.207,
904
- "step": 3100
905
  },
906
  {
907
- "epoch": 0.62,
908
- "grad_norm": 6.108493804931641,
909
- "learning_rate": 3.778180354267311e-06,
910
- "loss": 0.192,
911
- "step": 3125
912
  },
913
  {
914
- "epoch": 0.63,
915
- "grad_norm": 5.475741386413574,
916
- "learning_rate": 3.7278582930756846e-06,
917
- "loss": 0.1688,
918
- "step": 3150
919
  },
920
  {
921
- "epoch": 0.64,
922
- "grad_norm": 5.193215370178223,
923
- "learning_rate": 3.6775362318840586e-06,
924
- "loss": 0.1655,
925
- "step": 3175
926
  },
927
  {
928
- "epoch": 0.64,
929
- "grad_norm": 4.918320655822754,
930
- "learning_rate": 3.6272141706924318e-06,
931
- "loss": 0.1814,
932
- "step": 3200
933
  },
934
  {
935
- "epoch": 0.65,
936
- "grad_norm": 3.8058395385742188,
937
- "learning_rate": 3.5768921095008053e-06,
938
- "loss": 0.1758,
939
- "step": 3225
940
  },
941
  {
942
- "epoch": 0.65,
943
- "grad_norm": 4.73598051071167,
944
- "learning_rate": 3.5265700483091793e-06,
945
- "loss": 0.1731,
946
- "step": 3250
947
  },
948
  {
949
- "epoch": 0.66,
950
- "grad_norm": 5.590235233306885,
951
- "learning_rate": 3.4762479871175525e-06,
952
- "loss": 0.1465,
953
- "step": 3275
954
  },
955
  {
956
- "epoch": 0.66,
957
- "grad_norm": 3.9685428142547607,
958
- "learning_rate": 3.4259259259259265e-06,
959
- "loss": 0.1571,
960
- "step": 3300
961
  },
962
  {
963
- "epoch": 0.67,
964
- "grad_norm": 5.418837547302246,
965
- "learning_rate": 3.3756038647342996e-06,
966
- "loss": 0.1985,
967
- "step": 3325
968
  },
969
  {
970
- "epoch": 0.67,
971
- "grad_norm": 4.822926044464111,
972
- "learning_rate": 3.325281803542673e-06,
973
- "loss": 0.193,
974
- "step": 3350
975
  },
976
  {
977
- "epoch": 0.68,
978
- "grad_norm": 4.274627685546875,
979
- "learning_rate": 3.274959742351047e-06,
980
- "loss": 0.1745,
981
- "step": 3375
982
  },
983
  {
984
- "epoch": 0.68,
985
- "grad_norm": 4.962224006652832,
986
- "learning_rate": 3.2246376811594203e-06,
987
- "loss": 0.1825,
988
- "step": 3400
989
  },
990
  {
991
- "epoch": 0.69,
992
- "grad_norm": 7.427305698394775,
993
- "learning_rate": 3.1743156199677943e-06,
994
- "loss": 0.1795,
995
- "step": 3425
996
  },
997
  {
998
- "epoch": 0.69,
999
- "grad_norm": 4.612069129943848,
1000
- "learning_rate": 3.123993558776168e-06,
1001
- "loss": 0.1979,
1002
- "step": 3450
1003
  },
1004
  {
1005
- "epoch": 0.69,
1006
- "grad_norm": 4.866634368896484,
1007
- "learning_rate": 3.073671497584541e-06,
1008
- "loss": 0.1693,
1009
- "step": 3475
1010
  },
1011
  {
1012
- "epoch": 0.7,
1013
- "grad_norm": 6.59066915512085,
1014
- "learning_rate": 3.023349436392915e-06,
1015
- "loss": 0.1878,
1016
- "step": 3500
1017
  },
1018
  {
1019
- "epoch": 0.7,
1020
- "grad_norm": 7.696073055267334,
1021
- "learning_rate": 2.9730273752012885e-06,
1022
- "loss": 0.1841,
1023
- "step": 3525
1024
  },
1025
  {
1026
- "epoch": 0.71,
1027
- "grad_norm": 6.277599811553955,
1028
- "learning_rate": 2.922705314009662e-06,
1029
- "loss": 0.2039,
1030
- "step": 3550
1031
  },
1032
  {
1033
- "epoch": 0.71,
1034
- "grad_norm": 5.677185535430908,
1035
- "learning_rate": 2.8723832528180357e-06,
1036
- "loss": 0.2101,
1037
- "step": 3575
1038
  },
1039
  {
1040
- "epoch": 0.72,
1041
- "grad_norm": 4.459707260131836,
1042
- "learning_rate": 2.822061191626409e-06,
1043
- "loss": 0.1946,
1044
- "step": 3600
1045
  },
1046
  {
1047
- "epoch": 0.72,
1048
- "grad_norm": 5.484477996826172,
1049
- "learning_rate": 2.771739130434783e-06,
1050
- "loss": 0.1878,
1051
- "step": 3625
1052
  },
1053
  {
1054
- "epoch": 0.73,
1055
- "grad_norm": 4.7586469650268555,
1056
- "learning_rate": 2.7214170692431564e-06,
1057
- "loss": 0.1906,
1058
- "step": 3650
1059
  },
1060
  {
1061
- "epoch": 0.73,
1062
- "grad_norm": 8.146832466125488,
1063
- "learning_rate": 2.6710950080515303e-06,
1064
- "loss": 0.2291,
1065
- "step": 3675
1066
  },
1067
  {
1068
- "epoch": 0.74,
1069
- "grad_norm": 6.010147571563721,
1070
- "learning_rate": 2.6207729468599035e-06,
1071
- "loss": 0.2465,
1072
- "step": 3700
1073
  },
1074
  {
1075
- "epoch": 0.74,
1076
- "grad_norm": 4.747072696685791,
1077
- "learning_rate": 2.570450885668277e-06,
1078
- "loss": 0.2215,
1079
- "step": 3725
1080
  },
1081
  {
1082
- "epoch": 0.75,
1083
- "grad_norm": 4.160192012786865,
1084
- "learning_rate": 2.520128824476651e-06,
1085
- "loss": 0.2075,
1086
- "step": 3750
1087
  },
1088
  {
1089
- "epoch": 0.76,
1090
- "grad_norm": 4.015562534332275,
1091
- "learning_rate": 2.469806763285024e-06,
1092
- "loss": 0.1772,
1093
- "step": 3775
1094
  },
1095
  {
1096
- "epoch": 0.76,
1097
- "grad_norm": 6.313992023468018,
1098
- "learning_rate": 2.4194847020933977e-06,
1099
- "loss": 0.1977,
1100
- "step": 3800
1101
  },
1102
  {
1103
- "epoch": 0.77,
1104
- "grad_norm": 4.440485954284668,
1105
- "learning_rate": 2.3691626409017713e-06,
1106
- "loss": 0.2007,
1107
- "step": 3825
1108
  },
1109
  {
1110
- "epoch": 0.77,
1111
- "grad_norm": 4.781130790710449,
1112
- "learning_rate": 2.3188405797101453e-06,
1113
- "loss": 0.1795,
1114
- "step": 3850
1115
  },
1116
  {
1117
- "epoch": 0.78,
1118
- "grad_norm": 6.669640064239502,
1119
- "learning_rate": 2.268518518518519e-06,
1120
- "loss": 0.1715,
1121
- "step": 3875
1122
  },
1123
  {
1124
- "epoch": 0.78,
1125
- "grad_norm": 5.417683124542236,
1126
- "learning_rate": 2.2181964573268924e-06,
1127
- "loss": 0.1669,
1128
- "step": 3900
1129
  },
1130
  {
1131
- "epoch": 0.79,
1132
- "grad_norm": 4.186657428741455,
1133
- "learning_rate": 2.167874396135266e-06,
1134
- "loss": 0.153,
1135
- "step": 3925
1136
  },
1137
  {
1138
- "epoch": 0.79,
1139
- "grad_norm": 4.2640509605407715,
1140
- "learning_rate": 2.1175523349436395e-06,
1141
- "loss": 0.1805,
1142
- "step": 3950
1143
  },
1144
  {
1145
- "epoch": 0.8,
1146
- "grad_norm": 5.582681655883789,
1147
- "learning_rate": 2.067230273752013e-06,
1148
- "loss": 0.2054,
1149
- "step": 3975
1150
  },
1151
  {
1152
- "epoch": 0.8,
1153
- "grad_norm": 6.194969654083252,
1154
- "learning_rate": 2.0169082125603867e-06,
1155
- "loss": 0.2261,
1156
- "step": 4000
1157
  },
1158
  {
1159
- "epoch": 0.8,
1160
- "eval_loss": 0.24181929230690002,
1161
- "eval_runtime": 290.4148,
1162
- "eval_samples_per_second": 3.443,
1163
- "eval_steps_per_second": 0.43,
1164
- "eval_wer": 42.05628948879954,
1165
- "step": 4000
1166
  },
1167
  {
1168
- "epoch": 0.81,
1169
- "grad_norm": 5.402450084686279,
1170
- "learning_rate": 1.9665861513687602e-06,
1171
- "loss": 0.2351,
1172
- "step": 4025
1173
  },
1174
  {
1175
- "epoch": 0.81,
1176
- "grad_norm": 5.974727630615234,
1177
- "learning_rate": 1.916264090177134e-06,
1178
- "loss": 0.2482,
1179
- "step": 4050
1180
  },
1181
  {
1182
- "epoch": 0.81,
1183
- "grad_norm": 5.478939056396484,
1184
- "learning_rate": 1.8659420289855074e-06,
1185
- "loss": 0.234,
1186
- "step": 4075
1187
  },
1188
  {
1189
- "epoch": 0.82,
1190
- "grad_norm": 5.91377067565918,
1191
- "learning_rate": 1.815619967793881e-06,
1192
- "loss": 0.2,
1193
- "step": 4100
1194
  },
1195
  {
1196
- "epoch": 0.82,
1197
- "grad_norm": 4.323048114776611,
1198
- "learning_rate": 1.7652979066022547e-06,
1199
- "loss": 0.1868,
1200
- "step": 4125
1201
  },
1202
  {
1203
- "epoch": 0.83,
1204
- "grad_norm": 3.8323700428009033,
1205
- "learning_rate": 1.7149758454106283e-06,
1206
- "loss": 0.1604,
1207
- "step": 4150
1208
  },
1209
  {
1210
- "epoch": 0.83,
1211
- "grad_norm": 3.8948872089385986,
1212
- "learning_rate": 1.6646537842190016e-06,
1213
- "loss": 0.1687,
1214
- "step": 4175
1215
  },
1216
  {
1217
- "epoch": 0.84,
1218
- "grad_norm": 4.291049957275391,
1219
- "learning_rate": 1.6143317230273752e-06,
1220
- "loss": 0.1701,
1221
- "step": 4200
1222
  },
1223
  {
1224
- "epoch": 0.84,
1225
- "grad_norm": 4.441890716552734,
1226
- "learning_rate": 1.564009661835749e-06,
1227
- "loss": 0.1563,
1228
- "step": 4225
1229
  },
1230
  {
1231
- "epoch": 0.85,
1232
- "grad_norm": 4.571049690246582,
1233
- "learning_rate": 1.5136876006441225e-06,
1234
- "loss": 0.1431,
1235
- "step": 4250
1236
  },
1237
  {
1238
- "epoch": 0.85,
1239
- "grad_norm": 5.673919200897217,
1240
- "learning_rate": 1.4633655394524963e-06,
1241
- "loss": 0.1641,
1242
- "step": 4275
1243
  },
1244
  {
1245
- "epoch": 0.86,
1246
- "grad_norm": 3.778271436691284,
1247
- "learning_rate": 1.4130434782608697e-06,
1248
- "loss": 0.166,
1249
- "step": 4300
1250
  },
1251
  {
1252
- "epoch": 0.86,
1253
- "grad_norm": 4.124724864959717,
1254
- "learning_rate": 1.3627214170692432e-06,
1255
- "loss": 0.1611,
1256
- "step": 4325
1257
  },
1258
  {
1259
- "epoch": 0.87,
1260
- "grad_norm": 4.925194263458252,
1261
- "learning_rate": 1.3123993558776168e-06,
1262
- "loss": 0.1644,
1263
- "step": 4350
1264
  },
1265
  {
1266
- "epoch": 0.88,
1267
- "grad_norm": 6.828126907348633,
1268
- "learning_rate": 1.2620772946859906e-06,
1269
- "loss": 0.1798,
1270
- "step": 4375
1271
  },
1272
  {
1273
- "epoch": 0.88,
1274
- "grad_norm": 7.811668872833252,
1275
- "learning_rate": 1.211755233494364e-06,
1276
- "loss": 0.191,
1277
- "step": 4400
1278
  },
1279
  {
1280
- "epoch": 0.89,
1281
- "grad_norm": 4.744828701019287,
1282
- "learning_rate": 1.1614331723027377e-06,
1283
- "loss": 0.2006,
1284
- "step": 4425
1285
  },
1286
  {
1287
- "epoch": 0.89,
1288
- "grad_norm": 6.9806718826293945,
1289
- "learning_rate": 1.111111111111111e-06,
1290
- "loss": 0.2161,
1291
- "step": 4450
1292
  },
1293
  {
1294
- "epoch": 0.9,
1295
- "grad_norm": 5.690942287445068,
1296
- "learning_rate": 1.0607890499194848e-06,
1297
- "loss": 0.2092,
1298
- "step": 4475
1299
  },
1300
  {
1301
- "epoch": 0.9,
1302
- "grad_norm": 7.265722274780273,
1303
- "learning_rate": 1.0104669887278584e-06,
1304
- "loss": 0.2055,
1305
- "step": 4500
1306
  },
1307
  {
1308
- "epoch": 0.91,
1309
- "grad_norm": 6.037989616394043,
1310
- "learning_rate": 9.60144927536232e-07,
1311
- "loss": 0.1691,
1312
- "step": 4525
1313
  },
1314
  {
1315
- "epoch": 0.91,
1316
- "grad_norm": 4.537023544311523,
1317
- "learning_rate": 9.098228663446056e-07,
1318
- "loss": 0.1756,
1319
- "step": 4550
1320
  },
1321
  {
1322
- "epoch": 0.92,
1323
- "grad_norm": 4.457717418670654,
1324
- "learning_rate": 8.595008051529791e-07,
1325
- "loss": 0.1685,
1326
- "step": 4575
1327
  },
1328
  {
1329
- "epoch": 0.92,
1330
- "grad_norm": 4.620595932006836,
1331
- "learning_rate": 8.091787439613527e-07,
1332
- "loss": 0.1692,
1333
- "step": 4600
1334
  },
1335
  {
1336
- "epoch": 0.93,
1337
- "grad_norm": 5.932682514190674,
1338
- "learning_rate": 7.588566827697263e-07,
1339
- "loss": 0.184,
1340
- "step": 4625
1341
  },
1342
  {
1343
- "epoch": 0.93,
1344
- "grad_norm": 5.343090534210205,
1345
- "learning_rate": 7.085346215780999e-07,
1346
- "loss": 0.1755,
1347
- "step": 4650
1348
  },
1349
  {
1350
- "epoch": 0.94,
1351
- "grad_norm": 4.914378643035889,
1352
- "learning_rate": 6.582125603864735e-07,
1353
- "loss": 0.1598,
1354
- "step": 4675
1355
  },
1356
  {
1357
- "epoch": 0.94,
1358
- "grad_norm": 4.512668132781982,
1359
- "learning_rate": 6.078904991948471e-07,
1360
- "loss": 0.2097,
1361
- "step": 4700
1362
  },
1363
  {
1364
- "epoch": 0.94,
1365
- "grad_norm": 4.129415512084961,
1366
- "learning_rate": 5.575684380032207e-07,
1367
- "loss": 0.216,
1368
- "step": 4725
1369
  },
1370
  {
1371
- "epoch": 0.95,
1372
- "grad_norm": 4.950856685638428,
1373
- "learning_rate": 5.072463768115942e-07,
1374
- "loss": 0.1984,
1375
- "step": 4750
1376
  },
1377
  {
1378
- "epoch": 0.95,
1379
- "grad_norm": 5.21985387802124,
1380
- "learning_rate": 4.5893719806763294e-07,
1381
- "loss": 0.2017,
1382
- "step": 4775
1383
  },
1384
  {
1385
- "epoch": 0.96,
1386
- "grad_norm": 4.996951580047607,
1387
- "learning_rate": 4.086151368760065e-07,
1388
- "loss": 0.1827,
1389
- "step": 4800
1390
  },
1391
  {
1392
- "epoch": 0.96,
1393
- "grad_norm": 5.20858907699585,
1394
- "learning_rate": 3.5829307568438007e-07,
1395
- "loss": 0.1948,
1396
- "step": 4825
1397
  },
1398
  {
1399
- "epoch": 0.97,
1400
- "grad_norm": 5.125787258148193,
1401
- "learning_rate": 3.079710144927537e-07,
1402
- "loss": 0.1691,
1403
- "step": 4850
1404
  },
1405
  {
1406
- "epoch": 0.97,
1407
- "grad_norm": 3.7991411685943604,
1408
- "learning_rate": 2.5764895330112725e-07,
1409
- "loss": 0.147,
1410
- "step": 4875
1411
  },
1412
  {
1413
- "epoch": 0.98,
1414
- "grad_norm": 3.768537759780884,
1415
- "learning_rate": 2.073268921095008e-07,
1416
- "loss": 0.1255,
1417
- "step": 4900
1418
  },
1419
  {
1420
- "epoch": 0.98,
1421
- "grad_norm": 5.530201435089111,
1422
- "learning_rate": 1.570048309178744e-07,
1423
- "loss": 0.1241,
1424
- "step": 4925
1425
  },
1426
  {
1427
- "epoch": 0.99,
1428
- "grad_norm": 5.020101547241211,
1429
- "learning_rate": 1.0668276972624801e-07,
1430
- "loss": 0.1349,
1431
- "step": 4950
1432
  },
1433
  {
1434
- "epoch": 0.99,
1435
- "grad_norm": 6.2157673835754395,
1436
- "learning_rate": 5.6360708534621584e-08,
1437
- "loss": 0.1273,
1438
- "step": 4975
1439
  },
1440
  {
1441
- "epoch": 1.0,
1442
- "grad_norm": 4.496521949768066,
1443
- "learning_rate": 6.0386473429951695e-09,
1444
- "loss": 0.1257,
1445
- "step": 5000
1446
  },
1447
  {
1448
- "epoch": 1.0,
1449
- "eval_loss": 0.23232251405715942,
1450
- "eval_runtime": 287.2652,
1451
- "eval_samples_per_second": 3.481,
1452
  "eval_steps_per_second": 0.435,
1453
- "eval_wer": 39.414129810453765,
1454
- "step": 5000
1455
  }
1456
  ],
1457
- "logging_steps": 25,
1458
- "max_steps": 5000,
1459
  "num_input_tokens_seen": 0,
1460
- "num_train_epochs": 9223372036854775807,
1461
- "save_steps": 1000,
1462
- "total_flos": 5.1887996928e+18,
1463
- "train_batch_size": 8,
 
 
 
 
 
 
 
 
 
 
 
 
1464
  "trial_name": null,
1465
  "trial_params": null
1466
  }
 
1
  {
2
+ "best_metric": 19.701321079839175,
3
+ "best_model_checkpoint": "./whisper-ft-2/checkpoint-600",
4
+ "epoch": 4.0,
5
+ "eval_steps": 600,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03333333333333333,
13
+ "grad_norm": 1.419047236442566,
14
+ "learning_rate": 5e-06,
15
+ "loss": 0.0144,
16
+ "step": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  },
18
  {
19
+ "epoch": 0.06666666666666667,
20
+ "grad_norm": 2.3382816314697266,
21
+ "learning_rate": 1e-05,
22
+ "loss": 0.0143,
23
+ "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  },
25
  {
26
  "epoch": 0.1,
27
+ "grad_norm": 0.8841551542282104,
28
+ "learning_rate": 9.943820224719102e-06,
29
+ "loss": 0.0126,
30
+ "step": 15
31
  },
32
  {
33
+ "epoch": 0.13333333333333333,
34
+ "grad_norm": 2.564495325088501,
35
+ "learning_rate": 9.887640449438202e-06,
36
+ "loss": 0.0144,
37
+ "step": 20
38
  },
39
  {
40
+ "epoch": 0.16666666666666666,
41
+ "grad_norm": 1.858008623123169,
42
+ "learning_rate": 9.831460674157303e-06,
43
+ "loss": 0.0097,
44
+ "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 0.2,
48
+ "grad_norm": 1.3031666278839111,
49
+ "learning_rate": 9.775280898876405e-06,
50
+ "loss": 0.0129,
51
+ "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  },
53
  {
54
+ "epoch": 0.23333333333333334,
55
+ "grad_norm": 3.633366584777832,
56
+ "learning_rate": 9.719101123595506e-06,
57
+ "loss": 0.0083,
58
+ "step": 35
59
  },
60
  {
61
+ "epoch": 0.26666666666666666,
62
+ "grad_norm": 1.9989734888076782,
63
+ "learning_rate": 9.662921348314608e-06,
64
+ "loss": 0.0122,
65
+ "step": 40
66
  },
67
  {
68
  "epoch": 0.3,
69
+ "grad_norm": 0.9710230827331543,
70
+ "learning_rate": 9.60674157303371e-06,
71
+ "loss": 0.007,
72
+ "step": 45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  },
74
  {
75
+ "epoch": 0.3333333333333333,
76
+ "grad_norm": 1.8736152648925781,
77
+ "learning_rate": 9.55056179775281e-06,
78
+ "loss": 0.0122,
79
+ "step": 50
 
 
 
 
 
 
 
80
  },
81
  {
82
+ "epoch": 0.36666666666666664,
83
+ "grad_norm": 0.9838809370994568,
84
+ "learning_rate": 9.49438202247191e-06,
85
+ "loss": 0.005,
86
+ "step": 55
87
  },
88
  {
89
  "epoch": 0.4,
90
+ "grad_norm": 2.429737091064453,
91
+ "learning_rate": 9.438202247191012e-06,
92
+ "loss": 0.0115,
93
+ "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  },
95
  {
96
+ "epoch": 0.43333333333333335,
97
+ "grad_norm": 1.108884572982788,
98
+ "learning_rate": 9.382022471910113e-06,
99
+ "loss": 0.0058,
100
+ "step": 65
101
  },
102
  {
103
+ "epoch": 0.4666666666666667,
104
+ "grad_norm": 1.1713858842849731,
105
+ "learning_rate": 9.325842696629213e-06,
106
+ "loss": 0.0058,
107
+ "step": 70
108
  },
109
  {
110
+ "epoch": 0.5,
111
+ "grad_norm": 3.6589481830596924,
112
+ "learning_rate": 9.269662921348316e-06,
113
+ "loss": 0.0094,
114
+ "step": 75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  },
116
  {
117
+ "epoch": 0.5333333333333333,
118
+ "grad_norm": 1.9582005739212036,
119
+ "learning_rate": 9.213483146067417e-06,
120
+ "loss": 0.0076,
121
+ "step": 80
122
  },
123
  {
124
+ "epoch": 0.5666666666666667,
125
+ "grad_norm": 1.3726775646209717,
126
+ "learning_rate": 9.157303370786517e-06,
127
+ "loss": 0.0054,
128
+ "step": 85
129
  },
130
  {
131
+ "epoch": 0.6,
132
+ "grad_norm": 1.7600703239440918,
133
+ "learning_rate": 9.101123595505619e-06,
134
+ "loss": 0.0099,
135
+ "step": 90
136
  },
137
  {
138
+ "epoch": 0.6333333333333333,
139
+ "grad_norm": 0.9456138610839844,
140
+ "learning_rate": 9.04494382022472e-06,
141
+ "loss": 0.0047,
142
+ "step": 95
143
  },
144
  {
145
+ "epoch": 0.6666666666666666,
146
+ "grad_norm": 0.9388735294342041,
147
+ "learning_rate": 8.988764044943822e-06,
148
+ "loss": 0.0089,
149
+ "step": 100
150
  },
151
  {
152
+ "epoch": 0.7,
153
+ "grad_norm": 2.800154685974121,
154
+ "learning_rate": 8.932584269662921e-06,
155
+ "loss": 0.0074,
156
+ "step": 105
157
  },
158
  {
159
+ "epoch": 0.7333333333333333,
160
+ "grad_norm": 4.255383491516113,
161
+ "learning_rate": 8.876404494382023e-06,
162
+ "loss": 0.0057,
163
+ "step": 110
164
  },
165
  {
166
+ "epoch": 0.7666666666666667,
167
+ "grad_norm": 1.3430100679397583,
168
+ "learning_rate": 8.820224719101124e-06,
169
+ "loss": 0.0042,
170
+ "step": 115
171
  },
172
  {
173
+ "epoch": 0.8,
174
+ "grad_norm": 4.184587478637695,
175
+ "learning_rate": 8.764044943820226e-06,
176
+ "loss": 0.0063,
177
+ "step": 120
178
  },
179
  {
180
+ "epoch": 0.8333333333333334,
181
+ "grad_norm": 1.3114601373672485,
182
+ "learning_rate": 8.707865168539327e-06,
183
+ "loss": 0.0085,
184
+ "step": 125
185
  },
186
  {
187
+ "epoch": 0.8666666666666667,
188
+ "grad_norm": 1.0539401769638062,
189
+ "learning_rate": 8.651685393258428e-06,
190
+ "loss": 0.0072,
191
+ "step": 130
192
  },
193
  {
194
+ "epoch": 0.9,
195
+ "grad_norm": 4.378466606140137,
196
+ "learning_rate": 8.595505617977528e-06,
197
+ "loss": 0.0049,
198
+ "step": 135
199
  },
200
  {
201
+ "epoch": 0.9333333333333333,
202
+ "grad_norm": 1.5044472217559814,
203
+ "learning_rate": 8.53932584269663e-06,
204
+ "loss": 0.0067,
205
+ "step": 140
206
  },
207
  {
208
+ "epoch": 0.9666666666666667,
209
+ "grad_norm": 2.2182376384735107,
210
+ "learning_rate": 8.483146067415731e-06,
211
+ "loss": 0.0091,
212
+ "step": 145
213
  },
214
  {
215
+ "epoch": 1.0,
216
+ "grad_norm": 0.24589230120182037,
217
+ "learning_rate": 8.426966292134832e-06,
218
+ "loss": 0.0062,
219
+ "step": 150
220
  },
221
  {
222
+ "epoch": 1.0333333333333334,
223
+ "grad_norm": 0.48602986335754395,
224
+ "learning_rate": 8.370786516853934e-06,
225
+ "loss": 0.0056,
226
+ "step": 155
227
  },
228
  {
229
+ "epoch": 1.0666666666666667,
230
+ "grad_norm": 0.2779291570186615,
231
+ "learning_rate": 8.314606741573035e-06,
232
+ "loss": 0.0031,
233
+ "step": 160
234
  },
235
  {
236
+ "epoch": 1.1,
237
+ "grad_norm": 2.2703235149383545,
238
+ "learning_rate": 8.258426966292135e-06,
239
+ "loss": 0.0069,
240
+ "step": 165
241
  },
242
  {
243
+ "epoch": 1.1333333333333333,
244
+ "grad_norm": 1.3132015466690063,
245
+ "learning_rate": 8.202247191011237e-06,
246
+ "loss": 0.006,
247
+ "step": 170
248
  },
249
  {
250
+ "epoch": 1.1666666666666667,
251
+ "grad_norm": 0.24997250735759735,
252
+ "learning_rate": 8.146067415730338e-06,
253
+ "loss": 0.0027,
254
+ "step": 175
255
  },
256
  {
257
+ "epoch": 1.2,
258
+ "grad_norm": 0.14482256770133972,
259
+ "learning_rate": 8.08988764044944e-06,
260
+ "loss": 0.0023,
261
+ "step": 180
262
  },
263
  {
264
+ "epoch": 1.2333333333333334,
265
+ "grad_norm": 0.7420951128005981,
266
+ "learning_rate": 8.033707865168539e-06,
267
+ "loss": 0.0064,
268
+ "step": 185
269
  },
270
  {
271
+ "epoch": 1.2666666666666666,
272
+ "grad_norm": 0.21342052519321442,
273
+ "learning_rate": 7.97752808988764e-06,
274
+ "loss": 0.0026,
275
+ "step": 190
276
  },
277
  {
278
+ "epoch": 1.3,
279
+ "grad_norm": 0.2786453664302826,
280
+ "learning_rate": 7.921348314606742e-06,
281
+ "loss": 0.0036,
282
+ "step": 195
283
  },
284
  {
285
+ "epoch": 1.3333333333333333,
286
+ "grad_norm": 0.36970847845077515,
287
+ "learning_rate": 7.865168539325843e-06,
288
+ "loss": 0.0046,
289
+ "step": 200
 
 
 
 
 
 
 
 
 
290
  },
291
  {
292
+ "epoch": 1.3666666666666667,
293
+ "grad_norm": 0.17185050249099731,
294
+ "learning_rate": 7.808988764044945e-06,
295
+ "loss": 0.0022,
296
+ "step": 205
297
  },
298
  {
299
+ "epoch": 1.4,
300
+ "grad_norm": 1.4263725280761719,
301
+ "learning_rate": 7.752808988764046e-06,
302
+ "loss": 0.0034,
303
+ "step": 210
304
  },
305
  {
306
+ "epoch": 1.4333333333333333,
307
+ "grad_norm": 0.5556924939155579,
308
+ "learning_rate": 7.696629213483146e-06,
309
+ "loss": 0.005,
310
+ "step": 215
311
  },
312
  {
313
+ "epoch": 1.4666666666666668,
314
+ "grad_norm": 1.5778443813323975,
315
+ "learning_rate": 7.640449438202247e-06,
316
+ "loss": 0.0048,
317
+ "step": 220
318
  },
319
  {
320
+ "epoch": 1.5,
321
+ "grad_norm": 0.39948800206184387,
322
+ "learning_rate": 7.584269662921349e-06,
323
+ "loss": 0.0056,
324
+ "step": 225
325
  },
326
  {
327
+ "epoch": 1.5333333333333332,
328
+ "grad_norm": 1.9470425844192505,
329
+ "learning_rate": 7.5280898876404495e-06,
330
+ "loss": 0.0052,
331
+ "step": 230
332
  },
333
  {
334
+ "epoch": 1.5666666666666667,
335
+ "grad_norm": 1.3367302417755127,
336
+ "learning_rate": 7.471910112359552e-06,
337
+ "loss": 0.0056,
338
+ "step": 235
339
  },
340
  {
341
+ "epoch": 1.6,
342
+ "grad_norm": 2.2041380405426025,
343
+ "learning_rate": 7.415730337078652e-06,
344
+ "loss": 0.0026,
345
+ "step": 240
346
  },
347
  {
348
+ "epoch": 1.6333333333333333,
349
+ "grad_norm": 1.1062074899673462,
350
+ "learning_rate": 7.359550561797754e-06,
351
+ "loss": 0.0058,
352
+ "step": 245
353
  },
354
  {
355
+ "epoch": 1.6666666666666665,
356
+ "grad_norm": 1.558339238166809,
357
+ "learning_rate": 7.303370786516854e-06,
358
+ "loss": 0.0036,
359
+ "step": 250
360
  },
361
  {
362
+ "epoch": 1.7,
363
+ "grad_norm": 1.4909693002700806,
364
+ "learning_rate": 7.247191011235956e-06,
365
+ "loss": 0.0081,
366
+ "step": 255
367
  },
368
  {
369
+ "epoch": 1.7333333333333334,
370
+ "grad_norm": 1.0885131359100342,
371
+ "learning_rate": 7.191011235955056e-06,
372
+ "loss": 0.0035,
373
+ "step": 260
374
  },
375
  {
376
+ "epoch": 1.7666666666666666,
377
+ "grad_norm": 0.34988316893577576,
378
+ "learning_rate": 7.134831460674158e-06,
379
+ "loss": 0.0066,
380
+ "step": 265
381
  },
382
  {
383
+ "epoch": 1.8,
384
+ "grad_norm": 0.39315518736839294,
385
+ "learning_rate": 7.078651685393258e-06,
386
+ "loss": 0.0036,
387
+ "step": 270
388
  },
389
  {
390
+ "epoch": 1.8333333333333335,
391
+ "grad_norm": 0.4730512797832489,
392
+ "learning_rate": 7.022471910112361e-06,
393
+ "loss": 0.0025,
394
+ "step": 275
395
  },
396
  {
397
+ "epoch": 1.8666666666666667,
398
+ "grad_norm": 1.0061614513397217,
399
+ "learning_rate": 6.966292134831461e-06,
400
+ "loss": 0.0058,
401
+ "step": 280
402
  },
403
  {
404
+ "epoch": 1.9,
405
+ "grad_norm": 0.6261555552482605,
406
+ "learning_rate": 6.910112359550563e-06,
407
+ "loss": 0.0035,
408
+ "step": 285
409
  },
410
  {
411
+ "epoch": 1.9333333333333333,
412
+ "grad_norm": 0.54555344581604,
413
+ "learning_rate": 6.853932584269663e-06,
414
+ "loss": 0.0064,
415
+ "step": 290
416
  },
417
  {
418
+ "epoch": 1.9666666666666668,
419
+ "grad_norm": 0.26168444752693176,
420
+ "learning_rate": 6.797752808988765e-06,
421
+ "loss": 0.0063,
422
+ "step": 295
423
  },
424
  {
425
+ "epoch": 2.0,
426
+ "grad_norm": 2.1794660091400146,
427
+ "learning_rate": 6.741573033707865e-06,
428
+ "loss": 0.0047,
429
+ "step": 300
430
  },
431
  {
432
+ "epoch": 2.033333333333333,
433
+ "grad_norm": 0.07394399493932724,
434
+ "learning_rate": 6.685393258426967e-06,
435
+ "loss": 0.0033,
436
+ "step": 305
437
  },
438
  {
439
+ "epoch": 2.066666666666667,
440
+ "grad_norm": 0.32680198550224304,
441
+ "learning_rate": 6.629213483146067e-06,
442
+ "loss": 0.0033,
443
+ "step": 310
444
  },
445
  {
446
+ "epoch": 2.1,
447
+ "grad_norm": 1.1588655710220337,
448
+ "learning_rate": 6.57303370786517e-06,
449
+ "loss": 0.0044,
450
+ "step": 315
451
  },
452
  {
453
+ "epoch": 2.1333333333333333,
454
+ "grad_norm": 0.12549975514411926,
455
+ "learning_rate": 6.51685393258427e-06,
456
+ "loss": 0.0025,
457
+ "step": 320
458
  },
459
  {
460
+ "epoch": 2.1666666666666665,
461
+ "grad_norm": 1.6806613206863403,
462
+ "learning_rate": 6.460674157303372e-06,
463
+ "loss": 0.0073,
464
+ "step": 325
465
  },
466
  {
467
+ "epoch": 2.2,
468
+ "grad_norm": 1.2238233089447021,
469
+ "learning_rate": 6.404494382022472e-06,
470
+ "loss": 0.0063,
471
+ "step": 330
472
  },
473
  {
474
+ "epoch": 2.2333333333333334,
475
+ "grad_norm": 0.13195298612117767,
476
+ "learning_rate": 6.348314606741574e-06,
477
+ "loss": 0.0029,
478
+ "step": 335
479
  },
480
  {
481
+ "epoch": 2.2666666666666666,
482
+ "grad_norm": 0.6085399985313416,
483
+ "learning_rate": 6.292134831460674e-06,
484
+ "loss": 0.0014,
485
+ "step": 340
486
  },
487
  {
488
+ "epoch": 2.3,
489
+ "grad_norm": 0.1822354644536972,
490
+ "learning_rate": 6.235955056179776e-06,
491
+ "loss": 0.0017,
492
+ "step": 345
493
  },
494
  {
495
+ "epoch": 2.3333333333333335,
496
+ "grad_norm": 1.526226282119751,
497
+ "learning_rate": 6.179775280898876e-06,
498
+ "loss": 0.0016,
499
+ "step": 350
500
  },
501
  {
502
+ "epoch": 2.3666666666666667,
503
+ "grad_norm": 0.3068194091320038,
504
+ "learning_rate": 6.1235955056179785e-06,
505
+ "loss": 0.0053,
506
+ "step": 355
507
  },
508
  {
509
+ "epoch": 2.4,
510
+ "grad_norm": 0.3344336748123169,
511
+ "learning_rate": 6.06741573033708e-06,
512
+ "loss": 0.0034,
513
+ "step": 360
514
  },
515
  {
516
+ "epoch": 2.4333333333333336,
517
+ "grad_norm": 0.38717201352119446,
518
+ "learning_rate": 6.0112359550561805e-06,
519
+ "loss": 0.0032,
520
+ "step": 365
521
  },
522
  {
523
+ "epoch": 2.466666666666667,
524
+ "grad_norm": 3.577847957611084,
525
+ "learning_rate": 5.955056179775281e-06,
526
+ "loss": 0.0046,
527
+ "step": 370
528
  },
529
  {
530
+ "epoch": 2.5,
531
+ "grad_norm": 0.1945178508758545,
532
+ "learning_rate": 5.8988764044943826e-06,
533
+ "loss": 0.0016,
534
+ "step": 375
535
  },
536
  {
537
+ "epoch": 2.533333333333333,
538
+ "grad_norm": 0.12505494058132172,
539
+ "learning_rate": 5.842696629213483e-06,
540
+ "loss": 0.0021,
541
+ "step": 380
542
  },
543
  {
544
+ "epoch": 2.5666666666666664,
545
+ "grad_norm": 0.23506364226341248,
546
+ "learning_rate": 5.786516853932585e-06,
547
+ "loss": 0.0025,
548
+ "step": 385
549
  },
550
  {
551
+ "epoch": 2.6,
552
+ "grad_norm": 0.48507827520370483,
553
+ "learning_rate": 5.730337078651685e-06,
554
+ "loss": 0.0016,
555
+ "step": 390
556
  },
557
  {
558
+ "epoch": 2.6333333333333333,
559
+ "grad_norm": 0.14861613512039185,
560
+ "learning_rate": 5.6741573033707874e-06,
561
+ "loss": 0.0019,
562
+ "step": 395
563
  },
564
  {
565
+ "epoch": 2.6666666666666665,
566
+ "grad_norm": 0.07135830074548721,
567
+ "learning_rate": 5.617977528089889e-06,
568
+ "loss": 0.0018,
569
+ "step": 400
 
 
570
  },
571
  {
572
+ "epoch": 2.7,
573
+ "grad_norm": 0.35108867287635803,
574
+ "learning_rate": 5.5617977528089895e-06,
575
+ "loss": 0.0011,
576
+ "step": 405
577
  },
578
  {
579
+ "epoch": 2.7333333333333334,
580
+ "grad_norm": 1.0602957010269165,
581
+ "learning_rate": 5.50561797752809e-06,
582
+ "loss": 0.0015,
583
+ "step": 410
584
  },
585
  {
586
+ "epoch": 2.7666666666666666,
587
+ "grad_norm": 0.14372961223125458,
588
+ "learning_rate": 5.4494382022471915e-06,
589
+ "loss": 0.0023,
590
+ "step": 415
591
  },
592
  {
593
+ "epoch": 2.8,
594
+ "grad_norm": 1.5078669786453247,
595
+ "learning_rate": 5.393258426966292e-06,
596
+ "loss": 0.0017,
597
+ "step": 420
598
  },
599
  {
600
+ "epoch": 2.8333333333333335,
601
+ "grad_norm": 0.08180749416351318,
602
+ "learning_rate": 5.3370786516853935e-06,
603
+ "loss": 0.0022,
604
+ "step": 425
605
  },
606
  {
607
+ "epoch": 2.8666666666666667,
608
+ "grad_norm": 0.27330687642097473,
609
+ "learning_rate": 5.280898876404494e-06,
610
+ "loss": 0.0016,
611
+ "step": 430
612
  },
613
  {
614
+ "epoch": 2.9,
615
+ "grad_norm": 0.06831669807434082,
616
+ "learning_rate": 5.224719101123596e-06,
617
+ "loss": 0.0008,
618
+ "step": 435
619
  },
620
  {
621
+ "epoch": 2.9333333333333336,
622
+ "grad_norm": 0.3877633213996887,
623
+ "learning_rate": 5.168539325842698e-06,
624
+ "loss": 0.0022,
625
+ "step": 440
626
  },
627
  {
628
+ "epoch": 2.966666666666667,
629
+ "grad_norm": 2.5412757396698,
630
+ "learning_rate": 5.112359550561798e-06,
631
+ "loss": 0.003,
632
+ "step": 445
633
  },
634
  {
635
+ "epoch": 3.0,
636
+ "grad_norm": 0.09176287055015564,
637
+ "learning_rate": 5.0561797752809e-06,
638
+ "loss": 0.0011,
639
+ "step": 450
640
  },
641
  {
642
+ "epoch": 3.033333333333333,
643
+ "grad_norm": 0.03548486530780792,
644
+ "learning_rate": 5e-06,
645
+ "loss": 0.0016,
646
+ "step": 455
647
  },
648
  {
649
+ "epoch": 3.066666666666667,
650
+ "grad_norm": 0.11006509512662888,
651
+ "learning_rate": 4.943820224719101e-06,
652
+ "loss": 0.006,
653
+ "step": 460
654
  },
655
  {
656
+ "epoch": 3.1,
657
+ "grad_norm": 0.19691240787506104,
658
+ "learning_rate": 4.8876404494382024e-06,
659
+ "loss": 0.002,
660
+ "step": 465
661
  },
662
  {
663
+ "epoch": 3.1333333333333333,
664
+ "grad_norm": 0.30039843916893005,
665
+ "learning_rate": 4.831460674157304e-06,
666
+ "loss": 0.001,
667
+ "step": 470
668
  },
669
  {
670
+ "epoch": 3.1666666666666665,
671
+ "grad_norm": 0.0753551796078682,
672
+ "learning_rate": 4.775280898876405e-06,
673
+ "loss": 0.0008,
674
+ "step": 475
675
  },
676
  {
677
+ "epoch": 3.2,
678
+ "grad_norm": 0.12786687910556793,
679
+ "learning_rate": 4.719101123595506e-06,
680
+ "loss": 0.0026,
681
+ "step": 480
682
  },
683
  {
684
+ "epoch": 3.2333333333333334,
685
+ "grad_norm": 0.07095532864332199,
686
+ "learning_rate": 4.6629213483146065e-06,
687
+ "loss": 0.0004,
688
+ "step": 485
689
  },
690
  {
691
+ "epoch": 3.2666666666666666,
692
+ "grad_norm": 0.1347319781780243,
693
+ "learning_rate": 4.606741573033709e-06,
694
+ "loss": 0.0013,
695
+ "step": 490
696
  },
697
  {
698
+ "epoch": 3.3,
699
+ "grad_norm": 0.11095249652862549,
700
+ "learning_rate": 4.550561797752809e-06,
701
+ "loss": 0.0012,
702
+ "step": 495
703
  },
704
  {
705
+ "epoch": 3.3333333333333335,
706
+ "grad_norm": 0.10048757493495941,
707
+ "learning_rate": 4.494382022471911e-06,
708
+ "loss": 0.0014,
709
+ "step": 500
710
  },
711
  {
712
+ "epoch": 3.3666666666666667,
713
+ "grad_norm": 0.04148377105593681,
714
+ "learning_rate": 4.438202247191011e-06,
715
+ "loss": 0.0006,
716
+ "step": 505
717
  },
718
  {
719
+ "epoch": 3.4,
720
+ "grad_norm": 0.17400866746902466,
721
+ "learning_rate": 4.382022471910113e-06,
722
+ "loss": 0.0008,
723
+ "step": 510
724
  },
725
  {
726
+ "epoch": 3.4333333333333336,
727
+ "grad_norm": 0.2563508450984955,
728
+ "learning_rate": 4.325842696629214e-06,
729
+ "loss": 0.0027,
730
+ "step": 515
731
  },
732
  {
733
+ "epoch": 3.466666666666667,
734
+ "grad_norm": 0.06805615872144699,
735
+ "learning_rate": 4.269662921348315e-06,
736
+ "loss": 0.0011,
737
+ "step": 520
738
  },
739
  {
740
+ "epoch": 3.5,
741
+ "grad_norm": 0.20213201642036438,
742
+ "learning_rate": 4.213483146067416e-06,
743
+ "loss": 0.0031,
744
+ "step": 525
745
  },
746
  {
747
+ "epoch": 3.533333333333333,
748
+ "grad_norm": 0.03789810836315155,
749
+ "learning_rate": 4.157303370786518e-06,
750
+ "loss": 0.0038,
751
+ "step": 530
752
  },
753
  {
754
+ "epoch": 3.5666666666666664,
755
+ "grad_norm": 0.13470155000686646,
756
+ "learning_rate": 4.101123595505618e-06,
757
+ "loss": 0.0017,
758
+ "step": 535
759
  },
760
  {
761
+ "epoch": 3.6,
762
+ "grad_norm": 0.046678509563207626,
763
+ "learning_rate": 4.04494382022472e-06,
764
+ "loss": 0.0005,
765
+ "step": 540
766
  },
767
  {
768
+ "epoch": 3.6333333333333333,
769
+ "grad_norm": 0.05889086052775383,
770
+ "learning_rate": 3.98876404494382e-06,
771
+ "loss": 0.0036,
772
+ "step": 545
773
  },
774
  {
775
+ "epoch": 3.6666666666666665,
776
+ "grad_norm": 0.03523600473999977,
777
+ "learning_rate": 3.932584269662922e-06,
778
+ "loss": 0.0011,
779
+ "step": 550
780
  },
781
  {
782
+ "epoch": 3.7,
783
+ "grad_norm": 0.03970940411090851,
784
+ "learning_rate": 3.876404494382023e-06,
785
+ "loss": 0.0011,
786
+ "step": 555
787
  },
788
  {
789
+ "epoch": 3.7333333333333334,
790
+ "grad_norm": 0.2314375936985016,
791
+ "learning_rate": 3.820224719101124e-06,
792
+ "loss": 0.0026,
793
+ "step": 560
794
  },
795
  {
796
+ "epoch": 3.7666666666666666,
797
+ "grad_norm": 0.07075604051351547,
798
+ "learning_rate": 3.7640449438202247e-06,
799
+ "loss": 0.0031,
800
+ "step": 565
801
  },
802
  {
803
+ "epoch": 3.8,
804
+ "grad_norm": 0.09040886908769608,
805
+ "learning_rate": 3.707865168539326e-06,
806
+ "loss": 0.0004,
807
+ "step": 570
808
  },
809
  {
810
+ "epoch": 3.8333333333333335,
811
+ "grad_norm": 0.11175351589918137,
812
+ "learning_rate": 3.651685393258427e-06,
813
+ "loss": 0.0012,
814
+ "step": 575
815
  },
816
  {
817
+ "epoch": 3.8666666666666667,
818
+ "grad_norm": 0.5862079858779907,
819
+ "learning_rate": 3.595505617977528e-06,
820
+ "loss": 0.001,
821
+ "step": 580
822
  },
823
  {
824
+ "epoch": 3.9,
825
+ "grad_norm": 0.4760936200618744,
826
+ "learning_rate": 3.539325842696629e-06,
827
+ "loss": 0.0008,
828
+ "step": 585
829
  },
830
  {
831
+ "epoch": 3.9333333333333336,
832
+ "grad_norm": 0.18229596316814423,
833
+ "learning_rate": 3.4831460674157306e-06,
834
+ "loss": 0.001,
835
+ "step": 590
836
  },
837
  {
838
+ "epoch": 3.966666666666667,
839
+ "grad_norm": 0.033338598906993866,
840
+ "learning_rate": 3.4269662921348316e-06,
841
+ "loss": 0.002,
842
+ "step": 595
843
  },
844
  {
845
+ "epoch": 4.0,
846
+ "grad_norm": 0.19226883351802826,
847
+ "learning_rate": 3.3707865168539327e-06,
848
+ "loss": 0.0006,
849
+ "step": 600
850
  },
851
  {
852
+ "epoch": 4.0,
853
+ "eval_loss": 0.3506176173686981,
854
+ "eval_runtime": 287.3723,
855
+ "eval_samples_per_second": 3.48,
856
  "eval_steps_per_second": 0.435,
857
+ "eval_wer": 19.701321079839175,
858
+ "step": 600
859
  }
860
  ],
861
+ "logging_steps": 5,
862
+ "max_steps": 900,
863
  "num_input_tokens_seen": 0,
864
+ "num_train_epochs": 6,
865
+ "save_steps": 600,
866
+ "stateful_callbacks": {
867
+ "TrainerControl": {
868
+ "args": {
869
+ "should_epoch_stop": false,
870
+ "should_evaluate": false,
871
+ "should_log": false,
872
+ "should_save": true,
873
+ "should_training_stop": false
874
+ },
875
+ "attributes": {}
876
+ }
877
+ },
878
+ "total_flos": 7.7831995392e+17,
879
+ "train_batch_size": 10,
880
  "trial_name": null,
881
  "trial_params": null
882
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba55210c552526ee1c649baa4ad5bafd32da08d4c4d293ad0eca838f857f04d
3
- size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db804bff1d34b49eeb93672a0155a7e464e05406a85421985e0ebf298394549d
3
+ size 5240