rbcurzon committed on
Commit
1fee334
·
verified ·
1 Parent(s): b1f8c65

End of training

Browse files
Files changed (5) hide show
  1. README.md +15 -2
  2. all_results.json +15 -0
  3. eval_results.json +9 -0
  4. train_results.json +9 -0
  5. trainer_state.json +621 -0
README.md CHANGED
@@ -4,11 +4,24 @@ license: apache-2.0
4
  base_model: openai/whisper-medium
5
  tags:
6
  - generated_from_trainer
 
 
7
  metrics:
8
  - wer
9
  model-index:
10
  - name: whisper-medium-ph
11
- results: []
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,7 +29,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # whisper-medium-ph
18
 
19
- This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.3113
22
  - Wer: 0.1283
 
4
  base_model: openai/whisper-medium
5
  tags:
6
  - generated_from_trainer
7
+ datasets:
8
+ - rbcurzon/ph_dialect_asr
9
  metrics:
10
  - wer
11
  model-index:
12
  - name: whisper-medium-ph
13
+ results:
14
+ - task:
15
+ name: Automatic Speech Recognition
16
+ type: automatic-speech-recognition
17
+ dataset:
18
+ name: rbcurzon/ph_dialect_asr all
19
+ type: rbcurzon/ph_dialect_asr
20
+ args: all
21
+ metrics:
22
+ - name: Wer
23
+ type: wer
24
+ value: 0.12829864835872132
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
29
 
30
  # whisper-medium-ph
31
 
32
+ This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the rbcurzon/ph_dialect_asr all dataset.
33
  It achieves the following results on the evaluation set:
34
  - Loss: 0.3113
35
  - Wer: 0.1283
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.466091245376079,
3
+ "eval_loss": 0.3112793266773224,
4
+ "eval_runtime": 1173.9261,
5
+ "eval_samples": 3612,
6
+ "eval_samples_per_second": 3.077,
7
+ "eval_steps_per_second": 0.193,
8
+ "eval_wer": 0.12829864835872132,
9
+ "total_flos": 3.265323341119488e+19,
10
+ "train_loss": 0.2021937195956707,
11
+ "train_runtime": 9527.7148,
12
+ "train_samples": 12973,
13
+ "train_samples_per_second": 3.359,
14
+ "train_steps_per_second": 0.21
15
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.466091245376079,
3
+ "eval_loss": 0.3112793266773224,
4
+ "eval_runtime": 1173.9261,
5
+ "eval_samples": 3612,
6
+ "eval_samples_per_second": 3.077,
7
+ "eval_steps_per_second": 0.193,
8
+ "eval_wer": 0.12829864835872132
9
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.466091245376079,
3
+ "total_flos": 3.265323341119488e+19,
4
+ "train_loss": 0.2021937195956707,
5
+ "train_runtime": 9527.7148,
6
+ "train_samples": 12973,
7
+ "train_samples_per_second": 3.359,
8
+ "train_steps_per_second": 0.21
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.466091245376079,
6
+ "eval_steps": 1000,
7
+ "global_step": 2000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.030826140567200986,
14
+ "grad_norm": 11.499724388122559,
15
+ "learning_rate": 4.4e-07,
16
+ "loss": 1.2454,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.06165228113440197,
21
+ "grad_norm": 8.96716594696045,
22
+ "learning_rate": 9.400000000000001e-07,
23
+ "loss": 1.0189,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.09247842170160296,
28
+ "grad_norm": 7.870485305786133,
29
+ "learning_rate": 1.44e-06,
30
+ "loss": 0.8525,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 0.12330456226880394,
35
+ "grad_norm": 6.292770862579346,
36
+ "learning_rate": 1.94e-06,
37
+ "loss": 0.6987,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.15413070283600494,
42
+ "grad_norm": 6.656809329986572,
43
+ "learning_rate": 2.4400000000000004e-06,
44
+ "loss": 0.6285,
45
+ "step": 125
46
+ },
47
+ {
48
+ "epoch": 0.18495684340320592,
49
+ "grad_norm": 6.315510272979736,
50
+ "learning_rate": 2.9400000000000002e-06,
51
+ "loss": 0.6055,
52
+ "step": 150
53
+ },
54
+ {
55
+ "epoch": 0.2157829839704069,
56
+ "grad_norm": 7.333935737609863,
57
+ "learning_rate": 3.44e-06,
58
+ "loss": 0.5517,
59
+ "step": 175
60
+ },
61
+ {
62
+ "epoch": 0.2466091245376079,
63
+ "grad_norm": 7.557521820068359,
64
+ "learning_rate": 3.94e-06,
65
+ "loss": 0.4966,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 0.27743526510480887,
70
+ "grad_norm": 5.930201053619385,
71
+ "learning_rate": 4.440000000000001e-06,
72
+ "loss": 0.4821,
73
+ "step": 225
74
+ },
75
+ {
76
+ "epoch": 0.3082614056720099,
77
+ "grad_norm": 6.367243766784668,
78
+ "learning_rate": 4.94e-06,
79
+ "loss": 0.4661,
80
+ "step": 250
81
+ },
82
+ {
83
+ "epoch": 0.33908754623921084,
84
+ "grad_norm": 6.376411437988281,
85
+ "learning_rate": 5.4400000000000004e-06,
86
+ "loss": 0.4417,
87
+ "step": 275
88
+ },
89
+ {
90
+ "epoch": 0.36991368680641185,
91
+ "grad_norm": 5.1631574630737305,
92
+ "learning_rate": 5.94e-06,
93
+ "loss": 0.4297,
94
+ "step": 300
95
+ },
96
+ {
97
+ "epoch": 0.4007398273736128,
98
+ "grad_norm": 5.0071330070495605,
99
+ "learning_rate": 6.440000000000001e-06,
100
+ "loss": 0.3914,
101
+ "step": 325
102
+ },
103
+ {
104
+ "epoch": 0.4315659679408138,
105
+ "grad_norm": 4.759220123291016,
106
+ "learning_rate": 6.9400000000000005e-06,
107
+ "loss": 0.3548,
108
+ "step": 350
109
+ },
110
+ {
111
+ "epoch": 0.4623921085080148,
112
+ "grad_norm": 5.040701389312744,
113
+ "learning_rate": 7.440000000000001e-06,
114
+ "loss": 0.3546,
115
+ "step": 375
116
+ },
117
+ {
118
+ "epoch": 0.4932182490752158,
119
+ "grad_norm": 5.04915189743042,
120
+ "learning_rate": 7.94e-06,
121
+ "loss": 0.3388,
122
+ "step": 400
123
+ },
124
+ {
125
+ "epoch": 0.5240443896424167,
126
+ "grad_norm": 5.2604546546936035,
127
+ "learning_rate": 8.44e-06,
128
+ "loss": 0.3048,
129
+ "step": 425
130
+ },
131
+ {
132
+ "epoch": 0.5548705302096177,
133
+ "grad_norm": 5.409047603607178,
134
+ "learning_rate": 8.94e-06,
135
+ "loss": 0.3204,
136
+ "step": 450
137
+ },
138
+ {
139
+ "epoch": 0.5856966707768188,
140
+ "grad_norm": 7.095681190490723,
141
+ "learning_rate": 9.440000000000001e-06,
142
+ "loss": 0.2962,
143
+ "step": 475
144
+ },
145
+ {
146
+ "epoch": 0.6165228113440198,
147
+ "grad_norm": 5.518855571746826,
148
+ "learning_rate": 9.940000000000001e-06,
149
+ "loss": 0.2854,
150
+ "step": 500
151
+ },
152
+ {
153
+ "epoch": 0.6473489519112207,
154
+ "grad_norm": 5.083781719207764,
155
+ "learning_rate": 9.853333333333334e-06,
156
+ "loss": 0.2816,
157
+ "step": 525
158
+ },
159
+ {
160
+ "epoch": 0.6781750924784217,
161
+ "grad_norm": 4.599233150482178,
162
+ "learning_rate": 9.686666666666668e-06,
163
+ "loss": 0.2804,
164
+ "step": 550
165
+ },
166
+ {
167
+ "epoch": 0.7090012330456227,
168
+ "grad_norm": 5.882887840270996,
169
+ "learning_rate": 9.52e-06,
170
+ "loss": 0.254,
171
+ "step": 575
172
+ },
173
+ {
174
+ "epoch": 0.7398273736128237,
175
+ "grad_norm": 3.953178644180298,
176
+ "learning_rate": 9.353333333333334e-06,
177
+ "loss": 0.2522,
178
+ "step": 600
179
+ },
180
+ {
181
+ "epoch": 0.7706535141800247,
182
+ "grad_norm": 3.6629250049591064,
183
+ "learning_rate": 9.186666666666666e-06,
184
+ "loss": 0.2217,
185
+ "step": 625
186
+ },
187
+ {
188
+ "epoch": 0.8014796547472256,
189
+ "grad_norm": 5.168231010437012,
190
+ "learning_rate": 9.020000000000002e-06,
191
+ "loss": 0.2451,
192
+ "step": 650
193
+ },
194
+ {
195
+ "epoch": 0.8323057953144266,
196
+ "grad_norm": 4.211165904998779,
197
+ "learning_rate": 8.853333333333334e-06,
198
+ "loss": 0.216,
199
+ "step": 675
200
+ },
201
+ {
202
+ "epoch": 0.8631319358816276,
203
+ "grad_norm": 4.873836994171143,
204
+ "learning_rate": 8.686666666666668e-06,
205
+ "loss": 0.2027,
206
+ "step": 700
207
+ },
208
+ {
209
+ "epoch": 0.8939580764488286,
210
+ "grad_norm": 4.061721324920654,
211
+ "learning_rate": 8.52e-06,
212
+ "loss": 0.2184,
213
+ "step": 725
214
+ },
215
+ {
216
+ "epoch": 0.9247842170160296,
217
+ "grad_norm": 5.536831855773926,
218
+ "learning_rate": 8.353333333333335e-06,
219
+ "loss": 0.2212,
220
+ "step": 750
221
+ },
222
+ {
223
+ "epoch": 0.9556103575832305,
224
+ "grad_norm": 3.8625235557556152,
225
+ "learning_rate": 8.186666666666667e-06,
226
+ "loss": 0.1874,
227
+ "step": 775
228
+ },
229
+ {
230
+ "epoch": 0.9864364981504316,
231
+ "grad_norm": 4.030850887298584,
232
+ "learning_rate": 8.020000000000001e-06,
233
+ "loss": 0.1962,
234
+ "step": 800
235
+ },
236
+ {
237
+ "epoch": 1.0172626387176325,
238
+ "grad_norm": 3.3085479736328125,
239
+ "learning_rate": 7.853333333333333e-06,
240
+ "loss": 0.1508,
241
+ "step": 825
242
+ },
243
+ {
244
+ "epoch": 1.0480887792848335,
245
+ "grad_norm": 3.4279379844665527,
246
+ "learning_rate": 7.686666666666667e-06,
247
+ "loss": 0.0925,
248
+ "step": 850
249
+ },
250
+ {
251
+ "epoch": 1.0789149198520345,
252
+ "grad_norm": 3.8404757976531982,
253
+ "learning_rate": 7.520000000000001e-06,
254
+ "loss": 0.1182,
255
+ "step": 875
256
+ },
257
+ {
258
+ "epoch": 1.1097410604192355,
259
+ "grad_norm": 3.7630670070648193,
260
+ "learning_rate": 7.353333333333334e-06,
261
+ "loss": 0.1136,
262
+ "step": 900
263
+ },
264
+ {
265
+ "epoch": 1.1405672009864365,
266
+ "grad_norm": 3.491826295852661,
267
+ "learning_rate": 7.186666666666668e-06,
268
+ "loss": 0.1069,
269
+ "step": 925
270
+ },
271
+ {
272
+ "epoch": 1.1713933415536375,
273
+ "grad_norm": 2.2027463912963867,
274
+ "learning_rate": 7.0200000000000006e-06,
275
+ "loss": 0.1048,
276
+ "step": 950
277
+ },
278
+ {
279
+ "epoch": 1.2022194821208385,
280
+ "grad_norm": 4.981961250305176,
281
+ "learning_rate": 6.853333333333334e-06,
282
+ "loss": 0.1141,
283
+ "step": 975
284
+ },
285
+ {
286
+ "epoch": 1.2330456226880395,
287
+ "grad_norm": 3.4486851692199707,
288
+ "learning_rate": 6.6866666666666665e-06,
289
+ "loss": 0.1001,
290
+ "step": 1000
291
+ },
292
+ {
293
+ "epoch": 1.2330456226880395,
294
+ "eval_loss": 0.30403050780296326,
295
+ "eval_runtime": 1181.2571,
296
+ "eval_samples_per_second": 3.058,
297
+ "eval_steps_per_second": 0.191,
298
+ "eval_wer": 0.14332880402393383,
299
+ "step": 1000
300
+ },
301
+ {
302
+ "epoch": 1.2638717632552403,
303
+ "grad_norm": 2.620025396347046,
304
+ "learning_rate": 6.520000000000001e-06,
305
+ "loss": 0.0916,
306
+ "step": 1025
307
+ },
308
+ {
309
+ "epoch": 1.2946979038224415,
310
+ "grad_norm": 3.629256248474121,
311
+ "learning_rate": 6.353333333333333e-06,
312
+ "loss": 0.0949,
313
+ "step": 1050
314
+ },
315
+ {
316
+ "epoch": 1.3255240443896423,
317
+ "grad_norm": 2.832113265991211,
318
+ "learning_rate": 6.186666666666668e-06,
319
+ "loss": 0.084,
320
+ "step": 1075
321
+ },
322
+ {
323
+ "epoch": 1.3563501849568433,
324
+ "grad_norm": 3.5080323219299316,
325
+ "learning_rate": 6.02e-06,
326
+ "loss": 0.0881,
327
+ "step": 1100
328
+ },
329
+ {
330
+ "epoch": 1.3871763255240444,
331
+ "grad_norm": 3.9893321990966797,
332
+ "learning_rate": 5.853333333333335e-06,
333
+ "loss": 0.0781,
334
+ "step": 1125
335
+ },
336
+ {
337
+ "epoch": 1.4180024660912454,
338
+ "grad_norm": 2.786031723022461,
339
+ "learning_rate": 5.686666666666667e-06,
340
+ "loss": 0.0852,
341
+ "step": 1150
342
+ },
343
+ {
344
+ "epoch": 1.4488286066584464,
345
+ "grad_norm": 2.333205461502075,
346
+ "learning_rate": 5.5200000000000005e-06,
347
+ "loss": 0.0759,
348
+ "step": 1175
349
+ },
350
+ {
351
+ "epoch": 1.4796547472256474,
352
+ "grad_norm": 3.204261302947998,
353
+ "learning_rate": 5.3533333333333335e-06,
354
+ "loss": 0.0865,
355
+ "step": 1200
356
+ },
357
+ {
358
+ "epoch": 1.5104808877928484,
359
+ "grad_norm": 3.2963826656341553,
360
+ "learning_rate": 5.186666666666667e-06,
361
+ "loss": 0.0757,
362
+ "step": 1225
363
+ },
364
+ {
365
+ "epoch": 1.5413070283600492,
366
+ "grad_norm": 2.5825254917144775,
367
+ "learning_rate": 5.02e-06,
368
+ "loss": 0.0717,
369
+ "step": 1250
370
+ },
371
+ {
372
+ "epoch": 1.5721331689272504,
373
+ "grad_norm": 2.7192881107330322,
374
+ "learning_rate": 4.853333333333334e-06,
375
+ "loss": 0.0722,
376
+ "step": 1275
377
+ },
378
+ {
379
+ "epoch": 1.6029593094944512,
380
+ "grad_norm": 2.911716938018799,
381
+ "learning_rate": 4.686666666666667e-06,
382
+ "loss": 0.0757,
383
+ "step": 1300
384
+ },
385
+ {
386
+ "epoch": 1.6337854500616524,
387
+ "grad_norm": 2.1598548889160156,
388
+ "learning_rate": 4.520000000000001e-06,
389
+ "loss": 0.0704,
390
+ "step": 1325
391
+ },
392
+ {
393
+ "epoch": 1.6646115906288532,
394
+ "grad_norm": 2.1722934246063232,
395
+ "learning_rate": 4.353333333333334e-06,
396
+ "loss": 0.0621,
397
+ "step": 1350
398
+ },
399
+ {
400
+ "epoch": 1.6954377311960542,
401
+ "grad_norm": 2.0885589122772217,
402
+ "learning_rate": 4.1866666666666675e-06,
403
+ "loss": 0.0737,
404
+ "step": 1375
405
+ },
406
+ {
407
+ "epoch": 1.7262638717632552,
408
+ "grad_norm": 3.2038285732269287,
409
+ "learning_rate": 4.0200000000000005e-06,
410
+ "loss": 0.0729,
411
+ "step": 1400
412
+ },
413
+ {
414
+ "epoch": 1.7570900123304563,
415
+ "grad_norm": 2.2708075046539307,
416
+ "learning_rate": 3.853333333333334e-06,
417
+ "loss": 0.0571,
418
+ "step": 1425
419
+ },
420
+ {
421
+ "epoch": 1.7879161528976573,
422
+ "grad_norm": 1.496151089668274,
423
+ "learning_rate": 3.686666666666667e-06,
424
+ "loss": 0.0532,
425
+ "step": 1450
426
+ },
427
+ {
428
+ "epoch": 1.818742293464858,
429
+ "grad_norm": 1.8642607927322388,
430
+ "learning_rate": 3.52e-06,
431
+ "loss": 0.0544,
432
+ "step": 1475
433
+ },
434
+ {
435
+ "epoch": 1.8495684340320593,
436
+ "grad_norm": 2.1302435398101807,
437
+ "learning_rate": 3.3533333333333336e-06,
438
+ "loss": 0.0425,
439
+ "step": 1500
440
+ },
441
+ {
442
+ "epoch": 1.88039457459926,
443
+ "grad_norm": 1.2720330953598022,
444
+ "learning_rate": 3.186666666666667e-06,
445
+ "loss": 0.0486,
446
+ "step": 1525
447
+ },
448
+ {
449
+ "epoch": 1.9112207151664613,
450
+ "grad_norm": 1.3626000881195068,
451
+ "learning_rate": 3.0200000000000003e-06,
452
+ "loss": 0.0482,
453
+ "step": 1550
454
+ },
455
+ {
456
+ "epoch": 1.942046855733662,
457
+ "grad_norm": 2.652956485748291,
458
+ "learning_rate": 2.8533333333333337e-06,
459
+ "loss": 0.047,
460
+ "step": 1575
461
+ },
462
+ {
463
+ "epoch": 1.972872996300863,
464
+ "grad_norm": 2.178326368331909,
465
+ "learning_rate": 2.686666666666667e-06,
466
+ "loss": 0.0543,
467
+ "step": 1600
468
+ },
469
+ {
470
+ "epoch": 2.003699136868064,
471
+ "grad_norm": 0.6113713979721069,
472
+ "learning_rate": 2.52e-06,
473
+ "loss": 0.0416,
474
+ "step": 1625
475
+ },
476
+ {
477
+ "epoch": 2.034525277435265,
478
+ "grad_norm": 1.6302359104156494,
479
+ "learning_rate": 2.3533333333333334e-06,
480
+ "loss": 0.0167,
481
+ "step": 1650
482
+ },
483
+ {
484
+ "epoch": 2.065351418002466,
485
+ "grad_norm": 0.9459154605865479,
486
+ "learning_rate": 2.1866666666666668e-06,
487
+ "loss": 0.0137,
488
+ "step": 1675
489
+ },
490
+ {
491
+ "epoch": 2.096177558569667,
492
+ "grad_norm": 1.4943691492080688,
493
+ "learning_rate": 2.02e-06,
494
+ "loss": 0.0159,
495
+ "step": 1700
496
+ },
497
+ {
498
+ "epoch": 2.127003699136868,
499
+ "grad_norm": 0.5425832867622375,
500
+ "learning_rate": 1.8533333333333333e-06,
501
+ "loss": 0.0152,
502
+ "step": 1725
503
+ },
504
+ {
505
+ "epoch": 2.157829839704069,
506
+ "grad_norm": 1.4946790933609009,
507
+ "learning_rate": 1.6866666666666667e-06,
508
+ "loss": 0.0146,
509
+ "step": 1750
510
+ },
511
+ {
512
+ "epoch": 2.18865598027127,
513
+ "grad_norm": 0.9100169539451599,
514
+ "learning_rate": 1.52e-06,
515
+ "loss": 0.0191,
516
+ "step": 1775
517
+ },
518
+ {
519
+ "epoch": 2.219482120838471,
520
+ "grad_norm": 1.2448313236236572,
521
+ "learning_rate": 1.3533333333333334e-06,
522
+ "loss": 0.0159,
523
+ "step": 1800
524
+ },
525
+ {
526
+ "epoch": 2.250308261405672,
527
+ "grad_norm": 0.9976411461830139,
528
+ "learning_rate": 1.1866666666666668e-06,
529
+ "loss": 0.0203,
530
+ "step": 1825
531
+ },
532
+ {
533
+ "epoch": 2.281134401972873,
534
+ "grad_norm": 1.358780860900879,
535
+ "learning_rate": 1.02e-06,
536
+ "loss": 0.0139,
537
+ "step": 1850
538
+ },
539
+ {
540
+ "epoch": 2.311960542540074,
541
+ "grad_norm": 1.2800226211547852,
542
+ "learning_rate": 8.533333333333334e-07,
543
+ "loss": 0.0175,
544
+ "step": 1875
545
+ },
546
+ {
547
+ "epoch": 2.342786683107275,
548
+ "grad_norm": 1.007161021232605,
549
+ "learning_rate": 6.866666666666667e-07,
550
+ "loss": 0.013,
551
+ "step": 1900
552
+ },
553
+ {
554
+ "epoch": 2.373612823674476,
555
+ "grad_norm": 1.1838051080703735,
556
+ "learning_rate": 5.2e-07,
557
+ "loss": 0.0144,
558
+ "step": 1925
559
+ },
560
+ {
561
+ "epoch": 2.404438964241677,
562
+ "grad_norm": 1.2872673273086548,
563
+ "learning_rate": 3.533333333333334e-07,
564
+ "loss": 0.0134,
565
+ "step": 1950
566
+ },
567
+ {
568
+ "epoch": 2.435265104808878,
569
+ "grad_norm": 0.7094443440437317,
570
+ "learning_rate": 1.866666666666667e-07,
571
+ "loss": 0.0123,
572
+ "step": 1975
573
+ },
574
+ {
575
+ "epoch": 2.466091245376079,
576
+ "grad_norm": 0.6137486696243286,
577
+ "learning_rate": 2e-08,
578
+ "loss": 0.0125,
579
+ "step": 2000
580
+ },
581
+ {
582
+ "epoch": 2.466091245376079,
583
+ "eval_loss": 0.3112793266773224,
584
+ "eval_runtime": 1182.7026,
585
+ "eval_samples_per_second": 3.054,
586
+ "eval_steps_per_second": 0.191,
587
+ "eval_wer": 0.12829864835872132,
588
+ "step": 2000
589
+ },
590
+ {
591
+ "epoch": 2.466091245376079,
592
+ "step": 2000,
593
+ "total_flos": 3.265323341119488e+19,
594
+ "train_loss": 0.2021937195956707,
595
+ "train_runtime": 9527.7148,
596
+ "train_samples_per_second": 3.359,
597
+ "train_steps_per_second": 0.21
598
+ }
599
+ ],
600
+ "logging_steps": 25,
601
+ "max_steps": 2000,
602
+ "num_input_tokens_seen": 0,
603
+ "num_train_epochs": 3,
604
+ "save_steps": 1000,
605
+ "stateful_callbacks": {
606
+ "TrainerControl": {
607
+ "args": {
608
+ "should_epoch_stop": false,
609
+ "should_evaluate": false,
610
+ "should_log": false,
611
+ "should_save": true,
612
+ "should_training_stop": true
613
+ },
614
+ "attributes": {}
615
+ }
616
+ },
617
+ "total_flos": 3.265323341119488e+19,
618
+ "train_batch_size": 16,
619
+ "trial_name": null,
620
+ "trial_params": null
621
+ }