Zheng Li committed on
Commit
b0dfa85
·
verified ·
1 Parent(s): e05e4c9

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
  - superb
 
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base
5
  tags:
6
+ - audio-classification
7
  - generated_from_trainer
8
  datasets:
9
  - superb
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 7.996245306633291,
3
- "eval_accuracy": 0.9814651368049426,
4
- "eval_loss": 0.12438357621431351,
5
- "eval_runtime": 5.5465,
6
- "eval_samples_per_second": 1225.644,
7
- "eval_steps_per_second": 38.403,
8
- "total_flos": 3.777723239743488e+18,
9
- "train_loss": 0.726146658611058,
10
- "train_runtime": 635.8846,
11
- "train_samples_per_second": 642.808,
12
- "train_steps_per_second": 2.504
13
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9811709326272433,
4
+ "eval_loss": 0.12430207431316376,
5
+ "eval_runtime": 5.5067,
6
+ "eval_samples_per_second": 1234.495,
7
+ "eval_steps_per_second": 38.68,
8
+ "total_flos": 4.72566865822464e+18,
9
+ "train_loss": 0.7669839228391647,
10
+ "train_runtime": 621.093,
11
+ "train_samples_per_second": 822.646,
12
+ "train_steps_per_second": 1.61
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.996245306633291,
3
- "eval_accuracy": 0.9814651368049426,
4
- "eval_loss": 0.12438357621431351,
5
- "eval_runtime": 5.5465,
6
- "eval_samples_per_second": 1225.644,
7
- "eval_steps_per_second": 38.403
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9811709326272433,
4
+ "eval_loss": 0.12430207431316376,
5
+ "eval_runtime": 5.5067,
6
+ "eval_samples_per_second": 1234.495,
7
+ "eval_steps_per_second": 38.68
8
  }
runs/May15_01-09-48_cs-Precision-7960-Tower/events.out.tfevents.1747286441.cs-Precision-7960-Tower.142382.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b4a01e10ace34301715b9656dc9fd42cad89a638c9363f5b4a92b534d963ea4
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.996245306633291,
3
- "total_flos": 3.777723239743488e+18,
4
- "train_loss": 0.726146658611058,
5
- "train_runtime": 635.8846,
6
- "train_samples_per_second": 642.808,
7
- "train_steps_per_second": 2.504
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 4.72566865822464e+18,
4
+ "train_loss": 0.7669839228391647,
5
+ "train_runtime": 621.093,
6
+ "train_samples_per_second": 822.646,
7
+ "train_steps_per_second": 1.61
8
  }
trainer_state.json CHANGED
@@ -1,1212 +1,817 @@
1
  {
2
- "best_metric": 0.9814651368049426,
3
- "best_model_checkpoint": "wav2vec2-base-ft-keyword-spotting/checkpoint-1393",
4
- "epoch": 7.996245306633291,
5
  "eval_steps": 500,
6
- "global_step": 1592,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.05006257822277847,
13
- "grad_norm": 2.2161571979522705,
14
- "learning_rate": 1.25e-06,
15
- "loss": 4.0993,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.10012515644555695,
20
- "grad_norm": 3.068307399749756,
21
- "learning_rate": 2.5e-06,
22
- "loss": 4.1801,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.15018773466833543,
27
- "grad_norm": 3.121358871459961,
28
- "learning_rate": 3.7500000000000005e-06,
29
- "loss": 4.11,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.2002503128911139,
34
- "grad_norm": 3.456892728805542,
35
- "learning_rate": 5e-06,
36
- "loss": 3.9697,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.2503128911138924,
41
- "grad_norm": 4.699773788452148,
42
- "learning_rate": 6.25e-06,
43
- "loss": 3.7271,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.30037546933667086,
48
- "grad_norm": 6.0911078453063965,
49
- "learning_rate": 7.500000000000001e-06,
50
- "loss": 3.3436,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.3504380475594493,
55
- "grad_norm": 5.996876239776611,
56
- "learning_rate": 8.750000000000001e-06,
57
- "loss": 2.8803,
58
  "step": 70
59
  },
60
  {
61
- "epoch": 0.4005006257822278,
62
- "grad_norm": 5.593225479125977,
63
- "learning_rate": 1e-05,
64
- "loss": 2.54,
65
  "step": 80
66
  },
67
  {
68
- "epoch": 0.45056320400500627,
69
- "grad_norm": 5.177072525024414,
70
- "learning_rate": 1.125e-05,
71
- "loss": 2.326,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 0.5006257822277848,
76
- "grad_norm": 4.610163688659668,
77
- "learning_rate": 1.25e-05,
78
- "loss": 2.1907,
79
  "step": 100
80
  },
81
  {
82
- "epoch": 0.5506883604505632,
83
- "grad_norm": 4.401670932769775,
84
- "learning_rate": 1.375e-05,
85
- "loss": 2.054,
 
 
 
 
 
 
 
 
 
86
  "step": 110
87
  },
88
  {
89
- "epoch": 0.6007509386733417,
90
- "grad_norm": 3.190901041030884,
91
- "learning_rate": 1.5000000000000002e-05,
92
- "loss": 1.994,
93
  "step": 120
94
  },
95
  {
96
- "epoch": 0.6508135168961201,
97
- "grad_norm": 2.314962148666382,
98
- "learning_rate": 1.6250000000000002e-05,
99
- "loss": 1.9256,
100
  "step": 130
101
  },
102
  {
103
- "epoch": 0.7008760951188986,
104
- "grad_norm": 2.310375690460205,
105
- "learning_rate": 1.7500000000000002e-05,
106
- "loss": 1.7774,
107
  "step": 140
108
  },
109
  {
110
- "epoch": 0.7509386733416771,
111
- "grad_norm": 1.4242396354675293,
112
- "learning_rate": 1.8750000000000002e-05,
113
- "loss": 1.7961,
114
  "step": 150
115
  },
116
  {
117
- "epoch": 0.8010012515644556,
118
- "grad_norm": 0.6981024742126465,
119
- "learning_rate": 2e-05,
120
- "loss": 1.8108,
121
  "step": 160
122
  },
123
  {
124
- "epoch": 0.851063829787234,
125
- "grad_norm": 0.7541513442993164,
126
- "learning_rate": 1.9860335195530728e-05,
127
- "loss": 1.733,
128
  "step": 170
129
  },
130
  {
131
- "epoch": 0.9011264080100125,
132
- "grad_norm": 1.1699227094650269,
133
- "learning_rate": 1.9720670391061455e-05,
134
- "loss": 1.759,
135
  "step": 180
136
  },
137
  {
138
- "epoch": 0.951188986232791,
139
- "grad_norm": 2.1834521293640137,
140
- "learning_rate": 1.958100558659218e-05,
141
- "loss": 1.7191,
142
  "step": 190
143
  },
144
  {
145
- "epoch": 0.9962453066332916,
146
- "eval_accuracy": 0.6209179170344219,
147
- "eval_loss": 1.5815445184707642,
148
- "eval_runtime": 4.9783,
149
- "eval_samples_per_second": 1365.528,
150
- "eval_steps_per_second": 42.786,
151
- "step": 199
152
  },
153
  {
154
- "epoch": 1.0050062578222778,
155
- "grad_norm": 3.1251320838928223,
156
- "learning_rate": 1.9441340782122907e-05,
157
- "loss": 1.7482,
 
 
158
  "step": 200
159
  },
160
  {
161
- "epoch": 1.0550688360450564,
162
- "grad_norm": 4.351515293121338,
163
- "learning_rate": 1.9301675977653634e-05,
164
- "loss": 1.5606,
165
  "step": 210
166
  },
167
  {
168
- "epoch": 1.1051314142678348,
169
- "grad_norm": 2.624279022216797,
170
- "learning_rate": 1.9162011173184357e-05,
171
- "loss": 1.5657,
172
  "step": 220
173
  },
174
  {
175
- "epoch": 1.1551939924906134,
176
- "grad_norm": 2.795576333999634,
177
- "learning_rate": 1.9022346368715087e-05,
178
- "loss": 1.4578,
179
  "step": 230
180
  },
181
  {
182
- "epoch": 1.2052565707133918,
183
- "grad_norm": 2.7234373092651367,
184
- "learning_rate": 1.888268156424581e-05,
185
- "loss": 1.449,
186
  "step": 240
187
  },
188
  {
189
- "epoch": 1.2553191489361701,
190
- "grad_norm": 5.743226528167725,
191
- "learning_rate": 1.8743016759776536e-05,
192
- "loss": 1.4041,
193
  "step": 250
194
  },
195
  {
196
- "epoch": 1.3053817271589487,
197
- "grad_norm": 6.164392471313477,
198
- "learning_rate": 1.8603351955307266e-05,
199
- "loss": 1.3667,
200
  "step": 260
201
  },
202
  {
203
- "epoch": 1.355444305381727,
204
- "grad_norm": 3.474940299987793,
205
- "learning_rate": 1.846368715083799e-05,
206
- "loss": 1.2949,
207
  "step": 270
208
  },
209
  {
210
- "epoch": 1.4055068836045057,
211
- "grad_norm": 4.262696266174316,
212
- "learning_rate": 1.8324022346368716e-05,
213
- "loss": 1.2814,
214
  "step": 280
215
  },
216
  {
217
- "epoch": 1.455569461827284,
218
- "grad_norm": 7.003787994384766,
219
- "learning_rate": 1.8184357541899442e-05,
220
- "loss": 1.2096,
221
  "step": 290
222
  },
223
  {
224
- "epoch": 1.5056320400500627,
225
- "grad_norm": 3.957937240600586,
226
- "learning_rate": 1.804469273743017e-05,
227
- "loss": 1.183,
228
  "step": 300
229
  },
230
  {
231
- "epoch": 1.555694618272841,
232
- "grad_norm": 4.9982991218566895,
233
- "learning_rate": 1.7905027932960895e-05,
234
- "loss": 1.2105,
 
 
 
 
 
 
 
 
 
235
  "step": 310
236
  },
237
  {
238
- "epoch": 1.6057571964956194,
239
- "grad_norm": 3.900266647338867,
240
- "learning_rate": 1.776536312849162e-05,
241
- "loss": 1.1167,
242
  "step": 320
243
  },
244
  {
245
- "epoch": 1.655819774718398,
246
- "grad_norm": 4.397927284240723,
247
- "learning_rate": 1.7625698324022348e-05,
248
- "loss": 1.144,
249
  "step": 330
250
  },
251
  {
252
- "epoch": 1.7058823529411766,
253
- "grad_norm": 3.0843544006347656,
254
- "learning_rate": 1.7486033519553075e-05,
255
- "loss": 1.101,
256
  "step": 340
257
  },
258
  {
259
- "epoch": 1.7559449311639548,
260
- "grad_norm": 3.6377310752868652,
261
- "learning_rate": 1.73463687150838e-05,
262
- "loss": 1.0482,
263
  "step": 350
264
  },
265
  {
266
- "epoch": 1.8060075093867334,
267
- "grad_norm": 2.659217357635498,
268
- "learning_rate": 1.7206703910614527e-05,
269
- "loss": 1.0428,
270
  "step": 360
271
  },
272
  {
273
- "epoch": 1.856070087609512,
274
- "grad_norm": 4.358920097351074,
275
- "learning_rate": 1.7067039106145254e-05,
276
- "loss": 1.0483,
277
  "step": 370
278
  },
279
  {
280
- "epoch": 1.9061326658322904,
281
- "grad_norm": 4.7584452629089355,
282
- "learning_rate": 1.6927374301675977e-05,
283
- "loss": 0.9886,
284
  "step": 380
285
  },
286
  {
287
- "epoch": 1.9561952440550687,
288
- "grad_norm": 3.638371229171753,
289
- "learning_rate": 1.6787709497206707e-05,
290
- "loss": 1.0088,
291
  "step": 390
292
  },
293
  {
294
- "epoch": 1.9962453066332917,
295
- "eval_accuracy": 0.83480435422183,
296
- "eval_loss": 0.9595487713813782,
297
- "eval_runtime": 5.0807,
298
- "eval_samples_per_second": 1338.014,
299
- "eval_steps_per_second": 41.924,
300
- "step": 398
301
  },
302
  {
303
- "epoch": 2.0100125156445556,
304
- "grad_norm": 2.447006940841675,
305
- "learning_rate": 1.664804469273743e-05,
306
- "loss": 1.0295,
 
 
307
  "step": 400
308
  },
309
  {
310
- "epoch": 2.0600750938673342,
311
- "grad_norm": 3.891425132751465,
312
- "learning_rate": 1.6508379888268156e-05,
313
- "loss": 0.9607,
314
  "step": 410
315
  },
316
  {
317
- "epoch": 2.110137672090113,
318
- "grad_norm": 3.9283361434936523,
319
- "learning_rate": 1.6368715083798886e-05,
320
- "loss": 0.9797,
321
  "step": 420
322
  },
323
  {
324
- "epoch": 2.160200250312891,
325
- "grad_norm": 3.9875011444091797,
326
- "learning_rate": 1.622905027932961e-05,
327
- "loss": 0.923,
328
  "step": 430
329
  },
330
  {
331
- "epoch": 2.2102628285356696,
332
- "grad_norm": 2.9517266750335693,
333
- "learning_rate": 1.6089385474860336e-05,
334
- "loss": 0.872,
335
  "step": 440
336
  },
337
  {
338
- "epoch": 2.260325406758448,
339
- "grad_norm": 6.958038330078125,
340
- "learning_rate": 1.5949720670391062e-05,
341
- "loss": 0.8477,
342
  "step": 450
343
  },
344
  {
345
- "epoch": 2.3103879849812268,
346
- "grad_norm": 2.592712640762329,
347
- "learning_rate": 1.581005586592179e-05,
348
- "loss": 0.803,
349
  "step": 460
350
  },
351
  {
352
- "epoch": 2.360450563204005,
353
- "grad_norm": 3.0653076171875,
354
- "learning_rate": 1.5670391061452515e-05,
355
- "loss": 0.7824,
356
  "step": 470
357
  },
358
  {
359
- "epoch": 2.4105131414267835,
360
- "grad_norm": 4.134679794311523,
361
- "learning_rate": 1.553072625698324e-05,
362
- "loss": 0.7681,
363
  "step": 480
364
  },
365
  {
366
- "epoch": 2.460575719649562,
367
- "grad_norm": 2.6600096225738525,
368
- "learning_rate": 1.5391061452513968e-05,
369
- "loss": 0.6916,
370
  "step": 490
371
  },
372
  {
373
- "epoch": 2.5106382978723403,
374
- "grad_norm": 2.778372287750244,
375
- "learning_rate": 1.5251396648044694e-05,
376
- "loss": 0.7158,
377
  "step": 500
378
  },
379
  {
380
- "epoch": 2.560700876095119,
381
- "grad_norm": 2.7881994247436523,
382
- "learning_rate": 1.5111731843575421e-05,
383
- "loss": 0.689,
 
 
 
 
 
 
 
 
 
384
  "step": 510
385
  },
386
  {
387
- "epoch": 2.6107634543178975,
388
- "grad_norm": 3.771796226501465,
389
- "learning_rate": 1.4972067039106146e-05,
390
- "loss": 0.6383,
391
  "step": 520
392
  },
393
  {
394
- "epoch": 2.660826032540676,
395
- "grad_norm": 3.580981731414795,
396
- "learning_rate": 1.4832402234636874e-05,
397
- "loss": 0.6086,
398
  "step": 530
399
  },
400
  {
401
- "epoch": 2.710888610763454,
402
- "grad_norm": 2.298293352127075,
403
- "learning_rate": 1.4692737430167599e-05,
404
- "loss": 0.581,
405
  "step": 540
406
  },
407
  {
408
- "epoch": 2.760951188986233,
409
- "grad_norm": 3.767306327819824,
410
- "learning_rate": 1.4553072625698325e-05,
411
- "loss": 0.5556,
412
  "step": 550
413
  },
414
  {
415
- "epoch": 2.8110137672090114,
416
- "grad_norm": 3.712104558944702,
417
- "learning_rate": 1.4413407821229052e-05,
418
- "loss": 0.5637,
419
  "step": 560
420
  },
421
  {
422
- "epoch": 2.8610763454317896,
423
- "grad_norm": 2.500260829925537,
424
- "learning_rate": 1.4273743016759778e-05,
425
- "loss": 0.525,
426
  "step": 570
427
  },
428
  {
429
- "epoch": 2.911138923654568,
430
- "grad_norm": 3.4421885013580322,
431
- "learning_rate": 1.4134078212290503e-05,
432
- "loss": 0.5235,
433
  "step": 580
434
  },
435
  {
436
- "epoch": 2.9612015018773468,
437
- "grad_norm": 3.3740108013153076,
438
- "learning_rate": 1.3994413407821231e-05,
439
- "loss": 0.4964,
440
  "step": 590
441
  },
442
  {
443
- "epoch": 2.9962453066332917,
444
- "eval_accuracy": 0.9727861135628126,
445
- "eval_loss": 0.37298157811164856,
446
- "eval_runtime": 4.944,
447
- "eval_samples_per_second": 1374.987,
448
- "eval_steps_per_second": 43.082,
449
- "step": 597
450
  },
451
  {
452
- "epoch": 3.0150187734668337,
453
- "grad_norm": 5.472856521606445,
454
- "learning_rate": 1.3854748603351957e-05,
455
- "loss": 0.5243,
 
 
456
  "step": 600
457
  },
458
  {
459
- "epoch": 3.065081351689612,
460
- "grad_norm": 3.919224977493286,
461
- "learning_rate": 1.3715083798882682e-05,
462
- "loss": 0.4697,
463
  "step": 610
464
  },
465
  {
466
- "epoch": 3.1151439299123904,
467
- "grad_norm": 2.8592793941497803,
468
- "learning_rate": 1.357541899441341e-05,
469
- "loss": 0.4581,
470
  "step": 620
471
  },
472
  {
473
- "epoch": 3.165206508135169,
474
- "grad_norm": 3.517875909805298,
475
- "learning_rate": 1.3435754189944135e-05,
476
- "loss": 0.4609,
477
  "step": 630
478
  },
479
  {
480
- "epoch": 3.2152690863579476,
481
- "grad_norm": 3.0219643115997314,
482
- "learning_rate": 1.3296089385474861e-05,
483
- "loss": 0.4251,
484
  "step": 640
485
  },
486
  {
487
- "epoch": 3.2653316645807258,
488
- "grad_norm": 4.166126728057861,
489
- "learning_rate": 1.3156424581005586e-05,
490
- "loss": 0.4026,
491
  "step": 650
492
  },
493
  {
494
- "epoch": 3.3153942428035044,
495
- "grad_norm": 2.9157118797302246,
496
- "learning_rate": 1.3016759776536314e-05,
497
- "loss": 0.4101,
498
  "step": 660
499
  },
500
  {
501
- "epoch": 3.365456821026283,
502
- "grad_norm": 2.605928897857666,
503
- "learning_rate": 1.287709497206704e-05,
504
- "loss": 0.3951,
505
  "step": 670
506
  },
507
  {
508
- "epoch": 3.415519399249061,
509
- "grad_norm": 2.282757043838501,
510
- "learning_rate": 1.2737430167597766e-05,
511
- "loss": 0.3876,
512
  "step": 680
513
  },
514
  {
515
- "epoch": 3.4655819774718397,
516
- "grad_norm": 2.8429336547851562,
517
- "learning_rate": 1.2597765363128494e-05,
518
- "loss": 0.3735,
519
  "step": 690
520
  },
521
  {
522
- "epoch": 3.5156445556946183,
523
- "grad_norm": 3.3356730937957764,
524
- "learning_rate": 1.2458100558659219e-05,
525
- "loss": 0.3873,
526
  "step": 700
527
  },
528
  {
529
- "epoch": 3.565707133917397,
530
- "grad_norm": 3.1262238025665283,
531
- "learning_rate": 1.2318435754189945e-05,
532
- "loss": 0.3572,
 
 
 
 
 
 
 
 
 
533
  "step": 710
534
  },
535
  {
536
- "epoch": 3.615769712140175,
537
- "grad_norm": 3.589921712875366,
538
- "learning_rate": 1.2178770949720671e-05,
539
- "loss": 0.3558,
540
  "step": 720
541
  },
542
  {
543
- "epoch": 3.6658322903629537,
544
- "grad_norm": 3.085702896118164,
545
- "learning_rate": 1.2039106145251398e-05,
546
- "loss": 0.3499,
547
  "step": 730
548
  },
549
  {
550
- "epoch": 3.7158948685857323,
551
- "grad_norm": 3.2283403873443604,
552
- "learning_rate": 1.1899441340782123e-05,
553
- "loss": 0.3428,
554
  "step": 740
555
  },
556
  {
557
- "epoch": 3.7659574468085104,
558
- "grad_norm": 2.824887990951538,
559
- "learning_rate": 1.175977653631285e-05,
560
- "loss": 0.3399,
561
  "step": 750
562
  },
563
  {
564
- "epoch": 3.816020025031289,
565
- "grad_norm": 3.595198392868042,
566
- "learning_rate": 1.1620111731843577e-05,
567
- "loss": 0.3264,
568
  "step": 760
569
  },
570
  {
571
- "epoch": 3.8660826032540676,
572
- "grad_norm": 4.541857719421387,
573
- "learning_rate": 1.1480446927374302e-05,
574
- "loss": 0.3322,
575
  "step": 770
576
  },
577
  {
578
- "epoch": 3.916145181476846,
579
- "grad_norm": 3.149718999862671,
580
- "learning_rate": 1.134078212290503e-05,
581
- "loss": 0.3032,
582
  "step": 780
583
  },
584
  {
585
- "epoch": 3.966207759699625,
586
- "grad_norm": 3.3817384243011475,
587
- "learning_rate": 1.1201117318435755e-05,
588
- "loss": 0.3263,
589
  "step": 790
590
  },
591
  {
592
- "epoch": 3.9962453066332917,
593
- "eval_accuracy": 0.9783759929390997,
594
- "eval_loss": 0.21611753106117249,
595
- "eval_runtime": 4.9235,
596
- "eval_samples_per_second": 1380.729,
597
- "eval_steps_per_second": 43.262,
598
- "step": 796
599
  },
600
  {
601
- "epoch": 4.020025031289111,
602
- "grad_norm": 3.4890668392181396,
603
- "learning_rate": 1.1061452513966481e-05,
604
- "loss": 0.3262,
 
 
605
  "step": 800
606
  },
607
  {
608
- "epoch": 4.07008760951189,
609
- "grad_norm": 2.202880382537842,
610
- "learning_rate": 1.0921787709497206e-05,
611
- "loss": 0.3101,
612
  "step": 810
613
  },
614
  {
615
- "epoch": 4.1201501877346685,
616
- "grad_norm": 2.4988725185394287,
617
- "learning_rate": 1.0782122905027934e-05,
618
- "loss": 0.3022,
619
  "step": 820
620
  },
621
  {
622
- "epoch": 4.170212765957447,
623
- "grad_norm": 3.495452404022217,
624
- "learning_rate": 1.0642458100558659e-05,
625
- "loss": 0.2997,
626
  "step": 830
627
  },
628
  {
629
- "epoch": 4.220275344180226,
630
- "grad_norm": 2.8825905323028564,
631
- "learning_rate": 1.0502793296089386e-05,
632
- "loss": 0.3182,
633
  "step": 840
634
  },
635
  {
636
- "epoch": 4.270337922403003,
637
- "grad_norm": 2.3349449634552,
638
- "learning_rate": 1.0363128491620114e-05,
639
- "loss": 0.2884,
640
  "step": 850
641
  },
642
  {
643
- "epoch": 4.320400500625782,
644
- "grad_norm": 3.996652603149414,
645
- "learning_rate": 1.0223463687150838e-05,
646
- "loss": 0.29,
647
  "step": 860
648
  },
649
  {
650
- "epoch": 4.370463078848561,
651
- "grad_norm": 1.9067742824554443,
652
- "learning_rate": 1.0083798882681567e-05,
653
- "loss": 0.2703,
654
  "step": 870
655
  },
656
  {
657
- "epoch": 4.420525657071339,
658
- "grad_norm": 3.410153388977051,
659
- "learning_rate": 9.944134078212291e-06,
660
- "loss": 0.2864,
661
  "step": 880
662
  },
663
  {
664
- "epoch": 4.470588235294118,
665
- "grad_norm": 2.604459285736084,
666
- "learning_rate": 9.804469273743018e-06,
667
- "loss": 0.2814,
668
  "step": 890
669
  },
670
  {
671
- "epoch": 4.520650813516896,
672
- "grad_norm": 1.9956718683242798,
673
- "learning_rate": 9.664804469273744e-06,
674
- "loss": 0.2501,
 
 
 
 
 
 
 
 
 
675
  "step": 900
676
  },
677
  {
678
- "epoch": 4.570713391739675,
679
- "grad_norm": 3.1284875869750977,
680
- "learning_rate": 9.52513966480447e-06,
681
- "loss": 0.2584,
682
  "step": 910
683
  },
684
  {
685
- "epoch": 4.6207759699624535,
686
- "grad_norm": 3.3868837356567383,
687
- "learning_rate": 9.385474860335197e-06,
688
- "loss": 0.2486,
689
  "step": 920
690
  },
691
  {
692
- "epoch": 4.670838548185231,
693
- "grad_norm": 2.5720443725585938,
694
- "learning_rate": 9.245810055865922e-06,
695
- "loss": 0.2624,
696
  "step": 930
697
  },
698
  {
699
- "epoch": 4.72090112640801,
700
- "grad_norm": 3.343977689743042,
701
- "learning_rate": 9.106145251396648e-06,
702
- "loss": 0.2506,
703
  "step": 940
704
  },
705
  {
706
- "epoch": 4.7709637046307884,
707
- "grad_norm": 3.791302442550659,
708
- "learning_rate": 8.966480446927375e-06,
709
- "loss": 0.2712,
710
  "step": 950
711
  },
712
  {
713
- "epoch": 4.821026282853567,
714
- "grad_norm": 3.8650593757629395,
715
- "learning_rate": 8.826815642458101e-06,
716
- "loss": 0.2508,
717
  "step": 960
718
  },
719
  {
720
- "epoch": 4.871088861076346,
721
- "grad_norm": 3.790891408920288,
722
- "learning_rate": 8.687150837988828e-06,
723
- "loss": 0.2653,
724
  "step": 970
725
  },
726
  {
727
- "epoch": 4.921151439299124,
728
- "grad_norm": 3.150134325027466,
729
- "learning_rate": 8.547486033519554e-06,
730
- "loss": 0.2431,
731
  "step": 980
732
  },
733
  {
734
- "epoch": 4.971214017521902,
735
- "grad_norm": 3.999809741973877,
736
- "learning_rate": 8.40782122905028e-06,
737
- "loss": 0.2512,
738
  "step": 990
739
  },
740
  {
741
- "epoch": 4.996245306633291,
742
- "eval_accuracy": 0.979552809649897,
743
- "eval_loss": 0.16172775626182556,
744
- "eval_runtime": 5.0947,
745
- "eval_samples_per_second": 1334.339,
746
- "eval_steps_per_second": 41.809,
747
- "step": 995
748
- },
749
- {
750
- "epoch": 5.025031289111389,
751
- "grad_norm": 3.607999563217163,
752
- "learning_rate": 8.268156424581007e-06,
753
- "loss": 0.261,
754
  "step": 1000
755
  },
756
  {
757
- "epoch": 5.075093867334168,
758
- "grad_norm": 2.384503126144409,
759
- "learning_rate": 8.128491620111732e-06,
760
- "loss": 0.2301,
761
- "step": 1010
762
- },
763
- {
764
- "epoch": 5.1251564455569465,
765
- "grad_norm": 3.3627846240997314,
766
- "learning_rate": 7.988826815642458e-06,
767
- "loss": 0.2363,
768
- "step": 1020
769
- },
770
- {
771
- "epoch": 5.175219023779725,
772
- "grad_norm": 3.3233959674835205,
773
- "learning_rate": 7.849162011173185e-06,
774
- "loss": 0.2275,
775
- "step": 1030
776
- },
777
- {
778
- "epoch": 5.225281602002503,
779
- "grad_norm": 3.21708083152771,
780
- "learning_rate": 7.709497206703911e-06,
781
- "loss": 0.2247,
782
- "step": 1040
783
- },
784
- {
785
- "epoch": 5.275344180225281,
786
- "grad_norm": 2.4232187271118164,
787
- "learning_rate": 7.569832402234637e-06,
788
- "loss": 0.2044,
789
- "step": 1050
790
- },
791
- {
792
- "epoch": 5.32540675844806,
793
- "grad_norm": 2.9417362213134766,
794
- "learning_rate": 7.430167597765364e-06,
795
- "loss": 0.2156,
796
- "step": 1060
797
- },
798
- {
799
- "epoch": 5.375469336670839,
800
- "grad_norm": 5.781579971313477,
801
- "learning_rate": 7.290502793296091e-06,
802
- "loss": 0.2479,
803
- "step": 1070
804
- },
805
- {
806
- "epoch": 5.425531914893617,
807
- "grad_norm": 2.417659044265747,
808
- "learning_rate": 7.150837988826816e-06,
809
- "loss": 0.2349,
810
- "step": 1080
811
- },
812
- {
813
- "epoch": 5.475594493116396,
814
- "grad_norm": 3.229393482208252,
815
- "learning_rate": 7.011173184357543e-06,
816
- "loss": 0.218,
817
- "step": 1090
818
- },
819
- {
820
- "epoch": 5.5256570713391735,
821
- "grad_norm": 3.5707740783691406,
822
- "learning_rate": 6.871508379888268e-06,
823
- "loss": 0.2287,
824
- "step": 1100
825
- },
826
- {
827
- "epoch": 5.575719649561952,
828
- "grad_norm": 2.99178147315979,
829
- "learning_rate": 6.731843575418995e-06,
830
- "loss": 0.2474,
831
- "step": 1110
832
- },
833
- {
834
- "epoch": 5.625782227784731,
835
- "grad_norm": 4.041738986968994,
836
- "learning_rate": 6.592178770949721e-06,
837
- "loss": 0.254,
838
- "step": 1120
839
- },
840
- {
841
- "epoch": 5.675844806007509,
842
- "grad_norm": 3.7862696647644043,
843
- "learning_rate": 6.452513966480447e-06,
844
- "loss": 0.2336,
845
- "step": 1130
846
- },
847
- {
848
- "epoch": 5.725907384230288,
849
- "grad_norm": 3.772305488586426,
850
- "learning_rate": 6.312849162011173e-06,
851
- "loss": 0.2366,
852
- "step": 1140
853
- },
854
- {
855
- "epoch": 5.7759699624530665,
856
- "grad_norm": 3.721914291381836,
857
- "learning_rate": 6.173184357541901e-06,
858
- "loss": 0.2056,
859
- "step": 1150
860
- },
861
- {
862
- "epoch": 5.826032540675845,
863
- "grad_norm": 3.321104049682617,
864
- "learning_rate": 6.033519553072626e-06,
865
- "loss": 0.2083,
866
- "step": 1160
867
- },
868
- {
869
- "epoch": 5.876095118898624,
870
- "grad_norm": 2.396998405456543,
871
- "learning_rate": 5.893854748603353e-06,
872
- "loss": 0.2413,
873
- "step": 1170
874
- },
875
- {
876
- "epoch": 5.926157697121401,
877
- "grad_norm": 2.6875176429748535,
878
- "learning_rate": 5.754189944134078e-06,
879
- "loss": 0.2049,
880
- "step": 1180
881
- },
882
- {
883
- "epoch": 5.97622027534418,
884
- "grad_norm": 3.377181053161621,
885
- "learning_rate": 5.614525139664805e-06,
886
- "loss": 0.214,
887
- "step": 1190
888
- },
889
- {
890
- "epoch": 5.996245306633291,
891
- "eval_accuracy": 0.9807296263606943,
892
- "eval_loss": 0.13626398146152496,
893
- "eval_runtime": 4.9535,
894
- "eval_samples_per_second": 1372.375,
895
- "eval_steps_per_second": 43.0,
896
- "step": 1194
897
- },
898
- {
899
- "epoch": 6.030037546933667,
900
- "grad_norm": 2.5127053260803223,
901
- "learning_rate": 5.474860335195531e-06,
902
- "loss": 0.2374,
903
- "step": 1200
904
- },
905
- {
906
- "epoch": 6.080100125156446,
907
- "grad_norm": 2.9415111541748047,
908
- "learning_rate": 5.335195530726257e-06,
909
- "loss": 0.203,
910
- "step": 1210
911
- },
912
- {
913
- "epoch": 6.130162703379224,
914
- "grad_norm": 1.5115219354629517,
915
- "learning_rate": 5.195530726256983e-06,
916
- "loss": 0.2151,
917
- "step": 1220
918
- },
919
- {
920
- "epoch": 6.180225281602002,
921
- "grad_norm": 1.8936338424682617,
922
- "learning_rate": 5.055865921787711e-06,
923
- "loss": 0.2159,
924
- "step": 1230
925
- },
926
- {
927
- "epoch": 6.230287859824781,
928
- "grad_norm": 2.6654956340789795,
929
- "learning_rate": 4.916201117318436e-06,
930
- "loss": 0.2013,
931
- "step": 1240
932
- },
933
- {
934
- "epoch": 6.280350438047559,
935
- "grad_norm": 3.0140798091888428,
936
- "learning_rate": 4.776536312849163e-06,
937
- "loss": 0.1985,
938
- "step": 1250
939
- },
940
- {
941
- "epoch": 6.330413016270338,
942
- "grad_norm": 3.579763412475586,
943
- "learning_rate": 4.636871508379888e-06,
944
- "loss": 0.207,
945
- "step": 1260
946
- },
947
- {
948
- "epoch": 6.380475594493117,
949
- "grad_norm": 2.5355074405670166,
950
- "learning_rate": 4.497206703910615e-06,
951
- "loss": 0.193,
952
- "step": 1270
953
- },
954
- {
955
- "epoch": 6.430538172715895,
956
- "grad_norm": 2.3967621326446533,
957
- "learning_rate": 4.357541899441341e-06,
958
- "loss": 0.2139,
959
- "step": 1280
960
- },
961
- {
962
- "epoch": 6.480600750938673,
963
- "grad_norm": 2.9278903007507324,
964
- "learning_rate": 4.217877094972068e-06,
965
- "loss": 0.2048,
966
- "step": 1290
967
- },
968
- {
969
- "epoch": 6.5306633291614515,
970
- "grad_norm": 5.488080024719238,
971
- "learning_rate": 4.078212290502794e-06,
972
- "loss": 0.2098,
973
- "step": 1300
974
- },
975
- {
976
- "epoch": 6.58072590738423,
977
- "grad_norm": 4.781853199005127,
978
- "learning_rate": 3.93854748603352e-06,
979
- "loss": 0.208,
980
- "step": 1310
981
- },
982
- {
983
- "epoch": 6.630788485607009,
984
- "grad_norm": 2.9733641147613525,
985
- "learning_rate": 3.798882681564246e-06,
986
- "loss": 0.1923,
987
- "step": 1320
988
- },
989
- {
990
- "epoch": 6.680851063829787,
991
- "grad_norm": 2.823633909225464,
992
- "learning_rate": 3.6592178770949723e-06,
993
- "loss": 0.185,
994
- "step": 1330
995
- },
996
- {
997
- "epoch": 6.730913642052566,
998
- "grad_norm": 2.7353272438049316,
999
- "learning_rate": 3.5195530726256988e-06,
1000
- "loss": 0.1977,
1001
- "step": 1340
1002
- },
1003
- {
1004
- "epoch": 6.7809762202753445,
1005
- "grad_norm": 2.489300489425659,
1006
- "learning_rate": 3.3798882681564248e-06,
1007
- "loss": 0.1999,
1008
- "step": 1350
1009
- },
1010
- {
1011
- "epoch": 6.831038798498122,
1012
- "grad_norm": 2.8995063304901123,
1013
- "learning_rate": 3.240223463687151e-06,
1014
- "loss": 0.2111,
1015
- "step": 1360
1016
- },
1017
- {
1018
- "epoch": 6.881101376720901,
1019
- "grad_norm": 2.4604063034057617,
1020
- "learning_rate": 3.1005586592178773e-06,
1021
- "loss": 0.2043,
1022
- "step": 1370
1023
- },
1024
- {
1025
- "epoch": 6.931163954943679,
1026
- "grad_norm": 2.1975669860839844,
1027
- "learning_rate": 2.9608938547486037e-06,
1028
- "loss": 0.203,
1029
- "step": 1380
1030
- },
1031
- {
1032
- "epoch": 6.981226533166458,
1033
- "grad_norm": 2.8632118701934814,
1034
- "learning_rate": 2.8212290502793298e-06,
1035
- "loss": 0.1826,
1036
- "step": 1390
1037
- },
1038
- {
1039
- "epoch": 6.996245306633291,
1040
- "eval_accuracy": 0.9814651368049426,
1041
- "eval_loss": 0.12438357621431351,
1042
- "eval_runtime": 4.9437,
1043
- "eval_samples_per_second": 1375.072,
1044
- "eval_steps_per_second": 43.085,
1045
- "step": 1393
1046
- },
1047
- {
1048
- "epoch": 7.035043804755945,
1049
- "grad_norm": 2.0548951625823975,
1050
- "learning_rate": 2.6815642458100562e-06,
1051
- "loss": 0.2089,
1052
- "step": 1400
1053
- },
1054
- {
1055
- "epoch": 7.085106382978723,
1056
- "grad_norm": 2.192319631576538,
1057
- "learning_rate": 2.5418994413407823e-06,
1058
- "loss": 0.1803,
1059
- "step": 1410
1060
- },
1061
- {
1062
- "epoch": 7.135168961201502,
1063
- "grad_norm": 3.190675973892212,
1064
- "learning_rate": 2.4022346368715087e-06,
1065
- "loss": 0.1985,
1066
- "step": 1420
1067
- },
1068
- {
1069
- "epoch": 7.18523153942428,
1070
- "grad_norm": 3.319995880126953,
1071
- "learning_rate": 2.2625698324022348e-06,
1072
- "loss": 0.1815,
1073
- "step": 1430
1074
- },
1075
- {
1076
- "epoch": 7.235294117647059,
1077
- "grad_norm": 3.1613974571228027,
1078
- "learning_rate": 2.1229050279329612e-06,
1079
- "loss": 0.1874,
1080
- "step": 1440
1081
- },
1082
- {
1083
- "epoch": 7.2853566958698375,
1084
- "grad_norm": 5.030276298522949,
1085
- "learning_rate": 1.9832402234636873e-06,
1086
- "loss": 0.2316,
1087
- "step": 1450
1088
- },
1089
- {
1090
- "epoch": 7.335419274092616,
1091
- "grad_norm": 4.310720443725586,
1092
- "learning_rate": 1.8435754189944135e-06,
1093
- "loss": 0.187,
1094
- "step": 1460
1095
- },
1096
- {
1097
- "epoch": 7.385481852315394,
1098
- "grad_norm": 3.277670383453369,
1099
- "learning_rate": 1.7039106145251397e-06,
1100
- "loss": 0.189,
1101
- "step": 1470
1102
- },
1103
- {
1104
- "epoch": 7.435544430538172,
1105
- "grad_norm": 2.657017707824707,
1106
- "learning_rate": 1.564245810055866e-06,
1107
- "loss": 0.1963,
1108
- "step": 1480
1109
- },
1110
- {
1111
- "epoch": 7.485607008760951,
1112
- "grad_norm": 1.9806122779846191,
1113
- "learning_rate": 1.4245810055865922e-06,
1114
- "loss": 0.1736,
1115
- "step": 1490
1116
- },
1117
- {
1118
- "epoch": 7.53566958698373,
1119
- "grad_norm": 2.318047285079956,
1120
- "learning_rate": 1.2849162011173185e-06,
1121
- "loss": 0.192,
1122
- "step": 1500
1123
- },
1124
- {
1125
- "epoch": 7.585732165206508,
1126
- "grad_norm": 1.8546510934829712,
1127
- "learning_rate": 1.1452513966480447e-06,
1128
- "loss": 0.1915,
1129
- "step": 1510
1130
- },
1131
- {
1132
- "epoch": 7.635794743429287,
1133
- "grad_norm": 2.3725483417510986,
1134
- "learning_rate": 1.005586592178771e-06,
1135
- "loss": 0.1924,
1136
- "step": 1520
1137
- },
1138
- {
1139
- "epoch": 7.685857321652065,
1140
- "grad_norm": 2.3467774391174316,
1141
- "learning_rate": 8.659217877094973e-07,
1142
- "loss": 0.1997,
1143
- "step": 1530
1144
- },
1145
- {
1146
- "epoch": 7.735919899874844,
1147
- "grad_norm": 2.653728485107422,
1148
- "learning_rate": 7.262569832402236e-07,
1149
- "loss": 0.2081,
1150
- "step": 1540
1151
- },
1152
- {
1153
- "epoch": 7.785982478097622,
1154
- "grad_norm": 3.015143394470215,
1155
- "learning_rate": 5.865921787709498e-07,
1156
- "loss": 0.2194,
1157
- "step": 1550
1158
- },
1159
- {
1160
- "epoch": 7.8360450563204,
1161
- "grad_norm": 1.836916446685791,
1162
- "learning_rate": 4.46927374301676e-07,
1163
- "loss": 0.2127,
1164
- "step": 1560
1165
- },
1166
- {
1167
- "epoch": 7.886107634543179,
1168
- "grad_norm": 1.9544241428375244,
1169
- "learning_rate": 3.0726256983240227e-07,
1170
- "loss": 0.1995,
1171
- "step": 1570
1172
- },
1173
- {
1174
- "epoch": 7.9361702127659575,
1175
- "grad_norm": 1.683449625968933,
1176
- "learning_rate": 1.6759776536312851e-07,
1177
- "loss": 0.1974,
1178
- "step": 1580
1179
- },
1180
- {
1181
- "epoch": 7.986232790988736,
1182
- "grad_norm": 2.348433494567871,
1183
- "learning_rate": 2.793296089385475e-08,
1184
- "loss": 0.1982,
1185
- "step": 1590
1186
- },
1187
- {
1188
- "epoch": 7.996245306633291,
1189
- "eval_accuracy": 0.9814651368049426,
1190
- "eval_loss": 0.12064122408628464,
1191
- "eval_runtime": 5.5563,
1192
- "eval_samples_per_second": 1223.467,
1193
- "eval_steps_per_second": 38.335,
1194
- "step": 1592
1195
  },
1196
  {
1197
- "epoch": 7.996245306633291,
1198
- "step": 1592,
1199
- "total_flos": 3.777723239743488e+18,
1200
- "train_loss": 0.726146658611058,
1201
- "train_runtime": 635.8846,
1202
- "train_samples_per_second": 642.808,
1203
- "train_steps_per_second": 2.504
1204
  }
1205
  ],
1206
  "logging_steps": 10,
1207
- "max_steps": 1592,
1208
  "num_input_tokens_seen": 0,
1209
- "num_train_epochs": 8,
1210
  "save_steps": 500,
1211
  "stateful_callbacks": {
1212
  "TrainerControl": {
@@ -1220,8 +825,8 @@
1220
  "attributes": {}
1221
  }
1222
  },
1223
- "total_flos": 3.777723239743488e+18,
1224
- "train_batch_size": 64,
1225
  "trial_name": null,
1226
  "trial_params": null
1227
  }
 
1
  {
2
+ "best_metric": 0.9811709326272433,
3
+ "best_model_checkpoint": "wav2vec2-base-ft-keyword-spotting/checkpoint-1000",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.1,
13
+ "grad_norm": 2.6891753673553467,
14
+ "learning_rate": 3e-06,
15
+ "loss": 4.1933,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.2,
20
+ "grad_norm": 3.1144065856933594,
21
+ "learning_rate": 6e-06,
22
+ "loss": 4.1225,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.3,
27
+ "grad_norm": 4.312869071960449,
28
+ "learning_rate": 9e-06,
29
+ "loss": 3.8887,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.4,
34
+ "grad_norm": 5.83105993270874,
35
+ "learning_rate": 1.2e-05,
36
+ "loss": 3.4137,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.5,
41
+ "grad_norm": 5.6602678298950195,
42
+ "learning_rate": 1.5e-05,
43
+ "loss": 2.7921,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.6,
48
+ "grad_norm": 4.747939586639404,
49
+ "learning_rate": 1.8e-05,
50
+ "loss": 2.3948,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 0.7,
55
+ "grad_norm": 4.421939849853516,
56
+ "learning_rate": 2.1e-05,
57
+ "loss": 2.1514,
58
  "step": 70
59
  },
60
  {
61
+ "epoch": 0.8,
62
+ "grad_norm": 3.393974542617798,
63
+ "learning_rate": 2.4e-05,
64
+ "loss": 2.0222,
65
  "step": 80
66
  },
67
  {
68
+ "epoch": 0.9,
69
+ "grad_norm": 2.5288150310516357,
70
+ "learning_rate": 2.7000000000000002e-05,
71
+ "loss": 1.8712,
72
  "step": 90
73
  },
74
  {
75
+ "epoch": 1.0,
76
+ "grad_norm": 1.0866141319274902,
77
+ "learning_rate": 3e-05,
78
+ "loss": 1.8178,
79
  "step": 100
80
  },
81
  {
82
+ "epoch": 1.0,
83
+ "eval_accuracy": 0.6209179170344219,
84
+ "eval_loss": 1.7386223077774048,
85
+ "eval_runtime": 5.0695,
86
+ "eval_samples_per_second": 1340.971,
87
+ "eval_steps_per_second": 42.016,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 1.1,
92
+ "grad_norm": 0.4499356150627136,
93
+ "learning_rate": 2.966666666666667e-05,
94
+ "loss": 1.7568,
95
  "step": 110
96
  },
97
  {
98
+ "epoch": 1.2,
99
+ "grad_norm": 5.13858699798584,
100
+ "learning_rate": 2.9333333333333333e-05,
101
+ "loss": 1.7009,
102
  "step": 120
103
  },
104
  {
105
+ "epoch": 1.3,
106
+ "grad_norm": 1.0781441926956177,
107
+ "learning_rate": 2.9e-05,
108
+ "loss": 1.7067,
109
  "step": 130
110
  },
111
  {
112
+ "epoch": 1.4,
113
+ "grad_norm": 2.4858882427215576,
114
+ "learning_rate": 2.8666666666666668e-05,
115
+ "loss": 1.6577,
116
  "step": 140
117
  },
118
  {
119
+ "epoch": 1.5,
120
+ "grad_norm": 3.1494271755218506,
121
+ "learning_rate": 2.8333333333333332e-05,
122
+ "loss": 1.5608,
123
  "step": 150
124
  },
125
  {
126
+ "epoch": 1.6,
127
+ "grad_norm": 2.8912174701690674,
128
+ "learning_rate": 2.8e-05,
129
+ "loss": 1.5741,
130
  "step": 160
131
  },
132
  {
133
+ "epoch": 1.7,
134
+ "grad_norm": 1.4826748371124268,
135
+ "learning_rate": 2.766666666666667e-05,
136
+ "loss": 1.5054,
137
  "step": 170
138
  },
139
  {
140
+ "epoch": 1.8,
141
+ "grad_norm": 5.051352024078369,
142
+ "learning_rate": 2.7333333333333335e-05,
143
+ "loss": 1.4572,
144
  "step": 180
145
  },
146
  {
147
+ "epoch": 1.9,
148
+ "grad_norm": 2.021836042404175,
149
+ "learning_rate": 2.7000000000000002e-05,
150
+ "loss": 1.3724,
151
  "step": 190
152
  },
153
  {
154
+ "epoch": 2.0,
155
+ "grad_norm": 3.871109962463379,
156
+ "learning_rate": 2.6666666666666667e-05,
157
+ "loss": 1.3138,
158
+ "step": 200
 
 
159
  },
160
  {
161
+ "epoch": 2.0,
162
+ "eval_accuracy": 0.6510738452486026,
163
+ "eval_loss": 1.1778711080551147,
164
+ "eval_runtime": 5.0951,
165
+ "eval_samples_per_second": 1334.21,
166
+ "eval_steps_per_second": 41.804,
167
  "step": 200
168
  },
169
  {
170
+ "epoch": 2.1,
171
+ "grad_norm": 5.006422519683838,
172
+ "learning_rate": 2.6333333333333334e-05,
173
+ "loss": 1.2749,
174
  "step": 210
175
  },
176
  {
177
+ "epoch": 2.2,
178
+ "grad_norm": 2.877138614654541,
179
+ "learning_rate": 2.6000000000000002e-05,
180
+ "loss": 1.2645,
181
  "step": 220
182
  },
183
  {
184
+ "epoch": 2.3,
185
+ "grad_norm": 4.362603187561035,
186
+ "learning_rate": 2.5666666666666666e-05,
187
+ "loss": 1.1934,
188
  "step": 230
189
  },
190
  {
191
+ "epoch": 2.4,
192
+ "grad_norm": 3.1135756969451904,
193
+ "learning_rate": 2.5333333333333334e-05,
194
+ "loss": 1.153,
195
  "step": 240
196
  },
197
  {
198
+ "epoch": 2.5,
199
+ "grad_norm": 4.02156400680542,
200
+ "learning_rate": 2.5e-05,
201
+ "loss": 1.0804,
202
  "step": 250
203
  },
204
  {
205
+ "epoch": 2.6,
206
+ "grad_norm": 3.90006160736084,
207
+ "learning_rate": 2.4666666666666665e-05,
208
+ "loss": 1.0651,
209
  "step": 260
210
  },
211
  {
212
+ "epoch": 2.7,
213
+ "grad_norm": 2.3373022079467773,
214
+ "learning_rate": 2.4333333333333333e-05,
215
+ "loss": 1.0293,
216
  "step": 270
217
  },
218
  {
219
+ "epoch": 2.8,
220
+ "grad_norm": 2.3652572631835938,
221
+ "learning_rate": 2.4e-05,
222
+ "loss": 1.0356,
223
  "step": 280
224
  },
225
  {
226
+ "epoch": 2.9,
227
+ "grad_norm": 2.4576399326324463,
228
+ "learning_rate": 2.3666666666666665e-05,
229
+ "loss": 0.9779,
230
  "step": 290
231
  },
232
  {
233
+ "epoch": 3.0,
234
+ "grad_norm": 4.176488876342773,
235
+ "learning_rate": 2.3333333333333336e-05,
236
+ "loss": 0.9632,
237
  "step": 300
238
  },
239
  {
240
+ "epoch": 3.0,
241
+ "eval_accuracy": 0.8679023242130038,
242
+ "eval_loss": 0.8326017260551453,
243
+ "eval_runtime": 5.1431,
244
+ "eval_samples_per_second": 1321.771,
245
+ "eval_steps_per_second": 41.415,
246
+ "step": 300
247
+ },
248
+ {
249
+ "epoch": 3.1,
250
+ "grad_norm": 2.0200586318969727,
251
+ "learning_rate": 2.3000000000000003e-05,
252
+ "loss": 0.8637,
253
  "step": 310
254
  },
255
  {
256
+ "epoch": 3.2,
257
+ "grad_norm": 2.459822416305542,
258
+ "learning_rate": 2.2666666666666668e-05,
259
+ "loss": 0.8304,
260
  "step": 320
261
  },
262
  {
263
+ "epoch": 3.3,
264
+ "grad_norm": 2.8096423149108887,
265
+ "learning_rate": 2.2333333333333335e-05,
266
+ "loss": 0.7811,
267
  "step": 330
268
  },
269
  {
270
+ "epoch": 3.4,
271
+ "grad_norm": 2.188521385192871,
272
+ "learning_rate": 2.2e-05,
273
+ "loss": 0.7311,
274
  "step": 340
275
  },
276
  {
277
+ "epoch": 3.5,
278
+ "grad_norm": 2.1101114749908447,
279
+ "learning_rate": 2.1666666666666667e-05,
280
+ "loss": 0.6899,
281
  "step": 350
282
  },
283
  {
284
+ "epoch": 3.6,
285
+ "grad_norm": 3.0662026405334473,
286
+ "learning_rate": 2.1333333333333335e-05,
287
+ "loss": 0.6275,
288
  "step": 360
289
  },
290
  {
291
+ "epoch": 3.7,
292
+ "grad_norm": 2.7955899238586426,
293
+ "learning_rate": 2.1e-05,
294
+ "loss": 0.5982,
295
  "step": 370
296
  },
297
  {
298
+ "epoch": 3.8,
299
+ "grad_norm": 2.069448947906494,
300
+ "learning_rate": 2.0666666666666666e-05,
301
+ "loss": 0.5618,
302
  "step": 380
303
  },
304
  {
305
+ "epoch": 3.9,
306
+ "grad_norm": 2.3345484733581543,
307
+ "learning_rate": 2.0333333333333334e-05,
308
+ "loss": 0.5223,
309
  "step": 390
310
  },
311
  {
312
+ "epoch": 4.0,
313
+ "grad_norm": 2.2423112392425537,
314
+ "learning_rate": 1.9999999999999998e-05,
315
+ "loss": 0.499,
316
+ "step": 400
 
 
317
  },
318
  {
319
+ "epoch": 4.0,
320
+ "eval_accuracy": 0.9724919093851133,
321
+ "eval_loss": 0.3696895241737366,
322
+ "eval_runtime": 4.9717,
323
+ "eval_samples_per_second": 1367.332,
324
+ "eval_steps_per_second": 42.842,
325
  "step": 400
326
  },
327
  {
328
+ "epoch": 4.1,
329
+ "grad_norm": 2.581549644470215,
330
+ "learning_rate": 1.9666666666666666e-05,
331
+ "loss": 0.4643,
332
  "step": 410
333
  },
334
  {
335
+ "epoch": 4.2,
336
+ "grad_norm": 2.2802562713623047,
337
+ "learning_rate": 1.9333333333333333e-05,
338
+ "loss": 0.4679,
339
  "step": 420
340
  },
341
  {
342
+ "epoch": 4.3,
343
+ "grad_norm": 2.000401496887207,
344
+ "learning_rate": 1.9e-05,
345
+ "loss": 0.4143,
346
  "step": 430
347
  },
348
  {
349
+ "epoch": 4.4,
350
+ "grad_norm": 1.913470983505249,
351
+ "learning_rate": 1.866666666666667e-05,
352
+ "loss": 0.3969,
353
  "step": 440
354
  },
355
  {
356
+ "epoch": 4.5,
357
+ "grad_norm": 1.812354564666748,
358
+ "learning_rate": 1.8333333333333336e-05,
359
+ "loss": 0.3903,
360
  "step": 450
361
  },
362
  {
363
+ "epoch": 4.6,
364
+ "grad_norm": 1.731414556503296,
365
+ "learning_rate": 1.8e-05,
366
+ "loss": 0.3536,
367
  "step": 460
368
  },
369
  {
370
+ "epoch": 4.7,
371
+ "grad_norm": 2.4097819328308105,
372
+ "learning_rate": 1.7666666666666668e-05,
373
+ "loss": 0.3537,
374
  "step": 470
375
  },
376
  {
377
+ "epoch": 4.8,
378
+ "grad_norm": 2.2438652515411377,
379
+ "learning_rate": 1.7333333333333332e-05,
380
+ "loss": 0.3417,
381
  "step": 480
382
  },
383
  {
384
+ "epoch": 4.9,
385
+ "grad_norm": 1.8313252925872803,
386
+ "learning_rate": 1.7e-05,
387
+ "loss": 0.3269,
388
  "step": 490
389
  },
390
  {
391
+ "epoch": 5.0,
392
+ "grad_norm": 3.7112584114074707,
393
+ "learning_rate": 1.6666666666666667e-05,
394
+ "loss": 0.3191,
395
  "step": 500
396
  },
397
  {
398
+ "epoch": 5.0,
399
+ "eval_accuracy": 0.9760223595175052,
400
+ "eval_loss": 0.22396039962768555,
401
+ "eval_runtime": 5.0015,
402
+ "eval_samples_per_second": 1359.188,
403
+ "eval_steps_per_second": 42.587,
404
+ "step": 500
405
+ },
406
+ {
407
+ "epoch": 5.1,
408
+ "grad_norm": 2.2409660816192627,
409
+ "learning_rate": 1.633333333333333e-05,
410
+ "loss": 0.3044,
411
  "step": 510
412
  },
413
  {
414
+ "epoch": 5.2,
415
+ "grad_norm": 1.6726042032241821,
416
+ "learning_rate": 1.6e-05,
417
+ "loss": 0.2886,
418
  "step": 520
419
  },
420
  {
421
+ "epoch": 5.3,
422
+ "grad_norm": 2.055961847305298,
423
+ "learning_rate": 1.5666666666666667e-05,
424
+ "loss": 0.2745,
425
  "step": 530
426
  },
427
  {
428
+ "epoch": 5.4,
429
+ "grad_norm": 2.2671661376953125,
430
+ "learning_rate": 1.533333333333333e-05,
431
+ "loss": 0.2943,
432
  "step": 540
433
  },
434
  {
435
+ "epoch": 5.5,
436
+ "grad_norm": 2.010511636734009,
437
+ "learning_rate": 1.5e-05,
438
+ "loss": 0.246,
439
  "step": 550
440
  },
441
  {
442
+ "epoch": 5.6,
443
+ "grad_norm": 2.027716636657715,
444
+ "learning_rate": 1.4666666666666666e-05,
445
+ "loss": 0.2962,
446
  "step": 560
447
  },
448
  {
449
+ "epoch": 5.7,
450
+ "grad_norm": 1.839068055152893,
451
+ "learning_rate": 1.4333333333333334e-05,
452
+ "loss": 0.2817,
453
  "step": 570
454
  },
455
  {
456
+ "epoch": 5.8,
457
+ "grad_norm": 2.8274471759796143,
458
+ "learning_rate": 1.4e-05,
459
+ "loss": 0.2512,
460
  "step": 580
461
  },
462
  {
463
+ "epoch": 5.9,
464
+ "grad_norm": 2.0226399898529053,
465
+ "learning_rate": 1.3666666666666667e-05,
466
+ "loss": 0.2756,
467
  "step": 590
468
  },
469
  {
470
+ "epoch": 6.0,
471
+ "grad_norm": 2.212151050567627,
472
+ "learning_rate": 1.3333333333333333e-05,
473
+ "loss": 0.242,
474
+ "step": 600
 
 
475
  },
476
  {
477
+ "epoch": 6.0,
478
+ "eval_accuracy": 0.9792586054721977,
479
+ "eval_loss": 0.1708967536687851,
480
+ "eval_runtime": 4.9642,
481
+ "eval_samples_per_second": 1369.415,
482
+ "eval_steps_per_second": 42.908,
483
  "step": 600
484
  },
485
  {
486
+ "epoch": 6.1,
487
+ "grad_norm": 2.019993305206299,
488
+ "learning_rate": 1.3000000000000001e-05,
489
+ "loss": 0.2557,
490
  "step": 610
491
  },
492
  {
493
+ "epoch": 6.2,
494
+ "grad_norm": 2.0448718070983887,
495
+ "learning_rate": 1.2666666666666667e-05,
496
+ "loss": 0.2455,
497
  "step": 620
498
  },
499
  {
500
+ "epoch": 6.3,
501
+ "grad_norm": 2.4769129753112793,
502
+ "learning_rate": 1.2333333333333333e-05,
503
+ "loss": 0.2435,
504
  "step": 630
505
  },
506
  {
507
+ "epoch": 6.4,
508
+ "grad_norm": 2.217947483062744,
509
+ "learning_rate": 1.2e-05,
510
+ "loss": 0.2392,
511
  "step": 640
512
  },
513
  {
514
+ "epoch": 6.5,
515
+ "grad_norm": 2.198836326599121,
516
+ "learning_rate": 1.1666666666666668e-05,
517
+ "loss": 0.2194,
518
  "step": 650
519
  },
520
  {
521
+ "epoch": 6.6,
522
+ "grad_norm": 2.2629637718200684,
523
+ "learning_rate": 1.1333333333333334e-05,
524
+ "loss": 0.2165,
525
  "step": 660
526
  },
527
  {
528
+ "epoch": 6.7,
529
+ "grad_norm": 1.9289292097091675,
530
+ "learning_rate": 1.1e-05,
531
+ "loss": 0.2264,
532
  "step": 670
533
  },
534
  {
535
+ "epoch": 6.8,
536
+ "grad_norm": 2.585972547531128,
537
+ "learning_rate": 1.0666666666666667e-05,
538
+ "loss": 0.2254,
539
  "step": 680
540
  },
541
  {
542
+ "epoch": 6.9,
543
+ "grad_norm": 2.3876583576202393,
544
+ "learning_rate": 1.0333333333333333e-05,
545
+ "loss": 0.2264,
546
  "step": 690
547
  },
548
  {
549
+ "epoch": 7.0,
550
+ "grad_norm": 2.297879695892334,
551
+ "learning_rate": 9.999999999999999e-06,
552
+ "loss": 0.2144,
553
  "step": 700
554
  },
555
  {
556
+ "epoch": 7.0,
557
+ "eval_accuracy": 0.9805825242718447,
558
+ "eval_loss": 0.14598147571086884,
559
+ "eval_runtime": 4.9644,
560
+ "eval_samples_per_second": 1369.359,
561
+ "eval_steps_per_second": 42.906,
562
+ "step": 700
563
+ },
564
+ {
565
+ "epoch": 7.1,
566
+ "grad_norm": 2.2874860763549805,
567
+ "learning_rate": 9.666666666666667e-06,
568
+ "loss": 0.2217,
569
  "step": 710
570
  },
571
  {
572
+ "epoch": 7.2,
573
+ "grad_norm": 1.6444002389907837,
574
+ "learning_rate": 9.333333333333334e-06,
575
+ "loss": 0.2048,
576
  "step": 720
577
  },
578
  {
579
+ "epoch": 7.3,
580
+ "grad_norm": 2.211688280105591,
581
+ "learning_rate": 9e-06,
582
+ "loss": 0.2122,
583
  "step": 730
584
  },
585
  {
586
+ "epoch": 7.4,
587
+ "grad_norm": 2.334533929824829,
588
+ "learning_rate": 8.666666666666666e-06,
589
+ "loss": 0.2142,
590
  "step": 740
591
  },
592
  {
593
+ "epoch": 7.5,
594
+ "grad_norm": 1.83983314037323,
595
+ "learning_rate": 8.333333333333334e-06,
596
+ "loss": 0.1873,
597
  "step": 750
598
  },
599
  {
600
+ "epoch": 7.6,
601
+ "grad_norm": 1.8291276693344116,
602
+ "learning_rate": 8e-06,
603
+ "loss": 0.2003,
604
  "step": 760
605
  },
606
  {
607
+ "epoch": 7.7,
608
+ "grad_norm": 2.4695987701416016,
609
+ "learning_rate": 7.666666666666666e-06,
610
+ "loss": 0.2075,
611
  "step": 770
612
  },
613
  {
614
+ "epoch": 7.8,
615
+ "grad_norm": 1.7656598091125488,
616
+ "learning_rate": 7.333333333333333e-06,
617
+ "loss": 0.2089,
618
  "step": 780
619
  },
620
  {
621
+ "epoch": 7.9,
622
+ "grad_norm": 1.8952357769012451,
623
+ "learning_rate": 7e-06,
624
+ "loss": 0.2013,
625
  "step": 790
626
  },
627
  {
628
+ "epoch": 8.0,
629
+ "grad_norm": 1.6189404726028442,
630
+ "learning_rate": 6.666666666666667e-06,
631
+ "loss": 0.1977,
632
+ "step": 800
 
 
633
  },
634
  {
635
+ "epoch": 8.0,
636
+ "eval_accuracy": 0.979994115916446,
637
+ "eval_loss": 0.13441701233386993,
638
+ "eval_runtime": 5.0435,
639
+ "eval_samples_per_second": 1347.881,
640
+ "eval_steps_per_second": 42.233,
641
  "step": 800
642
  },
643
  {
644
+ "epoch": 8.1,
645
+ "grad_norm": 2.2733678817749023,
646
+ "learning_rate": 6.333333333333333e-06,
647
+ "loss": 0.1908,
648
  "step": 810
649
  },
650
  {
651
+ "epoch": 8.2,
652
+ "grad_norm": 1.6874712705612183,
653
+ "learning_rate": 6e-06,
654
+ "loss": 0.1819,
655
  "step": 820
656
  },
657
  {
658
+ "epoch": 8.3,
659
+ "grad_norm": 1.4649100303649902,
660
+ "learning_rate": 5.666666666666667e-06,
661
+ "loss": 0.2037,
662
  "step": 830
663
  },
664
  {
665
+ "epoch": 8.4,
666
+ "grad_norm": 1.7231727838516235,
667
+ "learning_rate": 5.333333333333334e-06,
668
+ "loss": 0.1935,
669
  "step": 840
670
  },
671
  {
672
+ "epoch": 8.5,
673
+ "grad_norm": 1.6908072233200073,
674
+ "learning_rate": 4.9999999999999996e-06,
675
+ "loss": 0.1957,
676
  "step": 850
677
  },
678
  {
679
+ "epoch": 8.6,
680
+ "grad_norm": 2.1523678302764893,
681
+ "learning_rate": 4.666666666666667e-06,
682
+ "loss": 0.1955,
683
  "step": 860
684
  },
685
  {
686
+ "epoch": 8.7,
687
+ "grad_norm": 2.5662710666656494,
688
+ "learning_rate": 4.333333333333333e-06,
689
+ "loss": 0.2008,
690
  "step": 870
691
  },
692
  {
693
+ "epoch": 8.8,
694
+ "grad_norm": 1.984096884727478,
695
+ "learning_rate": 4e-06,
696
+ "loss": 0.186,
697
  "step": 880
698
  },
699
  {
700
+ "epoch": 8.9,
701
+ "grad_norm": 1.4925553798675537,
702
+ "learning_rate": 3.6666666666666666e-06,
703
+ "loss": 0.1905,
704
  "step": 890
705
  },
706
  {
707
+ "epoch": 9.0,
708
+ "grad_norm": 4.499406814575195,
709
+ "learning_rate": 3.3333333333333333e-06,
710
+ "loss": 0.1742,
711
+ "step": 900
712
+ },
713
+ {
714
+ "epoch": 9.0,
715
+ "eval_accuracy": 0.9796999117387467,
716
+ "eval_loss": 0.12820769846439362,
717
+ "eval_runtime": 5.0266,
718
+ "eval_samples_per_second": 1352.417,
719
+ "eval_steps_per_second": 42.375,
720
  "step": 900
721
  },
722
  {
723
+ "epoch": 9.1,
724
+ "grad_norm": 1.9934319257736206,
725
+ "learning_rate": 3e-06,
726
+ "loss": 0.1915,
727
  "step": 910
728
  },
729
  {
730
+ "epoch": 9.2,
731
+ "grad_norm": 1.9417580366134644,
732
+ "learning_rate": 2.666666666666667e-06,
733
+ "loss": 0.1759,
734
  "step": 920
735
  },
736
  {
737
+ "epoch": 9.3,
738
+ "grad_norm": 2.1848134994506836,
739
+ "learning_rate": 2.3333333333333336e-06,
740
+ "loss": 0.1811,
741
  "step": 930
742
  },
743
  {
744
+ "epoch": 9.4,
745
+ "grad_norm": 1.6830646991729736,
746
+ "learning_rate": 2e-06,
747
+ "loss": 0.1905,
748
  "step": 940
749
  },
750
  {
751
+ "epoch": 9.5,
752
+ "grad_norm": 2.51246976852417,
753
+ "learning_rate": 1.6666666666666667e-06,
754
+ "loss": 0.1889,
755
  "step": 950
756
  },
757
  {
758
+ "epoch": 9.6,
759
+ "grad_norm": 2.315727949142456,
760
+ "learning_rate": 1.3333333333333334e-06,
761
+ "loss": 0.1755,
762
  "step": 960
763
  },
764
  {
765
+ "epoch": 9.7,
766
+ "grad_norm": 1.6819000244140625,
767
+ "learning_rate": 1e-06,
768
+ "loss": 0.1717,
769
  "step": 970
770
  },
771
  {
772
+ "epoch": 9.8,
773
+ "grad_norm": 1.7445892095565796,
774
+ "learning_rate": 6.666666666666667e-07,
775
+ "loss": 0.1756,
776
  "step": 980
777
  },
778
  {
779
+ "epoch": 9.9,
780
+ "grad_norm": 1.542716145515442,
781
+ "learning_rate": 3.3333333333333335e-07,
782
+ "loss": 0.181,
783
  "step": 990
784
  },
785
  {
786
+ "epoch": 10.0,
787
+ "grad_norm": 2.6007721424102783,
788
+ "learning_rate": 0.0,
789
+ "loss": 0.1867,
 
 
 
 
 
 
 
 
 
790
  "step": 1000
791
  },
792
  {
793
+ "epoch": 10.0,
794
+ "eval_accuracy": 0.9811709326272433,
795
+ "eval_loss": 0.12430207431316376,
796
+ "eval_runtime": 5.3794,
797
+ "eval_samples_per_second": 1263.701,
798
+ "eval_steps_per_second": 39.595,
799
+ "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800
  },
801
  {
802
+ "epoch": 10.0,
803
+ "step": 1000,
804
+ "total_flos": 4.72566865822464e+18,
805
+ "train_loss": 0.7669839228391647,
806
+ "train_runtime": 621.093,
807
+ "train_samples_per_second": 822.646,
808
+ "train_steps_per_second": 1.61
809
  }
810
  ],
811
  "logging_steps": 10,
812
+ "max_steps": 1000,
813
  "num_input_tokens_seen": 0,
814
+ "num_train_epochs": 10,
815
  "save_steps": 500,
816
  "stateful_callbacks": {
817
  "TrainerControl": {
 
825
  "attributes": {}
826
  }
827
  },
828
+ "total_flos": 4.72566865822464e+18,
829
+ "train_batch_size": 128,
830
  "trial_name": null,
831
  "trial_params": null
832
  }