File size: 33,311 Bytes
7983357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
{
  "best_global_step": 471,
  "best_metric": 0.7473118279569892,
  "best_model_checkpoint": "/linkhome/rech/genini01/udd26kf/scratch/weborganizer/models/runs/answerdotai--ModernBERT-base_TopicAnnotations-Llama-3.1-8B_bsz512_lr1e-4_epochs5_warmup0.1_url1_TopicAnnotations-Llama-3.1-405B-FP8_bsz512_lr1e-4_epochs5_warmup0.1_url1/checkpoint-471",
  "epoch": 4.9728,
  "eval_steps": 500,
  "global_step": 780,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.64,
      "grad_norm": 8.25,
      "learning_rate": 9.686609686609687e-05,
      "loss": 2.1544,
      "num_input_tokens_seen": 249204064,
      "step": 100
    },
    {
      "epoch": 1.0,
      "eval_validation.parquet_accuracy": 0.8451,
      "eval_validation.parquet_accuracy__0": 0.9019607843137255,
      "eval_validation.parquet_accuracy__1": 0.7975460122699386,
      "eval_validation.parquet_accuracy__10": 0.9105882352941177,
      "eval_validation.parquet_accuracy__11": 0.8687258687258688,
      "eval_validation.parquet_accuracy__12": 0.8734622144112478,
      "eval_validation.parquet_accuracy__13": 0.6827956989247311,
      "eval_validation.parquet_accuracy__14": 0.8229461756373938,
      "eval_validation.parquet_accuracy__15": 0.8427947598253275,
      "eval_validation.parquet_accuracy__16": 0.8194842406876791,
      "eval_validation.parquet_accuracy__17": 0.8260135135135135,
      "eval_validation.parquet_accuracy__18": 0.8732876712328768,
      "eval_validation.parquet_accuracy__19": 0.861764705882353,
      "eval_validation.parquet_accuracy__2": 0.8159203980099502,
      "eval_validation.parquet_accuracy__20": 0.7183098591549296,
      "eval_validation.parquet_accuracy__21": 0.8975409836065574,
      "eval_validation.parquet_accuracy__22": 0.7981651376146789,
      "eval_validation.parquet_accuracy__23": 0.863481228668942,
      "eval_validation.parquet_accuracy__3": 0.9017857142857143,
      "eval_validation.parquet_accuracy__4": 0.8697829716193656,
      "eval_validation.parquet_accuracy__5": 0.8246268656716418,
      "eval_validation.parquet_accuracy__6": 0.8907168037602821,
      "eval_validation.parquet_accuracy__7": 0.7784946236559139,
      "eval_validation.parquet_accuracy__8": 0.8932584269662921,
      "eval_validation.parquet_accuracy__9": 0.8230403800475059,
      "eval_validation.parquet_accuracy_conf50": 0.8559460563955864,
      "eval_validation.parquet_accuracy_conf50__0": 0.9108910891089109,
      "eval_validation.parquet_accuracy_conf50__1": 0.8087774294670846,
      "eval_validation.parquet_accuracy_conf50__10": 0.9148936170212766,
      "eval_validation.parquet_accuracy_conf50__11": 0.8745098039215686,
      "eval_validation.parquet_accuracy_conf50__12": 0.8857142857142857,
      "eval_validation.parquet_accuracy_conf50__13": 0.7085714285714285,
      "eval_validation.parquet_accuracy_conf50__14": 0.8338150289017341,
      "eval_validation.parquet_accuracy_conf50__15": 0.8609865470852018,
      "eval_validation.parquet_accuracy_conf50__16": 0.8240469208211144,
      "eval_validation.parquet_accuracy_conf50__17": 0.8319039451114922,
      "eval_validation.parquet_accuracy_conf50__18": 0.8788927335640139,
      "eval_validation.parquet_accuracy_conf50__19": 0.8761329305135952,
      "eval_validation.parquet_accuracy_conf50__2": 0.8350515463917526,
      "eval_validation.parquet_accuracy_conf50__20": 0.7323529411764705,
      "eval_validation.parquet_accuracy_conf50__21": 0.9009628610729024,
      "eval_validation.parquet_accuracy_conf50__22": 0.8113207547169812,
      "eval_validation.parquet_accuracy_conf50__23": 0.865979381443299,
      "eval_validation.parquet_accuracy_conf50__3": 0.9195046439628483,
      "eval_validation.parquet_accuracy_conf50__4": 0.8807495741056218,
      "eval_validation.parquet_accuracy_conf50__5": 0.8358778625954199,
      "eval_validation.parquet_accuracy_conf50__6": 0.9020310633213859,
      "eval_validation.parquet_accuracy_conf50__7": 0.7986425339366516,
      "eval_validation.parquet_accuracy_conf50__8": 0.8951841359773371,
      "eval_validation.parquet_accuracy_conf50__9": 0.8345498783454988,
      "eval_validation.parquet_accuracy_conf75": 0.9065606361829026,
      "eval_validation.parquet_accuracy_conf75__0": 0.967391304347826,
      "eval_validation.parquet_accuracy_conf75__1": 0.8828125,
      "eval_validation.parquet_accuracy_conf75__10": 0.9493670886075949,
      "eval_validation.parquet_accuracy_conf75__11": 0.9037656903765691,
      "eval_validation.parquet_accuracy_conf75__12": 0.9176470588235294,
      "eval_validation.parquet_accuracy_conf75__13": 0.7906976744186046,
      "eval_validation.parquet_accuracy_conf75__14": 0.9027303754266212,
      "eval_validation.parquet_accuracy_conf75__15": 0.9281767955801105,
      "eval_validation.parquet_accuracy_conf75__16": 0.886986301369863,
      "eval_validation.parquet_accuracy_conf75__17": 0.8814229249011858,
      "eval_validation.parquet_accuracy_conf75__18": 0.9176029962546817,
      "eval_validation.parquet_accuracy_conf75__19": 0.9185185185185185,
      "eval_validation.parquet_accuracy_conf75__2": 0.9047619047619048,
      "eval_validation.parquet_accuracy_conf75__20": 0.8022813688212928,
      "eval_validation.parquet_accuracy_conf75__21": 0.9327485380116959,
      "eval_validation.parquet_accuracy_conf75__22": 0.8741007194244604,
      "eval_validation.parquet_accuracy_conf75__23": 0.9246031746031746,
      "eval_validation.parquet_accuracy_conf75__3": 0.9473684210526315,
      "eval_validation.parquet_accuracy_conf75__4": 0.9242718446601942,
      "eval_validation.parquet_accuracy_conf75__5": 0.8879310344827587,
      "eval_validation.parquet_accuracy_conf75__6": 0.9322033898305084,
      "eval_validation.parquet_accuracy_conf75__7": 0.848,
      "eval_validation.parquet_accuracy_conf75__8": 0.9339622641509434,
      "eval_validation.parquet_accuracy_conf75__9": 0.9001447178002895,
      "eval_validation.parquet_accuracy_label_average": 0.8398538864075228,
      "eval_validation.parquet_accuracy_label_average_conf50": 0.8508892890353281,
      "eval_validation.parquet_accuracy_label_average_conf75": 0.9024790252593734,
      "eval_validation.parquet_accuracy_label_min": 0.6827956989247311,
      "eval_validation.parquet_accuracy_label_min_conf50": 0.7085714285714285,
      "eval_validation.parquet_accuracy_label_min_conf75": 0.7906976744186046,
      "eval_validation.parquet_loss": 0.5004527568817139,
      "eval_validation.parquet_proportion_conf50": 0.9788,
      "eval_validation.parquet_proportion_conf75": 0.8551,
      "eval_validation.parquet_runtime": 10.52,
      "eval_validation.parquet_samples_per_second": 950.571,
      "eval_validation.parquet_steps_per_second": 29.753,
      "num_input_tokens_seen": 390215936,
      "step": 157
    },
    {
      "epoch": 1.2752,
      "grad_norm": 9.875,
      "learning_rate": 8.262108262108262e-05,
      "loss": 1.8475,
      "num_input_tokens_seen": 499147424,
      "step": 200
    },
    {
      "epoch": 1.9152,
      "grad_norm": 7.53125,
      "learning_rate": 6.837606837606838e-05,
      "loss": 1.7317,
      "num_input_tokens_seen": 751160992,
      "step": 300
    },
    {
      "epoch": 2.0,
      "eval_validation.parquet_accuracy": 0.8526,
      "eval_validation.parquet_accuracy__0": 0.8725490196078431,
      "eval_validation.parquet_accuracy__1": 0.8128834355828221,
      "eval_validation.parquet_accuracy__10": 0.9176470588235294,
      "eval_validation.parquet_accuracy__11": 0.9073359073359073,
      "eval_validation.parquet_accuracy__12": 0.9138840070298769,
      "eval_validation.parquet_accuracy__13": 0.7419354838709677,
      "eval_validation.parquet_accuracy__14": 0.7818696883852692,
      "eval_validation.parquet_accuracy__15": 0.8427947598253275,
      "eval_validation.parquet_accuracy__16": 0.8481375358166189,
      "eval_validation.parquet_accuracy__17": 0.8733108108108109,
      "eval_validation.parquet_accuracy__18": 0.8732876712328768,
      "eval_validation.parquet_accuracy__19": 0.8205882352941176,
      "eval_validation.parquet_accuracy__2": 0.7860696517412935,
      "eval_validation.parquet_accuracy__20": 0.7830985915492957,
      "eval_validation.parquet_accuracy__21": 0.9344262295081968,
      "eval_validation.parquet_accuracy__22": 0.8562691131498471,
      "eval_validation.parquet_accuracy__23": 0.9078498293515358,
      "eval_validation.parquet_accuracy__3": 0.8541666666666666,
      "eval_validation.parquet_accuracy__4": 0.8414023372287145,
      "eval_validation.parquet_accuracy__5": 0.8208955223880597,
      "eval_validation.parquet_accuracy__6": 0.8883666274970623,
      "eval_validation.parquet_accuracy__7": 0.7784946236559139,
      "eval_validation.parquet_accuracy__8": 0.8960674157303371,
      "eval_validation.parquet_accuracy__9": 0.8111638954869359,
      "eval_validation.parquet_accuracy_conf50": 0.8627911728647323,
      "eval_validation.parquet_accuracy_conf50__0": 0.8811881188118812,
      "eval_validation.parquet_accuracy_conf50__1": 0.8244514106583072,
      "eval_validation.parquet_accuracy_conf50__10": 0.9219858156028369,
      "eval_validation.parquet_accuracy_conf50__11": 0.9137254901960784,
      "eval_validation.parquet_accuracy_conf50__12": 0.9214285714285714,
      "eval_validation.parquet_accuracy_conf50__13": 0.7771428571428571,
      "eval_validation.parquet_accuracy_conf50__14": 0.7947976878612717,
      "eval_validation.parquet_accuracy_conf50__15": 0.8565022421524664,
      "eval_validation.parquet_accuracy_conf50__16": 0.8533724340175953,
      "eval_validation.parquet_accuracy_conf50__17": 0.8782161234991424,
      "eval_validation.parquet_accuracy_conf50__18": 0.8788927335640139,
      "eval_validation.parquet_accuracy_conf50__19": 0.8368580060422961,
      "eval_validation.parquet_accuracy_conf50__2": 0.8041237113402062,
      "eval_validation.parquet_accuracy_conf50__20": 0.8,
      "eval_validation.parquet_accuracy_conf50__21": 0.936726272352132,
      "eval_validation.parquet_accuracy_conf50__22": 0.8679245283018868,
      "eval_validation.parquet_accuracy_conf50__23": 0.9072164948453608,
      "eval_validation.parquet_accuracy_conf50__3": 0.8761609907120743,
      "eval_validation.parquet_accuracy_conf50__4": 0.8534923339011925,
      "eval_validation.parquet_accuracy_conf50__5": 0.8320610687022901,
      "eval_validation.parquet_accuracy_conf50__6": 0.8984468339307049,
      "eval_validation.parquet_accuracy_conf50__7": 0.7873303167420814,
      "eval_validation.parquet_accuracy_conf50__8": 0.8980169971671388,
      "eval_validation.parquet_accuracy_conf50__9": 0.8211678832116789,
      "eval_validation.parquet_accuracy_conf75": 0.9124079055081277,
      "eval_validation.parquet_accuracy_conf75__0": 0.9347826086956522,
      "eval_validation.parquet_accuracy_conf75__1": 0.8984375,
      "eval_validation.parquet_accuracy_conf75__10": 0.9620253164556962,
      "eval_validation.parquet_accuracy_conf75__11": 0.9372384937238494,
      "eval_validation.parquet_accuracy_conf75__12": 0.9529411764705882,
      "eval_validation.parquet_accuracy_conf75__13": 0.8294573643410853,
      "eval_validation.parquet_accuracy_conf75__14": 0.8686006825938567,
      "eval_validation.parquet_accuracy_conf75__15": 0.9226519337016574,
      "eval_validation.parquet_accuracy_conf75__16": 0.9143835616438356,
      "eval_validation.parquet_accuracy_conf75__17": 0.9209486166007905,
      "eval_validation.parquet_accuracy_conf75__18": 0.9250936329588015,
      "eval_validation.parquet_accuracy_conf75__19": 0.8962962962962963,
      "eval_validation.parquet_accuracy_conf75__2": 0.8809523809523809,
      "eval_validation.parquet_accuracy_conf75__20": 0.8593155893536122,
      "eval_validation.parquet_accuracy_conf75__21": 0.9576023391812866,
      "eval_validation.parquet_accuracy_conf75__22": 0.8992805755395683,
      "eval_validation.parquet_accuracy_conf75__23": 0.9603174603174603,
      "eval_validation.parquet_accuracy_conf75__3": 0.9192982456140351,
      "eval_validation.parquet_accuracy_conf75__4": 0.8990291262135922,
      "eval_validation.parquet_accuracy_conf75__5": 0.8879310344827587,
      "eval_validation.parquet_accuracy_conf75__6": 0.9282920469361148,
      "eval_validation.parquet_accuracy_conf75__7": 0.84,
      "eval_validation.parquet_accuracy_conf75__8": 0.9339622641509434,
      "eval_validation.parquet_accuracy_conf75__9": 0.8900144717800289,
      "eval_validation.parquet_accuracy_label_average": 0.8485205882320762,
      "eval_validation.parquet_accuracy_label_average_conf50": 0.8592178717576693,
      "eval_validation.parquet_accuracy_label_average_conf75": 0.909118863250162,
      "eval_validation.parquet_accuracy_label_min": 0.7419354838709677,
      "eval_validation.parquet_accuracy_label_min_conf50": 0.7771428571428571,
      "eval_validation.parquet_accuracy_label_min_conf75": 0.8294573643410853,
      "eval_validation.parquet_loss": 0.4816047251224518,
      "eval_validation.parquet_proportion_conf50": 0.9788,
      "eval_validation.parquet_proportion_conf75": 0.8551,
      "eval_validation.parquet_runtime": 8.307,
      "eval_validation.parquet_samples_per_second": 1203.799,
      "eval_validation.parquet_steps_per_second": 37.679,
      "num_input_tokens_seen": 783399104,
      "step": 314
    },
    {
      "epoch": 2.5504,
      "grad_norm": 7.59375,
      "learning_rate": 5.413105413105414e-05,
      "loss": 1.5837,
      "num_input_tokens_seen": 999700736,
      "step": 400
    },
    {
      "epoch": 3.0,
      "eval_validation.parquet_accuracy": 0.8558,
      "eval_validation.parquet_accuracy__0": 0.8627450980392157,
      "eval_validation.parquet_accuracy__1": 0.7607361963190185,
      "eval_validation.parquet_accuracy__10": 0.9035294117647059,
      "eval_validation.parquet_accuracy__11": 0.8764478764478765,
      "eval_validation.parquet_accuracy__12": 0.8980667838312829,
      "eval_validation.parquet_accuracy__13": 0.7473118279569892,
      "eval_validation.parquet_accuracy__14": 0.839943342776204,
      "eval_validation.parquet_accuracy__15": 0.8427947598253275,
      "eval_validation.parquet_accuracy__16": 0.830945558739255,
      "eval_validation.parquet_accuracy__17": 0.839527027027027,
      "eval_validation.parquet_accuracy__18": 0.8801369863013698,
      "eval_validation.parquet_accuracy__19": 0.8117647058823529,
      "eval_validation.parquet_accuracy__2": 0.7860696517412935,
      "eval_validation.parquet_accuracy__20": 0.7633802816901408,
      "eval_validation.parquet_accuracy__21": 0.9289617486338798,
      "eval_validation.parquet_accuracy__22": 0.8562691131498471,
      "eval_validation.parquet_accuracy__23": 0.8805460750853242,
      "eval_validation.parquet_accuracy__3": 0.8660714285714286,
      "eval_validation.parquet_accuracy__4": 0.8530884808013356,
      "eval_validation.parquet_accuracy__5": 0.8694029850746269,
      "eval_validation.parquet_accuracy__6": 0.900117508813161,
      "eval_validation.parquet_accuracy__7": 0.7741935483870968,
      "eval_validation.parquet_accuracy__8": 0.8904494382022472,
      "eval_validation.parquet_accuracy__9": 0.8669833729216152,
      "eval_validation.parquet_accuracy_conf50": 0.8663669799754802,
      "eval_validation.parquet_accuracy_conf50__0": 0.8712871287128713,
      "eval_validation.parquet_accuracy_conf50__1": 0.7711598746081505,
      "eval_validation.parquet_accuracy_conf50__10": 0.9078014184397163,
      "eval_validation.parquet_accuracy_conf50__11": 0.8823529411764706,
      "eval_validation.parquet_accuracy_conf50__12": 0.9089285714285714,
      "eval_validation.parquet_accuracy_conf50__13": 0.7828571428571428,
      "eval_validation.parquet_accuracy_conf50__14": 0.8511560693641619,
      "eval_validation.parquet_accuracy_conf50__15": 0.8565022421524664,
      "eval_validation.parquet_accuracy_conf50__16": 0.8357771260997068,
      "eval_validation.parquet_accuracy_conf50__17": 0.8456260720411664,
      "eval_validation.parquet_accuracy_conf50__18": 0.8858131487889274,
      "eval_validation.parquet_accuracy_conf50__19": 0.8277945619335347,
      "eval_validation.parquet_accuracy_conf50__2": 0.8041237113402062,
      "eval_validation.parquet_accuracy_conf50__20": 0.7794117647058824,
      "eval_validation.parquet_accuracy_conf50__21": 0.9312242090784044,
      "eval_validation.parquet_accuracy_conf50__22": 0.8710691823899371,
      "eval_validation.parquet_accuracy_conf50__23": 0.8797250859106529,
      "eval_validation.parquet_accuracy_conf50__3": 0.8885448916408669,
      "eval_validation.parquet_accuracy_conf50__4": 0.8637137989778535,
      "eval_validation.parquet_accuracy_conf50__5": 0.8816793893129771,
      "eval_validation.parquet_accuracy_conf50__6": 0.9115890083632019,
      "eval_validation.parquet_accuracy_conf50__7": 0.7850678733031674,
      "eval_validation.parquet_accuracy_conf50__8": 0.8923512747875354,
      "eval_validation.parquet_accuracy_conf50__9": 0.878345498783455,
      "eval_validation.parquet_accuracy_conf75": 0.9145129224652088,
      "eval_validation.parquet_accuracy_conf75__0": 0.9239130434782609,
      "eval_validation.parquet_accuracy_conf75__1": 0.85546875,
      "eval_validation.parquet_accuracy_conf75__10": 0.9493670886075949,
      "eval_validation.parquet_accuracy_conf75__11": 0.9121338912133892,
      "eval_validation.parquet_accuracy_conf75__12": 0.9450980392156862,
      "eval_validation.parquet_accuracy_conf75__13": 0.8294573643410853,
      "eval_validation.parquet_accuracy_conf75__14": 0.9129692832764505,
      "eval_validation.parquet_accuracy_conf75__15": 0.9392265193370166,
      "eval_validation.parquet_accuracy_conf75__16": 0.8972602739726028,
      "eval_validation.parquet_accuracy_conf75__17": 0.8893280632411067,
      "eval_validation.parquet_accuracy_conf75__18": 0.9288389513108615,
      "eval_validation.parquet_accuracy_conf75__19": 0.8925925925925926,
      "eval_validation.parquet_accuracy_conf75__2": 0.8809523809523809,
      "eval_validation.parquet_accuracy_conf75__20": 0.8479087452471483,
      "eval_validation.parquet_accuracy_conf75__21": 0.9502923976608187,
      "eval_validation.parquet_accuracy_conf75__22": 0.9064748201438849,
      "eval_validation.parquet_accuracy_conf75__23": 0.9404761904761905,
      "eval_validation.parquet_accuracy_conf75__3": 0.9228070175438596,
      "eval_validation.parquet_accuracy_conf75__4": 0.9067961165048544,
      "eval_validation.parquet_accuracy_conf75__5": 0.9224137931034483,
      "eval_validation.parquet_accuracy_conf75__6": 0.9374185136897001,
      "eval_validation.parquet_accuracy_conf75__7": 0.8453333333333334,
      "eval_validation.parquet_accuracy_conf75__8": 0.9276729559748428,
      "eval_validation.parquet_accuracy_conf75__9": 0.9305354558610709,
      "eval_validation.parquet_accuracy_label_average": 0.8470618003326092,
      "eval_validation.parquet_accuracy_label_average_conf50": 0.8580792494248763,
      "eval_validation.parquet_accuracy_label_average_conf75": 0.9081139825449239,
      "eval_validation.parquet_accuracy_label_min": 0.7473118279569892,
      "eval_validation.parquet_accuracy_label_min_conf50": 0.7711598746081505,
      "eval_validation.parquet_accuracy_label_min_conf75": 0.8294573643410853,
      "eval_validation.parquet_loss": 0.4807276427745819,
      "eval_validation.parquet_proportion_conf50": 0.9788,
      "eval_validation.parquet_proportion_conf75": 0.8551,
      "eval_validation.parquet_runtime": 8.2886,
      "eval_validation.parquet_samples_per_second": 1206.475,
      "eval_validation.parquet_steps_per_second": 37.763,
      "num_input_tokens_seen": 1176307328,
      "step": 471
    },
    {
      "epoch": 3.1856,
      "grad_norm": 6.53125,
      "learning_rate": 3.988603988603989e-05,
      "loss": 1.5392,
      "num_input_tokens_seen": 1250925472,
      "step": 500
    },
    {
      "epoch": 3.8256,
      "grad_norm": 7.0625,
      "learning_rate": 2.564102564102564e-05,
      "loss": 1.4928,
      "num_input_tokens_seen": 1499507040,
      "step": 600
    },
    {
      "epoch": 4.0,
      "eval_validation.parquet_accuracy": 0.8567,
      "eval_validation.parquet_accuracy__0": 0.8725490196078431,
      "eval_validation.parquet_accuracy__1": 0.8006134969325154,
      "eval_validation.parquet_accuracy__10": 0.9105882352941177,
      "eval_validation.parquet_accuracy__11": 0.888030888030888,
      "eval_validation.parquet_accuracy__12": 0.9086115992970123,
      "eval_validation.parquet_accuracy__13": 0.7419354838709677,
      "eval_validation.parquet_accuracy__14": 0.8271954674220963,
      "eval_validation.parquet_accuracy__15": 0.851528384279476,
      "eval_validation.parquet_accuracy__16": 0.8510028653295129,
      "eval_validation.parquet_accuracy__17": 0.8817567567567568,
      "eval_validation.parquet_accuracy__18": 0.8664383561643836,
      "eval_validation.parquet_accuracy__19": 0.8088235294117647,
      "eval_validation.parquet_accuracy__2": 0.8059701492537313,
      "eval_validation.parquet_accuracy__20": 0.7492957746478873,
      "eval_validation.parquet_accuracy__21": 0.924863387978142,
      "eval_validation.parquet_accuracy__22": 0.8379204892966361,
      "eval_validation.parquet_accuracy__23": 0.863481228668942,
      "eval_validation.parquet_accuracy__3": 0.8779761904761905,
      "eval_validation.parquet_accuracy__4": 0.8464106844741235,
      "eval_validation.parquet_accuracy__5": 0.8731343283582089,
      "eval_validation.parquet_accuracy__6": 0.881316098707403,
      "eval_validation.parquet_accuracy__7": 0.810752688172043,
      "eval_validation.parquet_accuracy__8": 0.8904494382022472,
      "eval_validation.parquet_accuracy__9": 0.8396674584323041,
      "eval_validation.parquet_accuracy_conf50": 0.8674908050674295,
      "eval_validation.parquet_accuracy_conf50__0": 0.8811881188118812,
      "eval_validation.parquet_accuracy_conf50__1": 0.8119122257053292,
      "eval_validation.parquet_accuracy_conf50__10": 0.9148936170212766,
      "eval_validation.parquet_accuracy_conf50__11": 0.8980392156862745,
      "eval_validation.parquet_accuracy_conf50__12": 0.9178571428571428,
      "eval_validation.parquet_accuracy_conf50__13": 0.7771428571428571,
      "eval_validation.parquet_accuracy_conf50__14": 0.838150289017341,
      "eval_validation.parquet_accuracy_conf50__15": 0.8654708520179372,
      "eval_validation.parquet_accuracy_conf50__16": 0.8563049853372434,
      "eval_validation.parquet_accuracy_conf50__17": 0.8867924528301887,
      "eval_validation.parquet_accuracy_conf50__18": 0.8719723183391004,
      "eval_validation.parquet_accuracy_conf50__19": 0.824773413897281,
      "eval_validation.parquet_accuracy_conf50__2": 0.8247422680412371,
      "eval_validation.parquet_accuracy_conf50__20": 0.7676470588235295,
      "eval_validation.parquet_accuracy_conf50__21": 0.9270976616231087,
      "eval_validation.parquet_accuracy_conf50__22": 0.8522012578616353,
      "eval_validation.parquet_accuracy_conf50__23": 0.8625429553264605,
      "eval_validation.parquet_accuracy_conf50__3": 0.9009287925696594,
      "eval_validation.parquet_accuracy_conf50__4": 0.858603066439523,
      "eval_validation.parquet_accuracy_conf50__5": 0.8854961832061069,
      "eval_validation.parquet_accuracy_conf50__6": 0.8936678614097969,
      "eval_validation.parquet_accuracy_conf50__7": 0.8235294117647058,
      "eval_validation.parquet_accuracy_conf50__8": 0.8923512747875354,
      "eval_validation.parquet_accuracy_conf50__9": 0.8503649635036497,
      "eval_validation.parquet_accuracy_conf75": 0.9156823763302537,
      "eval_validation.parquet_accuracy_conf75__0": 0.9347826086956522,
      "eval_validation.parquet_accuracy_conf75__1": 0.88671875,
      "eval_validation.parquet_accuracy_conf75__10": 0.9544303797468354,
      "eval_validation.parquet_accuracy_conf75__11": 0.9288702928870293,
      "eval_validation.parquet_accuracy_conf75__12": 0.9509803921568627,
      "eval_validation.parquet_accuracy_conf75__13": 0.8217054263565892,
      "eval_validation.parquet_accuracy_conf75__14": 0.9027303754266212,
      "eval_validation.parquet_accuracy_conf75__15": 0.9392265193370166,
      "eval_validation.parquet_accuracy_conf75__16": 0.910958904109589,
      "eval_validation.parquet_accuracy_conf75__17": 0.9308300395256917,
      "eval_validation.parquet_accuracy_conf75__18": 0.9138576779026217,
      "eval_validation.parquet_accuracy_conf75__19": 0.8851851851851852,
      "eval_validation.parquet_accuracy_conf75__2": 0.9047619047619048,
      "eval_validation.parquet_accuracy_conf75__20": 0.8365019011406845,
      "eval_validation.parquet_accuracy_conf75__21": 0.9488304093567251,
      "eval_validation.parquet_accuracy_conf75__22": 0.89568345323741,
      "eval_validation.parquet_accuracy_conf75__23": 0.9365079365079365,
      "eval_validation.parquet_accuracy_conf75__3": 0.9333333333333333,
      "eval_validation.parquet_accuracy_conf75__4": 0.8932038834951457,
      "eval_validation.parquet_accuracy_conf75__5": 0.9224137931034483,
      "eval_validation.parquet_accuracy_conf75__6": 0.9230769230769231,
      "eval_validation.parquet_accuracy_conf75__7": 0.8746666666666667,
      "eval_validation.parquet_accuracy_conf75__8": 0.9339622641509434,
      "eval_validation.parquet_accuracy_conf75__9": 0.91027496382055,
      "eval_validation.parquet_accuracy_label_average": 0.8504296666277162,
      "eval_validation.parquet_accuracy_label_average_conf50": 0.8618195935008668,
      "eval_validation.parquet_accuracy_label_average_conf75": 0.9113955826658905,
      "eval_validation.parquet_accuracy_label_min": 0.7419354838709677,
      "eval_validation.parquet_accuracy_label_min_conf50": 0.7676470588235295,
      "eval_validation.parquet_accuracy_label_min_conf75": 0.8217054263565892,
      "eval_validation.parquet_loss": 0.47853514552116394,
      "eval_validation.parquet_proportion_conf50": 0.9788,
      "eval_validation.parquet_proportion_conf75": 0.8551,
      "eval_validation.parquet_runtime": 8.3896,
      "eval_validation.parquet_samples_per_second": 1191.949,
      "eval_validation.parquet_steps_per_second": 37.308,
      "num_input_tokens_seen": 1566401088,
      "step": 628
    },
    {
      "epoch": 4.4608,
      "grad_norm": 7.5625,
      "learning_rate": 1.1396011396011397e-05,
      "loss": 1.4653,
      "num_input_tokens_seen": 1745927840,
      "step": 700
    },
    {
      "epoch": 4.9728,
      "eval_validation.parquet_accuracy": 0.8571,
      "eval_validation.parquet_accuracy__0": 0.8725490196078431,
      "eval_validation.parquet_accuracy__1": 0.7914110429447853,
      "eval_validation.parquet_accuracy__10": 0.9105882352941177,
      "eval_validation.parquet_accuracy__11": 0.8918918918918919,
      "eval_validation.parquet_accuracy__12": 0.9033391915641477,
      "eval_validation.parquet_accuracy__13": 0.7419354838709677,
      "eval_validation.parquet_accuracy__14": 0.8314447592067988,
      "eval_validation.parquet_accuracy__15": 0.8558951965065502,
      "eval_validation.parquet_accuracy__16": 0.8481375358166189,
      "eval_validation.parquet_accuracy__17": 0.875,
      "eval_validation.parquet_accuracy__18": 0.8595890410958904,
      "eval_validation.parquet_accuracy__19": 0.8117647058823529,
      "eval_validation.parquet_accuracy__2": 0.8109452736318408,
      "eval_validation.parquet_accuracy__20": 0.7436619718309859,
      "eval_validation.parquet_accuracy__21": 0.9262295081967213,
      "eval_validation.parquet_accuracy__22": 0.8440366972477065,
      "eval_validation.parquet_accuracy__23": 0.863481228668942,
      "eval_validation.parquet_accuracy__3": 0.8809523809523809,
      "eval_validation.parquet_accuracy__4": 0.8497495826377296,
      "eval_validation.parquet_accuracy__5": 0.8731343283582089,
      "eval_validation.parquet_accuracy__6": 0.8883666274970623,
      "eval_validation.parquet_accuracy__7": 0.7956989247311828,
      "eval_validation.parquet_accuracy__8": 0.9044943820224719,
      "eval_validation.parquet_accuracy__9": 0.8420427553444181,
      "eval_validation.parquet_accuracy_conf50": 0.8678994687372292,
      "eval_validation.parquet_accuracy_conf50__0": 0.8811881188118812,
      "eval_validation.parquet_accuracy_conf50__1": 0.8025078369905956,
      "eval_validation.parquet_accuracy_conf50__10": 0.9148936170212766,
      "eval_validation.parquet_accuracy_conf50__11": 0.8980392156862745,
      "eval_validation.parquet_accuracy_conf50__12": 0.9125,
      "eval_validation.parquet_accuracy_conf50__13": 0.7771428571428571,
      "eval_validation.parquet_accuracy_conf50__14": 0.8424855491329479,
      "eval_validation.parquet_accuracy_conf50__15": 0.8699551569506726,
      "eval_validation.parquet_accuracy_conf50__16": 0.8533724340175953,
      "eval_validation.parquet_accuracy_conf50__17": 0.8799313893653516,
      "eval_validation.parquet_accuracy_conf50__18": 0.8650519031141869,
      "eval_validation.parquet_accuracy_conf50__19": 0.8277945619335347,
      "eval_validation.parquet_accuracy_conf50__2": 0.8298969072164949,
      "eval_validation.parquet_accuracy_conf50__20": 0.7647058823529411,
      "eval_validation.parquet_accuracy_conf50__21": 0.9284731774415406,
      "eval_validation.parquet_accuracy_conf50__22": 0.8584905660377359,
      "eval_validation.parquet_accuracy_conf50__23": 0.8625429553264605,
      "eval_validation.parquet_accuracy_conf50__3": 0.9040247678018576,
      "eval_validation.parquet_accuracy_conf50__4": 0.8620102214650767,
      "eval_validation.parquet_accuracy_conf50__5": 0.8854961832061069,
      "eval_validation.parquet_accuracy_conf50__6": 0.9008363201911589,
      "eval_validation.parquet_accuracy_conf50__7": 0.8076923076923077,
      "eval_validation.parquet_accuracy_conf50__8": 0.9065155807365439,
      "eval_validation.parquet_accuracy_conf50__9": 0.8527980535279805,
      "eval_validation.parquet_accuracy_conf75": 0.9163840486492808,
      "eval_validation.parquet_accuracy_conf75__0": 0.9347826086956522,
      "eval_validation.parquet_accuracy_conf75__1": 0.87890625,
      "eval_validation.parquet_accuracy_conf75__10": 0.9518987341772152,
      "eval_validation.parquet_accuracy_conf75__11": 0.9288702928870293,
      "eval_validation.parquet_accuracy_conf75__12": 0.9470588235294117,
      "eval_validation.parquet_accuracy_conf75__13": 0.8217054263565892,
      "eval_validation.parquet_accuracy_conf75__14": 0.9061433447098977,
      "eval_validation.parquet_accuracy_conf75__15": 0.9447513812154696,
      "eval_validation.parquet_accuracy_conf75__16": 0.910958904109589,
      "eval_validation.parquet_accuracy_conf75__17": 0.924901185770751,
      "eval_validation.parquet_accuracy_conf75__18": 0.9101123595505618,
      "eval_validation.parquet_accuracy_conf75__19": 0.8888888888888888,
      "eval_validation.parquet_accuracy_conf75__2": 0.9047619047619048,
      "eval_validation.parquet_accuracy_conf75__20": 0.8326996197718631,
      "eval_validation.parquet_accuracy_conf75__21": 0.9502923976608187,
      "eval_validation.parquet_accuracy_conf75__22": 0.9028776978417267,
      "eval_validation.parquet_accuracy_conf75__23": 0.9325396825396826,
      "eval_validation.parquet_accuracy_conf75__3": 0.9368421052631579,
      "eval_validation.parquet_accuracy_conf75__4": 0.9009708737864077,
      "eval_validation.parquet_accuracy_conf75__5": 0.9224137931034483,
      "eval_validation.parquet_accuracy_conf75__6": 0.9308996088657105,
      "eval_validation.parquet_accuracy_conf75__7": 0.864,
      "eval_validation.parquet_accuracy_conf75__8": 0.940251572327044,
      "eval_validation.parquet_accuracy_conf75__9": 0.9117221418234442,
      "eval_validation.parquet_accuracy_label_average": 0.8506808235334006,
      "eval_validation.parquet_accuracy_label_average_conf50": 0.8620143984651407,
      "eval_validation.parquet_accuracy_label_average_conf75": 0.9116353999015111,
      "eval_validation.parquet_accuracy_label_min": 0.7419354838709677,
      "eval_validation.parquet_accuracy_label_min_conf50": 0.7647058823529411,
      "eval_validation.parquet_accuracy_label_min_conf75": 0.8217054263565892,
      "eval_validation.parquet_loss": 0.47900858521461487,
      "eval_validation.parquet_proportion_conf50": 0.9788,
      "eval_validation.parquet_proportion_conf75": 0.8551,
      "eval_validation.parquet_runtime": 8.446,
      "eval_validation.parquet_samples_per_second": 1183.995,
      "eval_validation.parquet_steps_per_second": 37.059,
      "num_input_tokens_seen": 1949274656,
      "step": 780
    },
    {
      "epoch": 4.9728,
      "num_input_tokens_seen": 1949274656,
      "step": 780,
      "total_flos": 1.297523316772307e+18,
      "train_loss": 1.6634563641670423,
      "train_runtime": 573.9155,
      "train_samples_per_second": 696.967,
      "train_steps_per_second": 1.359
    }
  ],
  "logging_steps": 100,
  "max_steps": 780,
  "num_input_tokens_seen": 1949274656,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.297523316772307e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}