File size: 22,736 Bytes
4e6a79b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
{
  "best_global_step": 15500,
  "best_metric": 0.9434096975688787,
  "best_model_checkpoint": "./arabert_author_model_full/checkpoint-15500",
  "epoch": 3.374700631395602,
  "eval_steps": 500,
  "global_step": 15500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10886131069018071,
      "grad_norm": 745471.1875,
      "learning_rate": 2.171926006528836e-05,
      "loss": 2.2995,
      "step": 500
    },
    {
      "epoch": 0.10886131069018071,
      "eval_accuracy": 0.6985195154777928,
      "eval_f1_macro": 0.601246564201863,
      "eval_f1_micro": 0.6985195154777928,
      "eval_loss": 1.2079353332519531,
      "eval_precision_macro": 0.6412966664769482,
      "eval_precision_micro": 0.6985195154777928,
      "eval_recall_macro": 0.6419387939365965,
      "eval_recall_micro": 0.6985195154777928,
      "eval_runtime": 14.7462,
      "eval_samples_per_second": 50.386,
      "eval_steps_per_second": 3.187,
      "step": 500
    },
    {
      "epoch": 0.21772262138036141,
      "grad_norm": 956772.5625,
      "learning_rate": 4.348204570184984e-05,
      "loss": 0.849,
      "step": 1000
    },
    {
      "epoch": 0.21772262138036141,
      "eval_accuracy": 0.819650067294751,
      "eval_f1_macro": 0.7996069224582287,
      "eval_f1_micro": 0.819650067294751,
      "eval_loss": 0.5631475448608398,
      "eval_precision_macro": 0.80345079706281,
      "eval_precision_micro": 0.819650067294751,
      "eval_recall_macro": 0.82483257704162,
      "eval_recall_micro": 0.819650067294751,
      "eval_runtime": 14.7707,
      "eval_samples_per_second": 50.302,
      "eval_steps_per_second": 3.182,
      "step": 1000
    },
    {
      "epoch": 0.32658393207054215,
      "grad_norm": 326904.5,
      "learning_rate": 6.524483133841132e-05,
      "loss": 0.5868,
      "step": 1500
    },
    {
      "epoch": 0.32658393207054215,
      "eval_accuracy": 0.8021534320323015,
      "eval_f1_macro": 0.793396840762137,
      "eval_f1_micro": 0.8021534320323015,
      "eval_loss": 0.6846649646759033,
      "eval_precision_macro": 0.8435106749075885,
      "eval_precision_micro": 0.8021534320323015,
      "eval_recall_macro": 0.7916833340258262,
      "eval_recall_micro": 0.8021534320323015,
      "eval_runtime": 14.8117,
      "eval_samples_per_second": 50.163,
      "eval_steps_per_second": 3.173,
      "step": 1500
    },
    {
      "epoch": 0.43544524276072283,
      "grad_norm": 218412.828125,
      "learning_rate": 7.998128491699842e-05,
      "loss": 0.5612,
      "step": 2000
    },
    {
      "epoch": 0.43544524276072283,
      "eval_accuracy": 0.8519515477792732,
      "eval_f1_macro": 0.8247069527158585,
      "eval_f1_micro": 0.8519515477792732,
      "eval_loss": 0.4477691948413849,
      "eval_precision_macro": 0.8896978331250329,
      "eval_precision_micro": 0.8519515477792732,
      "eval_recall_macro": 0.8379996548860711,
      "eval_recall_micro": 0.8519515477792732,
      "eval_runtime": 14.7186,
      "eval_samples_per_second": 50.48,
      "eval_steps_per_second": 3.193,
      "step": 2000
    },
    {
      "epoch": 0.5443065534509035,
      "grad_norm": 1928294.625,
      "learning_rate": 7.968493088594472e-05,
      "loss": 0.4929,
      "step": 2500
    },
    {
      "epoch": 0.5443065534509035,
      "eval_accuracy": 0.873485868102288,
      "eval_f1_macro": 0.8688756385026712,
      "eval_f1_micro": 0.873485868102288,
      "eval_loss": 0.3026486039161682,
      "eval_precision_macro": 0.878210989714668,
      "eval_precision_micro": 0.873485868102288,
      "eval_recall_macro": 0.8859921080399548,
      "eval_recall_micro": 0.873485868102288,
      "eval_runtime": 14.7131,
      "eval_samples_per_second": 50.499,
      "eval_steps_per_second": 3.194,
      "step": 2500
    },
    {
      "epoch": 0.6531678641410843,
      "grad_norm": 3023410.5,
      "learning_rate": 7.903065943344406e-05,
      "loss": 0.4618,
      "step": 3000
    },
    {
      "epoch": 0.6531678641410843,
      "eval_accuracy": 0.8613728129205922,
      "eval_f1_macro": 0.8296445269102163,
      "eval_f1_micro": 0.8613728129205922,
      "eval_loss": 0.43775779008865356,
      "eval_precision_macro": 0.8710561256381226,
      "eval_precision_micro": 0.8613728129205922,
      "eval_recall_macro": 0.8541696546910839,
      "eval_recall_micro": 0.8613728129205922,
      "eval_runtime": 14.7062,
      "eval_samples_per_second": 50.523,
      "eval_steps_per_second": 3.196,
      "step": 3000
    },
    {
      "epoch": 0.762029174831265,
      "grad_norm": 211605.15625,
      "learning_rate": 7.802437141773096e-05,
      "loss": 0.4028,
      "step": 3500
    },
    {
      "epoch": 0.762029174831265,
      "eval_accuracy": 0.8950201884253028,
      "eval_f1_macro": 0.8917785158702655,
      "eval_f1_micro": 0.8950201884253028,
      "eval_loss": 0.25510504841804504,
      "eval_precision_macro": 0.9107123575695487,
      "eval_precision_micro": 0.8950201884253028,
      "eval_recall_macro": 0.9057010565367906,
      "eval_recall_micro": 0.8950201884253028,
      "eval_runtime": 14.7188,
      "eval_samples_per_second": 50.48,
      "eval_steps_per_second": 3.193,
      "step": 3500
    },
    {
      "epoch": 0.8708904855214457,
      "grad_norm": 37626.74609375,
      "learning_rate": 7.667514252581752e-05,
      "loss": 0.3747,
      "step": 4000
    },
    {
      "epoch": 0.8708904855214457,
      "eval_accuracy": 0.892328398384926,
      "eval_f1_macro": 0.8948877387080549,
      "eval_f1_micro": 0.892328398384926,
      "eval_loss": 0.2622196674346924,
      "eval_precision_macro": 0.9437605053976897,
      "eval_precision_micro": 0.892328398384926,
      "eval_recall_macro": 0.9063603025064753,
      "eval_recall_micro": 0.892328398384926,
      "eval_runtime": 14.7613,
      "eval_samples_per_second": 50.334,
      "eval_steps_per_second": 3.184,
      "step": 4000
    },
    {
      "epoch": 0.9797517962116263,
      "grad_norm": 341548.65625,
      "learning_rate": 7.499514142009407e-05,
      "loss": 0.3686,
      "step": 4500
    },
    {
      "epoch": 0.9797517962116263,
      "eval_accuracy": 0.901749663526245,
      "eval_f1_macro": 0.9071958475193036,
      "eval_f1_micro": 0.9017496635262451,
      "eval_loss": 0.21770605444908142,
      "eval_precision_macro": 0.9392339212137314,
      "eval_precision_micro": 0.901749663526245,
      "eval_recall_macro": 0.9187280722751042,
      "eval_recall_micro": 0.901749663526245,
      "eval_runtime": 14.7411,
      "eval_samples_per_second": 50.403,
      "eval_steps_per_second": 3.188,
      "step": 4500
    },
    {
      "epoch": 1.088613106901807,
      "grad_norm": 51656.32421875,
      "learning_rate": 7.299951998946065e-05,
      "loss": 0.2762,
      "step": 5000
    },
    {
      "epoch": 1.088613106901807,
      "eval_accuracy": 0.8896366083445492,
      "eval_f1_macro": 0.8803954267807832,
      "eval_f1_micro": 0.8896366083445492,
      "eval_loss": 0.37781140208244324,
      "eval_precision_macro": 0.8980066417509999,
      "eval_precision_micro": 0.8896366083445492,
      "eval_recall_macro": 0.8882222866157216,
      "eval_recall_micro": 0.8896366083445492,
      "eval_runtime": 15.0879,
      "eval_samples_per_second": 49.245,
      "eval_steps_per_second": 3.115,
      "step": 5000
    },
    {
      "epoch": 1.1974744175919878,
      "grad_norm": 1009913.0625,
      "learning_rate": 7.070627669481137e-05,
      "loss": 0.2851,
      "step": 5500
    },
    {
      "epoch": 1.1974744175919878,
      "eval_accuracy": 0.882907133243607,
      "eval_f1_macro": 0.8672894626796113,
      "eval_f1_micro": 0.882907133243607,
      "eval_loss": 0.38583362102508545,
      "eval_precision_macro": 0.9049625152940963,
      "eval_precision_micro": 0.882907133243607,
      "eval_recall_macro": 0.8813935878782198,
      "eval_recall_micro": 0.882907133243607,
      "eval_runtime": 14.7029,
      "eval_samples_per_second": 50.534,
      "eval_steps_per_second": 3.197,
      "step": 5500
    },
    {
      "epoch": 1.3063357282821686,
      "grad_norm": 26227.69140625,
      "learning_rate": 6.813609424135567e-05,
      "loss": 0.2818,
      "step": 6000
    },
    {
      "epoch": 1.3063357282821686,
      "eval_accuracy": 0.9138627187079408,
      "eval_f1_macro": 0.9250807107212078,
      "eval_f1_micro": 0.9138627187079408,
      "eval_loss": 0.1822730302810669,
      "eval_precision_macro": 0.9436200764635643,
      "eval_precision_micro": 0.9138627187079408,
      "eval_recall_macro": 0.9322277636580386,
      "eval_recall_micro": 0.9138627187079408,
      "eval_runtime": 14.7441,
      "eval_samples_per_second": 50.393,
      "eval_steps_per_second": 3.188,
      "step": 6000
    },
    {
      "epoch": 1.4151970389723492,
      "grad_norm": 87145.015625,
      "learning_rate": 6.531215304180572e-05,
      "loss": 0.2539,
      "step": 6500
    },
    {
      "epoch": 1.4151970389723492,
      "eval_accuracy": 0.9044414535666218,
      "eval_f1_macro": 0.9159118265135213,
      "eval_f1_micro": 0.9044414535666218,
      "eval_loss": 0.19744105637073517,
      "eval_precision_macro": 0.9248731430404993,
      "eval_precision_micro": 0.9044414535666218,
      "eval_recall_macro": 0.9361879615931227,
      "eval_recall_micro": 0.9044414535666218,
      "eval_runtime": 14.7205,
      "eval_samples_per_second": 50.474,
      "eval_steps_per_second": 3.193,
      "step": 6500
    },
    {
      "epoch": 1.52405834966253,
      "grad_norm": 4197689.5,
      "learning_rate": 6.22599221528008e-05,
      "loss": 0.2342,
      "step": 7000
    },
    {
      "epoch": 1.52405834966253,
      "eval_accuracy": 0.9152086137281292,
      "eval_f1_macro": 0.9209521774588028,
      "eval_f1_micro": 0.9152086137281292,
      "eval_loss": 0.16721387207508087,
      "eval_precision_macro": 0.9316385374819118,
      "eval_precision_micro": 0.9152086137281292,
      "eval_recall_macro": 0.9305594066426393,
      "eval_recall_micro": 0.9152086137281292,
      "eval_runtime": 14.7185,
      "eval_samples_per_second": 50.481,
      "eval_steps_per_second": 3.193,
      "step": 7000
    },
    {
      "epoch": 1.6329196603527105,
      "grad_norm": 29691.1875,
      "learning_rate": 5.900692957010821e-05,
      "loss": 0.2658,
      "step": 7500
    },
    {
      "epoch": 1.6329196603527105,
      "eval_accuracy": 0.9205921938088829,
      "eval_f1_macro": 0.9292673927082579,
      "eval_f1_micro": 0.9205921938088829,
      "eval_loss": 0.16926071047782898,
      "eval_precision_macro": 0.9467601029387086,
      "eval_precision_micro": 0.9205921938088829,
      "eval_recall_macro": 0.9353857192023052,
      "eval_recall_micro": 0.9205921938088829,
      "eval_runtime": 14.7038,
      "eval_samples_per_second": 50.531,
      "eval_steps_per_second": 3.196,
      "step": 7500
    },
    {
      "epoch": 1.7417809710428913,
      "grad_norm": 82702.546875,
      "learning_rate": 5.5582513954302386e-05,
      "loss": 0.2703,
      "step": 8000
    },
    {
      "epoch": 1.7417809710428913,
      "eval_accuracy": 0.917900403768506,
      "eval_f1_macro": 0.9205592899943698,
      "eval_f1_micro": 0.917900403768506,
      "eval_loss": 0.22037993371486664,
      "eval_precision_macro": 0.9459349396324186,
      "eval_precision_micro": 0.917900403768506,
      "eval_recall_macro": 0.9278516945604416,
      "eval_recall_micro": 0.917900403768506,
      "eval_runtime": 14.7085,
      "eval_samples_per_second": 50.515,
      "eval_steps_per_second": 3.195,
      "step": 8000
    },
    {
      "epoch": 1.850642281733072,
      "grad_norm": 450699.1875,
      "learning_rate": 5.201756002610252e-05,
      "loss": 0.2566,
      "step": 8500
    },
    {
      "epoch": 1.850642281733072,
      "eval_accuracy": 0.9098250336473755,
      "eval_f1_macro": 0.9126391472355347,
      "eval_f1_micro": 0.9098250336473755,
      "eval_loss": 0.26449093222618103,
      "eval_precision_macro": 0.9352643525302922,
      "eval_precision_micro": 0.9098250336473755,
      "eval_recall_macro": 0.931955435163728,
      "eval_recall_micro": 0.9098250336473755,
      "eval_runtime": 14.6939,
      "eval_samples_per_second": 50.565,
      "eval_steps_per_second": 3.199,
      "step": 8500
    },
    {
      "epoch": 1.959503592423253,
      "grad_norm": 37148.73046875,
      "learning_rate": 4.834422001783138e-05,
      "loss": 0.2242,
      "step": 9000
    },
    {
      "epoch": 1.959503592423253,
      "eval_accuracy": 0.9246298788694481,
      "eval_f1_macro": 0.9278695233625198,
      "eval_f1_micro": 0.9246298788694481,
      "eval_loss": 0.20524874329566956,
      "eval_precision_macro": 0.9473174570200222,
      "eval_precision_micro": 0.9246298788694481,
      "eval_recall_macro": 0.9317137486146517,
      "eval_recall_micro": 0.9246298788694481,
      "eval_runtime": 14.65,
      "eval_samples_per_second": 50.717,
      "eval_steps_per_second": 3.208,
      "step": 9000
    },
    {
      "epoch": 2.0683649031134337,
      "grad_norm": 65893.8984375,
      "learning_rate": 4.45956236932181e-05,
      "loss": 0.1672,
      "step": 9500
    },
    {
      "epoch": 2.0683649031134337,
      "eval_accuracy": 0.9165545087483177,
      "eval_f1_macro": 0.9239702133396492,
      "eval_f1_micro": 0.9165545087483177,
      "eval_loss": 0.3571414351463318,
      "eval_precision_macro": 0.9412785975210729,
      "eval_precision_micro": 0.9165545087483177,
      "eval_recall_macro": 0.9173054563259597,
      "eval_recall_micro": 0.9165545087483177,
      "eval_runtime": 14.749,
      "eval_samples_per_second": 50.376,
      "eval_steps_per_second": 3.187,
      "step": 9500
    },
    {
      "epoch": 2.177226213803614,
      "grad_norm": 20243.5546875,
      "learning_rate": 4.0805579550869046e-05,
      "loss": 0.1593,
      "step": 10000
    },
    {
      "epoch": 2.177226213803614,
      "eval_accuracy": 0.9125168236877523,
      "eval_f1_macro": 0.9238184226911409,
      "eval_f1_micro": 0.9125168236877523,
      "eval_loss": 0.30988800525665283,
      "eval_precision_macro": 0.9555289484815556,
      "eval_precision_micro": 0.9125168236877523,
      "eval_recall_macro": 0.9275764985418137,
      "eval_recall_micro": 0.9125168236877523,
      "eval_runtime": 15.0155,
      "eval_samples_per_second": 49.482,
      "eval_steps_per_second": 3.13,
      "step": 10000
    },
    {
      "epoch": 2.286087524493795,
      "grad_norm": 33157.19140625,
      "learning_rate": 3.7008269906245454e-05,
      "loss": 0.1799,
      "step": 10500
    },
    {
      "epoch": 2.286087524493795,
      "eval_accuracy": 0.9246298788694481,
      "eval_f1_macro": 0.9287251727049811,
      "eval_f1_micro": 0.9246298788694481,
      "eval_loss": 0.23414301872253418,
      "eval_precision_macro": 0.959944603131214,
      "eval_precision_micro": 0.9246298788694481,
      "eval_recall_macro": 0.9306134629626335,
      "eval_recall_micro": 0.9246298788694481,
      "eval_runtime": 14.6983,
      "eval_samples_per_second": 50.55,
      "eval_steps_per_second": 3.198,
      "step": 10500
    },
    {
      "epoch": 2.3949488351839756,
      "grad_norm": 48777.84375,
      "learning_rate": 3.323794260219589e-05,
      "loss": 0.166,
      "step": 11000
    },
    {
      "epoch": 2.3949488351839756,
      "eval_accuracy": 0.9057873485868102,
      "eval_f1_macro": 0.9123153410480982,
      "eval_f1_micro": 0.9057873485868102,
      "eval_loss": 0.3453662395477295,
      "eval_precision_macro": 0.9446104426733389,
      "eval_precision_micro": 0.9057873485868102,
      "eval_recall_macro": 0.91935239522038,
      "eval_recall_micro": 0.9057873485868102,
      "eval_runtime": 14.7404,
      "eval_samples_per_second": 50.406,
      "eval_steps_per_second": 3.189,
      "step": 11000
    },
    {
      "epoch": 2.5038101458741564,
      "grad_norm": 33563.56640625,
      "learning_rate": 2.9528602128499004e-05,
      "loss": 0.162,
      "step": 11500
    },
    {
      "epoch": 2.5038101458741564,
      "eval_accuracy": 0.9098250336473755,
      "eval_f1_macro": 0.9212878627631594,
      "eval_f1_micro": 0.9098250336473755,
      "eval_loss": 0.22809743881225586,
      "eval_precision_macro": 0.9389309808956737,
      "eval_precision_micro": 0.9098250336473755,
      "eval_recall_macro": 0.9311247877025975,
      "eval_recall_micro": 0.9098250336473755,
      "eval_runtime": 14.666,
      "eval_samples_per_second": 50.661,
      "eval_steps_per_second": 3.205,
      "step": 11500
    },
    {
      "epoch": 2.612671456564337,
      "grad_norm": 58977.125,
      "learning_rate": 2.591370293620146e-05,
      "loss": 0.1452,
      "step": 12000
    },
    {
      "epoch": 2.612671456564337,
      "eval_accuracy": 0.9219380888290714,
      "eval_f1_macro": 0.9232635700162879,
      "eval_f1_micro": 0.9219380888290714,
      "eval_loss": 0.2860707640647888,
      "eval_precision_macro": 0.9426347574998575,
      "eval_precision_micro": 0.9219380888290714,
      "eval_recall_macro": 0.9262974863930373,
      "eval_recall_micro": 0.9219380888290714,
      "eval_runtime": 14.8095,
      "eval_samples_per_second": 50.171,
      "eval_steps_per_second": 3.174,
      "step": 12000
    },
    {
      "epoch": 2.7215327672545175,
      "grad_norm": 46900.25390625,
      "learning_rate": 2.2425847712741887e-05,
      "loss": 0.1418,
      "step": 12500
    },
    {
      "epoch": 2.7215327672545175,
      "eval_accuracy": 0.9286675639300135,
      "eval_f1_macro": 0.9357990563843356,
      "eval_f1_micro": 0.9286675639300135,
      "eval_loss": 0.15669873356819153,
      "eval_precision_macro": 0.9529768865317036,
      "eval_precision_micro": 0.9286675639300135,
      "eval_recall_macro": 0.9417303559122717,
      "eval_recall_micro": 0.9286675639300135,
      "eval_runtime": 14.7072,
      "eval_samples_per_second": 50.52,
      "eval_steps_per_second": 3.196,
      "step": 12500
    },
    {
      "epoch": 2.8303940779446983,
      "grad_norm": 37592.3515625,
      "learning_rate": 1.9096493339109878e-05,
      "loss": 0.1429,
      "step": 13000
    },
    {
      "epoch": 2.8303940779446983,
      "eval_accuracy": 0.9165545087483177,
      "eval_f1_macro": 0.9295728643158702,
      "eval_f1_micro": 0.9165545087483177,
      "eval_loss": 0.22479559481143951,
      "eval_precision_macro": 0.9605098350591709,
      "eval_precision_micro": 0.9165545087483177,
      "eval_recall_macro": 0.9328126952515738,
      "eval_recall_micro": 0.9165545087483177,
      "eval_runtime": 14.6901,
      "eval_samples_per_second": 50.578,
      "eval_steps_per_second": 3.199,
      "step": 13000
    },
    {
      "epoch": 2.939255388634879,
      "grad_norm": 79597.40625,
      "learning_rate": 1.5955667181005554e-05,
      "loss": 0.1293,
      "step": 13500
    },
    {
      "epoch": 2.939255388634879,
      "eval_accuracy": 0.9246298788694481,
      "eval_f1_macro": 0.9319848397676713,
      "eval_f1_micro": 0.9246298788694481,
      "eval_loss": 0.27543124556541443,
      "eval_precision_macro": 0.9589344708678029,
      "eval_precision_micro": 0.9246298788694481,
      "eval_recall_macro": 0.932925082879603,
      "eval_recall_micro": 0.9246298788694481,
      "eval_runtime": 14.726,
      "eval_samples_per_second": 50.455,
      "eval_steps_per_second": 3.192,
      "step": 13500
    },
    {
      "epoch": 3.04811669932506,
      "grad_norm": 25773.66796875,
      "learning_rate": 1.3031696272762192e-05,
      "loss": 0.1137,
      "step": 14000
    },
    {
      "epoch": 3.04811669932506,
      "eval_accuracy": 0.9246298788694481,
      "eval_f1_macro": 0.937910042741771,
      "eval_f1_micro": 0.9246298788694481,
      "eval_loss": 0.20125848054885864,
      "eval_precision_macro": 0.9546735463378956,
      "eval_precision_micro": 0.9246298788694481,
      "eval_recall_macro": 0.9429177293988182,
      "eval_recall_micro": 0.9246298788694481,
      "eval_runtime": 15.0054,
      "eval_samples_per_second": 49.515,
      "eval_steps_per_second": 3.132,
      "step": 14000
    },
    {
      "epoch": 3.1569780100152407,
      "grad_norm": 17888.46484375,
      "learning_rate": 1.0350951836516297e-05,
      "loss": 0.0987,
      "step": 14500
    },
    {
      "epoch": 3.1569780100152407,
      "eval_accuracy": 0.9232839838492598,
      "eval_f1_macro": 0.9266276405829272,
      "eval_f1_micro": 0.9232839838492598,
      "eval_loss": 0.29369959235191345,
      "eval_precision_macro": 0.9436536313571009,
      "eval_precision_micro": 0.9232839838492598,
      "eval_recall_macro": 0.9283196203410136,
      "eval_recall_micro": 0.9232839838492598,
      "eval_runtime": 14.7764,
      "eval_samples_per_second": 50.283,
      "eval_steps_per_second": 3.181,
      "step": 14500
    },
    {
      "epoch": 3.265839320705421,
      "grad_norm": 85828.9375,
      "learning_rate": 7.9376114407998e-06,
      "loss": 0.0859,
      "step": 15000
    },
    {
      "epoch": 3.265839320705421,
      "eval_accuracy": 0.9246298788694481,
      "eval_f1_macro": 0.9402166974265765,
      "eval_f1_micro": 0.9246298788694481,
      "eval_loss": 0.17889092862606049,
      "eval_precision_macro": 0.9685045177945787,
      "eval_precision_micro": 0.9246298788694481,
      "eval_recall_macro": 0.9463450172046672,
      "eval_recall_micro": 0.9246298788694481,
      "eval_runtime": 14.7495,
      "eval_samples_per_second": 50.375,
      "eval_steps_per_second": 3.187,
      "step": 15000
    },
    {
      "epoch": 3.374700631395602,
      "grad_norm": 146288.75,
      "learning_rate": 5.813440943640527e-06,
      "loss": 0.0857,
      "step": 15500
    },
    {
      "epoch": 3.374700631395602,
      "eval_accuracy": 0.927321668909825,
      "eval_f1_macro": 0.9434096975688787,
      "eval_f1_micro": 0.927321668909825,
      "eval_loss": 0.16961060464382172,
      "eval_precision_macro": 0.9641802881027017,
      "eval_precision_micro": 0.927321668909825,
      "eval_recall_macro": 0.9472331991452233,
      "eval_recall_micro": 0.927321668909825,
      "eval_runtime": 14.7305,
      "eval_samples_per_second": 50.44,
      "eval_steps_per_second": 3.191,
      "step": 15500
    }
  ],
  "logging_steps": 500,
  "max_steps": 18372,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.52555679969065e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}