roberta-large-768-product / trainer_state.json
Quintu's picture
Upload 12 files
c6c569f verified
{
"best_metric": 0.8328847781417223,
"best_model_checkpoint": "output_classification_768/product/checkpoint-9285",
"epoch": 12.999649982499125,
"eval_steps": 500,
"global_step": 9285,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01400070003500175,
"grad_norm": 21.103994369506836,
"learning_rate": 8.403361344537815e-08,
"loss": 2.901,
"step": 10
},
{
"epoch": 0.0280014000700035,
"grad_norm": 24.394515991210938,
"learning_rate": 1.774042950513539e-07,
"loss": 2.8333,
"step": 20
},
{
"epoch": 0.04200210010500525,
"grad_norm": 17.568700790405273,
"learning_rate": 2.7077497665732963e-07,
"loss": 2.8567,
"step": 30
},
{
"epoch": 0.056002800140007,
"grad_norm": 24.28147315979004,
"learning_rate": 3.454715219421102e-07,
"loss": 2.8378,
"step": 40
},
{
"epoch": 0.07000350017500875,
"grad_norm": 19.30731201171875,
"learning_rate": 4.388422035480859e-07,
"loss": 2.8394,
"step": 50
},
{
"epoch": 0.0840042002100105,
"grad_norm": 22.653026580810547,
"learning_rate": 5.322128851540616e-07,
"loss": 2.7691,
"step": 60
},
{
"epoch": 0.09800490024501225,
"grad_norm": 18.282873153686523,
"learning_rate": 6.255835667600374e-07,
"loss": 2.7781,
"step": 70
},
{
"epoch": 0.112005600280014,
"grad_norm": 23.786239624023438,
"learning_rate": 7.096171802054156e-07,
"loss": 2.748,
"step": 80
},
{
"epoch": 0.12600630031501575,
"grad_norm": 49.12445831298828,
"learning_rate": 7.936507936507937e-07,
"loss": 2.5834,
"step": 90
},
{
"epoch": 0.1400070003500175,
"grad_norm": 47.40871047973633,
"learning_rate": 8.870214752567695e-07,
"loss": 2.5132,
"step": 100
},
{
"epoch": 0.15400770038501926,
"grad_norm": 37.690696716308594,
"learning_rate": 9.80392156862745e-07,
"loss": 2.4696,
"step": 110
},
{
"epoch": 0.168008400420021,
"grad_norm": 53.0093879699707,
"learning_rate": 1.0737628384687208e-06,
"loss": 2.3196,
"step": 120
},
{
"epoch": 0.18200910045502275,
"grad_norm": 27.630456924438477,
"learning_rate": 1.1671335200746967e-06,
"loss": 2.4033,
"step": 130
},
{
"epoch": 0.1960098004900245,
"grad_norm": 28.57124137878418,
"learning_rate": 1.2605042016806724e-06,
"loss": 2.2114,
"step": 140
},
{
"epoch": 0.21001050052502626,
"grad_norm": 32.31087875366211,
"learning_rate": 1.353874883286648e-06,
"loss": 2.1544,
"step": 150
},
{
"epoch": 0.224011200560028,
"grad_norm": 29.275854110717773,
"learning_rate": 1.447245564892624e-06,
"loss": 2.4179,
"step": 160
},
{
"epoch": 0.23801190059502975,
"grad_norm": 28.587217330932617,
"learning_rate": 1.5406162464985996e-06,
"loss": 2.2672,
"step": 170
},
{
"epoch": 0.2520126006300315,
"grad_norm": 63.85499954223633,
"learning_rate": 1.6339869281045753e-06,
"loss": 2.1925,
"step": 180
},
{
"epoch": 0.26601330066503326,
"grad_norm": 38.323062896728516,
"learning_rate": 1.727357609710551e-06,
"loss": 2.2907,
"step": 190
},
{
"epoch": 0.280014000700035,
"grad_norm": 36.11345672607422,
"learning_rate": 1.8207282913165267e-06,
"loss": 2.0923,
"step": 200
},
{
"epoch": 0.29401470073503677,
"grad_norm": 31.201120376586914,
"learning_rate": 1.914098972922503e-06,
"loss": 2.2186,
"step": 210
},
{
"epoch": 0.3080154007700385,
"grad_norm": 34.21009063720703,
"learning_rate": 2.0074696545284783e-06,
"loss": 2.326,
"step": 220
},
{
"epoch": 0.32201610080504023,
"grad_norm": 32.68191909790039,
"learning_rate": 2.100840336134454e-06,
"loss": 2.1694,
"step": 230
},
{
"epoch": 0.336016800840042,
"grad_norm": 27.508405685424805,
"learning_rate": 2.1942110177404298e-06,
"loss": 2.028,
"step": 240
},
{
"epoch": 0.35001750087504374,
"grad_norm": 24.13580894470215,
"learning_rate": 2.2875816993464053e-06,
"loss": 2.2815,
"step": 250
},
{
"epoch": 0.3640182009100455,
"grad_norm": 26.199440002441406,
"learning_rate": 2.380952380952381e-06,
"loss": 2.3411,
"step": 260
},
{
"epoch": 0.37801890094504725,
"grad_norm": 36.189361572265625,
"learning_rate": 2.474323062558357e-06,
"loss": 2.2379,
"step": 270
},
{
"epoch": 0.392019600980049,
"grad_norm": 22.63069725036621,
"learning_rate": 2.567693744164332e-06,
"loss": 2.1743,
"step": 280
},
{
"epoch": 0.40602030101505077,
"grad_norm": 28.03191566467285,
"learning_rate": 2.6610644257703085e-06,
"loss": 2.2767,
"step": 290
},
{
"epoch": 0.4200210010500525,
"grad_norm": 32.591068267822266,
"learning_rate": 2.7544351073762845e-06,
"loss": 2.1676,
"step": 300
},
{
"epoch": 0.4340217010850543,
"grad_norm": 23.519025802612305,
"learning_rate": 2.8478057889822595e-06,
"loss": 2.3134,
"step": 310
},
{
"epoch": 0.448022401120056,
"grad_norm": 26.249027252197266,
"learning_rate": 2.9411764705882355e-06,
"loss": 2.3326,
"step": 320
},
{
"epoch": 0.46202310115505774,
"grad_norm": 24.713272094726562,
"learning_rate": 3.034547152194211e-06,
"loss": 2.2709,
"step": 330
},
{
"epoch": 0.4760238011900595,
"grad_norm": 26.74544334411621,
"learning_rate": 3.127917833800187e-06,
"loss": 2.2261,
"step": 340
},
{
"epoch": 0.49002450122506125,
"grad_norm": 32.99632263183594,
"learning_rate": 3.221288515406163e-06,
"loss": 2.0507,
"step": 350
},
{
"epoch": 0.504025201260063,
"grad_norm": 44.55859375,
"learning_rate": 3.3146591970121383e-06,
"loss": 2.1637,
"step": 360
},
{
"epoch": 0.5180259012950648,
"grad_norm": 26.943199157714844,
"learning_rate": 3.4080298786181142e-06,
"loss": 2.2462,
"step": 370
},
{
"epoch": 0.5320266013300665,
"grad_norm": 21.44816780090332,
"learning_rate": 3.5014005602240897e-06,
"loss": 2.1052,
"step": 380
},
{
"epoch": 0.5460273013650683,
"grad_norm": 27.31263542175293,
"learning_rate": 3.5947712418300657e-06,
"loss": 2.1083,
"step": 390
},
{
"epoch": 0.56002800140007,
"grad_norm": 22.418066024780273,
"learning_rate": 3.6881419234360416e-06,
"loss": 1.7936,
"step": 400
},
{
"epoch": 0.5740287014350718,
"grad_norm": 34.41685104370117,
"learning_rate": 3.781512605042017e-06,
"loss": 1.8335,
"step": 410
},
{
"epoch": 0.5880294014700735,
"grad_norm": 27.243928909301758,
"learning_rate": 3.874883286647993e-06,
"loss": 1.8911,
"step": 420
},
{
"epoch": 0.6020301015050753,
"grad_norm": 24.380064010620117,
"learning_rate": 3.968253968253968e-06,
"loss": 1.8737,
"step": 430
},
{
"epoch": 0.616030801540077,
"grad_norm": 26.450056076049805,
"learning_rate": 4.0616246498599444e-06,
"loss": 1.6773,
"step": 440
},
{
"epoch": 0.6300315015750787,
"grad_norm": 28.065898895263672,
"learning_rate": 4.15499533146592e-06,
"loss": 1.675,
"step": 450
},
{
"epoch": 0.6440322016100805,
"grad_norm": 26.523653030395508,
"learning_rate": 4.2483660130718954e-06,
"loss": 1.6823,
"step": 460
},
{
"epoch": 0.6580329016450822,
"grad_norm": 26.737417221069336,
"learning_rate": 4.341736694677872e-06,
"loss": 1.414,
"step": 470
},
{
"epoch": 0.672033601680084,
"grad_norm": 21.968425750732422,
"learning_rate": 4.435107376283847e-06,
"loss": 1.4583,
"step": 480
},
{
"epoch": 0.6860343017150857,
"grad_norm": 23.044065475463867,
"learning_rate": 4.528478057889823e-06,
"loss": 1.6017,
"step": 490
},
{
"epoch": 0.7000350017500875,
"grad_norm": 35.77534866333008,
"learning_rate": 4.621848739495799e-06,
"loss": 1.5726,
"step": 500
},
{
"epoch": 0.7140357017850892,
"grad_norm": 35.47792053222656,
"learning_rate": 4.715219421101775e-06,
"loss": 1.4581,
"step": 510
},
{
"epoch": 0.728036401820091,
"grad_norm": 38.72248077392578,
"learning_rate": 4.80859010270775e-06,
"loss": 1.2325,
"step": 520
},
{
"epoch": 0.7420371018550928,
"grad_norm": 29.20247459411621,
"learning_rate": 4.901960784313726e-06,
"loss": 1.5076,
"step": 530
},
{
"epoch": 0.7560378018900945,
"grad_norm": 29.444625854492188,
"learning_rate": 4.995331465919702e-06,
"loss": 1.2232,
"step": 540
},
{
"epoch": 0.7700385019250963,
"grad_norm": 27.85977554321289,
"learning_rate": 5.088702147525677e-06,
"loss": 1.0447,
"step": 550
},
{
"epoch": 0.784039201960098,
"grad_norm": 29.185314178466797,
"learning_rate": 5.182072829131654e-06,
"loss": 1.0293,
"step": 560
},
{
"epoch": 0.7980399019950998,
"grad_norm": 17.427093505859375,
"learning_rate": 5.275443510737629e-06,
"loss": 1.1785,
"step": 570
},
{
"epoch": 0.8120406020301015,
"grad_norm": 36.874305725097656,
"learning_rate": 5.368814192343604e-06,
"loss": 1.1096,
"step": 580
},
{
"epoch": 0.8260413020651033,
"grad_norm": 25.060104370117188,
"learning_rate": 5.4621848739495795e-06,
"loss": 0.8118,
"step": 590
},
{
"epoch": 0.840042002100105,
"grad_norm": 41.775665283203125,
"learning_rate": 5.555555555555557e-06,
"loss": 1.1449,
"step": 600
},
{
"epoch": 0.8540427021351068,
"grad_norm": 28.053466796875,
"learning_rate": 5.648926237161531e-06,
"loss": 1.0124,
"step": 610
},
{
"epoch": 0.8680434021701086,
"grad_norm": 18.944034576416016,
"learning_rate": 5.742296918767507e-06,
"loss": 0.6939,
"step": 620
},
{
"epoch": 0.8820441022051102,
"grad_norm": 35.93363952636719,
"learning_rate": 5.835667600373483e-06,
"loss": 0.9968,
"step": 630
},
{
"epoch": 0.896044802240112,
"grad_norm": 31.607877731323242,
"learning_rate": 5.929038281979459e-06,
"loss": 0.9672,
"step": 640
},
{
"epoch": 0.9100455022751137,
"grad_norm": 24.163589477539062,
"learning_rate": 6.022408963585434e-06,
"loss": 1.1201,
"step": 650
},
{
"epoch": 0.9240462023101155,
"grad_norm": 32.112239837646484,
"learning_rate": 6.1157796451914105e-06,
"loss": 0.9118,
"step": 660
},
{
"epoch": 0.9380469023451172,
"grad_norm": 23.415752410888672,
"learning_rate": 6.209150326797386e-06,
"loss": 0.8296,
"step": 670
},
{
"epoch": 0.952047602380119,
"grad_norm": 16.017988204956055,
"learning_rate": 6.3025210084033615e-06,
"loss": 0.8337,
"step": 680
},
{
"epoch": 0.9660483024151207,
"grad_norm": 20.97634506225586,
"learning_rate": 6.395891690009337e-06,
"loss": 0.6947,
"step": 690
},
{
"epoch": 0.9800490024501225,
"grad_norm": 21.921537399291992,
"learning_rate": 6.489262371615313e-06,
"loss": 0.7419,
"step": 700
},
{
"epoch": 0.9940497024851243,
"grad_norm": 23.830955505371094,
"learning_rate": 6.582633053221289e-06,
"loss": 0.6673,
"step": 710
},
{
"epoch": 0.999649982499125,
"eval_f1": 0.7134617992065114,
"eval_loss": 0.7731016874313354,
"eval_precision": 0.7414465080254933,
"eval_recall": 0.7123862841147656,
"eval_runtime": 85.6445,
"eval_samples_per_second": 16.685,
"eval_steps_per_second": 8.348,
"step": 714
},
{
"epoch": 1.008050402520126,
"grad_norm": 41.797821044921875,
"learning_rate": 6.676003734827264e-06,
"loss": 0.9137,
"step": 720
},
{
"epoch": 1.0220511025551278,
"grad_norm": 22.665904998779297,
"learning_rate": 6.769374416433241e-06,
"loss": 0.7504,
"step": 730
},
{
"epoch": 1.0360518025901295,
"grad_norm": 24.145736694335938,
"learning_rate": 6.862745098039216e-06,
"loss": 0.7768,
"step": 740
},
{
"epoch": 1.0500525026251313,
"grad_norm": 34.773475646972656,
"learning_rate": 6.956115779645192e-06,
"loss": 0.7251,
"step": 750
},
{
"epoch": 1.064053202660133,
"grad_norm": 52.297454833984375,
"learning_rate": 7.049486461251168e-06,
"loss": 0.9331,
"step": 760
},
{
"epoch": 1.0780539026951348,
"grad_norm": 24.283205032348633,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.8233,
"step": 770
},
{
"epoch": 1.0920546027301365,
"grad_norm": 50.00333786010742,
"learning_rate": 7.236227824463119e-06,
"loss": 0.8773,
"step": 780
},
{
"epoch": 1.1060553027651383,
"grad_norm": 15.482346534729004,
"learning_rate": 7.3295985060690946e-06,
"loss": 0.6958,
"step": 790
},
{
"epoch": 1.12005600280014,
"grad_norm": 22.290390014648438,
"learning_rate": 7.422969187675071e-06,
"loss": 0.4332,
"step": 800
},
{
"epoch": 1.1340567028351418,
"grad_norm": 31.88663673400879,
"learning_rate": 7.516339869281046e-06,
"loss": 0.6877,
"step": 810
},
{
"epoch": 1.1480574028701436,
"grad_norm": 35.20397186279297,
"learning_rate": 7.609710550887022e-06,
"loss": 0.6279,
"step": 820
},
{
"epoch": 1.1620581029051453,
"grad_norm": 18.677602767944336,
"learning_rate": 7.703081232492997e-06,
"loss": 0.7173,
"step": 830
},
{
"epoch": 1.176058802940147,
"grad_norm": 19.016010284423828,
"learning_rate": 7.796451914098973e-06,
"loss": 0.4821,
"step": 840
},
{
"epoch": 1.1900595029751488,
"grad_norm": 12.054780006408691,
"learning_rate": 7.889822595704948e-06,
"loss": 0.5104,
"step": 850
},
{
"epoch": 1.2040602030101506,
"grad_norm": 22.70314598083496,
"learning_rate": 7.983193277310926e-06,
"loss": 0.5903,
"step": 860
},
{
"epoch": 1.2180609030451524,
"grad_norm": 70.0879135131836,
"learning_rate": 8.076563958916901e-06,
"loss": 0.7494,
"step": 870
},
{
"epoch": 1.232061603080154,
"grad_norm": 23.438417434692383,
"learning_rate": 8.169934640522877e-06,
"loss": 0.5092,
"step": 880
},
{
"epoch": 1.2460623031151559,
"grad_norm": 23.705568313598633,
"learning_rate": 8.263305322128852e-06,
"loss": 0.4623,
"step": 890
},
{
"epoch": 1.2600630031501576,
"grad_norm": 54.969390869140625,
"learning_rate": 8.356676003734828e-06,
"loss": 0.6911,
"step": 900
},
{
"epoch": 1.2740637031851594,
"grad_norm": 36.40383529663086,
"learning_rate": 8.450046685340803e-06,
"loss": 0.6824,
"step": 910
},
{
"epoch": 1.2880644032201611,
"grad_norm": 29.539731979370117,
"learning_rate": 8.543417366946779e-06,
"loss": 0.8295,
"step": 920
},
{
"epoch": 1.302065103255163,
"grad_norm": 31.277971267700195,
"learning_rate": 8.636788048552756e-06,
"loss": 0.7891,
"step": 930
},
{
"epoch": 1.3160658032901644,
"grad_norm": 52.39353561401367,
"learning_rate": 8.730158730158731e-06,
"loss": 0.7638,
"step": 940
},
{
"epoch": 1.3300665033251662,
"grad_norm": 25.218196868896484,
"learning_rate": 8.823529411764707e-06,
"loss": 0.6171,
"step": 950
},
{
"epoch": 1.344067203360168,
"grad_norm": 41.754459381103516,
"learning_rate": 8.916900093370682e-06,
"loss": 0.6163,
"step": 960
},
{
"epoch": 1.3580679033951697,
"grad_norm": 25.492141723632812,
"learning_rate": 9.010270774976658e-06,
"loss": 0.5825,
"step": 970
},
{
"epoch": 1.3720686034301715,
"grad_norm": 46.11489486694336,
"learning_rate": 9.103641456582633e-06,
"loss": 0.8171,
"step": 980
},
{
"epoch": 1.3860693034651732,
"grad_norm": 42.723350524902344,
"learning_rate": 9.197012138188609e-06,
"loss": 0.5439,
"step": 990
},
{
"epoch": 1.400070003500175,
"grad_norm": 28.26615333557129,
"learning_rate": 9.281045751633987e-06,
"loss": 0.7337,
"step": 1000
},
{
"epoch": 1.4140707035351767,
"grad_norm": 23.162817001342773,
"learning_rate": 9.374416433239963e-06,
"loss": 0.7653,
"step": 1010
},
{
"epoch": 1.4280714035701785,
"grad_norm": 38.257198333740234,
"learning_rate": 9.467787114845938e-06,
"loss": 0.5201,
"step": 1020
},
{
"epoch": 1.4420721036051802,
"grad_norm": 31.445514678955078,
"learning_rate": 9.561157796451916e-06,
"loss": 0.5803,
"step": 1030
},
{
"epoch": 1.456072803640182,
"grad_norm": 64.94223022460938,
"learning_rate": 9.654528478057891e-06,
"loss": 0.5172,
"step": 1040
},
{
"epoch": 1.4700735036751837,
"grad_norm": 36.16607666015625,
"learning_rate": 9.747899159663867e-06,
"loss": 0.4673,
"step": 1050
},
{
"epoch": 1.4840742037101855,
"grad_norm": 13.556483268737793,
"learning_rate": 9.841269841269842e-06,
"loss": 0.7821,
"step": 1060
},
{
"epoch": 1.4980749037451873,
"grad_norm": 31.29158592224121,
"learning_rate": 9.934640522875818e-06,
"loss": 0.5207,
"step": 1070
},
{
"epoch": 1.512075603780189,
"grad_norm": 13.835955619812012,
"learning_rate": 9.999997609887913e-06,
"loss": 0.5909,
"step": 1080
},
{
"epoch": 1.5260763038151908,
"grad_norm": 64.40877532958984,
"learning_rate": 9.999955119069914e-06,
"loss": 0.6024,
"step": 1090
},
{
"epoch": 1.5400770038501925,
"grad_norm": 20.61574363708496,
"learning_rate": 9.999859515169501e-06,
"loss": 0.5886,
"step": 1100
},
{
"epoch": 1.5540777038851943,
"grad_norm": 71.72521209716797,
"learning_rate": 9.999710799202244e-06,
"loss": 0.7722,
"step": 1110
},
{
"epoch": 1.568078403920196,
"grad_norm": 49.93646240234375,
"learning_rate": 9.999508972747914e-06,
"loss": 0.5671,
"step": 1120
},
{
"epoch": 1.5820791039551978,
"grad_norm": 42.75546646118164,
"learning_rate": 9.999254037950452e-06,
"loss": 0.6359,
"step": 1130
},
{
"epoch": 1.5960798039901996,
"grad_norm": 33.113807678222656,
"learning_rate": 9.998945997517957e-06,
"loss": 0.5196,
"step": 1140
},
{
"epoch": 1.6100805040252013,
"grad_norm": 37.63560104370117,
"learning_rate": 9.998584854722655e-06,
"loss": 0.6019,
"step": 1150
},
{
"epoch": 1.624081204060203,
"grad_norm": 16.140146255493164,
"learning_rate": 9.998170613400862e-06,
"loss": 0.4827,
"step": 1160
},
{
"epoch": 1.6380819040952046,
"grad_norm": 47.94279861450195,
"learning_rate": 9.997703277952944e-06,
"loss": 0.6199,
"step": 1170
},
{
"epoch": 1.6520826041302064,
"grad_norm": 3.9183614253997803,
"learning_rate": 9.99718285334327e-06,
"loss": 0.5419,
"step": 1180
},
{
"epoch": 1.6660833041652081,
"grad_norm": 9.321074485778809,
"learning_rate": 9.99660934510016e-06,
"loss": 0.3374,
"step": 1190
},
{
"epoch": 1.6800840042002099,
"grad_norm": 37.75178909301758,
"learning_rate": 9.99598275931583e-06,
"loss": 0.5618,
"step": 1200
},
{
"epoch": 1.6940847042352116,
"grad_norm": 22.888437271118164,
"learning_rate": 9.995303102646316e-06,
"loss": 0.4591,
"step": 1210
},
{
"epoch": 1.7080854042702134,
"grad_norm": 32.00013732910156,
"learning_rate": 9.99457038231142e-06,
"loss": 0.4901,
"step": 1220
},
{
"epoch": 1.7220861043052151,
"grad_norm": 33.478763580322266,
"learning_rate": 9.993784606094612e-06,
"loss": 0.3999,
"step": 1230
},
{
"epoch": 1.736086804340217,
"grad_norm": 16.17192840576172,
"learning_rate": 9.992945782342973e-06,
"loss": 0.5772,
"step": 1240
},
{
"epoch": 1.7500875043752186,
"grad_norm": 13.883872985839844,
"learning_rate": 9.992053919967084e-06,
"loss": 0.481,
"step": 1250
},
{
"epoch": 1.7640882044102204,
"grad_norm": 45.30279541015625,
"learning_rate": 9.99110902844094e-06,
"loss": 0.5014,
"step": 1260
},
{
"epoch": 1.7780889044452222,
"grad_norm": 22.155324935913086,
"learning_rate": 9.990111117801852e-06,
"loss": 0.7349,
"step": 1270
},
{
"epoch": 1.792089604480224,
"grad_norm": 44.108768463134766,
"learning_rate": 9.989060198650337e-06,
"loss": 0.5384,
"step": 1280
},
{
"epoch": 1.8060903045152257,
"grad_norm": 35.5286865234375,
"learning_rate": 9.987956282150012e-06,
"loss": 0.6392,
"step": 1290
},
{
"epoch": 1.8200910045502274,
"grad_norm": 25.29277229309082,
"learning_rate": 9.986799380027454e-06,
"loss": 0.4703,
"step": 1300
},
{
"epoch": 1.8340917045852292,
"grad_norm": 50.539161682128906,
"learning_rate": 9.985589504572109e-06,
"loss": 0.598,
"step": 1310
},
{
"epoch": 1.848092404620231,
"grad_norm": 37.761070251464844,
"learning_rate": 9.984326668636131e-06,
"loss": 0.6036,
"step": 1320
},
{
"epoch": 1.8620931046552327,
"grad_norm": 28.276853561401367,
"learning_rate": 9.983010885634263e-06,
"loss": 0.7649,
"step": 1330
},
{
"epoch": 1.8760938046902345,
"grad_norm": 46.96693801879883,
"learning_rate": 9.981642169543691e-06,
"loss": 0.7382,
"step": 1340
},
{
"epoch": 1.8900945047252362,
"grad_norm": 32.133148193359375,
"learning_rate": 9.980220534903889e-06,
"loss": 0.774,
"step": 1350
},
{
"epoch": 1.904095204760238,
"grad_norm": 30.42446517944336,
"learning_rate": 9.978745996816473e-06,
"loss": 0.554,
"step": 1360
},
{
"epoch": 1.9180959047952397,
"grad_norm": 37.114498138427734,
"learning_rate": 9.977218570945036e-06,
"loss": 0.5324,
"step": 1370
},
{
"epoch": 1.9320966048302415,
"grad_norm": 24.40557098388672,
"learning_rate": 9.975638273514981e-06,
"loss": 0.4989,
"step": 1380
},
{
"epoch": 1.9460973048652432,
"grad_norm": 41.14036560058594,
"learning_rate": 9.974005121313356e-06,
"loss": 0.5818,
"step": 1390
},
{
"epoch": 1.960098004900245,
"grad_norm": 48.84564971923828,
"learning_rate": 9.972319131688666e-06,
"loss": 0.8088,
"step": 1400
},
{
"epoch": 1.9740987049352468,
"grad_norm": 43.53080368041992,
"learning_rate": 9.97058032255069e-06,
"loss": 0.3958,
"step": 1410
},
{
"epoch": 1.9880994049702485,
"grad_norm": 13.485721588134766,
"learning_rate": 9.968788712370296e-06,
"loss": 0.5372,
"step": 1420
},
{
"epoch": 1.99929996499825,
"eval_f1": 0.7711498702842724,
"eval_loss": 0.5365688800811768,
"eval_precision": 0.7706806893257392,
"eval_recall": 0.7711686494051785,
"eval_runtime": 85.6938,
"eval_samples_per_second": 16.676,
"eval_steps_per_second": 8.344,
"step": 1428
},
{
"epoch": 2.0021001050052503,
"grad_norm": 10.337446212768555,
"learning_rate": 9.966944320179247e-06,
"loss": 0.6646,
"step": 1430
},
{
"epoch": 2.016100805040252,
"grad_norm": 14.425756454467773,
"learning_rate": 9.965047165569985e-06,
"loss": 0.4885,
"step": 1440
},
{
"epoch": 2.0301015050752538,
"grad_norm": 42.77712631225586,
"learning_rate": 9.96309726869544e-06,
"loss": 0.3497,
"step": 1450
},
{
"epoch": 2.0441022051102555,
"grad_norm": 26.410226821899414,
"learning_rate": 9.961094650268803e-06,
"loss": 0.3965,
"step": 1460
},
{
"epoch": 2.0581029051452573,
"grad_norm": 27.177295684814453,
"learning_rate": 9.959039331563315e-06,
"loss": 0.5012,
"step": 1470
},
{
"epoch": 2.072103605180259,
"grad_norm": 13.967148780822754,
"learning_rate": 9.95693133441203e-06,
"loss": 0.4312,
"step": 1480
},
{
"epoch": 2.086104305215261,
"grad_norm": 38.66386032104492,
"learning_rate": 9.954770681207597e-06,
"loss": 0.3715,
"step": 1490
},
{
"epoch": 2.1001050052502626,
"grad_norm": 1.9330135583877563,
"learning_rate": 9.952557394902013e-06,
"loss": 0.366,
"step": 1500
},
{
"epoch": 2.1141057052852643,
"grad_norm": 22.95920753479004,
"learning_rate": 9.95029149900638e-06,
"loss": 0.2283,
"step": 1510
},
{
"epoch": 2.128106405320266,
"grad_norm": 25.927152633666992,
"learning_rate": 9.947973017590655e-06,
"loss": 0.3684,
"step": 1520
},
{
"epoch": 2.142107105355268,
"grad_norm": 9.549031257629395,
"learning_rate": 9.9456019752834e-06,
"loss": 0.3353,
"step": 1530
},
{
"epoch": 2.1561078053902696,
"grad_norm": 16.495037078857422,
"learning_rate": 9.943178397271513e-06,
"loss": 0.4955,
"step": 1540
},
{
"epoch": 2.1701085054252713,
"grad_norm": 9.032498359680176,
"learning_rate": 9.940702309299968e-06,
"loss": 0.234,
"step": 1550
},
{
"epoch": 2.184109205460273,
"grad_norm": 39.27081298828125,
"learning_rate": 9.938173737671531e-06,
"loss": 0.6408,
"step": 1560
},
{
"epoch": 2.198109905495275,
"grad_norm": 22.681249618530273,
"learning_rate": 9.935592709246489e-06,
"loss": 0.3353,
"step": 1570
},
{
"epoch": 2.2121106055302766,
"grad_norm": 35.17754364013672,
"learning_rate": 9.932959251442366e-06,
"loss": 0.4544,
"step": 1580
},
{
"epoch": 2.2261113055652784,
"grad_norm": 31.855209350585938,
"learning_rate": 9.930273392233624e-06,
"loss": 0.4868,
"step": 1590
},
{
"epoch": 2.24011200560028,
"grad_norm": 24.157930374145508,
"learning_rate": 9.92753516015137e-06,
"loss": 0.3609,
"step": 1600
},
{
"epoch": 2.254112705635282,
"grad_norm": 22.68416404724121,
"learning_rate": 9.92474458428306e-06,
"loss": 0.3278,
"step": 1610
},
{
"epoch": 2.2681134056702836,
"grad_norm": 4.546386241912842,
"learning_rate": 9.92190169427217e-06,
"loss": 0.3899,
"step": 1620
},
{
"epoch": 2.2821141057052854,
"grad_norm": 14.179669380187988,
"learning_rate": 9.919006520317903e-06,
"loss": 0.1604,
"step": 1630
},
{
"epoch": 2.296114805740287,
"grad_norm": 32.7485237121582,
"learning_rate": 9.916059093174862e-06,
"loss": 0.3212,
"step": 1640
},
{
"epoch": 2.310115505775289,
"grad_norm": 31.538394927978516,
"learning_rate": 9.913059444152711e-06,
"loss": 0.2861,
"step": 1650
},
{
"epoch": 2.3241162058102907,
"grad_norm": 49.36025619506836,
"learning_rate": 9.910007605115861e-06,
"loss": 0.4726,
"step": 1660
},
{
"epoch": 2.3381169058452924,
"grad_norm": 32.57158660888672,
"learning_rate": 9.906903608483116e-06,
"loss": 0.3142,
"step": 1670
},
{
"epoch": 2.352117605880294,
"grad_norm": 56.48556900024414,
"learning_rate": 9.903747487227339e-06,
"loss": 0.389,
"step": 1680
},
{
"epoch": 2.366118305915296,
"grad_norm": 20.73760414123535,
"learning_rate": 9.900539274875098e-06,
"loss": 0.2501,
"step": 1690
},
{
"epoch": 2.3801190059502977,
"grad_norm": 24.18535614013672,
"learning_rate": 9.897279005506306e-06,
"loss": 0.348,
"step": 1700
},
{
"epoch": 2.3941197059852994,
"grad_norm": 31.34202766418457,
"learning_rate": 9.893966713753864e-06,
"loss": 0.2551,
"step": 1710
},
{
"epoch": 2.408120406020301,
"grad_norm": 36.77047348022461,
"learning_rate": 9.890602434803296e-06,
"loss": 0.3296,
"step": 1720
},
{
"epoch": 2.422121106055303,
"grad_norm": 30.424179077148438,
"learning_rate": 9.887186204392368e-06,
"loss": 0.4624,
"step": 1730
},
{
"epoch": 2.4361218060903047,
"grad_norm": 37.91468811035156,
"learning_rate": 9.883718058810708e-06,
"loss": 0.605,
"step": 1740
},
{
"epoch": 2.4501225061253065,
"grad_norm": 20.114219665527344,
"learning_rate": 9.880198034899428e-06,
"loss": 0.5256,
"step": 1750
},
{
"epoch": 2.464123206160308,
"grad_norm": 32.55492401123047,
"learning_rate": 9.87662617005073e-06,
"loss": 0.4726,
"step": 1760
},
{
"epoch": 2.47812390619531,
"grad_norm": 24.271276473999023,
"learning_rate": 9.873002502207502e-06,
"loss": 0.2098,
"step": 1770
},
{
"epoch": 2.4921246062303117,
"grad_norm": 10.173416137695312,
"learning_rate": 9.869327069862924e-06,
"loss": 0.4108,
"step": 1780
},
{
"epoch": 2.5061253062653135,
"grad_norm": 22.572994232177734,
"learning_rate": 9.865599912060058e-06,
"loss": 0.3146,
"step": 1790
},
{
"epoch": 2.5201260063003152,
"grad_norm": 34.47822952270508,
"learning_rate": 9.861821068391424e-06,
"loss": 0.2967,
"step": 1800
},
{
"epoch": 2.534126706335317,
"grad_norm": 51.81456756591797,
"learning_rate": 9.857990578998589e-06,
"loss": 0.4418,
"step": 1810
},
{
"epoch": 2.5481274063703188,
"grad_norm": 29.156999588012695,
"learning_rate": 9.85410848457174e-06,
"loss": 0.2793,
"step": 1820
},
{
"epoch": 2.5621281064053205,
"grad_norm": 39.23643493652344,
"learning_rate": 9.850174826349246e-06,
"loss": 0.44,
"step": 1830
},
{
"epoch": 2.5761288064403223,
"grad_norm": 17.66337013244629,
"learning_rate": 9.846189646117224e-06,
"loss": 0.2916,
"step": 1840
},
{
"epoch": 2.590129506475324,
"grad_norm": 4.114482879638672,
"learning_rate": 9.842152986209098e-06,
"loss": 0.2428,
"step": 1850
},
{
"epoch": 2.604130206510326,
"grad_norm": 31.08041763305664,
"learning_rate": 9.83806488950514e-06,
"loss": 0.3134,
"step": 1860
},
{
"epoch": 2.6181309065453275,
"grad_norm": 34.51068115234375,
"learning_rate": 9.833925399432026e-06,
"loss": 0.3659,
"step": 1870
},
{
"epoch": 2.632131606580329,
"grad_norm": 6.492781639099121,
"learning_rate": 9.829734559962365e-06,
"loss": 0.4468,
"step": 1880
},
{
"epoch": 2.6461323066153306,
"grad_norm": 10.110315322875977,
"learning_rate": 9.825492415614235e-06,
"loss": 0.2452,
"step": 1890
},
{
"epoch": 2.6601330066503324,
"grad_norm": 27.26378631591797,
"learning_rate": 9.821199011450717e-06,
"loss": 0.1747,
"step": 1900
},
{
"epoch": 2.674133706685334,
"grad_norm": 1.9608994722366333,
"learning_rate": 9.816854393079402e-06,
"loss": 0.3288,
"step": 1910
},
{
"epoch": 2.688134406720336,
"grad_norm": 36.11700439453125,
"learning_rate": 9.812458606651922e-06,
"loss": 0.3384,
"step": 1920
},
{
"epoch": 2.7021351067553376,
"grad_norm": 11.049883842468262,
"learning_rate": 9.808011698863449e-06,
"loss": 0.5968,
"step": 1930
},
{
"epoch": 2.7161358067903394,
"grad_norm": 20.612592697143555,
"learning_rate": 9.803513716952203e-06,
"loss": 0.3655,
"step": 1940
},
{
"epoch": 2.730136506825341,
"grad_norm": 4.569857597351074,
"learning_rate": 9.798964708698947e-06,
"loss": 0.1961,
"step": 1950
},
{
"epoch": 2.744137206860343,
"grad_norm": 19.752294540405273,
"learning_rate": 9.794364722426488e-06,
"loss": 0.328,
"step": 1960
},
{
"epoch": 2.7581379068953447,
"grad_norm": 29.28534507751465,
"learning_rate": 9.789713806999154e-06,
"loss": 0.4217,
"step": 1970
},
{
"epoch": 2.7721386069303464,
"grad_norm": 38.84592056274414,
"learning_rate": 9.78501201182228e-06,
"loss": 0.3804,
"step": 1980
},
{
"epoch": 2.786139306965348,
"grad_norm": 30.860523223876953,
"learning_rate": 9.780259386841678e-06,
"loss": 0.3784,
"step": 1990
},
{
"epoch": 2.80014000700035,
"grad_norm": 0.2881031632423401,
"learning_rate": 9.775455982543116e-06,
"loss": 0.2319,
"step": 2000
},
{
"epoch": 2.8141407070353517,
"grad_norm": 9.092655181884766,
"learning_rate": 9.770601849951776e-06,
"loss": 0.2303,
"step": 2010
},
{
"epoch": 2.8281414070703534,
"grad_norm": 21.23202896118164,
"learning_rate": 9.765697040631703e-06,
"loss": 0.3774,
"step": 2020
},
{
"epoch": 2.842142107105355,
"grad_norm": 15.877593994140625,
"learning_rate": 9.760741606685282e-06,
"loss": 0.4424,
"step": 2030
},
{
"epoch": 2.856142807140357,
"grad_norm": 24.96131134033203,
"learning_rate": 9.755735600752652e-06,
"loss": 0.415,
"step": 2040
},
{
"epoch": 2.8701435071753587,
"grad_norm": 46.10403060913086,
"learning_rate": 9.750679076011175e-06,
"loss": 0.3205,
"step": 2050
},
{
"epoch": 2.8841442072103605,
"grad_norm": 49.756649017333984,
"learning_rate": 9.745572086174857e-06,
"loss": 0.4478,
"step": 2060
},
{
"epoch": 2.8981449072453622,
"grad_norm": 42.11957931518555,
"learning_rate": 9.740414685493777e-06,
"loss": 0.4191,
"step": 2070
},
{
"epoch": 2.912145607280364,
"grad_norm": 31.03801918029785,
"learning_rate": 9.735206928753518e-06,
"loss": 0.2388,
"step": 2080
},
{
"epoch": 2.9261463073153657,
"grad_norm": 47.63282775878906,
"learning_rate": 9.729948871274579e-06,
"loss": 0.6498,
"step": 2090
},
{
"epoch": 2.9401470073503675,
"grad_norm": 28.83293342590332,
"learning_rate": 9.724640568911788e-06,
"loss": 0.3742,
"step": 2100
},
{
"epoch": 2.9541477073853692,
"grad_norm": 5.075782775878906,
"learning_rate": 9.719282078053713e-06,
"loss": 0.361,
"step": 2110
},
{
"epoch": 2.968148407420371,
"grad_norm": 15.41346263885498,
"learning_rate": 9.713873455622058e-06,
"loss": 0.268,
"step": 2120
},
{
"epoch": 2.9821491074553728,
"grad_norm": 22.061138153076172,
"learning_rate": 9.70841475907106e-06,
"loss": 0.5841,
"step": 2130
},
{
"epoch": 2.9961498074903745,
"grad_norm": 29.209182739257812,
"learning_rate": 9.702906046386878e-06,
"loss": 0.285,
"step": 2140
},
{
"epoch": 2.998949947497375,
"eval_f1": 0.7860625313331145,
"eval_loss": 0.5379942655563354,
"eval_precision": 0.7910525243809028,
"eval_recall": 0.7858642407277817,
"eval_runtime": 85.7771,
"eval_samples_per_second": 16.659,
"eval_steps_per_second": 8.336,
"step": 2142
},
{
"epoch": 3.0101505075253763,
"grad_norm": 25.932525634765625,
"learning_rate": 9.69734737608698e-06,
"loss": 0.3992,
"step": 2150
},
{
"epoch": 3.024151207560378,
"grad_norm": 8.238982200622559,
"learning_rate": 9.692301907847981e-06,
"loss": 0.2451,
"step": 2160
},
{
"epoch": 3.03815190759538,
"grad_norm": 20.88953971862793,
"learning_rate": 9.686648481193994e-06,
"loss": 0.2381,
"step": 2170
},
{
"epoch": 3.0521526076303815,
"grad_norm": 52.287017822265625,
"learning_rate": 9.68094526962372e-06,
"loss": 0.2175,
"step": 2180
},
{
"epoch": 3.0661533076653833,
"grad_norm": 14.914299011230469,
"learning_rate": 9.675192333720735e-06,
"loss": 0.2471,
"step": 2190
},
{
"epoch": 3.080154007700385,
"grad_norm": 27.594337463378906,
"learning_rate": 9.669389734596819e-06,
"loss": 0.3744,
"step": 2200
},
{
"epoch": 3.094154707735387,
"grad_norm": 37.43633270263672,
"learning_rate": 9.66353753389131e-06,
"loss": 0.1572,
"step": 2210
},
{
"epoch": 3.1081554077703886,
"grad_norm": 39.02608871459961,
"learning_rate": 9.65763579377045e-06,
"loss": 0.1469,
"step": 2220
},
{
"epoch": 3.1221561078053903,
"grad_norm": 1.7573050260543823,
"learning_rate": 9.651684576926721e-06,
"loss": 0.2838,
"step": 2230
},
{
"epoch": 3.136156807840392,
"grad_norm": 19.469436645507812,
"learning_rate": 9.645683946578189e-06,
"loss": 0.3036,
"step": 2240
},
{
"epoch": 3.150157507875394,
"grad_norm": 19.4732723236084,
"learning_rate": 9.639633966467817e-06,
"loss": 0.2174,
"step": 2250
},
{
"epoch": 3.1641582079103956,
"grad_norm": 36.7767448425293,
"learning_rate": 9.633534700862804e-06,
"loss": 0.2017,
"step": 2260
},
{
"epoch": 3.1781589079453973,
"grad_norm": 27.999431610107422,
"learning_rate": 9.627386214553886e-06,
"loss": 0.2658,
"step": 2270
},
{
"epoch": 3.192159607980399,
"grad_norm": 29.238040924072266,
"learning_rate": 9.621188572854668e-06,
"loss": 0.1646,
"step": 2280
},
{
"epoch": 3.206160308015401,
"grad_norm": 22.505971908569336,
"learning_rate": 9.614941841600905e-06,
"loss": 0.1813,
"step": 2290
},
{
"epoch": 3.2201610080504026,
"grad_norm": 3.6057894229888916,
"learning_rate": 9.608646087149826e-06,
"loss": 0.101,
"step": 2300
},
{
"epoch": 3.2341617080854044,
"grad_norm": 30.187728881835938,
"learning_rate": 9.60230137637942e-06,
"loss": 0.2003,
"step": 2310
},
{
"epoch": 3.248162408120406,
"grad_norm": 39.583683013916016,
"learning_rate": 9.595907776687715e-06,
"loss": 0.2168,
"step": 2320
},
{
"epoch": 3.262163108155408,
"grad_norm": 8.797574996948242,
"learning_rate": 9.58946535599208e-06,
"loss": 0.243,
"step": 2330
},
{
"epoch": 3.2761638081904096,
"grad_norm": 42.47231674194336,
"learning_rate": 9.582974182728497e-06,
"loss": 0.269,
"step": 2340
},
{
"epoch": 3.2901645082254114,
"grad_norm": 38.17387008666992,
"learning_rate": 9.576434325850824e-06,
"loss": 0.3031,
"step": 2350
},
{
"epoch": 3.304165208260413,
"grad_norm": 1.5917739868164062,
"learning_rate": 9.56984585483008e-06,
"loss": 0.3918,
"step": 2360
},
{
"epoch": 3.318165908295415,
"grad_norm": 27.6890869140625,
"learning_rate": 9.56320883965369e-06,
"loss": 0.2479,
"step": 2370
},
{
"epoch": 3.3321666083304167,
"grad_norm": 21.0488224029541,
"learning_rate": 9.556523350824759e-06,
"loss": 0.186,
"step": 2380
},
{
"epoch": 3.3461673083654184,
"grad_norm": 1.7114077806472778,
"learning_rate": 9.549789459361303e-06,
"loss": 0.418,
"step": 2390
},
{
"epoch": 3.36016800840042,
"grad_norm": 23.43485450744629,
"learning_rate": 9.543007236795513e-06,
"loss": 0.1727,
"step": 2400
},
{
"epoch": 3.374168708435422,
"grad_norm": 37.15725326538086,
"learning_rate": 9.536176755172988e-06,
"loss": 0.1121,
"step": 2410
},
{
"epoch": 3.3881694084704237,
"grad_norm": 11.059165954589844,
"learning_rate": 9.52929808705196e-06,
"loss": 0.2375,
"step": 2420
},
{
"epoch": 3.4021701085054254,
"grad_norm": 72.36428833007812,
"learning_rate": 9.522371305502542e-06,
"loss": 0.2272,
"step": 2430
},
{
"epoch": 3.416170808540427,
"grad_norm": 0.8613013029098511,
"learning_rate": 9.515396484105938e-06,
"loss": 0.0909,
"step": 2440
},
{
"epoch": 3.430171508575429,
"grad_norm": 0.07998291403055191,
"learning_rate": 9.508373696953664e-06,
"loss": 0.1642,
"step": 2450
},
{
"epoch": 3.4441722086104303,
"grad_norm": 5.251307010650635,
"learning_rate": 9.501303018646766e-06,
"loss": 0.0964,
"step": 2460
},
{
"epoch": 3.458172908645432,
"grad_norm": 0.03508025407791138,
"learning_rate": 9.494184524295023e-06,
"loss": 0.3244,
"step": 2470
},
{
"epoch": 3.472173608680434,
"grad_norm": 22.236034393310547,
"learning_rate": 9.487018289516146e-06,
"loss": 0.2749,
"step": 2480
},
{
"epoch": 3.4861743087154355,
"grad_norm": 5.1189703941345215,
"learning_rate": 9.479804390434983e-06,
"loss": 0.2798,
"step": 2490
},
{
"epoch": 3.5001750087504373,
"grad_norm": 0.6922470331192017,
"learning_rate": 9.472542903682708e-06,
"loss": 0.3294,
"step": 2500
},
{
"epoch": 3.514175708785439,
"grad_norm": 1.1609469652175903,
"learning_rate": 9.465233906395998e-06,
"loss": 0.3309,
"step": 2510
},
{
"epoch": 3.528176408820441,
"grad_norm": 1.238772988319397,
"learning_rate": 9.457877476216228e-06,
"loss": 0.1799,
"step": 2520
},
{
"epoch": 3.5421771088554426,
"grad_norm": 5.87846040725708,
"learning_rate": 9.450473691288637e-06,
"loss": 0.2434,
"step": 2530
},
{
"epoch": 3.5561778088904443,
"grad_norm": 3.064596176147461,
"learning_rate": 9.443022630261495e-06,
"loss": 0.1289,
"step": 2540
},
{
"epoch": 3.570178508925446,
"grad_norm": 12.347147941589355,
"learning_rate": 9.435524372285279e-06,
"loss": 0.2065,
"step": 2550
},
{
"epoch": 3.584179208960448,
"grad_norm": 42.32487487792969,
"learning_rate": 9.42797899701182e-06,
"loss": 0.2232,
"step": 2560
},
{
"epoch": 3.5981799089954496,
"grad_norm": 21.179861068725586,
"learning_rate": 9.420386584593469e-06,
"loss": 0.1303,
"step": 2570
},
{
"epoch": 3.6121806090304514,
"grad_norm": 13.074398040771484,
"learning_rate": 9.412747215682231e-06,
"loss": 0.3421,
"step": 2580
},
{
"epoch": 3.626181309065453,
"grad_norm": 7.620582103729248,
"learning_rate": 9.405060971428924e-06,
"loss": 0.1988,
"step": 2590
},
{
"epoch": 3.640182009100455,
"grad_norm": 0.4482537806034088,
"learning_rate": 9.397327933482303e-06,
"loss": 0.183,
"step": 2600
},
{
"epoch": 3.6541827091354566,
"grad_norm": 3.92740535736084,
"learning_rate": 9.389548183988204e-06,
"loss": 0.1766,
"step": 2610
},
{
"epoch": 3.6681834091704584,
"grad_norm": 18.18442153930664,
"learning_rate": 9.381721805588663e-06,
"loss": 0.2622,
"step": 2620
},
{
"epoch": 3.68218410920546,
"grad_norm": 3.5551092624664307,
"learning_rate": 9.373848881421045e-06,
"loss": 0.1895,
"step": 2630
},
{
"epoch": 3.696184809240462,
"grad_norm": 0.5254050493240356,
"learning_rate": 9.36592949511715e-06,
"loss": 0.1418,
"step": 2640
},
{
"epoch": 3.7101855092754636,
"grad_norm": 44.7007942199707,
"learning_rate": 9.35796373080234e-06,
"loss": 0.3657,
"step": 2650
},
{
"epoch": 3.7241862093104654,
"grad_norm": 14.073615074157715,
"learning_rate": 9.349951673094633e-06,
"loss": 0.4366,
"step": 2660
},
{
"epoch": 3.738186909345467,
"grad_norm": 30.277467727661133,
"learning_rate": 9.341893407103808e-06,
"loss": 0.1471,
"step": 2670
},
{
"epoch": 3.752187609380469,
"grad_norm": 41.96673583984375,
"learning_rate": 9.333789018430505e-06,
"loss": 0.3122,
"step": 2680
},
{
"epoch": 3.7661883094154707,
"grad_norm": 22.88213539123535,
"learning_rate": 9.325638593165308e-06,
"loss": 0.2792,
"step": 2690
},
{
"epoch": 3.7801890094504724,
"grad_norm": 44.49440383911133,
"learning_rate": 9.317442217887835e-06,
"loss": 0.286,
"step": 2700
},
{
"epoch": 3.794189709485474,
"grad_norm": 70.30220794677734,
"learning_rate": 9.309199979665821e-06,
"loss": 0.3806,
"step": 2710
},
{
"epoch": 3.808190409520476,
"grad_norm": 21.966384887695312,
"learning_rate": 9.300911966054184e-06,
"loss": 0.2567,
"step": 2720
},
{
"epoch": 3.8221911095554777,
"grad_norm": 1.360113501548767,
"learning_rate": 9.292578265094109e-06,
"loss": 0.2901,
"step": 2730
},
{
"epoch": 3.8361918095904795,
"grad_norm": 0.4982340931892395,
"learning_rate": 9.284198965312096e-06,
"loss": 0.261,
"step": 2740
},
{
"epoch": 3.850192509625481,
"grad_norm": 24.56475067138672,
"learning_rate": 9.275774155719032e-06,
"loss": 0.2213,
"step": 2750
},
{
"epoch": 3.864193209660483,
"grad_norm": 36.820167541503906,
"learning_rate": 9.267303925809246e-06,
"loss": 0.1971,
"step": 2760
},
{
"epoch": 3.8781939096954847,
"grad_norm": 12.52115535736084,
"learning_rate": 9.258788365559543e-06,
"loss": 0.247,
"step": 2770
},
{
"epoch": 3.8921946097304865,
"grad_norm": 14.991462707519531,
"learning_rate": 9.25022756542827e-06,
"loss": 0.109,
"step": 2780
},
{
"epoch": 3.9061953097654882,
"grad_norm": 5.062511920928955,
"learning_rate": 9.24162161635434e-06,
"loss": 0.2759,
"step": 2790
},
{
"epoch": 3.92019600980049,
"grad_norm": 1.6984614133834839,
"learning_rate": 9.232970609756267e-06,
"loss": 0.2059,
"step": 2800
},
{
"epoch": 3.9341967098354917,
"grad_norm": 26.058155059814453,
"learning_rate": 9.224274637531204e-06,
"loss": 0.2886,
"step": 2810
},
{
"epoch": 3.9481974098704935,
"grad_norm": 64.70281219482422,
"learning_rate": 9.215533792053957e-06,
"loss": 0.204,
"step": 2820
},
{
"epoch": 3.9621981099054953,
"grad_norm": 0.972621738910675,
"learning_rate": 9.20674816617601e-06,
"loss": 0.2112,
"step": 2830
},
{
"epoch": 3.976198809940497,
"grad_norm": 29.663774490356445,
"learning_rate": 9.197917853224531e-06,
"loss": 0.1863,
"step": 2840
},
{
"epoch": 3.9901995099754988,
"grad_norm": 11.654136657714844,
"learning_rate": 9.189042947001395e-06,
"loss": 0.2924,
"step": 2850
},
{
"epoch": 4.0,
"eval_f1": 0.800743336162284,
"eval_loss": 0.6182411313056946,
"eval_precision": 0.8053584373354589,
"eval_recall": 0.8005598320503848,
"eval_runtime": 85.7236,
"eval_samples_per_second": 16.67,
"eval_steps_per_second": 8.341,
"step": 2857
},
{
"epoch": 4.0042002100105005,
"grad_norm": 0.10247134417295456,
"learning_rate": 9.180123541782172e-06,
"loss": 0.1368,
"step": 2860
},
{
"epoch": 4.018200910045502,
"grad_norm": 0.11311420053243637,
"learning_rate": 9.171159732315129e-06,
"loss": 0.0733,
"step": 2870
},
{
"epoch": 4.032201610080504,
"grad_norm": 43.14177703857422,
"learning_rate": 9.162151613820236e-06,
"loss": 0.1013,
"step": 2880
},
{
"epoch": 4.046202310115506,
"grad_norm": 41.66696548461914,
"learning_rate": 9.153099281988138e-06,
"loss": 0.201,
"step": 2890
},
{
"epoch": 4.0602030101505076,
"grad_norm": 3.4230246543884277,
"learning_rate": 9.144002832979149e-06,
"loss": 0.0481,
"step": 2900
},
{
"epoch": 4.074203710185509,
"grad_norm": 55.94377517700195,
"learning_rate": 9.134862363422223e-06,
"loss": 0.2921,
"step": 2910
},
{
"epoch": 4.088204410220511,
"grad_norm": 1.319650650024414,
"learning_rate": 9.125677970413935e-06,
"loss": 0.0984,
"step": 2920
},
{
"epoch": 4.102205110255513,
"grad_norm": 15.05123519897461,
"learning_rate": 9.116449751517448e-06,
"loss": 0.2502,
"step": 2930
},
{
"epoch": 4.116205810290515,
"grad_norm": 0.4123871922492981,
"learning_rate": 9.107177804761468e-06,
"loss": 0.1097,
"step": 2940
},
{
"epoch": 4.130206510325516,
"grad_norm": 90.21160888671875,
"learning_rate": 9.097862228639216e-06,
"loss": 0.172,
"step": 2950
},
{
"epoch": 4.144207210360518,
"grad_norm": 0.02424849569797516,
"learning_rate": 9.088503122107371e-06,
"loss": 0.2203,
"step": 2960
},
{
"epoch": 4.15820791039552,
"grad_norm": 14.979598045349121,
"learning_rate": 9.079100584585027e-06,
"loss": 0.0154,
"step": 2970
},
{
"epoch": 4.172208610430522,
"grad_norm": 11.164504051208496,
"learning_rate": 9.06965471595263e-06,
"loss": 0.1461,
"step": 2980
},
{
"epoch": 4.186209310465523,
"grad_norm": 73.4662094116211,
"learning_rate": 9.060165616550918e-06,
"loss": 0.2008,
"step": 2990
},
{
"epoch": 4.200210010500525,
"grad_norm": 0.1721985638141632,
"learning_rate": 9.050633387179861e-06,
"loss": 0.0428,
"step": 3000
},
{
"epoch": 4.214210710535527,
"grad_norm": 51.70530319213867,
"learning_rate": 9.041058129097586e-06,
"loss": 0.2124,
"step": 3010
},
{
"epoch": 4.228211410570529,
"grad_norm": 33.774085998535156,
"learning_rate": 9.031439944019302e-06,
"loss": 0.1633,
"step": 3020
},
{
"epoch": 4.24221211060553,
"grad_norm": 0.30398979783058167,
"learning_rate": 9.021778934116212e-06,
"loss": 0.2778,
"step": 3030
},
{
"epoch": 4.256212810640532,
"grad_norm": 19.708505630493164,
"learning_rate": 9.012075202014444e-06,
"loss": 0.0801,
"step": 3040
},
{
"epoch": 4.270213510675534,
"grad_norm": 20.29991340637207,
"learning_rate": 9.002328850793946e-06,
"loss": 0.2146,
"step": 3050
},
{
"epoch": 4.284214210710536,
"grad_norm": 9.303946495056152,
"learning_rate": 8.992539983987401e-06,
"loss": 0.1327,
"step": 3060
},
{
"epoch": 4.298214910745537,
"grad_norm": 43.80442428588867,
"learning_rate": 8.982708705579119e-06,
"loss": 0.2858,
"step": 3070
},
{
"epoch": 4.312215610780539,
"grad_norm": 3.160637855529785,
"learning_rate": 8.972835120003936e-06,
"loss": 0.0411,
"step": 3080
},
{
"epoch": 4.326216310815541,
"grad_norm": 0.1867658942937851,
"learning_rate": 8.962919332146107e-06,
"loss": 0.1155,
"step": 3090
},
{
"epoch": 4.340217010850543,
"grad_norm": 41.887699127197266,
"learning_rate": 8.952961447338192e-06,
"loss": 0.1599,
"step": 3100
},
{
"epoch": 4.354217710885544,
"grad_norm": 49.10065841674805,
"learning_rate": 8.942961571359927e-06,
"loss": 0.1876,
"step": 3110
},
{
"epoch": 4.368218410920546,
"grad_norm": 33.752349853515625,
"learning_rate": 8.932919810437117e-06,
"loss": 0.1184,
"step": 3120
},
{
"epoch": 4.382219110955548,
"grad_norm": 33.159061431884766,
"learning_rate": 8.92283627124049e-06,
"loss": 0.2899,
"step": 3130
},
{
"epoch": 4.39621981099055,
"grad_norm": 0.13434840738773346,
"learning_rate": 8.912711060884585e-06,
"loss": 0.1228,
"step": 3140
},
{
"epoch": 4.4102205110255515,
"grad_norm": 9.426471710205078,
"learning_rate": 8.902544286926585e-06,
"loss": 0.2344,
"step": 3150
},
{
"epoch": 4.424221211060553,
"grad_norm": 0.20675937831401825,
"learning_rate": 8.892336057365204e-06,
"loss": 0.2142,
"step": 3160
},
{
"epoch": 4.438221911095555,
"grad_norm": 24.473310470581055,
"learning_rate": 8.882086480639526e-06,
"loss": 0.3472,
"step": 3170
},
{
"epoch": 4.452222611130557,
"grad_norm": 54.27497100830078,
"learning_rate": 8.871795665627845e-06,
"loss": 0.1241,
"step": 3180
},
{
"epoch": 4.4662233111655585,
"grad_norm": 26.60704803466797,
"learning_rate": 8.861463721646528e-06,
"loss": 0.1468,
"step": 3190
},
{
"epoch": 4.48022401120056,
"grad_norm": 1.4064515829086304,
"learning_rate": 8.851090758448836e-06,
"loss": 0.3217,
"step": 3200
},
{
"epoch": 4.494224711235562,
"grad_norm": 6.67008113861084,
"learning_rate": 8.840676886223768e-06,
"loss": 0.0564,
"step": 3210
},
{
"epoch": 4.508225411270564,
"grad_norm": 4.312970161437988,
"learning_rate": 8.83022221559489e-06,
"loss": 0.1251,
"step": 3220
},
{
"epoch": 4.5222261113055655,
"grad_norm": 15.902132987976074,
"learning_rate": 8.819726857619156e-06,
"loss": 0.0224,
"step": 3230
},
{
"epoch": 4.536226811340567,
"grad_norm": 10.105917930603027,
"learning_rate": 8.809190923785724e-06,
"loss": 0.1252,
"step": 3240
},
{
"epoch": 4.550227511375569,
"grad_norm": 0.36241552233695984,
"learning_rate": 8.798614526014786e-06,
"loss": 0.2363,
"step": 3250
},
{
"epoch": 4.564228211410571,
"grad_norm": 26.39388656616211,
"learning_rate": 8.78799777665637e-06,
"loss": 0.1994,
"step": 3260
},
{
"epoch": 4.5782289114455725,
"grad_norm": 0.08990269154310226,
"learning_rate": 8.777340788489145e-06,
"loss": 0.0985,
"step": 3270
},
{
"epoch": 4.592229611480574,
"grad_norm": 0.17877697944641113,
"learning_rate": 8.76664367471922e-06,
"loss": 0.0589,
"step": 3280
},
{
"epoch": 4.606230311515576,
"grad_norm": 0.4577700197696686,
"learning_rate": 8.755906548978957e-06,
"loss": 0.1041,
"step": 3290
},
{
"epoch": 4.620231011550578,
"grad_norm": 29.546653747558594,
"learning_rate": 8.745129525325746e-06,
"loss": 0.2561,
"step": 3300
},
{
"epoch": 4.63423171158558,
"grad_norm": 6.642750263214111,
"learning_rate": 8.734312718240807e-06,
"loss": 0.2921,
"step": 3310
},
{
"epoch": 4.648232411620581,
"grad_norm": 1.9864214658737183,
"learning_rate": 8.723456242627961e-06,
"loss": 0.0626,
"step": 3320
},
{
"epoch": 4.662233111655583,
"grad_norm": 0.28439265489578247,
"learning_rate": 8.712560213812421e-06,
"loss": 0.1834,
"step": 3330
},
{
"epoch": 4.676233811690585,
"grad_norm": 14.246397018432617,
"learning_rate": 8.701624747539563e-06,
"loss": 0.1986,
"step": 3340
},
{
"epoch": 4.690234511725587,
"grad_norm": 15.254389762878418,
"learning_rate": 8.690649959973693e-06,
"loss": 0.0628,
"step": 3350
},
{
"epoch": 4.704235211760588,
"grad_norm": 3.156299352645874,
"learning_rate": 8.679635967696815e-06,
"loss": 0.0524,
"step": 3360
},
{
"epoch": 4.71823591179559,
"grad_norm": 12.631438255310059,
"learning_rate": 8.6685828877074e-06,
"loss": 0.2967,
"step": 3370
},
{
"epoch": 4.732236611830592,
"grad_norm": 41.207496643066406,
"learning_rate": 8.657490837419124e-06,
"loss": 0.1632,
"step": 3380
},
{
"epoch": 4.746237311865594,
"grad_norm": 1.7955949306488037,
"learning_rate": 8.646359934659648e-06,
"loss": 0.1216,
"step": 3390
},
{
"epoch": 4.760238011900595,
"grad_norm": 39.87535858154297,
"learning_rate": 8.63519029766934e-06,
"loss": 0.1489,
"step": 3400
},
{
"epoch": 4.774238711935597,
"grad_norm": 43.35482406616211,
"learning_rate": 8.623982045100037e-06,
"loss": 0.0805,
"step": 3410
},
{
"epoch": 4.788239411970599,
"grad_norm": 5.584963798522949,
"learning_rate": 8.612735296013777e-06,
"loss": 0.1821,
"step": 3420
},
{
"epoch": 4.802240112005601,
"grad_norm": 2.1387903690338135,
"learning_rate": 8.601450169881533e-06,
"loss": 0.1858,
"step": 3430
},
{
"epoch": 4.816240812040602,
"grad_norm": 19.0107421875,
"learning_rate": 8.590126786581948e-06,
"loss": 0.2241,
"step": 3440
},
{
"epoch": 4.830241512075604,
"grad_norm": 3.1484501361846924,
"learning_rate": 8.57876526640006e-06,
"loss": 0.1577,
"step": 3450
},
{
"epoch": 4.844242212110606,
"grad_norm": 0.8619286417961121,
"learning_rate": 8.567365730026025e-06,
"loss": 0.2343,
"step": 3460
},
{
"epoch": 4.858242912145608,
"grad_norm": 17.187009811401367,
"learning_rate": 8.55592829855383e-06,
"loss": 0.2091,
"step": 3470
},
{
"epoch": 4.872243612180609,
"grad_norm": 17.190269470214844,
"learning_rate": 8.544453093480017e-06,
"loss": 0.2174,
"step": 3480
},
{
"epoch": 4.886244312215611,
"grad_norm": 2.3351292610168457,
"learning_rate": 8.53294023670238e-06,
"loss": 0.166,
"step": 3490
},
{
"epoch": 4.900245012250613,
"grad_norm": 17.254716873168945,
"learning_rate": 8.521389850518682e-06,
"loss": 0.1901,
"step": 3500
},
{
"epoch": 4.914245712285615,
"grad_norm": 43.53018569946289,
"learning_rate": 8.509802057625345e-06,
"loss": 0.1017,
"step": 3510
},
{
"epoch": 4.928246412320616,
"grad_norm": 1.7526437044143677,
"learning_rate": 8.498176981116152e-06,
"loss": 0.108,
"step": 3520
},
{
"epoch": 4.942247112355618,
"grad_norm": 1.4841587543487549,
"learning_rate": 8.486514744480946e-06,
"loss": 0.1514,
"step": 3530
},
{
"epoch": 4.95624781239062,
"grad_norm": 64.86244201660156,
"learning_rate": 8.474815471604303e-06,
"loss": 0.2551,
"step": 3540
},
{
"epoch": 4.970248512425622,
"grad_norm": 4.625051498413086,
"learning_rate": 8.463079286764224e-06,
"loss": 0.144,
"step": 3550
},
{
"epoch": 4.9842492124606235,
"grad_norm": 2.651116132736206,
"learning_rate": 8.451306314630825e-06,
"loss": 0.2379,
"step": 3560
},
{
"epoch": 4.998249912495625,
"grad_norm": 2.087207078933716,
"learning_rate": 8.439496680264993e-06,
"loss": 0.0657,
"step": 3570
},
{
"epoch": 4.999649982499125,
"eval_f1": 0.8029782235248062,
"eval_loss": 0.6280491352081299,
"eval_precision": 0.8110370007061343,
"eval_recall": 0.8026592022393282,
"eval_runtime": 85.6522,
"eval_samples_per_second": 16.684,
"eval_steps_per_second": 8.348,
"step": 3571
},
{
"epoch": 5.012250612530626,
"grad_norm": 0.1314196139574051,
"learning_rate": 8.42765050911707e-06,
"loss": 0.0052,
"step": 3580
},
{
"epoch": 5.026251312565628,
"grad_norm": 0.12375902384519577,
"learning_rate": 8.41576792702552e-06,
"loss": 0.0193,
"step": 3590
},
{
"epoch": 5.04025201260063,
"grad_norm": 1.289878249168396,
"learning_rate": 8.403849060215587e-06,
"loss": 0.0272,
"step": 3600
},
{
"epoch": 5.054252712635631,
"grad_norm": 12.536774635314941,
"learning_rate": 8.391894035297962e-06,
"loss": 0.0311,
"step": 3610
},
{
"epoch": 5.068253412670633,
"grad_norm": 11.550432205200195,
"learning_rate": 8.379902979267424e-06,
"loss": 0.0884,
"step": 3620
},
{
"epoch": 5.082254112705635,
"grad_norm": 0.03312607482075691,
"learning_rate": 8.367876019501512e-06,
"loss": 0.0515,
"step": 3630
},
{
"epoch": 5.096254812740637,
"grad_norm": 12.821776390075684,
"learning_rate": 8.35581328375915e-06,
"loss": 0.0622,
"step": 3640
},
{
"epoch": 5.110255512775638,
"grad_norm": 30.363012313842773,
"learning_rate": 8.343714900179304e-06,
"loss": 0.1507,
"step": 3650
},
{
"epoch": 5.12425621281064,
"grad_norm": 22.729272842407227,
"learning_rate": 8.331580997279616e-06,
"loss": 0.0806,
"step": 3660
},
{
"epoch": 5.138256912845642,
"grad_norm": 64.62901306152344,
"learning_rate": 8.319411703955042e-06,
"loss": 0.0506,
"step": 3670
},
{
"epoch": 5.152257612880644,
"grad_norm": 81.08839416503906,
"learning_rate": 8.307207149476478e-06,
"loss": 0.11,
"step": 3680
},
{
"epoch": 5.166258312915645,
"grad_norm": 0.01688474230468273,
"learning_rate": 8.294967463489387e-06,
"loss": 0.1072,
"step": 3690
},
{
"epoch": 5.180259012950647,
"grad_norm": 5.698174476623535,
"learning_rate": 8.282692776012429e-06,
"loss": 0.0356,
"step": 3700
},
{
"epoch": 5.194259712985649,
"grad_norm": 0.30638906359672546,
"learning_rate": 8.27038321743607e-06,
"loss": 0.1622,
"step": 3710
},
{
"epoch": 5.208260413020651,
"grad_norm": 1.2199918031692505,
"learning_rate": 8.258038918521203e-06,
"loss": 0.0041,
"step": 3720
},
{
"epoch": 5.222261113055652,
"grad_norm": 5.87195348739624,
"learning_rate": 8.24566001039776e-06,
"loss": 0.0538,
"step": 3730
},
{
"epoch": 5.236261813090654,
"grad_norm": 69.89470672607422,
"learning_rate": 8.233246624563315e-06,
"loss": 0.2333,
"step": 3740
},
{
"epoch": 5.250262513125656,
"grad_norm": 0.06884055584669113,
"learning_rate": 8.220798892881686e-06,
"loss": 0.0731,
"step": 3750
},
{
"epoch": 5.264263213160658,
"grad_norm": 28.682525634765625,
"learning_rate": 8.208316947581543e-06,
"loss": 0.1084,
"step": 3760
},
{
"epoch": 5.2782639131956595,
"grad_norm": 0.05551927164196968,
"learning_rate": 8.19580092125499e-06,
"loss": 0.1332,
"step": 3770
},
{
"epoch": 5.292264613230661,
"grad_norm": 26.148853302001953,
"learning_rate": 8.183250946856173e-06,
"loss": 0.0403,
"step": 3780
},
{
"epoch": 5.306265313265663,
"grad_norm": 0.33171024918556213,
"learning_rate": 8.17066715769985e-06,
"loss": 0.1941,
"step": 3790
},
{
"epoch": 5.320266013300665,
"grad_norm": 35.20890808105469,
"learning_rate": 8.158049687459986e-06,
"loss": 0.1546,
"step": 3800
},
{
"epoch": 5.3342667133356665,
"grad_norm": 20.95752716064453,
"learning_rate": 8.145398670168336e-06,
"loss": 0.1069,
"step": 3810
},
{
"epoch": 5.348267413370668,
"grad_norm": 9.339089393615723,
"learning_rate": 8.132714240213009e-06,
"loss": 0.0952,
"step": 3820
},
{
"epoch": 5.36226811340567,
"grad_norm": 18.237586975097656,
"learning_rate": 8.119996532337047e-06,
"loss": 0.0832,
"step": 3830
},
{
"epoch": 5.376268813440672,
"grad_norm": 6.259382247924805,
"learning_rate": 8.107245681636997e-06,
"loss": 0.0349,
"step": 3840
},
{
"epoch": 5.3902695134756735,
"grad_norm": 2.9350638389587402,
"learning_rate": 8.094461823561473e-06,
"loss": 0.0669,
"step": 3850
},
{
"epoch": 5.404270213510675,
"grad_norm": 3.0102787017822266,
"learning_rate": 8.081645093909715e-06,
"loss": 0.0833,
"step": 3860
},
{
"epoch": 5.418270913545677,
"grad_norm": 0.038166593760252,
"learning_rate": 8.068795628830148e-06,
"loss": 0.1295,
"step": 3870
},
{
"epoch": 5.432271613580679,
"grad_norm": 74.43338012695312,
"learning_rate": 8.055913564818938e-06,
"loss": 0.089,
"step": 3880
},
{
"epoch": 5.4462723136156805,
"grad_norm": 1.8652153015136719,
"learning_rate": 8.042999038718538e-06,
"loss": 0.0432,
"step": 3890
},
{
"epoch": 5.460273013650682,
"grad_norm": 1.0438085794448853,
"learning_rate": 8.030052187716238e-06,
"loss": 0.2206,
"step": 3900
},
{
"epoch": 5.474273713685684,
"grad_norm": 23.055278778076172,
"learning_rate": 8.017073149342703e-06,
"loss": 0.0681,
"step": 3910
},
{
"epoch": 5.488274413720686,
"grad_norm": 3.861517906188965,
"learning_rate": 8.004062061470519e-06,
"loss": 0.1237,
"step": 3920
},
{
"epoch": 5.502275113755688,
"grad_norm": 67.68190002441406,
"learning_rate": 7.991019062312723e-06,
"loss": 0.3206,
"step": 3930
},
{
"epoch": 5.516275813790689,
"grad_norm": 0.18575100600719452,
"learning_rate": 7.97794429042134e-06,
"loss": 0.0167,
"step": 3940
},
{
"epoch": 5.530276513825691,
"grad_norm": 8.642852783203125,
"learning_rate": 7.9648378846859e-06,
"loss": 0.2486,
"step": 3950
},
{
"epoch": 5.544277213860693,
"grad_norm": 25.390499114990234,
"learning_rate": 7.951699984331973e-06,
"loss": 0.0729,
"step": 3960
},
{
"epoch": 5.558277913895695,
"grad_norm": 41.303653717041016,
"learning_rate": 7.93853072891969e-06,
"loss": 0.0872,
"step": 3970
},
{
"epoch": 5.572278613930696,
"grad_norm": 2.244464874267578,
"learning_rate": 7.925330258342261e-06,
"loss": 0.0456,
"step": 3980
},
{
"epoch": 5.586279313965698,
"grad_norm": 1.1481804847717285,
"learning_rate": 7.912098712824474e-06,
"loss": 0.0371,
"step": 3990
},
{
"epoch": 5.6002800140007,
"grad_norm": 51.855377197265625,
"learning_rate": 7.89883623292123e-06,
"loss": 0.0609,
"step": 4000
},
{
"epoch": 5.614280714035702,
"grad_norm": 36.11799621582031,
"learning_rate": 7.885542959516027e-06,
"loss": 0.0478,
"step": 4010
},
{
"epoch": 5.628281414070703,
"grad_norm": 0.010757026262581348,
"learning_rate": 7.872219033819479e-06,
"loss": 0.1272,
"step": 4020
},
{
"epoch": 5.642282114105705,
"grad_norm": 0.05869268625974655,
"learning_rate": 7.85886459736781e-06,
"loss": 0.0924,
"step": 4030
},
{
"epoch": 5.656282814140707,
"grad_norm": 19.658790588378906,
"learning_rate": 7.84547979202135e-06,
"loss": 0.1318,
"step": 4040
},
{
"epoch": 5.670283514175709,
"grad_norm": 1.9408730268478394,
"learning_rate": 7.832064759963028e-06,
"loss": 0.056,
"step": 4050
},
{
"epoch": 5.68428421421071,
"grad_norm": 11.6222562789917,
"learning_rate": 7.818619643696863e-06,
"loss": 0.0831,
"step": 4060
},
{
"epoch": 5.698284914245712,
"grad_norm": 0.004137192852795124,
"learning_rate": 7.805144586046454e-06,
"loss": 0.0741,
"step": 4070
},
{
"epoch": 5.712285614280714,
"grad_norm": 26.38702392578125,
"learning_rate": 7.791639730153453e-06,
"loss": 0.2131,
"step": 4080
},
{
"epoch": 5.726286314315716,
"grad_norm": 22.777111053466797,
"learning_rate": 7.778105219476053e-06,
"loss": 0.157,
"step": 4090
},
{
"epoch": 5.740287014350717,
"grad_norm": 28.669824600219727,
"learning_rate": 7.764541197787462e-06,
"loss": 0.1796,
"step": 4100
},
{
"epoch": 5.754287714385719,
"grad_norm": 3.9441118240356445,
"learning_rate": 7.750947809174372e-06,
"loss": 0.0144,
"step": 4110
},
{
"epoch": 5.768288414420721,
"grad_norm": 1.612039566040039,
"learning_rate": 7.737325198035435e-06,
"loss": 0.0634,
"step": 4120
},
{
"epoch": 5.782289114455723,
"grad_norm": 31.477649688720703,
"learning_rate": 7.723673509079718e-06,
"loss": 0.15,
"step": 4130
},
{
"epoch": 5.7962898144907244,
"grad_norm": 21.06825065612793,
"learning_rate": 7.709992887325187e-06,
"loss": 0.074,
"step": 4140
},
{
"epoch": 5.810290514525726,
"grad_norm": 23.094755172729492,
"learning_rate": 7.69628347809714e-06,
"loss": 0.089,
"step": 4150
},
{
"epoch": 5.824291214560728,
"grad_norm": 19.415878295898438,
"learning_rate": 7.68254542702668e-06,
"loss": 0.0734,
"step": 4160
},
{
"epoch": 5.83829191459573,
"grad_norm": 0.020781751722097397,
"learning_rate": 7.668778880049167e-06,
"loss": 0.0587,
"step": 4170
},
{
"epoch": 5.8522926146307315,
"grad_norm": 48.292869567871094,
"learning_rate": 7.654983983402662e-06,
"loss": 0.117,
"step": 4180
},
{
"epoch": 5.866293314665733,
"grad_norm": 27.23204803466797,
"learning_rate": 7.641160883626374e-06,
"loss": 0.1372,
"step": 4190
},
{
"epoch": 5.880294014700735,
"grad_norm": 0.6378940343856812,
"learning_rate": 7.627309727559114e-06,
"loss": 0.0299,
"step": 4200
},
{
"epoch": 5.894294714735737,
"grad_norm": 0.020519094541668892,
"learning_rate": 7.613430662337715e-06,
"loss": 0.1332,
"step": 4210
},
{
"epoch": 5.9082954147707385,
"grad_norm": 0.2816191017627716,
"learning_rate": 7.599523835395493e-06,
"loss": 0.0604,
"step": 4220
},
{
"epoch": 5.92229611480574,
"grad_norm": 0.048931095749139786,
"learning_rate": 7.585589394460661e-06,
"loss": 0.0944,
"step": 4230
},
{
"epoch": 5.936296814840742,
"grad_norm": 14.277728080749512,
"learning_rate": 7.571627487554769e-06,
"loss": 0.1221,
"step": 4240
},
{
"epoch": 5.950297514875744,
"grad_norm": 61.147769927978516,
"learning_rate": 7.5576382629911306e-06,
"loss": 0.2043,
"step": 4250
},
{
"epoch": 5.9642982149107455,
"grad_norm": 61.5135498046875,
"learning_rate": 7.543621869373249e-06,
"loss": 0.1118,
"step": 4260
},
{
"epoch": 5.978298914945747,
"grad_norm": 0.41074830293655396,
"learning_rate": 7.529578455593232e-06,
"loss": 0.0869,
"step": 4270
},
{
"epoch": 5.992299614980749,
"grad_norm": 22.36100959777832,
"learning_rate": 7.515508170830221e-06,
"loss": 0.0938,
"step": 4280
},
{
"epoch": 5.99929996499825,
"eval_f1": 0.8054250784759932,
"eval_loss": 0.707984209060669,
"eval_precision": 0.8045938711804601,
"eval_recall": 0.8054583624912526,
"eval_runtime": 85.6965,
"eval_samples_per_second": 16.675,
"eval_steps_per_second": 8.343,
"step": 4285
},
{
"epoch": 6.006300315015751,
"grad_norm": 2.4087300300598145,
"learning_rate": 7.501411164548792e-06,
"loss": 0.0317,
"step": 4290
},
{
"epoch": 6.0203010150507525,
"grad_norm": 8.070867538452148,
"learning_rate": 7.487287586497384e-06,
"loss": 0.0422,
"step": 4300
},
{
"epoch": 6.034301715085754,
"grad_norm": 20.54210090637207,
"learning_rate": 7.473137586706693e-06,
"loss": 0.0932,
"step": 4310
},
{
"epoch": 6.048302415120756,
"grad_norm": 0.29761630296707153,
"learning_rate": 7.458961315488095e-06,
"loss": 0.005,
"step": 4320
},
{
"epoch": 6.062303115155758,
"grad_norm": 0.02404841221868992,
"learning_rate": 7.444758923432028e-06,
"loss": 0.0024,
"step": 4330
},
{
"epoch": 6.07630381519076,
"grad_norm": 0.01488957554101944,
"learning_rate": 7.4305305614064145e-06,
"loss": 0.0225,
"step": 4340
},
{
"epoch": 6.090304515225761,
"grad_norm": 0.9857441186904907,
"learning_rate": 7.416276380555041e-06,
"loss": 0.0527,
"step": 4350
},
{
"epoch": 6.104305215260763,
"grad_norm": 13.299368858337402,
"learning_rate": 7.401996532295965e-06,
"loss": 0.033,
"step": 4360
},
{
"epoch": 6.118305915295765,
"grad_norm": 8.178146362304688,
"learning_rate": 7.3876911683198995e-06,
"loss": 0.0218,
"step": 4370
},
{
"epoch": 6.132306615330767,
"grad_norm": 0.02896474301815033,
"learning_rate": 7.373360440588604e-06,
"loss": 0.0457,
"step": 4380
},
{
"epoch": 6.146307315365768,
"grad_norm": 2.2403640747070312,
"learning_rate": 7.359004501333267e-06,
"loss": 0.0513,
"step": 4390
},
{
"epoch": 6.16030801540077,
"grad_norm": 19.458662033081055,
"learning_rate": 7.344623503052898e-06,
"loss": 0.0351,
"step": 4400
},
{
"epoch": 6.174308715435772,
"grad_norm": 0.648754358291626,
"learning_rate": 7.330217598512696e-06,
"loss": 0.0278,
"step": 4410
},
{
"epoch": 6.188309415470774,
"grad_norm": 1.0036877393722534,
"learning_rate": 7.315786940742432e-06,
"loss": 0.0331,
"step": 4420
},
{
"epoch": 6.202310115505775,
"grad_norm": 72.4751968383789,
"learning_rate": 7.301331683034827e-06,
"loss": 0.0618,
"step": 4430
},
{
"epoch": 6.216310815540777,
"grad_norm": 0.07326419651508331,
"learning_rate": 7.286851978943919e-06,
"loss": 0.0361,
"step": 4440
},
{
"epoch": 6.230311515575779,
"grad_norm": 0.35342127084732056,
"learning_rate": 7.2723479822834295e-06,
"loss": 0.0262,
"step": 4450
},
{
"epoch": 6.244312215610781,
"grad_norm": 0.19052493572235107,
"learning_rate": 7.257819847125136e-06,
"loss": 0.0653,
"step": 4460
},
{
"epoch": 6.258312915645782,
"grad_norm": 36.13716125488281,
"learning_rate": 7.243267727797235e-06,
"loss": 0.0784,
"step": 4470
},
{
"epoch": 6.272313615680784,
"grad_norm": 29.920095443725586,
"learning_rate": 7.2286917788826926e-06,
"loss": 0.0571,
"step": 4480
},
{
"epoch": 6.286314315715786,
"grad_norm": 0.20633961260318756,
"learning_rate": 7.214092155217614e-06,
"loss": 0.0306,
"step": 4490
},
{
"epoch": 6.300315015750788,
"grad_norm": 1.4535483121871948,
"learning_rate": 7.199469011889598e-06,
"loss": 0.0185,
"step": 4500
},
{
"epoch": 6.314315715785789,
"grad_norm": 0.011330017820000648,
"learning_rate": 7.18482250423608e-06,
"loss": 0.0009,
"step": 4510
},
{
"epoch": 6.328316415820791,
"grad_norm": 0.12344878911972046,
"learning_rate": 7.170152787842689e-06,
"loss": 0.0077,
"step": 4520
},
{
"epoch": 6.342317115855793,
"grad_norm": 0.0827026218175888,
"learning_rate": 7.155460018541597e-06,
"loss": 0.0199,
"step": 4530
},
{
"epoch": 6.356317815890795,
"grad_norm": 0.08871891349554062,
"learning_rate": 7.140744352409856e-06,
"loss": 0.0002,
"step": 4540
},
{
"epoch": 6.3703185159257965,
"grad_norm": 18.5615234375,
"learning_rate": 7.12600594576775e-06,
"loss": 0.0366,
"step": 4550
},
{
"epoch": 6.384319215960798,
"grad_norm": 3.0964789390563965,
"learning_rate": 7.1112449551771225e-06,
"loss": 0.0077,
"step": 4560
},
{
"epoch": 6.3983199159958,
"grad_norm": 0.0029246637132018805,
"learning_rate": 7.096461537439725e-06,
"loss": 0.0266,
"step": 4570
},
{
"epoch": 6.412320616030802,
"grad_norm": 2.371020555496216,
"learning_rate": 7.0816558495955435e-06,
"loss": 0.0496,
"step": 4580
},
{
"epoch": 6.4263213160658035,
"grad_norm": 0.021786697208881378,
"learning_rate": 7.066828048921133e-06,
"loss": 0.0161,
"step": 4590
},
{
"epoch": 6.440322016100805,
"grad_norm": 21.118539810180664,
"learning_rate": 7.051978292927947e-06,
"loss": 0.1049,
"step": 4600
},
{
"epoch": 6.454322716135807,
"grad_norm": 56.019474029541016,
"learning_rate": 7.0371067393606665e-06,
"loss": 0.0605,
"step": 4610
},
{
"epoch": 6.468323416170809,
"grad_norm": 0.010689962655305862,
"learning_rate": 7.022213546195516e-06,
"loss": 0.088,
"step": 4620
},
{
"epoch": 6.4823241162058105,
"grad_norm": 9.061432838439941,
"learning_rate": 7.007298871638597e-06,
"loss": 0.0127,
"step": 4630
},
{
"epoch": 6.496324816240812,
"grad_norm": 5.053805351257324,
"learning_rate": 6.9923628741242e-06,
"loss": 0.0748,
"step": 4640
},
{
"epoch": 6.510325516275814,
"grad_norm": 50.82999038696289,
"learning_rate": 6.97740571231312e-06,
"loss": 0.1141,
"step": 4650
},
{
"epoch": 6.524326216310816,
"grad_norm": 0.012241682969033718,
"learning_rate": 6.96242754509098e-06,
"loss": 0.1384,
"step": 4660
},
{
"epoch": 6.5383269163458175,
"grad_norm": 0.007444610353559256,
"learning_rate": 6.947428531566531e-06,
"loss": 0.0355,
"step": 4670
},
{
"epoch": 6.552327616380819,
"grad_norm": 8.445463180541992,
"learning_rate": 6.9324088310699745e-06,
"loss": 0.0322,
"step": 4680
},
{
"epoch": 6.566328316415821,
"grad_norm": 0.0021326360292732716,
"learning_rate": 6.9173686031512595e-06,
"loss": 0.1049,
"step": 4690
},
{
"epoch": 6.580329016450823,
"grad_norm": 61.38804626464844,
"learning_rate": 6.902308007578392e-06,
"loss": 0.1214,
"step": 4700
},
{
"epoch": 6.5943297164858246,
"grad_norm": 2.3494713306427,
"learning_rate": 6.887227204335739e-06,
"loss": 0.0139,
"step": 4710
},
{
"epoch": 6.608330416520826,
"grad_norm": 0.03077736310660839,
"learning_rate": 6.8721263536223295e-06,
"loss": 0.009,
"step": 4720
},
{
"epoch": 6.622331116555828,
"grad_norm": 0.17415045201778412,
"learning_rate": 6.857005615850148e-06,
"loss": 0.2035,
"step": 4730
},
{
"epoch": 6.63633181659083,
"grad_norm": 1.6760774850845337,
"learning_rate": 6.841865151642434e-06,
"loss": 0.0292,
"step": 4740
},
{
"epoch": 6.650332516625832,
"grad_norm": 0.017536571249365807,
"learning_rate": 6.8267051218319766e-06,
"loss": 0.0159,
"step": 4750
},
{
"epoch": 6.664333216660833,
"grad_norm": 31.02813148498535,
"learning_rate": 6.8115256874594015e-06,
"loss": 0.0545,
"step": 4760
},
{
"epoch": 6.678333916695835,
"grad_norm": 28.042268753051758,
"learning_rate": 6.7963270097714705e-06,
"loss": 0.2497,
"step": 4770
},
{
"epoch": 6.692334616730837,
"grad_norm": 0.08991402387619019,
"learning_rate": 6.781109250219353e-06,
"loss": 0.0343,
"step": 4780
},
{
"epoch": 6.706335316765839,
"grad_norm": 16.03205108642578,
"learning_rate": 6.765872570456926e-06,
"loss": 0.0407,
"step": 4790
},
{
"epoch": 6.72033601680084,
"grad_norm": 0.023787444457411766,
"learning_rate": 6.750617132339045e-06,
"loss": 0.1305,
"step": 4800
},
{
"epoch": 6.734336716835842,
"grad_norm": 0.12138766795396805,
"learning_rate": 6.735343097919838e-06,
"loss": 0.0951,
"step": 4810
},
{
"epoch": 6.748337416870844,
"grad_norm": 5.909520626068115,
"learning_rate": 6.720050629450963e-06,
"loss": 0.0345,
"step": 4820
},
{
"epoch": 6.762338116905846,
"grad_norm": 0.01361538004130125,
"learning_rate": 6.704739889379914e-06,
"loss": 0.0356,
"step": 4830
},
{
"epoch": 6.776338816940847,
"grad_norm": 0.11465916782617569,
"learning_rate": 6.689411040348267e-06,
"loss": 0.0626,
"step": 4840
},
{
"epoch": 6.790339516975849,
"grad_norm": 0.06471576541662216,
"learning_rate": 6.674064245189969e-06,
"loss": 0.085,
"step": 4850
},
{
"epoch": 6.804340217010851,
"grad_norm": 0.16323409974575043,
"learning_rate": 6.6586996669296014e-06,
"loss": 0.0755,
"step": 4860
},
{
"epoch": 6.818340917045853,
"grad_norm": 0.5551852583885193,
"learning_rate": 6.6433174687806525e-06,
"loss": 0.0152,
"step": 4870
},
{
"epoch": 6.832341617080854,
"grad_norm": 32.66178512573242,
"learning_rate": 6.62791781414378e-06,
"loss": 0.0479,
"step": 4880
},
{
"epoch": 6.846342317115856,
"grad_norm": 0.015004020184278488,
"learning_rate": 6.612500866605078e-06,
"loss": 0.0181,
"step": 4890
},
{
"epoch": 6.860343017150858,
"grad_norm": 42.09051513671875,
"learning_rate": 6.597066789934336e-06,
"loss": 0.078,
"step": 4900
},
{
"epoch": 6.87434371718586,
"grad_norm": 13.405208587646484,
"learning_rate": 6.581615748083306e-06,
"loss": 0.0522,
"step": 4910
},
{
"epoch": 6.8883444172208605,
"grad_norm": 58.45531463623047,
"learning_rate": 6.56614790518395e-06,
"loss": 0.0454,
"step": 4920
},
{
"epoch": 6.902345117255862,
"grad_norm": 24.918203353881836,
"learning_rate": 6.5506634255467085e-06,
"loss": 0.0912,
"step": 4930
},
{
"epoch": 6.916345817290864,
"grad_norm": 0.018271498382091522,
"learning_rate": 6.5351624736587446e-06,
"loss": 0.0156,
"step": 4940
},
{
"epoch": 6.930346517325866,
"grad_norm": 0.018698183819651604,
"learning_rate": 6.5196452141822045e-06,
"loss": 0.1512,
"step": 4950
},
{
"epoch": 6.944347217360868,
"grad_norm": 24.42111587524414,
"learning_rate": 6.504111811952463e-06,
"loss": 0.0333,
"step": 4960
},
{
"epoch": 6.958347917395869,
"grad_norm": 0.0020534696523100138,
"learning_rate": 6.488562431976376e-06,
"loss": 0.0105,
"step": 4970
},
{
"epoch": 6.972348617430871,
"grad_norm": 0.06145526468753815,
"learning_rate": 6.472997239430529e-06,
"loss": 0.0791,
"step": 4980
},
{
"epoch": 6.986349317465873,
"grad_norm": 0.0036424114368855953,
"learning_rate": 6.457416399659472e-06,
"loss": 0.0713,
"step": 4990
},
{
"epoch": 6.998949947497374,
"eval_f1": 0.8174142102409481,
"eval_loss": 0.6825958490371704,
"eval_precision": 0.8189024398260805,
"eval_recall": 0.8173547935619314,
"eval_runtime": 85.6833,
"eval_samples_per_second": 16.678,
"eval_steps_per_second": 8.345,
"step": 4999
},
{
"epoch": 7.000350017500875,
"grad_norm": 3.2184433937072754,
"learning_rate": 6.441820078173979e-06,
"loss": 0.1252,
"step": 5000
},
{
"epoch": 7.014350717535876,
"grad_norm": 0.02290227822959423,
"learning_rate": 6.426208440649278e-06,
"loss": 0.0076,
"step": 5010
},
{
"epoch": 7.028351417570878,
"grad_norm": 0.8488892912864685,
"learning_rate": 6.410581652923298e-06,
"loss": 0.0142,
"step": 5020
},
{
"epoch": 7.04235211760588,
"grad_norm": 6.883611679077148,
"learning_rate": 6.394939880994899e-06,
"loss": 0.0133,
"step": 5030
},
{
"epoch": 7.056352817640882,
"grad_norm": 0.7044736742973328,
"learning_rate": 6.379283291022118e-06,
"loss": 0.0148,
"step": 5040
},
{
"epoch": 7.070353517675883,
"grad_norm": 8.82105827331543,
"learning_rate": 6.363612049320398e-06,
"loss": 0.0019,
"step": 5050
},
{
"epoch": 7.084354217710885,
"grad_norm": 0.06456708908081055,
"learning_rate": 6.347926322360825e-06,
"loss": 0.0021,
"step": 5060
},
{
"epoch": 7.098354917745887,
"grad_norm": 0.15175148844718933,
"learning_rate": 6.3322262767683564e-06,
"loss": 0.0186,
"step": 5070
},
{
"epoch": 7.112355617780889,
"grad_norm": 0.7544161677360535,
"learning_rate": 6.31651207932005e-06,
"loss": 0.0667,
"step": 5080
},
{
"epoch": 7.12635631781589,
"grad_norm": 1.5102256536483765,
"learning_rate": 6.300783896943299e-06,
"loss": 0.0091,
"step": 5090
},
{
"epoch": 7.140357017850892,
"grad_norm": 0.09220393002033234,
"learning_rate": 6.285041896714052e-06,
"loss": 0.0247,
"step": 5100
},
{
"epoch": 7.154357717885894,
"grad_norm": 31.779438018798828,
"learning_rate": 6.269286245855039e-06,
"loss": 0.0128,
"step": 5110
},
{
"epoch": 7.168358417920896,
"grad_norm": 0.2819765508174896,
"learning_rate": 6.253517111734004e-06,
"loss": 0.0381,
"step": 5120
},
{
"epoch": 7.182359117955897,
"grad_norm": 0.007849560119211674,
"learning_rate": 6.237734661861909e-06,
"loss": 0.0311,
"step": 5130
},
{
"epoch": 7.196359817990899,
"grad_norm": 0.43997815251350403,
"learning_rate": 6.221939063891176e-06,
"loss": 0.0156,
"step": 5140
},
{
"epoch": 7.210360518025901,
"grad_norm": 0.0021186801604926586,
"learning_rate": 6.206130485613887e-06,
"loss": 0.0042,
"step": 5150
},
{
"epoch": 7.224361218060903,
"grad_norm": 32.345211029052734,
"learning_rate": 6.1903090949600144e-06,
"loss": 0.1152,
"step": 5160
},
{
"epoch": 7.2383619180959045,
"grad_norm": 22.896106719970703,
"learning_rate": 6.1744750599956315e-06,
"loss": 0.0107,
"step": 5170
},
{
"epoch": 7.252362618130906,
"grad_norm": 2.5360770225524902,
"learning_rate": 6.158628548921132e-06,
"loss": 0.0306,
"step": 5180
},
{
"epoch": 7.266363318165908,
"grad_norm": 0.03135138005018234,
"learning_rate": 6.142769730069431e-06,
"loss": 0.0608,
"step": 5190
},
{
"epoch": 7.28036401820091,
"grad_norm": 0.004951399751007557,
"learning_rate": 6.126898771904197e-06,
"loss": 0.108,
"step": 5200
},
{
"epoch": 7.2943647182359115,
"grad_norm": 0.04001991078257561,
"learning_rate": 6.11101584301804e-06,
"loss": 0.015,
"step": 5210
},
{
"epoch": 7.308365418270913,
"grad_norm": 0.25486335158348083,
"learning_rate": 6.095121112130742e-06,
"loss": 0.0409,
"step": 5220
},
{
"epoch": 7.322366118305915,
"grad_norm": 5.508518695831299,
"learning_rate": 6.079214748087444e-06,
"loss": 0.0025,
"step": 5230
},
{
"epoch": 7.336366818340917,
"grad_norm": 1.9795469045639038,
"learning_rate": 6.063296919856872e-06,
"loss": 0.0701,
"step": 5240
},
{
"epoch": 7.3503675183759185,
"grad_norm": 0.009440050460398197,
"learning_rate": 6.047367796529523e-06,
"loss": 0.0094,
"step": 5250
},
{
"epoch": 7.36436821841092,
"grad_norm": 0.03409438207745552,
"learning_rate": 6.031427547315889e-06,
"loss": 0.0715,
"step": 5260
},
{
"epoch": 7.378368918445922,
"grad_norm": 0.0017209186917170882,
"learning_rate": 6.0154763415446395e-06,
"loss": 0.0754,
"step": 5270
},
{
"epoch": 7.392369618480924,
"grad_norm": 0.07516732811927795,
"learning_rate": 5.9995143486608406e-06,
"loss": 0.0008,
"step": 5280
},
{
"epoch": 7.4063703185159255,
"grad_norm": 0.0012286275159567595,
"learning_rate": 5.983541738224141e-06,
"loss": 0.0191,
"step": 5290
},
{
"epoch": 7.420371018550927,
"grad_norm": 0.16271395981311798,
"learning_rate": 5.967558679906981e-06,
"loss": 0.0349,
"step": 5300
},
{
"epoch": 7.434371718585929,
"grad_norm": 28.011831283569336,
"learning_rate": 5.951565343492779e-06,
"loss": 0.0093,
"step": 5310
},
{
"epoch": 7.448372418620931,
"grad_norm": 0.0033658405300229788,
"learning_rate": 5.935561898874142e-06,
"loss": 0.0241,
"step": 5320
},
{
"epoch": 7.462373118655933,
"grad_norm": 0.0023493689950555563,
"learning_rate": 5.91954851605105e-06,
"loss": 0.043,
"step": 5330
},
{
"epoch": 7.476373818690934,
"grad_norm": 0.00039932539220899343,
"learning_rate": 5.9035253651290555e-06,
"loss": 0.0009,
"step": 5340
},
{
"epoch": 7.490374518725936,
"grad_norm": 0.0065233842469751835,
"learning_rate": 5.887492616317471e-06,
"loss": 0.0088,
"step": 5350
},
{
"epoch": 7.504375218760938,
"grad_norm": 0.0013134811306372285,
"learning_rate": 5.87145043992757e-06,
"loss": 0.0189,
"step": 5360
},
{
"epoch": 7.51837591879594,
"grad_norm": 28.32547950744629,
"learning_rate": 5.855399006370766e-06,
"loss": 0.0137,
"step": 5370
},
{
"epoch": 7.532376618830941,
"grad_norm": 0.007662694435566664,
"learning_rate": 5.839338486156812e-06,
"loss": 0.0001,
"step": 5380
},
{
"epoch": 7.546377318865943,
"grad_norm": 0.18727760016918182,
"learning_rate": 5.8232690498919906e-06,
"loss": 0.0743,
"step": 5390
},
{
"epoch": 7.560378018900945,
"grad_norm": 0.011461739428341389,
"learning_rate": 5.80719086827729e-06,
"loss": 0.0051,
"step": 5400
},
{
"epoch": 7.574378718935947,
"grad_norm": 1.8720335960388184,
"learning_rate": 5.7911041121066e-06,
"loss": 0.0027,
"step": 5410
},
{
"epoch": 7.588379418970948,
"grad_norm": 0.2441912293434143,
"learning_rate": 5.775008952264897e-06,
"loss": 0.0432,
"step": 5420
},
{
"epoch": 7.60238011900595,
"grad_norm": 0.01594419591128826,
"learning_rate": 5.7589055597264235e-06,
"loss": 0.0138,
"step": 5430
},
{
"epoch": 7.616380819040952,
"grad_norm": 3.6038880348205566,
"learning_rate": 5.742794105552879e-06,
"loss": 0.0211,
"step": 5440
},
{
"epoch": 7.630381519075954,
"grad_norm": 0.05175204947590828,
"learning_rate": 5.726674760891599e-06,
"loss": 0.0398,
"step": 5450
},
{
"epoch": 7.644382219110955,
"grad_norm": 0.30954572558403015,
"learning_rate": 5.71054769697374e-06,
"loss": 0.0859,
"step": 5460
},
{
"epoch": 7.658382919145957,
"grad_norm": 6.447484016418457,
"learning_rate": 5.694413085112448e-06,
"loss": 0.0376,
"step": 5470
},
{
"epoch": 7.672383619180959,
"grad_norm": 0.004400278907269239,
"learning_rate": 5.678271096701059e-06,
"loss": 0.0112,
"step": 5480
},
{
"epoch": 7.686384319215961,
"grad_norm": 0.009013152681291103,
"learning_rate": 5.662121903211265e-06,
"loss": 0.0751,
"step": 5490
},
{
"epoch": 7.700385019250962,
"grad_norm": 16.201181411743164,
"learning_rate": 5.645965676191294e-06,
"loss": 0.0557,
"step": 5500
},
{
"epoch": 7.714385719285964,
"grad_norm": 0.002877170220017433,
"learning_rate": 5.62980258726409e-06,
"loss": 0.0036,
"step": 5510
},
{
"epoch": 7.728386419320966,
"grad_norm": 0.004419579636305571,
"learning_rate": 5.6136328081254874e-06,
"loss": 0.0293,
"step": 5520
},
{
"epoch": 7.742387119355968,
"grad_norm": 0.005260075442492962,
"learning_rate": 5.597456510542395e-06,
"loss": 0.0313,
"step": 5530
},
{
"epoch": 7.756387819390969,
"grad_norm": 3.496971368789673,
"learning_rate": 5.581273866350955e-06,
"loss": 0.0377,
"step": 5540
},
{
"epoch": 7.770388519425971,
"grad_norm": 0.009419528767466545,
"learning_rate": 5.565085047454737e-06,
"loss": 0.0366,
"step": 5550
},
{
"epoch": 7.784389219460973,
"grad_norm": 0.004169847816228867,
"learning_rate": 5.548890225822896e-06,
"loss": 0.0304,
"step": 5560
},
{
"epoch": 7.798389919495975,
"grad_norm": 0.3091202974319458,
"learning_rate": 5.53268957348836e-06,
"loss": 0.0034,
"step": 5570
},
{
"epoch": 7.8123906195309765,
"grad_norm": 0.6783474683761597,
"learning_rate": 5.5164832625459865e-06,
"loss": 0.0056,
"step": 5580
},
{
"epoch": 7.826391319565978,
"grad_norm": 0.16367115080356598,
"learning_rate": 5.500271465150748e-06,
"loss": 0.0757,
"step": 5590
},
{
"epoch": 7.84039201960098,
"grad_norm": 0.19091859459877014,
"learning_rate": 5.484054353515896e-06,
"loss": 0.0141,
"step": 5600
},
{
"epoch": 7.854392719635982,
"grad_norm": 3.1218807697296143,
"learning_rate": 5.467832099911135e-06,
"loss": 0.0024,
"step": 5610
},
{
"epoch": 7.8683934196709835,
"grad_norm": 38.620418548583984,
"learning_rate": 5.451604876660787e-06,
"loss": 0.0546,
"step": 5620
},
{
"epoch": 7.882394119705985,
"grad_norm": 17.191007614135742,
"learning_rate": 5.435372856141975e-06,
"loss": 0.0388,
"step": 5630
},
{
"epoch": 7.896394819740987,
"grad_norm": 0.049534134566783905,
"learning_rate": 5.4191362107827704e-06,
"loss": 0.0471,
"step": 5640
},
{
"epoch": 7.910395519775989,
"grad_norm": 0.07554405927658081,
"learning_rate": 5.402895113060379e-06,
"loss": 0.0097,
"step": 5650
},
{
"epoch": 7.9243962198109905,
"grad_norm": 10.603336334228516,
"learning_rate": 5.3866497354993e-06,
"loss": 0.0395,
"step": 5660
},
{
"epoch": 7.938396919845992,
"grad_norm": 10.075541496276855,
"learning_rate": 5.370400250669504e-06,
"loss": 0.0305,
"step": 5670
},
{
"epoch": 7.952397619880994,
"grad_norm": 0.4186045825481415,
"learning_rate": 5.354146831184579e-06,
"loss": 0.0068,
"step": 5680
},
{
"epoch": 7.966398319915996,
"grad_norm": 0.040065351873636246,
"learning_rate": 5.337889649699921e-06,
"loss": 0.0233,
"step": 5690
},
{
"epoch": 7.9803990199509975,
"grad_norm": 0.0028217558283358812,
"learning_rate": 5.3216288789108805e-06,
"loss": 0.0021,
"step": 5700
},
{
"epoch": 7.994399719985999,
"grad_norm": 26.778573989868164,
"learning_rate": 5.305364691550944e-06,
"loss": 0.1025,
"step": 5710
},
{
"epoch": 8.0,
"eval_f1": 0.8188966340884549,
"eval_loss": 0.7210356593132019,
"eval_precision": 0.8224692809382289,
"eval_recall": 0.8187543736878936,
"eval_runtime": 85.9063,
"eval_samples_per_second": 16.634,
"eval_steps_per_second": 8.323,
"step": 5714
},
{
"epoch": 8.008400420021001,
"grad_norm": 0.029206441715359688,
"learning_rate": 5.289097260389881e-06,
"loss": 0.0313,
"step": 5720
},
{
"epoch": 8.022401120056003,
"grad_norm": 0.0030094946268945932,
"learning_rate": 5.2728267582319325e-06,
"loss": 0.0003,
"step": 5730
},
{
"epoch": 8.036401820091005,
"grad_norm": 0.002360534854233265,
"learning_rate": 5.2565533579139484e-06,
"loss": 0.0031,
"step": 5740
},
{
"epoch": 8.050402520126006,
"grad_norm": 0.5710445046424866,
"learning_rate": 5.240277232303574e-06,
"loss": 0.0168,
"step": 5750
},
{
"epoch": 8.064403220161008,
"grad_norm": 48.347259521484375,
"learning_rate": 5.2239985542974e-06,
"loss": 0.0518,
"step": 5760
},
{
"epoch": 8.07840392019601,
"grad_norm": 0.02666153386235237,
"learning_rate": 5.207717496819134e-06,
"loss": 0.0805,
"step": 5770
},
{
"epoch": 8.092404620231012,
"grad_norm": 1.3221057653427124,
"learning_rate": 5.191434232817753e-06,
"loss": 0.0019,
"step": 5780
},
{
"epoch": 8.106405320266013,
"grad_norm": 0.660656750202179,
"learning_rate": 5.1751489352656846e-06,
"loss": 0.0002,
"step": 5790
},
{
"epoch": 8.120406020301015,
"grad_norm": 0.0018193651922047138,
"learning_rate": 5.158861777156947e-06,
"loss": 0.0324,
"step": 5800
},
{
"epoch": 8.134406720336017,
"grad_norm": 0.34088951349258423,
"learning_rate": 5.14257293150533e-06,
"loss": 0.0005,
"step": 5810
},
{
"epoch": 8.148407420371019,
"grad_norm": 0.0037190490402281284,
"learning_rate": 5.126282571342547e-06,
"loss": 0.0036,
"step": 5820
},
{
"epoch": 8.16240812040602,
"grad_norm": 0.08573169261217117,
"learning_rate": 5.109990869716398e-06,
"loss": 0.0487,
"step": 5830
},
{
"epoch": 8.176408820441022,
"grad_norm": 18.21436882019043,
"learning_rate": 5.093697999688934e-06,
"loss": 0.0045,
"step": 5840
},
{
"epoch": 8.190409520476024,
"grad_norm": 0.003121648682281375,
"learning_rate": 5.077404134334623e-06,
"loss": 0.0012,
"step": 5850
},
{
"epoch": 8.204410220511026,
"grad_norm": 0.43288281559944153,
"learning_rate": 5.061109446738496e-06,
"loss": 0.0002,
"step": 5860
},
{
"epoch": 8.218410920546027,
"grad_norm": 1.0953173637390137,
"learning_rate": 5.044814109994327e-06,
"loss": 0.0333,
"step": 5870
},
{
"epoch": 8.23241162058103,
"grad_norm": 0.018074065446853638,
"learning_rate": 5.028518297202781e-06,
"loss": 0.0003,
"step": 5880
},
{
"epoch": 8.246412320616031,
"grad_norm": 0.1864849030971527,
"learning_rate": 5.0122221814695815e-06,
"loss": 0.0151,
"step": 5890
},
{
"epoch": 8.260413020651033,
"grad_norm": 0.0006015441031195223,
"learning_rate": 4.99592593590367e-06,
"loss": 0.0494,
"step": 5900
},
{
"epoch": 8.274413720686034,
"grad_norm": 0.00025341002037748694,
"learning_rate": 4.9796297336153685e-06,
"loss": 0.072,
"step": 5910
},
{
"epoch": 8.288414420721036,
"grad_norm": 0.022668078541755676,
"learning_rate": 4.963333747714536e-06,
"loss": 0.0003,
"step": 5920
},
{
"epoch": 8.302415120756038,
"grad_norm": 1.5527633428573608,
"learning_rate": 4.947038151308735e-06,
"loss": 0.0115,
"step": 5930
},
{
"epoch": 8.31641582079104,
"grad_norm": 0.057844631373882294,
"learning_rate": 4.930743117501393e-06,
"loss": 0.0574,
"step": 5940
},
{
"epoch": 8.330416520826041,
"grad_norm": 34.13492202758789,
"learning_rate": 4.9144488193899546e-06,
"loss": 0.018,
"step": 5950
},
{
"epoch": 8.344417220861043,
"grad_norm": 0.12001439183950424,
"learning_rate": 4.898155430064056e-06,
"loss": 0.0023,
"step": 5960
},
{
"epoch": 8.358417920896045,
"grad_norm": 0.058995530009269714,
"learning_rate": 4.881863122603675e-06,
"loss": 0.0036,
"step": 5970
},
{
"epoch": 8.372418620931047,
"grad_norm": 0.050912320613861084,
"learning_rate": 4.865572070077298e-06,
"loss": 0.0056,
"step": 5980
},
{
"epoch": 8.386419320966048,
"grad_norm": 0.03144632279872894,
"learning_rate": 4.849282445540085e-06,
"loss": 0.0048,
"step": 5990
},
{
"epoch": 8.40042002100105,
"grad_norm": 46.16802978515625,
"learning_rate": 4.832994422032022e-06,
"loss": 0.0472,
"step": 6000
},
{
"epoch": 8.414420721036052,
"grad_norm": 0.6442742943763733,
"learning_rate": 4.816708172576088e-06,
"loss": 0.0116,
"step": 6010
},
{
"epoch": 8.428421421071054,
"grad_norm": 0.01641531102359295,
"learning_rate": 4.800423870176417e-06,
"loss": 0.0012,
"step": 6020
},
{
"epoch": 8.442422121106055,
"grad_norm": 1.5970861911773682,
"learning_rate": 4.7841416878164625e-06,
"loss": 0.0004,
"step": 6030
},
{
"epoch": 8.456422821141057,
"grad_norm": 0.002396646188572049,
"learning_rate": 4.767861798457157e-06,
"loss": 0.0147,
"step": 6040
},
{
"epoch": 8.470423521176059,
"grad_norm": 25.83795166015625,
"learning_rate": 4.751584375035071e-06,
"loss": 0.0144,
"step": 6050
},
{
"epoch": 8.48442422121106,
"grad_norm": 0.007508122827857733,
"learning_rate": 4.735309590460585e-06,
"loss": 0.0458,
"step": 6060
},
{
"epoch": 8.498424921246063,
"grad_norm": 0.05192435532808304,
"learning_rate": 4.719037617616044e-06,
"loss": 0.0003,
"step": 6070
},
{
"epoch": 8.512425621281064,
"grad_norm": 0.001271701417863369,
"learning_rate": 4.702768629353928e-06,
"loss": 0.0279,
"step": 6080
},
{
"epoch": 8.526426321316066,
"grad_norm": 0.016449235379695892,
"learning_rate": 4.686502798495009e-06,
"loss": 0.0243,
"step": 6090
},
{
"epoch": 8.540427021351068,
"grad_norm": 0.006645840592682362,
"learning_rate": 4.6702402978265235e-06,
"loss": 0.0442,
"step": 6100
},
{
"epoch": 8.55442772138607,
"grad_norm": 0.001772402785718441,
"learning_rate": 4.6539813001003295e-06,
"loss": 0.0742,
"step": 6110
},
{
"epoch": 8.568428421421071,
"grad_norm": 0.02117299474775791,
"learning_rate": 4.637725978031072e-06,
"loss": 0.0005,
"step": 6120
},
{
"epoch": 8.582429121456073,
"grad_norm": 63.013153076171875,
"learning_rate": 4.621474504294358e-06,
"loss": 0.0331,
"step": 6130
},
{
"epoch": 8.596429821491075,
"grad_norm": 0.011541690677404404,
"learning_rate": 4.605227051524904e-06,
"loss": 0.0025,
"step": 6140
},
{
"epoch": 8.610430521526077,
"grad_norm": 28.012697219848633,
"learning_rate": 4.588983792314723e-06,
"loss": 0.0134,
"step": 6150
},
{
"epoch": 8.624431221561078,
"grad_norm": 0.0013819055166095495,
"learning_rate": 4.572744899211275e-06,
"loss": 0.0053,
"step": 6160
},
{
"epoch": 8.63843192159608,
"grad_norm": 0.22334392368793488,
"learning_rate": 4.5565105447156425e-06,
"loss": 0.0799,
"step": 6170
},
{
"epoch": 8.652432621631082,
"grad_norm": 0.005143929738551378,
"learning_rate": 4.540280901280696e-06,
"loss": 0.0098,
"step": 6180
},
{
"epoch": 8.666433321666084,
"grad_norm": 0.0026814427692443132,
"learning_rate": 4.524056141309259e-06,
"loss": 0.0006,
"step": 6190
},
{
"epoch": 8.680434021701085,
"grad_norm": 0.000591717311181128,
"learning_rate": 4.5078364371522815e-06,
"loss": 0.0166,
"step": 6200
},
{
"epoch": 8.694434721736087,
"grad_norm": 1.369355320930481,
"learning_rate": 4.49162196110701e-06,
"loss": 0.0081,
"step": 6210
},
{
"epoch": 8.708435421771089,
"grad_norm": 0.008222399279475212,
"learning_rate": 4.4754128854151465e-06,
"loss": 0.0001,
"step": 6220
},
{
"epoch": 8.72243612180609,
"grad_norm": 0.47795143723487854,
"learning_rate": 4.459209382261034e-06,
"loss": 0.0026,
"step": 6230
},
{
"epoch": 8.736436821841092,
"grad_norm": 0.0339123010635376,
"learning_rate": 4.4430116237698166e-06,
"loss": 0.0054,
"step": 6240
},
{
"epoch": 8.750437521876094,
"grad_norm": 0.007429391611367464,
"learning_rate": 4.426819782005614e-06,
"loss": 0.0001,
"step": 6250
},
{
"epoch": 8.764438221911096,
"grad_norm": 0.0037332987412810326,
"learning_rate": 4.410634028969698e-06,
"loss": 0.0471,
"step": 6260
},
{
"epoch": 8.778438921946098,
"grad_norm": 0.004143583122640848,
"learning_rate": 4.394454536598655e-06,
"loss": 0.0,
"step": 6270
},
{
"epoch": 8.7924396219811,
"grad_norm": 6.966057777404785,
"learning_rate": 4.3782814767625755e-06,
"loss": 0.0122,
"step": 6280
},
{
"epoch": 8.806440322016101,
"grad_norm": 0.0005727079114876688,
"learning_rate": 4.362115021263207e-06,
"loss": 0.0,
"step": 6290
},
{
"epoch": 8.820441022051103,
"grad_norm": 0.0036027561873197556,
"learning_rate": 4.345955341832156e-06,
"loss": 0.0012,
"step": 6300
},
{
"epoch": 8.834441722086105,
"grad_norm": 0.0872088223695755,
"learning_rate": 4.329802610129031e-06,
"loss": 0.0034,
"step": 6310
},
{
"epoch": 8.848442422121106,
"grad_norm": 0.018417010083794594,
"learning_rate": 4.313656997739651e-06,
"loss": 0.0,
"step": 6320
},
{
"epoch": 8.862443122156108,
"grad_norm": 0.007588675711303949,
"learning_rate": 4.297518676174205e-06,
"loss": 0.0506,
"step": 6330
},
{
"epoch": 8.87644382219111,
"grad_norm": 0.11294496059417725,
"learning_rate": 4.281387816865431e-06,
"loss": 0.0137,
"step": 6340
},
{
"epoch": 8.890444522226112,
"grad_norm": 0.0062033189460635185,
"learning_rate": 4.2652645911668e-06,
"loss": 0.0092,
"step": 6350
},
{
"epoch": 8.904445222261113,
"grad_norm": 0.3065476715564728,
"learning_rate": 4.249149170350689e-06,
"loss": 0.0005,
"step": 6360
},
{
"epoch": 8.918445922296115,
"grad_norm": 23.0545597076416,
"learning_rate": 4.233041725606573e-06,
"loss": 0.0876,
"step": 6370
},
{
"epoch": 8.932446622331117,
"grad_norm": 0.9219328761100769,
"learning_rate": 4.216942428039197e-06,
"loss": 0.0005,
"step": 6380
},
{
"epoch": 8.946447322366119,
"grad_norm": 30.67947006225586,
"learning_rate": 4.200851448666755e-06,
"loss": 0.0584,
"step": 6390
},
{
"epoch": 8.96044802240112,
"grad_norm": 0.005178861785680056,
"learning_rate": 4.1847689584190894e-06,
"loss": 0.1016,
"step": 6400
},
{
"epoch": 8.974448722436122,
"grad_norm": 0.0009322167607024312,
"learning_rate": 4.168695128135854e-06,
"loss": 0.0819,
"step": 6410
},
{
"epoch": 8.988449422471124,
"grad_norm": 0.005380717106163502,
"learning_rate": 4.152630128564719e-06,
"loss": 0.0199,
"step": 6420
},
{
"epoch": 8.999649982499125,
"eval_f1": 0.8161936279830228,
"eval_loss": 0.7878016233444214,
"eval_precision": 0.8221996054957592,
"eval_recall": 0.8159552134359692,
"eval_runtime": 85.7772,
"eval_samples_per_second": 16.659,
"eval_steps_per_second": 8.336,
"step": 6428
},
{
"epoch": 9.002450122506126,
"grad_norm": 0.008465089835226536,
"learning_rate": 4.136574130359548e-06,
"loss": 0.0,
"step": 6430
},
{
"epoch": 9.016450822541128,
"grad_norm": 0.00646022567525506,
"learning_rate": 4.120527304078579e-06,
"loss": 0.026,
"step": 6440
},
{
"epoch": 9.03045152257613,
"grad_norm": 0.0003980924666393548,
"learning_rate": 4.104489820182626e-06,
"loss": 0.0104,
"step": 6450
},
{
"epoch": 9.044452222611131,
"grad_norm": 0.018396450206637383,
"learning_rate": 4.088461849033257e-06,
"loss": 0.0001,
"step": 6460
},
{
"epoch": 9.058452922646133,
"grad_norm": 0.0020354725420475006,
"learning_rate": 4.072443560890993e-06,
"loss": 0.0001,
"step": 6470
},
{
"epoch": 9.072453622681135,
"grad_norm": 0.0024896147660911083,
"learning_rate": 4.05643512591349e-06,
"loss": 0.0009,
"step": 6480
},
{
"epoch": 9.086454322716136,
"grad_norm": 0.04813767969608307,
"learning_rate": 4.040436714153742e-06,
"loss": 0.0001,
"step": 6490
},
{
"epoch": 9.100455022751138,
"grad_norm": 0.2827114760875702,
"learning_rate": 4.024448495558267e-06,
"loss": 0.0,
"step": 6500
},
{
"epoch": 9.11445572278614,
"grad_norm": 4.328901290893555,
"learning_rate": 4.008470639965303e-06,
"loss": 0.0007,
"step": 6510
},
{
"epoch": 9.128456422821142,
"grad_norm": 0.0006460743024945259,
"learning_rate": 3.992503317103006e-06,
"loss": 0.062,
"step": 6520
},
{
"epoch": 9.142457122856143,
"grad_norm": 0.00047457695472985506,
"learning_rate": 3.976546696587645e-06,
"loss": 0.0555,
"step": 6530
},
{
"epoch": 9.156457822891145,
"grad_norm": 0.06896835565567017,
"learning_rate": 3.960600947921803e-06,
"loss": 0.001,
"step": 6540
},
{
"epoch": 9.170458522926147,
"grad_norm": 0.0008507549064233899,
"learning_rate": 3.9446662404925726e-06,
"loss": 0.0009,
"step": 6550
},
{
"epoch": 9.184459222961149,
"grad_norm": 0.0036549328360706568,
"learning_rate": 3.9287427435697575e-06,
"loss": 0.0004,
"step": 6560
},
{
"epoch": 9.19845992299615,
"grad_norm": 0.0027635847218334675,
"learning_rate": 3.91283062630408e-06,
"loss": 0.0009,
"step": 6570
},
{
"epoch": 9.212460623031152,
"grad_norm": 0.12736278772354126,
"learning_rate": 3.896930057725372e-06,
"loss": 0.0,
"step": 6580
},
{
"epoch": 9.226461323066154,
"grad_norm": 0.0035878296475857496,
"learning_rate": 3.881041206740793e-06,
"loss": 0.0008,
"step": 6590
},
{
"epoch": 9.240462023101156,
"grad_norm": 0.005411120597273111,
"learning_rate": 3.865164242133032e-06,
"loss": 0.0,
"step": 6600
},
{
"epoch": 9.254462723136157,
"grad_norm": 0.01727963052690029,
"learning_rate": 3.849299332558505e-06,
"loss": 0.0008,
"step": 6610
},
{
"epoch": 9.26846342317116,
"grad_norm": 0.4582098722457886,
"learning_rate": 3.833446646545577e-06,
"loss": 0.0004,
"step": 6620
},
{
"epoch": 9.28246412320616,
"grad_norm": 0.001933308900333941,
"learning_rate": 3.817606352492761e-06,
"loss": 0.0001,
"step": 6630
},
{
"epoch": 9.296464823241163,
"grad_norm": 0.013198823668062687,
"learning_rate": 3.8017786186669392e-06,
"loss": 0.0009,
"step": 6640
},
{
"epoch": 9.310465523276164,
"grad_norm": 0.002318366663530469,
"learning_rate": 3.7859636132015632e-06,
"loss": 0.0585,
"step": 6650
},
{
"epoch": 9.324466223311166,
"grad_norm": 0.0035817010793834925,
"learning_rate": 3.770161504094881e-06,
"loss": 0.0079,
"step": 6660
},
{
"epoch": 9.338466923346168,
"grad_norm": 0.0008065904839895666,
"learning_rate": 3.754372459208144e-06,
"loss": 0.0003,
"step": 6670
},
{
"epoch": 9.35246762338117,
"grad_norm": 0.2741522789001465,
"learning_rate": 3.7385966462638245e-06,
"loss": 0.0034,
"step": 6680
},
{
"epoch": 9.366468323416171,
"grad_norm": 0.00011428318248363212,
"learning_rate": 3.722834232843842e-06,
"loss": 0.0001,
"step": 6690
},
{
"epoch": 9.380469023451173,
"grad_norm": 0.0014890613965690136,
"learning_rate": 3.7070853863877655e-06,
"loss": 0.0,
"step": 6700
},
{
"epoch": 9.394469723486175,
"grad_norm": 0.02655262127518654,
"learning_rate": 3.691350274191057e-06,
"loss": 0.0005,
"step": 6710
},
{
"epoch": 9.408470423521177,
"grad_norm": 0.020148996263742447,
"learning_rate": 3.675629063403278e-06,
"loss": 0.0001,
"step": 6720
},
{
"epoch": 9.422471123556178,
"grad_norm": 0.2881470024585724,
"learning_rate": 3.6599219210263204e-06,
"loss": 0.0033,
"step": 6730
},
{
"epoch": 9.43647182359118,
"grad_norm": 0.038803525269031525,
"learning_rate": 3.6442290139126317e-06,
"loss": 0.0154,
"step": 6740
},
{
"epoch": 9.450472523626182,
"grad_norm": 21.950056076049805,
"learning_rate": 3.628550508763441e-06,
"loss": 0.0041,
"step": 6750
},
{
"epoch": 9.464473223661184,
"grad_norm": 0.0429142527282238,
"learning_rate": 3.612886572126991e-06,
"loss": 0.0,
"step": 6760
},
{
"epoch": 9.478473923696185,
"grad_norm": 0.09250881522893906,
"learning_rate": 3.5972373703967683e-06,
"loss": 0.041,
"step": 6770
},
{
"epoch": 9.492474623731187,
"grad_norm": 0.016302289441227913,
"learning_rate": 3.5816030698097294e-06,
"loss": 0.0006,
"step": 6780
},
{
"epoch": 9.506475323766189,
"grad_norm": 0.025519099086523056,
"learning_rate": 3.5659838364445505e-06,
"loss": 0.0007,
"step": 6790
},
{
"epoch": 9.52047602380119,
"grad_norm": 0.004305595997720957,
"learning_rate": 3.5503798362198394e-06,
"loss": 0.028,
"step": 6800
},
{
"epoch": 9.534476723836192,
"grad_norm": 0.0018996294820681214,
"learning_rate": 3.5347912348924002e-06,
"loss": 0.0001,
"step": 6810
},
{
"epoch": 9.548477423871194,
"grad_norm": 0.016481753438711166,
"learning_rate": 3.5192181980554475e-06,
"loss": 0.0001,
"step": 6820
},
{
"epoch": 9.562478123906196,
"grad_norm": 38.504085540771484,
"learning_rate": 3.5036608911368675e-06,
"loss": 0.0112,
"step": 6830
},
{
"epoch": 9.576478823941198,
"grad_norm": 0.013982472941279411,
"learning_rate": 3.4881194793974483e-06,
"loss": 0.0006,
"step": 6840
},
{
"epoch": 9.5904795239762,
"grad_norm": 0.028334472328424454,
"learning_rate": 3.4725941279291265e-06,
"loss": 0.0005,
"step": 6850
},
{
"epoch": 9.604480224011201,
"grad_norm": 5.324892044067383,
"learning_rate": 3.4570850016532386e-06,
"loss": 0.0011,
"step": 6860
},
{
"epoch": 9.618480924046203,
"grad_norm": 0.023565029725432396,
"learning_rate": 3.4415922653187626e-06,
"loss": 0.0001,
"step": 6870
},
{
"epoch": 9.632481624081205,
"grad_norm": 0.000177843525307253,
"learning_rate": 3.426116083500571e-06,
"loss": 0.0001,
"step": 6880
},
{
"epoch": 9.646482324116207,
"grad_norm": 1.4049161672592163,
"learning_rate": 3.410656620597689e-06,
"loss": 0.0005,
"step": 6890
},
{
"epoch": 9.660483024151208,
"grad_norm": 0.03700033575296402,
"learning_rate": 3.395214040831529e-06,
"loss": 0.0002,
"step": 6900
},
{
"epoch": 9.67448372418621,
"grad_norm": 0.3730657398700714,
"learning_rate": 3.3797885082441717e-06,
"loss": 0.0274,
"step": 6910
},
{
"epoch": 9.688484424221212,
"grad_norm": 0.11278026551008224,
"learning_rate": 3.3643801866965997e-06,
"loss": 0.0001,
"step": 6920
},
{
"epoch": 9.702485124256214,
"grad_norm": 0.00797436386346817,
"learning_rate": 3.348989239866976e-06,
"loss": 0.0057,
"step": 6930
},
{
"epoch": 9.716485824291215,
"grad_norm": 0.0002936197561211884,
"learning_rate": 3.3336158312488935e-06,
"loss": 0.0019,
"step": 6940
},
{
"epoch": 9.730486524326217,
"grad_norm": 0.01316259428858757,
"learning_rate": 3.3182601241496405e-06,
"loss": 0.0096,
"step": 6950
},
{
"epoch": 9.744487224361219,
"grad_norm": 0.001627352088689804,
"learning_rate": 3.3029222816884697e-06,
"loss": 0.042,
"step": 6960
},
{
"epoch": 9.75848792439622,
"grad_norm": 0.05984394624829292,
"learning_rate": 3.2876024667948603e-06,
"loss": 0.0003,
"step": 6970
},
{
"epoch": 9.772488624431222,
"grad_norm": 0.00035447083064354956,
"learning_rate": 3.2723008422067924e-06,
"loss": 0.0547,
"step": 6980
},
{
"epoch": 9.786489324466224,
"grad_norm": 47.53248977661133,
"learning_rate": 3.2570175704690143e-06,
"loss": 0.0149,
"step": 6990
},
{
"epoch": 9.800490024501226,
"grad_norm": 0.0007067213300615549,
"learning_rate": 3.241752813931316e-06,
"loss": 0.0,
"step": 7000
},
{
"epoch": 9.814490724536228,
"grad_norm": 1.0662330389022827,
"learning_rate": 3.2265067347468116e-06,
"loss": 0.0171,
"step": 7010
},
{
"epoch": 9.82849142457123,
"grad_norm": 0.05713279917836189,
"learning_rate": 3.2112794948702027e-06,
"loss": 0.0029,
"step": 7020
},
{
"epoch": 9.842492124606231,
"grad_norm": 0.0015575195429846644,
"learning_rate": 3.1960712560560724e-06,
"loss": 0.0,
"step": 7030
},
{
"epoch": 9.856492824641233,
"grad_norm": 0.008686025626957417,
"learning_rate": 3.1808821798571585e-06,
"loss": 0.0037,
"step": 7040
},
{
"epoch": 9.870493524676235,
"grad_norm": 0.4814838767051697,
"learning_rate": 3.1657124276226415e-06,
"loss": 0.0004,
"step": 7050
},
{
"epoch": 9.884494224711236,
"grad_norm": 0.14743080735206604,
"learning_rate": 3.1505621604964277e-06,
"loss": 0.0199,
"step": 7060
},
{
"epoch": 9.898494924746238,
"grad_norm": 0.002065706066787243,
"learning_rate": 3.1354315394154377e-06,
"loss": 0.0,
"step": 7070
},
{
"epoch": 9.91249562478124,
"grad_norm": 0.0025627650320529938,
"learning_rate": 3.1203207251079003e-06,
"loss": 0.0017,
"step": 7080
},
{
"epoch": 9.926496324816242,
"grad_norm": 0.01459525153040886,
"learning_rate": 3.105229878091641e-06,
"loss": 0.0855,
"step": 7090
},
{
"epoch": 9.940497024851243,
"grad_norm": 0.001635802211239934,
"learning_rate": 3.0901591586723777e-06,
"loss": 0.0351,
"step": 7100
},
{
"epoch": 9.954497724886245,
"grad_norm": 0.0009384243749082088,
"learning_rate": 3.0751087269420244e-06,
"loss": 0.0331,
"step": 7110
},
{
"epoch": 9.968498424921247,
"grad_norm": 0.029788050800561905,
"learning_rate": 3.060078742776975e-06,
"loss": 0.0,
"step": 7120
},
{
"epoch": 9.982499124956249,
"grad_norm": 0.017946625128388405,
"learning_rate": 3.0450693658364243e-06,
"loss": 0.0034,
"step": 7130
},
{
"epoch": 9.99649982499125,
"grad_norm": 0.9889459609985352,
"learning_rate": 3.030080755560656e-06,
"loss": 0.0018,
"step": 7140
},
{
"epoch": 9.99929996499825,
"eval_f1": 0.8204141390277229,
"eval_loss": 0.7978833317756653,
"eval_precision": 0.8269728499510313,
"eval_recall": 0.8201539538138558,
"eval_runtime": 85.2249,
"eval_samples_per_second": 16.767,
"eval_steps_per_second": 8.39,
"step": 7142
},
{
"epoch": 10.01050052502625,
"grad_norm": 0.011208614334464073,
"learning_rate": 3.015113071169359e-06,
"loss": 0.0005,
"step": 7150
},
{
"epoch": 10.024501225061252,
"grad_norm": 0.0011321509955450892,
"learning_rate": 3.000166471659929e-06,
"loss": 0.0001,
"step": 7160
},
{
"epoch": 10.038501925096254,
"grad_norm": 0.0020235786214470863,
"learning_rate": 2.985241115805788e-06,
"loss": 0.0007,
"step": 7170
},
{
"epoch": 10.052502625131256,
"grad_norm": 0.9804045557975769,
"learning_rate": 2.9703371621546908e-06,
"loss": 0.0001,
"step": 7180
},
{
"epoch": 10.066503325166257,
"grad_norm": 0.017487866804003716,
"learning_rate": 2.955454769027039e-06,
"loss": 0.001,
"step": 7190
},
{
"epoch": 10.08050402520126,
"grad_norm": 0.012673470191657543,
"learning_rate": 2.9405940945142106e-06,
"loss": 0.0008,
"step": 7200
},
{
"epoch": 10.094504725236261,
"grad_norm": 0.0016335515538230538,
"learning_rate": 2.9257552964768644e-06,
"loss": 0.0,
"step": 7210
},
{
"epoch": 10.108505425271263,
"grad_norm": 0.003731638891622424,
"learning_rate": 2.9109385325432793e-06,
"loss": 0.0,
"step": 7220
},
{
"epoch": 10.122506125306264,
"grad_norm": 0.007333674468100071,
"learning_rate": 2.8961439601076667e-06,
"loss": 0.0085,
"step": 7230
},
{
"epoch": 10.136506825341266,
"grad_norm": 17.61123275756836,
"learning_rate": 2.881371736328506e-06,
"loss": 0.0083,
"step": 7240
},
{
"epoch": 10.150507525376268,
"grad_norm": 2.581366777420044,
"learning_rate": 2.866622018126876e-06,
"loss": 0.0126,
"step": 7250
},
{
"epoch": 10.16450822541127,
"grad_norm": 0.0004104816180188209,
"learning_rate": 2.8518949621847793e-06,
"loss": 0.0001,
"step": 7260
},
{
"epoch": 10.178508925446272,
"grad_norm": 0.05607493594288826,
"learning_rate": 2.8371907249434917e-06,
"loss": 0.0216,
"step": 7270
},
{
"epoch": 10.192509625481273,
"grad_norm": 0.0017281303880736232,
"learning_rate": 2.822509462601886e-06,
"loss": 0.0,
"step": 7280
},
{
"epoch": 10.206510325516275,
"grad_norm": 0.010942882858216763,
"learning_rate": 2.807851331114778e-06,
"loss": 0.0,
"step": 7290
},
{
"epoch": 10.220511025551277,
"grad_norm": 0.0011507336748763919,
"learning_rate": 2.7932164861912805e-06,
"loss": 0.0008,
"step": 7300
},
{
"epoch": 10.234511725586279,
"grad_norm": 0.0005932246567681432,
"learning_rate": 2.778605083293131e-06,
"loss": 0.0,
"step": 7310
},
{
"epoch": 10.24851242562128,
"grad_norm": 1.4911444187164307,
"learning_rate": 2.7640172776330504e-06,
"loss": 0.0028,
"step": 7320
},
{
"epoch": 10.262513125656282,
"grad_norm": 0.015291115269064903,
"learning_rate": 2.7494532241730974e-06,
"loss": 0.0,
"step": 7330
},
{
"epoch": 10.276513825691284,
"grad_norm": 0.0419706292450428,
"learning_rate": 2.7349130776230132e-06,
"loss": 0.0,
"step": 7340
},
{
"epoch": 10.290514525726286,
"grad_norm": 0.0022001699544489384,
"learning_rate": 2.7203969924385885e-06,
"loss": 0.0,
"step": 7350
},
{
"epoch": 10.304515225761287,
"grad_norm": 0.010627568699419498,
"learning_rate": 2.705905122820006e-06,
"loss": 0.0001,
"step": 7360
},
{
"epoch": 10.318515925796289,
"grad_norm": 0.005599226802587509,
"learning_rate": 2.6914376227102266e-06,
"loss": 0.0,
"step": 7370
},
{
"epoch": 10.33251662583129,
"grad_norm": 0.003463909961283207,
"learning_rate": 2.676994645793331e-06,
"loss": 0.0001,
"step": 7380
},
{
"epoch": 10.346517325866293,
"grad_norm": 0.003562136786058545,
"learning_rate": 2.6625763454929048e-06,
"loss": 0.03,
"step": 7390
},
{
"epoch": 10.360518025901294,
"grad_norm": 0.007249193266034126,
"learning_rate": 2.648182874970395e-06,
"loss": 0.0,
"step": 7400
},
{
"epoch": 10.374518725936296,
"grad_norm": 0.0003962105547543615,
"learning_rate": 2.6338143871234905e-06,
"loss": 0.0009,
"step": 7410
},
{
"epoch": 10.388519425971298,
"grad_norm": 0.0006904040928930044,
"learning_rate": 2.6194710345845e-06,
"loss": 0.0001,
"step": 7420
},
{
"epoch": 10.4025201260063,
"grad_norm": 2.711101770401001,
"learning_rate": 2.6051529697187227e-06,
"loss": 0.0022,
"step": 7430
},
{
"epoch": 10.416520826041301,
"grad_norm": 0.08761586248874664,
"learning_rate": 2.5908603446228333e-06,
"loss": 0.0,
"step": 7440
},
{
"epoch": 10.430521526076303,
"grad_norm": 0.009707544930279255,
"learning_rate": 2.5765933111232734e-06,
"loss": 0.0003,
"step": 7450
},
{
"epoch": 10.444522226111305,
"grad_norm": 0.0028569665737450123,
"learning_rate": 2.5623520207746254e-06,
"loss": 0.0101,
"step": 7460
},
{
"epoch": 10.458522926146307,
"grad_norm": 0.12865599989891052,
"learning_rate": 2.5481366248580165e-06,
"loss": 0.0001,
"step": 7470
},
{
"epoch": 10.472523626181308,
"grad_norm": 0.0029542180709540844,
"learning_rate": 2.533947274379499e-06,
"loss": 0.0,
"step": 7480
},
{
"epoch": 10.48652432621631,
"grad_norm": 0.00011413331230869517,
"learning_rate": 2.5197841200684525e-06,
"loss": 0.0,
"step": 7490
},
{
"epoch": 10.500525026251312,
"grad_norm": 0.0017983964644372463,
"learning_rate": 2.5056473123759872e-06,
"loss": 0.0016,
"step": 7500
},
{
"epoch": 10.514525726286314,
"grad_norm": 0.0005576438270509243,
"learning_rate": 2.4915370014733365e-06,
"loss": 0.0012,
"step": 7510
},
{
"epoch": 10.528526426321315,
"grad_norm": 0.001333917840383947,
"learning_rate": 2.4774533372502657e-06,
"loss": 0.0143,
"step": 7520
},
{
"epoch": 10.542527126356317,
"grad_norm": 0.00315807550214231,
"learning_rate": 2.463396469313481e-06,
"loss": 0.0004,
"step": 7530
},
{
"epoch": 10.556527826391319,
"grad_norm": 0.00039284565718844533,
"learning_rate": 2.449366546985042e-06,
"loss": 0.0165,
"step": 7540
},
{
"epoch": 10.57052852642632,
"grad_norm": 0.00022277185053098947,
"learning_rate": 2.43536371930077e-06,
"loss": 0.0028,
"step": 7550
},
{
"epoch": 10.584529226461322,
"grad_norm": 0.0015283463289961219,
"learning_rate": 2.421388135008666e-06,
"loss": 0.0012,
"step": 7560
},
{
"epoch": 10.598529926496324,
"grad_norm": 21.00211524963379,
"learning_rate": 2.407439942567339e-06,
"loss": 0.0632,
"step": 7570
},
{
"epoch": 10.612530626531326,
"grad_norm": 0.7737404704093933,
"learning_rate": 2.3935192901444127e-06,
"loss": 0.0001,
"step": 7580
},
{
"epoch": 10.626531326566328,
"grad_norm": 0.000299283565254882,
"learning_rate": 2.3796263256149715e-06,
"loss": 0.0001,
"step": 7590
},
{
"epoch": 10.64053202660133,
"grad_norm": 0.04837600886821747,
"learning_rate": 2.365761196559972e-06,
"loss": 0.0008,
"step": 7600
},
{
"epoch": 10.654532726636331,
"grad_norm": 0.00024079847207758576,
"learning_rate": 2.3519240502646822e-06,
"loss": 0.0185,
"step": 7610
},
{
"epoch": 10.668533426671333,
"grad_norm": 0.0012332991464063525,
"learning_rate": 2.338115033717124e-06,
"loss": 0.0001,
"step": 7620
},
{
"epoch": 10.682534126706335,
"grad_norm": 26.770566940307617,
"learning_rate": 2.324334293606499e-06,
"loss": 0.008,
"step": 7630
},
{
"epoch": 10.696534826741336,
"grad_norm": 0.1293647587299347,
"learning_rate": 2.310581976321638e-06,
"loss": 0.0,
"step": 7640
},
{
"epoch": 10.710535526776338,
"grad_norm": 0.004871649201959372,
"learning_rate": 2.2968582279494432e-06,
"loss": 0.0001,
"step": 7650
},
{
"epoch": 10.72453622681134,
"grad_norm": 2.857433319091797,
"learning_rate": 2.2831631942733406e-06,
"loss": 0.0006,
"step": 7660
},
{
"epoch": 10.738536926846342,
"grad_norm": 0.001171862706542015,
"learning_rate": 2.269497020771728e-06,
"loss": 0.0131,
"step": 7670
},
{
"epoch": 10.752537626881344,
"grad_norm": 0.01222603302448988,
"learning_rate": 2.2558598526164265e-06,
"loss": 0.0148,
"step": 7680
},
{
"epoch": 10.766538326916345,
"grad_norm": 2.3237972259521484,
"learning_rate": 2.2422518346711445e-06,
"loss": 0.0002,
"step": 7690
},
{
"epoch": 10.780539026951347,
"grad_norm": 0.012020766735076904,
"learning_rate": 2.2286731114899322e-06,
"loss": 0.0,
"step": 7700
},
{
"epoch": 10.794539726986349,
"grad_norm": 0.0029932681936770678,
"learning_rate": 2.2151238273156577e-06,
"loss": 0.037,
"step": 7710
},
{
"epoch": 10.80854042702135,
"grad_norm": 0.0010985223343595862,
"learning_rate": 2.2016041260784604e-06,
"loss": 0.0119,
"step": 7720
},
{
"epoch": 10.822541127056352,
"grad_norm": 0.046439751982688904,
"learning_rate": 2.188114151394228e-06,
"loss": 0.0001,
"step": 7730
},
{
"epoch": 10.836541827091354,
"grad_norm": 0.000295175559585914,
"learning_rate": 2.1746540465630784e-06,
"loss": 0.0,
"step": 7740
},
{
"epoch": 10.850542527126356,
"grad_norm": 0.0027245362289249897,
"learning_rate": 2.1612239545678234e-06,
"loss": 0.0061,
"step": 7750
},
{
"epoch": 10.864543227161358,
"grad_norm": 0.00551482243463397,
"learning_rate": 2.1478240180724646e-06,
"loss": 0.0036,
"step": 7760
},
{
"epoch": 10.87854392719636,
"grad_norm": 0.0002639990416355431,
"learning_rate": 2.134454379420659e-06,
"loss": 0.0321,
"step": 7770
},
{
"epoch": 10.892544627231361,
"grad_norm": 0.004308238625526428,
"learning_rate": 2.1211151806342294e-06,
"loss": 0.0003,
"step": 7780
},
{
"epoch": 10.906545327266363,
"grad_norm": 7.97113037109375,
"learning_rate": 2.107806563411643e-06,
"loss": 0.0011,
"step": 7790
},
{
"epoch": 10.920546027301365,
"grad_norm": 0.001049870508722961,
"learning_rate": 2.0945286691265016e-06,
"loss": 0.0089,
"step": 7800
},
{
"epoch": 10.934546727336366,
"grad_norm": 0.00010801222379086539,
"learning_rate": 2.081281638826052e-06,
"loss": 0.0,
"step": 7810
},
{
"epoch": 10.948547427371368,
"grad_norm": 0.0027724995743483305,
"learning_rate": 2.0680656132296766e-06,
"loss": 0.0005,
"step": 7820
},
{
"epoch": 10.96254812740637,
"grad_norm": 0.0011577574769034982,
"learning_rate": 2.05488073272741e-06,
"loss": 0.0009,
"step": 7830
},
{
"epoch": 10.976548827441372,
"grad_norm": 23.115427017211914,
"learning_rate": 2.0417271373784403e-06,
"loss": 0.0033,
"step": 7840
},
{
"epoch": 10.990549527476373,
"grad_norm": 0.10808968544006348,
"learning_rate": 2.0286049669096147e-06,
"loss": 0.0039,
"step": 7850
},
{
"epoch": 10.998949947497374,
"eval_f1": 0.8251300522683318,
"eval_loss": 0.8002111315727234,
"eval_precision": 0.8270740039811031,
"eval_recall": 0.8250524842547236,
"eval_runtime": 85.4056,
"eval_samples_per_second": 16.732,
"eval_steps_per_second": 8.372,
"step": 7856
},
{
"epoch": 11.004550227511375,
"grad_norm": 0.009638884104788303,
"learning_rate": 2.015514360713974e-06,
"loss": 0.0212,
"step": 7860
},
{
"epoch": 11.018550927546377,
"grad_norm": 0.0014148970367386937,
"learning_rate": 2.0024554578492513e-06,
"loss": 0.0267,
"step": 7870
},
{
"epoch": 11.032551627581379,
"grad_norm": 0.0003119578759651631,
"learning_rate": 1.9894283970364135e-06,
"loss": 0.0,
"step": 7880
},
{
"epoch": 11.04655232761638,
"grad_norm": 0.015133386477828026,
"learning_rate": 1.976433316658168e-06,
"loss": 0.0,
"step": 7890
},
{
"epoch": 11.060553027651382,
"grad_norm": 0.00046241507516242564,
"learning_rate": 1.963470354757512e-06,
"loss": 0.0,
"step": 7900
},
{
"epoch": 11.074553727686384,
"grad_norm": 0.019241634756326675,
"learning_rate": 1.950539649036255e-06,
"loss": 0.0,
"step": 7910
},
{
"epoch": 11.088554427721386,
"grad_norm": 0.00012300013622734696,
"learning_rate": 1.9376413368535575e-06,
"loss": 0.0398,
"step": 7920
},
{
"epoch": 11.102555127756387,
"grad_norm": 0.018908197060227394,
"learning_rate": 1.924775555224472e-06,
"loss": 0.0323,
"step": 7930
},
{
"epoch": 11.11655582779139,
"grad_norm": 0.0012662785593420267,
"learning_rate": 1.911942440818487e-06,
"loss": 0.0,
"step": 7940
},
{
"epoch": 11.130556527826391,
"grad_norm": 0.0013872645795345306,
"learning_rate": 1.899142129958082e-06,
"loss": 0.0,
"step": 7950
},
{
"epoch": 11.144557227861393,
"grad_norm": 0.031151611357927322,
"learning_rate": 1.8863747586172731e-06,
"loss": 0.0236,
"step": 7960
},
{
"epoch": 11.158557927896394,
"grad_norm": 0.00018219469347968698,
"learning_rate": 1.8736404624201605e-06,
"loss": 0.0,
"step": 7970
},
{
"epoch": 11.172558627931396,
"grad_norm": 0.001954052597284317,
"learning_rate": 1.8609393766395083e-06,
"loss": 0.0,
"step": 7980
},
{
"epoch": 11.186559327966398,
"grad_norm": 0.000496099004521966,
"learning_rate": 1.8482716361952868e-06,
"loss": 0.0001,
"step": 7990
},
{
"epoch": 11.2005600280014,
"grad_norm": 0.0025121436920017004,
"learning_rate": 1.8356373756532557e-06,
"loss": 0.0472,
"step": 8000
},
{
"epoch": 11.214560728036401,
"grad_norm": 0.0013273832155391574,
"learning_rate": 1.8230367292235234e-06,
"loss": 0.0,
"step": 8010
},
{
"epoch": 11.228561428071403,
"grad_norm": 0.0005976692191325128,
"learning_rate": 1.810469830759123e-06,
"loss": 0.021,
"step": 8020
},
{
"epoch": 11.242562128106405,
"grad_norm": 0.00048146533663384616,
"learning_rate": 1.7979368137545988e-06,
"loss": 0.0,
"step": 8030
},
{
"epoch": 11.256562828141407,
"grad_norm": 0.0011883461847901344,
"learning_rate": 1.785437811344578e-06,
"loss": 0.0,
"step": 8040
},
{
"epoch": 11.270563528176408,
"grad_norm": 0.06734263896942139,
"learning_rate": 1.7729729563023613e-06,
"loss": 0.0,
"step": 8050
},
{
"epoch": 11.28456422821141,
"grad_norm": 0.001917374669574201,
"learning_rate": 1.7605423810385097e-06,
"loss": 0.0,
"step": 8060
},
{
"epoch": 11.298564928246412,
"grad_norm": 0.000647062377538532,
"learning_rate": 1.7481462175994447e-06,
"loss": 0.0,
"step": 8070
},
{
"epoch": 11.312565628281414,
"grad_norm": 0.11232136934995651,
"learning_rate": 1.7357845976660386e-06,
"loss": 0.0001,
"step": 8080
},
{
"epoch": 11.326566328316416,
"grad_norm": 0.0012726177228614688,
"learning_rate": 1.7234576525522172e-06,
"loss": 0.0001,
"step": 8090
},
{
"epoch": 11.340567028351417,
"grad_norm": 0.00020207905618008226,
"learning_rate": 1.7111655132035665e-06,
"loss": 0.0,
"step": 8100
},
{
"epoch": 11.354567728386419,
"grad_norm": 0.02561323344707489,
"learning_rate": 1.698908310195938e-06,
"loss": 0.0,
"step": 8110
},
{
"epoch": 11.36856842842142,
"grad_norm": 0.0024278999771922827,
"learning_rate": 1.6866861737340705e-06,
"loss": 0.0,
"step": 8120
},
{
"epoch": 11.382569128456423,
"grad_norm": 0.0060219429433345795,
"learning_rate": 1.674499233650197e-06,
"loss": 0.0,
"step": 8130
},
{
"epoch": 11.396569828491424,
"grad_norm": 1.4449529647827148,
"learning_rate": 1.6623476194026678e-06,
"loss": 0.0002,
"step": 8140
},
{
"epoch": 11.410570528526426,
"grad_norm": 0.002635002601891756,
"learning_rate": 1.6502314600745828e-06,
"loss": 0.0001,
"step": 8150
},
{
"epoch": 11.424571228561428,
"grad_norm": 0.003079216228798032,
"learning_rate": 1.6381508843724075e-06,
"loss": 0.0,
"step": 8160
},
{
"epoch": 11.43857192859643,
"grad_norm": 0.00011024038394680247,
"learning_rate": 1.6261060206246199e-06,
"loss": 0.0002,
"step": 8170
},
{
"epoch": 11.452572628631431,
"grad_norm": 0.0009657694026827812,
"learning_rate": 1.6140969967803355e-06,
"loss": 0.0,
"step": 8180
},
{
"epoch": 11.466573328666433,
"grad_norm": 0.8496362566947937,
"learning_rate": 1.6021239404079513e-06,
"loss": 0.0001,
"step": 8190
},
{
"epoch": 11.480574028701435,
"grad_norm": 0.0027748725842684507,
"learning_rate": 1.590186978693799e-06,
"loss": 0.0,
"step": 8200
},
{
"epoch": 11.494574728736437,
"grad_norm": 0.0007704569143243134,
"learning_rate": 1.5782862384407816e-06,
"loss": 0.0123,
"step": 8210
},
{
"epoch": 11.508575428771438,
"grad_norm": 0.009385130368173122,
"learning_rate": 1.5664218460670327e-06,
"loss": 0.0026,
"step": 8220
},
{
"epoch": 11.52257612880644,
"grad_norm": 0.06345506012439728,
"learning_rate": 1.554593927604573e-06,
"loss": 0.0003,
"step": 8230
},
{
"epoch": 11.536576828841442,
"grad_norm": 0.0005839611403644085,
"learning_rate": 1.5428026086979736e-06,
"loss": 0.0,
"step": 8240
},
{
"epoch": 11.550577528876444,
"grad_norm": 0.002742344280704856,
"learning_rate": 1.531048014603017e-06,
"loss": 0.0,
"step": 8250
},
{
"epoch": 11.564578228911445,
"grad_norm": 0.0002546895411796868,
"learning_rate": 1.5193302701853674e-06,
"loss": 0.0,
"step": 8260
},
{
"epoch": 11.578578928946447,
"grad_norm": 0.004499376751482487,
"learning_rate": 1.5076494999192498e-06,
"loss": 0.0,
"step": 8270
},
{
"epoch": 11.592579628981449,
"grad_norm": 0.0029184112790971994,
"learning_rate": 1.4960058278861172e-06,
"loss": 0.0,
"step": 8280
},
{
"epoch": 11.60658032901645,
"grad_norm": 0.0003372172359377146,
"learning_rate": 1.4843993777733467e-06,
"loss": 0.0,
"step": 8290
},
{
"epoch": 11.620581029051452,
"grad_norm": 0.016875434666872025,
"learning_rate": 1.4728302728729105e-06,
"loss": 0.0288,
"step": 8300
},
{
"epoch": 11.634581729086454,
"grad_norm": 0.0007035748567432165,
"learning_rate": 1.4612986360800751e-06,
"loss": 0.0,
"step": 8310
},
{
"epoch": 11.648582429121456,
"grad_norm": 0.004649253562092781,
"learning_rate": 1.4498045898920988e-06,
"loss": 0.0,
"step": 8320
},
{
"epoch": 11.662583129156458,
"grad_norm": 0.0004209627804812044,
"learning_rate": 1.4383482564069195e-06,
"loss": 0.0002,
"step": 8330
},
{
"epoch": 11.67658382919146,
"grad_norm": 0.9587376713752747,
"learning_rate": 1.4269297573218648e-06,
"loss": 0.0001,
"step": 8340
},
{
"epoch": 11.690584529226461,
"grad_norm": 0.0003986161027569324,
"learning_rate": 1.4155492139323645e-06,
"loss": 0.0008,
"step": 8350
},
{
"epoch": 11.704585229261463,
"grad_norm": 0.006131873466074467,
"learning_rate": 1.4042067471306475e-06,
"loss": 0.0,
"step": 8360
},
{
"epoch": 11.718585929296465,
"grad_norm": 0.001703021116554737,
"learning_rate": 1.3929024774044748e-06,
"loss": 0.0001,
"step": 8370
},
{
"epoch": 11.732586629331466,
"grad_norm": 0.008844499476253986,
"learning_rate": 1.3816365248358404e-06,
"loss": 0.0,
"step": 8380
},
{
"epoch": 11.746587329366468,
"grad_norm": 0.00026406109100207686,
"learning_rate": 1.3704090090997163e-06,
"loss": 0.0,
"step": 8390
},
{
"epoch": 11.76058802940147,
"grad_norm": 0.014780262485146523,
"learning_rate": 1.3592200494627634e-06,
"loss": 0.0085,
"step": 8400
},
{
"epoch": 11.774588729436472,
"grad_norm": 0.0006615730235353112,
"learning_rate": 1.3480697647820796e-06,
"loss": 0.0001,
"step": 8410
},
{
"epoch": 11.788589429471473,
"grad_norm": 0.028282400220632553,
"learning_rate": 1.3369582735039232e-06,
"loss": 0.0,
"step": 8420
},
{
"epoch": 11.802590129506475,
"grad_norm": 0.0006141592748463154,
"learning_rate": 1.3258856936624636e-06,
"loss": 0.0189,
"step": 8430
},
{
"epoch": 11.816590829541477,
"grad_norm": 0.08630286157131195,
"learning_rate": 1.3148521428785287e-06,
"loss": 0.0001,
"step": 8440
},
{
"epoch": 11.830591529576479,
"grad_norm": 0.020284445956349373,
"learning_rate": 1.3038577383583474e-06,
"loss": 0.0,
"step": 8450
},
{
"epoch": 11.84459222961148,
"grad_norm": 5.585657119750977,
"learning_rate": 1.2929025968923082e-06,
"loss": 0.0127,
"step": 8460
},
{
"epoch": 11.858592929646482,
"grad_norm": 0.0004054057062603533,
"learning_rate": 1.2819868348537263e-06,
"loss": 0.0002,
"step": 8470
},
{
"epoch": 11.872593629681484,
"grad_norm": 0.002798704197630286,
"learning_rate": 1.2711105681975927e-06,
"loss": 0.0002,
"step": 8480
},
{
"epoch": 11.886594329716486,
"grad_norm": 0.005588957108557224,
"learning_rate": 1.2602739124593572e-06,
"loss": 0.0031,
"step": 8490
},
{
"epoch": 11.900595029751488,
"grad_norm": 0.0005446571158245206,
"learning_rate": 1.249476982753689e-06,
"loss": 0.0,
"step": 8500
},
{
"epoch": 11.91459572978649,
"grad_norm": 0.0002528883924242109,
"learning_rate": 1.2387198937732597e-06,
"loss": 0.0,
"step": 8510
},
{
"epoch": 11.928596429821491,
"grad_norm": 0.0024309209547936916,
"learning_rate": 1.2280027597875288e-06,
"loss": 0.0,
"step": 8520
},
{
"epoch": 11.942597129856493,
"grad_norm": 0.6172399520874023,
"learning_rate": 1.2173256946415214e-06,
"loss": 0.0002,
"step": 8530
},
{
"epoch": 11.956597829891495,
"grad_norm": 0.07266847789287567,
"learning_rate": 1.2066888117546227e-06,
"loss": 0.0,
"step": 8540
},
{
"epoch": 11.970598529926496,
"grad_norm": 0.23300260305404663,
"learning_rate": 1.196092224119374e-06,
"loss": 0.0001,
"step": 8550
},
{
"epoch": 11.984599229961498,
"grad_norm": 0.0053339735604822636,
"learning_rate": 1.1855360443002728e-06,
"loss": 0.0,
"step": 8560
},
{
"epoch": 11.9985999299965,
"grad_norm": 0.0010158346267417073,
"learning_rate": 1.1750203844325787e-06,
"loss": 0.0,
"step": 8570
},
{
"epoch": 12.0,
"eval_f1": 0.8286292301679241,
"eval_loss": 0.79404217004776,
"eval_precision": 0.8305788792224167,
"eval_recall": 0.8285514345696291,
"eval_runtime": 85.049,
"eval_samples_per_second": 16.802,
"eval_steps_per_second": 8.407,
"step": 8571
},
{
"epoch": 12.012600630031502,
"grad_norm": 0.005007775500416756,
"learning_rate": 1.1645453562211101e-06,
"loss": 0.0,
"step": 8580
},
{
"epoch": 12.026601330066503,
"grad_norm": 0.9903329610824585,
"learning_rate": 1.1541110709390786e-06,
"loss": 0.0004,
"step": 8590
},
{
"epoch": 12.040602030101505,
"grad_norm": 0.0004595453501679003,
"learning_rate": 1.143717639426885e-06,
"loss": 0.0,
"step": 8600
},
{
"epoch": 12.054602730136507,
"grad_norm": 0.00516974413767457,
"learning_rate": 1.1333651720909621e-06,
"loss": 0.0,
"step": 8610
},
{
"epoch": 12.068603430171509,
"grad_norm": 0.00244425842538476,
"learning_rate": 1.1230537789025847e-06,
"loss": 0.0,
"step": 8620
},
{
"epoch": 12.08260413020651,
"grad_norm": 0.0008475360809825361,
"learning_rate": 1.1127835693967104e-06,
"loss": 0.0,
"step": 8630
},
{
"epoch": 12.096604830241512,
"grad_norm": 0.0027746877167373896,
"learning_rate": 1.1025546526708176e-06,
"loss": 0.0,
"step": 8640
},
{
"epoch": 12.110605530276514,
"grad_norm": 0.0005417768843472004,
"learning_rate": 1.0923671373837403e-06,
"loss": 0.0001,
"step": 8650
},
{
"epoch": 12.124606230311516,
"grad_norm": 0.0007752656820230186,
"learning_rate": 1.0822211317545167e-06,
"loss": 0.0,
"step": 8660
},
{
"epoch": 12.138606930346517,
"grad_norm": 0.0021129120141267776,
"learning_rate": 1.0721167435612402e-06,
"loss": 0.0,
"step": 8670
},
{
"epoch": 12.15260763038152,
"grad_norm": 0.010015271604061127,
"learning_rate": 1.062054080139916e-06,
"loss": 0.0,
"step": 8680
},
{
"epoch": 12.166608330416521,
"grad_norm": 0.00013190499157644808,
"learning_rate": 1.052033248383319e-06,
"loss": 0.0,
"step": 8690
},
{
"epoch": 12.180609030451523,
"grad_norm": 0.0027459298726171255,
"learning_rate": 1.0420543547398566e-06,
"loss": 0.0,
"step": 8700
},
{
"epoch": 12.194609730486524,
"grad_norm": 0.0001294492685701698,
"learning_rate": 1.03211750521244e-06,
"loss": 0.0029,
"step": 8710
},
{
"epoch": 12.208610430521526,
"grad_norm": 8.123584120767191e-05,
"learning_rate": 1.0222228053573568e-06,
"loss": 0.0,
"step": 8720
},
{
"epoch": 12.222611130556528,
"grad_norm": 0.06022586300969124,
"learning_rate": 1.0123703602831554e-06,
"loss": 0.0012,
"step": 8730
},
{
"epoch": 12.23661183059153,
"grad_norm": 0.0007046872633509338,
"learning_rate": 1.0025602746495189e-06,
"loss": 0.008,
"step": 8740
},
{
"epoch": 12.250612530626531,
"grad_norm": 0.01181920524686575,
"learning_rate": 9.92792652666159e-07,
"loss": 0.0,
"step": 8750
},
{
"epoch": 12.264613230661533,
"grad_norm": 0.0006115382420830429,
"learning_rate": 9.83067598091712e-07,
"loss": 0.0001,
"step": 8760
},
{
"epoch": 12.278613930696535,
"grad_norm": 0.010715884156525135,
"learning_rate": 9.733852142326272e-07,
"loss": 0.0,
"step": 8770
},
{
"epoch": 12.292614630731537,
"grad_norm": 0.0034116925671696663,
"learning_rate": 9.637456039420822e-07,
"loss": 0.0004,
"step": 8780
},
{
"epoch": 12.306615330766538,
"grad_norm": 0.006015400402247906,
"learning_rate": 9.54148869618875e-07,
"loss": 0.0286,
"step": 8790
},
{
"epoch": 12.32061603080154,
"grad_norm": 0.0004726126790046692,
"learning_rate": 9.445951132063502e-07,
"loss": 0.0003,
"step": 8800
},
{
"epoch": 12.334616730836542,
"grad_norm": 0.00167147780302912,
"learning_rate": 9.350844361913109e-07,
"loss": 0.0,
"step": 8810
},
{
"epoch": 12.348617430871544,
"grad_norm": 0.0014586917823180556,
"learning_rate": 9.256169396029374e-07,
"loss": 0.0001,
"step": 8820
},
{
"epoch": 12.362618130906545,
"grad_norm": 0.01397186890244484,
"learning_rate": 9.161927240117174e-07,
"loss": 0.0,
"step": 8830
},
{
"epoch": 12.376618830941547,
"grad_norm": 0.07062353193759918,
"learning_rate": 9.068118895283762e-07,
"loss": 0.0,
"step": 8840
},
{
"epoch": 12.390619530976549,
"grad_norm": 7.136345811886713e-05,
"learning_rate": 8.974745358028181e-07,
"loss": 0.0,
"step": 8850
},
{
"epoch": 12.40462023101155,
"grad_norm": 0.010756843723356724,
"learning_rate": 8.881807620230592e-07,
"loss": 0.0,
"step": 8860
},
{
"epoch": 12.418620931046553,
"grad_norm": 0.0017550095217302442,
"learning_rate": 8.789306669141795e-07,
"loss": 0.0,
"step": 8870
},
{
"epoch": 12.432621631081554,
"grad_norm": 6.513830661773682,
"learning_rate": 8.697243487372758e-07,
"loss": 0.0009,
"step": 8880
},
{
"epoch": 12.446622331116556,
"grad_norm": 0.014165784232318401,
"learning_rate": 8.605619052884106e-07,
"loss": 0.0,
"step": 8890
},
{
"epoch": 12.460623031151558,
"grad_norm": 0.00038699989090673625,
"learning_rate": 8.514434338975836e-07,
"loss": 0.0,
"step": 8900
},
{
"epoch": 12.47462373118656,
"grad_norm": 0.6719708442687988,
"learning_rate": 8.423690314276872e-07,
"loss": 0.0001,
"step": 8910
},
{
"epoch": 12.488624431221561,
"grad_norm": 0.0005971363862045109,
"learning_rate": 8.333387942734822e-07,
"loss": 0.0001,
"step": 8920
},
{
"epoch": 12.502625131256563,
"grad_norm": 0.03569694980978966,
"learning_rate": 8.243528183605782e-07,
"loss": 0.0,
"step": 8930
},
{
"epoch": 12.516625831291565,
"grad_norm": 0.0026051411405205727,
"learning_rate": 8.154111991444075e-07,
"loss": 0.0001,
"step": 8940
},
{
"epoch": 12.530626531326567,
"grad_norm": 0.00022519452613778412,
"learning_rate": 8.065140316092135e-07,
"loss": 0.0,
"step": 8950
},
{
"epoch": 12.544627231361568,
"grad_norm": 0.27866029739379883,
"learning_rate": 7.976614102670422e-07,
"loss": 0.0,
"step": 8960
},
{
"epoch": 12.55862793139657,
"grad_norm": 0.00033088220516219735,
"learning_rate": 7.888534291567401e-07,
"loss": 0.0,
"step": 8970
},
{
"epoch": 12.572628631431572,
"grad_norm": 0.0004716921248473227,
"learning_rate": 7.800901818429551e-07,
"loss": 0.0168,
"step": 8980
},
{
"epoch": 12.586629331466574,
"grad_norm": 8.946736488724127e-05,
"learning_rate": 7.713717614151334e-07,
"loss": 0.0,
"step": 8990
},
{
"epoch": 12.600630031501575,
"grad_norm": 0.0020332683343440294,
"learning_rate": 7.626982604865457e-07,
"loss": 0.0167,
"step": 9000
},
{
"epoch": 12.614630731536577,
"grad_norm": 0.0020869935397058725,
"learning_rate": 7.540697711932915e-07,
"loss": 0.0001,
"step": 9010
},
{
"epoch": 12.628631431571579,
"grad_norm": 0.051655784249305725,
"learning_rate": 7.45486385193328e-07,
"loss": 0.0,
"step": 9020
},
{
"epoch": 12.64263213160658,
"grad_norm": 0.0015947711654007435,
"learning_rate": 7.369481936654899e-07,
"loss": 0.0,
"step": 9030
},
{
"epoch": 12.656632831641582,
"grad_norm": 0.0003485260531306267,
"learning_rate": 7.284552873085249e-07,
"loss": 0.0004,
"step": 9040
},
{
"epoch": 12.670633531676584,
"grad_norm": 0.000255667808232829,
"learning_rate": 7.200077563401314e-07,
"loss": 0.0,
"step": 9050
},
{
"epoch": 12.684634231711586,
"grad_norm": 0.003299176227301359,
"learning_rate": 7.116056904959961e-07,
"loss": 0.0236,
"step": 9060
},
{
"epoch": 12.698634931746588,
"grad_norm": 0.0074769738130271435,
"learning_rate": 7.032491790288415e-07,
"loss": 0.0003,
"step": 9070
},
{
"epoch": 12.71263563178159,
"grad_norm": 0.0013627687003463507,
"learning_rate": 6.949383107074836e-07,
"loss": 0.0,
"step": 9080
},
{
"epoch": 12.726636331816591,
"grad_norm": 0.30319395661354065,
"learning_rate": 6.866731738158794e-07,
"loss": 0.0001,
"step": 9090
},
{
"epoch": 12.740637031851593,
"grad_norm": 0.0021405743900686502,
"learning_rate": 6.784538561521986e-07,
"loss": 0.0,
"step": 9100
},
{
"epoch": 12.754637731886595,
"grad_norm": 0.6895262598991394,
"learning_rate": 6.702804450278832e-07,
"loss": 0.0001,
"step": 9110
},
{
"epoch": 12.768638431921596,
"grad_norm": 0.0008189015206880867,
"learning_rate": 6.621530272667237e-07,
"loss": 0.0244,
"step": 9120
},
{
"epoch": 12.782639131956598,
"grad_norm": 0.0019172707106918097,
"learning_rate": 6.540716892039361e-07,
"loss": 0.0234,
"step": 9130
},
{
"epoch": 12.7966398319916,
"grad_norm": 0.0005183944012969732,
"learning_rate": 6.460365166852483e-07,
"loss": 0.0,
"step": 9140
},
{
"epoch": 12.810640532026602,
"grad_norm": 0.0005380522343330085,
"learning_rate": 6.380475950659815e-07,
"loss": 0.0,
"step": 9150
},
{
"epoch": 12.824641232061603,
"grad_norm": 0.00026877986965700984,
"learning_rate": 6.301050092101463e-07,
"loss": 0.0,
"step": 9160
},
{
"epoch": 12.838641932096605,
"grad_norm": 0.0017663196194916964,
"learning_rate": 6.222088434895462e-07,
"loss": 0.0,
"step": 9170
},
{
"epoch": 12.852642632131607,
"grad_norm": 0.0057665687054395676,
"learning_rate": 6.143591817828731e-07,
"loss": 0.0,
"step": 9180
},
{
"epoch": 12.866643332166609,
"grad_norm": 0.0044043478555977345,
"learning_rate": 6.065561074748194e-07,
"loss": 0.0001,
"step": 9190
},
{
"epoch": 12.88064403220161,
"grad_norm": 0.0019976862240582705,
"learning_rate": 5.987997034551984e-07,
"loss": 0.0001,
"step": 9200
},
{
"epoch": 12.894644732236612,
"grad_norm": 0.0017344317166134715,
"learning_rate": 5.910900521180518e-07,
"loss": 0.0001,
"step": 9210
},
{
"epoch": 12.908645432271614,
"grad_norm": 0.0007492689182981849,
"learning_rate": 5.834272353607867e-07,
"loss": 0.0,
"step": 9220
},
{
"epoch": 12.922646132306616,
"grad_norm": 0.0011607420165091753,
"learning_rate": 5.758113345832956e-07,
"loss": 0.0,
"step": 9230
},
{
"epoch": 12.936646832341617,
"grad_norm": 0.00039469017065130174,
"learning_rate": 5.682424306870977e-07,
"loss": 0.0,
"step": 9240
},
{
"epoch": 12.95064753237662,
"grad_norm": 0.002621919382363558,
"learning_rate": 5.607206040744789e-07,
"loss": 0.0003,
"step": 9250
},
{
"epoch": 12.964648232411621,
"grad_norm": 0.002265157410874963,
"learning_rate": 5.53245934647636e-07,
"loss": 0.0015,
"step": 9260
},
{
"epoch": 12.978648932446623,
"grad_norm": 0.1659821718931198,
"learning_rate": 5.458185018078277e-07,
"loss": 0.0,
"step": 9270
},
{
"epoch": 12.992649632481625,
"grad_norm": 5.271564960479736,
"learning_rate": 5.384383844545321e-07,
"loss": 0.0006,
"step": 9280
},
{
"epoch": 12.999649982499125,
"eval_f1": 0.8328847781417223,
"eval_loss": 0.8088664412498474,
"eval_precision": 0.8362640573272073,
"eval_recall": 0.8327501749475158,
"eval_runtime": 85.0045,
"eval_samples_per_second": 16.811,
"eval_steps_per_second": 8.411,
"step": 9285
}
],
"logging_steps": 10,
"max_steps": 10710,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.852337585794294e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}