{
"best_global_step": 2464,
"best_metric": 0.7966146756469337,
"best_model_checkpoint": "Obstacle/dinov2/checkpoint-2464",
"epoch": 22.0,
"eval_steps": 500,
"global_step": 2464,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0894854586129754,
"grad_norm": 68.26221466064453,
"learning_rate": 3.214285714285714e-07,
"loss": 0.9281,
"step": 10
},
{
"epoch": 0.1789709172259508,
"grad_norm": 49.921424865722656,
"learning_rate": 6.785714285714286e-07,
"loss": 0.8782,
"step": 20
},
{
"epoch": 0.2684563758389262,
"grad_norm": 63.9870491027832,
"learning_rate": 1.0357142857142859e-06,
"loss": 0.769,
"step": 30
},
{
"epoch": 0.3579418344519016,
"grad_norm": 27.94622230529785,
"learning_rate": 1.392857142857143e-06,
"loss": 0.7484,
"step": 40
},
{
"epoch": 0.44742729306487694,
"grad_norm": 31.261272430419922,
"learning_rate": 1.75e-06,
"loss": 0.7224,
"step": 50
},
{
"epoch": 0.5369127516778524,
"grad_norm": 30.373384475708008,
"learning_rate": 2.1071428571428572e-06,
"loss": 0.6935,
"step": 60
},
{
"epoch": 0.6263982102908278,
"grad_norm": 25.922801971435547,
"learning_rate": 2.4642857142857147e-06,
"loss": 0.656,
"step": 70
},
{
"epoch": 0.7158836689038032,
"grad_norm": 21.445323944091797,
"learning_rate": 2.8214285714285718e-06,
"loss": 0.6086,
"step": 80
},
{
"epoch": 0.8053691275167785,
"grad_norm": 47.42407989501953,
"learning_rate": 3.178571428571429e-06,
"loss": 0.7216,
"step": 90
},
{
"epoch": 0.8948545861297539,
"grad_norm": 23.003158569335938,
"learning_rate": 3.5357142857142863e-06,
"loss": 0.6132,
"step": 100
},
{
"epoch": 0.9843400447427293,
"grad_norm": 31.152210235595703,
"learning_rate": 3.892857142857143e-06,
"loss": 0.6263,
"step": 110
},
{
"epoch": 1.0,
"eval_loss": 0.572486162185669,
"eval_macro_f1": 0.6811600432857552,
"eval_runtime": 118.5033,
"eval_samples_per_second": 6.456,
"eval_steps_per_second": 0.81,
"step": 112
},
{
"epoch": 1.0715883668903803,
"grad_norm": 19.629968643188477,
"learning_rate": 4.25e-06,
"loss": 0.5971,
"step": 120
},
{
"epoch": 1.1610738255033557,
"grad_norm": 38.47784423828125,
"learning_rate": 4.6071428571428574e-06,
"loss": 0.5379,
"step": 130
},
{
"epoch": 1.250559284116331,
"grad_norm": 24.479284286499023,
"learning_rate": 4.964285714285715e-06,
"loss": 0.529,
"step": 140
},
{
"epoch": 1.3400447427293065,
"grad_norm": 20.997512817382812,
"learning_rate": 5.3214285714285715e-06,
"loss": 0.6006,
"step": 150
},
{
"epoch": 1.429530201342282,
"grad_norm": 52.60095977783203,
"learning_rate": 5.678571428571429e-06,
"loss": 0.7468,
"step": 160
},
{
"epoch": 1.5190156599552571,
"grad_norm": 15.642265319824219,
"learning_rate": 6.035714285714286e-06,
"loss": 0.559,
"step": 170
},
{
"epoch": 1.6085011185682325,
"grad_norm": 70.16302490234375,
"learning_rate": 6.392857142857143e-06,
"loss": 0.6043,
"step": 180
},
{
"epoch": 1.697986577181208,
"grad_norm": 26.643476486206055,
"learning_rate": 6.750000000000001e-06,
"loss": 0.5636,
"step": 190
},
{
"epoch": 1.7874720357941833,
"grad_norm": 177.88034057617188,
"learning_rate": 7.107142857142858e-06,
"loss": 0.5427,
"step": 200
},
{
"epoch": 1.8769574944071588,
"grad_norm": 38.7111701965332,
"learning_rate": 7.464285714285715e-06,
"loss": 0.5486,
"step": 210
},
{
"epoch": 1.9664429530201342,
"grad_norm": 21.803434371948242,
"learning_rate": 7.821428571428571e-06,
"loss": 0.609,
"step": 220
},
{
"epoch": 2.0,
"eval_loss": 0.5907321572303772,
"eval_macro_f1": 0.7001507404932478,
"eval_runtime": 121.4035,
"eval_samples_per_second": 6.301,
"eval_steps_per_second": 0.791,
"step": 224
},
{
"epoch": 2.053691275167785,
"grad_norm": 23.68389320373535,
"learning_rate": 8.17857142857143e-06,
"loss": 0.4989,
"step": 230
},
{
"epoch": 2.1431767337807606,
"grad_norm": 26.48926544189453,
"learning_rate": 8.535714285714286e-06,
"loss": 0.5119,
"step": 240
},
{
"epoch": 2.232662192393736,
"grad_norm": 25.900455474853516,
"learning_rate": 8.892857142857143e-06,
"loss": 0.5046,
"step": 250
},
{
"epoch": 2.3221476510067114,
"grad_norm": 26.200103759765625,
"learning_rate": 9.250000000000001e-06,
"loss": 0.4793,
"step": 260
},
{
"epoch": 2.411633109619687,
"grad_norm": 36.33053970336914,
"learning_rate": 9.607142857142858e-06,
"loss": 0.5127,
"step": 270
},
{
"epoch": 2.501118568232662,
"grad_norm": 51.3528938293457,
"learning_rate": 9.964285714285714e-06,
"loss": 0.5188,
"step": 280
},
{
"epoch": 2.5906040268456376,
"grad_norm": 28.10676383972168,
"learning_rate": 9.964285714285714e-06,
"loss": 0.4858,
"step": 290
},
{
"epoch": 2.680089485458613,
"grad_norm": 25.275537490844727,
"learning_rate": 9.924603174603175e-06,
"loss": 0.497,
"step": 300
},
{
"epoch": 2.7695749440715884,
"grad_norm": 23.899168014526367,
"learning_rate": 9.884920634920636e-06,
"loss": 0.4843,
"step": 310
},
{
"epoch": 2.859060402684564,
"grad_norm": 21.87393569946289,
"learning_rate": 9.845238095238097e-06,
"loss": 0.5006,
"step": 320
},
{
"epoch": 2.9485458612975393,
"grad_norm": 27.74087905883789,
"learning_rate": 9.805555555555556e-06,
"loss": 0.4471,
"step": 330
},
{
"epoch": 3.0,
"eval_loss": 0.5459941029548645,
"eval_macro_f1": 0.726109493936894,
"eval_runtime": 121.5217,
"eval_samples_per_second": 6.295,
"eval_steps_per_second": 0.79,
"step": 336
},
{
"epoch": 3.0357941834451903,
"grad_norm": 41.323997497558594,
"learning_rate": 9.765873015873017e-06,
"loss": 0.5304,
"step": 340
},
{
"epoch": 3.1252796420581657,
"grad_norm": 31.197467803955078,
"learning_rate": 9.726190476190477e-06,
"loss": 0.4863,
"step": 350
},
{
"epoch": 3.214765100671141,
"grad_norm": 15.407756805419922,
"learning_rate": 9.686507936507938e-06,
"loss": 0.4116,
"step": 360
},
{
"epoch": 3.3042505592841165,
"grad_norm": 19.091278076171875,
"learning_rate": 9.646825396825397e-06,
"loss": 0.4092,
"step": 370
},
{
"epoch": 3.393736017897092,
"grad_norm": 42.068511962890625,
"learning_rate": 9.607142857142858e-06,
"loss": 0.4627,
"step": 380
},
{
"epoch": 3.4832214765100673,
"grad_norm": 26.687232971191406,
"learning_rate": 9.567460317460319e-06,
"loss": 0.4487,
"step": 390
},
{
"epoch": 3.5727069351230423,
"grad_norm": 22.164098739624023,
"learning_rate": 9.527777777777778e-06,
"loss": 0.48,
"step": 400
},
{
"epoch": 3.662192393736018,
"grad_norm": 22.220373153686523,
"learning_rate": 9.488095238095238e-06,
"loss": 0.53,
"step": 410
},
{
"epoch": 3.751677852348993,
"grad_norm": 8.822561264038086,
"learning_rate": 9.4484126984127e-06,
"loss": 0.4999,
"step": 420
},
{
"epoch": 3.841163310961969,
"grad_norm": 11.927675247192383,
"learning_rate": 9.40873015873016e-06,
"loss": 0.4642,
"step": 430
},
{
"epoch": 3.930648769574944,
"grad_norm": 14.933186531066895,
"learning_rate": 9.36904761904762e-06,
"loss": 0.4145,
"step": 440
},
{
"epoch": 4.0,
"eval_loss": 0.5586118102073669,
"eval_macro_f1": 0.741893986276926,
"eval_runtime": 120.2217,
"eval_samples_per_second": 6.363,
"eval_steps_per_second": 0.799,
"step": 448
},
{
"epoch": 4.017897091722595,
"grad_norm": 23.335527420043945,
"learning_rate": 9.32936507936508e-06,
"loss": 0.4222,
"step": 450
},
{
"epoch": 4.10738255033557,
"grad_norm": 28.71408462524414,
"learning_rate": 9.28968253968254e-06,
"loss": 0.3959,
"step": 460
},
{
"epoch": 4.196868008948546,
"grad_norm": 20.4088077545166,
"learning_rate": 9.250000000000001e-06,
"loss": 0.3423,
"step": 470
},
{
"epoch": 4.286353467561521,
"grad_norm": 39.36516189575195,
"learning_rate": 9.21031746031746e-06,
"loss": 0.3851,
"step": 480
},
{
"epoch": 4.375838926174497,
"grad_norm": 32.29376983642578,
"learning_rate": 9.170634920634921e-06,
"loss": 0.3766,
"step": 490
},
{
"epoch": 4.465324384787472,
"grad_norm": 13.637434959411621,
"learning_rate": 9.130952380952382e-06,
"loss": 0.3328,
"step": 500
},
{
"epoch": 4.554809843400448,
"grad_norm": 23.359638214111328,
"learning_rate": 9.091269841269843e-06,
"loss": 0.3988,
"step": 510
},
{
"epoch": 4.644295302013423,
"grad_norm": 25.244564056396484,
"learning_rate": 9.051587301587302e-06,
"loss": 0.3149,
"step": 520
},
{
"epoch": 4.733780760626399,
"grad_norm": 273.609619140625,
"learning_rate": 9.011904761904762e-06,
"loss": 0.3685,
"step": 530
},
{
"epoch": 4.823266219239374,
"grad_norm": 18.818504333496094,
"learning_rate": 8.972222222222223e-06,
"loss": 0.2867,
"step": 540
},
{
"epoch": 4.912751677852349,
"grad_norm": 19.458040237426758,
"learning_rate": 8.932539682539684e-06,
"loss": 0.3952,
"step": 550
},
{
"epoch": 5.0,
"grad_norm": 11.257533073425293,
"learning_rate": 8.892857142857143e-06,
"loss": 0.3553,
"step": 560
},
{
"epoch": 5.0,
"eval_loss": 0.5237393379211426,
"eval_macro_f1": 0.7685140098500236,
"eval_runtime": 124.8114,
"eval_samples_per_second": 6.129,
"eval_steps_per_second": 0.769,
"step": 560
},
{
"epoch": 5.089485458612975,
"grad_norm": 16.261404037475586,
"learning_rate": 8.853174603174604e-06,
"loss": 0.3108,
"step": 570
},
{
"epoch": 5.178970917225951,
"grad_norm": 16.059083938598633,
"learning_rate": 8.813492063492064e-06,
"loss": 0.2986,
"step": 580
},
{
"epoch": 5.268456375838926,
"grad_norm": 22.006534576416016,
"learning_rate": 8.773809523809525e-06,
"loss": 0.2952,
"step": 590
},
{
"epoch": 5.357941834451902,
"grad_norm": 16.75338363647461,
"learning_rate": 8.734126984126984e-06,
"loss": 0.2512,
"step": 600
},
{
"epoch": 5.447427293064877,
"grad_norm": 36.52522659301758,
"learning_rate": 8.694444444444445e-06,
"loss": 0.2308,
"step": 610
},
{
"epoch": 5.5369127516778525,
"grad_norm": 14.535757064819336,
"learning_rate": 8.654761904761906e-06,
"loss": 0.3012,
"step": 620
},
{
"epoch": 5.626398210290827,
"grad_norm": 22.867900848388672,
"learning_rate": 8.615079365079366e-06,
"loss": 0.3232,
"step": 630
},
{
"epoch": 5.715883668903803,
"grad_norm": 19.417451858520508,
"learning_rate": 8.575396825396826e-06,
"loss": 0.3173,
"step": 640
},
{
"epoch": 5.805369127516778,
"grad_norm": 21.25806427001953,
"learning_rate": 8.535714285714286e-06,
"loss": 0.3376,
"step": 650
},
{
"epoch": 5.894854586129754,
"grad_norm": 11.842672348022461,
"learning_rate": 8.496031746031747e-06,
"loss": 0.3756,
"step": 660
},
{
"epoch": 5.984340044742729,
"grad_norm": 16.4525203704834,
"learning_rate": 8.456349206349208e-06,
"loss": 0.2923,
"step": 670
},
{
"epoch": 6.0,
"eval_loss": 0.5606415271759033,
"eval_macro_f1": 0.7435031036046287,
"eval_runtime": 115.5398,
"eval_samples_per_second": 6.621,
"eval_steps_per_second": 0.831,
"step": 672
},
{
"epoch": 6.0715883668903805,
"grad_norm": 20.93574333190918,
"learning_rate": 8.416666666666667e-06,
"loss": 0.2473,
"step": 680
},
{
"epoch": 6.1610738255033555,
"grad_norm": 26.189205169677734,
"learning_rate": 8.376984126984128e-06,
"loss": 0.2403,
"step": 690
},
{
"epoch": 6.250559284116331,
"grad_norm": 13.84333610534668,
"learning_rate": 8.337301587301588e-06,
"loss": 0.231,
"step": 700
},
{
"epoch": 6.340044742729306,
"grad_norm": 22.295377731323242,
"learning_rate": 8.297619047619049e-06,
"loss": 0.2416,
"step": 710
},
{
"epoch": 6.429530201342282,
"grad_norm": 14.893708229064941,
"learning_rate": 8.257936507936508e-06,
"loss": 0.2409,
"step": 720
},
{
"epoch": 6.519015659955257,
"grad_norm": 14.828768730163574,
"learning_rate": 8.218253968253969e-06,
"loss": 0.1927,
"step": 730
},
{
"epoch": 6.608501118568233,
"grad_norm": 23.54037094116211,
"learning_rate": 8.17857142857143e-06,
"loss": 0.2373,
"step": 740
},
{
"epoch": 6.697986577181208,
"grad_norm": 20.361324310302734,
"learning_rate": 8.138888888888889e-06,
"loss": 0.2266,
"step": 750
},
{
"epoch": 6.787472035794184,
"grad_norm": 32.86820602416992,
"learning_rate": 8.09920634920635e-06,
"loss": 0.3065,
"step": 760
},
{
"epoch": 6.876957494407159,
"grad_norm": 25.08152198791504,
"learning_rate": 8.05952380952381e-06,
"loss": 0.307,
"step": 770
},
{
"epoch": 6.966442953020135,
"grad_norm": 21.257457733154297,
"learning_rate": 8.019841269841271e-06,
"loss": 0.2943,
"step": 780
},
{
"epoch": 7.0,
"eval_loss": 0.5504983067512512,
"eval_macro_f1": 0.7728941735285386,
"eval_runtime": 119.5498,
"eval_samples_per_second": 6.399,
"eval_steps_per_second": 0.803,
"step": 784
},
{
"epoch": 7.053691275167785,
"grad_norm": 13.639359474182129,
"learning_rate": 7.980158730158732e-06,
"loss": 0.2103,
"step": 790
},
{
"epoch": 7.143176733780761,
"grad_norm": 27.568639755249023,
"learning_rate": 7.94047619047619e-06,
"loss": 0.1676,
"step": 800
},
{
"epoch": 7.232662192393736,
"grad_norm": 17.152692794799805,
"learning_rate": 7.900793650793652e-06,
"loss": 0.1818,
"step": 810
},
{
"epoch": 7.322147651006711,
"grad_norm": 16.47798728942871,
"learning_rate": 7.861111111111112e-06,
"loss": 0.2058,
"step": 820
},
{
"epoch": 7.411633109619687,
"grad_norm": 17.176942825317383,
"learning_rate": 7.821428571428571e-06,
"loss": 0.1508,
"step": 830
},
{
"epoch": 7.501118568232663,
"grad_norm": 29.886573791503906,
"learning_rate": 7.781746031746032e-06,
"loss": 0.2434,
"step": 840
},
{
"epoch": 7.590604026845638,
"grad_norm": 22.851221084594727,
"learning_rate": 7.742063492063493e-06,
"loss": 0.2431,
"step": 850
},
{
"epoch": 7.680089485458613,
"grad_norm": 9.74971866607666,
"learning_rate": 7.702380952380954e-06,
"loss": 0.1905,
"step": 860
},
{
"epoch": 7.769574944071588,
"grad_norm": 22.99750328063965,
"learning_rate": 7.662698412698414e-06,
"loss": 0.2215,
"step": 870
},
{
"epoch": 7.859060402684563,
"grad_norm": 29.295093536376953,
"learning_rate": 7.623015873015873e-06,
"loss": 0.2222,
"step": 880
},
{
"epoch": 7.948545861297539,
"grad_norm": 13.15281867980957,
"learning_rate": 7.583333333333333e-06,
"loss": 0.2172,
"step": 890
},
{
"epoch": 8.0,
"eval_loss": 0.5693853497505188,
"eval_macro_f1": 0.7617472396955967,
"eval_runtime": 71.7674,
"eval_samples_per_second": 10.659,
"eval_steps_per_second": 1.338,
"step": 896
},
{
"epoch": 8.03579418344519,
"grad_norm": 17.01239585876465,
"learning_rate": 7.543650793650794e-06,
"loss": 0.2183,
"step": 900
},
{
"epoch": 8.125279642058166,
"grad_norm": 16.31254768371582,
"learning_rate": 7.503968253968255e-06,
"loss": 0.1393,
"step": 910
},
{
"epoch": 8.21476510067114,
"grad_norm": 21.926393508911133,
"learning_rate": 7.464285714285715e-06,
"loss": 0.2104,
"step": 920
},
{
"epoch": 8.304250559284116,
"grad_norm": 16.77183723449707,
"learning_rate": 7.4246031746031754e-06,
"loss": 0.1808,
"step": 930
},
{
"epoch": 8.393736017897092,
"grad_norm": 10.125628471374512,
"learning_rate": 7.384920634920636e-06,
"loss": 0.142,
"step": 940
},
{
"epoch": 8.483221476510067,
"grad_norm": 8.853920936584473,
"learning_rate": 7.345238095238096e-06,
"loss": 0.1674,
"step": 950
},
{
"epoch": 8.572706935123042,
"grad_norm": 16.407033920288086,
"learning_rate": 7.305555555555556e-06,
"loss": 0.1684,
"step": 960
},
{
"epoch": 8.662192393736017,
"grad_norm": 19.892669677734375,
"learning_rate": 7.265873015873016e-06,
"loss": 0.1673,
"step": 970
},
{
"epoch": 8.751677852348994,
"grad_norm": 5.113985061645508,
"learning_rate": 7.226190476190477e-06,
"loss": 0.2087,
"step": 980
},
{
"epoch": 8.841163310961969,
"grad_norm": 10.283279418945312,
"learning_rate": 7.186507936507937e-06,
"loss": 0.1728,
"step": 990
},
{
"epoch": 8.930648769574944,
"grad_norm": 11.258045196533203,
"learning_rate": 7.146825396825397e-06,
"loss": 0.2067,
"step": 1000
},
{
"epoch": 9.0,
"eval_loss": 0.6666957139968872,
"eval_macro_f1": 0.7585227272727273,
"eval_runtime": 75.2405,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 1.276,
"step": 1008
},
{
"epoch": 9.017897091722595,
"grad_norm": 31.886018753051758,
"learning_rate": 7.107142857142858e-06,
"loss": 0.1853,
"step": 1010
},
{
"epoch": 9.10738255033557,
"grad_norm": 21.961132049560547,
"learning_rate": 7.067460317460319e-06,
"loss": 0.1823,
"step": 1020
},
{
"epoch": 9.196868008948545,
"grad_norm": 9.536689758300781,
"learning_rate": 7.027777777777778e-06,
"loss": 0.1289,
"step": 1030
},
{
"epoch": 9.286353467561522,
"grad_norm": 17.52619171142578,
"learning_rate": 6.988095238095239e-06,
"loss": 0.1421,
"step": 1040
},
{
"epoch": 9.375838926174497,
"grad_norm": 5.5908379554748535,
"learning_rate": 6.9484126984126985e-06,
"loss": 0.1024,
"step": 1050
},
{
"epoch": 9.465324384787472,
"grad_norm": 15.923222541809082,
"learning_rate": 6.908730158730159e-06,
"loss": 0.1363,
"step": 1060
},
{
"epoch": 9.554809843400447,
"grad_norm": 7.426005840301514,
"learning_rate": 6.86904761904762e-06,
"loss": 0.1851,
"step": 1070
},
{
"epoch": 9.644295302013422,
"grad_norm": 2.362064838409424,
"learning_rate": 6.82936507936508e-06,
"loss": 0.1177,
"step": 1080
},
{
"epoch": 9.733780760626399,
"grad_norm": 16.235544204711914,
"learning_rate": 6.789682539682541e-06,
"loss": 0.0991,
"step": 1090
},
{
"epoch": 9.823266219239374,
"grad_norm": 8.337503433227539,
"learning_rate": 6.750000000000001e-06,
"loss": 0.13,
"step": 1100
},
{
"epoch": 9.912751677852349,
"grad_norm": 34.09331130981445,
"learning_rate": 6.7103174603174605e-06,
"loss": 0.1171,
"step": 1110
},
{
"epoch": 10.0,
"grad_norm": 25.452791213989258,
"learning_rate": 6.67063492063492e-06,
"loss": 0.1817,
"step": 1120
},
{
"epoch": 10.0,
"eval_loss": 0.8301987051963806,
"eval_macro_f1": 0.7432961635470596,
"eval_runtime": 71.6774,
"eval_samples_per_second": 10.673,
"eval_steps_per_second": 1.339,
"step": 1120
},
{
"epoch": 10.089485458612975,
"grad_norm": 19.931293487548828,
"learning_rate": 6.630952380952381e-06,
"loss": 0.1405,
"step": 1130
},
{
"epoch": 10.17897091722595,
"grad_norm": 20.297443389892578,
"learning_rate": 6.591269841269842e-06,
"loss": 0.1359,
"step": 1140
},
{
"epoch": 10.268456375838927,
"grad_norm": 11.013289451599121,
"learning_rate": 6.551587301587302e-06,
"loss": 0.0918,
"step": 1150
},
{
"epoch": 10.357941834451902,
"grad_norm": 19.394319534301758,
"learning_rate": 6.5119047619047626e-06,
"loss": 0.1392,
"step": 1160
},
{
"epoch": 10.447427293064877,
"grad_norm": 28.343791961669922,
"learning_rate": 6.472222222222223e-06,
"loss": 0.1866,
"step": 1170
},
{
"epoch": 10.536912751677852,
"grad_norm": 14.38354778289795,
"learning_rate": 6.432539682539683e-06,
"loss": 0.1381,
"step": 1180
},
{
"epoch": 10.626398210290828,
"grad_norm": 24.752470016479492,
"learning_rate": 6.392857142857143e-06,
"loss": 0.202,
"step": 1190
},
{
"epoch": 10.715883668903803,
"grad_norm": 19.56192970275879,
"learning_rate": 6.353174603174603e-06,
"loss": 0.1566,
"step": 1200
},
{
"epoch": 10.805369127516778,
"grad_norm": 20.234485626220703,
"learning_rate": 6.313492063492064e-06,
"loss": 0.1061,
"step": 1210
},
{
"epoch": 10.894854586129753,
"grad_norm": 19.531757354736328,
"learning_rate": 6.2738095238095245e-06,
"loss": 0.1873,
"step": 1220
},
{
"epoch": 10.98434004474273,
"grad_norm": 16.655208587646484,
"learning_rate": 6.2341269841269844e-06,
"loss": 0.1102,
"step": 1230
},
{
"epoch": 11.0,
"eval_loss": 0.6939279437065125,
"eval_macro_f1": 0.769257323981233,
"eval_runtime": 73.198,
"eval_samples_per_second": 10.451,
"eval_steps_per_second": 1.312,
"step": 1232
},
{
"epoch": 11.07158836689038,
"grad_norm": 5.695398330688477,
"learning_rate": 6.194444444444445e-06,
"loss": 0.1283,
"step": 1240
},
{
"epoch": 11.161073825503356,
"grad_norm": 9.851438522338867,
"learning_rate": 6.154761904761906e-06,
"loss": 0.1065,
"step": 1250
},
{
"epoch": 11.250559284116331,
"grad_norm": 29.838871002197266,
"learning_rate": 6.115079365079366e-06,
"loss": 0.1426,
"step": 1260
},
{
"epoch": 11.340044742729306,
"grad_norm": 7.144505500793457,
"learning_rate": 6.075396825396826e-06,
"loss": 0.093,
"step": 1270
},
{
"epoch": 11.429530201342281,
"grad_norm": 15.980908393859863,
"learning_rate": 6.035714285714286e-06,
"loss": 0.1508,
"step": 1280
},
{
"epoch": 11.519015659955258,
"grad_norm": 23.028108596801758,
"learning_rate": 5.996031746031746e-06,
"loss": 0.1719,
"step": 1290
},
{
"epoch": 11.608501118568233,
"grad_norm": 14.000625610351562,
"learning_rate": 5.956349206349207e-06,
"loss": 0.1063,
"step": 1300
},
{
"epoch": 11.697986577181208,
"grad_norm": 13.606029510498047,
"learning_rate": 5.916666666666667e-06,
"loss": 0.1194,
"step": 1310
},
{
"epoch": 11.787472035794183,
"grad_norm": 13.779529571533203,
"learning_rate": 5.876984126984128e-06,
"loss": 0.1728,
"step": 1320
},
{
"epoch": 11.87695749440716,
"grad_norm": 10.599024772644043,
"learning_rate": 5.8373015873015886e-06,
"loss": 0.0797,
"step": 1330
},
{
"epoch": 11.966442953020135,
"grad_norm": 11.666370391845703,
"learning_rate": 5.7976190476190485e-06,
"loss": 0.1175,
"step": 1340
},
{
"epoch": 12.0,
"eval_loss": 0.6560544967651367,
"eval_macro_f1": 0.7722084367245657,
"eval_runtime": 65.1441,
"eval_samples_per_second": 11.743,
"eval_steps_per_second": 1.474,
"step": 1344
},
{
"epoch": 12.053691275167786,
"grad_norm": 12.950461387634277,
"learning_rate": 5.7579365079365075e-06,
"loss": 0.1042,
"step": 1350
},
{
"epoch": 12.143176733780761,
"grad_norm": 9.609066009521484,
"learning_rate": 5.718253968253968e-06,
"loss": 0.12,
"step": 1360
},
{
"epoch": 12.232662192393736,
"grad_norm": 23.64732551574707,
"learning_rate": 5.678571428571429e-06,
"loss": 0.1145,
"step": 1370
},
{
"epoch": 12.322147651006711,
"grad_norm": 32.57529067993164,
"learning_rate": 5.638888888888889e-06,
"loss": 0.0992,
"step": 1380
},
{
"epoch": 12.411633109619686,
"grad_norm": 7.419304370880127,
"learning_rate": 5.59920634920635e-06,
"loss": 0.0988,
"step": 1390
},
{
"epoch": 12.501118568232663,
"grad_norm": 6.912314414978027,
"learning_rate": 5.5595238095238104e-06,
"loss": 0.1157,
"step": 1400
},
{
"epoch": 12.590604026845638,
"grad_norm": 19.71913719177246,
"learning_rate": 5.51984126984127e-06,
"loss": 0.1478,
"step": 1410
},
{
"epoch": 12.680089485458613,
"grad_norm": 20.64476203918457,
"learning_rate": 5.480158730158731e-06,
"loss": 0.1196,
"step": 1420
},
{
"epoch": 12.769574944071588,
"grad_norm": 15.437020301818848,
"learning_rate": 5.44047619047619e-06,
"loss": 0.0766,
"step": 1430
},
{
"epoch": 12.859060402684564,
"grad_norm": 9.825302124023438,
"learning_rate": 5.400793650793651e-06,
"loss": 0.1254,
"step": 1440
},
{
"epoch": 12.94854586129754,
"grad_norm": 26.381696701049805,
"learning_rate": 5.361111111111112e-06,
"loss": 0.1025,
"step": 1450
},
{
"epoch": 13.0,
"eval_loss": 0.7549577951431274,
"eval_macro_f1": 0.7658037701779237,
"eval_runtime": 75.4344,
"eval_samples_per_second": 10.141,
"eval_steps_per_second": 1.273,
"step": 1456
},
{
"epoch": 13.03579418344519,
"grad_norm": 25.113908767700195,
"learning_rate": 5.3214285714285715e-06,
"loss": 0.1438,
"step": 1460
},
{
"epoch": 13.125279642058166,
"grad_norm": 10.833968162536621,
"learning_rate": 5.281746031746032e-06,
"loss": 0.1749,
"step": 1470
},
{
"epoch": 13.21476510067114,
"grad_norm": 22.27955436706543,
"learning_rate": 5.242063492063493e-06,
"loss": 0.1608,
"step": 1480
},
{
"epoch": 13.304250559284116,
"grad_norm": 15.125386238098145,
"learning_rate": 5.202380952380953e-06,
"loss": 0.1503,
"step": 1490
},
{
"epoch": 13.393736017897092,
"grad_norm": 3.4376182556152344,
"learning_rate": 5.162698412698414e-06,
"loss": 0.0819,
"step": 1500
},
{
"epoch": 13.483221476510067,
"grad_norm": 15.98349380493164,
"learning_rate": 5.123015873015873e-06,
"loss": 0.0837,
"step": 1510
},
{
"epoch": 13.572706935123042,
"grad_norm": 11.065319061279297,
"learning_rate": 5.0833333333333335e-06,
"loss": 0.1035,
"step": 1520
},
{
"epoch": 13.662192393736017,
"grad_norm": 10.306619644165039,
"learning_rate": 5.043650793650794e-06,
"loss": 0.1075,
"step": 1530
},
{
"epoch": 13.751677852348994,
"grad_norm": 15.42297077178955,
"learning_rate": 5.003968253968254e-06,
"loss": 0.1208,
"step": 1540
},
{
"epoch": 13.841163310961969,
"grad_norm": 20.406225204467773,
"learning_rate": 4.964285714285715e-06,
"loss": 0.1014,
"step": 1550
},
{
"epoch": 13.930648769574944,
"grad_norm": 16.1427059173584,
"learning_rate": 4.924603174603176e-06,
"loss": 0.1593,
"step": 1560
},
{
"epoch": 14.0,
"eval_loss": 0.7316491007804871,
"eval_macro_f1": 0.7694998475634216,
"eval_runtime": 69.4982,
"eval_samples_per_second": 11.007,
"eval_steps_per_second": 1.381,
"step": 1568
},
{
"epoch": 14.017897091722595,
"grad_norm": 9.524744987487793,
"learning_rate": 4.8849206349206356e-06,
"loss": 0.1199,
"step": 1570
},
{
"epoch": 14.10738255033557,
"grad_norm": 14.937419891357422,
"learning_rate": 4.8452380952380955e-06,
"loss": 0.1189,
"step": 1580
},
{
"epoch": 14.196868008948545,
"grad_norm": 19.528654098510742,
"learning_rate": 4.805555555555556e-06,
"loss": 0.1114,
"step": 1590
},
{
"epoch": 14.286353467561522,
"grad_norm": 2.1810483932495117,
"learning_rate": 4.765873015873016e-06,
"loss": 0.0805,
"step": 1600
},
{
"epoch": 14.375838926174497,
"grad_norm": 5.020853042602539,
"learning_rate": 4.726190476190476e-06,
"loss": 0.1097,
"step": 1610
},
{
"epoch": 14.465324384787472,
"grad_norm": 12.4754638671875,
"learning_rate": 4.686507936507937e-06,
"loss": 0.1231,
"step": 1620
},
{
"epoch": 14.554809843400447,
"grad_norm": 25.44110679626465,
"learning_rate": 4.6468253968253975e-06,
"loss": 0.0903,
"step": 1630
},
{
"epoch": 14.644295302013422,
"grad_norm": 27.849111557006836,
"learning_rate": 4.6071428571428574e-06,
"loss": 0.0905,
"step": 1640
},
{
"epoch": 14.733780760626399,
"grad_norm": 18.283781051635742,
"learning_rate": 4.567460317460317e-06,
"loss": 0.0617,
"step": 1650
},
{
"epoch": 14.823266219239374,
"grad_norm": 17.507619857788086,
"learning_rate": 4.527777777777778e-06,
"loss": 0.13,
"step": 1660
},
{
"epoch": 14.912751677852349,
"grad_norm": 21.756675720214844,
"learning_rate": 4.488095238095239e-06,
"loss": 0.0926,
"step": 1670
},
{
"epoch": 15.0,
"grad_norm": 19.568700790405273,
"learning_rate": 4.448412698412699e-06,
"loss": 0.0954,
"step": 1680
},
{
"epoch": 15.0,
"eval_loss": 0.6716505885124207,
"eval_macro_f1": 0.7774340887550624,
"eval_runtime": 72.4554,
"eval_samples_per_second": 10.558,
"eval_steps_per_second": 1.325,
"step": 1680
},
{
"epoch": 15.089485458612975,
"grad_norm": 18.1299991607666,
"learning_rate": 4.408730158730159e-06,
"loss": 0.0678,
"step": 1690
},
{
"epoch": 15.17897091722595,
"grad_norm": 4.439563751220703,
"learning_rate": 4.369047619047619e-06,
"loss": 0.0767,
"step": 1700
},
{
"epoch": 15.268456375838927,
"grad_norm": 14.997693061828613,
"learning_rate": 4.32936507936508e-06,
"loss": 0.0664,
"step": 1710
},
{
"epoch": 15.357941834451902,
"grad_norm": 7.552863597869873,
"learning_rate": 4.28968253968254e-06,
"loss": 0.0842,
"step": 1720
},
{
"epoch": 15.447427293064877,
"grad_norm": 8.728134155273438,
"learning_rate": 4.25e-06,
"loss": 0.1015,
"step": 1730
},
{
"epoch": 15.536912751677852,
"grad_norm": 11.534658432006836,
"learning_rate": 4.210317460317461e-06,
"loss": 0.1085,
"step": 1740
},
{
"epoch": 15.626398210290828,
"grad_norm": 22.487648010253906,
"learning_rate": 4.1706349206349215e-06,
"loss": 0.0993,
"step": 1750
},
{
"epoch": 15.715883668903803,
"grad_norm": 4.885320663452148,
"learning_rate": 4.130952380952381e-06,
"loss": 0.093,
"step": 1760
},
{
"epoch": 15.805369127516778,
"grad_norm": 16.688884735107422,
"learning_rate": 4.091269841269841e-06,
"loss": 0.0732,
"step": 1770
},
{
"epoch": 15.894854586129753,
"grad_norm": 30.58871841430664,
"learning_rate": 4.051587301587302e-06,
"loss": 0.0867,
"step": 1780
},
{
"epoch": 15.98434004474273,
"grad_norm": 17.19178009033203,
"learning_rate": 4.011904761904763e-06,
"loss": 0.0676,
"step": 1790
},
{
"epoch": 16.0,
"eval_loss": 0.7994140386581421,
"eval_macro_f1": 0.7741348066298342,
"eval_runtime": 75.3551,
"eval_samples_per_second": 10.152,
"eval_steps_per_second": 1.274,
"step": 1792
},
{
"epoch": 16.07158836689038,
"grad_norm": 10.005722999572754,
"learning_rate": 3.972222222222223e-06,
"loss": 0.0858,
"step": 1800
},
{
"epoch": 16.161073825503355,
"grad_norm": 20.64703369140625,
"learning_rate": 3.932539682539683e-06,
"loss": 0.0836,
"step": 1810
},
{
"epoch": 16.25055928411633,
"grad_norm": 8.218932151794434,
"learning_rate": 3.892857142857143e-06,
"loss": 0.0969,
"step": 1820
},
{
"epoch": 16.340044742729308,
"grad_norm": 0.8766313195228577,
"learning_rate": 3.853174603174604e-06,
"loss": 0.0432,
"step": 1830
},
{
"epoch": 16.42953020134228,
"grad_norm": 12.166719436645508,
"learning_rate": 3.8134920634920636e-06,
"loss": 0.0844,
"step": 1840
},
{
"epoch": 16.519015659955258,
"grad_norm": 8.594962120056152,
"learning_rate": 3.773809523809524e-06,
"loss": 0.1276,
"step": 1850
},
{
"epoch": 16.60850111856823,
"grad_norm": 2.2263548374176025,
"learning_rate": 3.7341269841269846e-06,
"loss": 0.0661,
"step": 1860
},
{
"epoch": 16.697986577181208,
"grad_norm": 6.170251846313477,
"learning_rate": 3.694444444444445e-06,
"loss": 0.1007,
"step": 1870
},
{
"epoch": 16.787472035794185,
"grad_norm": 8.359641075134277,
"learning_rate": 3.654761904761905e-06,
"loss": 0.0993,
"step": 1880
},
{
"epoch": 16.876957494407158,
"grad_norm": 12.583647727966309,
"learning_rate": 3.615079365079365e-06,
"loss": 0.0826,
"step": 1890
},
{
"epoch": 16.966442953020135,
"grad_norm": 6.046731948852539,
"learning_rate": 3.575396825396826e-06,
"loss": 0.0444,
"step": 1900
},
{
"epoch": 17.0,
"eval_loss": 0.747003436088562,
"eval_macro_f1": 0.7839196689592837,
"eval_runtime": 75.1906,
"eval_samples_per_second": 10.174,
"eval_steps_per_second": 1.277,
"step": 1904
},
{
"epoch": 17.053691275167786,
"grad_norm": 20.228485107421875,
"learning_rate": 3.5357142857142863e-06,
"loss": 0.0961,
"step": 1910
},
{
"epoch": 17.14317673378076,
"grad_norm": 26.031396865844727,
"learning_rate": 3.496031746031746e-06,
"loss": 0.0873,
"step": 1920
},
{
"epoch": 17.232662192393736,
"grad_norm": 12.642768859863281,
"learning_rate": 3.4563492063492065e-06,
"loss": 0.1064,
"step": 1930
},
{
"epoch": 17.322147651006713,
"grad_norm": 18.097814559936523,
"learning_rate": 3.416666666666667e-06,
"loss": 0.0768,
"step": 1940
},
{
"epoch": 17.411633109619686,
"grad_norm": 10.280755043029785,
"learning_rate": 3.3769841269841276e-06,
"loss": 0.1336,
"step": 1950
},
{
"epoch": 17.501118568232663,
"grad_norm": 11.690203666687012,
"learning_rate": 3.3373015873015875e-06,
"loss": 0.0707,
"step": 1960
},
{
"epoch": 17.59060402684564,
"grad_norm": 22.514507293701172,
"learning_rate": 3.297619047619048e-06,
"loss": 0.1073,
"step": 1970
},
{
"epoch": 17.680089485458613,
"grad_norm": 10.396025657653809,
"learning_rate": 3.257936507936508e-06,
"loss": 0.0543,
"step": 1980
},
{
"epoch": 17.76957494407159,
"grad_norm": 12.947179794311523,
"learning_rate": 3.218253968253969e-06,
"loss": 0.0768,
"step": 1990
},
{
"epoch": 17.859060402684563,
"grad_norm": 2.5873477458953857,
"learning_rate": 3.178571428571429e-06,
"loss": 0.0766,
"step": 2000
},
{
"epoch": 17.94854586129754,
"grad_norm": 6.163917064666748,
"learning_rate": 3.138888888888889e-06,
"loss": 0.0455,
"step": 2010
},
{
"epoch": 18.0,
"eval_loss": 0.8047569990158081,
"eval_macro_f1": 0.7781014378358804,
"eval_runtime": 66.3878,
"eval_samples_per_second": 11.523,
"eval_steps_per_second": 1.446,
"step": 2016
},
{
"epoch": 18.03579418344519,
"grad_norm": 4.654578685760498,
"learning_rate": 3.0992063492063495e-06,
"loss": 0.0684,
"step": 2020
},
{
"epoch": 18.125279642058164,
"grad_norm": 1.1822065114974976,
"learning_rate": 3.05952380952381e-06,
"loss": 0.0961,
"step": 2030
},
{
"epoch": 18.21476510067114,
"grad_norm": 21.223234176635742,
"learning_rate": 3.0198412698412697e-06,
"loss": 0.1265,
"step": 2040
},
{
"epoch": 18.304250559284117,
"grad_norm": 12.335346221923828,
"learning_rate": 2.9801587301587305e-06,
"loss": 0.1095,
"step": 2050
},
{
"epoch": 18.39373601789709,
"grad_norm": 4.7988715171813965,
"learning_rate": 2.9404761904761908e-06,
"loss": 0.0944,
"step": 2060
},
{
"epoch": 18.483221476510067,
"grad_norm": 28.275365829467773,
"learning_rate": 2.900793650793651e-06,
"loss": 0.0549,
"step": 2070
},
{
"epoch": 18.572706935123044,
"grad_norm": 7.988637447357178,
"learning_rate": 2.861111111111111e-06,
"loss": 0.0881,
"step": 2080
},
{
"epoch": 18.662192393736017,
"grad_norm": 9.342594146728516,
"learning_rate": 2.8214285714285718e-06,
"loss": 0.0528,
"step": 2090
},
{
"epoch": 18.751677852348994,
"grad_norm": 12.687505722045898,
"learning_rate": 2.781746031746032e-06,
"loss": 0.0711,
"step": 2100
},
{
"epoch": 18.841163310961967,
"grad_norm": 7.692240238189697,
"learning_rate": 2.7420634920634924e-06,
"loss": 0.072,
"step": 2110
},
{
"epoch": 18.930648769574944,
"grad_norm": 14.148133277893066,
"learning_rate": 2.7023809523809523e-06,
"loss": 0.0677,
"step": 2120
},
{
"epoch": 19.0,
"eval_loss": 0.8003770709037781,
"eval_macro_f1": 0.766772035108551,
"eval_runtime": 25.3025,
"eval_samples_per_second": 30.234,
"eval_steps_per_second": 3.794,
"step": 2128
},
{
"epoch": 19.017897091722595,
"grad_norm": 17.935680389404297,
"learning_rate": 2.662698412698413e-06,
"loss": 0.0416,
"step": 2130
},
{
"epoch": 19.107382550335572,
"grad_norm": 7.1221537590026855,
"learning_rate": 2.6230158730158734e-06,
"loss": 0.1003,
"step": 2140
},
{
"epoch": 19.196868008948545,
"grad_norm": 17.375965118408203,
"learning_rate": 2.5833333333333337e-06,
"loss": 0.0854,
"step": 2150
},
{
"epoch": 19.286353467561522,
"grad_norm": 13.114810943603516,
"learning_rate": 2.5436507936507936e-06,
"loss": 0.0759,
"step": 2160
},
{
"epoch": 19.375838926174495,
"grad_norm": 20.650806427001953,
"learning_rate": 2.503968253968254e-06,
"loss": 0.0576,
"step": 2170
},
{
"epoch": 19.465324384787472,
"grad_norm": 1.7908034324645996,
"learning_rate": 2.4642857142857147e-06,
"loss": 0.0314,
"step": 2180
},
{
"epoch": 19.55480984340045,
"grad_norm": 15.814742088317871,
"learning_rate": 2.4246031746031746e-06,
"loss": 0.0754,
"step": 2190
},
{
"epoch": 19.644295302013422,
"grad_norm": 18.996606826782227,
"learning_rate": 2.3849206349206354e-06,
"loss": 0.0502,
"step": 2200
},
{
"epoch": 19.7337807606264,
"grad_norm": 24.4049015045166,
"learning_rate": 2.3452380952380953e-06,
"loss": 0.0502,
"step": 2210
},
{
"epoch": 19.823266219239372,
"grad_norm": 6.1143879890441895,
"learning_rate": 2.305555555555556e-06,
"loss": 0.054,
"step": 2220
},
{
"epoch": 19.91275167785235,
"grad_norm": 12.98304557800293,
"learning_rate": 2.265873015873016e-06,
"loss": 0.0737,
"step": 2230
},
{
"epoch": 20.0,
"grad_norm": 0.3279534876346588,
"learning_rate": 2.2261904761904763e-06,
"loss": 0.0353,
"step": 2240
},
{
"epoch": 20.0,
"eval_loss": 0.8695369958877563,
"eval_macro_f1": 0.7699478748997595,
"eval_runtime": 25.1606,
"eval_samples_per_second": 30.405,
"eval_steps_per_second": 3.815,
"step": 2240
},
{
"epoch": 20.089485458612977,
"grad_norm": 5.161200523376465,
"learning_rate": 2.1865079365079366e-06,
"loss": 0.0489,
"step": 2250
},
{
"epoch": 20.17897091722595,
"grad_norm": 24.046892166137695,
"learning_rate": 2.146825396825397e-06,
"loss": 0.0887,
"step": 2260
},
{
"epoch": 20.268456375838927,
"grad_norm": 3.9411873817443848,
"learning_rate": 2.1071428571428572e-06,
"loss": 0.0804,
"step": 2270
},
{
"epoch": 20.3579418344519,
"grad_norm": 12.20919418334961,
"learning_rate": 2.0674603174603176e-06,
"loss": 0.0318,
"step": 2280
},
{
"epoch": 20.447427293064877,
"grad_norm": 5.021272659301758,
"learning_rate": 2.027777777777778e-06,
"loss": 0.0983,
"step": 2290
},
{
"epoch": 20.536912751677853,
"grad_norm": 15.741971969604492,
"learning_rate": 1.9880952380952382e-06,
"loss": 0.0335,
"step": 2300
},
{
"epoch": 20.626398210290827,
"grad_norm": 16.82331085205078,
"learning_rate": 1.9484126984126985e-06,
"loss": 0.0744,
"step": 2310
},
{
"epoch": 20.715883668903803,
"grad_norm": 0.5343822240829468,
"learning_rate": 1.908730158730159e-06,
"loss": 0.0432,
"step": 2320
},
{
"epoch": 20.80536912751678,
"grad_norm": 18.707128524780273,
"learning_rate": 1.8690476190476192e-06,
"loss": 0.0517,
"step": 2330
},
{
"epoch": 20.894854586129753,
"grad_norm": 10.502820014953613,
"learning_rate": 1.8293650793650793e-06,
"loss": 0.0596,
"step": 2340
},
{
"epoch": 20.98434004474273,
"grad_norm": 3.260993242263794,
"learning_rate": 1.7896825396825399e-06,
"loss": 0.0262,
"step": 2350
},
{
"epoch": 21.0,
"eval_loss": 0.8509567975997925,
"eval_macro_f1": 0.7710241615578796,
"eval_runtime": 21.2423,
"eval_samples_per_second": 36.013,
"eval_steps_per_second": 4.519,
"step": 2352
},
{
"epoch": 21.07158836689038,
"grad_norm": 21.054967880249023,
"learning_rate": 1.75e-06,
"loss": 0.0623,
"step": 2360
},
{
"epoch": 21.161073825503355,
"grad_norm": 7.076012134552002,
"learning_rate": 1.7103174603174605e-06,
"loss": 0.0567,
"step": 2370
},
{
"epoch": 21.25055928411633,
"grad_norm": 9.261219024658203,
"learning_rate": 1.6706349206349206e-06,
"loss": 0.0805,
"step": 2380
},
{
"epoch": 21.340044742729308,
"grad_norm": 21.940967559814453,
"learning_rate": 1.6309523809523812e-06,
"loss": 0.062,
"step": 2390
},
{
"epoch": 21.42953020134228,
"grad_norm": 0.5533654093742371,
"learning_rate": 1.5912698412698413e-06,
"loss": 0.0443,
"step": 2400
},
{
"epoch": 21.519015659955258,
"grad_norm": 1.7748634815216064,
"learning_rate": 1.5515873015873018e-06,
"loss": 0.0465,
"step": 2410
},
{
"epoch": 21.60850111856823,
"grad_norm": 14.389286994934082,
"learning_rate": 1.511904761904762e-06,
"loss": 0.0426,
"step": 2420
},
{
"epoch": 21.697986577181208,
"grad_norm": 0.1624564379453659,
"learning_rate": 1.4722222222222225e-06,
"loss": 0.0399,
"step": 2430
},
{
"epoch": 21.787472035794185,
"grad_norm": 16.362260818481445,
"learning_rate": 1.4325396825396826e-06,
"loss": 0.0737,
"step": 2440
},
{
"epoch": 21.876957494407158,
"grad_norm": 9.44295883178711,
"learning_rate": 1.392857142857143e-06,
"loss": 0.1062,
"step": 2450
},
{
"epoch": 21.966442953020135,
"grad_norm": 12.541874885559082,
"learning_rate": 1.3531746031746033e-06,
"loss": 0.0628,
"step": 2460
},
{
"epoch": 22.0,
"eval_loss": 0.8052472472190857,
"eval_macro_f1": 0.7966146756469337,
"eval_runtime": 25.2623,
"eval_samples_per_second": 30.282,
"eval_steps_per_second": 3.8,
"step": 2464
}
],
"logging_steps": 10,
"max_steps": 2800,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.8247393356405015e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}