date2format-v2 / checkpoint-14406 /trainer_state.json
syarulzaffi's picture
Upload folder using huggingface_hub
5c06ae8 verified
{
"best_metric": 0.2098342627286911,
"best_model_checkpoint": "date2format-v2/checkpoint-14406",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 14406,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005206164098292378,
"grad_norm": 0.16246353089809418,
"learning_rate": 8.327550312283136e-07,
"loss": 0.1697,
"step": 25
},
{
"epoch": 0.010412328196584756,
"grad_norm": 1.2059249877929688,
"learning_rate": 1.7002081887578072e-06,
"loss": 0.214,
"step": 50
},
{
"epoch": 0.015618492294877134,
"grad_norm": 0.9199744462966919,
"learning_rate": 2.5676613462873006e-06,
"loss": 0.1357,
"step": 75
},
{
"epoch": 0.02082465639316951,
"grad_norm": 0.04551107436418533,
"learning_rate": 3.4351145038167944e-06,
"loss": 0.0982,
"step": 100
},
{
"epoch": 0.02603082049146189,
"grad_norm": 1.7336697578430176,
"learning_rate": 4.2678695350451074e-06,
"loss": 0.1863,
"step": 125
},
{
"epoch": 0.03123698458975427,
"grad_norm": 1.3021111488342285,
"learning_rate": 5.135322692574601e-06,
"loss": 0.1166,
"step": 150
},
{
"epoch": 0.03644314868804665,
"grad_norm": 6.950756549835205,
"learning_rate": 6.002775850104095e-06,
"loss": 0.2385,
"step": 175
},
{
"epoch": 0.04164931278633902,
"grad_norm": 0.139039546251297,
"learning_rate": 6.870229007633589e-06,
"loss": 0.1147,
"step": 200
},
{
"epoch": 0.0468554768846314,
"grad_norm": 6.5412492752075195,
"learning_rate": 7.737682165163083e-06,
"loss": 0.1682,
"step": 225
},
{
"epoch": 0.05206164098292378,
"grad_norm": 1.130511999130249,
"learning_rate": 8.605135322692574e-06,
"loss": 0.2311,
"step": 250
},
{
"epoch": 0.05726780508121616,
"grad_norm": 14.400012969970703,
"learning_rate": 9.472588480222069e-06,
"loss": 0.1815,
"step": 275
},
{
"epoch": 0.06247396917950854,
"grad_norm": 0.6946442723274231,
"learning_rate": 1.0340041637751562e-05,
"loss": 0.1107,
"step": 300
},
{
"epoch": 0.06768013327780091,
"grad_norm": 9.926340103149414,
"learning_rate": 1.1207494795281055e-05,
"loss": 0.1139,
"step": 325
},
{
"epoch": 0.0728862973760933,
"grad_norm": 1.6435810327529907,
"learning_rate": 1.207494795281055e-05,
"loss": 0.1471,
"step": 350
},
{
"epoch": 0.07809246147438567,
"grad_norm": 1.6755973100662231,
"learning_rate": 1.294240111034004e-05,
"loss": 0.2039,
"step": 375
},
{
"epoch": 0.08329862557267805,
"grad_norm": 4.045156002044678,
"learning_rate": 1.3809854267869535e-05,
"loss": 0.2085,
"step": 400
},
{
"epoch": 0.08850478967097043,
"grad_norm": 0.2624989449977875,
"learning_rate": 1.4677307425399028e-05,
"loss": 0.182,
"step": 425
},
{
"epoch": 0.0937109537692628,
"grad_norm": 9.571054458618164,
"learning_rate": 1.5544760582928523e-05,
"loss": 0.1874,
"step": 450
},
{
"epoch": 0.09891711786755518,
"grad_norm": 0.22022797167301178,
"learning_rate": 1.6412213740458016e-05,
"loss": 0.1219,
"step": 475
},
{
"epoch": 0.10412328196584757,
"grad_norm": 9.949447631835938,
"learning_rate": 1.727966689798751e-05,
"loss": 0.2049,
"step": 500
},
{
"epoch": 0.10932944606413994,
"grad_norm": 5.697494983673096,
"learning_rate": 1.8147120055517e-05,
"loss": 0.1151,
"step": 525
},
{
"epoch": 0.11453561016243231,
"grad_norm": 11.763550758361816,
"learning_rate": 1.9014573213046498e-05,
"loss": 0.1546,
"step": 550
},
{
"epoch": 0.1197417742607247,
"grad_norm": 0.10468779504299164,
"learning_rate": 1.988202637057599e-05,
"loss": 0.2386,
"step": 575
},
{
"epoch": 0.12494793835901707,
"grad_norm": 1.6756809949874878,
"learning_rate": 2.0749479528105484e-05,
"loss": 0.2487,
"step": 600
},
{
"epoch": 0.13015410245730946,
"grad_norm": 0.19562338292598724,
"learning_rate": 2.1616932685634977e-05,
"loss": 0.2314,
"step": 625
},
{
"epoch": 0.13536026655560182,
"grad_norm": 8.64395809173584,
"learning_rate": 2.248438584316447e-05,
"loss": 0.113,
"step": 650
},
{
"epoch": 0.1405664306538942,
"grad_norm": 0.2188766896724701,
"learning_rate": 2.3351839000693966e-05,
"loss": 0.1685,
"step": 675
},
{
"epoch": 0.1457725947521866,
"grad_norm": 0.1437786966562271,
"learning_rate": 2.421929215822346e-05,
"loss": 0.2467,
"step": 700
},
{
"epoch": 0.15097875885047896,
"grad_norm": 8.023161888122559,
"learning_rate": 2.5086745315752952e-05,
"loss": 0.2007,
"step": 725
},
{
"epoch": 0.15618492294877134,
"grad_norm": 1.2265409231185913,
"learning_rate": 2.5954198473282442e-05,
"loss": 0.1716,
"step": 750
},
{
"epoch": 0.16139108704706373,
"grad_norm": 5.027709484100342,
"learning_rate": 2.6821651630811938e-05,
"loss": 0.2271,
"step": 775
},
{
"epoch": 0.1665972511453561,
"grad_norm": 0.11683953553438187,
"learning_rate": 2.768910478834143e-05,
"loss": 0.2198,
"step": 800
},
{
"epoch": 0.17180341524364848,
"grad_norm": 0.0886659324169159,
"learning_rate": 2.8556557945870927e-05,
"loss": 0.2351,
"step": 825
},
{
"epoch": 0.17700957934194086,
"grad_norm": 10.608562469482422,
"learning_rate": 2.9424011103400417e-05,
"loss": 0.295,
"step": 850
},
{
"epoch": 0.18221574344023322,
"grad_norm": 9.556928634643555,
"learning_rate": 3.0291464260929913e-05,
"loss": 0.1789,
"step": 875
},
{
"epoch": 0.1874219075385256,
"grad_norm": 9.168340682983398,
"learning_rate": 3.1158917418459406e-05,
"loss": 0.2548,
"step": 900
},
{
"epoch": 0.192628071636818,
"grad_norm": 6.174153804779053,
"learning_rate": 3.2026370575988896e-05,
"loss": 0.257,
"step": 925
},
{
"epoch": 0.19783423573511036,
"grad_norm": 13.663798332214355,
"learning_rate": 3.289382373351839e-05,
"loss": 0.1947,
"step": 950
},
{
"epoch": 0.20304039983340275,
"grad_norm": 11.622993469238281,
"learning_rate": 3.376127689104788e-05,
"loss": 0.2765,
"step": 975
},
{
"epoch": 0.20824656393169513,
"grad_norm": 9.067373275756836,
"learning_rate": 3.462873004857738e-05,
"loss": 0.2063,
"step": 1000
},
{
"epoch": 0.2134527280299875,
"grad_norm": 0.04181263968348503,
"learning_rate": 3.549618320610687e-05,
"loss": 0.3131,
"step": 1025
},
{
"epoch": 0.21865889212827988,
"grad_norm": 1.1748377084732056,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.2632,
"step": 1050
},
{
"epoch": 0.22386505622657227,
"grad_norm": 18.87828826904297,
"learning_rate": 3.723108952116586e-05,
"loss": 0.1721,
"step": 1075
},
{
"epoch": 0.22907122032486463,
"grad_norm": 0.07279066741466522,
"learning_rate": 3.809854267869536e-05,
"loss": 0.155,
"step": 1100
},
{
"epoch": 0.23427738442315701,
"grad_norm": 0.6264330148696899,
"learning_rate": 3.8965995836224846e-05,
"loss": 0.2785,
"step": 1125
},
{
"epoch": 0.2394835485214494,
"grad_norm": 0.07900076359510422,
"learning_rate": 3.983344899375434e-05,
"loss": 0.1977,
"step": 1150
},
{
"epoch": 0.24468971261974176,
"grad_norm": 0.14294634759426117,
"learning_rate": 4.070090215128383e-05,
"loss": 0.2297,
"step": 1175
},
{
"epoch": 0.24989587671803415,
"grad_norm": 12.401119232177734,
"learning_rate": 4.153365718251215e-05,
"loss": 0.3428,
"step": 1200
},
{
"epoch": 0.25510204081632654,
"grad_norm": 11.514626502990723,
"learning_rate": 4.240111034004164e-05,
"loss": 0.2381,
"step": 1225
},
{
"epoch": 0.2603082049146189,
"grad_norm": 0.33966752886772156,
"learning_rate": 4.3268563497571134e-05,
"loss": 0.2446,
"step": 1250
},
{
"epoch": 0.2655143690129113,
"grad_norm": 13.058968544006348,
"learning_rate": 4.4136016655100624e-05,
"loss": 0.2878,
"step": 1275
},
{
"epoch": 0.27072053311120364,
"grad_norm": 11.9553861618042,
"learning_rate": 4.500346981263012e-05,
"loss": 0.2735,
"step": 1300
},
{
"epoch": 0.27592669720949603,
"grad_norm": 0.3588428497314453,
"learning_rate": 4.5870922970159617e-05,
"loss": 0.2163,
"step": 1325
},
{
"epoch": 0.2811328613077884,
"grad_norm": 23.0421142578125,
"learning_rate": 4.673837612768911e-05,
"loss": 0.2516,
"step": 1350
},
{
"epoch": 0.2863390254060808,
"grad_norm": 0.3783215880393982,
"learning_rate": 4.757113115891742e-05,
"loss": 0.3394,
"step": 1375
},
{
"epoch": 0.2915451895043732,
"grad_norm": 0.13371586799621582,
"learning_rate": 4.843858431644692e-05,
"loss": 0.2155,
"step": 1400
},
{
"epoch": 0.2967513536026656,
"grad_norm": 15.4042329788208,
"learning_rate": 4.930603747397641e-05,
"loss": 0.3902,
"step": 1425
},
{
"epoch": 0.3019575177009579,
"grad_norm": 13.482477188110352,
"learning_rate": 4.9999981651326544e-05,
"loss": 0.3137,
"step": 1450
},
{
"epoch": 0.3071636817992503,
"grad_norm": 2.373297691345215,
"learning_rate": 4.9999339450583704e-05,
"loss": 0.3525,
"step": 1475
},
{
"epoch": 0.3123698458975427,
"grad_norm": 0.45967090129852295,
"learning_rate": 4.999777984310187e-05,
"loss": 0.3325,
"step": 1500
},
{
"epoch": 0.3175760099958351,
"grad_norm": 0.21776865422725677,
"learning_rate": 4.9995302886114334e-05,
"loss": 0.3339,
"step": 1525
},
{
"epoch": 0.32278217409412746,
"grad_norm": 4.673847198486328,
"learning_rate": 4.9991908670518586e-05,
"loss": 0.2315,
"step": 1550
},
{
"epoch": 0.32798833819241985,
"grad_norm": 2.8383209705352783,
"learning_rate": 4.998759732087296e-05,
"loss": 0.3757,
"step": 1575
},
{
"epoch": 0.3331945022907122,
"grad_norm": 6.966940402984619,
"learning_rate": 4.998236899539209e-05,
"loss": 0.2376,
"step": 1600
},
{
"epoch": 0.33840066638900457,
"grad_norm": 14.109524726867676,
"learning_rate": 4.997622388594109e-05,
"loss": 0.3046,
"step": 1625
},
{
"epoch": 0.34360683048729695,
"grad_norm": 0.9895745515823364,
"learning_rate": 4.9969162218028495e-05,
"loss": 0.2425,
"step": 1650
},
{
"epoch": 0.34881299458558934,
"grad_norm": 1.0948731899261475,
"learning_rate": 4.996118425079804e-05,
"loss": 0.3732,
"step": 1675
},
{
"epoch": 0.35401915868388173,
"grad_norm": 10.473814010620117,
"learning_rate": 4.9952290277019095e-05,
"loss": 0.3086,
"step": 1700
},
{
"epoch": 0.3592253227821741,
"grad_norm": 1.947543740272522,
"learning_rate": 4.994248062307593e-05,
"loss": 0.3405,
"step": 1725
},
{
"epoch": 0.36443148688046645,
"grad_norm": 7.7466254234313965,
"learning_rate": 4.993175564895577e-05,
"loss": 0.1969,
"step": 1750
},
{
"epoch": 0.36963765097875884,
"grad_norm": 11.565778732299805,
"learning_rate": 4.992011574823555e-05,
"loss": 0.3424,
"step": 1775
},
{
"epoch": 0.3748438150770512,
"grad_norm": 2.8535590171813965,
"learning_rate": 4.99075613480675e-05,
"loss": 0.2885,
"step": 1800
},
{
"epoch": 0.3800499791753436,
"grad_norm": 36.82617950439453,
"learning_rate": 4.9894092909163436e-05,
"loss": 0.3159,
"step": 1825
},
{
"epoch": 0.385256143273636,
"grad_norm": 0.5960198044776917,
"learning_rate": 4.98797109257779e-05,
"loss": 0.2981,
"step": 1850
},
{
"epoch": 0.3904623073719284,
"grad_norm": 23.564931869506836,
"learning_rate": 4.986441592568994e-05,
"loss": 0.2802,
"step": 1875
},
{
"epoch": 0.3956684714702207,
"grad_norm": 4.4088134765625,
"learning_rate": 4.984820847018388e-05,
"loss": 0.378,
"step": 1900
},
{
"epoch": 0.4008746355685131,
"grad_norm": 13.824947357177734,
"learning_rate": 4.983108915402855e-05,
"loss": 0.2409,
"step": 1925
},
{
"epoch": 0.4060807996668055,
"grad_norm": 14.21284294128418,
"learning_rate": 4.981305860545561e-05,
"loss": 0.3306,
"step": 1950
},
{
"epoch": 0.4112869637650979,
"grad_norm": 0.1888038069009781,
"learning_rate": 4.9794117486136404e-05,
"loss": 0.3458,
"step": 1975
},
{
"epoch": 0.41649312786339027,
"grad_norm": 0.19039294123649597,
"learning_rate": 4.977426649115773e-05,
"loss": 0.1629,
"step": 2000
},
{
"epoch": 0.42169929196168265,
"grad_norm": 0.42835375666618347,
"learning_rate": 4.9753506348996284e-05,
"loss": 0.2258,
"step": 2025
},
{
"epoch": 0.426905456059975,
"grad_norm": 4.263399124145508,
"learning_rate": 4.973183782149198e-05,
"loss": 0.3,
"step": 2050
},
{
"epoch": 0.4321116201582674,
"grad_norm": 0.17805464565753937,
"learning_rate": 4.970926170381995e-05,
"loss": 0.2086,
"step": 2075
},
{
"epoch": 0.43731778425655976,
"grad_norm": 0.9103575348854065,
"learning_rate": 4.968577882446139e-05,
"loss": 0.2435,
"step": 2100
},
{
"epoch": 0.44252394835485215,
"grad_norm": 10.369997024536133,
"learning_rate": 4.9661390045173126e-05,
"loss": 0.2264,
"step": 2125
},
{
"epoch": 0.44773011245314454,
"grad_norm": 2.6744861602783203,
"learning_rate": 4.963609626095603e-05,
"loss": 0.3012,
"step": 2150
},
{
"epoch": 0.4529362765514369,
"grad_norm": 12.971126556396484,
"learning_rate": 4.960989840002216e-05,
"loss": 0.2855,
"step": 2175
},
{
"epoch": 0.45814244064972925,
"grad_norm": 5.078539848327637,
"learning_rate": 4.958279742376068e-05,
"loss": 0.2832,
"step": 2200
},
{
"epoch": 0.46334860474802164,
"grad_norm": 11.258842468261719,
"learning_rate": 4.955479432670259e-05,
"loss": 0.3402,
"step": 2225
},
{
"epoch": 0.46855476884631403,
"grad_norm": 9.847345352172852,
"learning_rate": 4.952589013648425e-05,
"loss": 0.1914,
"step": 2250
},
{
"epoch": 0.4737609329446064,
"grad_norm": 0.0392930842936039,
"learning_rate": 4.949608591380963e-05,
"loss": 0.1341,
"step": 2275
},
{
"epoch": 0.4789670970428988,
"grad_norm": 7.803274154663086,
"learning_rate": 4.946538275241144e-05,
"loss": 0.3284,
"step": 2300
},
{
"epoch": 0.4841732611411912,
"grad_norm": 0.38506531715393066,
"learning_rate": 4.943378177901091e-05,
"loss": 0.3496,
"step": 2325
},
{
"epoch": 0.4893794252394835,
"grad_norm": 8.00683879852295,
"learning_rate": 4.940128415327654e-05,
"loss": 0.33,
"step": 2350
},
{
"epoch": 0.4945855893377759,
"grad_norm": 5.389349460601807,
"learning_rate": 4.9367891067781454e-05,
"loss": 0.2392,
"step": 2375
},
{
"epoch": 0.4997917534360683,
"grad_norm": 0.18322338163852692,
"learning_rate": 4.9333603747959725e-05,
"loss": 0.3306,
"step": 2400
},
{
"epoch": 0.5049979175343606,
"grad_norm": 47.38001251220703,
"learning_rate": 4.9298423452061306e-05,
"loss": 0.3651,
"step": 2425
},
{
"epoch": 0.5102040816326531,
"grad_norm": 1.591893196105957,
"learning_rate": 4.926235147110593e-05,
"loss": 0.1469,
"step": 2450
},
{
"epoch": 0.5154102457309454,
"grad_norm": 2.414854049682617,
"learning_rate": 4.922538912883573e-05,
"loss": 0.3108,
"step": 2475
},
{
"epoch": 0.5206164098292378,
"grad_norm": 0.7650768756866455,
"learning_rate": 4.91875377816666e-05,
"loss": 0.1384,
"step": 2500
},
{
"epoch": 0.5258225739275302,
"grad_norm": 12.073479652404785,
"learning_rate": 4.9148798818638486e-05,
"loss": 0.3325,
"step": 2525
},
{
"epoch": 0.5310287380258226,
"grad_norm": 1.0136282444000244,
"learning_rate": 4.910917366136438e-05,
"loss": 0.2324,
"step": 2550
},
{
"epoch": 0.536234902124115,
"grad_norm": 4.9315667152404785,
"learning_rate": 4.906866376397816e-05,
"loss": 0.3278,
"step": 2575
},
{
"epoch": 0.5414410662224073,
"grad_norm": 18.607717514038086,
"learning_rate": 4.902727061308121e-05,
"loss": 0.3609,
"step": 2600
},
{
"epoch": 0.5466472303206997,
"grad_norm": 28.146434783935547,
"learning_rate": 4.89849957276879e-05,
"loss": 0.229,
"step": 2625
},
{
"epoch": 0.5518533944189921,
"grad_norm": 6.487234592437744,
"learning_rate": 4.894184065916981e-05,
"loss": 0.1822,
"step": 2650
},
{
"epoch": 0.5570595585172845,
"grad_norm": 12.08668327331543,
"learning_rate": 4.8897806991198796e-05,
"loss": 0.1988,
"step": 2675
},
{
"epoch": 0.5622657226155768,
"grad_norm": 6.793110370635986,
"learning_rate": 4.885289633968891e-05,
"loss": 0.2108,
"step": 2700
},
{
"epoch": 0.5674718867138692,
"grad_norm": 13.04969596862793,
"learning_rate": 4.880711035273709e-05,
"loss": 0.2559,
"step": 2725
},
{
"epoch": 0.5726780508121616,
"grad_norm": 0.07108098268508911,
"learning_rate": 4.876045071056262e-05,
"loss": 0.2236,
"step": 2750
},
{
"epoch": 0.5778842149104539,
"grad_norm": 4.550630569458008,
"learning_rate": 4.871291912544558e-05,
"loss": 0.1857,
"step": 2775
},
{
"epoch": 0.5830903790087464,
"grad_norm": 3.2425384521484375,
"learning_rate": 4.8664517341663886e-05,
"loss": 0.1981,
"step": 2800
},
{
"epoch": 0.5882965431070387,
"grad_norm": 1.5111477375030518,
"learning_rate": 4.861524713542939e-05,
"loss": 0.2145,
"step": 2825
},
{
"epoch": 0.5935027072053312,
"grad_norm": 0.029720915481448174,
"learning_rate": 4.856511031482264e-05,
"loss": 0.1805,
"step": 2850
},
{
"epoch": 0.5987088713036235,
"grad_norm": 14.783000946044922,
"learning_rate": 4.8514108719726516e-05,
"loss": 0.2574,
"step": 2875
},
{
"epoch": 0.6039150354019158,
"grad_norm": 0.23782485723495483,
"learning_rate": 4.846224422175877e-05,
"loss": 0.272,
"step": 2900
},
{
"epoch": 0.6091211995002083,
"grad_norm": 0.6656458377838135,
"learning_rate": 4.840951872420327e-05,
"loss": 0.1804,
"step": 2925
},
{
"epoch": 0.6143273635985006,
"grad_norm": 7.324411869049072,
"learning_rate": 4.835593416194023e-05,
"loss": 0.2526,
"step": 2950
},
{
"epoch": 0.619533527696793,
"grad_norm": 17.861886978149414,
"learning_rate": 4.8301492501375135e-05,
"loss": 0.2352,
"step": 2975
},
{
"epoch": 0.6247396917950854,
"grad_norm": 16.718368530273438,
"learning_rate": 4.824619574036663e-05,
"loss": 0.394,
"step": 3000
},
{
"epoch": 0.6299458558933777,
"grad_norm": 0.1666000634431839,
"learning_rate": 4.819004590815317e-05,
"loss": 0.2006,
"step": 3025
},
{
"epoch": 0.6351520199916701,
"grad_norm": 4.644481182098389,
"learning_rate": 4.8133045065278584e-05,
"loss": 0.2226,
"step": 3050
},
{
"epoch": 0.6403581840899625,
"grad_norm": 23.20795249938965,
"learning_rate": 4.807519530351644e-05,
"loss": 0.2966,
"step": 3075
},
{
"epoch": 0.6455643481882549,
"grad_norm": 0.05183548480272293,
"learning_rate": 4.801649874579327e-05,
"loss": 0.1597,
"step": 3100
},
{
"epoch": 0.6507705122865473,
"grad_norm": 8.726778030395508,
"learning_rate": 4.79569575461107e-05,
"loss": 0.3323,
"step": 3125
},
{
"epoch": 0.6559766763848397,
"grad_norm": 0.2523828446865082,
"learning_rate": 4.789657388946637e-05,
"loss": 0.1146,
"step": 3150
},
{
"epoch": 0.661182840483132,
"grad_norm": 0.07221566885709763,
"learning_rate": 4.7835349991773775e-05,
"loss": 0.3426,
"step": 3175
},
{
"epoch": 0.6663890045814244,
"grad_norm": 0.2359621673822403,
"learning_rate": 4.7773288099780935e-05,
"loss": 0.2444,
"step": 3200
},
{
"epoch": 0.6715951686797168,
"grad_norm": 18.833477020263672,
"learning_rate": 4.7710390490987936e-05,
"loss": 0.3419,
"step": 3225
},
{
"epoch": 0.6768013327780091,
"grad_norm": 17.171281814575195,
"learning_rate": 4.764665947356337e-05,
"loss": 0.2313,
"step": 3250
},
{
"epoch": 0.6820074968763016,
"grad_norm": 0.4939661920070648,
"learning_rate": 4.758209738625963e-05,
"loss": 0.2724,
"step": 3275
},
{
"epoch": 0.6872136609745939,
"grad_norm": 0.8321401476860046,
"learning_rate": 4.751670659832708e-05,
"loss": 0.1976,
"step": 3300
},
{
"epoch": 0.6924198250728864,
"grad_norm": 1.6382664442062378,
"learning_rate": 4.74504895094271e-05,
"loss": 0.2768,
"step": 3325
},
{
"epoch": 0.6976259891711787,
"grad_norm": 0.03149043396115303,
"learning_rate": 4.738344854954404e-05,
"loss": 0.2127,
"step": 3350
},
{
"epoch": 0.702832153269471,
"grad_norm": 0.08274863660335541,
"learning_rate": 4.7315586178896035e-05,
"loss": 0.2105,
"step": 3375
},
{
"epoch": 0.7080383173677635,
"grad_norm": 2.460942268371582,
"learning_rate": 4.724690488784474e-05,
"loss": 0.3465,
"step": 3400
},
{
"epoch": 0.7132444814660558,
"grad_norm": 13.02415943145752,
"learning_rate": 4.717740719680391e-05,
"loss": 0.2309,
"step": 3425
},
{
"epoch": 0.7184506455643482,
"grad_norm": 15.129639625549316,
"learning_rate": 4.710709565614694e-05,
"loss": 0.1043,
"step": 3450
},
{
"epoch": 0.7236568096626406,
"grad_norm": 0.19271063804626465,
"learning_rate": 4.703597284611325e-05,
"loss": 0.312,
"step": 3475
},
{
"epoch": 0.7288629737609329,
"grad_norm": 16.245803833007812,
"learning_rate": 4.696404137671362e-05,
"loss": 0.3068,
"step": 3500
},
{
"epoch": 0.7340691378592253,
"grad_norm": 0.28644460439682007,
"learning_rate": 4.689130388763437e-05,
"loss": 0.3067,
"step": 3525
},
{
"epoch": 0.7392753019575177,
"grad_norm": 19.579248428344727,
"learning_rate": 4.681776304814056e-05,
"loss": 0.2592,
"step": 3550
},
{
"epoch": 0.7444814660558101,
"grad_norm": 16.12538719177246,
"learning_rate": 4.6743421556977934e-05,
"loss": 0.1631,
"step": 3575
},
{
"epoch": 0.7496876301541024,
"grad_norm": 0.7716278433799744,
"learning_rate": 4.6668282142274e-05,
"loss": 0.2962,
"step": 3600
},
{
"epoch": 0.7548937942523949,
"grad_norm": 6.197509288787842,
"learning_rate": 4.659234756143782e-05,
"loss": 0.2491,
"step": 3625
},
{
"epoch": 0.7600999583506872,
"grad_norm": 0.5237298011779785,
"learning_rate": 4.651562060105886e-05,
"loss": 0.2988,
"step": 3650
},
{
"epoch": 0.7653061224489796,
"grad_norm": 6.333150386810303,
"learning_rate": 4.643810407680475e-05,
"loss": 0.2558,
"step": 3675
},
{
"epoch": 0.770512286547272,
"grad_norm": 0.02860177680850029,
"learning_rate": 4.6359800833317915e-05,
"loss": 0.1318,
"step": 3700
},
{
"epoch": 0.7757184506455643,
"grad_norm": 0.030804630368947983,
"learning_rate": 4.6280713744111215e-05,
"loss": 0.2002,
"step": 3725
},
{
"epoch": 0.7809246147438568,
"grad_norm": 10.464898109436035,
"learning_rate": 4.620084571146247e-05,
"loss": 0.3029,
"step": 3750
},
{
"epoch": 0.7861307788421491,
"grad_norm": 1.6494271755218506,
"learning_rate": 4.6120199666307975e-05,
"loss": 0.377,
"step": 3775
},
{
"epoch": 0.7913369429404414,
"grad_norm": 10.32836627960205,
"learning_rate": 4.603877856813494e-05,
"loss": 0.1742,
"step": 3800
},
{
"epoch": 0.7965431070387339,
"grad_norm": 5.073435306549072,
"learning_rate": 4.5956585404872884e-05,
"loss": 0.1921,
"step": 3825
},
{
"epoch": 0.8017492711370262,
"grad_norm": 11.85318374633789,
"learning_rate": 4.587362319278397e-05,
"loss": 0.2957,
"step": 3850
},
{
"epoch": 0.8069554352353187,
"grad_norm": 3.998944044113159,
"learning_rate": 4.5789894976352344e-05,
"loss": 0.2102,
"step": 3875
},
{
"epoch": 0.812161599333611,
"grad_norm": 0.569960355758667,
"learning_rate": 4.570540382817239e-05,
"loss": 0.245,
"step": 3900
},
{
"epoch": 0.8173677634319034,
"grad_norm": 5.945977687835693,
"learning_rate": 4.562015284883597e-05,
"loss": 0.2113,
"step": 3925
},
{
"epoch": 0.8225739275301958,
"grad_norm": 10.898477554321289,
"learning_rate": 4.553414516681868e-05,
"loss": 0.2446,
"step": 3950
},
{
"epoch": 0.8277800916284881,
"grad_norm": 0.25490328669548035,
"learning_rate": 4.5447383938364974e-05,
"loss": 0.301,
"step": 3975
},
{
"epoch": 0.8329862557267805,
"grad_norm": 4.246921539306641,
"learning_rate": 4.535987234737242e-05,
"loss": 0.2944,
"step": 4000
},
{
"epoch": 0.8381924198250729,
"grad_norm": 6.019783973693848,
"learning_rate": 4.527161360527481e-05,
"loss": 0.3184,
"step": 4025
},
{
"epoch": 0.8433985839233653,
"grad_norm": 0.0902986004948616,
"learning_rate": 4.51826109509243e-05,
"loss": 0.1508,
"step": 4050
},
{
"epoch": 0.8486047480216576,
"grad_norm": 7.770606517791748,
"learning_rate": 4.509286765047258e-05,
"loss": 0.4018,
"step": 4075
},
{
"epoch": 0.85381091211995,
"grad_norm": 0.20602120459079742,
"learning_rate": 4.5002386997251025e-05,
"loss": 0.3172,
"step": 4100
},
{
"epoch": 0.8590170762182424,
"grad_norm": 9.447431564331055,
"learning_rate": 4.491117231164978e-05,
"loss": 0.2055,
"step": 4125
},
{
"epoch": 0.8642232403165347,
"grad_norm": 4.260950088500977,
"learning_rate": 4.481922694099602e-05,
"loss": 0.1652,
"step": 4150
},
{
"epoch": 0.8694294044148272,
"grad_norm": 8.093120574951172,
"learning_rate": 4.4726554259430966e-05,
"loss": 0.1701,
"step": 4175
},
{
"epoch": 0.8746355685131195,
"grad_norm": 9.310951232910156,
"learning_rate": 4.4633157667786194e-05,
"loss": 0.3233,
"step": 4200
},
{
"epoch": 0.879841732611412,
"grad_norm": 13.830717086791992,
"learning_rate": 4.453904059345877e-05,
"loss": 0.2724,
"step": 4225
},
{
"epoch": 0.8850478967097043,
"grad_norm": 7.378146648406982,
"learning_rate": 4.4444206490285465e-05,
"loss": 0.3583,
"step": 4250
},
{
"epoch": 0.8902540608079966,
"grad_norm": 0.06943502277135849,
"learning_rate": 4.4352494400727496e-05,
"loss": 0.3111,
"step": 4275
},
{
"epoch": 0.8954602249062891,
"grad_norm": 5.857403755187988,
"learning_rate": 4.425626504054255e-05,
"loss": 0.3536,
"step": 4300
},
{
"epoch": 0.9006663890045814,
"grad_norm": 12.8510160446167,
"learning_rate": 4.4159329028593835e-05,
"loss": 0.3129,
"step": 4325
},
{
"epoch": 0.9058725531028738,
"grad_norm": 0.7335798740386963,
"learning_rate": 4.406168992216536e-05,
"loss": 0.2536,
"step": 4350
},
{
"epoch": 0.9110787172011662,
"grad_norm": 0.06461632251739502,
"learning_rate": 4.396335130434277e-05,
"loss": 0.1635,
"step": 4375
},
{
"epoch": 0.9162848812994585,
"grad_norm": 0.6194918751716614,
"learning_rate": 4.386431678388183e-05,
"loss": 0.2273,
"step": 4400
},
{
"epoch": 0.921491045397751,
"grad_norm": 0.06244231015443802,
"learning_rate": 4.3764589995076045e-05,
"loss": 0.2393,
"step": 4425
},
{
"epoch": 0.9266972094960433,
"grad_norm": 4.408567428588867,
"learning_rate": 4.3664174597623225e-05,
"loss": 0.1189,
"step": 4450
},
{
"epoch": 0.9319033735943357,
"grad_norm": 0.7255458235740662,
"learning_rate": 4.3563074276491224e-05,
"loss": 0.2784,
"step": 4475
},
{
"epoch": 0.9371095376926281,
"grad_norm": 0.09919915348291397,
"learning_rate": 4.346129274178271e-05,
"loss": 0.1761,
"step": 4500
},
{
"epoch": 0.9423157017909205,
"grad_norm": 13.636004447937012,
"learning_rate": 4.335883372859901e-05,
"loss": 0.3252,
"step": 4525
},
{
"epoch": 0.9475218658892128,
"grad_norm": 0.1237548440694809,
"learning_rate": 4.3255700996903027e-05,
"loss": 0.1931,
"step": 4550
},
{
"epoch": 0.9527280299875052,
"grad_norm": 0.06882551312446594,
"learning_rate": 4.315189833138129e-05,
"loss": 0.256,
"step": 4575
},
{
"epoch": 0.9579341940857976,
"grad_norm": 0.21859368681907654,
"learning_rate": 4.304742954130504e-05,
"loss": 0.2087,
"step": 4600
},
{
"epoch": 0.9631403581840899,
"grad_norm": 4.428884983062744,
"learning_rate": 4.294229846039045e-05,
"loss": 0.1962,
"step": 4625
},
{
"epoch": 0.9683465222823824,
"grad_norm": 6.920009613037109,
"learning_rate": 4.2836508946657944e-05,
"loss": 0.2301,
"step": 4650
},
{
"epoch": 0.9735526863806747,
"grad_norm": 3.4040772914886475,
"learning_rate": 4.273006488229061e-05,
"loss": 0.287,
"step": 4675
},
{
"epoch": 0.978758850478967,
"grad_norm": 7.0576043128967285,
"learning_rate": 4.2622970173491734e-05,
"loss": 0.1524,
"step": 4700
},
{
"epoch": 0.9839650145772595,
"grad_norm": 0.026531610637903214,
"learning_rate": 4.2515228750341466e-05,
"loss": 0.1662,
"step": 4725
},
{
"epoch": 0.9891711786755518,
"grad_norm": 1.0448293685913086,
"learning_rate": 4.240684456665257e-05,
"loss": 0.2114,
"step": 4750
},
{
"epoch": 0.9943773427738443,
"grad_norm": 44.79957962036133,
"learning_rate": 4.229782159982536e-05,
"loss": 0.2157,
"step": 4775
},
{
"epoch": 0.9995835068721366,
"grad_norm": 10.862527847290039,
"learning_rate": 4.2188163850701734e-05,
"loss": 0.2925,
"step": 4800
},
{
"epoch": 1.0,
"eval_accuracy": 0.892994923857868,
"eval_f1_macro": 0.8730666664599849,
"eval_f1_micro": 0.892994923857868,
"eval_f1_weighted": 0.8798157289522093,
"eval_loss": 0.34922197461128235,
"eval_precision_macro": 0.9003919914273503,
"eval_precision_micro": 0.892994923857868,
"eval_precision_weighted": 0.9045471108195786,
"eval_recall_macro": 0.8862461348175634,
"eval_recall_micro": 0.892994923857868,
"eval_recall_weighted": 0.892994923857868,
"eval_runtime": 16.3696,
"eval_samples_per_second": 902.587,
"eval_steps_per_second": 56.446,
"step": 4802
},
{
"epoch": 1.004789670970429,
"grad_norm": 10.292591094970703,
"learning_rate": 4.2077875343418325e-05,
"loss": 0.2158,
"step": 4825
},
{
"epoch": 1.0099958350687213,
"grad_norm": 5.3238325119018555,
"learning_rate": 4.1966960125258884e-05,
"loss": 0.2987,
"step": 4850
},
{
"epoch": 1.0152019991670138,
"grad_norm": 0.32381194829940796,
"learning_rate": 4.1855422266505675e-05,
"loss": 0.3283,
"step": 4875
},
{
"epoch": 1.0204081632653061,
"grad_norm": 7.971839904785156,
"learning_rate": 4.174326586029018e-05,
"loss": 0.19,
"step": 4900
},
{
"epoch": 1.0256143273635985,
"grad_norm": 0.252726286649704,
"learning_rate": 4.1630495022442884e-05,
"loss": 0.3692,
"step": 4925
},
{
"epoch": 1.0308204914618908,
"grad_norm": 3.9147284030914307,
"learning_rate": 4.1517113891342174e-05,
"loss": 0.2321,
"step": 4950
},
{
"epoch": 1.0360266555601834,
"grad_norm": 3.525928020477295,
"learning_rate": 4.1403126627762546e-05,
"loss": 0.1547,
"step": 4975
},
{
"epoch": 1.0412328196584757,
"grad_norm": 0.9435555934906006,
"learning_rate": 4.128853741472187e-05,
"loss": 0.1764,
"step": 5000
},
{
"epoch": 1.046438983756768,
"grad_norm": 9.357694625854492,
"learning_rate": 4.117335045732791e-05,
"loss": 0.2553,
"step": 5025
},
{
"epoch": 1.0516451478550604,
"grad_norm": 0.10351333767175674,
"learning_rate": 4.105756998262399e-05,
"loss": 0.128,
"step": 5050
},
{
"epoch": 1.0568513119533527,
"grad_norm": 3.0234265327453613,
"learning_rate": 4.094120023943387e-05,
"loss": 0.2764,
"step": 5075
},
{
"epoch": 1.0620574760516452,
"grad_norm": 2.055335283279419,
"learning_rate": 4.082424549820586e-05,
"loss": 0.1752,
"step": 5100
},
{
"epoch": 1.0672636401499376,
"grad_norm": 9.345911026000977,
"learning_rate": 4.070671005085607e-05,
"loss": 0.2165,
"step": 5125
},
{
"epoch": 1.07246980424823,
"grad_norm": 0.36394619941711426,
"learning_rate": 4.058859821061092e-05,
"loss": 0.1157,
"step": 5150
},
{
"epoch": 1.0776759683465222,
"grad_norm": 8.331086158752441,
"learning_rate": 4.046991431184887e-05,
"loss": 0.1545,
"step": 5175
},
{
"epoch": 1.0828821324448146,
"grad_norm": 9.395284652709961,
"learning_rate": 4.0350662709941325e-05,
"loss": 0.2645,
"step": 5200
},
{
"epoch": 1.0880882965431071,
"grad_norm": 6.326489448547363,
"learning_rate": 4.023084778109283e-05,
"loss": 0.2223,
"step": 5225
},
{
"epoch": 1.0932944606413995,
"grad_norm": 11.211806297302246,
"learning_rate": 4.0110473922180526e-05,
"loss": 0.2022,
"step": 5250
},
{
"epoch": 1.0985006247396918,
"grad_norm": 0.9454079270362854,
"learning_rate": 3.998954555059266e-05,
"loss": 0.2831,
"step": 5275
},
{
"epoch": 1.1037067888379841,
"grad_norm": 2.5448479652404785,
"learning_rate": 3.9868067104066646e-05,
"loss": 0.1692,
"step": 5300
},
{
"epoch": 1.1089129529362765,
"grad_norm": 2.1148276329040527,
"learning_rate": 3.9746043040526074e-05,
"loss": 0.3686,
"step": 5325
},
{
"epoch": 1.114119117034569,
"grad_norm": 18.591083526611328,
"learning_rate": 3.9623477837917167e-05,
"loss": 0.2465,
"step": 5350
},
{
"epoch": 1.1193252811328613,
"grad_norm": 1.9231871366500854,
"learning_rate": 3.950037599404451e-05,
"loss": 0.2383,
"step": 5375
},
{
"epoch": 1.1245314452311537,
"grad_norm": 13.078465461730957,
"learning_rate": 3.93767420264059e-05,
"loss": 0.2129,
"step": 5400
},
{
"epoch": 1.129737609329446,
"grad_norm": 0.015137123875319958,
"learning_rate": 3.9252580472026616e-05,
"loss": 0.123,
"step": 5425
},
{
"epoch": 1.1349437734277386,
"grad_norm": 1.9116227626800537,
"learning_rate": 3.912789588729292e-05,
"loss": 0.2135,
"step": 5450
},
{
"epoch": 1.1401499375260309,
"grad_norm": 0.04986105486750603,
"learning_rate": 3.9002692847784824e-05,
"loss": 0.3574,
"step": 5475
},
{
"epoch": 1.1453561016243232,
"grad_norm": 8.818171501159668,
"learning_rate": 3.8876975948108236e-05,
"loss": 0.1246,
"step": 5500
},
{
"epoch": 1.1505622657226156,
"grad_norm": 0.3172764182090759,
"learning_rate": 3.8750749801726275e-05,
"loss": 0.182,
"step": 5525
},
{
"epoch": 1.1557684298209079,
"grad_norm": 9.364998817443848,
"learning_rate": 3.862401904079004e-05,
"loss": 0.2951,
"step": 5550
},
{
"epoch": 1.1609745939192004,
"grad_norm": 7.908264636993408,
"learning_rate": 3.849678831596855e-05,
"loss": 0.1521,
"step": 5575
},
{
"epoch": 1.1661807580174928,
"grad_norm": 0.3125484585762024,
"learning_rate": 3.8369062296278174e-05,
"loss": 0.2956,
"step": 5600
},
{
"epoch": 1.171386922115785,
"grad_norm": 0.6337696313858032,
"learning_rate": 3.824084566891118e-05,
"loss": 0.2355,
"step": 5625
},
{
"epoch": 1.1765930862140774,
"grad_norm": 0.07673631608486176,
"learning_rate": 3.8112143139063824e-05,
"loss": 0.2098,
"step": 5650
},
{
"epoch": 1.1817992503123698,
"grad_norm": 0.02253473550081253,
"learning_rate": 3.798295942976362e-05,
"loss": 0.1149,
"step": 5675
},
{
"epoch": 1.1870054144106623,
"grad_norm": 6.865312099456787,
"learning_rate": 3.7853299281696056e-05,
"loss": 0.3913,
"step": 5700
},
{
"epoch": 1.1922115785089547,
"grad_norm": 6.194192409515381,
"learning_rate": 3.7723167453030586e-05,
"loss": 0.1924,
"step": 5725
},
{
"epoch": 1.197417742607247,
"grad_norm": 0.12381599843502045,
"learning_rate": 3.759256871924604e-05,
"loss": 0.3331,
"step": 5750
},
{
"epoch": 1.2026239067055393,
"grad_norm": 0.02437855489552021,
"learning_rate": 3.74615078729554e-05,
"loss": 0.1819,
"step": 5775
},
{
"epoch": 1.2078300708038316,
"grad_norm": 14.187339782714844,
"learning_rate": 3.732998972372991e-05,
"loss": 0.2303,
"step": 5800
},
{
"epoch": 1.2130362349021242,
"grad_norm": 16.453699111938477,
"learning_rate": 3.719801909792251e-05,
"loss": 0.3025,
"step": 5825
},
{
"epoch": 1.2182423990004165,
"grad_norm": 1.748321771621704,
"learning_rate": 3.706560083849083e-05,
"loss": 0.2085,
"step": 5850
},
{
"epoch": 1.2234485630987089,
"grad_norm": 0.6774524450302124,
"learning_rate": 3.693273980481942e-05,
"loss": 0.2587,
"step": 5875
},
{
"epoch": 1.2286547271970012,
"grad_norm": 12.350029945373535,
"learning_rate": 3.679944087254141e-05,
"loss": 0.1885,
"step": 5900
},
{
"epoch": 1.2338608912952935,
"grad_norm": 6.396539688110352,
"learning_rate": 3.6665708933359576e-05,
"loss": 0.2404,
"step": 5925
},
{
"epoch": 1.239067055393586,
"grad_norm": 5.529823303222656,
"learning_rate": 3.6531548894866896e-05,
"loss": 0.2086,
"step": 5950
},
{
"epoch": 1.2442732194918784,
"grad_norm": 4.696390628814697,
"learning_rate": 3.639696568036639e-05,
"loss": 0.1309,
"step": 5975
},
{
"epoch": 1.2494793835901707,
"grad_norm": 0.10419386625289917,
"learning_rate": 3.6261964228690454e-05,
"loss": 0.1567,
"step": 6000
},
{
"epoch": 1.254685547688463,
"grad_norm": 0.19173868000507355,
"learning_rate": 3.612654949401967e-05,
"loss": 0.1632,
"step": 6025
},
{
"epoch": 1.2598917117867554,
"grad_norm": 10.412970542907715,
"learning_rate": 3.599072644570092e-05,
"loss": 0.1771,
"step": 6050
},
{
"epoch": 1.265097875885048,
"grad_norm": 13.899092674255371,
"learning_rate": 3.585450006806512e-05,
"loss": 0.0964,
"step": 6075
},
{
"epoch": 1.2703040399833403,
"grad_norm": 11.265216827392578,
"learning_rate": 3.5717875360244214e-05,
"loss": 0.3711,
"step": 6100
},
{
"epoch": 1.2755102040816326,
"grad_norm": 5.9480977058410645,
"learning_rate": 3.5580857335987774e-05,
"loss": 0.2482,
"step": 6125
},
{
"epoch": 1.280716368179925,
"grad_norm": 7.014443874359131,
"learning_rate": 3.544345102347902e-05,
"loss": 0.258,
"step": 6150
},
{
"epoch": 1.2859225322782173,
"grad_norm": 0.06656356900930405,
"learning_rate": 3.530566146515028e-05,
"loss": 0.1432,
"step": 6175
},
{
"epoch": 1.2911286963765098,
"grad_norm": 9.881967544555664,
"learning_rate": 3.5167493717497905e-05,
"loss": 0.127,
"step": 6200
},
{
"epoch": 1.2963348604748022,
"grad_norm": 7.715837478637695,
"learning_rate": 3.502895285089679e-05,
"loss": 0.229,
"step": 6225
},
{
"epoch": 1.3015410245730945,
"grad_norm": 0.07192976772785187,
"learning_rate": 3.489004394941425e-05,
"loss": 0.1111,
"step": 6250
},
{
"epoch": 1.3067471886713868,
"grad_norm": 1.0921132564544678,
"learning_rate": 3.475077211062346e-05,
"loss": 0.1642,
"step": 6275
},
{
"epoch": 1.3119533527696792,
"grad_norm": 0.2106587141752243,
"learning_rate": 3.461114244541641e-05,
"loss": 0.2393,
"step": 6300
},
{
"epoch": 1.3171595168679717,
"grad_norm": 5.716639518737793,
"learning_rate": 3.4471160077816314e-05,
"loss": 0.2525,
"step": 6325
},
{
"epoch": 1.322365680966264,
"grad_norm": 0.30353063344955444,
"learning_rate": 3.43308301447896e-05,
"loss": 0.2426,
"step": 6350
},
{
"epoch": 1.3275718450645564,
"grad_norm": 0.039772335439920425,
"learning_rate": 3.419015779605737e-05,
"loss": 0.2302,
"step": 6375
},
{
"epoch": 1.332778009162849,
"grad_norm": 0.7781673073768616,
"learning_rate": 3.404914819390646e-05,
"loss": 0.2583,
"step": 6400
},
{
"epoch": 1.3379841732611413,
"grad_norm": 24.190811157226562,
"learning_rate": 3.3907806512999966e-05,
"loss": 0.1873,
"step": 6425
},
{
"epoch": 1.3431903373594336,
"grad_norm": 9.922744750976562,
"learning_rate": 3.3766137940187364e-05,
"loss": 0.2441,
"step": 6450
},
{
"epoch": 1.348396501457726,
"grad_norm": 7.696400165557861,
"learning_rate": 3.362414767431414e-05,
"loss": 0.1051,
"step": 6475
},
{
"epoch": 1.3536026655560183,
"grad_norm": 5.509871482849121,
"learning_rate": 3.348184092603105e-05,
"loss": 0.1288,
"step": 6500
},
{
"epoch": 1.3588088296543108,
"grad_norm": 12.904980659484863,
"learning_rate": 3.333922291760286e-05,
"loss": 0.2988,
"step": 6525
},
{
"epoch": 1.3640149937526032,
"grad_norm": 0.11204788088798523,
"learning_rate": 3.319629888271675e-05,
"loss": 0.1962,
"step": 6550
},
{
"epoch": 1.3692211578508955,
"grad_norm": 0.16563156247138977,
"learning_rate": 3.3053074066290214e-05,
"loss": 0.2317,
"step": 6575
},
{
"epoch": 1.3744273219491878,
"grad_norm": 10.849634170532227,
"learning_rate": 3.290955372427858e-05,
"loss": 0.1759,
"step": 6600
},
{
"epoch": 1.3796334860474802,
"grad_norm": 13.899421691894531,
"learning_rate": 3.276574312348218e-05,
"loss": 0.3169,
"step": 6625
},
{
"epoch": 1.3848396501457727,
"grad_norm": 0.03900701552629471,
"learning_rate": 3.2621647541353015e-05,
"loss": 0.2785,
"step": 6650
},
{
"epoch": 1.390045814244065,
"grad_norm": 4.0548095703125,
"learning_rate": 3.2477272265801126e-05,
"loss": 0.2163,
"step": 6675
},
{
"epoch": 1.3952519783423574,
"grad_norm": 0.026262715458869934,
"learning_rate": 3.233841378366721e-05,
"loss": 0.2331,
"step": 6700
},
{
"epoch": 1.4004581424406497,
"grad_norm": 1.4128097295761108,
"learning_rate": 3.2193505687298915e-05,
"loss": 0.138,
"step": 6725
},
{
"epoch": 1.405664306538942,
"grad_norm": 0.03084075264632702,
"learning_rate": 3.2048333609131926e-05,
"loss": 0.2318,
"step": 6750
},
{
"epoch": 1.4108704706372346,
"grad_norm": 0.08986690640449524,
"learning_rate": 3.19029028765807e-05,
"loss": 0.1265,
"step": 6775
},
{
"epoch": 1.416076634735527,
"grad_norm": 0.33206573128700256,
"learning_rate": 3.1757218826551634e-05,
"loss": 0.0584,
"step": 6800
},
{
"epoch": 1.4212827988338192,
"grad_norm": 0.31883516907691956,
"learning_rate": 3.161128680524714e-05,
"loss": 0.1517,
"step": 6825
},
{
"epoch": 1.4264889629321116,
"grad_norm": 0.03625530004501343,
"learning_rate": 3.1465112167969504e-05,
"loss": 0.1541,
"step": 6850
},
{
"epoch": 1.431695127030404,
"grad_norm": 9.222389221191406,
"learning_rate": 3.1318700278924374e-05,
"loss": 0.267,
"step": 6875
},
{
"epoch": 1.4369012911286965,
"grad_norm": 9.740432739257812,
"learning_rate": 3.11779266463367e-05,
"loss": 0.2008,
"step": 6900
},
{
"epoch": 1.4421074552269888,
"grad_norm": 0.007975614629685879,
"learning_rate": 3.1031065337471356e-05,
"loss": 0.1723,
"step": 6925
},
{
"epoch": 1.4473136193252811,
"grad_norm": 9.000720977783203,
"learning_rate": 3.088398270515885e-05,
"loss": 0.1206,
"step": 6950
},
{
"epoch": 1.4525197834235735,
"grad_norm": 9.622346878051758,
"learning_rate": 3.0736684146925715e-05,
"loss": 0.1296,
"step": 6975
},
{
"epoch": 1.4577259475218658,
"grad_norm": 0.9583789706230164,
"learning_rate": 3.0589175068222385e-05,
"loss": 0.144,
"step": 7000
},
{
"epoch": 1.4629321116201583,
"grad_norm": 37.292327880859375,
"learning_rate": 3.044146088222479e-05,
"loss": 0.253,
"step": 7025
},
{
"epoch": 1.4681382757184507,
"grad_norm": 6.009835243225098,
"learning_rate": 3.0293547009635758e-05,
"loss": 0.1883,
"step": 7050
},
{
"epoch": 1.473344439816743,
"grad_norm": 0.2210305631160736,
"learning_rate": 3.0145438878486033e-05,
"loss": 0.2264,
"step": 7075
},
{
"epoch": 1.4785506039150353,
"grad_norm": 6.8800764083862305,
"learning_rate": 2.9997141923935136e-05,
"loss": 0.2641,
"step": 7100
},
{
"epoch": 1.4837567680133277,
"grad_norm": 0.2739255130290985,
"learning_rate": 2.9848661588071876e-05,
"loss": 0.2043,
"step": 7125
},
{
"epoch": 1.4889629321116202,
"grad_norm": 0.016436373814940453,
"learning_rate": 2.9700003319714648e-05,
"loss": 0.1275,
"step": 7150
},
{
"epoch": 1.4941690962099126,
"grad_norm": 3.340723752975464,
"learning_rate": 2.9551172574211478e-05,
"loss": 0.3281,
"step": 7175
},
{
"epoch": 1.499375260308205,
"grad_norm": 4.6393256187438965,
"learning_rate": 2.9402174813239836e-05,
"loss": 0.1756,
"step": 7200
},
{
"epoch": 1.5045814244064974,
"grad_norm": 6.234640121459961,
"learning_rate": 2.9253015504606197e-05,
"loss": 0.2412,
"step": 7225
},
{
"epoch": 1.5097875885047896,
"grad_norm": 2.9910178184509277,
"learning_rate": 2.910370012204537e-05,
"loss": 0.2625,
"step": 7250
},
{
"epoch": 1.5149937526030821,
"grad_norm": 0.030267061665654182,
"learning_rate": 2.8954234145019672e-05,
"loss": 0.1825,
"step": 7275
},
{
"epoch": 1.5201999167013744,
"grad_norm": 12.349358558654785,
"learning_rate": 2.8804623058517805e-05,
"loss": 0.2111,
"step": 7300
},
{
"epoch": 1.5254060807996668,
"grad_norm": 0.03321736305952072,
"learning_rate": 2.86548723528536e-05,
"loss": 0.1627,
"step": 7325
},
{
"epoch": 1.5306122448979593,
"grad_norm": 12.457853317260742,
"learning_rate": 2.8504987523464515e-05,
"loss": 0.2239,
"step": 7350
},
{
"epoch": 1.5358184089962514,
"grad_norm": 0.0661047101020813,
"learning_rate": 2.8354974070709983e-05,
"loss": 0.2431,
"step": 7375
},
{
"epoch": 1.541024573094544,
"grad_norm": 0.02619466558098793,
"learning_rate": 2.820483749966955e-05,
"loss": 0.1084,
"step": 7400
},
{
"epoch": 1.5462307371928363,
"grad_norm": 3.5517141819000244,
"learning_rate": 2.8054583319940896e-05,
"loss": 0.116,
"step": 7425
},
{
"epoch": 1.5514369012911287,
"grad_norm": 8.054045677185059,
"learning_rate": 2.7904217045437564e-05,
"loss": 0.2523,
"step": 7450
},
{
"epoch": 1.5566430653894212,
"grad_norm": 5.730096340179443,
"learning_rate": 2.775374419418671e-05,
"loss": 0.1687,
"step": 7475
},
{
"epoch": 1.5618492294877133,
"grad_norm": 0.11246989667415619,
"learning_rate": 2.7603170288126546e-05,
"loss": 0.1266,
"step": 7500
},
{
"epoch": 1.5670553935860059,
"grad_norm": 7.816544532775879,
"learning_rate": 2.74525008529037e-05,
"loss": 0.2546,
"step": 7525
},
{
"epoch": 1.5722615576842982,
"grad_norm": 0.06883124262094498,
"learning_rate": 2.7301741417670485e-05,
"loss": 0.1984,
"step": 7550
},
{
"epoch": 1.5774677217825905,
"grad_norm": 3.755141019821167,
"learning_rate": 2.715089751488195e-05,
"loss": 0.1695,
"step": 7575
},
{
"epoch": 1.582673885880883,
"grad_norm": 0.14307986199855804,
"learning_rate": 2.6999974680092882e-05,
"loss": 0.1959,
"step": 7600
},
{
"epoch": 1.5878800499791752,
"grad_norm": 16.175771713256836,
"learning_rate": 2.684897845175463e-05,
"loss": 0.21,
"step": 7625
},
{
"epoch": 1.5930862140774678,
"grad_norm": 0.39594829082489014,
"learning_rate": 2.6697914371011912e-05,
"loss": 0.1769,
"step": 7650
},
{
"epoch": 1.59829237817576,
"grad_norm": 11.465653419494629,
"learning_rate": 2.654678798149942e-05,
"loss": 0.1581,
"step": 7675
},
{
"epoch": 1.6034985422740524,
"grad_norm": 0.006868015043437481,
"learning_rate": 2.639560482913843e-05,
"loss": 0.2084,
"step": 7700
},
{
"epoch": 1.608704706372345,
"grad_norm": 23.506370544433594,
"learning_rate": 2.6244370461933226e-05,
"loss": 0.1521,
"step": 7725
},
{
"epoch": 1.613910870470637,
"grad_norm": 7.843533039093018,
"learning_rate": 2.609309042976757e-05,
"loss": 0.161,
"step": 7750
},
{
"epoch": 1.6191170345689296,
"grad_norm": 0.6957140564918518,
"learning_rate": 2.5941770284200968e-05,
"loss": 0.1201,
"step": 7775
},
{
"epoch": 1.624323198667222,
"grad_norm": 0.09775109589099884,
"learning_rate": 2.5790415578265e-05,
"loss": 0.0975,
"step": 7800
},
{
"epoch": 1.6295293627655143,
"grad_norm": 7.59830379486084,
"learning_rate": 2.5639031866259512e-05,
"loss": 0.1495,
"step": 7825
},
{
"epoch": 1.6347355268638069,
"grad_norm": 6.013265132904053,
"learning_rate": 2.5487624703548783e-05,
"loss": 0.2432,
"step": 7850
},
{
"epoch": 1.639941690962099,
"grad_norm": 0.011226486414670944,
"learning_rate": 2.5336199646357693e-05,
"loss": 0.1438,
"step": 7875
},
{
"epoch": 1.6451478550603915,
"grad_norm": 12.745257377624512,
"learning_rate": 2.518476225156776e-05,
"loss": 0.1853,
"step": 7900
},
{
"epoch": 1.6503540191586838,
"grad_norm": 0.028018401935696602,
"learning_rate": 2.5033318076513295e-05,
"loss": 0.269,
"step": 7925
},
{
"epoch": 1.6555601832569762,
"grad_norm": 0.29439255595207214,
"learning_rate": 2.4881872678777408e-05,
"loss": 0.2584,
"step": 7950
},
{
"epoch": 1.6607663473552687,
"grad_norm": 7.931988716125488,
"learning_rate": 2.473043161598808e-05,
"loss": 0.2546,
"step": 7975
},
{
"epoch": 1.665972511453561,
"grad_norm": 0.24659812450408936,
"learning_rate": 2.4579000445614214e-05,
"loss": 0.0941,
"step": 8000
},
{
"epoch": 1.6711786755518534,
"grad_norm": 0.13519421219825745,
"learning_rate": 2.4427584724761687e-05,
"loss": 0.1369,
"step": 8025
},
{
"epoch": 1.6763848396501457,
"grad_norm": 2.32975435256958,
"learning_rate": 2.4276190009969428e-05,
"loss": 0.0967,
"step": 8050
},
{
"epoch": 1.681591003748438,
"grad_norm": 10.135041236877441,
"learning_rate": 2.412482185700548e-05,
"loss": 0.2521,
"step": 8075
},
{
"epoch": 1.6867971678467306,
"grad_norm": 5.711327075958252,
"learning_rate": 2.397348582066317e-05,
"loss": 0.2155,
"step": 8100
},
{
"epoch": 1.692003331945023,
"grad_norm": 5.974716663360596,
"learning_rate": 2.382218745455721e-05,
"loss": 0.1574,
"step": 8125
},
{
"epoch": 1.6972094960433153,
"grad_norm": 0.01547964382916689,
"learning_rate": 2.3670932310919928e-05,
"loss": 0.1252,
"step": 8150
},
{
"epoch": 1.7024156601416076,
"grad_norm": 0.8667640089988708,
"learning_rate": 2.3519725940397516e-05,
"loss": 0.1717,
"step": 8175
},
{
"epoch": 1.7076218242399,
"grad_norm": 0.2800462245941162,
"learning_rate": 2.3368573891846307e-05,
"loss": 0.1726,
"step": 8200
},
{
"epoch": 1.7128279883381925,
"grad_norm": 0.2914607524871826,
"learning_rate": 2.321748171212919e-05,
"loss": 0.1686,
"step": 8225
},
{
"epoch": 1.7180341524364848,
"grad_norm": 5.2183051109313965,
"learning_rate": 2.3066454945912003e-05,
"loss": 0.2877,
"step": 8250
},
{
"epoch": 1.7232403165347772,
"grad_norm": 5.748877048492432,
"learning_rate": 2.2915499135460123e-05,
"loss": 0.157,
"step": 8275
},
{
"epoch": 1.7284464806330697,
"grad_norm": 9.520060539245605,
"learning_rate": 2.276461982043503e-05,
"loss": 0.2562,
"step": 8300
},
{
"epoch": 1.7336526447313618,
"grad_norm": 0.03092977963387966,
"learning_rate": 2.2613822537691016e-05,
"loss": 0.1034,
"step": 8325
},
{
"epoch": 1.7388588088296544,
"grad_norm": 0.1688009798526764,
"learning_rate": 2.2463112821072063e-05,
"loss": 0.1484,
"step": 8350
},
{
"epoch": 1.7440649729279467,
"grad_norm": 0.5068601369857788,
"learning_rate": 2.2312496201208654e-05,
"loss": 0.1381,
"step": 8375
},
{
"epoch": 1.749271137026239,
"grad_norm": 0.58343505859375,
"learning_rate": 2.2161978205314934e-05,
"loss": 0.2426,
"step": 8400
},
{
"epoch": 1.7544773011245316,
"grad_norm": 5.165789604187012,
"learning_rate": 2.20115643569858e-05,
"loss": 0.2173,
"step": 8425
},
{
"epoch": 1.7596834652228237,
"grad_norm": 0.6824327111244202,
"learning_rate": 2.1861260175994203e-05,
"loss": 0.1443,
"step": 8450
},
{
"epoch": 1.7648896293211163,
"grad_norm": 0.2100004255771637,
"learning_rate": 2.1711071178088633e-05,
"loss": 0.1741,
"step": 8475
},
{
"epoch": 1.7700957934194086,
"grad_norm": 0.46001136302948,
"learning_rate": 2.1561002874790662e-05,
"loss": 0.2116,
"step": 8500
},
{
"epoch": 1.775301957517701,
"grad_norm": 7.581360816955566,
"learning_rate": 2.1411060773192704e-05,
"loss": 0.1379,
"step": 8525
},
{
"epoch": 1.7805081216159935,
"grad_norm": 7.0736846923828125,
"learning_rate": 2.126125037575594e-05,
"loss": 0.1403,
"step": 8550
},
{
"epoch": 1.7857142857142856,
"grad_norm": 0.7845435738563538,
"learning_rate": 2.1111577180108343e-05,
"loss": 0.1406,
"step": 8575
},
{
"epoch": 1.7909204498125781,
"grad_norm": 0.2352413535118103,
"learning_rate": 2.0962046678842974e-05,
"loss": 0.1425,
"step": 8600
},
{
"epoch": 1.7961266139108705,
"grad_norm": 0.2534882426261902,
"learning_rate": 2.08126643593164e-05,
"loss": 0.2827,
"step": 8625
},
{
"epoch": 1.8013327780091628,
"grad_norm": 0.015448580496013165,
"learning_rate": 2.066343570344733e-05,
"loss": 0.2161,
"step": 8650
},
{
"epoch": 1.8065389421074554,
"grad_norm": 11.641529083251953,
"learning_rate": 2.0514366187515444e-05,
"loss": 0.141,
"step": 8675
},
{
"epoch": 1.8117451062057475,
"grad_norm": 8.481783866882324,
"learning_rate": 2.036546128196041e-05,
"loss": 0.1693,
"step": 8700
},
{
"epoch": 1.81695127030404,
"grad_norm": 0.046340491622686386,
"learning_rate": 2.021672645118118e-05,
"loss": 0.1685,
"step": 8725
},
{
"epoch": 1.8221574344023324,
"grad_norm": 0.02451934851706028,
"learning_rate": 2.0068167153335388e-05,
"loss": 0.2014,
"step": 8750
},
{
"epoch": 1.8273635985006247,
"grad_norm": 0.008840017020702362,
"learning_rate": 1.991978884013914e-05,
"loss": 0.139,
"step": 8775
},
{
"epoch": 1.8325697625989172,
"grad_norm": 0.03715880587697029,
"learning_rate": 1.9771596956666876e-05,
"loss": 0.1835,
"step": 8800
},
{
"epoch": 1.8377759266972093,
"grad_norm": 0.052263110876083374,
"learning_rate": 1.9623596941151584e-05,
"loss": 0.1522,
"step": 8825
},
{
"epoch": 1.842982090795502,
"grad_norm": 0.14420367777347565,
"learning_rate": 1.9475794224785242e-05,
"loss": 0.1649,
"step": 8850
},
{
"epoch": 1.8481882548937942,
"grad_norm": 5.102858066558838,
"learning_rate": 1.9328194231519464e-05,
"loss": 0.1966,
"step": 8875
},
{
"epoch": 1.8533944189920866,
"grad_norm": 2.9605753421783447,
"learning_rate": 1.918080237786651e-05,
"loss": 0.1057,
"step": 8900
},
{
"epoch": 1.8586005830903791,
"grad_norm": 0.06543917208909988,
"learning_rate": 1.9033624072700505e-05,
"loss": 0.2098,
"step": 8925
},
{
"epoch": 1.8638067471886712,
"grad_norm": 4.984257221221924,
"learning_rate": 1.8886664717058894e-05,
"loss": 0.2006,
"step": 8950
},
{
"epoch": 1.8690129112869638,
"grad_norm": 0.3344804048538208,
"learning_rate": 1.8739929703944314e-05,
"loss": 0.1492,
"step": 8975
},
{
"epoch": 1.8742190753852561,
"grad_norm": 6.675142765045166,
"learning_rate": 1.859342441812662e-05,
"loss": 0.1568,
"step": 9000
},
{
"epoch": 1.8794252394835484,
"grad_norm": 13.291411399841309,
"learning_rate": 1.8447154235945336e-05,
"loss": 0.2015,
"step": 9025
},
{
"epoch": 1.884631403581841,
"grad_norm": 0.030354047194123268,
"learning_rate": 1.8301124525112325e-05,
"loss": 0.1201,
"step": 9050
},
{
"epoch": 1.8898375676801331,
"grad_norm": 11.661486625671387,
"learning_rate": 1.8155340644514798e-05,
"loss": 0.1744,
"step": 9075
},
{
"epoch": 1.8950437317784257,
"grad_norm": 9.253254890441895,
"learning_rate": 1.8009807944018715e-05,
"loss": 0.1305,
"step": 9100
},
{
"epoch": 1.900249895876718,
"grad_norm": 0.049124184995889664,
"learning_rate": 1.7864531764272356e-05,
"loss": 0.225,
"step": 9125
},
{
"epoch": 1.9054560599750103,
"grad_norm": 10.553680419921875,
"learning_rate": 1.7719517436510462e-05,
"loss": 0.213,
"step": 9150
},
{
"epoch": 1.9106622240733029,
"grad_norm": 0.042677950114011765,
"learning_rate": 1.7574770282358505e-05,
"loss": 0.1058,
"step": 9175
},
{
"epoch": 1.9158683881715952,
"grad_norm": 0.8776458501815796,
"learning_rate": 1.7430295613637404e-05,
"loss": 0.1907,
"step": 9200
},
{
"epoch": 1.9210745522698875,
"grad_norm": 9.908956527709961,
"learning_rate": 1.7286098732168644e-05,
"loss": 0.1863,
"step": 9225
},
{
"epoch": 1.9262807163681799,
"grad_norm": 1.294067144393921,
"learning_rate": 1.7142184929579662e-05,
"loss": 0.1053,
"step": 9250
},
{
"epoch": 1.9314868804664722,
"grad_norm": 5.95350980758667,
"learning_rate": 1.6998559487109693e-05,
"loss": 0.1541,
"step": 9275
},
{
"epoch": 1.9366930445647648,
"grad_norm": 7.54666805267334,
"learning_rate": 1.6855227675415966e-05,
"loss": 0.1696,
"step": 9300
},
{
"epoch": 1.941899208663057,
"grad_norm": 0.0091070756316185,
"learning_rate": 1.671219475438024e-05,
"loss": 0.212,
"step": 9325
},
{
"epoch": 1.9471053727613494,
"grad_norm": 0.05602679401636124,
"learning_rate": 1.656946597291584e-05,
"loss": 0.133,
"step": 9350
},
{
"epoch": 1.9523115368596418,
"grad_norm": 0.04069928824901581,
"learning_rate": 1.6427046568774977e-05,
"loss": 0.1599,
"step": 9375
},
{
"epoch": 1.957517700957934,
"grad_norm": 0.21093979477882385,
"learning_rate": 1.628494176835661e-05,
"loss": 0.1432,
"step": 9400
},
{
"epoch": 1.9627238650562266,
"grad_norm": 6.424279689788818,
"learning_rate": 1.614315678651457e-05,
"loss": 0.1286,
"step": 9425
},
{
"epoch": 1.967930029154519,
"grad_norm": 8.281681060791016,
"learning_rate": 1.6001696826366247e-05,
"loss": 0.0896,
"step": 9450
},
{
"epoch": 1.9731361932528113,
"grad_norm": 3.2924630641937256,
"learning_rate": 1.5860567079101657e-05,
"loss": 0.2068,
"step": 9475
},
{
"epoch": 1.9783423573511039,
"grad_norm": 0.10305780172348022,
"learning_rate": 1.5719772723792857e-05,
"loss": 0.133,
"step": 9500
},
{
"epoch": 1.983548521449396,
"grad_norm": 0.013987602666020393,
"learning_rate": 1.5579318927204003e-05,
"loss": 0.1602,
"step": 9525
},
{
"epoch": 1.9887546855476885,
"grad_norm": 8.741622924804688,
"learning_rate": 1.5439210843601647e-05,
"loss": 0.1248,
"step": 9550
},
{
"epoch": 1.9939608496459809,
"grad_norm": 7.777089595794678,
"learning_rate": 1.5299453614565646e-05,
"loss": 0.1656,
"step": 9575
},
{
"epoch": 1.9991670137442732,
"grad_norm": 7.620024681091309,
"learning_rate": 1.5160052368800466e-05,
"loss": 0.1867,
"step": 9600
},
{
"epoch": 2.0,
"eval_accuracy": 0.9082910321489002,
"eval_f1_macro": 0.8934973914545552,
"eval_f1_micro": 0.9082910321489002,
"eval_f1_weighted": 0.8990272847487176,
"eval_loss": 0.23952987790107727,
"eval_precision_macro": 0.9212318034319655,
"eval_precision_micro": 0.9082910321489002,
"eval_precision_weighted": 0.9229994939426459,
"eval_recall_macro": 0.9014322820037105,
"eval_recall_micro": 0.9082910321489002,
"eval_recall_weighted": 0.9082910321489002,
"eval_runtime": 16.0892,
"eval_samples_per_second": 918.319,
"eval_steps_per_second": 57.43,
"step": 9604
},
{
"epoch": 2.0043731778425657,
"grad_norm": 0.036709289997816086,
"learning_rate": 1.5021012221946956e-05,
"loss": 0.1042,
"step": 9625
},
{
"epoch": 2.009579341940858,
"grad_norm": 0.03700362890958786,
"learning_rate": 1.4882338276394644e-05,
"loss": 0.1846,
"step": 9650
},
{
"epoch": 2.0147855060391504,
"grad_norm": 0.2605753540992737,
"learning_rate": 1.4744035621094468e-05,
"loss": 0.1531,
"step": 9675
},
{
"epoch": 2.0199916701374425,
"grad_norm": 1.4101078510284424,
"learning_rate": 1.460610933137206e-05,
"loss": 0.1186,
"step": 9700
},
{
"epoch": 2.025197834235735,
"grad_norm": 7.530198097229004,
"learning_rate": 1.4468564468741464e-05,
"loss": 0.1534,
"step": 9725
},
{
"epoch": 2.0304039983340276,
"grad_norm": 0.06903871148824692,
"learning_rate": 1.433140608071939e-05,
"loss": 0.2258,
"step": 9750
},
{
"epoch": 2.0356101624323197,
"grad_norm": 0.9064328670501709,
"learning_rate": 1.4194639200640023e-05,
"loss": 0.1681,
"step": 9775
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.04811558499932289,
"learning_rate": 1.4058268847470241e-05,
"loss": 0.1343,
"step": 9800
},
{
"epoch": 2.0460224906289044,
"grad_norm": 5.909759998321533,
"learning_rate": 1.3922300025625539e-05,
"loss": 0.0999,
"step": 9825
},
{
"epoch": 2.051228654727197,
"grad_norm": 1.272133708000183,
"learning_rate": 1.37867377247863e-05,
"loss": 0.1376,
"step": 9850
},
{
"epoch": 2.0564348188254895,
"grad_norm": 3.806577682495117,
"learning_rate": 1.3651586919714671e-05,
"loss": 0.158,
"step": 9875
},
{
"epoch": 2.0616409829237816,
"grad_norm": 1.572864294052124,
"learning_rate": 1.3516852570072086e-05,
"loss": 0.1046,
"step": 9900
},
{
"epoch": 2.066847147022074,
"grad_norm": 0.07213819772005081,
"learning_rate": 1.33825396202372e-05,
"loss": 0.142,
"step": 9925
},
{
"epoch": 2.0720533111203667,
"grad_norm": 6.35291862487793,
"learning_rate": 1.324865299912445e-05,
"loss": 0.1243,
"step": 9950
},
{
"epoch": 2.077259475218659,
"grad_norm": 11.946340560913086,
"learning_rate": 1.31151976200032e-05,
"loss": 0.1512,
"step": 9975
},
{
"epoch": 2.0824656393169514,
"grad_norm": 0.20692221820354462,
"learning_rate": 1.2982178380317416e-05,
"loss": 0.137,
"step": 10000
},
{
"epoch": 2.0876718034152435,
"grad_norm": 0.24739764630794525,
"learning_rate": 1.2849600161505957e-05,
"loss": 0.1535,
"step": 10025
},
{
"epoch": 2.092877967513536,
"grad_norm": 0.6494514346122742,
"learning_rate": 1.2717467828823409e-05,
"loss": 0.1838,
"step": 10050
},
{
"epoch": 2.0980841316118286,
"grad_norm": 13.001143455505371,
"learning_rate": 1.25857862311616e-05,
"loss": 0.1973,
"step": 10075
},
{
"epoch": 2.1032902957101207,
"grad_norm": 2.27883243560791,
"learning_rate": 1.2454560200871612e-05,
"loss": 0.2116,
"step": 10100
},
{
"epoch": 2.1084964598084133,
"grad_norm": 14.335226058959961,
"learning_rate": 1.2323794553586462e-05,
"loss": 0.1645,
"step": 10125
},
{
"epoch": 2.1137026239067054,
"grad_norm": 0.5073531866073608,
"learning_rate": 1.2193494088044399e-05,
"loss": 0.2067,
"step": 10150
},
{
"epoch": 2.118908788004998,
"grad_norm": 10.126875877380371,
"learning_rate": 1.206366358591274e-05,
"loss": 0.2034,
"step": 10175
},
{
"epoch": 2.1241149521032905,
"grad_norm": 0.4860388934612274,
"learning_rate": 1.1934307811612502e-05,
"loss": 0.1298,
"step": 10200
},
{
"epoch": 2.1293211162015826,
"grad_norm": 11.252408981323242,
"learning_rate": 1.1805431512143473e-05,
"loss": 0.2134,
"step": 10225
},
{
"epoch": 2.134527280299875,
"grad_norm": 0.6547976732254028,
"learning_rate": 1.1677039416910018e-05,
"loss": 0.2518,
"step": 10250
},
{
"epoch": 2.1397334443981673,
"grad_norm": 0.02918720245361328,
"learning_rate": 1.1549136237547577e-05,
"loss": 0.1141,
"step": 10275
},
{
"epoch": 2.14493960849646,
"grad_norm": 2.0306875705718994,
"learning_rate": 1.1421726667749714e-05,
"loss": 0.2479,
"step": 10300
},
{
"epoch": 2.1501457725947524,
"grad_norm": 3.7051842212677,
"learning_rate": 1.1294815383095877e-05,
"loss": 0.1666,
"step": 10325
},
{
"epoch": 2.1553519366930445,
"grad_norm": 7.603741645812988,
"learning_rate": 1.1168407040879842e-05,
"loss": 0.1395,
"step": 10350
},
{
"epoch": 2.160558100791337,
"grad_norm": 0.0051424442790448666,
"learning_rate": 1.1042506279938771e-05,
"loss": 0.1033,
"step": 10375
},
{
"epoch": 2.165764264889629,
"grad_norm": 0.43630650639533997,
"learning_rate": 1.0917117720483014e-05,
"loss": 0.061,
"step": 10400
},
{
"epoch": 2.1709704289879217,
"grad_norm": 0.06688734143972397,
"learning_rate": 1.0792245963926504e-05,
"loss": 0.1547,
"step": 10425
},
{
"epoch": 2.1761765930862143,
"grad_norm": 0.005384028889238834,
"learning_rate": 1.0667895592717969e-05,
"loss": 0.1822,
"step": 10450
},
{
"epoch": 2.1813827571845064,
"grad_norm": 6.822201251983643,
"learning_rate": 1.0544071170172723e-05,
"loss": 0.0977,
"step": 10475
},
{
"epoch": 2.186588921282799,
"grad_norm": 0.17409928143024445,
"learning_rate": 1.0420777240305213e-05,
"loss": 0.1472,
"step": 10500
},
{
"epoch": 2.191795085381091,
"grad_norm": 4.65806245803833,
"learning_rate": 1.0298018327662284e-05,
"loss": 0.1499,
"step": 10525
},
{
"epoch": 2.1970012494793836,
"grad_norm": 0.050723303109407425,
"learning_rate": 1.0175798937157085e-05,
"loss": 0.1228,
"step": 10550
},
{
"epoch": 2.202207413577676,
"grad_norm": 7.599793434143066,
"learning_rate": 1.0054123553903849e-05,
"loss": 0.1122,
"step": 10575
},
{
"epoch": 2.2074135776759682,
"grad_norm": 0.2461252212524414,
"learning_rate": 9.93299664305322e-06,
"loss": 0.0847,
"step": 10600
},
{
"epoch": 2.212619741774261,
"grad_norm": 0.014030307531356812,
"learning_rate": 9.812422649628388e-06,
"loss": 0.1286,
"step": 10625
},
{
"epoch": 2.217825905872553,
"grad_norm": 0.010962835513055325,
"learning_rate": 9.692405998362045e-06,
"loss": 0.0591,
"step": 10650
},
{
"epoch": 2.2230320699708455,
"grad_norm": 0.009474786929786205,
"learning_rate": 9.572951093533946e-06,
"loss": 0.1037,
"step": 10675
},
{
"epoch": 2.228238234069138,
"grad_norm": 0.02288031578063965,
"learning_rate": 9.454062318809313e-06,
"loss": 0.1174,
"step": 10700
},
{
"epoch": 2.23344439816743,
"grad_norm": 10.316886901855469,
"learning_rate": 9.335744037077918e-06,
"loss": 0.1019,
"step": 10725
},
{
"epoch": 2.2386505622657227,
"grad_norm": 3.191359758377075,
"learning_rate": 9.218000590294072e-06,
"loss": 0.0714,
"step": 10750
},
{
"epoch": 2.243856726364015,
"grad_norm": 0.9904782176017761,
"learning_rate": 9.100836299317201e-06,
"loss": 0.1085,
"step": 10775
},
{
"epoch": 2.2490628904623073,
"grad_norm": 0.00978647731244564,
"learning_rate": 8.9842554637533e-06,
"loss": 0.1005,
"step": 10800
},
{
"epoch": 2.2542690545606,
"grad_norm": 0.5314901471138,
"learning_rate": 8.868262361797181e-06,
"loss": 0.1389,
"step": 10825
},
{
"epoch": 2.259475218658892,
"grad_norm": 2.777710437774658,
"learning_rate": 8.75286125007545e-06,
"loss": 0.1211,
"step": 10850
},
{
"epoch": 2.2646813827571846,
"grad_norm": 9.188045501708984,
"learning_rate": 8.638056363490302e-06,
"loss": 0.1837,
"step": 10875
},
{
"epoch": 2.269887546855477,
"grad_norm": 8.704696655273438,
"learning_rate": 8.523851915064129e-06,
"loss": 0.1512,
"step": 10900
},
{
"epoch": 2.275093710953769,
"grad_norm": 11.265883445739746,
"learning_rate": 8.410252095784866e-06,
"loss": 0.1485,
"step": 10925
},
{
"epoch": 2.2802998750520618,
"grad_norm": 8.006060600280762,
"learning_rate": 8.297261074452281e-06,
"loss": 0.1927,
"step": 10950
},
{
"epoch": 2.285506039150354,
"grad_norm": 6.185884475708008,
"learning_rate": 8.184882997524884e-06,
"loss": 0.0961,
"step": 10975
},
{
"epoch": 2.2907122032486464,
"grad_norm": 8.097982406616211,
"learning_rate": 8.073121988967849e-06,
"loss": 0.1119,
"step": 11000
},
{
"epoch": 2.295918367346939,
"grad_norm": 6.152439117431641,
"learning_rate": 7.961982150101643e-06,
"loss": 0.116,
"step": 11025
},
{
"epoch": 2.301124531445231,
"grad_norm": 4.259856224060059,
"learning_rate": 7.851467559451508e-06,
"loss": 0.1936,
"step": 11050
},
{
"epoch": 2.3063306955435237,
"grad_norm": 3.0448789596557617,
"learning_rate": 7.741582272597823e-06,
"loss": 0.1005,
"step": 11075
},
{
"epoch": 2.3115368596418158,
"grad_norm": 0.018132351338863373,
"learning_rate": 7.632330322027212e-06,
"loss": 0.1356,
"step": 11100
},
{
"epoch": 2.3167430237401083,
"grad_norm": 12.663254737854004,
"learning_rate": 7.523715716984661e-06,
"loss": 0.1243,
"step": 11125
},
{
"epoch": 2.321949187838401,
"grad_norm": 0.21571685373783112,
"learning_rate": 7.415742443326309e-06,
"loss": 0.1938,
"step": 11150
},
{
"epoch": 2.327155351936693,
"grad_norm": 0.023954235017299652,
"learning_rate": 7.308414463373189e-06,
"loss": 0.1548,
"step": 11175
},
{
"epoch": 2.3323615160349855,
"grad_norm": 0.010295159183442593,
"learning_rate": 7.2017357157658585e-06,
"loss": 0.145,
"step": 11200
},
{
"epoch": 2.3375676801332776,
"grad_norm": 0.6988667845726013,
"learning_rate": 7.095710115319831e-06,
"loss": 0.0901,
"step": 11225
},
{
"epoch": 2.34277384423157,
"grad_norm": 4.98328161239624,
"learning_rate": 6.99034155288193e-06,
"loss": 0.1512,
"step": 11250
},
{
"epoch": 2.3479800083298628,
"grad_norm": 1.8333989381790161,
"learning_rate": 6.885633895187493e-06,
"loss": 0.152,
"step": 11275
},
{
"epoch": 2.353186172428155,
"grad_norm": 0.01870441809296608,
"learning_rate": 6.781590984718461e-06,
"loss": 0.2451,
"step": 11300
},
{
"epoch": 2.3583923365264474,
"grad_norm": 0.03275001421570778,
"learning_rate": 6.678216639562429e-06,
"loss": 0.1564,
"step": 11325
},
{
"epoch": 2.3635985006247395,
"grad_norm": 0.021069686859846115,
"learning_rate": 6.5755146532724495e-06,
"loss": 0.1644,
"step": 11350
},
{
"epoch": 2.368804664723032,
"grad_norm": 5.0085577964782715,
"learning_rate": 6.473488794727878e-06,
"loss": 0.1459,
"step": 11375
},
{
"epoch": 2.3740108288213246,
"grad_norm": 8.764829635620117,
"learning_rate": 6.372142807996051e-06,
"loss": 0.1538,
"step": 11400
},
{
"epoch": 2.3792169929196167,
"grad_norm": 10.779679298400879,
"learning_rate": 6.27148041219488e-06,
"loss": 0.163,
"step": 11425
},
{
"epoch": 2.3844231570179093,
"grad_norm": 0.08707818388938904,
"learning_rate": 6.17150530135639e-06,
"loss": 0.1641,
"step": 11450
},
{
"epoch": 2.3896293211162014,
"grad_norm": 0.5683345198631287,
"learning_rate": 6.07222114429111e-06,
"loss": 0.1532,
"step": 11475
},
{
"epoch": 2.394835485214494,
"grad_norm": 6.6075639724731445,
"learning_rate": 5.9736315844535235e-06,
"loss": 0.167,
"step": 11500
},
{
"epoch": 2.4000416493127865,
"grad_norm": 0.18501617014408112,
"learning_rate": 5.875740239808283e-06,
"loss": 0.1036,
"step": 11525
},
{
"epoch": 2.4052478134110786,
"grad_norm": 0.05444851890206337,
"learning_rate": 5.778550702697463e-06,
"loss": 0.1271,
"step": 11550
},
{
"epoch": 2.410453977509371,
"grad_norm": 4.797399044036865,
"learning_rate": 5.682066539708763e-06,
"loss": 0.1147,
"step": 11575
},
{
"epoch": 2.4156601416076633,
"grad_norm": 1.1078044176101685,
"learning_rate": 5.586291291544585e-06,
"loss": 0.1421,
"step": 11600
},
{
"epoch": 2.420866305705956,
"grad_norm": 0.07829868793487549,
"learning_rate": 5.491228472892118e-06,
"loss": 0.1598,
"step": 11625
},
{
"epoch": 2.4260724698042484,
"grad_norm": 3.3450565338134766,
"learning_rate": 5.396881572294363e-06,
"loss": 0.1359,
"step": 11650
},
{
"epoch": 2.4312786339025405,
"grad_norm": 0.017295390367507935,
"learning_rate": 5.303254052022075e-06,
"loss": 0.0646,
"step": 11675
},
{
"epoch": 2.436484798000833,
"grad_norm": 13.109850883483887,
"learning_rate": 5.210349347946783e-06,
"loss": 0.2058,
"step": 11700
},
{
"epoch": 2.441690962099125,
"grad_norm": 0.01693105697631836,
"learning_rate": 5.118170869414618e-06,
"loss": 0.1465,
"step": 11725
},
{
"epoch": 2.4468971261974177,
"grad_norm": 0.01354676578193903,
"learning_rate": 5.026721999121256e-06,
"loss": 0.0848,
"step": 11750
},
{
"epoch": 2.4521032902957103,
"grad_norm": 0.5930526852607727,
"learning_rate": 4.9360060929877636e-06,
"loss": 0.1185,
"step": 11775
},
{
"epoch": 2.4573094543940024,
"grad_norm": 12.73507308959961,
"learning_rate": 4.846026480037444e-06,
"loss": 0.1434,
"step": 11800
},
{
"epoch": 2.462515618492295,
"grad_norm": 6.715247631072998,
"learning_rate": 4.7567864622736824e-06,
"loss": 0.1317,
"step": 11825
},
{
"epoch": 2.467721782590587,
"grad_norm": 0.002648524707183242,
"learning_rate": 4.668289314558735e-06,
"loss": 0.1621,
"step": 11850
},
{
"epoch": 2.4729279466888796,
"grad_norm": 8.284907341003418,
"learning_rate": 4.580538284493616e-06,
"loss": 0.1486,
"step": 11875
},
{
"epoch": 2.478134110787172,
"grad_norm": 0.4824686348438263,
"learning_rate": 4.493536592298864e-06,
"loss": 0.192,
"step": 11900
},
{
"epoch": 2.4833402748854643,
"grad_norm": 6.810537338256836,
"learning_rate": 4.407287430696372e-06,
"loss": 0.1059,
"step": 11925
},
{
"epoch": 2.488546438983757,
"grad_norm": 5.896641731262207,
"learning_rate": 4.321793964792262e-06,
"loss": 0.0996,
"step": 11950
},
{
"epoch": 2.493752603082049,
"grad_norm": 10.901542663574219,
"learning_rate": 4.237059331960694e-06,
"loss": 0.1538,
"step": 11975
},
{
"epoch": 2.4989587671803415,
"grad_norm": 5.155703067779541,
"learning_rate": 4.153086641728765e-06,
"loss": 0.1304,
"step": 12000
},
{
"epoch": 2.504164931278634,
"grad_norm": 0.00832182727754116,
"learning_rate": 4.069878975662358e-06,
"loss": 0.1498,
"step": 12025
},
{
"epoch": 2.509371095376926,
"grad_norm": 8.369385719299316,
"learning_rate": 3.9874393872531e-06,
"loss": 0.2222,
"step": 12050
},
{
"epoch": 2.5145772594752187,
"grad_norm": 3.448890209197998,
"learning_rate": 3.905770901806299e-06,
"loss": 0.1876,
"step": 12075
},
{
"epoch": 2.519783423573511,
"grad_norm": 1.8173445463180542,
"learning_rate": 3.824876516329881e-06,
"loss": 0.0964,
"step": 12100
},
{
"epoch": 2.5249895876718034,
"grad_norm": 8.964319229125977,
"learning_rate": 3.7447591994244635e-06,
"loss": 0.1555,
"step": 12125
},
{
"epoch": 2.530195751770096,
"grad_norm": 1.7448962926864624,
"learning_rate": 3.6654218911743892e-06,
"loss": 0.1877,
"step": 12150
},
{
"epoch": 2.535401915868388,
"grad_norm": 0.03552517667412758,
"learning_rate": 3.5868675030398286e-06,
"loss": 0.1345,
"step": 12175
},
{
"epoch": 2.5406080799666806,
"grad_norm": 0.007262797094881535,
"learning_rate": 3.509098917749962e-06,
"loss": 0.1211,
"step": 12200
},
{
"epoch": 2.5458142440649727,
"grad_norm": 6.443802833557129,
"learning_rate": 3.43211898919715e-06,
"loss": 0.0961,
"step": 12225
},
{
"epoch": 2.5510204081632653,
"grad_norm": 3.5260281562805176,
"learning_rate": 3.3559305423322503e-06,
"loss": 0.1721,
"step": 12250
},
{
"epoch": 2.556226572261558,
"grad_norm": 2.5505025386810303,
"learning_rate": 3.280536373060919e-06,
"loss": 0.1488,
"step": 12275
},
{
"epoch": 2.56143273635985,
"grad_norm": 12.06968879699707,
"learning_rate": 3.2059392481410155e-06,
"loss": 0.145,
"step": 12300
},
{
"epoch": 2.5666389004581425,
"grad_norm": 0.0032713667023926973,
"learning_rate": 3.132141905081076e-06,
"loss": 0.1333,
"step": 12325
},
{
"epoch": 2.5718450645564346,
"grad_norm": 10.196958541870117,
"learning_rate": 3.0591470520398513e-06,
"loss": 0.1732,
"step": 12350
},
{
"epoch": 2.577051228654727,
"grad_norm": 0.015713948756456375,
"learning_rate": 2.9869573677269254e-06,
"loss": 0.1367,
"step": 12375
},
{
"epoch": 2.5822573927530197,
"grad_norm": 0.21436667442321777,
"learning_rate": 2.915575501304396e-06,
"loss": 0.235,
"step": 12400
},
{
"epoch": 2.587463556851312,
"grad_norm": 0.0038804244250059128,
"learning_rate": 2.845004072289695e-06,
"loss": 0.0992,
"step": 12425
},
{
"epoch": 2.5926697209496044,
"grad_norm": 6.616683483123779,
"learning_rate": 2.775245670459439e-06,
"loss": 0.1481,
"step": 12450
},
{
"epoch": 2.5978758850478965,
"grad_norm": 0.006720269098877907,
"learning_rate": 2.7063028557543753e-06,
"loss": 0.12,
"step": 12475
},
{
"epoch": 2.603082049146189,
"grad_norm": 1.3217498064041138,
"learning_rate": 2.638178158185467e-06,
"loss": 0.0997,
"step": 12500
},
{
"epoch": 2.6082882132444816,
"grad_norm": 13.17971420288086,
"learning_rate": 2.570874077741034e-06,
"loss": 0.2045,
"step": 12525
},
{
"epoch": 2.6134943773427737,
"grad_norm": 15.589527130126953,
"learning_rate": 2.504393084295015e-06,
"loss": 0.1074,
"step": 12550
},
{
"epoch": 2.6187005414410662,
"grad_norm": 1.773633360862732,
"learning_rate": 2.438737617516332e-06,
"loss": 0.1416,
"step": 12575
},
{
"epoch": 2.6239067055393583,
"grad_norm": 7.001961708068848,
"learning_rate": 2.373910086779338e-06,
"loss": 0.1821,
"step": 12600
},
{
"epoch": 2.629112869637651,
"grad_norm": 7.044321060180664,
"learning_rate": 2.309912871075445e-06,
"loss": 0.1201,
"step": 12625
},
{
"epoch": 2.6343190337359434,
"grad_norm": 8.048465728759766,
"learning_rate": 2.246748318925779e-06,
"loss": 0.167,
"step": 12650
},
{
"epoch": 2.639525197834236,
"grad_norm": 6.150434494018555,
"learning_rate": 2.1844187482950225e-06,
"loss": 0.1569,
"step": 12675
},
{
"epoch": 2.644731361932528,
"grad_norm": 1.490867018699646,
"learning_rate": 2.122926446506332e-06,
"loss": 0.0887,
"step": 12700
},
{
"epoch": 2.6499375260308202,
"grad_norm": 6.9760003089904785,
"learning_rate": 2.0622736701574136e-06,
"loss": 0.0619,
"step": 12725
},
{
"epoch": 2.6551436901291128,
"grad_norm": 4.531062602996826,
"learning_rate": 2.0024626450377144e-06,
"loss": 0.1153,
"step": 12750
},
{
"epoch": 2.6603498542274053,
"grad_norm": 0.011586461216211319,
"learning_rate": 1.9434955660467184e-06,
"loss": 0.1071,
"step": 12775
},
{
"epoch": 2.665556018325698,
"grad_norm": 0.5682029128074646,
"learning_rate": 1.885374597113429e-06,
"loss": 0.0935,
"step": 12800
},
{
"epoch": 2.67076218242399,
"grad_norm": 0.14094886183738708,
"learning_rate": 1.8281018711169522e-06,
"loss": 0.0884,
"step": 12825
},
{
"epoch": 2.6759683465222825,
"grad_norm": 0.025353549048304558,
"learning_rate": 1.7716794898082034e-06,
"loss": 0.1083,
"step": 12850
},
{
"epoch": 2.6811745106205747,
"grad_norm": 10.48271656036377,
"learning_rate": 1.716109523732809e-06,
"loss": 0.1437,
"step": 12875
},
{
"epoch": 2.686380674718867,
"grad_norm": 3.716670513153076,
"learning_rate": 1.6613940121551014e-06,
"loss": 0.177,
"step": 12900
},
{
"epoch": 2.6915868388171598,
"grad_norm": 2.8707427978515625,
"learning_rate": 1.6075349629832954e-06,
"loss": 0.1107,
"step": 12925
},
{
"epoch": 2.696793002915452,
"grad_norm": 3.083693742752075,
"learning_rate": 1.554534352695808e-06,
"loss": 0.1544,
"step": 12950
},
{
"epoch": 2.7019991670137444,
"grad_norm": 0.012157919816672802,
"learning_rate": 1.5023941262686997e-06,
"loss": 0.1065,
"step": 12975
},
{
"epoch": 2.7072053311120365,
"grad_norm": 6.145320415496826,
"learning_rate": 1.4511161971043351e-06,
"loss": 0.0982,
"step": 13000
},
{
"epoch": 2.712411495210329,
"grad_norm": 0.1649736911058426,
"learning_rate": 1.4007024469611441e-06,
"loss": 0.1541,
"step": 13025
},
{
"epoch": 2.7176176593086216,
"grad_norm": 0.16660048067569733,
"learning_rate": 1.3511547258845763e-06,
"loss": 0.0752,
"step": 13050
},
{
"epoch": 2.7228238234069138,
"grad_norm": 5.491020679473877,
"learning_rate": 1.3024748521392017e-06,
"loss": 0.0893,
"step": 13075
},
{
"epoch": 2.7280299875052063,
"grad_norm": 6.0252790451049805,
"learning_rate": 1.2546646121419868e-06,
"loss": 0.1347,
"step": 13100
},
{
"epoch": 2.7332361516034984,
"grad_norm": 2.7115368843078613,
"learning_rate": 1.2077257603967523e-06,
"loss": 0.1261,
"step": 13125
},
{
"epoch": 2.738442315701791,
"grad_norm": 14.104537963867188,
"learning_rate": 1.1616600194297555e-06,
"loss": 0.1585,
"step": 13150
},
{
"epoch": 2.7436484798000835,
"grad_norm": 0.006517982110381126,
"learning_rate": 1.1164690797265148e-06,
"loss": 0.1143,
"step": 13175
},
{
"epoch": 2.7488546438983756,
"grad_norm": 0.00706452364102006,
"learning_rate": 1.0721545996697585e-06,
"loss": 0.1482,
"step": 13200
},
{
"epoch": 2.754060807996668,
"grad_norm": 0.16163934767246246,
"learning_rate": 1.0287182054785544e-06,
"loss": 0.1259,
"step": 13225
},
{
"epoch": 2.7592669720949603,
"grad_norm": 0.0032609994523227215,
"learning_rate": 9.861614911486462e-07,
"loss": 0.1311,
"step": 13250
},
{
"epoch": 2.764473136193253,
"grad_norm": 1.620920181274414,
"learning_rate": 9.444860183939669e-07,
"loss": 0.1162,
"step": 13275
},
{
"epoch": 2.7696793002915454,
"grad_norm": 5.577692031860352,
"learning_rate": 9.036933165893019e-07,
"loss": 0.1143,
"step": 13300
},
{
"epoch": 2.7748854643898375,
"grad_norm": 3.6756861209869385,
"learning_rate": 8.637848827141859e-07,
"loss": 0.1157,
"step": 13325
},
{
"epoch": 2.78009162848813,
"grad_norm": 8.014864921569824,
"learning_rate": 8.247621812979639e-07,
"loss": 0.1125,
"step": 13350
},
{
"epoch": 2.785297792586422,
"grad_norm": 0.027799520641565323,
"learning_rate": 7.866266443660397e-07,
"loss": 0.1147,
"step": 13375
},
{
"epoch": 2.7905039566847147,
"grad_norm": 0.20379126071929932,
"learning_rate": 7.493796713873346e-07,
"loss": 0.0776,
"step": 13400
},
{
"epoch": 2.7957101207830073,
"grad_norm": 5.621152400970459,
"learning_rate": 7.130226292229314e-07,
"loss": 0.1281,
"step": 13425
},
{
"epoch": 2.8009162848812994,
"grad_norm": 9.663355827331543,
"learning_rate": 6.775568520758863e-07,
"loss": 0.1409,
"step": 13450
},
{
"epoch": 2.806122448979592,
"grad_norm": 7.826466083526611,
"learning_rate": 6.429836414423212e-07,
"loss": 0.13,
"step": 13475
},
{
"epoch": 2.811328613077884,
"grad_norm": 0.012044396251440048,
"learning_rate": 6.093042660636095e-07,
"loss": 0.2328,
"step": 13500
},
{
"epoch": 2.8165347771761766,
"grad_norm": 2.4445865154266357,
"learning_rate": 5.765199618798456e-07,
"loss": 0.1251,
"step": 13525
},
{
"epoch": 2.821740941274469,
"grad_norm": 4.6207451820373535,
"learning_rate": 5.446319319844794e-07,
"loss": 0.1069,
"step": 13550
},
{
"epoch": 2.8269471053727613,
"grad_norm": 0.5208550691604614,
"learning_rate": 5.13641346580171e-07,
"loss": 0.0791,
"step": 13575
},
{
"epoch": 2.832153269471054,
"grad_norm": 0.03781859204173088,
"learning_rate": 4.835493429358462e-07,
"loss": 0.1112,
"step": 13600
},
{
"epoch": 2.837359433569346,
"grad_norm": 1.6827815771102905,
"learning_rate": 4.5435702534495915e-07,
"loss": 0.1123,
"step": 13625
},
{
"epoch": 2.8425655976676385,
"grad_norm": 2.481886863708496,
"learning_rate": 4.2606546508497103e-07,
"loss": 0.1228,
"step": 13650
},
{
"epoch": 2.847771761765931,
"grad_norm": 7.118264198303223,
"learning_rate": 3.9867570037803725e-07,
"loss": 0.1172,
"step": 13675
},
{
"epoch": 2.852977925864223,
"grad_norm": 8.65257453918457,
"learning_rate": 3.7218873635290195e-07,
"loss": 0.111,
"step": 13700
},
{
"epoch": 2.8581840899625157,
"grad_norm": 0.8659380674362183,
"learning_rate": 3.466055450080191e-07,
"loss": 0.0999,
"step": 13725
},
{
"epoch": 2.863390254060808,
"grad_norm": 5.575057506561279,
"learning_rate": 3.219270651758782e-07,
"loss": 0.1201,
"step": 13750
},
{
"epoch": 2.8685964181591004,
"grad_norm": 4.377348899841309,
"learning_rate": 2.9815420248855977e-07,
"loss": 0.1469,
"step": 13775
},
{
"epoch": 2.873802582257393,
"grad_norm": 3.4709956645965576,
"learning_rate": 2.7528782934449215e-07,
"loss": 0.1214,
"step": 13800
},
{
"epoch": 2.879008746355685,
"grad_norm": 0.008634321391582489,
"learning_rate": 2.533287848764332e-07,
"loss": 0.1104,
"step": 13825
},
{
"epoch": 2.8842149104539776,
"grad_norm": 5.85574197769165,
"learning_rate": 2.3227787492069718e-07,
"loss": 0.1284,
"step": 13850
},
{
"epoch": 2.8894210745522697,
"grad_norm": 2.434326648712158,
"learning_rate": 2.1213587198755936e-07,
"loss": 0.0968,
"step": 13875
},
{
"epoch": 2.8946272386505623,
"grad_norm": 3.5275282859802246,
"learning_rate": 1.929035152329145e-07,
"loss": 0.1454,
"step": 13900
},
{
"epoch": 2.899833402748855,
"grad_norm": 3.965625762939453,
"learning_rate": 1.7458151043116544e-07,
"loss": 0.1299,
"step": 13925
},
{
"epoch": 2.905039566847147,
"grad_norm": 0.10151717066764832,
"learning_rate": 1.5717052994929927e-07,
"loss": 0.1459,
"step": 13950
},
{
"epoch": 2.9102457309454395,
"grad_norm": 0.03408972918987274,
"learning_rate": 1.4067121272223204e-07,
"loss": 0.11,
"step": 13975
},
{
"epoch": 2.9154518950437316,
"grad_norm": 0.0022567359264940023,
"learning_rate": 1.2508416422935542e-07,
"loss": 0.1083,
"step": 14000
},
{
"epoch": 2.920658059142024,
"grad_norm": 7.95089054107666,
"learning_rate": 1.1040995647231545e-07,
"loss": 0.1088,
"step": 14025
},
{
"epoch": 2.9258642232403167,
"grad_norm": 12.033370018005371,
"learning_rate": 9.664912795402381e-08,
"loss": 0.1305,
"step": 14050
},
{
"epoch": 2.931070387338609,
"grad_norm": 0.22287705540657043,
"learning_rate": 8.380218365889592e-08,
"loss": 0.1694,
"step": 14075
},
{
"epoch": 2.9362765514369014,
"grad_norm": 0.011814435012638569,
"learning_rate": 7.186959503431845e-08,
"loss": 0.083,
"step": 14100
},
{
"epoch": 2.9414827155351935,
"grad_norm": 0.22026073932647705,
"learning_rate": 6.08517999733521e-08,
"loss": 0.0816,
"step": 14125
},
{
"epoch": 2.946688879633486,
"grad_norm": 6.017068386077881,
"learning_rate": 5.0749202798652806e-08,
"loss": 0.1286,
"step": 14150
},
{
"epoch": 2.9518950437317786,
"grad_norm": 9.714811325073242,
"learning_rate": 4.156217424765296e-08,
"loss": 0.1439,
"step": 14175
},
{
"epoch": 2.9571012078300707,
"grad_norm": 0.37943577766418457,
"learning_rate": 3.3291051458927966e-08,
"loss": 0.0811,
"step": 14200
},
{
"epoch": 2.9623073719283632,
"grad_norm": 0.007510739378631115,
"learning_rate": 2.5936137959856054e-08,
"loss": 0.1679,
"step": 14225
},
{
"epoch": 2.9675135360266554,
"grad_norm": 4.679372310638428,
"learning_rate": 1.9497703655455e-08,
"loss": 0.0982,
"step": 14250
},
{
"epoch": 2.972719700124948,
"grad_norm": 5.683784484863281,
"learning_rate": 1.39759848184845e-08,
"loss": 0.1231,
"step": 14275
},
{
"epoch": 2.9779258642232405,
"grad_norm": 13.094088554382324,
"learning_rate": 9.37118408078641e-09,
"loss": 0.118,
"step": 14300
},
{
"epoch": 2.9831320283215326,
"grad_norm": 7.190179347991943,
"learning_rate": 5.683470425832394e-09,
"loss": 0.1788,
"step": 14325
},
{
"epoch": 2.988338192419825,
"grad_norm": 8.636795997619629,
"learning_rate": 2.9129791825344145e-09,
"loss": 0.1482,
"step": 14350
},
{
"epoch": 2.9935443565181172,
"grad_norm": 5.166715145111084,
"learning_rate": 1.0598120202681695e-09,
"loss": 0.106,
"step": 14375
},
{
"epoch": 2.99875052061641,
"grad_norm": 6.336511611938477,
"learning_rate": 1.2403694515661368e-10,
"loss": 0.1234,
"step": 14400
},
{
"epoch": 3.0,
"eval_accuracy": 0.9105245346869713,
"eval_f1_macro": 0.8981344155061894,
"eval_f1_micro": 0.9105245346869713,
"eval_f1_weighted": 0.9032382559745838,
"eval_loss": 0.2098342627286911,
"eval_precision_macro": 0.9192533404916904,
"eval_precision_micro": 0.9105245346869713,
"eval_precision_weighted": 0.920957800948379,
"eval_recall_macro": 0.9038812615955473,
"eval_recall_micro": 0.9105245346869713,
"eval_recall_weighted": 0.9105245346869713,
"eval_runtime": 16.1158,
"eval_samples_per_second": 916.801,
"eval_steps_per_second": 57.335,
"step": 14406
}
],
"logging_steps": 25,
"max_steps": 14406,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3853101049724160.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}