{ "best_metric": 0.2098342627286911, "best_model_checkpoint": "date2format-v2/checkpoint-14406", "epoch": 3.0, "eval_steps": 500, "global_step": 14406, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005206164098292378, "grad_norm": 0.16246353089809418, "learning_rate": 8.327550312283136e-07, "loss": 0.1697, "step": 25 }, { "epoch": 0.010412328196584756, "grad_norm": 1.2059249877929688, "learning_rate": 1.7002081887578072e-06, "loss": 0.214, "step": 50 }, { "epoch": 0.015618492294877134, "grad_norm": 0.9199744462966919, "learning_rate": 2.5676613462873006e-06, "loss": 0.1357, "step": 75 }, { "epoch": 0.02082465639316951, "grad_norm": 0.04551107436418533, "learning_rate": 3.4351145038167944e-06, "loss": 0.0982, "step": 100 }, { "epoch": 0.02603082049146189, "grad_norm": 1.7336697578430176, "learning_rate": 4.2678695350451074e-06, "loss": 0.1863, "step": 125 }, { "epoch": 0.03123698458975427, "grad_norm": 1.3021111488342285, "learning_rate": 5.135322692574601e-06, "loss": 0.1166, "step": 150 }, { "epoch": 0.03644314868804665, "grad_norm": 6.950756549835205, "learning_rate": 6.002775850104095e-06, "loss": 0.2385, "step": 175 }, { "epoch": 0.04164931278633902, "grad_norm": 0.139039546251297, "learning_rate": 6.870229007633589e-06, "loss": 0.1147, "step": 200 }, { "epoch": 0.0468554768846314, "grad_norm": 6.5412492752075195, "learning_rate": 7.737682165163083e-06, "loss": 0.1682, "step": 225 }, { "epoch": 0.05206164098292378, "grad_norm": 1.130511999130249, "learning_rate": 8.605135322692574e-06, "loss": 0.2311, "step": 250 }, { "epoch": 0.05726780508121616, "grad_norm": 14.400012969970703, "learning_rate": 9.472588480222069e-06, "loss": 0.1815, "step": 275 }, { "epoch": 0.06247396917950854, "grad_norm": 0.6946442723274231, "learning_rate": 1.0340041637751562e-05, "loss": 0.1107, "step": 300 }, { "epoch": 0.06768013327780091, "grad_norm": 9.926340103149414, "learning_rate": 1.1207494795281055e-05, "loss": 0.1139, "step": 325 }, { "epoch": 0.0728862973760933, "grad_norm": 1.6435810327529907, "learning_rate": 1.207494795281055e-05, "loss": 0.1471, "step": 350 }, { "epoch": 0.07809246147438567, "grad_norm": 1.6755973100662231, "learning_rate": 1.294240111034004e-05, "loss": 0.2039, "step": 375 }, { "epoch": 0.08329862557267805, "grad_norm": 4.045156002044678, "learning_rate": 1.3809854267869535e-05, "loss": 0.2085, "step": 400 }, { "epoch": 0.08850478967097043, "grad_norm": 0.2624989449977875, "learning_rate": 1.4677307425399028e-05, "loss": 0.182, "step": 425 }, { "epoch": 0.0937109537692628, "grad_norm": 9.571054458618164, "learning_rate": 1.5544760582928523e-05, "loss": 0.1874, "step": 450 }, { "epoch": 0.09891711786755518, "grad_norm": 0.22022797167301178, "learning_rate": 1.6412213740458016e-05, "loss": 0.1219, "step": 475 }, { "epoch": 0.10412328196584757, "grad_norm": 9.949447631835938, "learning_rate": 1.727966689798751e-05, "loss": 0.2049, "step": 500 }, { "epoch": 0.10932944606413994, "grad_norm": 5.697494983673096, "learning_rate": 1.8147120055517e-05, "loss": 0.1151, "step": 525 }, { "epoch": 0.11453561016243231, "grad_norm": 11.763550758361816, "learning_rate": 1.9014573213046498e-05, "loss": 0.1546, "step": 550 }, { "epoch": 0.1197417742607247, "grad_norm": 0.10468779504299164, "learning_rate": 1.988202637057599e-05, "loss": 0.2386, "step": 575 }, { "epoch": 0.12494793835901707, "grad_norm": 1.6756809949874878, "learning_rate": 2.0749479528105484e-05, "loss": 0.2487, "step": 600 }, { "epoch": 0.13015410245730946, "grad_norm": 0.19562338292598724, "learning_rate": 2.1616932685634977e-05, "loss": 0.2314, "step": 625 }, { "epoch": 0.13536026655560182, "grad_norm": 8.64395809173584, "learning_rate": 2.248438584316447e-05, "loss": 0.113, "step": 650 }, { "epoch": 0.1405664306538942, "grad_norm": 0.2188766896724701, "learning_rate": 2.3351839000693966e-05, "loss": 0.1685, "step": 675 }, { "epoch": 0.1457725947521866, "grad_norm": 0.1437786966562271, "learning_rate": 2.421929215822346e-05, "loss": 0.2467, "step": 700 }, { "epoch": 0.15097875885047896, "grad_norm": 8.023161888122559, "learning_rate": 2.5086745315752952e-05, "loss": 0.2007, "step": 725 }, { "epoch": 0.15618492294877134, "grad_norm": 1.2265409231185913, "learning_rate": 2.5954198473282442e-05, "loss": 0.1716, "step": 750 }, { "epoch": 0.16139108704706373, "grad_norm": 5.027709484100342, "learning_rate": 2.6821651630811938e-05, "loss": 0.2271, "step": 775 }, { "epoch": 0.1665972511453561, "grad_norm": 0.11683953553438187, "learning_rate": 2.768910478834143e-05, "loss": 0.2198, "step": 800 }, { "epoch": 0.17180341524364848, "grad_norm": 0.0886659324169159, "learning_rate": 2.8556557945870927e-05, "loss": 0.2351, "step": 825 }, { "epoch": 0.17700957934194086, "grad_norm": 10.608562469482422, "learning_rate": 2.9424011103400417e-05, "loss": 0.295, "step": 850 }, { "epoch": 0.18221574344023322, "grad_norm": 9.556928634643555, "learning_rate": 3.0291464260929913e-05, "loss": 0.1789, "step": 875 }, { "epoch": 0.1874219075385256, "grad_norm": 9.168340682983398, "learning_rate": 3.1158917418459406e-05, "loss": 0.2548, "step": 900 }, { "epoch": 0.192628071636818, "grad_norm": 6.174153804779053, "learning_rate": 3.2026370575988896e-05, "loss": 0.257, "step": 925 }, { "epoch": 0.19783423573511036, "grad_norm": 13.663798332214355, "learning_rate": 3.289382373351839e-05, "loss": 0.1947, "step": 950 }, { "epoch": 0.20304039983340275, "grad_norm": 11.622993469238281, "learning_rate": 3.376127689104788e-05, "loss": 0.2765, "step": 975 }, { "epoch": 0.20824656393169513, "grad_norm": 9.067373275756836, "learning_rate": 3.462873004857738e-05, "loss": 0.2063, "step": 1000 }, { "epoch": 0.2134527280299875, "grad_norm": 0.04181263968348503, "learning_rate": 3.549618320610687e-05, "loss": 0.3131, "step": 1025 }, { "epoch": 0.21865889212827988, "grad_norm": 1.1748377084732056, "learning_rate": 3.6363636363636364e-05, "loss": 0.2632, "step": 1050 }, { "epoch": 0.22386505622657227, "grad_norm": 18.87828826904297, "learning_rate": 3.723108952116586e-05, "loss": 0.1721, "step": 1075 }, { "epoch": 0.22907122032486463, "grad_norm": 0.07279066741466522, "learning_rate": 3.809854267869536e-05, "loss": 0.155, "step": 1100 }, { "epoch": 0.23427738442315701, "grad_norm": 0.6264330148696899, "learning_rate": 3.8965995836224846e-05, "loss": 0.2785, "step": 1125 }, { "epoch": 0.2394835485214494, "grad_norm": 0.07900076359510422, "learning_rate": 3.983344899375434e-05, "loss": 0.1977, "step": 1150 }, { "epoch": 0.24468971261974176, "grad_norm": 0.14294634759426117, "learning_rate": 4.070090215128383e-05, "loss": 0.2297, "step": 1175 }, { "epoch": 0.24989587671803415, "grad_norm": 12.401119232177734, "learning_rate": 4.153365718251215e-05, "loss": 0.3428, "step": 1200 }, { "epoch": 0.25510204081632654, "grad_norm": 11.514626502990723, "learning_rate": 4.240111034004164e-05, "loss": 0.2381, "step": 1225 }, { "epoch": 0.2603082049146189, "grad_norm": 0.33966752886772156, "learning_rate": 4.3268563497571134e-05, "loss": 0.2446, "step": 1250 }, { "epoch": 0.2655143690129113, "grad_norm": 13.058968544006348, "learning_rate": 4.4136016655100624e-05, "loss": 0.2878, "step": 1275 }, { "epoch": 0.27072053311120364, "grad_norm": 11.9553861618042, "learning_rate": 4.500346981263012e-05, "loss": 0.2735, "step": 1300 }, { "epoch": 0.27592669720949603, "grad_norm": 0.3588428497314453, "learning_rate": 4.5870922970159617e-05, "loss": 0.2163, "step": 1325 }, { "epoch": 0.2811328613077884, "grad_norm": 23.0421142578125, "learning_rate": 4.673837612768911e-05, "loss": 0.2516, "step": 1350 }, { "epoch": 0.2863390254060808, "grad_norm": 0.3783215880393982, "learning_rate": 4.757113115891742e-05, "loss": 0.3394, "step": 1375 }, { "epoch": 0.2915451895043732, "grad_norm": 0.13371586799621582, "learning_rate": 4.843858431644692e-05, "loss": 0.2155, "step": 1400 }, { "epoch": 0.2967513536026656, "grad_norm": 15.4042329788208, "learning_rate": 4.930603747397641e-05, "loss": 0.3902, "step": 1425 }, { "epoch": 0.3019575177009579, "grad_norm": 13.482477188110352, "learning_rate": 4.9999981651326544e-05, "loss": 0.3137, "step": 1450 }, { "epoch": 0.3071636817992503, "grad_norm": 2.373297691345215, "learning_rate": 4.9999339450583704e-05, "loss": 0.3525, "step": 1475 }, { "epoch": 0.3123698458975427, "grad_norm": 0.45967090129852295, "learning_rate": 4.999777984310187e-05, "loss": 0.3325, "step": 1500 }, { "epoch": 0.3175760099958351, "grad_norm": 0.21776865422725677, "learning_rate": 4.9995302886114334e-05, "loss": 0.3339, "step": 1525 }, { "epoch": 0.32278217409412746, "grad_norm": 4.673847198486328, "learning_rate": 4.9991908670518586e-05, "loss": 0.2315, "step": 1550 }, { "epoch": 0.32798833819241985, "grad_norm": 2.8383209705352783, "learning_rate": 4.998759732087296e-05, "loss": 0.3757, "step": 1575 }, { "epoch": 0.3331945022907122, "grad_norm": 6.966940402984619, "learning_rate": 4.998236899539209e-05, "loss": 0.2376, "step": 1600 }, { "epoch": 0.33840066638900457, "grad_norm": 14.109524726867676, "learning_rate": 4.997622388594109e-05, "loss": 0.3046, "step": 1625 }, { "epoch": 0.34360683048729695, "grad_norm": 0.9895745515823364, "learning_rate": 4.9969162218028495e-05, "loss": 0.2425, "step": 1650 }, { "epoch": 0.34881299458558934, "grad_norm": 1.0948731899261475, "learning_rate": 4.996118425079804e-05, "loss": 0.3732, "step": 1675 }, { "epoch": 0.35401915868388173, "grad_norm": 10.473814010620117, "learning_rate": 4.9952290277019095e-05, "loss": 0.3086, "step": 1700 }, { "epoch": 0.3592253227821741, "grad_norm": 1.947543740272522, "learning_rate": 4.994248062307593e-05, "loss": 0.3405, "step": 1725 }, { "epoch": 0.36443148688046645, "grad_norm": 7.7466254234313965, "learning_rate": 4.993175564895577e-05, "loss": 0.1969, "step": 1750 }, { "epoch": 0.36963765097875884, "grad_norm": 11.565778732299805, "learning_rate": 4.992011574823555e-05, "loss": 0.3424, "step": 1775 }, { "epoch": 0.3748438150770512, "grad_norm": 2.8535590171813965, "learning_rate": 4.99075613480675e-05, "loss": 0.2885, "step": 1800 }, { "epoch": 0.3800499791753436, "grad_norm": 36.82617950439453, "learning_rate": 4.9894092909163436e-05, "loss": 0.3159, "step": 1825 }, { "epoch": 0.385256143273636, "grad_norm": 0.5960198044776917, "learning_rate": 4.98797109257779e-05, "loss": 0.2981, "step": 1850 }, { "epoch": 0.3904623073719284, "grad_norm": 23.564931869506836, "learning_rate": 4.986441592568994e-05, "loss": 0.2802, "step": 1875 }, { "epoch": 0.3956684714702207, "grad_norm": 4.4088134765625, "learning_rate": 4.984820847018388e-05, "loss": 0.378, "step": 1900 }, { "epoch": 0.4008746355685131, "grad_norm": 13.824947357177734, "learning_rate": 4.983108915402855e-05, "loss": 0.2409, "step": 1925 }, { "epoch": 0.4060807996668055, "grad_norm": 14.21284294128418, "learning_rate": 4.981305860545561e-05, "loss": 0.3306, "step": 1950 }, { "epoch": 0.4112869637650979, "grad_norm": 0.1888038069009781, "learning_rate": 4.9794117486136404e-05, "loss": 0.3458, "step": 1975 }, { "epoch": 0.41649312786339027, "grad_norm": 0.19039294123649597, "learning_rate": 4.977426649115773e-05, "loss": 0.1629, "step": 2000 }, { "epoch": 0.42169929196168265, "grad_norm": 0.42835375666618347, "learning_rate": 4.9753506348996284e-05, "loss": 0.2258, "step": 2025 }, { "epoch": 0.426905456059975, "grad_norm": 4.263399124145508, "learning_rate": 4.973183782149198e-05, "loss": 0.3, "step": 2050 }, { "epoch": 0.4321116201582674, "grad_norm": 0.17805464565753937, "learning_rate": 4.970926170381995e-05, "loss": 0.2086, "step": 2075 }, { "epoch": 0.43731778425655976, "grad_norm": 0.9103575348854065, "learning_rate": 4.968577882446139e-05, "loss": 0.2435, "step": 2100 }, { "epoch": 0.44252394835485215, "grad_norm": 10.369997024536133, "learning_rate": 4.9661390045173126e-05, "loss": 0.2264, "step": 2125 }, { "epoch": 0.44773011245314454, "grad_norm": 2.6744861602783203, "learning_rate": 4.963609626095603e-05, "loss": 0.3012, "step": 2150 }, { "epoch": 0.4529362765514369, "grad_norm": 12.971126556396484, "learning_rate": 4.960989840002216e-05, "loss": 0.2855, "step": 2175 }, { "epoch": 0.45814244064972925, "grad_norm": 5.078539848327637, "learning_rate": 4.958279742376068e-05, "loss": 0.2832, "step": 2200 }, { "epoch": 0.46334860474802164, "grad_norm": 11.258842468261719, "learning_rate": 4.955479432670259e-05, "loss": 0.3402, "step": 2225 }, { "epoch": 0.46855476884631403, "grad_norm": 9.847345352172852, "learning_rate": 4.952589013648425e-05, "loss": 0.1914, "step": 2250 }, { "epoch": 0.4737609329446064, "grad_norm": 0.0392930842936039, "learning_rate": 4.949608591380963e-05, "loss": 0.1341, "step": 2275 }, { "epoch": 0.4789670970428988, "grad_norm": 7.803274154663086, "learning_rate": 4.946538275241144e-05, "loss": 0.3284, "step": 2300 }, { "epoch": 0.4841732611411912, "grad_norm": 0.38506531715393066, "learning_rate": 4.943378177901091e-05, "loss": 0.3496, "step": 2325 }, { "epoch": 0.4893794252394835, "grad_norm": 8.00683879852295, "learning_rate": 4.940128415327654e-05, "loss": 0.33, "step": 2350 }, { "epoch": 0.4945855893377759, "grad_norm": 5.389349460601807, "learning_rate": 4.9367891067781454e-05, "loss": 0.2392, "step": 2375 }, { "epoch": 0.4997917534360683, "grad_norm": 0.18322338163852692, "learning_rate": 4.9333603747959725e-05, "loss": 0.3306, "step": 2400 }, { "epoch": 0.5049979175343606, "grad_norm": 47.38001251220703, "learning_rate": 4.9298423452061306e-05, "loss": 0.3651, "step": 2425 }, { "epoch": 0.5102040816326531, "grad_norm": 1.591893196105957, "learning_rate": 4.926235147110593e-05, "loss": 0.1469, "step": 2450 }, { "epoch": 0.5154102457309454, "grad_norm": 2.414854049682617, "learning_rate": 4.922538912883573e-05, "loss": 0.3108, "step": 2475 }, { "epoch": 0.5206164098292378, "grad_norm": 0.7650768756866455, "learning_rate": 4.91875377816666e-05, "loss": 0.1384, "step": 2500 }, { "epoch": 0.5258225739275302, "grad_norm": 12.073479652404785, "learning_rate": 4.9148798818638486e-05, "loss": 0.3325, "step": 2525 }, { "epoch": 0.5310287380258226, "grad_norm": 1.0136282444000244, "learning_rate": 4.910917366136438e-05, "loss": 0.2324, "step": 2550 }, { "epoch": 0.536234902124115, "grad_norm": 4.9315667152404785, "learning_rate": 4.906866376397816e-05, "loss": 0.3278, "step": 2575 }, { "epoch": 0.5414410662224073, "grad_norm": 18.607717514038086, "learning_rate": 4.902727061308121e-05, "loss": 0.3609, "step": 2600 }, { "epoch": 0.5466472303206997, "grad_norm": 28.146434783935547, "learning_rate": 4.89849957276879e-05, "loss": 0.229, "step": 2625 }, { "epoch": 0.5518533944189921, "grad_norm": 6.487234592437744, "learning_rate": 4.894184065916981e-05, "loss": 0.1822, "step": 2650 }, { "epoch": 0.5570595585172845, "grad_norm": 12.08668327331543, "learning_rate": 4.8897806991198796e-05, "loss": 0.1988, "step": 2675 }, { "epoch": 0.5622657226155768, "grad_norm": 6.793110370635986, "learning_rate": 4.885289633968891e-05, "loss": 0.2108, "step": 2700 }, { "epoch": 0.5674718867138692, "grad_norm": 13.04969596862793, "learning_rate": 4.880711035273709e-05, "loss": 0.2559, "step": 2725 }, { "epoch": 0.5726780508121616, "grad_norm": 0.07108098268508911, "learning_rate": 4.876045071056262e-05, "loss": 0.2236, "step": 2750 }, { "epoch": 0.5778842149104539, "grad_norm": 4.550630569458008, "learning_rate": 4.871291912544558e-05, "loss": 0.1857, "step": 2775 }, { "epoch": 0.5830903790087464, "grad_norm": 3.2425384521484375, "learning_rate": 4.8664517341663886e-05, "loss": 0.1981, "step": 2800 }, { "epoch": 0.5882965431070387, "grad_norm": 1.5111477375030518, "learning_rate": 4.861524713542939e-05, "loss": 0.2145, "step": 2825 }, { "epoch": 0.5935027072053312, "grad_norm": 0.029720915481448174, "learning_rate": 4.856511031482264e-05, "loss": 0.1805, "step": 2850 }, { "epoch": 0.5987088713036235, "grad_norm": 14.783000946044922, "learning_rate": 4.8514108719726516e-05, "loss": 0.2574, "step": 2875 }, { "epoch": 0.6039150354019158, "grad_norm": 0.23782485723495483, "learning_rate": 4.846224422175877e-05, "loss": 0.272, "step": 2900 }, { "epoch": 0.6091211995002083, "grad_norm": 0.6656458377838135, "learning_rate": 4.840951872420327e-05, "loss": 0.1804, "step": 2925 }, { "epoch": 0.6143273635985006, "grad_norm": 7.324411869049072, "learning_rate": 4.835593416194023e-05, "loss": 0.2526, "step": 2950 }, { "epoch": 0.619533527696793, "grad_norm": 17.861886978149414, "learning_rate": 4.8301492501375135e-05, "loss": 0.2352, "step": 2975 }, { "epoch": 0.6247396917950854, "grad_norm": 16.718368530273438, "learning_rate": 4.824619574036663e-05, "loss": 0.394, "step": 3000 }, { "epoch": 0.6299458558933777, "grad_norm": 0.1666000634431839, "learning_rate": 4.819004590815317e-05, "loss": 0.2006, "step": 3025 }, { "epoch": 0.6351520199916701, "grad_norm": 4.644481182098389, "learning_rate": 4.8133045065278584e-05, "loss": 0.2226, "step": 3050 }, { "epoch": 0.6403581840899625, "grad_norm": 23.20795249938965, "learning_rate": 4.807519530351644e-05, "loss": 0.2966, "step": 3075 }, { "epoch": 0.6455643481882549, "grad_norm": 0.05183548480272293, "learning_rate": 4.801649874579327e-05, "loss": 0.1597, "step": 3100 }, { "epoch": 0.6507705122865473, "grad_norm": 8.726778030395508, "learning_rate": 4.79569575461107e-05, "loss": 0.3323, "step": 3125 }, { "epoch": 0.6559766763848397, "grad_norm": 0.2523828446865082, "learning_rate": 4.789657388946637e-05, "loss": 0.1146, "step": 3150 }, { "epoch": 0.661182840483132, "grad_norm": 0.07221566885709763, "learning_rate": 4.7835349991773775e-05, "loss": 0.3426, "step": 3175 }, { "epoch": 0.6663890045814244, "grad_norm": 0.2359621673822403, "learning_rate": 4.7773288099780935e-05, "loss": 0.2444, "step": 3200 }, { "epoch": 0.6715951686797168, "grad_norm": 18.833477020263672, "learning_rate": 4.7710390490987936e-05, "loss": 0.3419, "step": 3225 }, { "epoch": 0.6768013327780091, "grad_norm": 17.171281814575195, "learning_rate": 4.764665947356337e-05, "loss": 0.2313, "step": 3250 }, { "epoch": 0.6820074968763016, "grad_norm": 0.4939661920070648, "learning_rate": 4.758209738625963e-05, "loss": 0.2724, "step": 3275 }, { "epoch": 0.6872136609745939, "grad_norm": 0.8321401476860046, "learning_rate": 4.751670659832708e-05, "loss": 0.1976, "step": 3300 }, { "epoch": 0.6924198250728864, "grad_norm": 1.6382664442062378, "learning_rate": 4.74504895094271e-05, "loss": 0.2768, "step": 3325 }, { "epoch": 0.6976259891711787, "grad_norm": 0.03149043396115303, "learning_rate": 4.738344854954404e-05, "loss": 0.2127, "step": 3350 }, { "epoch": 0.702832153269471, "grad_norm": 0.08274863660335541, "learning_rate": 4.7315586178896035e-05, "loss": 0.2105, "step": 3375 }, { "epoch": 0.7080383173677635, "grad_norm": 2.460942268371582, "learning_rate": 4.724690488784474e-05, "loss": 0.3465, "step": 3400 }, { "epoch": 0.7132444814660558, "grad_norm": 13.02415943145752, "learning_rate": 4.717740719680391e-05, "loss": 0.2309, "step": 3425 }, { "epoch": 0.7184506455643482, "grad_norm": 15.129639625549316, "learning_rate": 4.710709565614694e-05, "loss": 0.1043, "step": 3450 }, { "epoch": 0.7236568096626406, "grad_norm": 0.19271063804626465, "learning_rate": 4.703597284611325e-05, "loss": 0.312, "step": 3475 }, { "epoch": 0.7288629737609329, "grad_norm": 16.245803833007812, "learning_rate": 4.696404137671362e-05, "loss": 0.3068, "step": 3500 }, { "epoch": 0.7340691378592253, "grad_norm": 0.28644460439682007, "learning_rate": 4.689130388763437e-05, "loss": 0.3067, "step": 3525 }, { "epoch": 0.7392753019575177, "grad_norm": 19.579248428344727, "learning_rate": 4.681776304814056e-05, "loss": 0.2592, "step": 3550 }, { "epoch": 0.7444814660558101, "grad_norm": 16.12538719177246, "learning_rate": 4.6743421556977934e-05, "loss": 0.1631, "step": 3575 }, { "epoch": 0.7496876301541024, "grad_norm": 0.7716278433799744, "learning_rate": 4.6668282142274e-05, "loss": 0.2962, "step": 3600 }, { "epoch": 0.7548937942523949, "grad_norm": 6.197509288787842, "learning_rate": 4.659234756143782e-05, "loss": 0.2491, "step": 3625 }, { "epoch": 0.7600999583506872, "grad_norm": 0.5237298011779785, "learning_rate": 4.651562060105886e-05, "loss": 0.2988, "step": 3650 }, { "epoch": 0.7653061224489796, "grad_norm": 6.333150386810303, "learning_rate": 4.643810407680475e-05, "loss": 0.2558, "step": 3675 }, { "epoch": 0.770512286547272, "grad_norm": 0.02860177680850029, "learning_rate": 4.6359800833317915e-05, "loss": 0.1318, "step": 3700 }, { "epoch": 0.7757184506455643, "grad_norm": 0.030804630368947983, "learning_rate": 4.6280713744111215e-05, "loss": 0.2002, "step": 3725 }, { "epoch": 0.7809246147438568, "grad_norm": 10.464898109436035, "learning_rate": 4.620084571146247e-05, "loss": 0.3029, "step": 3750 }, { "epoch": 0.7861307788421491, "grad_norm": 1.6494271755218506, "learning_rate": 4.6120199666307975e-05, "loss": 0.377, "step": 3775 }, { "epoch": 0.7913369429404414, "grad_norm": 10.32836627960205, "learning_rate": 4.603877856813494e-05, "loss": 0.1742, "step": 3800 }, { "epoch": 0.7965431070387339, "grad_norm": 5.073435306549072, "learning_rate": 4.5956585404872884e-05, "loss": 0.1921, "step": 3825 }, { "epoch": 0.8017492711370262, "grad_norm": 11.85318374633789, "learning_rate": 4.587362319278397e-05, "loss": 0.2957, "step": 3850 }, { "epoch": 0.8069554352353187, "grad_norm": 3.998944044113159, "learning_rate": 4.5789894976352344e-05, "loss": 0.2102, "step": 3875 }, { "epoch": 0.812161599333611, "grad_norm": 0.569960355758667, "learning_rate": 4.570540382817239e-05, "loss": 0.245, "step": 3900 }, { "epoch": 0.8173677634319034, "grad_norm": 5.945977687835693, "learning_rate": 4.562015284883597e-05, "loss": 0.2113, "step": 3925 }, { "epoch": 0.8225739275301958, "grad_norm": 10.898477554321289, "learning_rate": 4.553414516681868e-05, "loss": 0.2446, "step": 3950 }, { "epoch": 0.8277800916284881, "grad_norm": 0.25490328669548035, "learning_rate": 4.5447383938364974e-05, "loss": 0.301, "step": 3975 }, { "epoch": 0.8329862557267805, "grad_norm": 4.246921539306641, "learning_rate": 4.535987234737242e-05, "loss": 0.2944, "step": 4000 }, { "epoch": 0.8381924198250729, "grad_norm": 6.019783973693848, "learning_rate": 4.527161360527481e-05, "loss": 0.3184, "step": 4025 }, { "epoch": 0.8433985839233653, "grad_norm": 0.0902986004948616, "learning_rate": 4.51826109509243e-05, "loss": 0.1508, "step": 4050 }, { "epoch": 0.8486047480216576, "grad_norm": 7.770606517791748, "learning_rate": 4.509286765047258e-05, "loss": 0.4018, "step": 4075 }, { "epoch": 0.85381091211995, "grad_norm": 0.20602120459079742, "learning_rate": 4.5002386997251025e-05, "loss": 0.3172, "step": 4100 }, { "epoch": 0.8590170762182424, "grad_norm": 9.447431564331055, "learning_rate": 4.491117231164978e-05, "loss": 0.2055, "step": 4125 }, { "epoch": 0.8642232403165347, "grad_norm": 4.260950088500977, "learning_rate": 4.481922694099602e-05, "loss": 0.1652, "step": 4150 }, { "epoch": 0.8694294044148272, "grad_norm": 8.093120574951172, "learning_rate": 4.4726554259430966e-05, "loss": 0.1701, "step": 4175 }, { "epoch": 0.8746355685131195, "grad_norm": 9.310951232910156, "learning_rate": 4.4633157667786194e-05, "loss": 0.3233, "step": 4200 }, { "epoch": 0.879841732611412, "grad_norm": 13.830717086791992, "learning_rate": 4.453904059345877e-05, "loss": 0.2724, "step": 4225 }, { "epoch": 0.8850478967097043, "grad_norm": 7.378146648406982, "learning_rate": 4.4444206490285465e-05, "loss": 0.3583, "step": 4250 }, { "epoch": 0.8902540608079966, "grad_norm": 0.06943502277135849, "learning_rate": 4.4352494400727496e-05, "loss": 0.3111, "step": 4275 }, { "epoch": 0.8954602249062891, "grad_norm": 5.857403755187988, "learning_rate": 4.425626504054255e-05, "loss": 0.3536, "step": 4300 }, { "epoch": 0.9006663890045814, "grad_norm": 12.8510160446167, "learning_rate": 4.4159329028593835e-05, "loss": 0.3129, "step": 4325 }, { "epoch": 0.9058725531028738, "grad_norm": 0.7335798740386963, "learning_rate": 4.406168992216536e-05, "loss": 0.2536, "step": 4350 }, { "epoch": 0.9110787172011662, "grad_norm": 0.06461632251739502, "learning_rate": 4.396335130434277e-05, "loss": 0.1635, "step": 4375 }, { "epoch": 0.9162848812994585, "grad_norm": 0.6194918751716614, "learning_rate": 4.386431678388183e-05, "loss": 0.2273, "step": 4400 }, { "epoch": 0.921491045397751, "grad_norm": 0.06244231015443802, "learning_rate": 4.3764589995076045e-05, "loss": 0.2393, "step": 4425 }, { "epoch": 0.9266972094960433, "grad_norm": 4.408567428588867, "learning_rate": 4.3664174597623225e-05, "loss": 0.1189, "step": 4450 }, { "epoch": 0.9319033735943357, "grad_norm": 0.7255458235740662, "learning_rate": 4.3563074276491224e-05, "loss": 0.2784, "step": 4475 }, { "epoch": 0.9371095376926281, "grad_norm": 0.09919915348291397, "learning_rate": 4.346129274178271e-05, "loss": 0.1761, "step": 4500 }, { "epoch": 0.9423157017909205, "grad_norm": 13.636004447937012, "learning_rate": 4.335883372859901e-05, "loss": 0.3252, "step": 4525 }, { "epoch": 0.9475218658892128, "grad_norm": 0.1237548440694809, "learning_rate": 4.3255700996903027e-05, "loss": 0.1931, "step": 4550 }, { "epoch": 0.9527280299875052, "grad_norm": 0.06882551312446594, "learning_rate": 4.315189833138129e-05, "loss": 0.256, "step": 4575 }, { "epoch": 0.9579341940857976, "grad_norm": 0.21859368681907654, "learning_rate": 4.304742954130504e-05, "loss": 0.2087, "step": 4600 }, { "epoch": 0.9631403581840899, "grad_norm": 4.428884983062744, "learning_rate": 4.294229846039045e-05, "loss": 0.1962, "step": 4625 }, { "epoch": 0.9683465222823824, "grad_norm": 6.920009613037109, "learning_rate": 4.2836508946657944e-05, "loss": 0.2301, "step": 4650 }, { "epoch": 0.9735526863806747, "grad_norm": 3.4040772914886475, "learning_rate": 4.273006488229061e-05, "loss": 0.287, "step": 4675 }, { "epoch": 0.978758850478967, "grad_norm": 7.0576043128967285, "learning_rate": 4.2622970173491734e-05, "loss": 0.1524, "step": 4700 }, { "epoch": 0.9839650145772595, "grad_norm": 0.026531610637903214, "learning_rate": 4.2515228750341466e-05, "loss": 0.1662, "step": 4725 }, { "epoch": 0.9891711786755518, "grad_norm": 1.0448293685913086, "learning_rate": 4.240684456665257e-05, "loss": 0.2114, "step": 4750 }, { "epoch": 0.9943773427738443, "grad_norm": 44.79957962036133, "learning_rate": 4.229782159982536e-05, "loss": 0.2157, "step": 4775 }, { "epoch": 0.9995835068721366, "grad_norm": 10.862527847290039, "learning_rate": 4.2188163850701734e-05, "loss": 0.2925, "step": 4800 }, { "epoch": 1.0, "eval_accuracy": 0.892994923857868, "eval_f1_macro": 0.8730666664599849, "eval_f1_micro": 0.892994923857868, "eval_f1_weighted": 0.8798157289522093, "eval_loss": 0.34922197461128235, "eval_precision_macro": 0.9003919914273503, "eval_precision_micro": 0.892994923857868, "eval_precision_weighted": 0.9045471108195786, "eval_recall_macro": 0.8862461348175634, "eval_recall_micro": 0.892994923857868, "eval_recall_weighted": 0.892994923857868, "eval_runtime": 16.3696, "eval_samples_per_second": 902.587, "eval_steps_per_second": 56.446, "step": 4802 }, { "epoch": 1.004789670970429, "grad_norm": 10.292591094970703, "learning_rate": 4.2077875343418325e-05, "loss": 0.2158, "step": 4825 }, { "epoch": 1.0099958350687213, "grad_norm": 5.3238325119018555, "learning_rate": 4.1966960125258884e-05, "loss": 0.2987, "step": 4850 }, { "epoch": 1.0152019991670138, "grad_norm": 0.32381194829940796, "learning_rate": 4.1855422266505675e-05, "loss": 0.3283, "step": 4875 }, { "epoch": 1.0204081632653061, "grad_norm": 7.971839904785156, "learning_rate": 4.174326586029018e-05, "loss": 0.19, "step": 4900 }, { "epoch": 1.0256143273635985, "grad_norm": 0.252726286649704, "learning_rate": 4.1630495022442884e-05, "loss": 0.3692, "step": 4925 }, { "epoch": 1.0308204914618908, "grad_norm": 3.9147284030914307, "learning_rate": 4.1517113891342174e-05, "loss": 0.2321, "step": 4950 }, { "epoch": 1.0360266555601834, "grad_norm": 3.525928020477295, "learning_rate": 4.1403126627762546e-05, "loss": 0.1547, "step": 4975 }, { "epoch": 1.0412328196584757, "grad_norm": 0.9435555934906006, "learning_rate": 4.128853741472187e-05, "loss": 0.1764, "step": 5000 }, { "epoch": 1.046438983756768, "grad_norm": 9.357694625854492, "learning_rate": 4.117335045732791e-05, "loss": 0.2553, "step": 5025 }, { "epoch": 1.0516451478550604, "grad_norm": 0.10351333767175674, "learning_rate": 4.105756998262399e-05, "loss": 0.128, "step": 5050 }, { "epoch": 1.0568513119533527, "grad_norm": 3.0234265327453613, "learning_rate": 4.094120023943387e-05, "loss": 0.2764, "step": 5075 }, { "epoch": 1.0620574760516452, "grad_norm": 2.055335283279419, "learning_rate": 4.082424549820586e-05, "loss": 0.1752, "step": 5100 }, { "epoch": 1.0672636401499376, "grad_norm": 9.345911026000977, "learning_rate": 4.070671005085607e-05, "loss": 0.2165, "step": 5125 }, { "epoch": 1.07246980424823, "grad_norm": 0.36394619941711426, "learning_rate": 4.058859821061092e-05, "loss": 0.1157, "step": 5150 }, { "epoch": 1.0776759683465222, "grad_norm": 8.331086158752441, "learning_rate": 4.046991431184887e-05, "loss": 0.1545, "step": 5175 }, { "epoch": 1.0828821324448146, "grad_norm": 9.395284652709961, "learning_rate": 4.0350662709941325e-05, "loss": 0.2645, "step": 5200 }, { "epoch": 1.0880882965431071, "grad_norm": 6.326489448547363, "learning_rate": 4.023084778109283e-05, "loss": 0.2223, "step": 5225 }, { "epoch": 1.0932944606413995, "grad_norm": 11.211806297302246, "learning_rate": 4.0110473922180526e-05, "loss": 0.2022, "step": 5250 }, { "epoch": 1.0985006247396918, "grad_norm": 0.9454079270362854, "learning_rate": 3.998954555059266e-05, "loss": 0.2831, "step": 5275 }, { "epoch": 1.1037067888379841, "grad_norm": 2.5448479652404785, "learning_rate": 3.9868067104066646e-05, "loss": 0.1692, "step": 5300 }, { "epoch": 1.1089129529362765, "grad_norm": 2.1148276329040527, "learning_rate": 3.9746043040526074e-05, "loss": 0.3686, "step": 5325 }, { "epoch": 1.114119117034569, "grad_norm": 18.591083526611328, "learning_rate": 3.9623477837917167e-05, "loss": 0.2465, "step": 5350 }, { "epoch": 1.1193252811328613, "grad_norm": 1.9231871366500854, "learning_rate": 3.950037599404451e-05, "loss": 0.2383, "step": 5375 }, { "epoch": 1.1245314452311537, "grad_norm": 13.078465461730957, "learning_rate": 3.93767420264059e-05, "loss": 0.2129, "step": 5400 }, { "epoch": 1.129737609329446, "grad_norm": 0.015137123875319958, "learning_rate": 3.9252580472026616e-05, "loss": 0.123, "step": 5425 }, { "epoch": 1.1349437734277386, "grad_norm": 1.9116227626800537, "learning_rate": 3.912789588729292e-05, "loss": 0.2135, "step": 5450 }, { "epoch": 1.1401499375260309, "grad_norm": 0.04986105486750603, "learning_rate": 3.9002692847784824e-05, "loss": 0.3574, "step": 5475 }, { "epoch": 1.1453561016243232, "grad_norm": 8.818171501159668, "learning_rate": 3.8876975948108236e-05, "loss": 0.1246, "step": 5500 }, { "epoch": 1.1505622657226156, "grad_norm": 0.3172764182090759, "learning_rate": 3.8750749801726275e-05, "loss": 0.182, "step": 5525 }, { "epoch": 1.1557684298209079, "grad_norm": 9.364998817443848, "learning_rate": 3.862401904079004e-05, "loss": 0.2951, "step": 5550 }, { "epoch": 1.1609745939192004, "grad_norm": 7.908264636993408, "learning_rate": 3.849678831596855e-05, "loss": 0.1521, "step": 5575 }, { "epoch": 1.1661807580174928, "grad_norm": 0.3125484585762024, "learning_rate": 3.8369062296278174e-05, "loss": 0.2956, "step": 5600 }, { "epoch": 1.171386922115785, "grad_norm": 0.6337696313858032, "learning_rate": 3.824084566891118e-05, "loss": 0.2355, "step": 5625 }, { "epoch": 1.1765930862140774, "grad_norm": 0.07673631608486176, "learning_rate": 3.8112143139063824e-05, "loss": 0.2098, "step": 5650 }, { "epoch": 1.1817992503123698, "grad_norm": 0.02253473550081253, "learning_rate": 3.798295942976362e-05, "loss": 0.1149, "step": 5675 }, { "epoch": 1.1870054144106623, "grad_norm": 6.865312099456787, "learning_rate": 3.7853299281696056e-05, "loss": 0.3913, "step": 5700 }, { "epoch": 1.1922115785089547, "grad_norm": 6.194192409515381, "learning_rate": 3.7723167453030586e-05, "loss": 0.1924, "step": 5725 }, { "epoch": 1.197417742607247, "grad_norm": 0.12381599843502045, "learning_rate": 3.759256871924604e-05, "loss": 0.3331, "step": 5750 }, { "epoch": 1.2026239067055393, "grad_norm": 0.02437855489552021, "learning_rate": 3.74615078729554e-05, "loss": 0.1819, "step": 5775 }, { "epoch": 1.2078300708038316, "grad_norm": 14.187339782714844, "learning_rate": 3.732998972372991e-05, "loss": 0.2303, "step": 5800 }, { "epoch": 1.2130362349021242, "grad_norm": 16.453699111938477, "learning_rate": 3.719801909792251e-05, "loss": 0.3025, "step": 5825 }, { "epoch": 1.2182423990004165, "grad_norm": 1.748321771621704, "learning_rate": 3.706560083849083e-05, "loss": 0.2085, "step": 5850 }, { "epoch": 1.2234485630987089, "grad_norm": 0.6774524450302124, "learning_rate": 3.693273980481942e-05, "loss": 0.2587, "step": 5875 }, { "epoch": 1.2286547271970012, "grad_norm": 12.350029945373535, "learning_rate": 3.679944087254141e-05, "loss": 0.1885, "step": 5900 }, { "epoch": 1.2338608912952935, "grad_norm": 6.396539688110352, "learning_rate": 3.6665708933359576e-05, "loss": 0.2404, "step": 5925 }, { "epoch": 1.239067055393586, "grad_norm": 5.529823303222656, "learning_rate": 3.6531548894866896e-05, "loss": 0.2086, "step": 5950 }, { "epoch": 1.2442732194918784, "grad_norm": 4.696390628814697, "learning_rate": 3.639696568036639e-05, "loss": 0.1309, "step": 5975 }, { "epoch": 1.2494793835901707, "grad_norm": 0.10419386625289917, "learning_rate": 3.6261964228690454e-05, "loss": 0.1567, "step": 6000 }, { "epoch": 1.254685547688463, "grad_norm": 0.19173868000507355, "learning_rate": 3.612654949401967e-05, "loss": 0.1632, "step": 6025 }, { "epoch": 1.2598917117867554, "grad_norm": 10.412970542907715, "learning_rate": 3.599072644570092e-05, "loss": 0.1771, "step": 6050 }, { "epoch": 1.265097875885048, "grad_norm": 13.899092674255371, "learning_rate": 3.585450006806512e-05, "loss": 0.0964, "step": 6075 }, { "epoch": 1.2703040399833403, "grad_norm": 11.265216827392578, "learning_rate": 3.5717875360244214e-05, "loss": 0.3711, "step": 6100 }, { "epoch": 1.2755102040816326, "grad_norm": 5.9480977058410645, "learning_rate": 3.5580857335987774e-05, "loss": 0.2482, "step": 6125 }, { "epoch": 1.280716368179925, "grad_norm": 7.014443874359131, "learning_rate": 3.544345102347902e-05, "loss": 0.258, "step": 6150 }, { "epoch": 1.2859225322782173, "grad_norm": 0.06656356900930405, "learning_rate": 3.530566146515028e-05, "loss": 0.1432, "step": 6175 }, { "epoch": 1.2911286963765098, "grad_norm": 9.881967544555664, "learning_rate": 3.5167493717497905e-05, "loss": 0.127, "step": 6200 }, { "epoch": 1.2963348604748022, "grad_norm": 7.715837478637695, "learning_rate": 3.502895285089679e-05, "loss": 0.229, "step": 6225 }, { "epoch": 1.3015410245730945, "grad_norm": 0.07192976772785187, "learning_rate": 3.489004394941425e-05, "loss": 0.1111, "step": 6250 }, { "epoch": 1.3067471886713868, "grad_norm": 1.0921132564544678, "learning_rate": 3.475077211062346e-05, "loss": 0.1642, "step": 6275 }, { "epoch": 1.3119533527696792, "grad_norm": 0.2106587141752243, "learning_rate": 3.461114244541641e-05, "loss": 0.2393, "step": 6300 }, { "epoch": 1.3171595168679717, "grad_norm": 5.716639518737793, "learning_rate": 3.4471160077816314e-05, "loss": 0.2525, "step": 6325 }, { "epoch": 1.322365680966264, "grad_norm": 0.30353063344955444, "learning_rate": 3.43308301447896e-05, "loss": 0.2426, "step": 6350 }, { "epoch": 1.3275718450645564, "grad_norm": 0.039772335439920425, "learning_rate": 3.419015779605737e-05, "loss": 0.2302, "step": 6375 }, { "epoch": 1.332778009162849, "grad_norm": 0.7781673073768616, "learning_rate": 3.404914819390646e-05, "loss": 0.2583, "step": 6400 }, { "epoch": 1.3379841732611413, "grad_norm": 24.190811157226562, "learning_rate": 3.3907806512999966e-05, "loss": 0.1873, "step": 6425 }, { "epoch": 1.3431903373594336, "grad_norm": 9.922744750976562, "learning_rate": 3.3766137940187364e-05, "loss": 0.2441, "step": 6450 }, { "epoch": 1.348396501457726, "grad_norm": 7.696400165557861, "learning_rate": 3.362414767431414e-05, "loss": 0.1051, "step": 6475 }, { "epoch": 1.3536026655560183, "grad_norm": 5.509871482849121, "learning_rate": 3.348184092603105e-05, "loss": 0.1288, "step": 6500 }, { "epoch": 1.3588088296543108, "grad_norm": 12.904980659484863, "learning_rate": 3.333922291760286e-05, "loss": 0.2988, "step": 6525 }, { "epoch": 1.3640149937526032, "grad_norm": 0.11204788088798523, "learning_rate": 3.319629888271675e-05, "loss": 0.1962, "step": 6550 }, { "epoch": 1.3692211578508955, "grad_norm": 0.16563156247138977, "learning_rate": 3.3053074066290214e-05, "loss": 0.2317, "step": 6575 }, { "epoch": 1.3744273219491878, "grad_norm": 10.849634170532227, "learning_rate": 3.290955372427858e-05, "loss": 0.1759, "step": 6600 }, { "epoch": 1.3796334860474802, "grad_norm": 13.899421691894531, "learning_rate": 3.276574312348218e-05, "loss": 0.3169, "step": 6625 }, { "epoch": 1.3848396501457727, "grad_norm": 0.03900701552629471, "learning_rate": 3.2621647541353015e-05, "loss": 0.2785, "step": 6650 }, { "epoch": 1.390045814244065, "grad_norm": 4.0548095703125, "learning_rate": 3.2477272265801126e-05, "loss": 0.2163, "step": 6675 }, { "epoch": 1.3952519783423574, "grad_norm": 0.026262715458869934, "learning_rate": 3.233841378366721e-05, "loss": 0.2331, "step": 6700 }, { "epoch": 1.4004581424406497, "grad_norm": 1.4128097295761108, "learning_rate": 3.2193505687298915e-05, "loss": 0.138, "step": 6725 }, { "epoch": 1.405664306538942, "grad_norm": 0.03084075264632702, "learning_rate": 3.2048333609131926e-05, "loss": 0.2318, "step": 6750 }, { "epoch": 1.4108704706372346, "grad_norm": 0.08986690640449524, "learning_rate": 3.19029028765807e-05, "loss": 0.1265, "step": 6775 }, { "epoch": 1.416076634735527, "grad_norm": 0.33206573128700256, "learning_rate": 3.1757218826551634e-05, "loss": 0.0584, "step": 6800 }, { "epoch": 1.4212827988338192, "grad_norm": 0.31883516907691956, "learning_rate": 3.161128680524714e-05, "loss": 0.1517, "step": 6825 }, { "epoch": 1.4264889629321116, "grad_norm": 0.03625530004501343, "learning_rate": 3.1465112167969504e-05, "loss": 0.1541, "step": 6850 }, { "epoch": 1.431695127030404, "grad_norm": 9.222389221191406, "learning_rate": 3.1318700278924374e-05, "loss": 0.267, "step": 6875 }, { "epoch": 1.4369012911286965, "grad_norm": 9.740432739257812, "learning_rate": 3.11779266463367e-05, "loss": 0.2008, "step": 6900 }, { "epoch": 1.4421074552269888, "grad_norm": 0.007975614629685879, "learning_rate": 3.1031065337471356e-05, "loss": 0.1723, "step": 6925 }, { "epoch": 1.4473136193252811, "grad_norm": 9.000720977783203, "learning_rate": 3.088398270515885e-05, "loss": 0.1206, "step": 6950 }, { "epoch": 1.4525197834235735, "grad_norm": 9.622346878051758, "learning_rate": 3.0736684146925715e-05, "loss": 0.1296, "step": 6975 }, { "epoch": 1.4577259475218658, "grad_norm": 0.9583789706230164, "learning_rate": 3.0589175068222385e-05, "loss": 0.144, "step": 7000 }, { "epoch": 1.4629321116201583, "grad_norm": 37.292327880859375, "learning_rate": 3.044146088222479e-05, "loss": 0.253, "step": 7025 }, { "epoch": 1.4681382757184507, "grad_norm": 6.009835243225098, "learning_rate": 3.0293547009635758e-05, "loss": 0.1883, "step": 7050 }, { "epoch": 1.473344439816743, "grad_norm": 0.2210305631160736, "learning_rate": 3.0145438878486033e-05, "loss": 0.2264, "step": 7075 }, { "epoch": 1.4785506039150353, "grad_norm": 6.8800764083862305, "learning_rate": 2.9997141923935136e-05, "loss": 0.2641, "step": 7100 }, { "epoch": 1.4837567680133277, "grad_norm": 0.2739255130290985, "learning_rate": 2.9848661588071876e-05, "loss": 0.2043, "step": 7125 }, { "epoch": 1.4889629321116202, "grad_norm": 0.016436373814940453, "learning_rate": 2.9700003319714648e-05, "loss": 0.1275, "step": 7150 }, { "epoch": 1.4941690962099126, "grad_norm": 3.340723752975464, "learning_rate": 2.9551172574211478e-05, "loss": 0.3281, "step": 7175 }, { "epoch": 1.499375260308205, "grad_norm": 4.6393256187438965, "learning_rate": 2.9402174813239836e-05, "loss": 0.1756, "step": 7200 }, { "epoch": 1.5045814244064974, "grad_norm": 6.234640121459961, "learning_rate": 2.9253015504606197e-05, "loss": 0.2412, "step": 7225 }, { "epoch": 1.5097875885047896, "grad_norm": 2.9910178184509277, "learning_rate": 2.910370012204537e-05, "loss": 0.2625, "step": 7250 }, { "epoch": 1.5149937526030821, "grad_norm": 0.030267061665654182, "learning_rate": 2.8954234145019672e-05, "loss": 0.1825, "step": 7275 }, { "epoch": 1.5201999167013744, "grad_norm": 12.349358558654785, "learning_rate": 2.8804623058517805e-05, "loss": 0.2111, "step": 7300 }, { "epoch": 1.5254060807996668, "grad_norm": 0.03321736305952072, "learning_rate": 2.86548723528536e-05, "loss": 0.1627, "step": 7325 }, { "epoch": 1.5306122448979593, "grad_norm": 12.457853317260742, "learning_rate": 2.8504987523464515e-05, "loss": 0.2239, "step": 7350 }, { "epoch": 1.5358184089962514, "grad_norm": 0.0661047101020813, "learning_rate": 2.8354974070709983e-05, "loss": 0.2431, "step": 7375 }, { "epoch": 1.541024573094544, "grad_norm": 0.02619466558098793, "learning_rate": 2.820483749966955e-05, "loss": 0.1084, "step": 7400 }, { "epoch": 1.5462307371928363, "grad_norm": 3.5517141819000244, "learning_rate": 2.8054583319940896e-05, "loss": 0.116, "step": 7425 }, { "epoch": 1.5514369012911287, "grad_norm": 8.054045677185059, "learning_rate": 2.7904217045437564e-05, "loss": 0.2523, "step": 7450 }, { "epoch": 1.5566430653894212, "grad_norm": 5.730096340179443, "learning_rate": 2.775374419418671e-05, "loss": 0.1687, "step": 7475 }, { "epoch": 1.5618492294877133, "grad_norm": 0.11246989667415619, "learning_rate": 2.7603170288126546e-05, "loss": 0.1266, "step": 7500 }, { "epoch": 1.5670553935860059, "grad_norm": 7.816544532775879, "learning_rate": 2.74525008529037e-05, "loss": 0.2546, "step": 7525 }, { "epoch": 1.5722615576842982, "grad_norm": 0.06883124262094498, "learning_rate": 2.7301741417670485e-05, "loss": 0.1984, "step": 7550 }, { "epoch": 1.5774677217825905, "grad_norm": 3.755141019821167, "learning_rate": 2.715089751488195e-05, "loss": 0.1695, "step": 7575 }, { "epoch": 1.582673885880883, "grad_norm": 0.14307986199855804, "learning_rate": 2.6999974680092882e-05, "loss": 0.1959, "step": 7600 }, { "epoch": 1.5878800499791752, "grad_norm": 16.175771713256836, "learning_rate": 2.684897845175463e-05, "loss": 0.21, "step": 7625 }, { "epoch": 1.5930862140774678, "grad_norm": 0.39594829082489014, "learning_rate": 2.6697914371011912e-05, "loss": 0.1769, "step": 7650 }, { "epoch": 1.59829237817576, "grad_norm": 11.465653419494629, "learning_rate": 2.654678798149942e-05, "loss": 0.1581, "step": 7675 }, { "epoch": 1.6034985422740524, "grad_norm": 0.006868015043437481, "learning_rate": 2.639560482913843e-05, "loss": 0.2084, "step": 7700 }, { "epoch": 1.608704706372345, "grad_norm": 23.506370544433594, "learning_rate": 2.6244370461933226e-05, "loss": 0.1521, "step": 7725 }, { "epoch": 1.613910870470637, "grad_norm": 7.843533039093018, "learning_rate": 2.609309042976757e-05, "loss": 0.161, "step": 7750 }, { "epoch": 1.6191170345689296, "grad_norm": 0.6957140564918518, "learning_rate": 2.5941770284200968e-05, "loss": 0.1201, "step": 7775 }, { "epoch": 1.624323198667222, "grad_norm": 0.09775109589099884, "learning_rate": 2.5790415578265e-05, "loss": 0.0975, "step": 7800 }, { "epoch": 1.6295293627655143, "grad_norm": 7.59830379486084, "learning_rate": 2.5639031866259512e-05, "loss": 0.1495, "step": 7825 }, { "epoch": 1.6347355268638069, "grad_norm": 6.013265132904053, "learning_rate": 2.5487624703548783e-05, "loss": 0.2432, "step": 7850 }, { "epoch": 1.639941690962099, "grad_norm": 0.011226486414670944, "learning_rate": 2.5336199646357693e-05, "loss": 0.1438, "step": 7875 }, { "epoch": 1.6451478550603915, "grad_norm": 12.745257377624512, "learning_rate": 2.518476225156776e-05, "loss": 0.1853, "step": 7900 }, { "epoch": 1.6503540191586838, "grad_norm": 0.028018401935696602, "learning_rate": 2.5033318076513295e-05, "loss": 0.269, "step": 7925 }, { "epoch": 1.6555601832569762, "grad_norm": 0.29439255595207214, "learning_rate": 2.4881872678777408e-05, "loss": 0.2584, "step": 7950 }, { "epoch": 1.6607663473552687, "grad_norm": 7.931988716125488, "learning_rate": 2.473043161598808e-05, "loss": 0.2546, "step": 7975 }, { "epoch": 1.665972511453561, "grad_norm": 0.24659812450408936, "learning_rate": 2.4579000445614214e-05, "loss": 0.0941, "step": 8000 }, { "epoch": 1.6711786755518534, "grad_norm": 0.13519421219825745, "learning_rate": 2.4427584724761687e-05, "loss": 0.1369, "step": 8025 }, { "epoch": 1.6763848396501457, "grad_norm": 2.32975435256958, "learning_rate": 2.4276190009969428e-05, "loss": 0.0967, "step": 8050 }, { "epoch": 1.681591003748438, "grad_norm": 10.135041236877441, "learning_rate": 2.412482185700548e-05, "loss": 0.2521, "step": 8075 }, { "epoch": 1.6867971678467306, "grad_norm": 5.711327075958252, "learning_rate": 2.397348582066317e-05, "loss": 0.2155, "step": 8100 }, { "epoch": 1.692003331945023, "grad_norm": 5.974716663360596, "learning_rate": 2.382218745455721e-05, "loss": 0.1574, "step": 8125 }, { "epoch": 1.6972094960433153, "grad_norm": 0.01547964382916689, "learning_rate": 2.3670932310919928e-05, "loss": 0.1252, "step": 8150 }, { "epoch": 1.7024156601416076, "grad_norm": 0.8667640089988708, "learning_rate": 2.3519725940397516e-05, "loss": 0.1717, "step": 8175 }, { "epoch": 1.7076218242399, "grad_norm": 0.2800462245941162, "learning_rate": 2.3368573891846307e-05, "loss": 0.1726, "step": 8200 }, { "epoch": 1.7128279883381925, "grad_norm": 0.2914607524871826, "learning_rate": 2.321748171212919e-05, "loss": 0.1686, "step": 8225 }, { "epoch": 1.7180341524364848, "grad_norm": 5.2183051109313965, "learning_rate": 2.3066454945912003e-05, "loss": 0.2877, "step": 8250 }, { "epoch": 1.7232403165347772, "grad_norm": 5.748877048492432, "learning_rate": 2.2915499135460123e-05, "loss": 0.157, "step": 8275 }, { "epoch": 1.7284464806330697, "grad_norm": 9.520060539245605, "learning_rate": 2.276461982043503e-05, "loss": 0.2562, "step": 8300 }, { "epoch": 1.7336526447313618, "grad_norm": 0.03092977963387966, "learning_rate": 2.2613822537691016e-05, "loss": 0.1034, "step": 8325 }, { "epoch": 1.7388588088296544, "grad_norm": 0.1688009798526764, "learning_rate": 2.2463112821072063e-05, "loss": 0.1484, "step": 8350 }, { "epoch": 1.7440649729279467, "grad_norm": 0.5068601369857788, "learning_rate": 2.2312496201208654e-05, "loss": 0.1381, "step": 8375 }, { "epoch": 1.749271137026239, "grad_norm": 0.58343505859375, "learning_rate": 2.2161978205314934e-05, "loss": 0.2426, "step": 8400 }, { "epoch": 1.7544773011245316, "grad_norm": 5.165789604187012, "learning_rate": 2.20115643569858e-05, "loss": 0.2173, "step": 8425 }, { "epoch": 1.7596834652228237, "grad_norm": 0.6824327111244202, "learning_rate": 2.1861260175994203e-05, "loss": 0.1443, "step": 8450 }, { "epoch": 1.7648896293211163, "grad_norm": 0.2100004255771637, "learning_rate": 2.1711071178088633e-05, "loss": 0.1741, "step": 8475 }, { "epoch": 1.7700957934194086, "grad_norm": 0.46001136302948, "learning_rate": 2.1561002874790662e-05, "loss": 0.2116, "step": 8500 }, { "epoch": 1.775301957517701, "grad_norm": 7.581360816955566, "learning_rate": 2.1411060773192704e-05, "loss": 0.1379, "step": 8525 }, { "epoch": 1.7805081216159935, "grad_norm": 7.0736846923828125, "learning_rate": 2.126125037575594e-05, "loss": 0.1403, "step": 8550 }, { "epoch": 1.7857142857142856, "grad_norm": 0.7845435738563538, "learning_rate": 2.1111577180108343e-05, "loss": 0.1406, "step": 8575 }, { "epoch": 1.7909204498125781, "grad_norm": 0.2352413535118103, "learning_rate": 2.0962046678842974e-05, "loss": 0.1425, "step": 8600 }, { "epoch": 1.7961266139108705, "grad_norm": 0.2534882426261902, "learning_rate": 2.08126643593164e-05, "loss": 0.2827, "step": 8625 }, { "epoch": 1.8013327780091628, "grad_norm": 0.015448580496013165, "learning_rate": 2.066343570344733e-05, "loss": 0.2161, "step": 8650 }, { "epoch": 1.8065389421074554, "grad_norm": 11.641529083251953, "learning_rate": 2.0514366187515444e-05, "loss": 0.141, "step": 8675 }, { "epoch": 1.8117451062057475, "grad_norm": 8.481783866882324, "learning_rate": 2.036546128196041e-05, "loss": 0.1693, "step": 8700 }, { "epoch": 1.81695127030404, "grad_norm": 0.046340491622686386, "learning_rate": 2.021672645118118e-05, "loss": 0.1685, "step": 8725 }, { "epoch": 1.8221574344023324, "grad_norm": 0.02451934851706028, "learning_rate": 2.0068167153335388e-05, "loss": 0.2014, "step": 8750 }, { "epoch": 1.8273635985006247, "grad_norm": 0.008840017020702362, "learning_rate": 1.991978884013914e-05, "loss": 0.139, "step": 8775 }, { "epoch": 1.8325697625989172, "grad_norm": 0.03715880587697029, "learning_rate": 1.9771596956666876e-05, "loss": 0.1835, "step": 8800 }, { "epoch": 1.8377759266972093, "grad_norm": 0.052263110876083374, "learning_rate": 1.9623596941151584e-05, "loss": 0.1522, "step": 8825 }, { "epoch": 1.842982090795502, "grad_norm": 0.14420367777347565, "learning_rate": 1.9475794224785242e-05, "loss": 0.1649, "step": 8850 }, { "epoch": 1.8481882548937942, "grad_norm": 5.102858066558838, "learning_rate": 1.9328194231519464e-05, "loss": 0.1966, "step": 8875 }, { "epoch": 1.8533944189920866, "grad_norm": 2.9605753421783447, "learning_rate": 1.918080237786651e-05, "loss": 0.1057, "step": 8900 }, { "epoch": 1.8586005830903791, "grad_norm": 0.06543917208909988, "learning_rate": 1.9033624072700505e-05, "loss": 0.2098, "step": 8925 }, { "epoch": 1.8638067471886712, "grad_norm": 4.984257221221924, "learning_rate": 1.8886664717058894e-05, "loss": 0.2006, "step": 8950 }, { "epoch": 1.8690129112869638, "grad_norm": 0.3344804048538208, "learning_rate": 1.8739929703944314e-05, "loss": 0.1492, "step": 8975 }, { "epoch": 1.8742190753852561, "grad_norm": 6.675142765045166, "learning_rate": 1.859342441812662e-05, "loss": 0.1568, "step": 9000 }, { "epoch": 1.8794252394835484, "grad_norm": 13.291411399841309, "learning_rate": 1.8447154235945336e-05, "loss": 0.2015, "step": 9025 }, { "epoch": 1.884631403581841, "grad_norm": 0.030354047194123268, "learning_rate": 1.8301124525112325e-05, "loss": 0.1201, "step": 9050 }, { "epoch": 1.8898375676801331, "grad_norm": 11.661486625671387, "learning_rate": 1.8155340644514798e-05, "loss": 0.1744, "step": 9075 }, { "epoch": 1.8950437317784257, "grad_norm": 9.253254890441895, "learning_rate": 1.8009807944018715e-05, "loss": 0.1305, "step": 9100 }, { "epoch": 1.900249895876718, "grad_norm": 0.049124184995889664, "learning_rate": 1.7864531764272356e-05, "loss": 0.225, "step": 9125 }, { "epoch": 1.9054560599750103, "grad_norm": 10.553680419921875, "learning_rate": 1.7719517436510462e-05, "loss": 0.213, "step": 9150 }, { "epoch": 1.9106622240733029, "grad_norm": 0.042677950114011765, "learning_rate": 1.7574770282358505e-05, "loss": 0.1058, "step": 9175 }, { "epoch": 1.9158683881715952, "grad_norm": 0.8776458501815796, "learning_rate": 1.7430295613637404e-05, "loss": 0.1907, "step": 9200 }, { "epoch": 1.9210745522698875, "grad_norm": 9.908956527709961, "learning_rate": 1.7286098732168644e-05, "loss": 0.1863, "step": 9225 }, { "epoch": 1.9262807163681799, "grad_norm": 1.294067144393921, "learning_rate": 1.7142184929579662e-05, "loss": 0.1053, "step": 9250 }, { "epoch": 1.9314868804664722, "grad_norm": 5.95350980758667, "learning_rate": 1.6998559487109693e-05, "loss": 0.1541, "step": 9275 }, { "epoch": 1.9366930445647648, "grad_norm": 7.54666805267334, "learning_rate": 1.6855227675415966e-05, "loss": 0.1696, "step": 9300 }, { "epoch": 1.941899208663057, "grad_norm": 0.0091070756316185, "learning_rate": 1.671219475438024e-05, "loss": 0.212, "step": 9325 }, { "epoch": 1.9471053727613494, "grad_norm": 0.05602679401636124, "learning_rate": 1.656946597291584e-05, "loss": 0.133, "step": 9350 }, { "epoch": 1.9523115368596418, "grad_norm": 0.04069928824901581, "learning_rate": 1.6427046568774977e-05, "loss": 0.1599, "step": 9375 }, { "epoch": 1.957517700957934, "grad_norm": 0.21093979477882385, "learning_rate": 1.628494176835661e-05, "loss": 0.1432, "step": 9400 }, { "epoch": 1.9627238650562266, "grad_norm": 6.424279689788818, "learning_rate": 1.614315678651457e-05, "loss": 0.1286, "step": 9425 }, { "epoch": 1.967930029154519, "grad_norm": 8.281681060791016, "learning_rate": 1.6001696826366247e-05, "loss": 0.0896, "step": 9450 }, { "epoch": 1.9731361932528113, "grad_norm": 3.2924630641937256, "learning_rate": 1.5860567079101657e-05, "loss": 0.2068, "step": 9475 }, { "epoch": 1.9783423573511039, "grad_norm": 0.10305780172348022, "learning_rate": 1.5719772723792857e-05, "loss": 0.133, "step": 9500 }, { "epoch": 1.983548521449396, "grad_norm": 0.013987602666020393, "learning_rate": 1.5579318927204003e-05, "loss": 0.1602, "step": 9525 }, { "epoch": 1.9887546855476885, "grad_norm": 8.741622924804688, "learning_rate": 1.5439210843601647e-05, "loss": 0.1248, "step": 9550 }, { "epoch": 1.9939608496459809, "grad_norm": 7.777089595794678, "learning_rate": 1.5299453614565646e-05, "loss": 0.1656, "step": 9575 }, { "epoch": 1.9991670137442732, "grad_norm": 7.620024681091309, "learning_rate": 1.5160052368800466e-05, "loss": 0.1867, "step": 9600 }, { "epoch": 2.0, "eval_accuracy": 0.9082910321489002, "eval_f1_macro": 0.8934973914545552, "eval_f1_micro": 0.9082910321489002, "eval_f1_weighted": 0.8990272847487176, "eval_loss": 0.23952987790107727, "eval_precision_macro": 0.9212318034319655, "eval_precision_micro": 0.9082910321489002, "eval_precision_weighted": 0.9229994939426459, "eval_recall_macro": 0.9014322820037105, "eval_recall_micro": 0.9082910321489002, "eval_recall_weighted": 0.9082910321489002, "eval_runtime": 16.0892, "eval_samples_per_second": 918.319, "eval_steps_per_second": 57.43, "step": 9604 }, { "epoch": 2.0043731778425657, "grad_norm": 0.036709289997816086, "learning_rate": 1.5021012221946956e-05, "loss": 0.1042, "step": 9625 }, { "epoch": 2.009579341940858, "grad_norm": 0.03700362890958786, "learning_rate": 1.4882338276394644e-05, "loss": 0.1846, "step": 9650 }, { "epoch": 2.0147855060391504, "grad_norm": 0.2605753540992737, "learning_rate": 1.4744035621094468e-05, "loss": 0.1531, "step": 9675 }, { "epoch": 2.0199916701374425, "grad_norm": 1.4101078510284424, "learning_rate": 1.460610933137206e-05, "loss": 0.1186, "step": 9700 }, { "epoch": 2.025197834235735, "grad_norm": 7.530198097229004, "learning_rate": 1.4468564468741464e-05, "loss": 0.1534, "step": 9725 }, { "epoch": 2.0304039983340276, "grad_norm": 0.06903871148824692, "learning_rate": 1.433140608071939e-05, "loss": 0.2258, "step": 9750 }, { "epoch": 2.0356101624323197, "grad_norm": 0.9064328670501709, "learning_rate": 1.4194639200640023e-05, "loss": 0.1681, "step": 9775 }, { "epoch": 2.0408163265306123, "grad_norm": 0.04811558499932289, "learning_rate": 1.4058268847470241e-05, "loss": 0.1343, "step": 9800 }, { "epoch": 2.0460224906289044, "grad_norm": 5.909759998321533, "learning_rate": 1.3922300025625539e-05, "loss": 0.0999, "step": 9825 }, { "epoch": 2.051228654727197, "grad_norm": 1.272133708000183, "learning_rate": 1.37867377247863e-05, "loss": 0.1376, "step": 9850 }, { "epoch": 2.0564348188254895, "grad_norm": 3.806577682495117, "learning_rate": 1.3651586919714671e-05, "loss": 0.158, "step": 9875 }, { "epoch": 2.0616409829237816, "grad_norm": 1.572864294052124, "learning_rate": 1.3516852570072086e-05, "loss": 0.1046, "step": 9900 }, { "epoch": 2.066847147022074, "grad_norm": 0.07213819772005081, "learning_rate": 1.33825396202372e-05, "loss": 0.142, "step": 9925 }, { "epoch": 2.0720533111203667, "grad_norm": 6.35291862487793, "learning_rate": 1.324865299912445e-05, "loss": 0.1243, "step": 9950 }, { "epoch": 2.077259475218659, "grad_norm": 11.946340560913086, "learning_rate": 1.31151976200032e-05, "loss": 0.1512, "step": 9975 }, { "epoch": 2.0824656393169514, "grad_norm": 0.20692221820354462, "learning_rate": 1.2982178380317416e-05, "loss": 0.137, "step": 10000 }, { "epoch": 2.0876718034152435, "grad_norm": 0.24739764630794525, "learning_rate": 1.2849600161505957e-05, "loss": 0.1535, "step": 10025 }, { "epoch": 2.092877967513536, "grad_norm": 0.6494514346122742, "learning_rate": 1.2717467828823409e-05, "loss": 0.1838, "step": 10050 }, { "epoch": 2.0980841316118286, "grad_norm": 13.001143455505371, "learning_rate": 1.25857862311616e-05, "loss": 0.1973, "step": 10075 }, { "epoch": 2.1032902957101207, "grad_norm": 2.27883243560791, "learning_rate": 1.2454560200871612e-05, "loss": 0.2116, "step": 10100 }, { "epoch": 2.1084964598084133, "grad_norm": 14.335226058959961, "learning_rate": 1.2323794553586462e-05, "loss": 0.1645, "step": 10125 }, { "epoch": 2.1137026239067054, "grad_norm": 0.5073531866073608, "learning_rate": 1.2193494088044399e-05, "loss": 0.2067, "step": 10150 }, { "epoch": 2.118908788004998, "grad_norm": 10.126875877380371, "learning_rate": 1.206366358591274e-05, "loss": 0.2034, "step": 10175 }, { "epoch": 2.1241149521032905, "grad_norm": 0.4860388934612274, "learning_rate": 1.1934307811612502e-05, "loss": 0.1298, "step": 10200 }, { "epoch": 2.1293211162015826, "grad_norm": 11.252408981323242, "learning_rate": 1.1805431512143473e-05, "loss": 0.2134, "step": 10225 }, { "epoch": 2.134527280299875, "grad_norm": 0.6547976732254028, "learning_rate": 1.1677039416910018e-05, "loss": 0.2518, "step": 10250 }, { "epoch": 2.1397334443981673, "grad_norm": 0.02918720245361328, "learning_rate": 1.1549136237547577e-05, "loss": 0.1141, "step": 10275 }, { "epoch": 2.14493960849646, "grad_norm": 2.0306875705718994, "learning_rate": 1.1421726667749714e-05, "loss": 0.2479, "step": 10300 }, { "epoch": 2.1501457725947524, "grad_norm": 3.7051842212677, "learning_rate": 1.1294815383095877e-05, "loss": 0.1666, "step": 10325 }, { "epoch": 2.1553519366930445, "grad_norm": 7.603741645812988, "learning_rate": 1.1168407040879842e-05, "loss": 0.1395, "step": 10350 }, { "epoch": 2.160558100791337, "grad_norm": 0.0051424442790448666, "learning_rate": 1.1042506279938771e-05, "loss": 0.1033, "step": 10375 }, { "epoch": 2.165764264889629, "grad_norm": 0.43630650639533997, "learning_rate": 1.0917117720483014e-05, "loss": 0.061, "step": 10400 }, { "epoch": 2.1709704289879217, "grad_norm": 0.06688734143972397, "learning_rate": 1.0792245963926504e-05, "loss": 0.1547, "step": 10425 }, { "epoch": 2.1761765930862143, "grad_norm": 0.005384028889238834, "learning_rate": 1.0667895592717969e-05, "loss": 0.1822, "step": 10450 }, { "epoch": 2.1813827571845064, "grad_norm": 6.822201251983643, "learning_rate": 1.0544071170172723e-05, "loss": 0.0977, "step": 10475 }, { "epoch": 2.186588921282799, "grad_norm": 0.17409928143024445, "learning_rate": 1.0420777240305213e-05, "loss": 0.1472, "step": 10500 }, { "epoch": 2.191795085381091, "grad_norm": 4.65806245803833, "learning_rate": 1.0298018327662284e-05, "loss": 0.1499, "step": 10525 }, { "epoch": 2.1970012494793836, "grad_norm": 0.050723303109407425, "learning_rate": 1.0175798937157085e-05, "loss": 0.1228, "step": 10550 }, { "epoch": 2.202207413577676, "grad_norm": 7.599793434143066, "learning_rate": 1.0054123553903849e-05, "loss": 0.1122, "step": 10575 }, { "epoch": 2.2074135776759682, "grad_norm": 0.2461252212524414, "learning_rate": 9.93299664305322e-06, "loss": 0.0847, "step": 10600 }, { "epoch": 2.212619741774261, "grad_norm": 0.014030307531356812, "learning_rate": 9.812422649628388e-06, "loss": 0.1286, "step": 10625 }, { "epoch": 2.217825905872553, "grad_norm": 0.010962835513055325, "learning_rate": 9.692405998362045e-06, "loss": 0.0591, "step": 10650 }, { "epoch": 2.2230320699708455, "grad_norm": 0.009474786929786205, "learning_rate": 9.572951093533946e-06, "loss": 0.1037, "step": 10675 }, { "epoch": 2.228238234069138, "grad_norm": 0.02288031578063965, "learning_rate": 9.454062318809313e-06, "loss": 0.1174, "step": 10700 }, { "epoch": 2.23344439816743, "grad_norm": 10.316886901855469, "learning_rate": 9.335744037077918e-06, "loss": 0.1019, "step": 10725 }, { "epoch": 2.2386505622657227, "grad_norm": 3.191359758377075, "learning_rate": 9.218000590294072e-06, "loss": 0.0714, "step": 10750 }, { "epoch": 2.243856726364015, "grad_norm": 0.9904782176017761, "learning_rate": 9.100836299317201e-06, "loss": 0.1085, "step": 10775 }, { "epoch": 2.2490628904623073, "grad_norm": 0.00978647731244564, "learning_rate": 8.9842554637533e-06, "loss": 0.1005, "step": 10800 }, { "epoch": 2.2542690545606, "grad_norm": 0.5314901471138, "learning_rate": 8.868262361797181e-06, "loss": 0.1389, "step": 10825 }, { "epoch": 2.259475218658892, "grad_norm": 2.777710437774658, "learning_rate": 8.75286125007545e-06, "loss": 0.1211, "step": 10850 }, { "epoch": 2.2646813827571846, "grad_norm": 9.188045501708984, "learning_rate": 8.638056363490302e-06, "loss": 0.1837, "step": 10875 }, { "epoch": 2.269887546855477, "grad_norm": 8.704696655273438, "learning_rate": 8.523851915064129e-06, "loss": 0.1512, "step": 10900 }, { "epoch": 2.275093710953769, "grad_norm": 11.265883445739746, "learning_rate": 8.410252095784866e-06, "loss": 0.1485, "step": 10925 }, { "epoch": 2.2802998750520618, "grad_norm": 8.006060600280762, "learning_rate": 8.297261074452281e-06, "loss": 0.1927, "step": 10950 }, { "epoch": 2.285506039150354, "grad_norm": 6.185884475708008, "learning_rate": 8.184882997524884e-06, "loss": 0.0961, "step": 10975 }, { "epoch": 2.2907122032486464, "grad_norm": 8.097982406616211, "learning_rate": 8.073121988967849e-06, "loss": 0.1119, "step": 11000 }, { "epoch": 2.295918367346939, "grad_norm": 6.152439117431641, "learning_rate": 7.961982150101643e-06, "loss": 0.116, "step": 11025 }, { "epoch": 2.301124531445231, "grad_norm": 4.259856224060059, "learning_rate": 7.851467559451508e-06, "loss": 0.1936, "step": 11050 }, { "epoch": 2.3063306955435237, "grad_norm": 3.0448789596557617, "learning_rate": 7.741582272597823e-06, "loss": 0.1005, "step": 11075 }, { "epoch": 2.3115368596418158, "grad_norm": 0.018132351338863373, "learning_rate": 7.632330322027212e-06, "loss": 0.1356, "step": 11100 }, { "epoch": 2.3167430237401083, "grad_norm": 12.663254737854004, "learning_rate": 7.523715716984661e-06, "loss": 0.1243, "step": 11125 }, { "epoch": 2.321949187838401, "grad_norm": 0.21571685373783112, "learning_rate": 7.415742443326309e-06, "loss": 0.1938, "step": 11150 }, { "epoch": 2.327155351936693, "grad_norm": 0.023954235017299652, "learning_rate": 7.308414463373189e-06, "loss": 0.1548, "step": 11175 }, { "epoch": 2.3323615160349855, "grad_norm": 0.010295159183442593, "learning_rate": 7.2017357157658585e-06, "loss": 0.145, "step": 11200 }, { "epoch": 2.3375676801332776, "grad_norm": 0.6988667845726013, "learning_rate": 7.095710115319831e-06, "loss": 0.0901, "step": 11225 }, { "epoch": 2.34277384423157, "grad_norm": 4.98328161239624, "learning_rate": 6.99034155288193e-06, "loss": 0.1512, "step": 11250 }, { "epoch": 2.3479800083298628, "grad_norm": 1.8333989381790161, "learning_rate": 6.885633895187493e-06, "loss": 0.152, "step": 11275 }, { "epoch": 2.353186172428155, "grad_norm": 0.01870441809296608, "learning_rate": 6.781590984718461e-06, "loss": 0.2451, "step": 11300 }, { "epoch": 2.3583923365264474, "grad_norm": 0.03275001421570778, "learning_rate": 6.678216639562429e-06, "loss": 0.1564, "step": 11325 }, { "epoch": 2.3635985006247395, "grad_norm": 0.021069686859846115, "learning_rate": 6.5755146532724495e-06, "loss": 0.1644, "step": 11350 }, { "epoch": 2.368804664723032, "grad_norm": 5.0085577964782715, "learning_rate": 6.473488794727878e-06, "loss": 0.1459, "step": 11375 }, { "epoch": 2.3740108288213246, "grad_norm": 8.764829635620117, "learning_rate": 6.372142807996051e-06, "loss": 0.1538, "step": 11400 }, { "epoch": 2.3792169929196167, "grad_norm": 10.779679298400879, "learning_rate": 6.27148041219488e-06, "loss": 0.163, "step": 11425 }, { "epoch": 2.3844231570179093, "grad_norm": 0.08707818388938904, "learning_rate": 6.17150530135639e-06, "loss": 0.1641, "step": 11450 }, { "epoch": 2.3896293211162014, "grad_norm": 0.5683345198631287, "learning_rate": 6.07222114429111e-06, "loss": 0.1532, "step": 11475 }, { "epoch": 2.394835485214494, "grad_norm": 6.6075639724731445, "learning_rate": 5.9736315844535235e-06, "loss": 0.167, "step": 11500 }, { "epoch": 2.4000416493127865, "grad_norm": 0.18501617014408112, "learning_rate": 5.875740239808283e-06, "loss": 0.1036, "step": 11525 }, { "epoch": 2.4052478134110786, "grad_norm": 0.05444851890206337, "learning_rate": 5.778550702697463e-06, "loss": 0.1271, "step": 11550 }, { "epoch": 2.410453977509371, "grad_norm": 4.797399044036865, "learning_rate": 5.682066539708763e-06, "loss": 0.1147, "step": 11575 }, { "epoch": 2.4156601416076633, "grad_norm": 1.1078044176101685, "learning_rate": 5.586291291544585e-06, "loss": 0.1421, "step": 11600 }, { "epoch": 2.420866305705956, "grad_norm": 0.07829868793487549, "learning_rate": 5.491228472892118e-06, "loss": 0.1598, "step": 11625 }, { "epoch": 2.4260724698042484, "grad_norm": 3.3450565338134766, "learning_rate": 5.396881572294363e-06, "loss": 0.1359, "step": 11650 }, { "epoch": 2.4312786339025405, "grad_norm": 0.017295390367507935, "learning_rate": 5.303254052022075e-06, "loss": 0.0646, "step": 11675 }, { "epoch": 2.436484798000833, "grad_norm": 13.109850883483887, "learning_rate": 5.210349347946783e-06, "loss": 0.2058, "step": 11700 }, { "epoch": 2.441690962099125, "grad_norm": 0.01693105697631836, "learning_rate": 5.118170869414618e-06, "loss": 0.1465, "step": 11725 }, { "epoch": 2.4468971261974177, "grad_norm": 0.01354676578193903, "learning_rate": 5.026721999121256e-06, "loss": 0.0848, "step": 11750 }, { "epoch": 2.4521032902957103, "grad_norm": 0.5930526852607727, "learning_rate": 4.9360060929877636e-06, "loss": 0.1185, "step": 11775 }, { "epoch": 2.4573094543940024, "grad_norm": 12.73507308959961, "learning_rate": 4.846026480037444e-06, "loss": 0.1434, "step": 11800 }, { "epoch": 2.462515618492295, "grad_norm": 6.715247631072998, "learning_rate": 4.7567864622736824e-06, "loss": 0.1317, "step": 11825 }, { "epoch": 2.467721782590587, "grad_norm": 0.002648524707183242, "learning_rate": 4.668289314558735e-06, "loss": 0.1621, "step": 11850 }, { "epoch": 2.4729279466888796, "grad_norm": 8.284907341003418, "learning_rate": 4.580538284493616e-06, "loss": 0.1486, "step": 11875 }, { "epoch": 2.478134110787172, "grad_norm": 0.4824686348438263, "learning_rate": 4.493536592298864e-06, "loss": 0.192, "step": 11900 }, { "epoch": 2.4833402748854643, "grad_norm": 6.810537338256836, "learning_rate": 4.407287430696372e-06, "loss": 0.1059, "step": 11925 }, { "epoch": 2.488546438983757, "grad_norm": 5.896641731262207, "learning_rate": 4.321793964792262e-06, "loss": 0.0996, "step": 11950 }, { "epoch": 2.493752603082049, "grad_norm": 10.901542663574219, "learning_rate": 4.237059331960694e-06, "loss": 0.1538, "step": 11975 }, { "epoch": 2.4989587671803415, "grad_norm": 5.155703067779541, "learning_rate": 4.153086641728765e-06, "loss": 0.1304, "step": 12000 }, { "epoch": 2.504164931278634, "grad_norm": 0.00832182727754116, "learning_rate": 4.069878975662358e-06, "loss": 0.1498, "step": 12025 }, { "epoch": 2.509371095376926, "grad_norm": 8.369385719299316, "learning_rate": 3.9874393872531e-06, "loss": 0.2222, "step": 12050 }, { "epoch": 2.5145772594752187, "grad_norm": 3.448890209197998, "learning_rate": 3.905770901806299e-06, "loss": 0.1876, "step": 12075 }, { "epoch": 2.519783423573511, "grad_norm": 1.8173445463180542, "learning_rate": 3.824876516329881e-06, "loss": 0.0964, "step": 12100 }, { "epoch": 2.5249895876718034, "grad_norm": 8.964319229125977, "learning_rate": 3.7447591994244635e-06, "loss": 0.1555, "step": 12125 }, { "epoch": 2.530195751770096, "grad_norm": 1.7448962926864624, "learning_rate": 3.6654218911743892e-06, "loss": 0.1877, "step": 12150 }, { "epoch": 2.535401915868388, "grad_norm": 0.03552517667412758, "learning_rate": 3.5868675030398286e-06, "loss": 0.1345, "step": 12175 }, { "epoch": 2.5406080799666806, "grad_norm": 0.007262797094881535, "learning_rate": 3.509098917749962e-06, "loss": 0.1211, "step": 12200 }, { "epoch": 2.5458142440649727, "grad_norm": 6.443802833557129, "learning_rate": 3.43211898919715e-06, "loss": 0.0961, "step": 12225 }, { "epoch": 2.5510204081632653, "grad_norm": 3.5260281562805176, "learning_rate": 3.3559305423322503e-06, "loss": 0.1721, "step": 12250 }, { "epoch": 2.556226572261558, "grad_norm": 2.5505025386810303, "learning_rate": 3.280536373060919e-06, "loss": 0.1488, "step": 12275 }, { "epoch": 2.56143273635985, "grad_norm": 12.06968879699707, "learning_rate": 3.2059392481410155e-06, "loss": 0.145, "step": 12300 }, { "epoch": 2.5666389004581425, "grad_norm": 0.0032713667023926973, "learning_rate": 3.132141905081076e-06, "loss": 0.1333, "step": 12325 }, { "epoch": 2.5718450645564346, "grad_norm": 10.196958541870117, "learning_rate": 3.0591470520398513e-06, "loss": 0.1732, "step": 12350 }, { "epoch": 2.577051228654727, "grad_norm": 0.015713948756456375, "learning_rate": 2.9869573677269254e-06, "loss": 0.1367, "step": 12375 }, { "epoch": 2.5822573927530197, "grad_norm": 0.21436667442321777, "learning_rate": 2.915575501304396e-06, "loss": 0.235, "step": 12400 }, { "epoch": 2.587463556851312, "grad_norm": 0.0038804244250059128, "learning_rate": 2.845004072289695e-06, "loss": 0.0992, "step": 12425 }, { "epoch": 2.5926697209496044, "grad_norm": 6.616683483123779, "learning_rate": 2.775245670459439e-06, "loss": 0.1481, "step": 12450 }, { "epoch": 2.5978758850478965, "grad_norm": 0.006720269098877907, "learning_rate": 2.7063028557543753e-06, "loss": 0.12, "step": 12475 }, { "epoch": 2.603082049146189, "grad_norm": 1.3217498064041138, "learning_rate": 2.638178158185467e-06, "loss": 0.0997, "step": 12500 }, { "epoch": 2.6082882132444816, "grad_norm": 13.17971420288086, "learning_rate": 2.570874077741034e-06, "loss": 0.2045, "step": 12525 }, { "epoch": 2.6134943773427737, "grad_norm": 15.589527130126953, "learning_rate": 2.504393084295015e-06, "loss": 0.1074, "step": 12550 }, { "epoch": 2.6187005414410662, "grad_norm": 1.773633360862732, "learning_rate": 2.438737617516332e-06, "loss": 0.1416, "step": 12575 }, { "epoch": 2.6239067055393583, "grad_norm": 7.001961708068848, "learning_rate": 2.373910086779338e-06, "loss": 0.1821, "step": 12600 }, { "epoch": 2.629112869637651, "grad_norm": 7.044321060180664, "learning_rate": 2.309912871075445e-06, "loss": 0.1201, "step": 12625 }, { "epoch": 2.6343190337359434, "grad_norm": 8.048465728759766, "learning_rate": 2.246748318925779e-06, "loss": 0.167, "step": 12650 }, { "epoch": 2.639525197834236, "grad_norm": 6.150434494018555, "learning_rate": 2.1844187482950225e-06, "loss": 0.1569, "step": 12675 }, { "epoch": 2.644731361932528, "grad_norm": 1.490867018699646, "learning_rate": 2.122926446506332e-06, "loss": 0.0887, "step": 12700 }, { "epoch": 2.6499375260308202, "grad_norm": 6.9760003089904785, "learning_rate": 2.0622736701574136e-06, "loss": 0.0619, "step": 12725 }, { "epoch": 2.6551436901291128, "grad_norm": 4.531062602996826, "learning_rate": 2.0024626450377144e-06, "loss": 0.1153, "step": 12750 }, { "epoch": 2.6603498542274053, "grad_norm": 0.011586461216211319, "learning_rate": 1.9434955660467184e-06, "loss": 0.1071, "step": 12775 }, { "epoch": 2.665556018325698, "grad_norm": 0.5682029128074646, "learning_rate": 1.885374597113429e-06, "loss": 0.0935, "step": 12800 }, { "epoch": 2.67076218242399, "grad_norm": 0.14094886183738708, "learning_rate": 1.8281018711169522e-06, "loss": 0.0884, "step": 12825 }, { "epoch": 2.6759683465222825, "grad_norm": 0.025353549048304558, "learning_rate": 1.7716794898082034e-06, "loss": 0.1083, "step": 12850 }, { "epoch": 2.6811745106205747, "grad_norm": 10.48271656036377, "learning_rate": 1.716109523732809e-06, "loss": 0.1437, "step": 12875 }, { "epoch": 2.686380674718867, "grad_norm": 3.716670513153076, "learning_rate": 1.6613940121551014e-06, "loss": 0.177, "step": 12900 }, { "epoch": 2.6915868388171598, "grad_norm": 2.8707427978515625, "learning_rate": 1.6075349629832954e-06, "loss": 0.1107, "step": 12925 }, { "epoch": 2.696793002915452, "grad_norm": 3.083693742752075, "learning_rate": 1.554534352695808e-06, "loss": 0.1544, "step": 12950 }, { "epoch": 2.7019991670137444, "grad_norm": 0.012157919816672802, "learning_rate": 1.5023941262686997e-06, "loss": 0.1065, "step": 12975 }, { "epoch": 2.7072053311120365, "grad_norm": 6.145320415496826, "learning_rate": 1.4511161971043351e-06, "loss": 0.0982, "step": 13000 }, { "epoch": 2.712411495210329, "grad_norm": 0.1649736911058426, "learning_rate": 1.4007024469611441e-06, "loss": 0.1541, "step": 13025 }, { "epoch": 2.7176176593086216, "grad_norm": 0.16660048067569733, "learning_rate": 1.3511547258845763e-06, "loss": 0.0752, "step": 13050 }, { "epoch": 2.7228238234069138, "grad_norm": 5.491020679473877, "learning_rate": 1.3024748521392017e-06, "loss": 0.0893, "step": 13075 }, { "epoch": 2.7280299875052063, "grad_norm": 6.0252790451049805, "learning_rate": 1.2546646121419868e-06, "loss": 0.1347, "step": 13100 }, { "epoch": 2.7332361516034984, "grad_norm": 2.7115368843078613, "learning_rate": 1.2077257603967523e-06, "loss": 0.1261, "step": 13125 }, { "epoch": 2.738442315701791, "grad_norm": 14.104537963867188, "learning_rate": 1.1616600194297555e-06, "loss": 0.1585, "step": 13150 }, { "epoch": 2.7436484798000835, "grad_norm": 0.006517982110381126, "learning_rate": 1.1164690797265148e-06, "loss": 0.1143, "step": 13175 }, { "epoch": 2.7488546438983756, "grad_norm": 0.00706452364102006, "learning_rate": 1.0721545996697585e-06, "loss": 0.1482, "step": 13200 }, { "epoch": 2.754060807996668, "grad_norm": 0.16163934767246246, "learning_rate": 1.0287182054785544e-06, "loss": 0.1259, "step": 13225 }, { "epoch": 2.7592669720949603, "grad_norm": 0.0032609994523227215, "learning_rate": 9.861614911486462e-07, "loss": 0.1311, "step": 13250 }, { "epoch": 2.764473136193253, "grad_norm": 1.620920181274414, "learning_rate": 9.444860183939669e-07, "loss": 0.1162, "step": 13275 }, { "epoch": 2.7696793002915454, "grad_norm": 5.577692031860352, "learning_rate": 9.036933165893019e-07, "loss": 0.1143, "step": 13300 }, { "epoch": 2.7748854643898375, "grad_norm": 3.6756861209869385, "learning_rate": 8.637848827141859e-07, "loss": 0.1157, "step": 13325 }, { "epoch": 2.78009162848813, "grad_norm": 8.014864921569824, "learning_rate": 8.247621812979639e-07, "loss": 0.1125, "step": 13350 }, { "epoch": 2.785297792586422, "grad_norm": 0.027799520641565323, "learning_rate": 7.866266443660397e-07, "loss": 0.1147, "step": 13375 }, { "epoch": 2.7905039566847147, "grad_norm": 0.20379126071929932, "learning_rate": 7.493796713873346e-07, "loss": 0.0776, "step": 13400 }, { "epoch": 2.7957101207830073, "grad_norm": 5.621152400970459, "learning_rate": 7.130226292229314e-07, "loss": 0.1281, "step": 13425 }, { "epoch": 2.8009162848812994, "grad_norm": 9.663355827331543, "learning_rate": 6.775568520758863e-07, "loss": 0.1409, "step": 13450 }, { "epoch": 2.806122448979592, "grad_norm": 7.826466083526611, "learning_rate": 6.429836414423212e-07, "loss": 0.13, "step": 13475 }, { "epoch": 2.811328613077884, "grad_norm": 0.012044396251440048, "learning_rate": 6.093042660636095e-07, "loss": 0.2328, "step": 13500 }, { "epoch": 2.8165347771761766, "grad_norm": 2.4445865154266357, "learning_rate": 5.765199618798456e-07, "loss": 0.1251, "step": 13525 }, { "epoch": 2.821740941274469, "grad_norm": 4.6207451820373535, "learning_rate": 5.446319319844794e-07, "loss": 0.1069, "step": 13550 }, { "epoch": 2.8269471053727613, "grad_norm": 0.5208550691604614, "learning_rate": 5.13641346580171e-07, "loss": 0.0791, "step": 13575 }, { "epoch": 2.832153269471054, "grad_norm": 0.03781859204173088, "learning_rate": 4.835493429358462e-07, "loss": 0.1112, "step": 13600 }, { "epoch": 2.837359433569346, "grad_norm": 1.6827815771102905, "learning_rate": 4.5435702534495915e-07, "loss": 0.1123, "step": 13625 }, { "epoch": 2.8425655976676385, "grad_norm": 2.481886863708496, "learning_rate": 4.2606546508497103e-07, "loss": 0.1228, "step": 13650 }, { "epoch": 2.847771761765931, "grad_norm": 7.118264198303223, "learning_rate": 3.9867570037803725e-07, "loss": 0.1172, "step": 13675 }, { "epoch": 2.852977925864223, "grad_norm": 8.65257453918457, "learning_rate": 3.7218873635290195e-07, "loss": 0.111, "step": 13700 }, { "epoch": 2.8581840899625157, "grad_norm": 0.8659380674362183, "learning_rate": 3.466055450080191e-07, "loss": 0.0999, "step": 13725 }, { "epoch": 2.863390254060808, "grad_norm": 5.575057506561279, "learning_rate": 3.219270651758782e-07, "loss": 0.1201, "step": 13750 }, { "epoch": 2.8685964181591004, "grad_norm": 4.377348899841309, "learning_rate": 2.9815420248855977e-07, "loss": 0.1469, "step": 13775 }, { "epoch": 2.873802582257393, "grad_norm": 3.4709956645965576, "learning_rate": 2.7528782934449215e-07, "loss": 0.1214, "step": 13800 }, { "epoch": 2.879008746355685, "grad_norm": 0.008634321391582489, "learning_rate": 2.533287848764332e-07, "loss": 0.1104, "step": 13825 }, { "epoch": 2.8842149104539776, "grad_norm": 5.85574197769165, "learning_rate": 2.3227787492069718e-07, "loss": 0.1284, "step": 13850 }, { "epoch": 2.8894210745522697, "grad_norm": 2.434326648712158, "learning_rate": 2.1213587198755936e-07, "loss": 0.0968, "step": 13875 }, { "epoch": 2.8946272386505623, "grad_norm": 3.5275282859802246, "learning_rate": 1.929035152329145e-07, "loss": 0.1454, "step": 13900 }, { "epoch": 2.899833402748855, "grad_norm": 3.965625762939453, "learning_rate": 1.7458151043116544e-07, "loss": 0.1299, "step": 13925 }, { "epoch": 2.905039566847147, "grad_norm": 0.10151717066764832, "learning_rate": 1.5717052994929927e-07, "loss": 0.1459, "step": 13950 }, { "epoch": 2.9102457309454395, "grad_norm": 0.03408972918987274, "learning_rate": 1.4067121272223204e-07, "loss": 0.11, "step": 13975 }, { "epoch": 2.9154518950437316, "grad_norm": 0.0022567359264940023, "learning_rate": 1.2508416422935542e-07, "loss": 0.1083, "step": 14000 }, { "epoch": 2.920658059142024, "grad_norm": 7.95089054107666, "learning_rate": 1.1040995647231545e-07, "loss": 0.1088, "step": 14025 }, { "epoch": 2.9258642232403167, "grad_norm": 12.033370018005371, "learning_rate": 9.664912795402381e-08, "loss": 0.1305, "step": 14050 }, { "epoch": 2.931070387338609, "grad_norm": 0.22287705540657043, "learning_rate": 8.380218365889592e-08, "loss": 0.1694, "step": 14075 }, { "epoch": 2.9362765514369014, "grad_norm": 0.011814435012638569, "learning_rate": 7.186959503431845e-08, "loss": 0.083, "step": 14100 }, { "epoch": 2.9414827155351935, "grad_norm": 0.22026073932647705, "learning_rate": 6.08517999733521e-08, "loss": 0.0816, "step": 14125 }, { "epoch": 2.946688879633486, "grad_norm": 6.017068386077881, "learning_rate": 5.0749202798652806e-08, "loss": 0.1286, "step": 14150 }, { "epoch": 2.9518950437317786, "grad_norm": 9.714811325073242, "learning_rate": 4.156217424765296e-08, "loss": 0.1439, "step": 14175 }, { "epoch": 2.9571012078300707, "grad_norm": 0.37943577766418457, "learning_rate": 3.3291051458927966e-08, "loss": 0.0811, "step": 14200 }, { "epoch": 2.9623073719283632, "grad_norm": 0.007510739378631115, "learning_rate": 2.5936137959856054e-08, "loss": 0.1679, "step": 14225 }, { "epoch": 2.9675135360266554, "grad_norm": 4.679372310638428, "learning_rate": 1.9497703655455e-08, "loss": 0.0982, "step": 14250 }, { "epoch": 2.972719700124948, "grad_norm": 5.683784484863281, "learning_rate": 1.39759848184845e-08, "loss": 0.1231, "step": 14275 }, { "epoch": 2.9779258642232405, "grad_norm": 13.094088554382324, "learning_rate": 9.37118408078641e-09, "loss": 0.118, "step": 14300 }, { "epoch": 2.9831320283215326, "grad_norm": 7.190179347991943, "learning_rate": 5.683470425832394e-09, "loss": 0.1788, "step": 14325 }, { "epoch": 2.988338192419825, "grad_norm": 8.636795997619629, "learning_rate": 2.9129791825344145e-09, "loss": 0.1482, "step": 14350 }, { "epoch": 2.9935443565181172, "grad_norm": 5.166715145111084, "learning_rate": 1.0598120202681695e-09, "loss": 0.106, "step": 14375 }, { "epoch": 2.99875052061641, "grad_norm": 6.336511611938477, "learning_rate": 1.2403694515661368e-10, "loss": 0.1234, "step": 14400 }, { "epoch": 3.0, "eval_accuracy": 0.9105245346869713, "eval_f1_macro": 0.8981344155061894, "eval_f1_micro": 0.9105245346869713, "eval_f1_weighted": 0.9032382559745838, "eval_loss": 0.2098342627286911, "eval_precision_macro": 0.9192533404916904, "eval_precision_micro": 0.9105245346869713, "eval_precision_weighted": 0.920957800948379, "eval_recall_macro": 0.9038812615955473, "eval_recall_micro": 0.9105245346869713, "eval_recall_weighted": 0.9105245346869713, "eval_runtime": 16.1158, "eval_samples_per_second": 916.801, "eval_steps_per_second": 57.335, "step": 14406 } ], "logging_steps": 25, "max_steps": 14406, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3853101049724160.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }