| { | |
| "best_metric": 0.2098342627286911, | |
| "best_model_checkpoint": "date2format-v2/checkpoint-14406", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 14406, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005206164098292378, | |
| "grad_norm": 0.16246353089809418, | |
| "learning_rate": 8.327550312283136e-07, | |
| "loss": 0.1697, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.010412328196584756, | |
| "grad_norm": 1.2059249877929688, | |
| "learning_rate": 1.7002081887578072e-06, | |
| "loss": 0.214, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.015618492294877134, | |
| "grad_norm": 0.9199744462966919, | |
| "learning_rate": 2.5676613462873006e-06, | |
| "loss": 0.1357, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.02082465639316951, | |
| "grad_norm": 0.04551107436418533, | |
| "learning_rate": 3.4351145038167944e-06, | |
| "loss": 0.0982, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02603082049146189, | |
| "grad_norm": 1.7336697578430176, | |
| "learning_rate": 4.2678695350451074e-06, | |
| "loss": 0.1863, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03123698458975427, | |
| "grad_norm": 1.3021111488342285, | |
| "learning_rate": 5.135322692574601e-06, | |
| "loss": 0.1166, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03644314868804665, | |
| "grad_norm": 6.950756549835205, | |
| "learning_rate": 6.002775850104095e-06, | |
| "loss": 0.2385, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.04164931278633902, | |
| "grad_norm": 0.139039546251297, | |
| "learning_rate": 6.870229007633589e-06, | |
| "loss": 0.1147, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0468554768846314, | |
| "grad_norm": 6.5412492752075195, | |
| "learning_rate": 7.737682165163083e-06, | |
| "loss": 0.1682, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.05206164098292378, | |
| "grad_norm": 1.130511999130249, | |
| "learning_rate": 8.605135322692574e-06, | |
| "loss": 0.2311, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05726780508121616, | |
| "grad_norm": 14.400012969970703, | |
| "learning_rate": 9.472588480222069e-06, | |
| "loss": 0.1815, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.06247396917950854, | |
| "grad_norm": 0.6946442723274231, | |
| "learning_rate": 1.0340041637751562e-05, | |
| "loss": 0.1107, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06768013327780091, | |
| "grad_norm": 9.926340103149414, | |
| "learning_rate": 1.1207494795281055e-05, | |
| "loss": 0.1139, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.0728862973760933, | |
| "grad_norm": 1.6435810327529907, | |
| "learning_rate": 1.207494795281055e-05, | |
| "loss": 0.1471, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.07809246147438567, | |
| "grad_norm": 1.6755973100662231, | |
| "learning_rate": 1.294240111034004e-05, | |
| "loss": 0.2039, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.08329862557267805, | |
| "grad_norm": 4.045156002044678, | |
| "learning_rate": 1.3809854267869535e-05, | |
| "loss": 0.2085, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08850478967097043, | |
| "grad_norm": 0.2624989449977875, | |
| "learning_rate": 1.4677307425399028e-05, | |
| "loss": 0.182, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.0937109537692628, | |
| "grad_norm": 9.571054458618164, | |
| "learning_rate": 1.5544760582928523e-05, | |
| "loss": 0.1874, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.09891711786755518, | |
| "grad_norm": 0.22022797167301178, | |
| "learning_rate": 1.6412213740458016e-05, | |
| "loss": 0.1219, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.10412328196584757, | |
| "grad_norm": 9.949447631835938, | |
| "learning_rate": 1.727966689798751e-05, | |
| "loss": 0.2049, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.10932944606413994, | |
| "grad_norm": 5.697494983673096, | |
| "learning_rate": 1.8147120055517e-05, | |
| "loss": 0.1151, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.11453561016243231, | |
| "grad_norm": 11.763550758361816, | |
| "learning_rate": 1.9014573213046498e-05, | |
| "loss": 0.1546, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1197417742607247, | |
| "grad_norm": 0.10468779504299164, | |
| "learning_rate": 1.988202637057599e-05, | |
| "loss": 0.2386, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.12494793835901707, | |
| "grad_norm": 1.6756809949874878, | |
| "learning_rate": 2.0749479528105484e-05, | |
| "loss": 0.2487, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.13015410245730946, | |
| "grad_norm": 0.19562338292598724, | |
| "learning_rate": 2.1616932685634977e-05, | |
| "loss": 0.2314, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.13536026655560182, | |
| "grad_norm": 8.64395809173584, | |
| "learning_rate": 2.248438584316447e-05, | |
| "loss": 0.113, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.1405664306538942, | |
| "grad_norm": 0.2188766896724701, | |
| "learning_rate": 2.3351839000693966e-05, | |
| "loss": 0.1685, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.1457725947521866, | |
| "grad_norm": 0.1437786966562271, | |
| "learning_rate": 2.421929215822346e-05, | |
| "loss": 0.2467, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.15097875885047896, | |
| "grad_norm": 8.023161888122559, | |
| "learning_rate": 2.5086745315752952e-05, | |
| "loss": 0.2007, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.15618492294877134, | |
| "grad_norm": 1.2265409231185913, | |
| "learning_rate": 2.5954198473282442e-05, | |
| "loss": 0.1716, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.16139108704706373, | |
| "grad_norm": 5.027709484100342, | |
| "learning_rate": 2.6821651630811938e-05, | |
| "loss": 0.2271, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.1665972511453561, | |
| "grad_norm": 0.11683953553438187, | |
| "learning_rate": 2.768910478834143e-05, | |
| "loss": 0.2198, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.17180341524364848, | |
| "grad_norm": 0.0886659324169159, | |
| "learning_rate": 2.8556557945870927e-05, | |
| "loss": 0.2351, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.17700957934194086, | |
| "grad_norm": 10.608562469482422, | |
| "learning_rate": 2.9424011103400417e-05, | |
| "loss": 0.295, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.18221574344023322, | |
| "grad_norm": 9.556928634643555, | |
| "learning_rate": 3.0291464260929913e-05, | |
| "loss": 0.1789, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.1874219075385256, | |
| "grad_norm": 9.168340682983398, | |
| "learning_rate": 3.1158917418459406e-05, | |
| "loss": 0.2548, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.192628071636818, | |
| "grad_norm": 6.174153804779053, | |
| "learning_rate": 3.2026370575988896e-05, | |
| "loss": 0.257, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.19783423573511036, | |
| "grad_norm": 13.663798332214355, | |
| "learning_rate": 3.289382373351839e-05, | |
| "loss": 0.1947, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.20304039983340275, | |
| "grad_norm": 11.622993469238281, | |
| "learning_rate": 3.376127689104788e-05, | |
| "loss": 0.2765, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.20824656393169513, | |
| "grad_norm": 9.067373275756836, | |
| "learning_rate": 3.462873004857738e-05, | |
| "loss": 0.2063, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2134527280299875, | |
| "grad_norm": 0.04181263968348503, | |
| "learning_rate": 3.549618320610687e-05, | |
| "loss": 0.3131, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.21865889212827988, | |
| "grad_norm": 1.1748377084732056, | |
| "learning_rate": 3.6363636363636364e-05, | |
| "loss": 0.2632, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.22386505622657227, | |
| "grad_norm": 18.87828826904297, | |
| "learning_rate": 3.723108952116586e-05, | |
| "loss": 0.1721, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.22907122032486463, | |
| "grad_norm": 0.07279066741466522, | |
| "learning_rate": 3.809854267869536e-05, | |
| "loss": 0.155, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.23427738442315701, | |
| "grad_norm": 0.6264330148696899, | |
| "learning_rate": 3.8965995836224846e-05, | |
| "loss": 0.2785, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.2394835485214494, | |
| "grad_norm": 0.07900076359510422, | |
| "learning_rate": 3.983344899375434e-05, | |
| "loss": 0.1977, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.24468971261974176, | |
| "grad_norm": 0.14294634759426117, | |
| "learning_rate": 4.070090215128383e-05, | |
| "loss": 0.2297, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.24989587671803415, | |
| "grad_norm": 12.401119232177734, | |
| "learning_rate": 4.153365718251215e-05, | |
| "loss": 0.3428, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.25510204081632654, | |
| "grad_norm": 11.514626502990723, | |
| "learning_rate": 4.240111034004164e-05, | |
| "loss": 0.2381, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.2603082049146189, | |
| "grad_norm": 0.33966752886772156, | |
| "learning_rate": 4.3268563497571134e-05, | |
| "loss": 0.2446, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.2655143690129113, | |
| "grad_norm": 13.058968544006348, | |
| "learning_rate": 4.4136016655100624e-05, | |
| "loss": 0.2878, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.27072053311120364, | |
| "grad_norm": 11.9553861618042, | |
| "learning_rate": 4.500346981263012e-05, | |
| "loss": 0.2735, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.27592669720949603, | |
| "grad_norm": 0.3588428497314453, | |
| "learning_rate": 4.5870922970159617e-05, | |
| "loss": 0.2163, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.2811328613077884, | |
| "grad_norm": 23.0421142578125, | |
| "learning_rate": 4.673837612768911e-05, | |
| "loss": 0.2516, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.2863390254060808, | |
| "grad_norm": 0.3783215880393982, | |
| "learning_rate": 4.757113115891742e-05, | |
| "loss": 0.3394, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.2915451895043732, | |
| "grad_norm": 0.13371586799621582, | |
| "learning_rate": 4.843858431644692e-05, | |
| "loss": 0.2155, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.2967513536026656, | |
| "grad_norm": 15.4042329788208, | |
| "learning_rate": 4.930603747397641e-05, | |
| "loss": 0.3902, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.3019575177009579, | |
| "grad_norm": 13.482477188110352, | |
| "learning_rate": 4.9999981651326544e-05, | |
| "loss": 0.3137, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.3071636817992503, | |
| "grad_norm": 2.373297691345215, | |
| "learning_rate": 4.9999339450583704e-05, | |
| "loss": 0.3525, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.3123698458975427, | |
| "grad_norm": 0.45967090129852295, | |
| "learning_rate": 4.999777984310187e-05, | |
| "loss": 0.3325, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.3175760099958351, | |
| "grad_norm": 0.21776865422725677, | |
| "learning_rate": 4.9995302886114334e-05, | |
| "loss": 0.3339, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.32278217409412746, | |
| "grad_norm": 4.673847198486328, | |
| "learning_rate": 4.9991908670518586e-05, | |
| "loss": 0.2315, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.32798833819241985, | |
| "grad_norm": 2.8383209705352783, | |
| "learning_rate": 4.998759732087296e-05, | |
| "loss": 0.3757, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.3331945022907122, | |
| "grad_norm": 6.966940402984619, | |
| "learning_rate": 4.998236899539209e-05, | |
| "loss": 0.2376, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.33840066638900457, | |
| "grad_norm": 14.109524726867676, | |
| "learning_rate": 4.997622388594109e-05, | |
| "loss": 0.3046, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.34360683048729695, | |
| "grad_norm": 0.9895745515823364, | |
| "learning_rate": 4.9969162218028495e-05, | |
| "loss": 0.2425, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.34881299458558934, | |
| "grad_norm": 1.0948731899261475, | |
| "learning_rate": 4.996118425079804e-05, | |
| "loss": 0.3732, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.35401915868388173, | |
| "grad_norm": 10.473814010620117, | |
| "learning_rate": 4.9952290277019095e-05, | |
| "loss": 0.3086, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.3592253227821741, | |
| "grad_norm": 1.947543740272522, | |
| "learning_rate": 4.994248062307593e-05, | |
| "loss": 0.3405, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.36443148688046645, | |
| "grad_norm": 7.7466254234313965, | |
| "learning_rate": 4.993175564895577e-05, | |
| "loss": 0.1969, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.36963765097875884, | |
| "grad_norm": 11.565778732299805, | |
| "learning_rate": 4.992011574823555e-05, | |
| "loss": 0.3424, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.3748438150770512, | |
| "grad_norm": 2.8535590171813965, | |
| "learning_rate": 4.99075613480675e-05, | |
| "loss": 0.2885, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.3800499791753436, | |
| "grad_norm": 36.82617950439453, | |
| "learning_rate": 4.9894092909163436e-05, | |
| "loss": 0.3159, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.385256143273636, | |
| "grad_norm": 0.5960198044776917, | |
| "learning_rate": 4.98797109257779e-05, | |
| "loss": 0.2981, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.3904623073719284, | |
| "grad_norm": 23.564931869506836, | |
| "learning_rate": 4.986441592568994e-05, | |
| "loss": 0.2802, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.3956684714702207, | |
| "grad_norm": 4.4088134765625, | |
| "learning_rate": 4.984820847018388e-05, | |
| "loss": 0.378, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.4008746355685131, | |
| "grad_norm": 13.824947357177734, | |
| "learning_rate": 4.983108915402855e-05, | |
| "loss": 0.2409, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.4060807996668055, | |
| "grad_norm": 14.21284294128418, | |
| "learning_rate": 4.981305860545561e-05, | |
| "loss": 0.3306, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.4112869637650979, | |
| "grad_norm": 0.1888038069009781, | |
| "learning_rate": 4.9794117486136404e-05, | |
| "loss": 0.3458, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.41649312786339027, | |
| "grad_norm": 0.19039294123649597, | |
| "learning_rate": 4.977426649115773e-05, | |
| "loss": 0.1629, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.42169929196168265, | |
| "grad_norm": 0.42835375666618347, | |
| "learning_rate": 4.9753506348996284e-05, | |
| "loss": 0.2258, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.426905456059975, | |
| "grad_norm": 4.263399124145508, | |
| "learning_rate": 4.973183782149198e-05, | |
| "loss": 0.3, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.4321116201582674, | |
| "grad_norm": 0.17805464565753937, | |
| "learning_rate": 4.970926170381995e-05, | |
| "loss": 0.2086, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.43731778425655976, | |
| "grad_norm": 0.9103575348854065, | |
| "learning_rate": 4.968577882446139e-05, | |
| "loss": 0.2435, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.44252394835485215, | |
| "grad_norm": 10.369997024536133, | |
| "learning_rate": 4.9661390045173126e-05, | |
| "loss": 0.2264, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.44773011245314454, | |
| "grad_norm": 2.6744861602783203, | |
| "learning_rate": 4.963609626095603e-05, | |
| "loss": 0.3012, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.4529362765514369, | |
| "grad_norm": 12.971126556396484, | |
| "learning_rate": 4.960989840002216e-05, | |
| "loss": 0.2855, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.45814244064972925, | |
| "grad_norm": 5.078539848327637, | |
| "learning_rate": 4.958279742376068e-05, | |
| "loss": 0.2832, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.46334860474802164, | |
| "grad_norm": 11.258842468261719, | |
| "learning_rate": 4.955479432670259e-05, | |
| "loss": 0.3402, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.46855476884631403, | |
| "grad_norm": 9.847345352172852, | |
| "learning_rate": 4.952589013648425e-05, | |
| "loss": 0.1914, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.4737609329446064, | |
| "grad_norm": 0.0392930842936039, | |
| "learning_rate": 4.949608591380963e-05, | |
| "loss": 0.1341, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.4789670970428988, | |
| "grad_norm": 7.803274154663086, | |
| "learning_rate": 4.946538275241144e-05, | |
| "loss": 0.3284, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.4841732611411912, | |
| "grad_norm": 0.38506531715393066, | |
| "learning_rate": 4.943378177901091e-05, | |
| "loss": 0.3496, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.4893794252394835, | |
| "grad_norm": 8.00683879852295, | |
| "learning_rate": 4.940128415327654e-05, | |
| "loss": 0.33, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.4945855893377759, | |
| "grad_norm": 5.389349460601807, | |
| "learning_rate": 4.9367891067781454e-05, | |
| "loss": 0.2392, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.4997917534360683, | |
| "grad_norm": 0.18322338163852692, | |
| "learning_rate": 4.9333603747959725e-05, | |
| "loss": 0.3306, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.5049979175343606, | |
| "grad_norm": 47.38001251220703, | |
| "learning_rate": 4.9298423452061306e-05, | |
| "loss": 0.3651, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.5102040816326531, | |
| "grad_norm": 1.591893196105957, | |
| "learning_rate": 4.926235147110593e-05, | |
| "loss": 0.1469, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.5154102457309454, | |
| "grad_norm": 2.414854049682617, | |
| "learning_rate": 4.922538912883573e-05, | |
| "loss": 0.3108, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.5206164098292378, | |
| "grad_norm": 0.7650768756866455, | |
| "learning_rate": 4.91875377816666e-05, | |
| "loss": 0.1384, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.5258225739275302, | |
| "grad_norm": 12.073479652404785, | |
| "learning_rate": 4.9148798818638486e-05, | |
| "loss": 0.3325, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.5310287380258226, | |
| "grad_norm": 1.0136282444000244, | |
| "learning_rate": 4.910917366136438e-05, | |
| "loss": 0.2324, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.536234902124115, | |
| "grad_norm": 4.9315667152404785, | |
| "learning_rate": 4.906866376397816e-05, | |
| "loss": 0.3278, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.5414410662224073, | |
| "grad_norm": 18.607717514038086, | |
| "learning_rate": 4.902727061308121e-05, | |
| "loss": 0.3609, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.5466472303206997, | |
| "grad_norm": 28.146434783935547, | |
| "learning_rate": 4.89849957276879e-05, | |
| "loss": 0.229, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.5518533944189921, | |
| "grad_norm": 6.487234592437744, | |
| "learning_rate": 4.894184065916981e-05, | |
| "loss": 0.1822, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.5570595585172845, | |
| "grad_norm": 12.08668327331543, | |
| "learning_rate": 4.8897806991198796e-05, | |
| "loss": 0.1988, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.5622657226155768, | |
| "grad_norm": 6.793110370635986, | |
| "learning_rate": 4.885289633968891e-05, | |
| "loss": 0.2108, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.5674718867138692, | |
| "grad_norm": 13.04969596862793, | |
| "learning_rate": 4.880711035273709e-05, | |
| "loss": 0.2559, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.5726780508121616, | |
| "grad_norm": 0.07108098268508911, | |
| "learning_rate": 4.876045071056262e-05, | |
| "loss": 0.2236, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.5778842149104539, | |
| "grad_norm": 4.550630569458008, | |
| "learning_rate": 4.871291912544558e-05, | |
| "loss": 0.1857, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.5830903790087464, | |
| "grad_norm": 3.2425384521484375, | |
| "learning_rate": 4.8664517341663886e-05, | |
| "loss": 0.1981, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.5882965431070387, | |
| "grad_norm": 1.5111477375030518, | |
| "learning_rate": 4.861524713542939e-05, | |
| "loss": 0.2145, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.5935027072053312, | |
| "grad_norm": 0.029720915481448174, | |
| "learning_rate": 4.856511031482264e-05, | |
| "loss": 0.1805, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.5987088713036235, | |
| "grad_norm": 14.783000946044922, | |
| "learning_rate": 4.8514108719726516e-05, | |
| "loss": 0.2574, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.6039150354019158, | |
| "grad_norm": 0.23782485723495483, | |
| "learning_rate": 4.846224422175877e-05, | |
| "loss": 0.272, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.6091211995002083, | |
| "grad_norm": 0.6656458377838135, | |
| "learning_rate": 4.840951872420327e-05, | |
| "loss": 0.1804, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.6143273635985006, | |
| "grad_norm": 7.324411869049072, | |
| "learning_rate": 4.835593416194023e-05, | |
| "loss": 0.2526, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.619533527696793, | |
| "grad_norm": 17.861886978149414, | |
| "learning_rate": 4.8301492501375135e-05, | |
| "loss": 0.2352, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.6247396917950854, | |
| "grad_norm": 16.718368530273438, | |
| "learning_rate": 4.824619574036663e-05, | |
| "loss": 0.394, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.6299458558933777, | |
| "grad_norm": 0.1666000634431839, | |
| "learning_rate": 4.819004590815317e-05, | |
| "loss": 0.2006, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.6351520199916701, | |
| "grad_norm": 4.644481182098389, | |
| "learning_rate": 4.8133045065278584e-05, | |
| "loss": 0.2226, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.6403581840899625, | |
| "grad_norm": 23.20795249938965, | |
| "learning_rate": 4.807519530351644e-05, | |
| "loss": 0.2966, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.6455643481882549, | |
| "grad_norm": 0.05183548480272293, | |
| "learning_rate": 4.801649874579327e-05, | |
| "loss": 0.1597, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.6507705122865473, | |
| "grad_norm": 8.726778030395508, | |
| "learning_rate": 4.79569575461107e-05, | |
| "loss": 0.3323, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.6559766763848397, | |
| "grad_norm": 0.2523828446865082, | |
| "learning_rate": 4.789657388946637e-05, | |
| "loss": 0.1146, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.661182840483132, | |
| "grad_norm": 0.07221566885709763, | |
| "learning_rate": 4.7835349991773775e-05, | |
| "loss": 0.3426, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.6663890045814244, | |
| "grad_norm": 0.2359621673822403, | |
| "learning_rate": 4.7773288099780935e-05, | |
| "loss": 0.2444, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.6715951686797168, | |
| "grad_norm": 18.833477020263672, | |
| "learning_rate": 4.7710390490987936e-05, | |
| "loss": 0.3419, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.6768013327780091, | |
| "grad_norm": 17.171281814575195, | |
| "learning_rate": 4.764665947356337e-05, | |
| "loss": 0.2313, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.6820074968763016, | |
| "grad_norm": 0.4939661920070648, | |
| "learning_rate": 4.758209738625963e-05, | |
| "loss": 0.2724, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.6872136609745939, | |
| "grad_norm": 0.8321401476860046, | |
| "learning_rate": 4.751670659832708e-05, | |
| "loss": 0.1976, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6924198250728864, | |
| "grad_norm": 1.6382664442062378, | |
| "learning_rate": 4.74504895094271e-05, | |
| "loss": 0.2768, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.6976259891711787, | |
| "grad_norm": 0.03149043396115303, | |
| "learning_rate": 4.738344854954404e-05, | |
| "loss": 0.2127, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.702832153269471, | |
| "grad_norm": 0.08274863660335541, | |
| "learning_rate": 4.7315586178896035e-05, | |
| "loss": 0.2105, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.7080383173677635, | |
| "grad_norm": 2.460942268371582, | |
| "learning_rate": 4.724690488784474e-05, | |
| "loss": 0.3465, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.7132444814660558, | |
| "grad_norm": 13.02415943145752, | |
| "learning_rate": 4.717740719680391e-05, | |
| "loss": 0.2309, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.7184506455643482, | |
| "grad_norm": 15.129639625549316, | |
| "learning_rate": 4.710709565614694e-05, | |
| "loss": 0.1043, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.7236568096626406, | |
| "grad_norm": 0.19271063804626465, | |
| "learning_rate": 4.703597284611325e-05, | |
| "loss": 0.312, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.7288629737609329, | |
| "grad_norm": 16.245803833007812, | |
| "learning_rate": 4.696404137671362e-05, | |
| "loss": 0.3068, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.7340691378592253, | |
| "grad_norm": 0.28644460439682007, | |
| "learning_rate": 4.689130388763437e-05, | |
| "loss": 0.3067, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.7392753019575177, | |
| "grad_norm": 19.579248428344727, | |
| "learning_rate": 4.681776304814056e-05, | |
| "loss": 0.2592, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.7444814660558101, | |
| "grad_norm": 16.12538719177246, | |
| "learning_rate": 4.6743421556977934e-05, | |
| "loss": 0.1631, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.7496876301541024, | |
| "grad_norm": 0.7716278433799744, | |
| "learning_rate": 4.6668282142274e-05, | |
| "loss": 0.2962, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.7548937942523949, | |
| "grad_norm": 6.197509288787842, | |
| "learning_rate": 4.659234756143782e-05, | |
| "loss": 0.2491, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.7600999583506872, | |
| "grad_norm": 0.5237298011779785, | |
| "learning_rate": 4.651562060105886e-05, | |
| "loss": 0.2988, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.7653061224489796, | |
| "grad_norm": 6.333150386810303, | |
| "learning_rate": 4.643810407680475e-05, | |
| "loss": 0.2558, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.770512286547272, | |
| "grad_norm": 0.02860177680850029, | |
| "learning_rate": 4.6359800833317915e-05, | |
| "loss": 0.1318, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.7757184506455643, | |
| "grad_norm": 0.030804630368947983, | |
| "learning_rate": 4.6280713744111215e-05, | |
| "loss": 0.2002, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.7809246147438568, | |
| "grad_norm": 10.464898109436035, | |
| "learning_rate": 4.620084571146247e-05, | |
| "loss": 0.3029, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.7861307788421491, | |
| "grad_norm": 1.6494271755218506, | |
| "learning_rate": 4.6120199666307975e-05, | |
| "loss": 0.377, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.7913369429404414, | |
| "grad_norm": 10.32836627960205, | |
| "learning_rate": 4.603877856813494e-05, | |
| "loss": 0.1742, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.7965431070387339, | |
| "grad_norm": 5.073435306549072, | |
| "learning_rate": 4.5956585404872884e-05, | |
| "loss": 0.1921, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.8017492711370262, | |
| "grad_norm": 11.85318374633789, | |
| "learning_rate": 4.587362319278397e-05, | |
| "loss": 0.2957, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.8069554352353187, | |
| "grad_norm": 3.998944044113159, | |
| "learning_rate": 4.5789894976352344e-05, | |
| "loss": 0.2102, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.812161599333611, | |
| "grad_norm": 0.569960355758667, | |
| "learning_rate": 4.570540382817239e-05, | |
| "loss": 0.245, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.8173677634319034, | |
| "grad_norm": 5.945977687835693, | |
| "learning_rate": 4.562015284883597e-05, | |
| "loss": 0.2113, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 0.8225739275301958, | |
| "grad_norm": 10.898477554321289, | |
| "learning_rate": 4.553414516681868e-05, | |
| "loss": 0.2446, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.8277800916284881, | |
| "grad_norm": 0.25490328669548035, | |
| "learning_rate": 4.5447383938364974e-05, | |
| "loss": 0.301, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.8329862557267805, | |
| "grad_norm": 4.246921539306641, | |
| "learning_rate": 4.535987234737242e-05, | |
| "loss": 0.2944, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.8381924198250729, | |
| "grad_norm": 6.019783973693848, | |
| "learning_rate": 4.527161360527481e-05, | |
| "loss": 0.3184, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 0.8433985839233653, | |
| "grad_norm": 0.0902986004948616, | |
| "learning_rate": 4.51826109509243e-05, | |
| "loss": 0.1508, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.8486047480216576, | |
| "grad_norm": 7.770606517791748, | |
| "learning_rate": 4.509286765047258e-05, | |
| "loss": 0.4018, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 0.85381091211995, | |
| "grad_norm": 0.20602120459079742, | |
| "learning_rate": 4.5002386997251025e-05, | |
| "loss": 0.3172, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.8590170762182424, | |
| "grad_norm": 9.447431564331055, | |
| "learning_rate": 4.491117231164978e-05, | |
| "loss": 0.2055, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.8642232403165347, | |
| "grad_norm": 4.260950088500977, | |
| "learning_rate": 4.481922694099602e-05, | |
| "loss": 0.1652, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.8694294044148272, | |
| "grad_norm": 8.093120574951172, | |
| "learning_rate": 4.4726554259430966e-05, | |
| "loss": 0.1701, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 0.8746355685131195, | |
| "grad_norm": 9.310951232910156, | |
| "learning_rate": 4.4633157667786194e-05, | |
| "loss": 0.3233, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.879841732611412, | |
| "grad_norm": 13.830717086791992, | |
| "learning_rate": 4.453904059345877e-05, | |
| "loss": 0.2724, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 0.8850478967097043, | |
| "grad_norm": 7.378146648406982, | |
| "learning_rate": 4.4444206490285465e-05, | |
| "loss": 0.3583, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.8902540608079966, | |
| "grad_norm": 0.06943502277135849, | |
| "learning_rate": 4.4352494400727496e-05, | |
| "loss": 0.3111, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.8954602249062891, | |
| "grad_norm": 5.857403755187988, | |
| "learning_rate": 4.425626504054255e-05, | |
| "loss": 0.3536, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.9006663890045814, | |
| "grad_norm": 12.8510160446167, | |
| "learning_rate": 4.4159329028593835e-05, | |
| "loss": 0.3129, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 0.9058725531028738, | |
| "grad_norm": 0.7335798740386963, | |
| "learning_rate": 4.406168992216536e-05, | |
| "loss": 0.2536, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.9110787172011662, | |
| "grad_norm": 0.06461632251739502, | |
| "learning_rate": 4.396335130434277e-05, | |
| "loss": 0.1635, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.9162848812994585, | |
| "grad_norm": 0.6194918751716614, | |
| "learning_rate": 4.386431678388183e-05, | |
| "loss": 0.2273, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.921491045397751, | |
| "grad_norm": 0.06244231015443802, | |
| "learning_rate": 4.3764589995076045e-05, | |
| "loss": 0.2393, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.9266972094960433, | |
| "grad_norm": 4.408567428588867, | |
| "learning_rate": 4.3664174597623225e-05, | |
| "loss": 0.1189, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.9319033735943357, | |
| "grad_norm": 0.7255458235740662, | |
| "learning_rate": 4.3563074276491224e-05, | |
| "loss": 0.2784, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 0.9371095376926281, | |
| "grad_norm": 0.09919915348291397, | |
| "learning_rate": 4.346129274178271e-05, | |
| "loss": 0.1761, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.9423157017909205, | |
| "grad_norm": 13.636004447937012, | |
| "learning_rate": 4.335883372859901e-05, | |
| "loss": 0.3252, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 0.9475218658892128, | |
| "grad_norm": 0.1237548440694809, | |
| "learning_rate": 4.3255700996903027e-05, | |
| "loss": 0.1931, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.9527280299875052, | |
| "grad_norm": 0.06882551312446594, | |
| "learning_rate": 4.315189833138129e-05, | |
| "loss": 0.256, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.9579341940857976, | |
| "grad_norm": 0.21859368681907654, | |
| "learning_rate": 4.304742954130504e-05, | |
| "loss": 0.2087, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.9631403581840899, | |
| "grad_norm": 4.428884983062744, | |
| "learning_rate": 4.294229846039045e-05, | |
| "loss": 0.1962, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.9683465222823824, | |
| "grad_norm": 6.920009613037109, | |
| "learning_rate": 4.2836508946657944e-05, | |
| "loss": 0.2301, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.9735526863806747, | |
| "grad_norm": 3.4040772914886475, | |
| "learning_rate": 4.273006488229061e-05, | |
| "loss": 0.287, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 0.978758850478967, | |
| "grad_norm": 7.0576043128967285, | |
| "learning_rate": 4.2622970173491734e-05, | |
| "loss": 0.1524, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.9839650145772595, | |
| "grad_norm": 0.026531610637903214, | |
| "learning_rate": 4.2515228750341466e-05, | |
| "loss": 0.1662, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.9891711786755518, | |
| "grad_norm": 1.0448293685913086, | |
| "learning_rate": 4.240684456665257e-05, | |
| "loss": 0.2114, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.9943773427738443, | |
| "grad_norm": 44.79957962036133, | |
| "learning_rate": 4.229782159982536e-05, | |
| "loss": 0.2157, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 0.9995835068721366, | |
| "grad_norm": 10.862527847290039, | |
| "learning_rate": 4.2188163850701734e-05, | |
| "loss": 0.2925, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.892994923857868, | |
| "eval_f1_macro": 0.8730666664599849, | |
| "eval_f1_micro": 0.892994923857868, | |
| "eval_f1_weighted": 0.8798157289522093, | |
| "eval_loss": 0.34922197461128235, | |
| "eval_precision_macro": 0.9003919914273503, | |
| "eval_precision_micro": 0.892994923857868, | |
| "eval_precision_weighted": 0.9045471108195786, | |
| "eval_recall_macro": 0.8862461348175634, | |
| "eval_recall_micro": 0.892994923857868, | |
| "eval_recall_weighted": 0.892994923857868, | |
| "eval_runtime": 16.3696, | |
| "eval_samples_per_second": 902.587, | |
| "eval_steps_per_second": 56.446, | |
| "step": 4802 | |
| }, | |
| { | |
| "epoch": 1.004789670970429, | |
| "grad_norm": 10.292591094970703, | |
| "learning_rate": 4.2077875343418325e-05, | |
| "loss": 0.2158, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 1.0099958350687213, | |
| "grad_norm": 5.3238325119018555, | |
| "learning_rate": 4.1966960125258884e-05, | |
| "loss": 0.2987, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.0152019991670138, | |
| "grad_norm": 0.32381194829940796, | |
| "learning_rate": 4.1855422266505675e-05, | |
| "loss": 0.3283, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 1.0204081632653061, | |
| "grad_norm": 7.971839904785156, | |
| "learning_rate": 4.174326586029018e-05, | |
| "loss": 0.19, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.0256143273635985, | |
| "grad_norm": 0.252726286649704, | |
| "learning_rate": 4.1630495022442884e-05, | |
| "loss": 0.3692, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 1.0308204914618908, | |
| "grad_norm": 3.9147284030914307, | |
| "learning_rate": 4.1517113891342174e-05, | |
| "loss": 0.2321, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.0360266555601834, | |
| "grad_norm": 3.525928020477295, | |
| "learning_rate": 4.1403126627762546e-05, | |
| "loss": 0.1547, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 1.0412328196584757, | |
| "grad_norm": 0.9435555934906006, | |
| "learning_rate": 4.128853741472187e-05, | |
| "loss": 0.1764, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.046438983756768, | |
| "grad_norm": 9.357694625854492, | |
| "learning_rate": 4.117335045732791e-05, | |
| "loss": 0.2553, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 1.0516451478550604, | |
| "grad_norm": 0.10351333767175674, | |
| "learning_rate": 4.105756998262399e-05, | |
| "loss": 0.128, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.0568513119533527, | |
| "grad_norm": 3.0234265327453613, | |
| "learning_rate": 4.094120023943387e-05, | |
| "loss": 0.2764, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 1.0620574760516452, | |
| "grad_norm": 2.055335283279419, | |
| "learning_rate": 4.082424549820586e-05, | |
| "loss": 0.1752, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.0672636401499376, | |
| "grad_norm": 9.345911026000977, | |
| "learning_rate": 4.070671005085607e-05, | |
| "loss": 0.2165, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 1.07246980424823, | |
| "grad_norm": 0.36394619941711426, | |
| "learning_rate": 4.058859821061092e-05, | |
| "loss": 0.1157, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.0776759683465222, | |
| "grad_norm": 8.331086158752441, | |
| "learning_rate": 4.046991431184887e-05, | |
| "loss": 0.1545, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 1.0828821324448146, | |
| "grad_norm": 9.395284652709961, | |
| "learning_rate": 4.0350662709941325e-05, | |
| "loss": 0.2645, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.0880882965431071, | |
| "grad_norm": 6.326489448547363, | |
| "learning_rate": 4.023084778109283e-05, | |
| "loss": 0.2223, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 1.0932944606413995, | |
| "grad_norm": 11.211806297302246, | |
| "learning_rate": 4.0110473922180526e-05, | |
| "loss": 0.2022, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.0985006247396918, | |
| "grad_norm": 0.9454079270362854, | |
| "learning_rate": 3.998954555059266e-05, | |
| "loss": 0.2831, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 1.1037067888379841, | |
| "grad_norm": 2.5448479652404785, | |
| "learning_rate": 3.9868067104066646e-05, | |
| "loss": 0.1692, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.1089129529362765, | |
| "grad_norm": 2.1148276329040527, | |
| "learning_rate": 3.9746043040526074e-05, | |
| "loss": 0.3686, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 1.114119117034569, | |
| "grad_norm": 18.591083526611328, | |
| "learning_rate": 3.9623477837917167e-05, | |
| "loss": 0.2465, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.1193252811328613, | |
| "grad_norm": 1.9231871366500854, | |
| "learning_rate": 3.950037599404451e-05, | |
| "loss": 0.2383, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 1.1245314452311537, | |
| "grad_norm": 13.078465461730957, | |
| "learning_rate": 3.93767420264059e-05, | |
| "loss": 0.2129, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.129737609329446, | |
| "grad_norm": 0.015137123875319958, | |
| "learning_rate": 3.9252580472026616e-05, | |
| "loss": 0.123, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 1.1349437734277386, | |
| "grad_norm": 1.9116227626800537, | |
| "learning_rate": 3.912789588729292e-05, | |
| "loss": 0.2135, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.1401499375260309, | |
| "grad_norm": 0.04986105486750603, | |
| "learning_rate": 3.9002692847784824e-05, | |
| "loss": 0.3574, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 1.1453561016243232, | |
| "grad_norm": 8.818171501159668, | |
| "learning_rate": 3.8876975948108236e-05, | |
| "loss": 0.1246, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.1505622657226156, | |
| "grad_norm": 0.3172764182090759, | |
| "learning_rate": 3.8750749801726275e-05, | |
| "loss": 0.182, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 1.1557684298209079, | |
| "grad_norm": 9.364998817443848, | |
| "learning_rate": 3.862401904079004e-05, | |
| "loss": 0.2951, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.1609745939192004, | |
| "grad_norm": 7.908264636993408, | |
| "learning_rate": 3.849678831596855e-05, | |
| "loss": 0.1521, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 1.1661807580174928, | |
| "grad_norm": 0.3125484585762024, | |
| "learning_rate": 3.8369062296278174e-05, | |
| "loss": 0.2956, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.171386922115785, | |
| "grad_norm": 0.6337696313858032, | |
| "learning_rate": 3.824084566891118e-05, | |
| "loss": 0.2355, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 1.1765930862140774, | |
| "grad_norm": 0.07673631608486176, | |
| "learning_rate": 3.8112143139063824e-05, | |
| "loss": 0.2098, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.1817992503123698, | |
| "grad_norm": 0.02253473550081253, | |
| "learning_rate": 3.798295942976362e-05, | |
| "loss": 0.1149, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 1.1870054144106623, | |
| "grad_norm": 6.865312099456787, | |
| "learning_rate": 3.7853299281696056e-05, | |
| "loss": 0.3913, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.1922115785089547, | |
| "grad_norm": 6.194192409515381, | |
| "learning_rate": 3.7723167453030586e-05, | |
| "loss": 0.1924, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 1.197417742607247, | |
| "grad_norm": 0.12381599843502045, | |
| "learning_rate": 3.759256871924604e-05, | |
| "loss": 0.3331, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.2026239067055393, | |
| "grad_norm": 0.02437855489552021, | |
| "learning_rate": 3.74615078729554e-05, | |
| "loss": 0.1819, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 1.2078300708038316, | |
| "grad_norm": 14.187339782714844, | |
| "learning_rate": 3.732998972372991e-05, | |
| "loss": 0.2303, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.2130362349021242, | |
| "grad_norm": 16.453699111938477, | |
| "learning_rate": 3.719801909792251e-05, | |
| "loss": 0.3025, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 1.2182423990004165, | |
| "grad_norm": 1.748321771621704, | |
| "learning_rate": 3.706560083849083e-05, | |
| "loss": 0.2085, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.2234485630987089, | |
| "grad_norm": 0.6774524450302124, | |
| "learning_rate": 3.693273980481942e-05, | |
| "loss": 0.2587, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 1.2286547271970012, | |
| "grad_norm": 12.350029945373535, | |
| "learning_rate": 3.679944087254141e-05, | |
| "loss": 0.1885, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.2338608912952935, | |
| "grad_norm": 6.396539688110352, | |
| "learning_rate": 3.6665708933359576e-05, | |
| "loss": 0.2404, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 1.239067055393586, | |
| "grad_norm": 5.529823303222656, | |
| "learning_rate": 3.6531548894866896e-05, | |
| "loss": 0.2086, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.2442732194918784, | |
| "grad_norm": 4.696390628814697, | |
| "learning_rate": 3.639696568036639e-05, | |
| "loss": 0.1309, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 1.2494793835901707, | |
| "grad_norm": 0.10419386625289917, | |
| "learning_rate": 3.6261964228690454e-05, | |
| "loss": 0.1567, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.254685547688463, | |
| "grad_norm": 0.19173868000507355, | |
| "learning_rate": 3.612654949401967e-05, | |
| "loss": 0.1632, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 1.2598917117867554, | |
| "grad_norm": 10.412970542907715, | |
| "learning_rate": 3.599072644570092e-05, | |
| "loss": 0.1771, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.265097875885048, | |
| "grad_norm": 13.899092674255371, | |
| "learning_rate": 3.585450006806512e-05, | |
| "loss": 0.0964, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 1.2703040399833403, | |
| "grad_norm": 11.265216827392578, | |
| "learning_rate": 3.5717875360244214e-05, | |
| "loss": 0.3711, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.2755102040816326, | |
| "grad_norm": 5.9480977058410645, | |
| "learning_rate": 3.5580857335987774e-05, | |
| "loss": 0.2482, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 1.280716368179925, | |
| "grad_norm": 7.014443874359131, | |
| "learning_rate": 3.544345102347902e-05, | |
| "loss": 0.258, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.2859225322782173, | |
| "grad_norm": 0.06656356900930405, | |
| "learning_rate": 3.530566146515028e-05, | |
| "loss": 0.1432, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 1.2911286963765098, | |
| "grad_norm": 9.881967544555664, | |
| "learning_rate": 3.5167493717497905e-05, | |
| "loss": 0.127, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.2963348604748022, | |
| "grad_norm": 7.715837478637695, | |
| "learning_rate": 3.502895285089679e-05, | |
| "loss": 0.229, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 1.3015410245730945, | |
| "grad_norm": 0.07192976772785187, | |
| "learning_rate": 3.489004394941425e-05, | |
| "loss": 0.1111, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.3067471886713868, | |
| "grad_norm": 1.0921132564544678, | |
| "learning_rate": 3.475077211062346e-05, | |
| "loss": 0.1642, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 1.3119533527696792, | |
| "grad_norm": 0.2106587141752243, | |
| "learning_rate": 3.461114244541641e-05, | |
| "loss": 0.2393, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.3171595168679717, | |
| "grad_norm": 5.716639518737793, | |
| "learning_rate": 3.4471160077816314e-05, | |
| "loss": 0.2525, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 1.322365680966264, | |
| "grad_norm": 0.30353063344955444, | |
| "learning_rate": 3.43308301447896e-05, | |
| "loss": 0.2426, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.3275718450645564, | |
| "grad_norm": 0.039772335439920425, | |
| "learning_rate": 3.419015779605737e-05, | |
| "loss": 0.2302, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 1.332778009162849, | |
| "grad_norm": 0.7781673073768616, | |
| "learning_rate": 3.404914819390646e-05, | |
| "loss": 0.2583, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.3379841732611413, | |
| "grad_norm": 24.190811157226562, | |
| "learning_rate": 3.3907806512999966e-05, | |
| "loss": 0.1873, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 1.3431903373594336, | |
| "grad_norm": 9.922744750976562, | |
| "learning_rate": 3.3766137940187364e-05, | |
| "loss": 0.2441, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.348396501457726, | |
| "grad_norm": 7.696400165557861, | |
| "learning_rate": 3.362414767431414e-05, | |
| "loss": 0.1051, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 1.3536026655560183, | |
| "grad_norm": 5.509871482849121, | |
| "learning_rate": 3.348184092603105e-05, | |
| "loss": 0.1288, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.3588088296543108, | |
| "grad_norm": 12.904980659484863, | |
| "learning_rate": 3.333922291760286e-05, | |
| "loss": 0.2988, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 1.3640149937526032, | |
| "grad_norm": 0.11204788088798523, | |
| "learning_rate": 3.319629888271675e-05, | |
| "loss": 0.1962, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.3692211578508955, | |
| "grad_norm": 0.16563156247138977, | |
| "learning_rate": 3.3053074066290214e-05, | |
| "loss": 0.2317, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 1.3744273219491878, | |
| "grad_norm": 10.849634170532227, | |
| "learning_rate": 3.290955372427858e-05, | |
| "loss": 0.1759, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.3796334860474802, | |
| "grad_norm": 13.899421691894531, | |
| "learning_rate": 3.276574312348218e-05, | |
| "loss": 0.3169, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 1.3848396501457727, | |
| "grad_norm": 0.03900701552629471, | |
| "learning_rate": 3.2621647541353015e-05, | |
| "loss": 0.2785, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.390045814244065, | |
| "grad_norm": 4.0548095703125, | |
| "learning_rate": 3.2477272265801126e-05, | |
| "loss": 0.2163, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 1.3952519783423574, | |
| "grad_norm": 0.026262715458869934, | |
| "learning_rate": 3.233841378366721e-05, | |
| "loss": 0.2331, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.4004581424406497, | |
| "grad_norm": 1.4128097295761108, | |
| "learning_rate": 3.2193505687298915e-05, | |
| "loss": 0.138, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 1.405664306538942, | |
| "grad_norm": 0.03084075264632702, | |
| "learning_rate": 3.2048333609131926e-05, | |
| "loss": 0.2318, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.4108704706372346, | |
| "grad_norm": 0.08986690640449524, | |
| "learning_rate": 3.19029028765807e-05, | |
| "loss": 0.1265, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 1.416076634735527, | |
| "grad_norm": 0.33206573128700256, | |
| "learning_rate": 3.1757218826551634e-05, | |
| "loss": 0.0584, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.4212827988338192, | |
| "grad_norm": 0.31883516907691956, | |
| "learning_rate": 3.161128680524714e-05, | |
| "loss": 0.1517, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 1.4264889629321116, | |
| "grad_norm": 0.03625530004501343, | |
| "learning_rate": 3.1465112167969504e-05, | |
| "loss": 0.1541, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.431695127030404, | |
| "grad_norm": 9.222389221191406, | |
| "learning_rate": 3.1318700278924374e-05, | |
| "loss": 0.267, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 1.4369012911286965, | |
| "grad_norm": 9.740432739257812, | |
| "learning_rate": 3.11779266463367e-05, | |
| "loss": 0.2008, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.4421074552269888, | |
| "grad_norm": 0.007975614629685879, | |
| "learning_rate": 3.1031065337471356e-05, | |
| "loss": 0.1723, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 1.4473136193252811, | |
| "grad_norm": 9.000720977783203, | |
| "learning_rate": 3.088398270515885e-05, | |
| "loss": 0.1206, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.4525197834235735, | |
| "grad_norm": 9.622346878051758, | |
| "learning_rate": 3.0736684146925715e-05, | |
| "loss": 0.1296, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 1.4577259475218658, | |
| "grad_norm": 0.9583789706230164, | |
| "learning_rate": 3.0589175068222385e-05, | |
| "loss": 0.144, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.4629321116201583, | |
| "grad_norm": 37.292327880859375, | |
| "learning_rate": 3.044146088222479e-05, | |
| "loss": 0.253, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 1.4681382757184507, | |
| "grad_norm": 6.009835243225098, | |
| "learning_rate": 3.0293547009635758e-05, | |
| "loss": 0.1883, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.473344439816743, | |
| "grad_norm": 0.2210305631160736, | |
| "learning_rate": 3.0145438878486033e-05, | |
| "loss": 0.2264, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 1.4785506039150353, | |
| "grad_norm": 6.8800764083862305, | |
| "learning_rate": 2.9997141923935136e-05, | |
| "loss": 0.2641, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.4837567680133277, | |
| "grad_norm": 0.2739255130290985, | |
| "learning_rate": 2.9848661588071876e-05, | |
| "loss": 0.2043, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 1.4889629321116202, | |
| "grad_norm": 0.016436373814940453, | |
| "learning_rate": 2.9700003319714648e-05, | |
| "loss": 0.1275, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.4941690962099126, | |
| "grad_norm": 3.340723752975464, | |
| "learning_rate": 2.9551172574211478e-05, | |
| "loss": 0.3281, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 1.499375260308205, | |
| "grad_norm": 4.6393256187438965, | |
| "learning_rate": 2.9402174813239836e-05, | |
| "loss": 0.1756, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.5045814244064974, | |
| "grad_norm": 6.234640121459961, | |
| "learning_rate": 2.9253015504606197e-05, | |
| "loss": 0.2412, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 1.5097875885047896, | |
| "grad_norm": 2.9910178184509277, | |
| "learning_rate": 2.910370012204537e-05, | |
| "loss": 0.2625, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.5149937526030821, | |
| "grad_norm": 0.030267061665654182, | |
| "learning_rate": 2.8954234145019672e-05, | |
| "loss": 0.1825, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 1.5201999167013744, | |
| "grad_norm": 12.349358558654785, | |
| "learning_rate": 2.8804623058517805e-05, | |
| "loss": 0.2111, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.5254060807996668, | |
| "grad_norm": 0.03321736305952072, | |
| "learning_rate": 2.86548723528536e-05, | |
| "loss": 0.1627, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 1.5306122448979593, | |
| "grad_norm": 12.457853317260742, | |
| "learning_rate": 2.8504987523464515e-05, | |
| "loss": 0.2239, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.5358184089962514, | |
| "grad_norm": 0.0661047101020813, | |
| "learning_rate": 2.8354974070709983e-05, | |
| "loss": 0.2431, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 1.541024573094544, | |
| "grad_norm": 0.02619466558098793, | |
| "learning_rate": 2.820483749966955e-05, | |
| "loss": 0.1084, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.5462307371928363, | |
| "grad_norm": 3.5517141819000244, | |
| "learning_rate": 2.8054583319940896e-05, | |
| "loss": 0.116, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 1.5514369012911287, | |
| "grad_norm": 8.054045677185059, | |
| "learning_rate": 2.7904217045437564e-05, | |
| "loss": 0.2523, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.5566430653894212, | |
| "grad_norm": 5.730096340179443, | |
| "learning_rate": 2.775374419418671e-05, | |
| "loss": 0.1687, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 1.5618492294877133, | |
| "grad_norm": 0.11246989667415619, | |
| "learning_rate": 2.7603170288126546e-05, | |
| "loss": 0.1266, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.5670553935860059, | |
| "grad_norm": 7.816544532775879, | |
| "learning_rate": 2.74525008529037e-05, | |
| "loss": 0.2546, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 1.5722615576842982, | |
| "grad_norm": 0.06883124262094498, | |
| "learning_rate": 2.7301741417670485e-05, | |
| "loss": 0.1984, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.5774677217825905, | |
| "grad_norm": 3.755141019821167, | |
| "learning_rate": 2.715089751488195e-05, | |
| "loss": 0.1695, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 1.582673885880883, | |
| "grad_norm": 0.14307986199855804, | |
| "learning_rate": 2.6999974680092882e-05, | |
| "loss": 0.1959, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.5878800499791752, | |
| "grad_norm": 16.175771713256836, | |
| "learning_rate": 2.684897845175463e-05, | |
| "loss": 0.21, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 1.5930862140774678, | |
| "grad_norm": 0.39594829082489014, | |
| "learning_rate": 2.6697914371011912e-05, | |
| "loss": 0.1769, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.59829237817576, | |
| "grad_norm": 11.465653419494629, | |
| "learning_rate": 2.654678798149942e-05, | |
| "loss": 0.1581, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 1.6034985422740524, | |
| "grad_norm": 0.006868015043437481, | |
| "learning_rate": 2.639560482913843e-05, | |
| "loss": 0.2084, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.608704706372345, | |
| "grad_norm": 23.506370544433594, | |
| "learning_rate": 2.6244370461933226e-05, | |
| "loss": 0.1521, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 1.613910870470637, | |
| "grad_norm": 7.843533039093018, | |
| "learning_rate": 2.609309042976757e-05, | |
| "loss": 0.161, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.6191170345689296, | |
| "grad_norm": 0.6957140564918518, | |
| "learning_rate": 2.5941770284200968e-05, | |
| "loss": 0.1201, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 1.624323198667222, | |
| "grad_norm": 0.09775109589099884, | |
| "learning_rate": 2.5790415578265e-05, | |
| "loss": 0.0975, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.6295293627655143, | |
| "grad_norm": 7.59830379486084, | |
| "learning_rate": 2.5639031866259512e-05, | |
| "loss": 0.1495, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 1.6347355268638069, | |
| "grad_norm": 6.013265132904053, | |
| "learning_rate": 2.5487624703548783e-05, | |
| "loss": 0.2432, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.639941690962099, | |
| "grad_norm": 0.011226486414670944, | |
| "learning_rate": 2.5336199646357693e-05, | |
| "loss": 0.1438, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 1.6451478550603915, | |
| "grad_norm": 12.745257377624512, | |
| "learning_rate": 2.518476225156776e-05, | |
| "loss": 0.1853, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.6503540191586838, | |
| "grad_norm": 0.028018401935696602, | |
| "learning_rate": 2.5033318076513295e-05, | |
| "loss": 0.269, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 1.6555601832569762, | |
| "grad_norm": 0.29439255595207214, | |
| "learning_rate": 2.4881872678777408e-05, | |
| "loss": 0.2584, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.6607663473552687, | |
| "grad_norm": 7.931988716125488, | |
| "learning_rate": 2.473043161598808e-05, | |
| "loss": 0.2546, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 1.665972511453561, | |
| "grad_norm": 0.24659812450408936, | |
| "learning_rate": 2.4579000445614214e-05, | |
| "loss": 0.0941, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.6711786755518534, | |
| "grad_norm": 0.13519421219825745, | |
| "learning_rate": 2.4427584724761687e-05, | |
| "loss": 0.1369, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 1.6763848396501457, | |
| "grad_norm": 2.32975435256958, | |
| "learning_rate": 2.4276190009969428e-05, | |
| "loss": 0.0967, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.681591003748438, | |
| "grad_norm": 10.135041236877441, | |
| "learning_rate": 2.412482185700548e-05, | |
| "loss": 0.2521, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 1.6867971678467306, | |
| "grad_norm": 5.711327075958252, | |
| "learning_rate": 2.397348582066317e-05, | |
| "loss": 0.2155, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.692003331945023, | |
| "grad_norm": 5.974716663360596, | |
| "learning_rate": 2.382218745455721e-05, | |
| "loss": 0.1574, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 1.6972094960433153, | |
| "grad_norm": 0.01547964382916689, | |
| "learning_rate": 2.3670932310919928e-05, | |
| "loss": 0.1252, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.7024156601416076, | |
| "grad_norm": 0.8667640089988708, | |
| "learning_rate": 2.3519725940397516e-05, | |
| "loss": 0.1717, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 1.7076218242399, | |
| "grad_norm": 0.2800462245941162, | |
| "learning_rate": 2.3368573891846307e-05, | |
| "loss": 0.1726, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.7128279883381925, | |
| "grad_norm": 0.2914607524871826, | |
| "learning_rate": 2.321748171212919e-05, | |
| "loss": 0.1686, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 1.7180341524364848, | |
| "grad_norm": 5.2183051109313965, | |
| "learning_rate": 2.3066454945912003e-05, | |
| "loss": 0.2877, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.7232403165347772, | |
| "grad_norm": 5.748877048492432, | |
| "learning_rate": 2.2915499135460123e-05, | |
| "loss": 0.157, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 1.7284464806330697, | |
| "grad_norm": 9.520060539245605, | |
| "learning_rate": 2.276461982043503e-05, | |
| "loss": 0.2562, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.7336526447313618, | |
| "grad_norm": 0.03092977963387966, | |
| "learning_rate": 2.2613822537691016e-05, | |
| "loss": 0.1034, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 1.7388588088296544, | |
| "grad_norm": 0.1688009798526764, | |
| "learning_rate": 2.2463112821072063e-05, | |
| "loss": 0.1484, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.7440649729279467, | |
| "grad_norm": 0.5068601369857788, | |
| "learning_rate": 2.2312496201208654e-05, | |
| "loss": 0.1381, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 1.749271137026239, | |
| "grad_norm": 0.58343505859375, | |
| "learning_rate": 2.2161978205314934e-05, | |
| "loss": 0.2426, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.7544773011245316, | |
| "grad_norm": 5.165789604187012, | |
| "learning_rate": 2.20115643569858e-05, | |
| "loss": 0.2173, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 1.7596834652228237, | |
| "grad_norm": 0.6824327111244202, | |
| "learning_rate": 2.1861260175994203e-05, | |
| "loss": 0.1443, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.7648896293211163, | |
| "grad_norm": 0.2100004255771637, | |
| "learning_rate": 2.1711071178088633e-05, | |
| "loss": 0.1741, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 1.7700957934194086, | |
| "grad_norm": 0.46001136302948, | |
| "learning_rate": 2.1561002874790662e-05, | |
| "loss": 0.2116, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.775301957517701, | |
| "grad_norm": 7.581360816955566, | |
| "learning_rate": 2.1411060773192704e-05, | |
| "loss": 0.1379, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 1.7805081216159935, | |
| "grad_norm": 7.0736846923828125, | |
| "learning_rate": 2.126125037575594e-05, | |
| "loss": 0.1403, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.7857142857142856, | |
| "grad_norm": 0.7845435738563538, | |
| "learning_rate": 2.1111577180108343e-05, | |
| "loss": 0.1406, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 1.7909204498125781, | |
| "grad_norm": 0.2352413535118103, | |
| "learning_rate": 2.0962046678842974e-05, | |
| "loss": 0.1425, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.7961266139108705, | |
| "grad_norm": 0.2534882426261902, | |
| "learning_rate": 2.08126643593164e-05, | |
| "loss": 0.2827, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 1.8013327780091628, | |
| "grad_norm": 0.015448580496013165, | |
| "learning_rate": 2.066343570344733e-05, | |
| "loss": 0.2161, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.8065389421074554, | |
| "grad_norm": 11.641529083251953, | |
| "learning_rate": 2.0514366187515444e-05, | |
| "loss": 0.141, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 1.8117451062057475, | |
| "grad_norm": 8.481783866882324, | |
| "learning_rate": 2.036546128196041e-05, | |
| "loss": 0.1693, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.81695127030404, | |
| "grad_norm": 0.046340491622686386, | |
| "learning_rate": 2.021672645118118e-05, | |
| "loss": 0.1685, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 1.8221574344023324, | |
| "grad_norm": 0.02451934851706028, | |
| "learning_rate": 2.0068167153335388e-05, | |
| "loss": 0.2014, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.8273635985006247, | |
| "grad_norm": 0.008840017020702362, | |
| "learning_rate": 1.991978884013914e-05, | |
| "loss": 0.139, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 1.8325697625989172, | |
| "grad_norm": 0.03715880587697029, | |
| "learning_rate": 1.9771596956666876e-05, | |
| "loss": 0.1835, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.8377759266972093, | |
| "grad_norm": 0.052263110876083374, | |
| "learning_rate": 1.9623596941151584e-05, | |
| "loss": 0.1522, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 1.842982090795502, | |
| "grad_norm": 0.14420367777347565, | |
| "learning_rate": 1.9475794224785242e-05, | |
| "loss": 0.1649, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.8481882548937942, | |
| "grad_norm": 5.102858066558838, | |
| "learning_rate": 1.9328194231519464e-05, | |
| "loss": 0.1966, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 1.8533944189920866, | |
| "grad_norm": 2.9605753421783447, | |
| "learning_rate": 1.918080237786651e-05, | |
| "loss": 0.1057, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.8586005830903791, | |
| "grad_norm": 0.06543917208909988, | |
| "learning_rate": 1.9033624072700505e-05, | |
| "loss": 0.2098, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 1.8638067471886712, | |
| "grad_norm": 4.984257221221924, | |
| "learning_rate": 1.8886664717058894e-05, | |
| "loss": 0.2006, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.8690129112869638, | |
| "grad_norm": 0.3344804048538208, | |
| "learning_rate": 1.8739929703944314e-05, | |
| "loss": 0.1492, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 1.8742190753852561, | |
| "grad_norm": 6.675142765045166, | |
| "learning_rate": 1.859342441812662e-05, | |
| "loss": 0.1568, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.8794252394835484, | |
| "grad_norm": 13.291411399841309, | |
| "learning_rate": 1.8447154235945336e-05, | |
| "loss": 0.2015, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 1.884631403581841, | |
| "grad_norm": 0.030354047194123268, | |
| "learning_rate": 1.8301124525112325e-05, | |
| "loss": 0.1201, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.8898375676801331, | |
| "grad_norm": 11.661486625671387, | |
| "learning_rate": 1.8155340644514798e-05, | |
| "loss": 0.1744, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 1.8950437317784257, | |
| "grad_norm": 9.253254890441895, | |
| "learning_rate": 1.8009807944018715e-05, | |
| "loss": 0.1305, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.900249895876718, | |
| "grad_norm": 0.049124184995889664, | |
| "learning_rate": 1.7864531764272356e-05, | |
| "loss": 0.225, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 1.9054560599750103, | |
| "grad_norm": 10.553680419921875, | |
| "learning_rate": 1.7719517436510462e-05, | |
| "loss": 0.213, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.9106622240733029, | |
| "grad_norm": 0.042677950114011765, | |
| "learning_rate": 1.7574770282358505e-05, | |
| "loss": 0.1058, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 1.9158683881715952, | |
| "grad_norm": 0.8776458501815796, | |
| "learning_rate": 1.7430295613637404e-05, | |
| "loss": 0.1907, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.9210745522698875, | |
| "grad_norm": 9.908956527709961, | |
| "learning_rate": 1.7286098732168644e-05, | |
| "loss": 0.1863, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 1.9262807163681799, | |
| "grad_norm": 1.294067144393921, | |
| "learning_rate": 1.7142184929579662e-05, | |
| "loss": 0.1053, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.9314868804664722, | |
| "grad_norm": 5.95350980758667, | |
| "learning_rate": 1.6998559487109693e-05, | |
| "loss": 0.1541, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 1.9366930445647648, | |
| "grad_norm": 7.54666805267334, | |
| "learning_rate": 1.6855227675415966e-05, | |
| "loss": 0.1696, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.941899208663057, | |
| "grad_norm": 0.0091070756316185, | |
| "learning_rate": 1.671219475438024e-05, | |
| "loss": 0.212, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 1.9471053727613494, | |
| "grad_norm": 0.05602679401636124, | |
| "learning_rate": 1.656946597291584e-05, | |
| "loss": 0.133, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.9523115368596418, | |
| "grad_norm": 0.04069928824901581, | |
| "learning_rate": 1.6427046568774977e-05, | |
| "loss": 0.1599, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 1.957517700957934, | |
| "grad_norm": 0.21093979477882385, | |
| "learning_rate": 1.628494176835661e-05, | |
| "loss": 0.1432, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.9627238650562266, | |
| "grad_norm": 6.424279689788818, | |
| "learning_rate": 1.614315678651457e-05, | |
| "loss": 0.1286, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 1.967930029154519, | |
| "grad_norm": 8.281681060791016, | |
| "learning_rate": 1.6001696826366247e-05, | |
| "loss": 0.0896, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.9731361932528113, | |
| "grad_norm": 3.2924630641937256, | |
| "learning_rate": 1.5860567079101657e-05, | |
| "loss": 0.2068, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 1.9783423573511039, | |
| "grad_norm": 0.10305780172348022, | |
| "learning_rate": 1.5719772723792857e-05, | |
| "loss": 0.133, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.983548521449396, | |
| "grad_norm": 0.013987602666020393, | |
| "learning_rate": 1.5579318927204003e-05, | |
| "loss": 0.1602, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 1.9887546855476885, | |
| "grad_norm": 8.741622924804688, | |
| "learning_rate": 1.5439210843601647e-05, | |
| "loss": 0.1248, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.9939608496459809, | |
| "grad_norm": 7.777089595794678, | |
| "learning_rate": 1.5299453614565646e-05, | |
| "loss": 0.1656, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 1.9991670137442732, | |
| "grad_norm": 7.620024681091309, | |
| "learning_rate": 1.5160052368800466e-05, | |
| "loss": 0.1867, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9082910321489002, | |
| "eval_f1_macro": 0.8934973914545552, | |
| "eval_f1_micro": 0.9082910321489002, | |
| "eval_f1_weighted": 0.8990272847487176, | |
| "eval_loss": 0.23952987790107727, | |
| "eval_precision_macro": 0.9212318034319655, | |
| "eval_precision_micro": 0.9082910321489002, | |
| "eval_precision_weighted": 0.9229994939426459, | |
| "eval_recall_macro": 0.9014322820037105, | |
| "eval_recall_micro": 0.9082910321489002, | |
| "eval_recall_weighted": 0.9082910321489002, | |
| "eval_runtime": 16.0892, | |
| "eval_samples_per_second": 918.319, | |
| "eval_steps_per_second": 57.43, | |
| "step": 9604 | |
| }, | |
| { | |
| "epoch": 2.0043731778425657, | |
| "grad_norm": 0.036709289997816086, | |
| "learning_rate": 1.5021012221946956e-05, | |
| "loss": 0.1042, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 2.009579341940858, | |
| "grad_norm": 0.03700362890958786, | |
| "learning_rate": 1.4882338276394644e-05, | |
| "loss": 0.1846, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 2.0147855060391504, | |
| "grad_norm": 0.2605753540992737, | |
| "learning_rate": 1.4744035621094468e-05, | |
| "loss": 0.1531, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 2.0199916701374425, | |
| "grad_norm": 1.4101078510284424, | |
| "learning_rate": 1.460610933137206e-05, | |
| "loss": 0.1186, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.025197834235735, | |
| "grad_norm": 7.530198097229004, | |
| "learning_rate": 1.4468564468741464e-05, | |
| "loss": 0.1534, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 2.0304039983340276, | |
| "grad_norm": 0.06903871148824692, | |
| "learning_rate": 1.433140608071939e-05, | |
| "loss": 0.2258, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.0356101624323197, | |
| "grad_norm": 0.9064328670501709, | |
| "learning_rate": 1.4194639200640023e-05, | |
| "loss": 0.1681, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 2.0408163265306123, | |
| "grad_norm": 0.04811558499932289, | |
| "learning_rate": 1.4058268847470241e-05, | |
| "loss": 0.1343, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.0460224906289044, | |
| "grad_norm": 5.909759998321533, | |
| "learning_rate": 1.3922300025625539e-05, | |
| "loss": 0.0999, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 2.051228654727197, | |
| "grad_norm": 1.272133708000183, | |
| "learning_rate": 1.37867377247863e-05, | |
| "loss": 0.1376, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 2.0564348188254895, | |
| "grad_norm": 3.806577682495117, | |
| "learning_rate": 1.3651586919714671e-05, | |
| "loss": 0.158, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 2.0616409829237816, | |
| "grad_norm": 1.572864294052124, | |
| "learning_rate": 1.3516852570072086e-05, | |
| "loss": 0.1046, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.066847147022074, | |
| "grad_norm": 0.07213819772005081, | |
| "learning_rate": 1.33825396202372e-05, | |
| "loss": 0.142, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 2.0720533111203667, | |
| "grad_norm": 6.35291862487793, | |
| "learning_rate": 1.324865299912445e-05, | |
| "loss": 0.1243, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 2.077259475218659, | |
| "grad_norm": 11.946340560913086, | |
| "learning_rate": 1.31151976200032e-05, | |
| "loss": 0.1512, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 2.0824656393169514, | |
| "grad_norm": 0.20692221820354462, | |
| "learning_rate": 1.2982178380317416e-05, | |
| "loss": 0.137, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.0876718034152435, | |
| "grad_norm": 0.24739764630794525, | |
| "learning_rate": 1.2849600161505957e-05, | |
| "loss": 0.1535, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 2.092877967513536, | |
| "grad_norm": 0.6494514346122742, | |
| "learning_rate": 1.2717467828823409e-05, | |
| "loss": 0.1838, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 2.0980841316118286, | |
| "grad_norm": 13.001143455505371, | |
| "learning_rate": 1.25857862311616e-05, | |
| "loss": 0.1973, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 2.1032902957101207, | |
| "grad_norm": 2.27883243560791, | |
| "learning_rate": 1.2454560200871612e-05, | |
| "loss": 0.2116, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 2.1084964598084133, | |
| "grad_norm": 14.335226058959961, | |
| "learning_rate": 1.2323794553586462e-05, | |
| "loss": 0.1645, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 2.1137026239067054, | |
| "grad_norm": 0.5073531866073608, | |
| "learning_rate": 1.2193494088044399e-05, | |
| "loss": 0.2067, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 2.118908788004998, | |
| "grad_norm": 10.126875877380371, | |
| "learning_rate": 1.206366358591274e-05, | |
| "loss": 0.2034, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 2.1241149521032905, | |
| "grad_norm": 0.4860388934612274, | |
| "learning_rate": 1.1934307811612502e-05, | |
| "loss": 0.1298, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.1293211162015826, | |
| "grad_norm": 11.252408981323242, | |
| "learning_rate": 1.1805431512143473e-05, | |
| "loss": 0.2134, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 2.134527280299875, | |
| "grad_norm": 0.6547976732254028, | |
| "learning_rate": 1.1677039416910018e-05, | |
| "loss": 0.2518, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 2.1397334443981673, | |
| "grad_norm": 0.02918720245361328, | |
| "learning_rate": 1.1549136237547577e-05, | |
| "loss": 0.1141, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 2.14493960849646, | |
| "grad_norm": 2.0306875705718994, | |
| "learning_rate": 1.1421726667749714e-05, | |
| "loss": 0.2479, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.1501457725947524, | |
| "grad_norm": 3.7051842212677, | |
| "learning_rate": 1.1294815383095877e-05, | |
| "loss": 0.1666, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 2.1553519366930445, | |
| "grad_norm": 7.603741645812988, | |
| "learning_rate": 1.1168407040879842e-05, | |
| "loss": 0.1395, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.160558100791337, | |
| "grad_norm": 0.0051424442790448666, | |
| "learning_rate": 1.1042506279938771e-05, | |
| "loss": 0.1033, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 2.165764264889629, | |
| "grad_norm": 0.43630650639533997, | |
| "learning_rate": 1.0917117720483014e-05, | |
| "loss": 0.061, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.1709704289879217, | |
| "grad_norm": 0.06688734143972397, | |
| "learning_rate": 1.0792245963926504e-05, | |
| "loss": 0.1547, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 2.1761765930862143, | |
| "grad_norm": 0.005384028889238834, | |
| "learning_rate": 1.0667895592717969e-05, | |
| "loss": 0.1822, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 2.1813827571845064, | |
| "grad_norm": 6.822201251983643, | |
| "learning_rate": 1.0544071170172723e-05, | |
| "loss": 0.0977, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 2.186588921282799, | |
| "grad_norm": 0.17409928143024445, | |
| "learning_rate": 1.0420777240305213e-05, | |
| "loss": 0.1472, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.191795085381091, | |
| "grad_norm": 4.65806245803833, | |
| "learning_rate": 1.0298018327662284e-05, | |
| "loss": 0.1499, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 2.1970012494793836, | |
| "grad_norm": 0.050723303109407425, | |
| "learning_rate": 1.0175798937157085e-05, | |
| "loss": 0.1228, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 2.202207413577676, | |
| "grad_norm": 7.599793434143066, | |
| "learning_rate": 1.0054123553903849e-05, | |
| "loss": 0.1122, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 2.2074135776759682, | |
| "grad_norm": 0.2461252212524414, | |
| "learning_rate": 9.93299664305322e-06, | |
| "loss": 0.0847, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.212619741774261, | |
| "grad_norm": 0.014030307531356812, | |
| "learning_rate": 9.812422649628388e-06, | |
| "loss": 0.1286, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 2.217825905872553, | |
| "grad_norm": 0.010962835513055325, | |
| "learning_rate": 9.692405998362045e-06, | |
| "loss": 0.0591, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 2.2230320699708455, | |
| "grad_norm": 0.009474786929786205, | |
| "learning_rate": 9.572951093533946e-06, | |
| "loss": 0.1037, | |
| "step": 10675 | |
| }, | |
| { | |
| "epoch": 2.228238234069138, | |
| "grad_norm": 0.02288031578063965, | |
| "learning_rate": 9.454062318809313e-06, | |
| "loss": 0.1174, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.23344439816743, | |
| "grad_norm": 10.316886901855469, | |
| "learning_rate": 9.335744037077918e-06, | |
| "loss": 0.1019, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 2.2386505622657227, | |
| "grad_norm": 3.191359758377075, | |
| "learning_rate": 9.218000590294072e-06, | |
| "loss": 0.0714, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 2.243856726364015, | |
| "grad_norm": 0.9904782176017761, | |
| "learning_rate": 9.100836299317201e-06, | |
| "loss": 0.1085, | |
| "step": 10775 | |
| }, | |
| { | |
| "epoch": 2.2490628904623073, | |
| "grad_norm": 0.00978647731244564, | |
| "learning_rate": 8.9842554637533e-06, | |
| "loss": 0.1005, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.2542690545606, | |
| "grad_norm": 0.5314901471138, | |
| "learning_rate": 8.868262361797181e-06, | |
| "loss": 0.1389, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 2.259475218658892, | |
| "grad_norm": 2.777710437774658, | |
| "learning_rate": 8.75286125007545e-06, | |
| "loss": 0.1211, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 2.2646813827571846, | |
| "grad_norm": 9.188045501708984, | |
| "learning_rate": 8.638056363490302e-06, | |
| "loss": 0.1837, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 2.269887546855477, | |
| "grad_norm": 8.704696655273438, | |
| "learning_rate": 8.523851915064129e-06, | |
| "loss": 0.1512, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.275093710953769, | |
| "grad_norm": 11.265883445739746, | |
| "learning_rate": 8.410252095784866e-06, | |
| "loss": 0.1485, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 2.2802998750520618, | |
| "grad_norm": 8.006060600280762, | |
| "learning_rate": 8.297261074452281e-06, | |
| "loss": 0.1927, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.285506039150354, | |
| "grad_norm": 6.185884475708008, | |
| "learning_rate": 8.184882997524884e-06, | |
| "loss": 0.0961, | |
| "step": 10975 | |
| }, | |
| { | |
| "epoch": 2.2907122032486464, | |
| "grad_norm": 8.097982406616211, | |
| "learning_rate": 8.073121988967849e-06, | |
| "loss": 0.1119, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.295918367346939, | |
| "grad_norm": 6.152439117431641, | |
| "learning_rate": 7.961982150101643e-06, | |
| "loss": 0.116, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 2.301124531445231, | |
| "grad_norm": 4.259856224060059, | |
| "learning_rate": 7.851467559451508e-06, | |
| "loss": 0.1936, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 2.3063306955435237, | |
| "grad_norm": 3.0448789596557617, | |
| "learning_rate": 7.741582272597823e-06, | |
| "loss": 0.1005, | |
| "step": 11075 | |
| }, | |
| { | |
| "epoch": 2.3115368596418158, | |
| "grad_norm": 0.018132351338863373, | |
| "learning_rate": 7.632330322027212e-06, | |
| "loss": 0.1356, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.3167430237401083, | |
| "grad_norm": 12.663254737854004, | |
| "learning_rate": 7.523715716984661e-06, | |
| "loss": 0.1243, | |
| "step": 11125 | |
| }, | |
| { | |
| "epoch": 2.321949187838401, | |
| "grad_norm": 0.21571685373783112, | |
| "learning_rate": 7.415742443326309e-06, | |
| "loss": 0.1938, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 2.327155351936693, | |
| "grad_norm": 0.023954235017299652, | |
| "learning_rate": 7.308414463373189e-06, | |
| "loss": 0.1548, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 2.3323615160349855, | |
| "grad_norm": 0.010295159183442593, | |
| "learning_rate": 7.2017357157658585e-06, | |
| "loss": 0.145, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.3375676801332776, | |
| "grad_norm": 0.6988667845726013, | |
| "learning_rate": 7.095710115319831e-06, | |
| "loss": 0.0901, | |
| "step": 11225 | |
| }, | |
| { | |
| "epoch": 2.34277384423157, | |
| "grad_norm": 4.98328161239624, | |
| "learning_rate": 6.99034155288193e-06, | |
| "loss": 0.1512, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 2.3479800083298628, | |
| "grad_norm": 1.8333989381790161, | |
| "learning_rate": 6.885633895187493e-06, | |
| "loss": 0.152, | |
| "step": 11275 | |
| }, | |
| { | |
| "epoch": 2.353186172428155, | |
| "grad_norm": 0.01870441809296608, | |
| "learning_rate": 6.781590984718461e-06, | |
| "loss": 0.2451, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.3583923365264474, | |
| "grad_norm": 0.03275001421570778, | |
| "learning_rate": 6.678216639562429e-06, | |
| "loss": 0.1564, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 2.3635985006247395, | |
| "grad_norm": 0.021069686859846115, | |
| "learning_rate": 6.5755146532724495e-06, | |
| "loss": 0.1644, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 2.368804664723032, | |
| "grad_norm": 5.0085577964782715, | |
| "learning_rate": 6.473488794727878e-06, | |
| "loss": 0.1459, | |
| "step": 11375 | |
| }, | |
| { | |
| "epoch": 2.3740108288213246, | |
| "grad_norm": 8.764829635620117, | |
| "learning_rate": 6.372142807996051e-06, | |
| "loss": 0.1538, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.3792169929196167, | |
| "grad_norm": 10.779679298400879, | |
| "learning_rate": 6.27148041219488e-06, | |
| "loss": 0.163, | |
| "step": 11425 | |
| }, | |
| { | |
| "epoch": 2.3844231570179093, | |
| "grad_norm": 0.08707818388938904, | |
| "learning_rate": 6.17150530135639e-06, | |
| "loss": 0.1641, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 2.3896293211162014, | |
| "grad_norm": 0.5683345198631287, | |
| "learning_rate": 6.07222114429111e-06, | |
| "loss": 0.1532, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 2.394835485214494, | |
| "grad_norm": 6.6075639724731445, | |
| "learning_rate": 5.9736315844535235e-06, | |
| "loss": 0.167, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.4000416493127865, | |
| "grad_norm": 0.18501617014408112, | |
| "learning_rate": 5.875740239808283e-06, | |
| "loss": 0.1036, | |
| "step": 11525 | |
| }, | |
| { | |
| "epoch": 2.4052478134110786, | |
| "grad_norm": 0.05444851890206337, | |
| "learning_rate": 5.778550702697463e-06, | |
| "loss": 0.1271, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 2.410453977509371, | |
| "grad_norm": 4.797399044036865, | |
| "learning_rate": 5.682066539708763e-06, | |
| "loss": 0.1147, | |
| "step": 11575 | |
| }, | |
| { | |
| "epoch": 2.4156601416076633, | |
| "grad_norm": 1.1078044176101685, | |
| "learning_rate": 5.586291291544585e-06, | |
| "loss": 0.1421, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.420866305705956, | |
| "grad_norm": 0.07829868793487549, | |
| "learning_rate": 5.491228472892118e-06, | |
| "loss": 0.1598, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 2.4260724698042484, | |
| "grad_norm": 3.3450565338134766, | |
| "learning_rate": 5.396881572294363e-06, | |
| "loss": 0.1359, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 2.4312786339025405, | |
| "grad_norm": 0.017295390367507935, | |
| "learning_rate": 5.303254052022075e-06, | |
| "loss": 0.0646, | |
| "step": 11675 | |
| }, | |
| { | |
| "epoch": 2.436484798000833, | |
| "grad_norm": 13.109850883483887, | |
| "learning_rate": 5.210349347946783e-06, | |
| "loss": 0.2058, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.441690962099125, | |
| "grad_norm": 0.01693105697631836, | |
| "learning_rate": 5.118170869414618e-06, | |
| "loss": 0.1465, | |
| "step": 11725 | |
| }, | |
| { | |
| "epoch": 2.4468971261974177, | |
| "grad_norm": 0.01354676578193903, | |
| "learning_rate": 5.026721999121256e-06, | |
| "loss": 0.0848, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 2.4521032902957103, | |
| "grad_norm": 0.5930526852607727, | |
| "learning_rate": 4.9360060929877636e-06, | |
| "loss": 0.1185, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 2.4573094543940024, | |
| "grad_norm": 12.73507308959961, | |
| "learning_rate": 4.846026480037444e-06, | |
| "loss": 0.1434, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.462515618492295, | |
| "grad_norm": 6.715247631072998, | |
| "learning_rate": 4.7567864622736824e-06, | |
| "loss": 0.1317, | |
| "step": 11825 | |
| }, | |
| { | |
| "epoch": 2.467721782590587, | |
| "grad_norm": 0.002648524707183242, | |
| "learning_rate": 4.668289314558735e-06, | |
| "loss": 0.1621, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 2.4729279466888796, | |
| "grad_norm": 8.284907341003418, | |
| "learning_rate": 4.580538284493616e-06, | |
| "loss": 0.1486, | |
| "step": 11875 | |
| }, | |
| { | |
| "epoch": 2.478134110787172, | |
| "grad_norm": 0.4824686348438263, | |
| "learning_rate": 4.493536592298864e-06, | |
| "loss": 0.192, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.4833402748854643, | |
| "grad_norm": 6.810537338256836, | |
| "learning_rate": 4.407287430696372e-06, | |
| "loss": 0.1059, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 2.488546438983757, | |
| "grad_norm": 5.896641731262207, | |
| "learning_rate": 4.321793964792262e-06, | |
| "loss": 0.0996, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 2.493752603082049, | |
| "grad_norm": 10.901542663574219, | |
| "learning_rate": 4.237059331960694e-06, | |
| "loss": 0.1538, | |
| "step": 11975 | |
| }, | |
| { | |
| "epoch": 2.4989587671803415, | |
| "grad_norm": 5.155703067779541, | |
| "learning_rate": 4.153086641728765e-06, | |
| "loss": 0.1304, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.504164931278634, | |
| "grad_norm": 0.00832182727754116, | |
| "learning_rate": 4.069878975662358e-06, | |
| "loss": 0.1498, | |
| "step": 12025 | |
| }, | |
| { | |
| "epoch": 2.509371095376926, | |
| "grad_norm": 8.369385719299316, | |
| "learning_rate": 3.9874393872531e-06, | |
| "loss": 0.2222, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 2.5145772594752187, | |
| "grad_norm": 3.448890209197998, | |
| "learning_rate": 3.905770901806299e-06, | |
| "loss": 0.1876, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 2.519783423573511, | |
| "grad_norm": 1.8173445463180542, | |
| "learning_rate": 3.824876516329881e-06, | |
| "loss": 0.0964, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.5249895876718034, | |
| "grad_norm": 8.964319229125977, | |
| "learning_rate": 3.7447591994244635e-06, | |
| "loss": 0.1555, | |
| "step": 12125 | |
| }, | |
| { | |
| "epoch": 2.530195751770096, | |
| "grad_norm": 1.7448962926864624, | |
| "learning_rate": 3.6654218911743892e-06, | |
| "loss": 0.1877, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.535401915868388, | |
| "grad_norm": 0.03552517667412758, | |
| "learning_rate": 3.5868675030398286e-06, | |
| "loss": 0.1345, | |
| "step": 12175 | |
| }, | |
| { | |
| "epoch": 2.5406080799666806, | |
| "grad_norm": 0.007262797094881535, | |
| "learning_rate": 3.509098917749962e-06, | |
| "loss": 0.1211, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.5458142440649727, | |
| "grad_norm": 6.443802833557129, | |
| "learning_rate": 3.43211898919715e-06, | |
| "loss": 0.0961, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 2.5510204081632653, | |
| "grad_norm": 3.5260281562805176, | |
| "learning_rate": 3.3559305423322503e-06, | |
| "loss": 0.1721, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 2.556226572261558, | |
| "grad_norm": 2.5505025386810303, | |
| "learning_rate": 3.280536373060919e-06, | |
| "loss": 0.1488, | |
| "step": 12275 | |
| }, | |
| { | |
| "epoch": 2.56143273635985, | |
| "grad_norm": 12.06968879699707, | |
| "learning_rate": 3.2059392481410155e-06, | |
| "loss": 0.145, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.5666389004581425, | |
| "grad_norm": 0.0032713667023926973, | |
| "learning_rate": 3.132141905081076e-06, | |
| "loss": 0.1333, | |
| "step": 12325 | |
| }, | |
| { | |
| "epoch": 2.5718450645564346, | |
| "grad_norm": 10.196958541870117, | |
| "learning_rate": 3.0591470520398513e-06, | |
| "loss": 0.1732, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 2.577051228654727, | |
| "grad_norm": 0.015713948756456375, | |
| "learning_rate": 2.9869573677269254e-06, | |
| "loss": 0.1367, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 2.5822573927530197, | |
| "grad_norm": 0.21436667442321777, | |
| "learning_rate": 2.915575501304396e-06, | |
| "loss": 0.235, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.587463556851312, | |
| "grad_norm": 0.0038804244250059128, | |
| "learning_rate": 2.845004072289695e-06, | |
| "loss": 0.0992, | |
| "step": 12425 | |
| }, | |
| { | |
| "epoch": 2.5926697209496044, | |
| "grad_norm": 6.616683483123779, | |
| "learning_rate": 2.775245670459439e-06, | |
| "loss": 0.1481, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.5978758850478965, | |
| "grad_norm": 0.006720269098877907, | |
| "learning_rate": 2.7063028557543753e-06, | |
| "loss": 0.12, | |
| "step": 12475 | |
| }, | |
| { | |
| "epoch": 2.603082049146189, | |
| "grad_norm": 1.3217498064041138, | |
| "learning_rate": 2.638178158185467e-06, | |
| "loss": 0.0997, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.6082882132444816, | |
| "grad_norm": 13.17971420288086, | |
| "learning_rate": 2.570874077741034e-06, | |
| "loss": 0.2045, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 2.6134943773427737, | |
| "grad_norm": 15.589527130126953, | |
| "learning_rate": 2.504393084295015e-06, | |
| "loss": 0.1074, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 2.6187005414410662, | |
| "grad_norm": 1.773633360862732, | |
| "learning_rate": 2.438737617516332e-06, | |
| "loss": 0.1416, | |
| "step": 12575 | |
| }, | |
| { | |
| "epoch": 2.6239067055393583, | |
| "grad_norm": 7.001961708068848, | |
| "learning_rate": 2.373910086779338e-06, | |
| "loss": 0.1821, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.629112869637651, | |
| "grad_norm": 7.044321060180664, | |
| "learning_rate": 2.309912871075445e-06, | |
| "loss": 0.1201, | |
| "step": 12625 | |
| }, | |
| { | |
| "epoch": 2.6343190337359434, | |
| "grad_norm": 8.048465728759766, | |
| "learning_rate": 2.246748318925779e-06, | |
| "loss": 0.167, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 2.639525197834236, | |
| "grad_norm": 6.150434494018555, | |
| "learning_rate": 2.1844187482950225e-06, | |
| "loss": 0.1569, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 2.644731361932528, | |
| "grad_norm": 1.490867018699646, | |
| "learning_rate": 2.122926446506332e-06, | |
| "loss": 0.0887, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.6499375260308202, | |
| "grad_norm": 6.9760003089904785, | |
| "learning_rate": 2.0622736701574136e-06, | |
| "loss": 0.0619, | |
| "step": 12725 | |
| }, | |
| { | |
| "epoch": 2.6551436901291128, | |
| "grad_norm": 4.531062602996826, | |
| "learning_rate": 2.0024626450377144e-06, | |
| "loss": 0.1153, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.6603498542274053, | |
| "grad_norm": 0.011586461216211319, | |
| "learning_rate": 1.9434955660467184e-06, | |
| "loss": 0.1071, | |
| "step": 12775 | |
| }, | |
| { | |
| "epoch": 2.665556018325698, | |
| "grad_norm": 0.5682029128074646, | |
| "learning_rate": 1.885374597113429e-06, | |
| "loss": 0.0935, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.67076218242399, | |
| "grad_norm": 0.14094886183738708, | |
| "learning_rate": 1.8281018711169522e-06, | |
| "loss": 0.0884, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 2.6759683465222825, | |
| "grad_norm": 0.025353549048304558, | |
| "learning_rate": 1.7716794898082034e-06, | |
| "loss": 0.1083, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 2.6811745106205747, | |
| "grad_norm": 10.48271656036377, | |
| "learning_rate": 1.716109523732809e-06, | |
| "loss": 0.1437, | |
| "step": 12875 | |
| }, | |
| { | |
| "epoch": 2.686380674718867, | |
| "grad_norm": 3.716670513153076, | |
| "learning_rate": 1.6613940121551014e-06, | |
| "loss": 0.177, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.6915868388171598, | |
| "grad_norm": 2.8707427978515625, | |
| "learning_rate": 1.6075349629832954e-06, | |
| "loss": 0.1107, | |
| "step": 12925 | |
| }, | |
| { | |
| "epoch": 2.696793002915452, | |
| "grad_norm": 3.083693742752075, | |
| "learning_rate": 1.554534352695808e-06, | |
| "loss": 0.1544, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 2.7019991670137444, | |
| "grad_norm": 0.012157919816672802, | |
| "learning_rate": 1.5023941262686997e-06, | |
| "loss": 0.1065, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 2.7072053311120365, | |
| "grad_norm": 6.145320415496826, | |
| "learning_rate": 1.4511161971043351e-06, | |
| "loss": 0.0982, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.712411495210329, | |
| "grad_norm": 0.1649736911058426, | |
| "learning_rate": 1.4007024469611441e-06, | |
| "loss": 0.1541, | |
| "step": 13025 | |
| }, | |
| { | |
| "epoch": 2.7176176593086216, | |
| "grad_norm": 0.16660048067569733, | |
| "learning_rate": 1.3511547258845763e-06, | |
| "loss": 0.0752, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.7228238234069138, | |
| "grad_norm": 5.491020679473877, | |
| "learning_rate": 1.3024748521392017e-06, | |
| "loss": 0.0893, | |
| "step": 13075 | |
| }, | |
| { | |
| "epoch": 2.7280299875052063, | |
| "grad_norm": 6.0252790451049805, | |
| "learning_rate": 1.2546646121419868e-06, | |
| "loss": 0.1347, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 2.7332361516034984, | |
| "grad_norm": 2.7115368843078613, | |
| "learning_rate": 1.2077257603967523e-06, | |
| "loss": 0.1261, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 2.738442315701791, | |
| "grad_norm": 14.104537963867188, | |
| "learning_rate": 1.1616600194297555e-06, | |
| "loss": 0.1585, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 2.7436484798000835, | |
| "grad_norm": 0.006517982110381126, | |
| "learning_rate": 1.1164690797265148e-06, | |
| "loss": 0.1143, | |
| "step": 13175 | |
| }, | |
| { | |
| "epoch": 2.7488546438983756, | |
| "grad_norm": 0.00706452364102006, | |
| "learning_rate": 1.0721545996697585e-06, | |
| "loss": 0.1482, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.754060807996668, | |
| "grad_norm": 0.16163934767246246, | |
| "learning_rate": 1.0287182054785544e-06, | |
| "loss": 0.1259, | |
| "step": 13225 | |
| }, | |
| { | |
| "epoch": 2.7592669720949603, | |
| "grad_norm": 0.0032609994523227215, | |
| "learning_rate": 9.861614911486462e-07, | |
| "loss": 0.1311, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 2.764473136193253, | |
| "grad_norm": 1.620920181274414, | |
| "learning_rate": 9.444860183939669e-07, | |
| "loss": 0.1162, | |
| "step": 13275 | |
| }, | |
| { | |
| "epoch": 2.7696793002915454, | |
| "grad_norm": 5.577692031860352, | |
| "learning_rate": 9.036933165893019e-07, | |
| "loss": 0.1143, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 2.7748854643898375, | |
| "grad_norm": 3.6756861209869385, | |
| "learning_rate": 8.637848827141859e-07, | |
| "loss": 0.1157, | |
| "step": 13325 | |
| }, | |
| { | |
| "epoch": 2.78009162848813, | |
| "grad_norm": 8.014864921569824, | |
| "learning_rate": 8.247621812979639e-07, | |
| "loss": 0.1125, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.785297792586422, | |
| "grad_norm": 0.027799520641565323, | |
| "learning_rate": 7.866266443660397e-07, | |
| "loss": 0.1147, | |
| "step": 13375 | |
| }, | |
| { | |
| "epoch": 2.7905039566847147, | |
| "grad_norm": 0.20379126071929932, | |
| "learning_rate": 7.493796713873346e-07, | |
| "loss": 0.0776, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 2.7957101207830073, | |
| "grad_norm": 5.621152400970459, | |
| "learning_rate": 7.130226292229314e-07, | |
| "loss": 0.1281, | |
| "step": 13425 | |
| }, | |
| { | |
| "epoch": 2.8009162848812994, | |
| "grad_norm": 9.663355827331543, | |
| "learning_rate": 6.775568520758863e-07, | |
| "loss": 0.1409, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 2.806122448979592, | |
| "grad_norm": 7.826466083526611, | |
| "learning_rate": 6.429836414423212e-07, | |
| "loss": 0.13, | |
| "step": 13475 | |
| }, | |
| { | |
| "epoch": 2.811328613077884, | |
| "grad_norm": 0.012044396251440048, | |
| "learning_rate": 6.093042660636095e-07, | |
| "loss": 0.2328, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.8165347771761766, | |
| "grad_norm": 2.4445865154266357, | |
| "learning_rate": 5.765199618798456e-07, | |
| "loss": 0.1251, | |
| "step": 13525 | |
| }, | |
| { | |
| "epoch": 2.821740941274469, | |
| "grad_norm": 4.6207451820373535, | |
| "learning_rate": 5.446319319844794e-07, | |
| "loss": 0.1069, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 2.8269471053727613, | |
| "grad_norm": 0.5208550691604614, | |
| "learning_rate": 5.13641346580171e-07, | |
| "loss": 0.0791, | |
| "step": 13575 | |
| }, | |
| { | |
| "epoch": 2.832153269471054, | |
| "grad_norm": 0.03781859204173088, | |
| "learning_rate": 4.835493429358462e-07, | |
| "loss": 0.1112, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 2.837359433569346, | |
| "grad_norm": 1.6827815771102905, | |
| "learning_rate": 4.5435702534495915e-07, | |
| "loss": 0.1123, | |
| "step": 13625 | |
| }, | |
| { | |
| "epoch": 2.8425655976676385, | |
| "grad_norm": 2.481886863708496, | |
| "learning_rate": 4.2606546508497103e-07, | |
| "loss": 0.1228, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.847771761765931, | |
| "grad_norm": 7.118264198303223, | |
| "learning_rate": 3.9867570037803725e-07, | |
| "loss": 0.1172, | |
| "step": 13675 | |
| }, | |
| { | |
| "epoch": 2.852977925864223, | |
| "grad_norm": 8.65257453918457, | |
| "learning_rate": 3.7218873635290195e-07, | |
| "loss": 0.111, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 2.8581840899625157, | |
| "grad_norm": 0.8659380674362183, | |
| "learning_rate": 3.466055450080191e-07, | |
| "loss": 0.0999, | |
| "step": 13725 | |
| }, | |
| { | |
| "epoch": 2.863390254060808, | |
| "grad_norm": 5.575057506561279, | |
| "learning_rate": 3.219270651758782e-07, | |
| "loss": 0.1201, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 2.8685964181591004, | |
| "grad_norm": 4.377348899841309, | |
| "learning_rate": 2.9815420248855977e-07, | |
| "loss": 0.1469, | |
| "step": 13775 | |
| }, | |
| { | |
| "epoch": 2.873802582257393, | |
| "grad_norm": 3.4709956645965576, | |
| "learning_rate": 2.7528782934449215e-07, | |
| "loss": 0.1214, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.879008746355685, | |
| "grad_norm": 0.008634321391582489, | |
| "learning_rate": 2.533287848764332e-07, | |
| "loss": 0.1104, | |
| "step": 13825 | |
| }, | |
| { | |
| "epoch": 2.8842149104539776, | |
| "grad_norm": 5.85574197769165, | |
| "learning_rate": 2.3227787492069718e-07, | |
| "loss": 0.1284, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 2.8894210745522697, | |
| "grad_norm": 2.434326648712158, | |
| "learning_rate": 2.1213587198755936e-07, | |
| "loss": 0.0968, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 2.8946272386505623, | |
| "grad_norm": 3.5275282859802246, | |
| "learning_rate": 1.929035152329145e-07, | |
| "loss": 0.1454, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 2.899833402748855, | |
| "grad_norm": 3.965625762939453, | |
| "learning_rate": 1.7458151043116544e-07, | |
| "loss": 0.1299, | |
| "step": 13925 | |
| }, | |
| { | |
| "epoch": 2.905039566847147, | |
| "grad_norm": 0.10151717066764832, | |
| "learning_rate": 1.5717052994929927e-07, | |
| "loss": 0.1459, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 2.9102457309454395, | |
| "grad_norm": 0.03408972918987274, | |
| "learning_rate": 1.4067121272223204e-07, | |
| "loss": 0.11, | |
| "step": 13975 | |
| }, | |
| { | |
| "epoch": 2.9154518950437316, | |
| "grad_norm": 0.0022567359264940023, | |
| "learning_rate": 1.2508416422935542e-07, | |
| "loss": 0.1083, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.920658059142024, | |
| "grad_norm": 7.95089054107666, | |
| "learning_rate": 1.1040995647231545e-07, | |
| "loss": 0.1088, | |
| "step": 14025 | |
| }, | |
| { | |
| "epoch": 2.9258642232403167, | |
| "grad_norm": 12.033370018005371, | |
| "learning_rate": 9.664912795402381e-08, | |
| "loss": 0.1305, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 2.931070387338609, | |
| "grad_norm": 0.22287705540657043, | |
| "learning_rate": 8.380218365889592e-08, | |
| "loss": 0.1694, | |
| "step": 14075 | |
| }, | |
| { | |
| "epoch": 2.9362765514369014, | |
| "grad_norm": 0.011814435012638569, | |
| "learning_rate": 7.186959503431845e-08, | |
| "loss": 0.083, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.9414827155351935, | |
| "grad_norm": 0.22026073932647705, | |
| "learning_rate": 6.08517999733521e-08, | |
| "loss": 0.0816, | |
| "step": 14125 | |
| }, | |
| { | |
| "epoch": 2.946688879633486, | |
| "grad_norm": 6.017068386077881, | |
| "learning_rate": 5.0749202798652806e-08, | |
| "loss": 0.1286, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 2.9518950437317786, | |
| "grad_norm": 9.714811325073242, | |
| "learning_rate": 4.156217424765296e-08, | |
| "loss": 0.1439, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 2.9571012078300707, | |
| "grad_norm": 0.37943577766418457, | |
| "learning_rate": 3.3291051458927966e-08, | |
| "loss": 0.0811, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 2.9623073719283632, | |
| "grad_norm": 0.007510739378631115, | |
| "learning_rate": 2.5936137959856054e-08, | |
| "loss": 0.1679, | |
| "step": 14225 | |
| }, | |
| { | |
| "epoch": 2.9675135360266554, | |
| "grad_norm": 4.679372310638428, | |
| "learning_rate": 1.9497703655455e-08, | |
| "loss": 0.0982, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 2.972719700124948, | |
| "grad_norm": 5.683784484863281, | |
| "learning_rate": 1.39759848184845e-08, | |
| "loss": 0.1231, | |
| "step": 14275 | |
| }, | |
| { | |
| "epoch": 2.9779258642232405, | |
| "grad_norm": 13.094088554382324, | |
| "learning_rate": 9.37118408078641e-09, | |
| "loss": 0.118, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 2.9831320283215326, | |
| "grad_norm": 7.190179347991943, | |
| "learning_rate": 5.683470425832394e-09, | |
| "loss": 0.1788, | |
| "step": 14325 | |
| }, | |
| { | |
| "epoch": 2.988338192419825, | |
| "grad_norm": 8.636795997619629, | |
| "learning_rate": 2.9129791825344145e-09, | |
| "loss": 0.1482, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 2.9935443565181172, | |
| "grad_norm": 5.166715145111084, | |
| "learning_rate": 1.0598120202681695e-09, | |
| "loss": 0.106, | |
| "step": 14375 | |
| }, | |
| { | |
| "epoch": 2.99875052061641, | |
| "grad_norm": 6.336511611938477, | |
| "learning_rate": 1.2403694515661368e-10, | |
| "loss": 0.1234, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9105245346869713, | |
| "eval_f1_macro": 0.8981344155061894, | |
| "eval_f1_micro": 0.9105245346869713, | |
| "eval_f1_weighted": 0.9032382559745838, | |
| "eval_loss": 0.2098342627286911, | |
| "eval_precision_macro": 0.9192533404916904, | |
| "eval_precision_micro": 0.9105245346869713, | |
| "eval_precision_weighted": 0.920957800948379, | |
| "eval_recall_macro": 0.9038812615955473, | |
| "eval_recall_micro": 0.9105245346869713, | |
| "eval_recall_weighted": 0.9105245346869713, | |
| "eval_runtime": 16.1158, | |
| "eval_samples_per_second": 916.801, | |
| "eval_steps_per_second": 57.335, | |
| "step": 14406 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 14406, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.01 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3853101049724160.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |