{ "best_metric": 0.15377455949783325, "best_model_checkpoint": "date2format/checkpoint-11082", "epoch": 3.0, "eval_steps": 500, "global_step": 11082, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00676773145641581, "grad_norm": Infinity, "learning_rate": 1.0820559062218215e-06, "loss": 6.7854, "step": 25 }, { "epoch": 0.01353546291283162, "grad_norm": 18.809656143188477, "learning_rate": 2.2091974752028858e-06, "loss": 6.8207, "step": 50 }, { "epoch": 0.020303194369247428, "grad_norm": 18.936891555786133, "learning_rate": 3.3363390441839496e-06, "loss": 6.712, "step": 75 }, { "epoch": 0.02707092582566324, "grad_norm": 16.995830535888672, "learning_rate": 4.4634806131650134e-06, "loss": 6.6652, "step": 100 }, { "epoch": 0.03383865728207905, "grad_norm": 16.105716705322266, "learning_rate": 5.590622182146077e-06, "loss": 6.5538, "step": 125 }, { "epoch": 0.040606388738494856, "grad_norm": 16.41363525390625, "learning_rate": 6.717763751127142e-06, "loss": 6.5388, "step": 150 }, { "epoch": 0.04737412019491066, "grad_norm": 16.850101470947266, "learning_rate": 7.844905320108207e-06, "loss": 6.4123, "step": 175 }, { "epoch": 0.05414185165132648, "grad_norm": 15.117751121520996, "learning_rate": 8.972046889089269e-06, "loss": 6.3959, "step": 200 }, { "epoch": 0.060909583107742284, "grad_norm": 14.722877502441406, "learning_rate": 1.0099188458070334e-05, "loss": 6.2513, "step": 225 }, { "epoch": 0.0676773145641581, "grad_norm": 13.270645141601562, "learning_rate": 1.1226330027051398e-05, "loss": 6.2321, "step": 250 }, { "epoch": 0.0744450460205739, "grad_norm": 12.977484703063965, "learning_rate": 1.2353471596032462e-05, "loss": 6.129, "step": 275 }, { "epoch": 0.08121277747698971, "grad_norm": 12.7619047164917, "learning_rate": 1.3480613165013526e-05, "loss": 5.9896, "step": 300 }, { "epoch": 0.08798050893340552, "grad_norm": 12.988408088684082, "learning_rate": 1.4607754733994591e-05, "loss": 5.9501, "step": 325 }, { "epoch": 0.09474824038982133, "grad_norm": 13.49023723602295, "learning_rate": 1.5734896302975655e-05, "loss": 5.8552, "step": 350 }, { "epoch": 0.10151597184623715, "grad_norm": 13.501762390136719, "learning_rate": 1.686203787195672e-05, "loss": 5.7112, "step": 375 }, { "epoch": 0.10828370330265295, "grad_norm": 13.560358047485352, "learning_rate": 1.7989179440937783e-05, "loss": 5.5904, "step": 400 }, { "epoch": 0.11505143475906876, "grad_norm": 13.383050918579102, "learning_rate": 1.9116321009918847e-05, "loss": 5.5183, "step": 425 }, { "epoch": 0.12181916621548457, "grad_norm": 13.750153541564941, "learning_rate": 2.024346257889991e-05, "loss": 5.4114, "step": 450 }, { "epoch": 0.12858689767190037, "grad_norm": 14.731257438659668, "learning_rate": 2.1370604147880974e-05, "loss": 5.1422, "step": 475 }, { "epoch": 0.1353546291283162, "grad_norm": 13.863329887390137, "learning_rate": 2.2497745716862038e-05, "loss": 5.2576, "step": 500 }, { "epoch": 0.142122360584732, "grad_norm": 14.840801239013672, "learning_rate": 2.3624887285843102e-05, "loss": 4.9023, "step": 525 }, { "epoch": 0.1488900920411478, "grad_norm": 13.968029975891113, "learning_rate": 2.4752028854824166e-05, "loss": 4.8976, "step": 550 }, { "epoch": 0.15565782349756363, "grad_norm": 15.291852951049805, "learning_rate": 2.5879170423805233e-05, "loss": 4.7663, "step": 575 }, { "epoch": 0.16242555495397942, "grad_norm": 14.247614860534668, "learning_rate": 2.7006311992786293e-05, "loss": 4.7057, "step": 600 }, { "epoch": 0.16919328641039524, "grad_norm": 15.434479713439941, "learning_rate": 2.8133453561767357e-05, "loss": 4.5958, "step": 625 }, { "epoch": 0.17596101786681104, "grad_norm": 15.81191635131836, "learning_rate": 2.9260595130748425e-05, "loss": 4.4317, "step": 650 }, { "epoch": 0.18272874932322686, "grad_norm": 15.542362213134766, "learning_rate": 3.0387736699729485e-05, "loss": 4.3024, "step": 675 }, { "epoch": 0.18949648077964265, "grad_norm": 15.908782005310059, "learning_rate": 3.151487826871055e-05, "loss": 4.2023, "step": 700 }, { "epoch": 0.19626421223605847, "grad_norm": 17.07871437072754, "learning_rate": 3.264201983769162e-05, "loss": 4.1671, "step": 725 }, { "epoch": 0.2030319436924743, "grad_norm": 14.337630271911621, "learning_rate": 3.3769161406672676e-05, "loss": 3.8202, "step": 750 }, { "epoch": 0.20979967514889009, "grad_norm": 15.395954132080078, "learning_rate": 3.489630297565375e-05, "loss": 3.7002, "step": 775 }, { "epoch": 0.2165674066053059, "grad_norm": 17.821945190429688, "learning_rate": 3.602344454463481e-05, "loss": 3.7328, "step": 800 }, { "epoch": 0.2233351380617217, "grad_norm": 15.371402740478516, "learning_rate": 3.715058611361587e-05, "loss": 3.4769, "step": 825 }, { "epoch": 0.23010286951813752, "grad_norm": 18.089616775512695, "learning_rate": 3.827772768259694e-05, "loss": 3.5862, "step": 850 }, { "epoch": 0.23687060097455334, "grad_norm": 16.150415420532227, "learning_rate": 3.9404869251578e-05, "loss": 3.2755, "step": 875 }, { "epoch": 0.24363833243096913, "grad_norm": 16.139698028564453, "learning_rate": 4.0532010820559066e-05, "loss": 3.1737, "step": 900 }, { "epoch": 0.25040606388738496, "grad_norm": 16.604488372802734, "learning_rate": 4.165915238954013e-05, "loss": 3.1445, "step": 925 }, { "epoch": 0.25717379534380075, "grad_norm": 17.532072067260742, "learning_rate": 4.278629395852119e-05, "loss": 3.1923, "step": 950 }, { "epoch": 0.26394152680021654, "grad_norm": 17.041091918945312, "learning_rate": 4.391343552750226e-05, "loss": 2.8598, "step": 975 }, { "epoch": 0.2707092582566324, "grad_norm": 17.634092330932617, "learning_rate": 4.504057709648332e-05, "loss": 2.653, "step": 1000 }, { "epoch": 0.2774769897130482, "grad_norm": 14.8062105178833, "learning_rate": 4.6167718665464385e-05, "loss": 2.6197, "step": 1025 }, { "epoch": 0.284244721169464, "grad_norm": 14.701135635375977, "learning_rate": 4.729486023444545e-05, "loss": 2.4039, "step": 1050 }, { "epoch": 0.2910124526258798, "grad_norm": 14.139700889587402, "learning_rate": 4.842200180342651e-05, "loss": 2.3764, "step": 1075 }, { "epoch": 0.2977801840822956, "grad_norm": 15.656773567199707, "learning_rate": 4.954914337240758e-05, "loss": 2.3153, "step": 1100 }, { "epoch": 0.3045479155387114, "grad_norm": 15.101000785827637, "learning_rate": 4.992479695176978e-05, "loss": 2.0685, "step": 1125 }, { "epoch": 0.31131564699512726, "grad_norm": 15.826728820800781, "learning_rate": 4.9799458538052745e-05, "loss": 2.1117, "step": 1150 }, { "epoch": 0.31808337845154305, "grad_norm": 16.1218204498291, "learning_rate": 4.967412012433571e-05, "loss": 2.0013, "step": 1175 }, { "epoch": 0.32485110990795885, "grad_norm": 12.746655464172363, "learning_rate": 4.9548781710618674e-05, "loss": 1.8766, "step": 1200 }, { "epoch": 0.33161884136437464, "grad_norm": 11.957603454589844, "learning_rate": 4.942344329690164e-05, "loss": 1.7649, "step": 1225 }, { "epoch": 0.3383865728207905, "grad_norm": 14.868532180786133, "learning_rate": 4.92981048831846e-05, "loss": 1.6963, "step": 1250 }, { "epoch": 0.3451543042772063, "grad_norm": 12.839776992797852, "learning_rate": 4.917276646946756e-05, "loss": 1.4838, "step": 1275 }, { "epoch": 0.3519220357336221, "grad_norm": 13.293111801147461, "learning_rate": 4.904742805575053e-05, "loss": 1.393, "step": 1300 }, { "epoch": 0.3586897671900379, "grad_norm": 16.82228660583496, "learning_rate": 4.892208964203349e-05, "loss": 1.463, "step": 1325 }, { "epoch": 0.3654574986464537, "grad_norm": 13.664780616760254, "learning_rate": 4.8796751228316456e-05, "loss": 1.3157, "step": 1350 }, { "epoch": 0.3722252301028695, "grad_norm": 12.570382118225098, "learning_rate": 4.867141281459942e-05, "loss": 1.3402, "step": 1375 }, { "epoch": 0.3789929615592853, "grad_norm": 16.875078201293945, "learning_rate": 4.8546074400882386e-05, "loss": 1.3422, "step": 1400 }, { "epoch": 0.38576069301570115, "grad_norm": 12.442831993103027, "learning_rate": 4.842073598716535e-05, "loss": 1.1951, "step": 1425 }, { "epoch": 0.39252842447211694, "grad_norm": 13.85045051574707, "learning_rate": 4.8295397573448316e-05, "loss": 1.1216, "step": 1450 }, { "epoch": 0.39929615592853274, "grad_norm": 12.063750267028809, "learning_rate": 4.817005915973128e-05, "loss": 0.9869, "step": 1475 }, { "epoch": 0.4060638873849486, "grad_norm": 14.56391716003418, "learning_rate": 4.8044720746014245e-05, "loss": 1.0033, "step": 1500 }, { "epoch": 0.4128316188413644, "grad_norm": 12.925354957580566, "learning_rate": 4.79193823322972e-05, "loss": 0.8906, "step": 1525 }, { "epoch": 0.41959935029778017, "grad_norm": 19.873634338378906, "learning_rate": 4.779404391858017e-05, "loss": 0.9104, "step": 1550 }, { "epoch": 0.426367081754196, "grad_norm": 14.321996688842773, "learning_rate": 4.766870550486313e-05, "loss": 1.0049, "step": 1575 }, { "epoch": 0.4331348132106118, "grad_norm": 8.742544174194336, "learning_rate": 4.75433670911461e-05, "loss": 0.9338, "step": 1600 }, { "epoch": 0.4399025446670276, "grad_norm": 15.12094497680664, "learning_rate": 4.741802867742906e-05, "loss": 0.7763, "step": 1625 }, { "epoch": 0.4466702761234434, "grad_norm": 9.084162712097168, "learning_rate": 4.729269026371203e-05, "loss": 0.7404, "step": 1650 }, { "epoch": 0.45343800757985925, "grad_norm": 8.377363204956055, "learning_rate": 4.716735184999499e-05, "loss": 0.7405, "step": 1675 }, { "epoch": 0.46020573903627504, "grad_norm": 6.151582717895508, "learning_rate": 4.704201343627796e-05, "loss": 0.7003, "step": 1700 }, { "epoch": 0.46697347049269083, "grad_norm": 6.058241367340088, "learning_rate": 4.691667502256092e-05, "loss": 0.7211, "step": 1725 }, { "epoch": 0.4737412019491067, "grad_norm": 11.786967277526855, "learning_rate": 4.6791336608843886e-05, "loss": 0.6573, "step": 1750 }, { "epoch": 0.4805089334055225, "grad_norm": 7.774144649505615, "learning_rate": 4.6665998195126844e-05, "loss": 0.6648, "step": 1775 }, { "epoch": 0.48727666486193827, "grad_norm": 13.611273765563965, "learning_rate": 4.654065978140981e-05, "loss": 0.6778, "step": 1800 }, { "epoch": 0.49404439631835406, "grad_norm": 17.016263961791992, "learning_rate": 4.6415321367692774e-05, "loss": 0.6154, "step": 1825 }, { "epoch": 0.5008121277747699, "grad_norm": 13.737407684326172, "learning_rate": 4.628998295397574e-05, "loss": 0.6161, "step": 1850 }, { "epoch": 0.5075798592311858, "grad_norm": 10.071102142333984, "learning_rate": 4.61646445402587e-05, "loss": 0.596, "step": 1875 }, { "epoch": 0.5143475906876015, "grad_norm": 8.169589042663574, "learning_rate": 4.603930612654166e-05, "loss": 0.5347, "step": 1900 }, { "epoch": 0.5211153221440173, "grad_norm": 13.066163063049316, "learning_rate": 4.5913967712824627e-05, "loss": 0.544, "step": 1925 }, { "epoch": 0.5278830536004331, "grad_norm": 3.5582985877990723, "learning_rate": 4.578862929910759e-05, "loss": 0.5303, "step": 1950 }, { "epoch": 0.5346507850568489, "grad_norm": 7.025475978851318, "learning_rate": 4.5663290885390556e-05, "loss": 0.4748, "step": 1975 }, { "epoch": 0.5414185165132648, "grad_norm": 8.666425704956055, "learning_rate": 4.553795247167352e-05, "loss": 0.5852, "step": 2000 }, { "epoch": 0.5481862479696805, "grad_norm": 16.04596710205078, "learning_rate": 4.5412614057956486e-05, "loss": 0.4351, "step": 2025 }, { "epoch": 0.5549539794260964, "grad_norm": 15.767374038696289, "learning_rate": 4.529228918078813e-05, "loss": 0.6447, "step": 2050 }, { "epoch": 0.5617217108825122, "grad_norm": 8.484817504882812, "learning_rate": 4.516695076707109e-05, "loss": 0.4809, "step": 2075 }, { "epoch": 0.568489442338928, "grad_norm": 16.595365524291992, "learning_rate": 4.504161235335406e-05, "loss": 0.4824, "step": 2100 }, { "epoch": 0.5752571737953438, "grad_norm": 16.1405029296875, "learning_rate": 4.491627393963702e-05, "loss": 0.4274, "step": 2125 }, { "epoch": 0.5820249052517596, "grad_norm": 12.056056022644043, "learning_rate": 4.4790935525919986e-05, "loss": 0.4834, "step": 2150 }, { "epoch": 0.5887926367081754, "grad_norm": 4.0205841064453125, "learning_rate": 4.466559711220295e-05, "loss": 0.5808, "step": 2175 }, { "epoch": 0.5955603681645912, "grad_norm": 16.41112518310547, "learning_rate": 4.4540258698485916e-05, "loss": 0.5036, "step": 2200 }, { "epoch": 0.6023280996210071, "grad_norm": 10.883577346801758, "learning_rate": 4.441492028476888e-05, "loss": 0.4325, "step": 2225 }, { "epoch": 0.6090958310774228, "grad_norm": 15.038456916809082, "learning_rate": 4.4289581871051845e-05, "loss": 0.4418, "step": 2250 }, { "epoch": 0.6158635625338387, "grad_norm": 3.341290235519409, "learning_rate": 4.416424345733481e-05, "loss": 0.4403, "step": 2275 }, { "epoch": 0.6226312939902545, "grad_norm": 8.565878868103027, "learning_rate": 4.403890504361777e-05, "loss": 0.376, "step": 2300 }, { "epoch": 0.6293990254466703, "grad_norm": 8.767007827758789, "learning_rate": 4.391356662990073e-05, "loss": 0.3205, "step": 2325 }, { "epoch": 0.6361667569030861, "grad_norm": 8.68835163116455, "learning_rate": 4.37882282161837e-05, "loss": 0.338, "step": 2350 }, { "epoch": 0.6429344883595018, "grad_norm": 9.748613357543945, "learning_rate": 4.366288980246666e-05, "loss": 0.4474, "step": 2375 }, { "epoch": 0.6497022198159177, "grad_norm": 7.754514217376709, "learning_rate": 4.353755138874963e-05, "loss": 0.4478, "step": 2400 }, { "epoch": 0.6564699512723335, "grad_norm": 11.137701034545898, "learning_rate": 4.341221297503259e-05, "loss": 0.3163, "step": 2425 }, { "epoch": 0.6632376827287493, "grad_norm": 9.576991081237793, "learning_rate": 4.328687456131556e-05, "loss": 0.3521, "step": 2450 }, { "epoch": 0.6700054141851651, "grad_norm": 11.974344253540039, "learning_rate": 4.316153614759852e-05, "loss": 0.4591, "step": 2475 }, { "epoch": 0.676773145641581, "grad_norm": 15.265382766723633, "learning_rate": 4.303619773388149e-05, "loss": 0.3983, "step": 2500 }, { "epoch": 0.6835408770979967, "grad_norm": 12.016144752502441, "learning_rate": 4.291085932016445e-05, "loss": 0.5159, "step": 2525 }, { "epoch": 0.6903086085544126, "grad_norm": 12.998587608337402, "learning_rate": 4.278552090644741e-05, "loss": 0.4533, "step": 2550 }, { "epoch": 0.6970763400108284, "grad_norm": 21.76568031311035, "learning_rate": 4.2660182492730374e-05, "loss": 0.3456, "step": 2575 }, { "epoch": 0.7038440714672441, "grad_norm": 3.395463466644287, "learning_rate": 4.2539857615562016e-05, "loss": 0.3654, "step": 2600 }, { "epoch": 0.71061180292366, "grad_norm": 6.759268283843994, "learning_rate": 4.241451920184498e-05, "loss": 0.316, "step": 2625 }, { "epoch": 0.7173795343800758, "grad_norm": 19.425579071044922, "learning_rate": 4.2289180788127945e-05, "loss": 0.3431, "step": 2650 }, { "epoch": 0.7241472658364916, "grad_norm": 12.407275199890137, "learning_rate": 4.216384237441091e-05, "loss": 0.3912, "step": 2675 }, { "epoch": 0.7309149972929074, "grad_norm": 10.216941833496094, "learning_rate": 4.2038503960693875e-05, "loss": 0.2893, "step": 2700 }, { "epoch": 0.7376827287493233, "grad_norm": 8.958337783813477, "learning_rate": 4.191316554697684e-05, "loss": 0.4489, "step": 2725 }, { "epoch": 0.744450460205739, "grad_norm": 10.17128849029541, "learning_rate": 4.1787827133259804e-05, "loss": 0.2845, "step": 2750 }, { "epoch": 0.7512181916621549, "grad_norm": 6.733510494232178, "learning_rate": 4.166750225609145e-05, "loss": 0.3935, "step": 2775 }, { "epoch": 0.7579859231185706, "grad_norm": 9.230829238891602, "learning_rate": 4.154216384237442e-05, "loss": 0.3809, "step": 2800 }, { "epoch": 0.7647536545749865, "grad_norm": 2.8910205364227295, "learning_rate": 4.1416825428657375e-05, "loss": 0.2973, "step": 2825 }, { "epoch": 0.7715213860314023, "grad_norm": 3.628933906555176, "learning_rate": 4.129148701494034e-05, "loss": 0.2805, "step": 2850 }, { "epoch": 0.778289117487818, "grad_norm": 7.368860721588135, "learning_rate": 4.1166148601223305e-05, "loss": 0.3739, "step": 2875 }, { "epoch": 0.7850568489442339, "grad_norm": 8.461480140686035, "learning_rate": 4.104081018750627e-05, "loss": 0.334, "step": 2900 }, { "epoch": 0.7918245804006497, "grad_norm": 10.173233985900879, "learning_rate": 4.0915471773789235e-05, "loss": 0.427, "step": 2925 }, { "epoch": 0.7985923118570655, "grad_norm": 4.683242321014404, "learning_rate": 4.07901333600722e-05, "loss": 0.2991, "step": 2950 }, { "epoch": 0.8053600433134813, "grad_norm": 10.472857475280762, "learning_rate": 4.0664794946355164e-05, "loss": 0.3194, "step": 2975 }, { "epoch": 0.8121277747698972, "grad_norm": 5.410557746887207, "learning_rate": 4.053945653263813e-05, "loss": 0.3114, "step": 3000 }, { "epoch": 0.8188955062263129, "grad_norm": 12.810556411743164, "learning_rate": 4.041411811892109e-05, "loss": 0.2572, "step": 3025 }, { "epoch": 0.8256632376827288, "grad_norm": 4.909450054168701, "learning_rate": 4.028877970520405e-05, "loss": 0.2069, "step": 3050 }, { "epoch": 0.8324309691391446, "grad_norm": 4.909849643707275, "learning_rate": 4.016344129148702e-05, "loss": 0.3478, "step": 3075 }, { "epoch": 0.8391987005955603, "grad_norm": 13.538515090942383, "learning_rate": 4.003810287776998e-05, "loss": 0.3086, "step": 3100 }, { "epoch": 0.8459664320519762, "grad_norm": 10.3212251663208, "learning_rate": 3.9912764464052946e-05, "loss": 0.2613, "step": 3125 }, { "epoch": 0.852734163508392, "grad_norm": 7.68850040435791, "learning_rate": 3.9787426050335904e-05, "loss": 0.3387, "step": 3150 }, { "epoch": 0.8595018949648078, "grad_norm": 7.078841209411621, "learning_rate": 3.966208763661887e-05, "loss": 0.357, "step": 3175 }, { "epoch": 0.8662696264212236, "grad_norm": 4.790768146514893, "learning_rate": 3.9536749222901834e-05, "loss": 0.3459, "step": 3200 }, { "epoch": 0.8730373578776394, "grad_norm": 4.735093593597412, "learning_rate": 3.94114108091848e-05, "loss": 0.2948, "step": 3225 }, { "epoch": 0.8798050893340552, "grad_norm": 1.3540657758712769, "learning_rate": 3.9286072395467764e-05, "loss": 0.1944, "step": 3250 }, { "epoch": 0.8865728207904711, "grad_norm": 9.657829284667969, "learning_rate": 3.916073398175073e-05, "loss": 0.2671, "step": 3275 }, { "epoch": 0.8933405522468868, "grad_norm": 8.425637245178223, "learning_rate": 3.903539556803369e-05, "loss": 0.2251, "step": 3300 }, { "epoch": 0.9001082837033026, "grad_norm": 7.622613906860352, "learning_rate": 3.891005715431666e-05, "loss": 0.3632, "step": 3325 }, { "epoch": 0.9068760151597185, "grad_norm": 12.632335662841797, "learning_rate": 3.8784718740599616e-05, "loss": 0.207, "step": 3350 }, { "epoch": 0.9136437466161342, "grad_norm": 11.750454902648926, "learning_rate": 3.865938032688258e-05, "loss": 0.2652, "step": 3375 }, { "epoch": 0.9204114780725501, "grad_norm": 6.89017915725708, "learning_rate": 3.8534041913165546e-05, "loss": 0.2457, "step": 3400 }, { "epoch": 0.9271792095289659, "grad_norm": 4.333946704864502, "learning_rate": 3.840870349944851e-05, "loss": 0.3324, "step": 3425 }, { "epoch": 0.9339469409853817, "grad_norm": 1.0153127908706665, "learning_rate": 3.8283365085731475e-05, "loss": 0.1966, "step": 3450 }, { "epoch": 0.9407146724417975, "grad_norm": 1.8941410779953003, "learning_rate": 3.815802667201444e-05, "loss": 0.3098, "step": 3475 }, { "epoch": 0.9474824038982134, "grad_norm": 1.6257559061050415, "learning_rate": 3.8032688258297405e-05, "loss": 0.1872, "step": 3500 }, { "epoch": 0.9542501353546291, "grad_norm": 2.3211212158203125, "learning_rate": 3.790734984458037e-05, "loss": 0.2334, "step": 3525 }, { "epoch": 0.961017866811045, "grad_norm": 10.049856185913086, "learning_rate": 3.7782011430863334e-05, "loss": 0.3128, "step": 3550 }, { "epoch": 0.9677855982674608, "grad_norm": 10.843172073364258, "learning_rate": 3.76566730171463e-05, "loss": 0.3414, "step": 3575 }, { "epoch": 0.9745533297238765, "grad_norm": 0.46516045928001404, "learning_rate": 3.7531334603429264e-05, "loss": 0.2379, "step": 3600 }, { "epoch": 0.9813210611802924, "grad_norm": 15.376679420471191, "learning_rate": 3.740599618971222e-05, "loss": 0.2887, "step": 3625 }, { "epoch": 0.9880887926367081, "grad_norm": 2.3309133052825928, "learning_rate": 3.728065777599519e-05, "loss": 0.2105, "step": 3650 }, { "epoch": 0.994856524093124, "grad_norm": 7.93802547454834, "learning_rate": 3.715531936227815e-05, "loss": 0.3599, "step": 3675 }, { "epoch": 1.0, "eval_accuracy": 0.9213923132704859, "eval_f1_macro": 0.9069570851888077, "eval_f1_micro": 0.9213923132704859, "eval_f1_weighted": 0.911460261524371, "eval_loss": 0.2470918595790863, "eval_precision_macro": 0.9177198642319323, "eval_precision_micro": 0.9213923132704859, "eval_precision_weighted": 0.9195993135359931, "eval_recall_macro": 0.9154624966869864, "eval_recall_micro": 0.9213923132704859, "eval_recall_weighted": 0.9213923132704859, "eval_runtime": 21.8346, "eval_samples_per_second": 947.351, "eval_steps_per_second": 59.218, "step": 3694 }, { "epoch": 1.0016242555495398, "grad_norm": 11.75763988494873, "learning_rate": 3.7029980948561116e-05, "loss": 0.2012, "step": 3700 }, { "epoch": 1.0083919870059557, "grad_norm": 13.783013343811035, "learning_rate": 3.690464253484408e-05, "loss": 0.4173, "step": 3725 }, { "epoch": 1.0151597184623715, "grad_norm": 2.9924991130828857, "learning_rate": 3.6779304121127046e-05, "loss": 0.2121, "step": 3750 }, { "epoch": 1.0219274499187871, "grad_norm": 0.5149463415145874, "learning_rate": 3.665396570741001e-05, "loss": 0.2768, "step": 3775 }, { "epoch": 1.028695181375203, "grad_norm": 14.207648277282715, "learning_rate": 3.6528627293692976e-05, "loss": 0.2858, "step": 3800 }, { "epoch": 1.0354629128316188, "grad_norm": 0.8809079527854919, "learning_rate": 3.640328887997594e-05, "loss": 0.1731, "step": 3825 }, { "epoch": 1.0422306442880347, "grad_norm": 4.510576248168945, "learning_rate": 3.6277950466258905e-05, "loss": 0.2966, "step": 3850 }, { "epoch": 1.0489983757444505, "grad_norm": 17.010372161865234, "learning_rate": 3.615261205254186e-05, "loss": 0.2354, "step": 3875 }, { "epoch": 1.0557661072008662, "grad_norm": 2.4811925888061523, "learning_rate": 3.602727363882483e-05, "loss": 0.26, "step": 3900 }, { "epoch": 1.062533838657282, "grad_norm": 0.9241037368774414, "learning_rate": 3.590193522510779e-05, "loss": 0.1716, "step": 3925 }, { "epoch": 1.0693015701136979, "grad_norm": 11.593517303466797, "learning_rate": 3.577659681139076e-05, "loss": 0.255, "step": 3950 }, { "epoch": 1.0760693015701137, "grad_norm": 8.104696273803711, "learning_rate": 3.565125839767372e-05, "loss": 0.2273, "step": 3975 }, { "epoch": 1.0828370330265296, "grad_norm": 12.741314888000488, "learning_rate": 3.552591998395669e-05, "loss": 0.2807, "step": 4000 }, { "epoch": 1.0896047644829454, "grad_norm": 0.22231225669384003, "learning_rate": 3.540058157023965e-05, "loss": 0.2141, "step": 4025 }, { "epoch": 1.096372495939361, "grad_norm": 12.738525390625, "learning_rate": 3.527524315652262e-05, "loss": 0.2796, "step": 4050 }, { "epoch": 1.1031402273957769, "grad_norm": 9.309906005859375, "learning_rate": 3.514990474280558e-05, "loss": 0.2185, "step": 4075 }, { "epoch": 1.1099079588521927, "grad_norm": 11.775688171386719, "learning_rate": 3.502456632908854e-05, "loss": 0.3496, "step": 4100 }, { "epoch": 1.1166756903086086, "grad_norm": 6.333633899688721, "learning_rate": 3.4899227915371505e-05, "loss": 0.2659, "step": 4125 }, { "epoch": 1.1234434217650244, "grad_norm": 0.39873039722442627, "learning_rate": 3.477388950165447e-05, "loss": 0.2551, "step": 4150 }, { "epoch": 1.13021115322144, "grad_norm": 0.5979344844818115, "learning_rate": 3.4648551087937434e-05, "loss": 0.2102, "step": 4175 }, { "epoch": 1.136978884677856, "grad_norm": 12.985968589782715, "learning_rate": 3.452321267422039e-05, "loss": 0.2303, "step": 4200 }, { "epoch": 1.1437466161342718, "grad_norm": 2.175553560256958, "learning_rate": 3.439787426050336e-05, "loss": 0.2526, "step": 4225 }, { "epoch": 1.1505143475906876, "grad_norm": 0.49194416403770447, "learning_rate": 3.427253584678632e-05, "loss": 0.2483, "step": 4250 }, { "epoch": 1.1572820790471035, "grad_norm": 3.2816367149353027, "learning_rate": 3.4147197433069287e-05, "loss": 0.2854, "step": 4275 }, { "epoch": 1.1640498105035193, "grad_norm": 7.387673377990723, "learning_rate": 3.402185901935225e-05, "loss": 0.2106, "step": 4300 }, { "epoch": 1.1708175419599351, "grad_norm": 7.8965654373168945, "learning_rate": 3.3896520605635216e-05, "loss": 0.2578, "step": 4325 }, { "epoch": 1.1775852734163508, "grad_norm": 1.6988545656204224, "learning_rate": 3.377118219191818e-05, "loss": 0.1903, "step": 4350 }, { "epoch": 1.1843530048727666, "grad_norm": 6.279006481170654, "learning_rate": 3.3645843778201146e-05, "loss": 0.3026, "step": 4375 }, { "epoch": 1.1911207363291825, "grad_norm": 0.32076123356819153, "learning_rate": 3.3520505364484104e-05, "loss": 0.2804, "step": 4400 }, { "epoch": 1.1978884677855983, "grad_norm": 11.526758193969727, "learning_rate": 3.339516695076707e-05, "loss": 0.3756, "step": 4425 }, { "epoch": 1.2046561992420142, "grad_norm": 11.514225959777832, "learning_rate": 3.3269828537050033e-05, "loss": 0.2868, "step": 4450 }, { "epoch": 1.2114239306984298, "grad_norm": 10.091246604919434, "learning_rate": 3.3144490123333e-05, "loss": 0.225, "step": 4475 }, { "epoch": 1.2181916621548456, "grad_norm": 1.9780317544937134, "learning_rate": 3.301915170961596e-05, "loss": 0.1927, "step": 4500 }, { "epoch": 1.2249593936112615, "grad_norm": 14.720560073852539, "learning_rate": 3.289381329589893e-05, "loss": 0.3458, "step": 4525 }, { "epoch": 1.2317271250676773, "grad_norm": 10.85938835144043, "learning_rate": 3.276847488218189e-05, "loss": 0.1138, "step": 4550 }, { "epoch": 1.2384948565240932, "grad_norm": 3.7215845584869385, "learning_rate": 3.264313646846486e-05, "loss": 0.179, "step": 4575 }, { "epoch": 1.245262587980509, "grad_norm": 12.215106010437012, "learning_rate": 3.251779805474782e-05, "loss": 0.3369, "step": 4600 }, { "epoch": 1.2520303194369247, "grad_norm": 13.148759841918945, "learning_rate": 3.239245964103079e-05, "loss": 0.3266, "step": 4625 }, { "epoch": 1.2587980508933405, "grad_norm": 14.143242835998535, "learning_rate": 3.226712122731375e-05, "loss": 0.3503, "step": 4650 }, { "epoch": 1.2655657823497564, "grad_norm": 1.314339280128479, "learning_rate": 3.214178281359671e-05, "loss": 0.1588, "step": 4675 }, { "epoch": 1.2723335138061722, "grad_norm": 13.175312042236328, "learning_rate": 3.2016444399879675e-05, "loss": 0.1884, "step": 4700 }, { "epoch": 1.279101245262588, "grad_norm": 11.514117240905762, "learning_rate": 3.189110598616264e-05, "loss": 0.2922, "step": 4725 }, { "epoch": 1.2858689767190037, "grad_norm": 2.735069990158081, "learning_rate": 3.1765767572445604e-05, "loss": 0.2909, "step": 4750 }, { "epoch": 1.2926367081754195, "grad_norm": 7.173842430114746, "learning_rate": 3.164042915872857e-05, "loss": 0.1868, "step": 4775 }, { "epoch": 1.2994044396318354, "grad_norm": 16.41992950439453, "learning_rate": 3.1515090745011534e-05, "loss": 0.1977, "step": 4800 }, { "epoch": 1.3061721710882512, "grad_norm": 0.7331606149673462, "learning_rate": 3.13897523312945e-05, "loss": 0.3978, "step": 4825 }, { "epoch": 1.312939902544667, "grad_norm": 13.302403450012207, "learning_rate": 3.1264413917577463e-05, "loss": 0.2199, "step": 4850 }, { "epoch": 1.319707634001083, "grad_norm": 6.277172565460205, "learning_rate": 3.113907550386043e-05, "loss": 0.2211, "step": 4875 }, { "epoch": 1.3264753654574988, "grad_norm": 12.060029029846191, "learning_rate": 3.101373709014339e-05, "loss": 0.2111, "step": 4900 }, { "epoch": 1.3332430969139144, "grad_norm": 12.81723403930664, "learning_rate": 3.088839867642635e-05, "loss": 0.2522, "step": 4925 }, { "epoch": 1.3400108283703303, "grad_norm": 0.56070476770401, "learning_rate": 3.0763060262709316e-05, "loss": 0.1966, "step": 4950 }, { "epoch": 1.346778559826746, "grad_norm": 5.43617582321167, "learning_rate": 3.063772184899228e-05, "loss": 0.3197, "step": 4975 }, { "epoch": 1.353546291283162, "grad_norm": 3.4792237281799316, "learning_rate": 3.0512383435275242e-05, "loss": 0.2062, "step": 5000 }, { "epoch": 1.3603140227395776, "grad_norm": 9.568795204162598, "learning_rate": 3.0387045021558207e-05, "loss": 0.3434, "step": 5025 }, { "epoch": 1.3670817541959934, "grad_norm": 10.6992769241333, "learning_rate": 3.0261706607841172e-05, "loss": 0.2204, "step": 5050 }, { "epoch": 1.3738494856524093, "grad_norm": 0.5761290788650513, "learning_rate": 3.0136368194124137e-05, "loss": 0.2141, "step": 5075 }, { "epoch": 1.3806172171088251, "grad_norm": 13.90715217590332, "learning_rate": 3.00110297804071e-05, "loss": 0.1668, "step": 5100 }, { "epoch": 1.387384948565241, "grad_norm": 11.602949142456055, "learning_rate": 2.9885691366690066e-05, "loss": 0.1902, "step": 5125 }, { "epoch": 1.3941526800216568, "grad_norm": 0.09335369616746902, "learning_rate": 2.976035295297303e-05, "loss": 0.1735, "step": 5150 }, { "epoch": 1.4009204114780727, "grad_norm": 1.5695838928222656, "learning_rate": 2.9635014539255996e-05, "loss": 0.2458, "step": 5175 }, { "epoch": 1.4076881429344883, "grad_norm": 1.8779666423797607, "learning_rate": 2.9509676125538954e-05, "loss": 0.2006, "step": 5200 }, { "epoch": 1.4144558743909041, "grad_norm": 10.377031326293945, "learning_rate": 2.938433771182192e-05, "loss": 0.2025, "step": 5225 }, { "epoch": 1.42122360584732, "grad_norm": 10.321118354797363, "learning_rate": 2.9258999298104883e-05, "loss": 0.2157, "step": 5250 }, { "epoch": 1.4279913373037358, "grad_norm": 0.4291195273399353, "learning_rate": 2.9133660884387848e-05, "loss": 0.1963, "step": 5275 }, { "epoch": 1.4347590687601515, "grad_norm": 0.28830039501190186, "learning_rate": 2.9008322470670813e-05, "loss": 0.1947, "step": 5300 }, { "epoch": 1.4415268002165673, "grad_norm": 0.1749316304922104, "learning_rate": 2.8882984056953778e-05, "loss": 0.2793, "step": 5325 }, { "epoch": 1.4482945316729832, "grad_norm": 9.74176025390625, "learning_rate": 2.8757645643236743e-05, "loss": 0.2421, "step": 5350 }, { "epoch": 1.455062263129399, "grad_norm": 0.9622665047645569, "learning_rate": 2.8632307229519707e-05, "loss": 0.2765, "step": 5375 }, { "epoch": 1.4618299945858149, "grad_norm": 0.7690452933311462, "learning_rate": 2.850696881580267e-05, "loss": 0.2429, "step": 5400 }, { "epoch": 1.4685977260422307, "grad_norm": 1.5192012786865234, "learning_rate": 2.8381630402085634e-05, "loss": 0.1464, "step": 5425 }, { "epoch": 1.4753654574986466, "grad_norm": 0.5577375888824463, "learning_rate": 2.8256291988368595e-05, "loss": 0.1942, "step": 5450 }, { "epoch": 1.4821331889550622, "grad_norm": 1.2777996063232422, "learning_rate": 2.813095357465156e-05, "loss": 0.1895, "step": 5475 }, { "epoch": 1.488900920411478, "grad_norm": 8.725980758666992, "learning_rate": 2.8005615160934525e-05, "loss": 0.303, "step": 5500 }, { "epoch": 1.4956686518678939, "grad_norm": 10.138091087341309, "learning_rate": 2.7880276747217486e-05, "loss": 0.2515, "step": 5525 }, { "epoch": 1.5024363833243097, "grad_norm": 2.442488431930542, "learning_rate": 2.775493833350045e-05, "loss": 0.2725, "step": 5550 }, { "epoch": 1.5092041147807254, "grad_norm": 2.7091565132141113, "learning_rate": 2.7629599919783416e-05, "loss": 0.2676, "step": 5575 }, { "epoch": 1.5159718462371412, "grad_norm": 6.794680118560791, "learning_rate": 2.750426150606638e-05, "loss": 0.158, "step": 5600 }, { "epoch": 1.522739577693557, "grad_norm": 1.2340929508209229, "learning_rate": 2.7378923092349345e-05, "loss": 0.2144, "step": 5625 }, { "epoch": 1.529507309149973, "grad_norm": 0.2725580036640167, "learning_rate": 2.725358467863231e-05, "loss": 0.185, "step": 5650 }, { "epoch": 1.5362750406063888, "grad_norm": 3.0790915489196777, "learning_rate": 2.7128246264915275e-05, "loss": 0.128, "step": 5675 }, { "epoch": 1.5430427720628046, "grad_norm": 1.8269541263580322, "learning_rate": 2.700290785119824e-05, "loss": 0.1831, "step": 5700 }, { "epoch": 1.5498105035192205, "grad_norm": 0.6843694448471069, "learning_rate": 2.6877569437481198e-05, "loss": 0.2506, "step": 5725 }, { "epoch": 1.5565782349756363, "grad_norm": 2.2378416061401367, "learning_rate": 2.6752231023764162e-05, "loss": 0.1644, "step": 5750 }, { "epoch": 1.563345966432052, "grad_norm": 11.299232482910156, "learning_rate": 2.6626892610047127e-05, "loss": 0.3624, "step": 5775 }, { "epoch": 1.5701136978884678, "grad_norm": 0.1067349761724472, "learning_rate": 2.6501554196330092e-05, "loss": 0.2144, "step": 5800 }, { "epoch": 1.5768814293448836, "grad_norm": 2.722107172012329, "learning_rate": 2.6376215782613057e-05, "loss": 0.2381, "step": 5825 }, { "epoch": 1.5836491608012992, "grad_norm": 11.49809741973877, "learning_rate": 2.625087736889602e-05, "loss": 0.175, "step": 5850 }, { "epoch": 1.590416892257715, "grad_norm": 16.60283088684082, "learning_rate": 2.6125538955178986e-05, "loss": 0.2896, "step": 5875 }, { "epoch": 1.597184623714131, "grad_norm": 0.3614028990268707, "learning_rate": 2.600020054146195e-05, "loss": 0.2182, "step": 5900 }, { "epoch": 1.6039523551705468, "grad_norm": 1.335888385772705, "learning_rate": 2.5874862127744913e-05, "loss": 0.1903, "step": 5925 }, { "epoch": 1.6107200866269626, "grad_norm": 7.841146945953369, "learning_rate": 2.5749523714027878e-05, "loss": 0.2156, "step": 5950 }, { "epoch": 1.6174878180833785, "grad_norm": 0.4461989402770996, "learning_rate": 2.562418530031084e-05, "loss": 0.1799, "step": 5975 }, { "epoch": 1.6242555495397943, "grad_norm": 6.844948768615723, "learning_rate": 2.5498846886593804e-05, "loss": 0.1853, "step": 6000 }, { "epoch": 1.6310232809962102, "grad_norm": 13.240145683288574, "learning_rate": 2.537350847287677e-05, "loss": 0.2664, "step": 6025 }, { "epoch": 1.637791012452626, "grad_norm": 10.991958618164062, "learning_rate": 2.524817005915973e-05, "loss": 0.2736, "step": 6050 }, { "epoch": 1.6445587439090417, "grad_norm": 18.210996627807617, "learning_rate": 2.5122831645442695e-05, "loss": 0.2818, "step": 6075 }, { "epoch": 1.6513264753654575, "grad_norm": 7.5500006675720215, "learning_rate": 2.499749323172566e-05, "loss": 0.125, "step": 6100 }, { "epoch": 1.6580942068218734, "grad_norm": 0.2722916305065155, "learning_rate": 2.4872154818008624e-05, "loss": 0.2471, "step": 6125 }, { "epoch": 1.664861938278289, "grad_norm": 0.1690392643213272, "learning_rate": 2.474681640429159e-05, "loss": 0.2063, "step": 6150 }, { "epoch": 1.6716296697347048, "grad_norm": 0.8183917999267578, "learning_rate": 2.462147799057455e-05, "loss": 0.2288, "step": 6175 }, { "epoch": 1.6783974011911207, "grad_norm": 12.807232856750488, "learning_rate": 2.4496139576857515e-05, "loss": 0.1793, "step": 6200 }, { "epoch": 1.6851651326475365, "grad_norm": 2.0582687854766846, "learning_rate": 2.437080116314048e-05, "loss": 0.1697, "step": 6225 }, { "epoch": 1.6919328641039524, "grad_norm": 0.955332338809967, "learning_rate": 2.4245462749423445e-05, "loss": 0.1161, "step": 6250 }, { "epoch": 1.6987005955603682, "grad_norm": 0.23503464460372925, "learning_rate": 2.412513787225509e-05, "loss": 0.1578, "step": 6275 }, { "epoch": 1.705468327016784, "grad_norm": 0.7222716808319092, "learning_rate": 2.399979945853805e-05, "loss": 0.2072, "step": 6300 }, { "epoch": 1.7122360584732, "grad_norm": 15.863499641418457, "learning_rate": 2.3874461044821016e-05, "loss": 0.2366, "step": 6325 }, { "epoch": 1.7190037899296156, "grad_norm": 9.790959358215332, "learning_rate": 2.374912263110398e-05, "loss": 0.2673, "step": 6350 }, { "epoch": 1.7257715213860314, "grad_norm": 8.514019012451172, "learning_rate": 2.3623784217386946e-05, "loss": 0.2244, "step": 6375 }, { "epoch": 1.7325392528424473, "grad_norm": 14.102144241333008, "learning_rate": 2.349844580366991e-05, "loss": 0.1813, "step": 6400 }, { "epoch": 1.7393069842988629, "grad_norm": 9.164491653442383, "learning_rate": 2.3373107389952875e-05, "loss": 0.2476, "step": 6425 }, { "epoch": 1.7460747157552787, "grad_norm": 0.8914769887924194, "learning_rate": 2.3247768976235837e-05, "loss": 0.2295, "step": 6450 }, { "epoch": 1.7528424472116946, "grad_norm": 12.076004981994629, "learning_rate": 2.31224305625188e-05, "loss": 0.1614, "step": 6475 }, { "epoch": 1.7596101786681104, "grad_norm": 1.1903892755508423, "learning_rate": 2.2997092148801766e-05, "loss": 0.1336, "step": 6500 }, { "epoch": 1.7663779101245263, "grad_norm": 0.8547431826591492, "learning_rate": 2.287175373508473e-05, "loss": 0.2748, "step": 6525 }, { "epoch": 1.7731456415809421, "grad_norm": 10.832341194152832, "learning_rate": 2.2746415321367696e-05, "loss": 0.242, "step": 6550 }, { "epoch": 1.779913373037358, "grad_norm": 0.6953740119934082, "learning_rate": 2.2621076907650657e-05, "loss": 0.1004, "step": 6575 }, { "epoch": 1.7866811044937738, "grad_norm": 0.1302420198917389, "learning_rate": 2.2495738493933622e-05, "loss": 0.4184, "step": 6600 }, { "epoch": 1.7934488359501894, "grad_norm": 7.436769962310791, "learning_rate": 2.2370400080216587e-05, "loss": 0.1858, "step": 6625 }, { "epoch": 1.8002165674066053, "grad_norm": 20.91210174560547, "learning_rate": 2.224506166649955e-05, "loss": 0.284, "step": 6650 }, { "epoch": 1.8069842988630211, "grad_norm": 0.46657705307006836, "learning_rate": 2.2119723252782516e-05, "loss": 0.1961, "step": 6675 }, { "epoch": 1.8137520303194368, "grad_norm": 6.9242353439331055, "learning_rate": 2.1994384839065478e-05, "loss": 0.2108, "step": 6700 }, { "epoch": 1.8205197617758526, "grad_norm": 13.766924858093262, "learning_rate": 2.1869046425348443e-05, "loss": 0.2072, "step": 6725 }, { "epoch": 1.8272874932322685, "grad_norm": 2.7908565998077393, "learning_rate": 2.1743708011631404e-05, "loss": 0.0987, "step": 6750 }, { "epoch": 1.8340552246886843, "grad_norm": 12.718364715576172, "learning_rate": 2.161836959791437e-05, "loss": 0.1816, "step": 6775 }, { "epoch": 1.8408229561451002, "grad_norm": 12.46013069152832, "learning_rate": 2.1493031184197334e-05, "loss": 0.3094, "step": 6800 }, { "epoch": 1.847590687601516, "grad_norm": 0.13040785491466522, "learning_rate": 2.1367692770480295e-05, "loss": 0.1224, "step": 6825 }, { "epoch": 1.8543584190579319, "grad_norm": 1.2305707931518555, "learning_rate": 2.124235435676326e-05, "loss": 0.083, "step": 6850 }, { "epoch": 1.8611261505143477, "grad_norm": 0.13893193006515503, "learning_rate": 2.1117015943046225e-05, "loss": 0.3004, "step": 6875 }, { "epoch": 1.8678938819707636, "grad_norm": 11.187564849853516, "learning_rate": 2.099167752932919e-05, "loss": 0.2636, "step": 6900 }, { "epoch": 1.8746616134271792, "grad_norm": 8.335643768310547, "learning_rate": 2.0866339115612154e-05, "loss": 0.1974, "step": 6925 }, { "epoch": 1.881429344883595, "grad_norm": 4.112905502319336, "learning_rate": 2.074100070189512e-05, "loss": 0.1114, "step": 6950 }, { "epoch": 1.8881970763400109, "grad_norm": 0.5131503939628601, "learning_rate": 2.061566228817808e-05, "loss": 0.2218, "step": 6975 }, { "epoch": 1.8949648077964265, "grad_norm": 0.07644043117761612, "learning_rate": 2.0490323874461045e-05, "loss": 0.2306, "step": 7000 }, { "epoch": 1.9017325392528424, "grad_norm": 2.5690276622772217, "learning_rate": 2.036498546074401e-05, "loss": 0.1069, "step": 7025 }, { "epoch": 1.9085002707092582, "grad_norm": 5.832399368286133, "learning_rate": 2.0239647047026975e-05, "loss": 0.1663, "step": 7050 }, { "epoch": 1.915268002165674, "grad_norm": 8.066961288452148, "learning_rate": 2.011430863330994e-05, "loss": 0.2029, "step": 7075 }, { "epoch": 1.92203573362209, "grad_norm": 0.3773351013660431, "learning_rate": 1.99889702195929e-05, "loss": 0.0708, "step": 7100 }, { "epoch": 1.9288034650785058, "grad_norm": 12.352056503295898, "learning_rate": 1.9863631805875866e-05, "loss": 0.2559, "step": 7125 }, { "epoch": 1.9355711965349216, "grad_norm": 0.8700584173202515, "learning_rate": 1.973829339215883e-05, "loss": 0.2601, "step": 7150 }, { "epoch": 1.9423389279913374, "grad_norm": 14.849401473999023, "learning_rate": 1.9612954978441795e-05, "loss": 0.2871, "step": 7175 }, { "epoch": 1.949106659447753, "grad_norm": 1.086490511894226, "learning_rate": 1.948761656472476e-05, "loss": 0.3069, "step": 7200 }, { "epoch": 1.955874390904169, "grad_norm": 0.1218922808766365, "learning_rate": 1.9362278151007722e-05, "loss": 0.2398, "step": 7225 }, { "epoch": 1.9626421223605848, "grad_norm": 0.7988734841346741, "learning_rate": 1.9236939737290687e-05, "loss": 0.3118, "step": 7250 }, { "epoch": 1.9694098538170004, "grad_norm": 0.1584845781326294, "learning_rate": 1.911160132357365e-05, "loss": 0.1434, "step": 7275 }, { "epoch": 1.9761775852734162, "grad_norm": 0.6999651193618774, "learning_rate": 1.8986262909856613e-05, "loss": 0.1758, "step": 7300 }, { "epoch": 1.982945316729832, "grad_norm": 0.11756038665771484, "learning_rate": 1.8860924496139578e-05, "loss": 0.1671, "step": 7325 }, { "epoch": 1.989713048186248, "grad_norm": 1.217764139175415, "learning_rate": 1.873558608242254e-05, "loss": 0.1335, "step": 7350 }, { "epoch": 1.9964807796426638, "grad_norm": 8.427165985107422, "learning_rate": 1.8610247668705504e-05, "loss": 0.2056, "step": 7375 }, { "epoch": 2.0, "eval_accuracy": 0.935412134396906, "eval_f1_macro": 0.9274157947563729, "eval_f1_micro": 0.935412134396906, "eval_f1_weighted": 0.9302654119193674, "eval_loss": 0.21082843840122223, "eval_precision_macro": 0.9405383300469721, "eval_precision_micro": 0.935412134396906, "eval_precision_weighted": 0.9403341909054184, "eval_recall_macro": 0.9303843095679831, "eval_recall_micro": 0.935412134396906, "eval_recall_weighted": 0.935412134396906, "eval_runtime": 21.7667, "eval_samples_per_second": 950.305, "eval_steps_per_second": 59.403, "step": 7388 }, { "epoch": 2.0032485110990796, "grad_norm": 0.07127093523740768, "learning_rate": 1.848490925498847e-05, "loss": 0.1575, "step": 7400 }, { "epoch": 2.0100162425554955, "grad_norm": 15.853127479553223, "learning_rate": 1.8359570841271433e-05, "loss": 0.1454, "step": 7425 }, { "epoch": 2.0167839740119113, "grad_norm": 0.1277124136686325, "learning_rate": 1.8234232427554398e-05, "loss": 0.1873, "step": 7450 }, { "epoch": 2.023551705468327, "grad_norm": 11.486383438110352, "learning_rate": 1.8108894013837363e-05, "loss": 0.2182, "step": 7475 }, { "epoch": 2.030319436924743, "grad_norm": 6.678303241729736, "learning_rate": 1.7983555600120324e-05, "loss": 0.124, "step": 7500 }, { "epoch": 2.0370871683811584, "grad_norm": 0.4219975173473358, "learning_rate": 1.785821718640329e-05, "loss": 0.2746, "step": 7525 }, { "epoch": 2.0438548998375743, "grad_norm": 8.490753173828125, "learning_rate": 1.7732878772686254e-05, "loss": 0.1649, "step": 7550 }, { "epoch": 2.05062263129399, "grad_norm": 0.30812859535217285, "learning_rate": 1.760754035896922e-05, "loss": 0.1879, "step": 7575 }, { "epoch": 2.057390362750406, "grad_norm": 8.723641395568848, "learning_rate": 1.7482201945252184e-05, "loss": 0.1553, "step": 7600 }, { "epoch": 2.064158094206822, "grad_norm": 0.0513090118765831, "learning_rate": 1.7356863531535145e-05, "loss": 0.091, "step": 7625 }, { "epoch": 2.0709258256632377, "grad_norm": 0.18665984272956848, "learning_rate": 1.723152511781811e-05, "loss": 0.2645, "step": 7650 }, { "epoch": 2.0776935571196535, "grad_norm": 7.360457420349121, "learning_rate": 1.7106186704101075e-05, "loss": 0.2385, "step": 7675 }, { "epoch": 2.0844612885760694, "grad_norm": 0.102376289665699, "learning_rate": 1.698084829038404e-05, "loss": 0.1201, "step": 7700 }, { "epoch": 2.0912290200324852, "grad_norm": 17.834001541137695, "learning_rate": 1.6855509876667004e-05, "loss": 0.1926, "step": 7725 }, { "epoch": 2.097996751488901, "grad_norm": 0.3818954825401306, "learning_rate": 1.6730171462949966e-05, "loss": 0.1884, "step": 7750 }, { "epoch": 2.104764482945317, "grad_norm": 7.972067356109619, "learning_rate": 1.660483304923293e-05, "loss": 0.1858, "step": 7775 }, { "epoch": 2.1115322144017323, "grad_norm": 9.148263931274414, "learning_rate": 1.6479494635515895e-05, "loss": 0.1961, "step": 7800 }, { "epoch": 2.118299945858148, "grad_norm": 10.137642860412598, "learning_rate": 1.6354156221798857e-05, "loss": 0.1774, "step": 7825 }, { "epoch": 2.125067677314564, "grad_norm": 0.3626168370246887, "learning_rate": 1.622881780808182e-05, "loss": 0.1367, "step": 7850 }, { "epoch": 2.13183540877098, "grad_norm": 0.12807676196098328, "learning_rate": 1.6103479394364783e-05, "loss": 0.1785, "step": 7875 }, { "epoch": 2.1386031402273957, "grad_norm": 1.2243175506591797, "learning_rate": 1.5978140980647748e-05, "loss": 0.1499, "step": 7900 }, { "epoch": 2.1453708716838116, "grad_norm": 11.758691787719727, "learning_rate": 1.5852802566930712e-05, "loss": 0.1778, "step": 7925 }, { "epoch": 2.1521386031402274, "grad_norm": 0.7880843281745911, "learning_rate": 1.5727464153213677e-05, "loss": 0.2024, "step": 7950 }, { "epoch": 2.1589063345966433, "grad_norm": 0.23943960666656494, "learning_rate": 1.5602125739496642e-05, "loss": 0.1409, "step": 7975 }, { "epoch": 2.165674066053059, "grad_norm": 11.204683303833008, "learning_rate": 1.5476787325779607e-05, "loss": 0.2603, "step": 8000 }, { "epoch": 2.172441797509475, "grad_norm": 8.875106811523438, "learning_rate": 1.5351448912062568e-05, "loss": 0.1979, "step": 8025 }, { "epoch": 2.179209528965891, "grad_norm": 10.337849617004395, "learning_rate": 1.5226110498345533e-05, "loss": 0.0886, "step": 8050 }, { "epoch": 2.1859772604223062, "grad_norm": 0.0444558709859848, "learning_rate": 1.5100772084628498e-05, "loss": 0.1459, "step": 8075 }, { "epoch": 2.192744991878722, "grad_norm": 0.4095276892185211, "learning_rate": 1.4975433670911463e-05, "loss": 0.2745, "step": 8100 }, { "epoch": 2.199512723335138, "grad_norm": 0.09346342086791992, "learning_rate": 1.4850095257194427e-05, "loss": 0.213, "step": 8125 }, { "epoch": 2.2062804547915538, "grad_norm": 11.369955062866211, "learning_rate": 1.4724756843477389e-05, "loss": 0.2651, "step": 8150 }, { "epoch": 2.2130481862479696, "grad_norm": 0.17222055792808533, "learning_rate": 1.4599418429760354e-05, "loss": 0.1258, "step": 8175 }, { "epoch": 2.2198159177043855, "grad_norm": 0.5808836221694946, "learning_rate": 1.4474080016043317e-05, "loss": 0.2261, "step": 8200 }, { "epoch": 2.2265836491608013, "grad_norm": 0.37860339879989624, "learning_rate": 1.4348741602326282e-05, "loss": 0.2356, "step": 8225 }, { "epoch": 2.233351380617217, "grad_norm": 7.043012619018555, "learning_rate": 1.4223403188609246e-05, "loss": 0.1983, "step": 8250 }, { "epoch": 2.240119112073633, "grad_norm": 0.04890386760234833, "learning_rate": 1.4098064774892208e-05, "loss": 0.1501, "step": 8275 }, { "epoch": 2.246886843530049, "grad_norm": 1.6778485774993896, "learning_rate": 1.3972726361175173e-05, "loss": 0.2035, "step": 8300 }, { "epoch": 2.2536545749864647, "grad_norm": 0.09249867498874664, "learning_rate": 1.3847387947458137e-05, "loss": 0.1831, "step": 8325 }, { "epoch": 2.26042230644288, "grad_norm": 10.879770278930664, "learning_rate": 1.3722049533741102e-05, "loss": 0.1008, "step": 8350 }, { "epoch": 2.267190037899296, "grad_norm": 0.2881366014480591, "learning_rate": 1.3596711120024067e-05, "loss": 0.0649, "step": 8375 }, { "epoch": 2.273957769355712, "grad_norm": 0.30121418833732605, "learning_rate": 1.3471372706307028e-05, "loss": 0.141, "step": 8400 }, { "epoch": 2.2807255008121277, "grad_norm": 0.31559237837791443, "learning_rate": 1.3346034292589993e-05, "loss": 0.1108, "step": 8425 }, { "epoch": 2.2874932322685435, "grad_norm": 13.67086124420166, "learning_rate": 1.3220695878872958e-05, "loss": 0.1543, "step": 8450 }, { "epoch": 2.2942609637249594, "grad_norm": 4.521094799041748, "learning_rate": 1.3095357465155921e-05, "loss": 0.1735, "step": 8475 }, { "epoch": 2.301028695181375, "grad_norm": 1.390699028968811, "learning_rate": 1.2970019051438886e-05, "loss": 0.1303, "step": 8500 }, { "epoch": 2.307796426637791, "grad_norm": 17.726560592651367, "learning_rate": 1.284468063772185e-05, "loss": 0.2959, "step": 8525 }, { "epoch": 2.314564158094207, "grad_norm": 12.668703079223633, "learning_rate": 1.2719342224004812e-05, "loss": 0.1625, "step": 8550 }, { "epoch": 2.3213318895506228, "grad_norm": 2.370819091796875, "learning_rate": 1.2594003810287777e-05, "loss": 0.1213, "step": 8575 }, { "epoch": 2.3280996210070386, "grad_norm": 6.036921977996826, "learning_rate": 1.2468665396570742e-05, "loss": 0.1634, "step": 8600 }, { "epoch": 2.334867352463454, "grad_norm": 0.8694545030593872, "learning_rate": 1.2343326982853707e-05, "loss": 0.1495, "step": 8625 }, { "epoch": 2.3416350839198703, "grad_norm": 2.2144663333892822, "learning_rate": 1.221798856913667e-05, "loss": 0.1621, "step": 8650 }, { "epoch": 2.3484028153762857, "grad_norm": 10.507080078125, "learning_rate": 1.2092650155419635e-05, "loss": 0.174, "step": 8675 }, { "epoch": 2.3551705468327016, "grad_norm": 0.9843292236328125, "learning_rate": 1.1967311741702598e-05, "loss": 0.1339, "step": 8700 }, { "epoch": 2.3619382782891174, "grad_norm": 0.3039487898349762, "learning_rate": 1.1841973327985562e-05, "loss": 0.1176, "step": 8725 }, { "epoch": 2.3687060097455332, "grad_norm": 0.5395913124084473, "learning_rate": 1.1716634914268526e-05, "loss": 0.2338, "step": 8750 }, { "epoch": 2.375473741201949, "grad_norm": 0.16517315804958344, "learning_rate": 1.1591296500551489e-05, "loss": 0.2132, "step": 8775 }, { "epoch": 2.382241472658365, "grad_norm": 10.029488563537598, "learning_rate": 1.1465958086834453e-05, "loss": 0.188, "step": 8800 }, { "epoch": 2.389009204114781, "grad_norm": 3.222883462905884, "learning_rate": 1.1340619673117417e-05, "loss": 0.2098, "step": 8825 }, { "epoch": 2.3957769355711966, "grad_norm": 0.06654487550258636, "learning_rate": 1.1215281259400381e-05, "loss": 0.1275, "step": 8850 }, { "epoch": 2.4025446670276125, "grad_norm": 2.666473388671875, "learning_rate": 1.1089942845683346e-05, "loss": 0.2144, "step": 8875 }, { "epoch": 2.4093123984840283, "grad_norm": 10.859978675842285, "learning_rate": 1.096460443196631e-05, "loss": 0.1975, "step": 8900 }, { "epoch": 2.416080129940444, "grad_norm": 1.037359356880188, "learning_rate": 1.0839266018249274e-05, "loss": 0.0893, "step": 8925 }, { "epoch": 2.4228478613968596, "grad_norm": 17.77867889404297, "learning_rate": 1.0713927604532239e-05, "loss": 0.178, "step": 8950 }, { "epoch": 2.4296155928532754, "grad_norm": 8.258838653564453, "learning_rate": 1.0588589190815202e-05, "loss": 0.1508, "step": 8975 }, { "epoch": 2.4363833243096913, "grad_norm": 8.984355926513672, "learning_rate": 1.0463250777098165e-05, "loss": 0.1494, "step": 9000 }, { "epoch": 2.443151055766107, "grad_norm": 1.9472849369049072, "learning_rate": 1.0337912363381128e-05, "loss": 0.1767, "step": 9025 }, { "epoch": 2.449918787222523, "grad_norm": 0.1337762475013733, "learning_rate": 1.0212573949664093e-05, "loss": 0.1451, "step": 9050 }, { "epoch": 2.456686518678939, "grad_norm": 0.7223402857780457, "learning_rate": 1.0087235535947058e-05, "loss": 0.1788, "step": 9075 }, { "epoch": 2.4634542501353547, "grad_norm": 12.872135162353516, "learning_rate": 9.961897122230021e-06, "loss": 0.1807, "step": 9100 }, { "epoch": 2.4702219815917705, "grad_norm": 14.778857231140137, "learning_rate": 9.836558708512986e-06, "loss": 0.1621, "step": 9125 }, { "epoch": 2.4769897130481864, "grad_norm": 0.44684821367263794, "learning_rate": 9.71122029479595e-06, "loss": 0.1788, "step": 9150 }, { "epoch": 2.4837574445046022, "grad_norm": 15.21567440032959, "learning_rate": 9.585881881078914e-06, "loss": 0.1615, "step": 9175 }, { "epoch": 2.490525175961018, "grad_norm": 0.5438389182090759, "learning_rate": 9.460543467361878e-06, "loss": 0.0694, "step": 9200 }, { "epoch": 2.4972929074174335, "grad_norm": 0.26495838165283203, "learning_rate": 9.335205053644842e-06, "loss": 0.2251, "step": 9225 }, { "epoch": 2.5040606388738493, "grad_norm": 3.848076343536377, "learning_rate": 9.209866639927806e-06, "loss": 0.118, "step": 9250 }, { "epoch": 2.510828370330265, "grad_norm": 0.0551062636077404, "learning_rate": 9.08452822621077e-06, "loss": 0.066, "step": 9275 }, { "epoch": 2.517596101786681, "grad_norm": 8.600728034973145, "learning_rate": 8.959189812493733e-06, "loss": 0.1336, "step": 9300 }, { "epoch": 2.524363833243097, "grad_norm": 6.382137298583984, "learning_rate": 8.833851398776697e-06, "loss": 0.2049, "step": 9325 }, { "epoch": 2.5311315646995127, "grad_norm": 13.446625709533691, "learning_rate": 8.70851298505966e-06, "loss": 0.1743, "step": 9350 }, { "epoch": 2.5378992961559286, "grad_norm": 6.327456951141357, "learning_rate": 8.583174571342625e-06, "loss": 0.2677, "step": 9375 }, { "epoch": 2.5446670276123444, "grad_norm": 0.14797528088092804, "learning_rate": 8.45783615762559e-06, "loss": 0.1348, "step": 9400 }, { "epoch": 2.5514347590687603, "grad_norm": 0.03272142633795738, "learning_rate": 8.332497743908553e-06, "loss": 0.1655, "step": 9425 }, { "epoch": 2.558202490525176, "grad_norm": 0.09539608657360077, "learning_rate": 8.207159330191518e-06, "loss": 0.2066, "step": 9450 }, { "epoch": 2.564970221981592, "grad_norm": 2.991002321243286, "learning_rate": 8.081820916474483e-06, "loss": 0.1664, "step": 9475 }, { "epoch": 2.5717379534380074, "grad_norm": 0.05500922352075577, "learning_rate": 7.956482502757446e-06, "loss": 0.1241, "step": 9500 }, { "epoch": 2.5785056848944237, "grad_norm": 11.698848724365234, "learning_rate": 7.831144089040409e-06, "loss": 0.2105, "step": 9525 }, { "epoch": 2.585273416350839, "grad_norm": 0.4144781231880188, "learning_rate": 7.705805675323372e-06, "loss": 0.229, "step": 9550 }, { "epoch": 2.592041147807255, "grad_norm": 15.266688346862793, "learning_rate": 7.580467261606338e-06, "loss": 0.1854, "step": 9575 }, { "epoch": 2.5988088792636708, "grad_norm": 0.1844756007194519, "learning_rate": 7.455128847889302e-06, "loss": 0.082, "step": 9600 }, { "epoch": 2.6055766107200866, "grad_norm": 7.458913326263428, "learning_rate": 7.329790434172265e-06, "loss": 0.1278, "step": 9625 }, { "epoch": 2.6123443421765025, "grad_norm": 0.7331855893135071, "learning_rate": 7.20445202045523e-06, "loss": 0.1028, "step": 9650 }, { "epoch": 2.6191120736329183, "grad_norm": 0.3585509657859802, "learning_rate": 7.0791136067381944e-06, "loss": 0.1163, "step": 9675 }, { "epoch": 2.625879805089334, "grad_norm": 0.40765902400016785, "learning_rate": 6.9537751930211575e-06, "loss": 0.1065, "step": 9700 }, { "epoch": 2.63264753654575, "grad_norm": 7.481261730194092, "learning_rate": 6.8284367793041215e-06, "loss": 0.1028, "step": 9725 }, { "epoch": 2.639415268002166, "grad_norm": 1.0196110010147095, "learning_rate": 6.703098365587085e-06, "loss": 0.103, "step": 9750 }, { "epoch": 2.6461829994585813, "grad_norm": 0.306159645318985, "learning_rate": 6.577759951870049e-06, "loss": 0.1617, "step": 9775 }, { "epoch": 2.6529507309149976, "grad_norm": 11.561976432800293, "learning_rate": 6.452421538153014e-06, "loss": 0.1027, "step": 9800 }, { "epoch": 2.659718462371413, "grad_norm": 0.021391283720731735, "learning_rate": 6.327083124435977e-06, "loss": 0.126, "step": 9825 }, { "epoch": 2.666486193827829, "grad_norm": 0.036384038627147675, "learning_rate": 6.201744710718941e-06, "loss": 0.2274, "step": 9850 }, { "epoch": 2.6732539252842447, "grad_norm": 0.39547139406204224, "learning_rate": 6.076406297001905e-06, "loss": 0.2437, "step": 9875 }, { "epoch": 2.6800216567406605, "grad_norm": 1.0845611095428467, "learning_rate": 5.951067883284869e-06, "loss": 0.1372, "step": 9900 }, { "epoch": 2.6867893881970764, "grad_norm": 0.6141884326934814, "learning_rate": 5.825729469567833e-06, "loss": 0.0986, "step": 9925 }, { "epoch": 2.693557119653492, "grad_norm": 6.706904888153076, "learning_rate": 5.700391055850798e-06, "loss": 0.1353, "step": 9950 }, { "epoch": 2.700324851109908, "grad_norm": 0.1707427203655243, "learning_rate": 5.575052642133762e-06, "loss": 0.1917, "step": 9975 }, { "epoch": 2.707092582566324, "grad_norm": 0.16985374689102173, "learning_rate": 5.449714228416725e-06, "loss": 0.0994, "step": 10000 }, { "epoch": 2.7138603140227398, "grad_norm": 10.607304573059082, "learning_rate": 5.324375814699689e-06, "loss": 0.1015, "step": 10025 }, { "epoch": 2.720628045479155, "grad_norm": 0.6444892287254333, "learning_rate": 5.199037400982654e-06, "loss": 0.0935, "step": 10050 }, { "epoch": 2.7273957769355714, "grad_norm": 0.8442856669425964, "learning_rate": 5.073698987265618e-06, "loss": 0.2081, "step": 10075 }, { "epoch": 2.734163508391987, "grad_norm": 0.2734193205833435, "learning_rate": 4.948360573548582e-06, "loss": 0.169, "step": 10100 }, { "epoch": 2.7409312398484027, "grad_norm": 0.19697026908397675, "learning_rate": 4.823022159831545e-06, "loss": 0.1624, "step": 10125 }, { "epoch": 2.7476989713048185, "grad_norm": 12.665722846984863, "learning_rate": 4.69768374611451e-06, "loss": 0.1644, "step": 10150 }, { "epoch": 2.7544667027612344, "grad_norm": 10.231285095214844, "learning_rate": 4.5723453323974735e-06, "loss": 0.1422, "step": 10175 }, { "epoch": 2.7612344342176502, "grad_norm": 10.933349609375, "learning_rate": 4.4470069186804375e-06, "loss": 0.2038, "step": 10200 }, { "epoch": 2.768002165674066, "grad_norm": 10.937248229980469, "learning_rate": 4.3216685049634015e-06, "loss": 0.1339, "step": 10225 }, { "epoch": 2.774769897130482, "grad_norm": 0.07432160526514053, "learning_rate": 4.196330091246365e-06, "loss": 0.2031, "step": 10250 }, { "epoch": 2.781537628586898, "grad_norm": 10.13500690460205, "learning_rate": 4.070991677529329e-06, "loss": 0.1778, "step": 10275 }, { "epoch": 2.7883053600433136, "grad_norm": 0.058682914823293686, "learning_rate": 3.945653263812293e-06, "loss": 0.1036, "step": 10300 }, { "epoch": 2.795073091499729, "grad_norm": 11.469184875488281, "learning_rate": 3.820314850095257e-06, "loss": 0.2036, "step": 10325 }, { "epoch": 2.8018408229561453, "grad_norm": 0.17000257968902588, "learning_rate": 3.6949764363782212e-06, "loss": 0.1875, "step": 10350 }, { "epoch": 2.8086085544125607, "grad_norm": 0.3760491907596588, "learning_rate": 3.5696380226611856e-06, "loss": 0.1494, "step": 10375 }, { "epoch": 2.8153762858689766, "grad_norm": 0.10404614359140396, "learning_rate": 3.4442996089441496e-06, "loss": 0.0973, "step": 10400 }, { "epoch": 2.8221440173253924, "grad_norm": 0.41150447726249695, "learning_rate": 3.3239747317757947e-06, "loss": 0.1634, "step": 10425 }, { "epoch": 2.8289117487818083, "grad_norm": 0.061491526663303375, "learning_rate": 3.1986363180587587e-06, "loss": 0.184, "step": 10450 }, { "epoch": 2.835679480238224, "grad_norm": 1.5745799541473389, "learning_rate": 3.0732979043417226e-06, "loss": 0.1135, "step": 10475 }, { "epoch": 2.84244721169464, "grad_norm": 15.36170482635498, "learning_rate": 2.947959490624687e-06, "loss": 0.0998, "step": 10500 }, { "epoch": 2.849214943151056, "grad_norm": 2.991931915283203, "learning_rate": 2.8226210769076505e-06, "loss": 0.1403, "step": 10525 }, { "epoch": 2.8559826746074717, "grad_norm": 1.1434751749038696, "learning_rate": 2.697282663190615e-06, "loss": 0.1634, "step": 10550 }, { "epoch": 2.8627504060638875, "grad_norm": 5.902674198150635, "learning_rate": 2.571944249473579e-06, "loss": 0.1235, "step": 10575 }, { "epoch": 2.869518137520303, "grad_norm": 0.050640497356653214, "learning_rate": 2.446605835756543e-06, "loss": 0.1516, "step": 10600 }, { "epoch": 2.8762858689767192, "grad_norm": 13.541975021362305, "learning_rate": 2.3212674220395068e-06, "loss": 0.2409, "step": 10625 }, { "epoch": 2.8830536004331346, "grad_norm": 24.88682746887207, "learning_rate": 2.1959290083224707e-06, "loss": 0.1724, "step": 10650 }, { "epoch": 2.8898213318895505, "grad_norm": 0.10497920215129852, "learning_rate": 2.0705905946054347e-06, "loss": 0.1563, "step": 10675 }, { "epoch": 2.8965890633459663, "grad_norm": 3.943291187286377, "learning_rate": 1.945252180888399e-06, "loss": 0.1966, "step": 10700 }, { "epoch": 2.903356794802382, "grad_norm": 0.061565861105918884, "learning_rate": 1.8199137671713628e-06, "loss": 0.1962, "step": 10725 }, { "epoch": 2.910124526258798, "grad_norm": 0.08835487067699432, "learning_rate": 1.694575353454327e-06, "loss": 0.282, "step": 10750 }, { "epoch": 2.916892257715214, "grad_norm": 2.9496688842773438, "learning_rate": 1.5692369397372907e-06, "loss": 0.1138, "step": 10775 }, { "epoch": 2.9236599891716297, "grad_norm": 1.1299965381622314, "learning_rate": 1.4438985260202547e-06, "loss": 0.1773, "step": 10800 }, { "epoch": 2.9304277206280456, "grad_norm": 11.417136192321777, "learning_rate": 1.3185601123032186e-06, "loss": 0.1358, "step": 10825 }, { "epoch": 2.9371954520844614, "grad_norm": 0.06532655656337738, "learning_rate": 1.1932216985861828e-06, "loss": 0.1728, "step": 10850 }, { "epoch": 2.943963183540877, "grad_norm": 5.278496265411377, "learning_rate": 1.0678832848691468e-06, "loss": 0.1416, "step": 10875 }, { "epoch": 2.950730914997293, "grad_norm": 1.0812338590621948, "learning_rate": 9.425448711521107e-07, "loss": 0.1485, "step": 10900 }, { "epoch": 2.9574986464537085, "grad_norm": 0.12115427106618881, "learning_rate": 8.172064574350748e-07, "loss": 0.1201, "step": 10925 }, { "epoch": 2.9642663779101244, "grad_norm": 0.1515658050775528, "learning_rate": 6.918680437180387e-07, "loss": 0.1225, "step": 10950 }, { "epoch": 2.97103410936654, "grad_norm": 0.056250348687171936, "learning_rate": 5.665296300010028e-07, "loss": 0.1442, "step": 10975 }, { "epoch": 2.977801840822956, "grad_norm": 0.10962472856044769, "learning_rate": 4.411912162839667e-07, "loss": 0.0699, "step": 11000 }, { "epoch": 2.984569572279372, "grad_norm": 0.3095192015171051, "learning_rate": 3.1585280256693076e-07, "loss": 0.171, "step": 11025 }, { "epoch": 2.9913373037357878, "grad_norm": 0.24057000875473022, "learning_rate": 1.9051438884989471e-07, "loss": 0.0967, "step": 11050 }, { "epoch": 2.9981050351922036, "grad_norm": 4.0788044929504395, "learning_rate": 6.517597513285872e-08, "loss": 0.1549, "step": 11075 }, { "epoch": 3.0, "eval_accuracy": 0.9390862944162437, "eval_f1_macro": 0.9327706813543745, "eval_f1_micro": 0.9390862944162437, "eval_f1_weighted": 0.9351383662112599, "eval_loss": 0.15377455949783325, "eval_precision_macro": 0.9422812840960479, "eval_precision_micro": 0.9390862944162437, "eval_precision_weighted": 0.9420489414415942, "eval_recall_macro": 0.9346991783726477, "eval_recall_micro": 0.9390862944162437, "eval_recall_weighted": 0.9390862944162437, "eval_runtime": 21.8517, "eval_samples_per_second": 946.61, "eval_steps_per_second": 59.172, "step": 11082 } ], "logging_steps": 25, "max_steps": 11082, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2963923884403200.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }