date2format / checkpoint-11082 /trainer_state.json
syarulzaffi's picture
Upload folder using huggingface_hub
c372e21 verified
Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON
{
"best_metric": 0.15377455949783325,
"best_model_checkpoint": "date2format/checkpoint-11082",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 11082,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00676773145641581,
"grad_norm": null,
"learning_rate": 1.0820559062218215e-06,
"loss": 6.7854,
"step": 25
},
{
"epoch": 0.01353546291283162,
"grad_norm": 18.809656143188477,
"learning_rate": 2.2091974752028858e-06,
"loss": 6.8207,
"step": 50
},
{
"epoch": 0.020303194369247428,
"grad_norm": 18.936891555786133,
"learning_rate": 3.3363390441839496e-06,
"loss": 6.712,
"step": 75
},
{
"epoch": 0.02707092582566324,
"grad_norm": 16.995830535888672,
"learning_rate": 4.4634806131650134e-06,
"loss": 6.6652,
"step": 100
},
{
"epoch": 0.03383865728207905,
"grad_norm": 16.105716705322266,
"learning_rate": 5.590622182146077e-06,
"loss": 6.5538,
"step": 125
},
{
"epoch": 0.040606388738494856,
"grad_norm": 16.41363525390625,
"learning_rate": 6.717763751127142e-06,
"loss": 6.5388,
"step": 150
},
{
"epoch": 0.04737412019491066,
"grad_norm": 16.850101470947266,
"learning_rate": 7.844905320108207e-06,
"loss": 6.4123,
"step": 175
},
{
"epoch": 0.05414185165132648,
"grad_norm": 15.117751121520996,
"learning_rate": 8.972046889089269e-06,
"loss": 6.3959,
"step": 200
},
{
"epoch": 0.060909583107742284,
"grad_norm": 14.722877502441406,
"learning_rate": 1.0099188458070334e-05,
"loss": 6.2513,
"step": 225
},
{
"epoch": 0.0676773145641581,
"grad_norm": 13.270645141601562,
"learning_rate": 1.1226330027051398e-05,
"loss": 6.2321,
"step": 250
},
{
"epoch": 0.0744450460205739,
"grad_norm": 12.977484703063965,
"learning_rate": 1.2353471596032462e-05,
"loss": 6.129,
"step": 275
},
{
"epoch": 0.08121277747698971,
"grad_norm": 12.7619047164917,
"learning_rate": 1.3480613165013526e-05,
"loss": 5.9896,
"step": 300
},
{
"epoch": 0.08798050893340552,
"grad_norm": 12.988408088684082,
"learning_rate": 1.4607754733994591e-05,
"loss": 5.9501,
"step": 325
},
{
"epoch": 0.09474824038982133,
"grad_norm": 13.49023723602295,
"learning_rate": 1.5734896302975655e-05,
"loss": 5.8552,
"step": 350
},
{
"epoch": 0.10151597184623715,
"grad_norm": 13.501762390136719,
"learning_rate": 1.686203787195672e-05,
"loss": 5.7112,
"step": 375
},
{
"epoch": 0.10828370330265295,
"grad_norm": 13.560358047485352,
"learning_rate": 1.7989179440937783e-05,
"loss": 5.5904,
"step": 400
},
{
"epoch": 0.11505143475906876,
"grad_norm": 13.383050918579102,
"learning_rate": 1.9116321009918847e-05,
"loss": 5.5183,
"step": 425
},
{
"epoch": 0.12181916621548457,
"grad_norm": 13.750153541564941,
"learning_rate": 2.024346257889991e-05,
"loss": 5.4114,
"step": 450
},
{
"epoch": 0.12858689767190037,
"grad_norm": 14.731257438659668,
"learning_rate": 2.1370604147880974e-05,
"loss": 5.1422,
"step": 475
},
{
"epoch": 0.1353546291283162,
"grad_norm": 13.863329887390137,
"learning_rate": 2.2497745716862038e-05,
"loss": 5.2576,
"step": 500
},
{
"epoch": 0.142122360584732,
"grad_norm": 14.840801239013672,
"learning_rate": 2.3624887285843102e-05,
"loss": 4.9023,
"step": 525
},
{
"epoch": 0.1488900920411478,
"grad_norm": 13.968029975891113,
"learning_rate": 2.4752028854824166e-05,
"loss": 4.8976,
"step": 550
},
{
"epoch": 0.15565782349756363,
"grad_norm": 15.291852951049805,
"learning_rate": 2.5879170423805233e-05,
"loss": 4.7663,
"step": 575
},
{
"epoch": 0.16242555495397942,
"grad_norm": 14.247614860534668,
"learning_rate": 2.7006311992786293e-05,
"loss": 4.7057,
"step": 600
},
{
"epoch": 0.16919328641039524,
"grad_norm": 15.434479713439941,
"learning_rate": 2.8133453561767357e-05,
"loss": 4.5958,
"step": 625
},
{
"epoch": 0.17596101786681104,
"grad_norm": 15.81191635131836,
"learning_rate": 2.9260595130748425e-05,
"loss": 4.4317,
"step": 650
},
{
"epoch": 0.18272874932322686,
"grad_norm": 15.542362213134766,
"learning_rate": 3.0387736699729485e-05,
"loss": 4.3024,
"step": 675
},
{
"epoch": 0.18949648077964265,
"grad_norm": 15.908782005310059,
"learning_rate": 3.151487826871055e-05,
"loss": 4.2023,
"step": 700
},
{
"epoch": 0.19626421223605847,
"grad_norm": 17.07871437072754,
"learning_rate": 3.264201983769162e-05,
"loss": 4.1671,
"step": 725
},
{
"epoch": 0.2030319436924743,
"grad_norm": 14.337630271911621,
"learning_rate": 3.3769161406672676e-05,
"loss": 3.8202,
"step": 750
},
{
"epoch": 0.20979967514889009,
"grad_norm": 15.395954132080078,
"learning_rate": 3.489630297565375e-05,
"loss": 3.7002,
"step": 775
},
{
"epoch": 0.2165674066053059,
"grad_norm": 17.821945190429688,
"learning_rate": 3.602344454463481e-05,
"loss": 3.7328,
"step": 800
},
{
"epoch": 0.2233351380617217,
"grad_norm": 15.371402740478516,
"learning_rate": 3.715058611361587e-05,
"loss": 3.4769,
"step": 825
},
{
"epoch": 0.23010286951813752,
"grad_norm": 18.089616775512695,
"learning_rate": 3.827772768259694e-05,
"loss": 3.5862,
"step": 850
},
{
"epoch": 0.23687060097455334,
"grad_norm": 16.150415420532227,
"learning_rate": 3.9404869251578e-05,
"loss": 3.2755,
"step": 875
},
{
"epoch": 0.24363833243096913,
"grad_norm": 16.139698028564453,
"learning_rate": 4.0532010820559066e-05,
"loss": 3.1737,
"step": 900
},
{
"epoch": 0.25040606388738496,
"grad_norm": 16.604488372802734,
"learning_rate": 4.165915238954013e-05,
"loss": 3.1445,
"step": 925
},
{
"epoch": 0.25717379534380075,
"grad_norm": 17.532072067260742,
"learning_rate": 4.278629395852119e-05,
"loss": 3.1923,
"step": 950
},
{
"epoch": 0.26394152680021654,
"grad_norm": 17.041091918945312,
"learning_rate": 4.391343552750226e-05,
"loss": 2.8598,
"step": 975
},
{
"epoch": 0.2707092582566324,
"grad_norm": 17.634092330932617,
"learning_rate": 4.504057709648332e-05,
"loss": 2.653,
"step": 1000
},
{
"epoch": 0.2774769897130482,
"grad_norm": 14.8062105178833,
"learning_rate": 4.6167718665464385e-05,
"loss": 2.6197,
"step": 1025
},
{
"epoch": 0.284244721169464,
"grad_norm": 14.701135635375977,
"learning_rate": 4.729486023444545e-05,
"loss": 2.4039,
"step": 1050
},
{
"epoch": 0.2910124526258798,
"grad_norm": 14.139700889587402,
"learning_rate": 4.842200180342651e-05,
"loss": 2.3764,
"step": 1075
},
{
"epoch": 0.2977801840822956,
"grad_norm": 15.656773567199707,
"learning_rate": 4.954914337240758e-05,
"loss": 2.3153,
"step": 1100
},
{
"epoch": 0.3045479155387114,
"grad_norm": 15.101000785827637,
"learning_rate": 4.992479695176978e-05,
"loss": 2.0685,
"step": 1125
},
{
"epoch": 0.31131564699512726,
"grad_norm": 15.826728820800781,
"learning_rate": 4.9799458538052745e-05,
"loss": 2.1117,
"step": 1150
},
{
"epoch": 0.31808337845154305,
"grad_norm": 16.1218204498291,
"learning_rate": 4.967412012433571e-05,
"loss": 2.0013,
"step": 1175
},
{
"epoch": 0.32485110990795885,
"grad_norm": 12.746655464172363,
"learning_rate": 4.9548781710618674e-05,
"loss": 1.8766,
"step": 1200
},
{
"epoch": 0.33161884136437464,
"grad_norm": 11.957603454589844,
"learning_rate": 4.942344329690164e-05,
"loss": 1.7649,
"step": 1225
},
{
"epoch": 0.3383865728207905,
"grad_norm": 14.868532180786133,
"learning_rate": 4.92981048831846e-05,
"loss": 1.6963,
"step": 1250
},
{
"epoch": 0.3451543042772063,
"grad_norm": 12.839776992797852,
"learning_rate": 4.917276646946756e-05,
"loss": 1.4838,
"step": 1275
},
{
"epoch": 0.3519220357336221,
"grad_norm": 13.293111801147461,
"learning_rate": 4.904742805575053e-05,
"loss": 1.393,
"step": 1300
},
{
"epoch": 0.3586897671900379,
"grad_norm": 16.82228660583496,
"learning_rate": 4.892208964203349e-05,
"loss": 1.463,
"step": 1325
},
{
"epoch": 0.3654574986464537,
"grad_norm": 13.664780616760254,
"learning_rate": 4.8796751228316456e-05,
"loss": 1.3157,
"step": 1350
},
{
"epoch": 0.3722252301028695,
"grad_norm": 12.570382118225098,
"learning_rate": 4.867141281459942e-05,
"loss": 1.3402,
"step": 1375
},
{
"epoch": 0.3789929615592853,
"grad_norm": 16.875078201293945,
"learning_rate": 4.8546074400882386e-05,
"loss": 1.3422,
"step": 1400
},
{
"epoch": 0.38576069301570115,
"grad_norm": 12.442831993103027,
"learning_rate": 4.842073598716535e-05,
"loss": 1.1951,
"step": 1425
},
{
"epoch": 0.39252842447211694,
"grad_norm": 13.85045051574707,
"learning_rate": 4.8295397573448316e-05,
"loss": 1.1216,
"step": 1450
},
{
"epoch": 0.39929615592853274,
"grad_norm": 12.063750267028809,
"learning_rate": 4.817005915973128e-05,
"loss": 0.9869,
"step": 1475
},
{
"epoch": 0.4060638873849486,
"grad_norm": 14.56391716003418,
"learning_rate": 4.8044720746014245e-05,
"loss": 1.0033,
"step": 1500
},
{
"epoch": 0.4128316188413644,
"grad_norm": 12.925354957580566,
"learning_rate": 4.79193823322972e-05,
"loss": 0.8906,
"step": 1525
},
{
"epoch": 0.41959935029778017,
"grad_norm": 19.873634338378906,
"learning_rate": 4.779404391858017e-05,
"loss": 0.9104,
"step": 1550
},
{
"epoch": 0.426367081754196,
"grad_norm": 14.321996688842773,
"learning_rate": 4.766870550486313e-05,
"loss": 1.0049,
"step": 1575
},
{
"epoch": 0.4331348132106118,
"grad_norm": 8.742544174194336,
"learning_rate": 4.75433670911461e-05,
"loss": 0.9338,
"step": 1600
},
{
"epoch": 0.4399025446670276,
"grad_norm": 15.12094497680664,
"learning_rate": 4.741802867742906e-05,
"loss": 0.7763,
"step": 1625
},
{
"epoch": 0.4466702761234434,
"grad_norm": 9.084162712097168,
"learning_rate": 4.729269026371203e-05,
"loss": 0.7404,
"step": 1650
},
{
"epoch": 0.45343800757985925,
"grad_norm": 8.377363204956055,
"learning_rate": 4.716735184999499e-05,
"loss": 0.7405,
"step": 1675
},
{
"epoch": 0.46020573903627504,
"grad_norm": 6.151582717895508,
"learning_rate": 4.704201343627796e-05,
"loss": 0.7003,
"step": 1700
},
{
"epoch": 0.46697347049269083,
"grad_norm": 6.058241367340088,
"learning_rate": 4.691667502256092e-05,
"loss": 0.7211,
"step": 1725
},
{
"epoch": 0.4737412019491067,
"grad_norm": 11.786967277526855,
"learning_rate": 4.6791336608843886e-05,
"loss": 0.6573,
"step": 1750
},
{
"epoch": 0.4805089334055225,
"grad_norm": 7.774144649505615,
"learning_rate": 4.6665998195126844e-05,
"loss": 0.6648,
"step": 1775
},
{
"epoch": 0.48727666486193827,
"grad_norm": 13.611273765563965,
"learning_rate": 4.654065978140981e-05,
"loss": 0.6778,
"step": 1800
},
{
"epoch": 0.49404439631835406,
"grad_norm": 17.016263961791992,
"learning_rate": 4.6415321367692774e-05,
"loss": 0.6154,
"step": 1825
},
{
"epoch": 0.5008121277747699,
"grad_norm": 13.737407684326172,
"learning_rate": 4.628998295397574e-05,
"loss": 0.6161,
"step": 1850
},
{
"epoch": 0.5075798592311858,
"grad_norm": 10.071102142333984,
"learning_rate": 4.61646445402587e-05,
"loss": 0.596,
"step": 1875
},
{
"epoch": 0.5143475906876015,
"grad_norm": 8.169589042663574,
"learning_rate": 4.603930612654166e-05,
"loss": 0.5347,
"step": 1900
},
{
"epoch": 0.5211153221440173,
"grad_norm": 13.066163063049316,
"learning_rate": 4.5913967712824627e-05,
"loss": 0.544,
"step": 1925
},
{
"epoch": 0.5278830536004331,
"grad_norm": 3.5582985877990723,
"learning_rate": 4.578862929910759e-05,
"loss": 0.5303,
"step": 1950
},
{
"epoch": 0.5346507850568489,
"grad_norm": 7.025475978851318,
"learning_rate": 4.5663290885390556e-05,
"loss": 0.4748,
"step": 1975
},
{
"epoch": 0.5414185165132648,
"grad_norm": 8.666425704956055,
"learning_rate": 4.553795247167352e-05,
"loss": 0.5852,
"step": 2000
},
{
"epoch": 0.5481862479696805,
"grad_norm": 16.04596710205078,
"learning_rate": 4.5412614057956486e-05,
"loss": 0.4351,
"step": 2025
},
{
"epoch": 0.5549539794260964,
"grad_norm": 15.767374038696289,
"learning_rate": 4.529228918078813e-05,
"loss": 0.6447,
"step": 2050
},
{
"epoch": 0.5617217108825122,
"grad_norm": 8.484817504882812,
"learning_rate": 4.516695076707109e-05,
"loss": 0.4809,
"step": 2075
},
{
"epoch": 0.568489442338928,
"grad_norm": 16.595365524291992,
"learning_rate": 4.504161235335406e-05,
"loss": 0.4824,
"step": 2100
},
{
"epoch": 0.5752571737953438,
"grad_norm": 16.1405029296875,
"learning_rate": 4.491627393963702e-05,
"loss": 0.4274,
"step": 2125
},
{
"epoch": 0.5820249052517596,
"grad_norm": 12.056056022644043,
"learning_rate": 4.4790935525919986e-05,
"loss": 0.4834,
"step": 2150
},
{
"epoch": 0.5887926367081754,
"grad_norm": 4.0205841064453125,
"learning_rate": 4.466559711220295e-05,
"loss": 0.5808,
"step": 2175
},
{
"epoch": 0.5955603681645912,
"grad_norm": 16.41112518310547,
"learning_rate": 4.4540258698485916e-05,
"loss": 0.5036,
"step": 2200
},
{
"epoch": 0.6023280996210071,
"grad_norm": 10.883577346801758,
"learning_rate": 4.441492028476888e-05,
"loss": 0.4325,
"step": 2225
},
{
"epoch": 0.6090958310774228,
"grad_norm": 15.038456916809082,
"learning_rate": 4.4289581871051845e-05,
"loss": 0.4418,
"step": 2250
},
{
"epoch": 0.6158635625338387,
"grad_norm": 3.341290235519409,
"learning_rate": 4.416424345733481e-05,
"loss": 0.4403,
"step": 2275
},
{
"epoch": 0.6226312939902545,
"grad_norm": 8.565878868103027,
"learning_rate": 4.403890504361777e-05,
"loss": 0.376,
"step": 2300
},
{
"epoch": 0.6293990254466703,
"grad_norm": 8.767007827758789,
"learning_rate": 4.391356662990073e-05,
"loss": 0.3205,
"step": 2325
},
{
"epoch": 0.6361667569030861,
"grad_norm": 8.68835163116455,
"learning_rate": 4.37882282161837e-05,
"loss": 0.338,
"step": 2350
},
{
"epoch": 0.6429344883595018,
"grad_norm": 9.748613357543945,
"learning_rate": 4.366288980246666e-05,
"loss": 0.4474,
"step": 2375
},
{
"epoch": 0.6497022198159177,
"grad_norm": 7.754514217376709,
"learning_rate": 4.353755138874963e-05,
"loss": 0.4478,
"step": 2400
},
{
"epoch": 0.6564699512723335,
"grad_norm": 11.137701034545898,
"learning_rate": 4.341221297503259e-05,
"loss": 0.3163,
"step": 2425
},
{
"epoch": 0.6632376827287493,
"grad_norm": 9.576991081237793,
"learning_rate": 4.328687456131556e-05,
"loss": 0.3521,
"step": 2450
},
{
"epoch": 0.6700054141851651,
"grad_norm": 11.974344253540039,
"learning_rate": 4.316153614759852e-05,
"loss": 0.4591,
"step": 2475
},
{
"epoch": 0.676773145641581,
"grad_norm": 15.265382766723633,
"learning_rate": 4.303619773388149e-05,
"loss": 0.3983,
"step": 2500
},
{
"epoch": 0.6835408770979967,
"grad_norm": 12.016144752502441,
"learning_rate": 4.291085932016445e-05,
"loss": 0.5159,
"step": 2525
},
{
"epoch": 0.6903086085544126,
"grad_norm": 12.998587608337402,
"learning_rate": 4.278552090644741e-05,
"loss": 0.4533,
"step": 2550
},
{
"epoch": 0.6970763400108284,
"grad_norm": 21.76568031311035,
"learning_rate": 4.2660182492730374e-05,
"loss": 0.3456,
"step": 2575
},
{
"epoch": 0.7038440714672441,
"grad_norm": 3.395463466644287,
"learning_rate": 4.2539857615562016e-05,
"loss": 0.3654,
"step": 2600
},
{
"epoch": 0.71061180292366,
"grad_norm": 6.759268283843994,
"learning_rate": 4.241451920184498e-05,
"loss": 0.316,
"step": 2625
},
{
"epoch": 0.7173795343800758,
"grad_norm": 19.425579071044922,
"learning_rate": 4.2289180788127945e-05,
"loss": 0.3431,
"step": 2650
},
{
"epoch": 0.7241472658364916,
"grad_norm": 12.407275199890137,
"learning_rate": 4.216384237441091e-05,
"loss": 0.3912,
"step": 2675
},
{
"epoch": 0.7309149972929074,
"grad_norm": 10.216941833496094,
"learning_rate": 4.2038503960693875e-05,
"loss": 0.2893,
"step": 2700
},
{
"epoch": 0.7376827287493233,
"grad_norm": 8.958337783813477,
"learning_rate": 4.191316554697684e-05,
"loss": 0.4489,
"step": 2725
},
{
"epoch": 0.744450460205739,
"grad_norm": 10.17128849029541,
"learning_rate": 4.1787827133259804e-05,
"loss": 0.2845,
"step": 2750
},
{
"epoch": 0.7512181916621549,
"grad_norm": 6.733510494232178,
"learning_rate": 4.166750225609145e-05,
"loss": 0.3935,
"step": 2775
},
{
"epoch": 0.7579859231185706,
"grad_norm": 9.230829238891602,
"learning_rate": 4.154216384237442e-05,
"loss": 0.3809,
"step": 2800
},
{
"epoch": 0.7647536545749865,
"grad_norm": 2.8910205364227295,
"learning_rate": 4.1416825428657375e-05,
"loss": 0.2973,
"step": 2825
},
{
"epoch": 0.7715213860314023,
"grad_norm": 3.628933906555176,
"learning_rate": 4.129148701494034e-05,
"loss": 0.2805,
"step": 2850
},
{
"epoch": 0.778289117487818,
"grad_norm": 7.368860721588135,
"learning_rate": 4.1166148601223305e-05,
"loss": 0.3739,
"step": 2875
},
{
"epoch": 0.7850568489442339,
"grad_norm": 8.461480140686035,
"learning_rate": 4.104081018750627e-05,
"loss": 0.334,
"step": 2900
},
{
"epoch": 0.7918245804006497,
"grad_norm": 10.173233985900879,
"learning_rate": 4.0915471773789235e-05,
"loss": 0.427,
"step": 2925
},
{
"epoch": 0.7985923118570655,
"grad_norm": 4.683242321014404,
"learning_rate": 4.07901333600722e-05,
"loss": 0.2991,
"step": 2950
},
{
"epoch": 0.8053600433134813,
"grad_norm": 10.472857475280762,
"learning_rate": 4.0664794946355164e-05,
"loss": 0.3194,
"step": 2975
},
{
"epoch": 0.8121277747698972,
"grad_norm": 5.410557746887207,
"learning_rate": 4.053945653263813e-05,
"loss": 0.3114,
"step": 3000
},
{
"epoch": 0.8188955062263129,
"grad_norm": 12.810556411743164,
"learning_rate": 4.041411811892109e-05,
"loss": 0.2572,
"step": 3025
},
{
"epoch": 0.8256632376827288,
"grad_norm": 4.909450054168701,
"learning_rate": 4.028877970520405e-05,
"loss": 0.2069,
"step": 3050
},
{
"epoch": 0.8324309691391446,
"grad_norm": 4.909849643707275,
"learning_rate": 4.016344129148702e-05,
"loss": 0.3478,
"step": 3075
},
{
"epoch": 0.8391987005955603,
"grad_norm": 13.538515090942383,
"learning_rate": 4.003810287776998e-05,
"loss": 0.3086,
"step": 3100
},
{
"epoch": 0.8459664320519762,
"grad_norm": 10.3212251663208,
"learning_rate": 3.9912764464052946e-05,
"loss": 0.2613,
"step": 3125
},
{
"epoch": 0.852734163508392,
"grad_norm": 7.68850040435791,
"learning_rate": 3.9787426050335904e-05,
"loss": 0.3387,
"step": 3150
},
{
"epoch": 0.8595018949648078,
"grad_norm": 7.078841209411621,
"learning_rate": 3.966208763661887e-05,
"loss": 0.357,
"step": 3175
},
{
"epoch": 0.8662696264212236,
"grad_norm": 4.790768146514893,
"learning_rate": 3.9536749222901834e-05,
"loss": 0.3459,
"step": 3200
},
{
"epoch": 0.8730373578776394,
"grad_norm": 4.735093593597412,
"learning_rate": 3.94114108091848e-05,
"loss": 0.2948,
"step": 3225
},
{
"epoch": 0.8798050893340552,
"grad_norm": 1.3540657758712769,
"learning_rate": 3.9286072395467764e-05,
"loss": 0.1944,
"step": 3250
},
{
"epoch": 0.8865728207904711,
"grad_norm": 9.657829284667969,
"learning_rate": 3.916073398175073e-05,
"loss": 0.2671,
"step": 3275
},
{
"epoch": 0.8933405522468868,
"grad_norm": 8.425637245178223,
"learning_rate": 3.903539556803369e-05,
"loss": 0.2251,
"step": 3300
},
{
"epoch": 0.9001082837033026,
"grad_norm": 7.622613906860352,
"learning_rate": 3.891005715431666e-05,
"loss": 0.3632,
"step": 3325
},
{
"epoch": 0.9068760151597185,
"grad_norm": 12.632335662841797,
"learning_rate": 3.8784718740599616e-05,
"loss": 0.207,
"step": 3350
},
{
"epoch": 0.9136437466161342,
"grad_norm": 11.750454902648926,
"learning_rate": 3.865938032688258e-05,
"loss": 0.2652,
"step": 3375
},
{
"epoch": 0.9204114780725501,
"grad_norm": 6.89017915725708,
"learning_rate": 3.8534041913165546e-05,
"loss": 0.2457,
"step": 3400
},
{
"epoch": 0.9271792095289659,
"grad_norm": 4.333946704864502,
"learning_rate": 3.840870349944851e-05,
"loss": 0.3324,
"step": 3425
},
{
"epoch": 0.9339469409853817,
"grad_norm": 1.0153127908706665,
"learning_rate": 3.8283365085731475e-05,
"loss": 0.1966,
"step": 3450
},
{
"epoch": 0.9407146724417975,
"grad_norm": 1.8941410779953003,
"learning_rate": 3.815802667201444e-05,
"loss": 0.3098,
"step": 3475
},
{
"epoch": 0.9474824038982134,
"grad_norm": 1.6257559061050415,
"learning_rate": 3.8032688258297405e-05,
"loss": 0.1872,
"step": 3500
},
{
"epoch": 0.9542501353546291,
"grad_norm": 2.3211212158203125,
"learning_rate": 3.790734984458037e-05,
"loss": 0.2334,
"step": 3525
},
{
"epoch": 0.961017866811045,
"grad_norm": 10.049856185913086,
"learning_rate": 3.7782011430863334e-05,
"loss": 0.3128,
"step": 3550
},
{
"epoch": 0.9677855982674608,
"grad_norm": 10.843172073364258,
"learning_rate": 3.76566730171463e-05,
"loss": 0.3414,
"step": 3575
},
{
"epoch": 0.9745533297238765,
"grad_norm": 0.46516045928001404,
"learning_rate": 3.7531334603429264e-05,
"loss": 0.2379,
"step": 3600
},
{
"epoch": 0.9813210611802924,
"grad_norm": 15.376679420471191,
"learning_rate": 3.740599618971222e-05,
"loss": 0.2887,
"step": 3625
},
{
"epoch": 0.9880887926367081,
"grad_norm": 2.3309133052825928,
"learning_rate": 3.728065777599519e-05,
"loss": 0.2105,
"step": 3650
},
{
"epoch": 0.994856524093124,
"grad_norm": 7.93802547454834,
"learning_rate": 3.715531936227815e-05,
"loss": 0.3599,
"step": 3675
},
{
"epoch": 1.0,
"eval_accuracy": 0.9213923132704859,
"eval_f1_macro": 0.9069570851888077,
"eval_f1_micro": 0.9213923132704859,
"eval_f1_weighted": 0.911460261524371,
"eval_loss": 0.2470918595790863,
"eval_precision_macro": 0.9177198642319323,
"eval_precision_micro": 0.9213923132704859,
"eval_precision_weighted": 0.9195993135359931,
"eval_recall_macro": 0.9154624966869864,
"eval_recall_micro": 0.9213923132704859,
"eval_recall_weighted": 0.9213923132704859,
"eval_runtime": 21.8346,
"eval_samples_per_second": 947.351,
"eval_steps_per_second": 59.218,
"step": 3694
},
{
"epoch": 1.0016242555495398,
"grad_norm": 11.75763988494873,
"learning_rate": 3.7029980948561116e-05,
"loss": 0.2012,
"step": 3700
},
{
"epoch": 1.0083919870059557,
"grad_norm": 13.783013343811035,
"learning_rate": 3.690464253484408e-05,
"loss": 0.4173,
"step": 3725
},
{
"epoch": 1.0151597184623715,
"grad_norm": 2.9924991130828857,
"learning_rate": 3.6779304121127046e-05,
"loss": 0.2121,
"step": 3750
},
{
"epoch": 1.0219274499187871,
"grad_norm": 0.5149463415145874,
"learning_rate": 3.665396570741001e-05,
"loss": 0.2768,
"step": 3775
},
{
"epoch": 1.028695181375203,
"grad_norm": 14.207648277282715,
"learning_rate": 3.6528627293692976e-05,
"loss": 0.2858,
"step": 3800
},
{
"epoch": 1.0354629128316188,
"grad_norm": 0.8809079527854919,
"learning_rate": 3.640328887997594e-05,
"loss": 0.1731,
"step": 3825
},
{
"epoch": 1.0422306442880347,
"grad_norm": 4.510576248168945,
"learning_rate": 3.6277950466258905e-05,
"loss": 0.2966,
"step": 3850
},
{
"epoch": 1.0489983757444505,
"grad_norm": 17.010372161865234,
"learning_rate": 3.615261205254186e-05,
"loss": 0.2354,
"step": 3875
},
{
"epoch": 1.0557661072008662,
"grad_norm": 2.4811925888061523,
"learning_rate": 3.602727363882483e-05,
"loss": 0.26,
"step": 3900
},
{
"epoch": 1.062533838657282,
"grad_norm": 0.9241037368774414,
"learning_rate": 3.590193522510779e-05,
"loss": 0.1716,
"step": 3925
},
{
"epoch": 1.0693015701136979,
"grad_norm": 11.593517303466797,
"learning_rate": 3.577659681139076e-05,
"loss": 0.255,
"step": 3950
},
{
"epoch": 1.0760693015701137,
"grad_norm": 8.104696273803711,
"learning_rate": 3.565125839767372e-05,
"loss": 0.2273,
"step": 3975
},
{
"epoch": 1.0828370330265296,
"grad_norm": 12.741314888000488,
"learning_rate": 3.552591998395669e-05,
"loss": 0.2807,
"step": 4000
},
{
"epoch": 1.0896047644829454,
"grad_norm": 0.22231225669384003,
"learning_rate": 3.540058157023965e-05,
"loss": 0.2141,
"step": 4025
},
{
"epoch": 1.096372495939361,
"grad_norm": 12.738525390625,
"learning_rate": 3.527524315652262e-05,
"loss": 0.2796,
"step": 4050
},
{
"epoch": 1.1031402273957769,
"grad_norm": 9.309906005859375,
"learning_rate": 3.514990474280558e-05,
"loss": 0.2185,
"step": 4075
},
{
"epoch": 1.1099079588521927,
"grad_norm": 11.775688171386719,
"learning_rate": 3.502456632908854e-05,
"loss": 0.3496,
"step": 4100
},
{
"epoch": 1.1166756903086086,
"grad_norm": 6.333633899688721,
"learning_rate": 3.4899227915371505e-05,
"loss": 0.2659,
"step": 4125
},
{
"epoch": 1.1234434217650244,
"grad_norm": 0.39873039722442627,
"learning_rate": 3.477388950165447e-05,
"loss": 0.2551,
"step": 4150
},
{
"epoch": 1.13021115322144,
"grad_norm": 0.5979344844818115,
"learning_rate": 3.4648551087937434e-05,
"loss": 0.2102,
"step": 4175
},
{
"epoch": 1.136978884677856,
"grad_norm": 12.985968589782715,
"learning_rate": 3.452321267422039e-05,
"loss": 0.2303,
"step": 4200
},
{
"epoch": 1.1437466161342718,
"grad_norm": 2.175553560256958,
"learning_rate": 3.439787426050336e-05,
"loss": 0.2526,
"step": 4225
},
{
"epoch": 1.1505143475906876,
"grad_norm": 0.49194416403770447,
"learning_rate": 3.427253584678632e-05,
"loss": 0.2483,
"step": 4250
},
{
"epoch": 1.1572820790471035,
"grad_norm": 3.2816367149353027,
"learning_rate": 3.4147197433069287e-05,
"loss": 0.2854,
"step": 4275
},
{
"epoch": 1.1640498105035193,
"grad_norm": 7.387673377990723,
"learning_rate": 3.402185901935225e-05,
"loss": 0.2106,
"step": 4300
},
{
"epoch": 1.1708175419599351,
"grad_norm": 7.8965654373168945,
"learning_rate": 3.3896520605635216e-05,
"loss": 0.2578,
"step": 4325
},
{
"epoch": 1.1775852734163508,
"grad_norm": 1.6988545656204224,
"learning_rate": 3.377118219191818e-05,
"loss": 0.1903,
"step": 4350
},
{
"epoch": 1.1843530048727666,
"grad_norm": 6.279006481170654,
"learning_rate": 3.3645843778201146e-05,
"loss": 0.3026,
"step": 4375
},
{
"epoch": 1.1911207363291825,
"grad_norm": 0.32076123356819153,
"learning_rate": 3.3520505364484104e-05,
"loss": 0.2804,
"step": 4400
},
{
"epoch": 1.1978884677855983,
"grad_norm": 11.526758193969727,
"learning_rate": 3.339516695076707e-05,
"loss": 0.3756,
"step": 4425
},
{
"epoch": 1.2046561992420142,
"grad_norm": 11.514225959777832,
"learning_rate": 3.3269828537050033e-05,
"loss": 0.2868,
"step": 4450
},
{
"epoch": 1.2114239306984298,
"grad_norm": 10.091246604919434,
"learning_rate": 3.3144490123333e-05,
"loss": 0.225,
"step": 4475
},
{
"epoch": 1.2181916621548456,
"grad_norm": 1.9780317544937134,
"learning_rate": 3.301915170961596e-05,
"loss": 0.1927,
"step": 4500
},
{
"epoch": 1.2249593936112615,
"grad_norm": 14.720560073852539,
"learning_rate": 3.289381329589893e-05,
"loss": 0.3458,
"step": 4525
},
{
"epoch": 1.2317271250676773,
"grad_norm": 10.85938835144043,
"learning_rate": 3.276847488218189e-05,
"loss": 0.1138,
"step": 4550
},
{
"epoch": 1.2384948565240932,
"grad_norm": 3.7215845584869385,
"learning_rate": 3.264313646846486e-05,
"loss": 0.179,
"step": 4575
},
{
"epoch": 1.245262587980509,
"grad_norm": 12.215106010437012,
"learning_rate": 3.251779805474782e-05,
"loss": 0.3369,
"step": 4600
},
{
"epoch": 1.2520303194369247,
"grad_norm": 13.148759841918945,
"learning_rate": 3.239245964103079e-05,
"loss": 0.3266,
"step": 4625
},
{
"epoch": 1.2587980508933405,
"grad_norm": 14.143242835998535,
"learning_rate": 3.226712122731375e-05,
"loss": 0.3503,
"step": 4650
},
{
"epoch": 1.2655657823497564,
"grad_norm": 1.314339280128479,
"learning_rate": 3.214178281359671e-05,
"loss": 0.1588,
"step": 4675
},
{
"epoch": 1.2723335138061722,
"grad_norm": 13.175312042236328,
"learning_rate": 3.2016444399879675e-05,
"loss": 0.1884,
"step": 4700
},
{
"epoch": 1.279101245262588,
"grad_norm": 11.514117240905762,
"learning_rate": 3.189110598616264e-05,
"loss": 0.2922,
"step": 4725
},
{
"epoch": 1.2858689767190037,
"grad_norm": 2.735069990158081,
"learning_rate": 3.1765767572445604e-05,
"loss": 0.2909,
"step": 4750
},
{
"epoch": 1.2926367081754195,
"grad_norm": 7.173842430114746,
"learning_rate": 3.164042915872857e-05,
"loss": 0.1868,
"step": 4775
},
{
"epoch": 1.2994044396318354,
"grad_norm": 16.41992950439453,
"learning_rate": 3.1515090745011534e-05,
"loss": 0.1977,
"step": 4800
},
{
"epoch": 1.3061721710882512,
"grad_norm": 0.7331606149673462,
"learning_rate": 3.13897523312945e-05,
"loss": 0.3978,
"step": 4825
},
{
"epoch": 1.312939902544667,
"grad_norm": 13.302403450012207,
"learning_rate": 3.1264413917577463e-05,
"loss": 0.2199,
"step": 4850
},
{
"epoch": 1.319707634001083,
"grad_norm": 6.277172565460205,
"learning_rate": 3.113907550386043e-05,
"loss": 0.2211,
"step": 4875
},
{
"epoch": 1.3264753654574988,
"grad_norm": 12.060029029846191,
"learning_rate": 3.101373709014339e-05,
"loss": 0.2111,
"step": 4900
},
{
"epoch": 1.3332430969139144,
"grad_norm": 12.81723403930664,
"learning_rate": 3.088839867642635e-05,
"loss": 0.2522,
"step": 4925
},
{
"epoch": 1.3400108283703303,
"grad_norm": 0.56070476770401,
"learning_rate": 3.0763060262709316e-05,
"loss": 0.1966,
"step": 4950
},
{
"epoch": 1.346778559826746,
"grad_norm": 5.43617582321167,
"learning_rate": 3.063772184899228e-05,
"loss": 0.3197,
"step": 4975
},
{
"epoch": 1.353546291283162,
"grad_norm": 3.4792237281799316,
"learning_rate": 3.0512383435275242e-05,
"loss": 0.2062,
"step": 5000
},
{
"epoch": 1.3603140227395776,
"grad_norm": 9.568795204162598,
"learning_rate": 3.0387045021558207e-05,
"loss": 0.3434,
"step": 5025
},
{
"epoch": 1.3670817541959934,
"grad_norm": 10.6992769241333,
"learning_rate": 3.0261706607841172e-05,
"loss": 0.2204,
"step": 5050
},
{
"epoch": 1.3738494856524093,
"grad_norm": 0.5761290788650513,
"learning_rate": 3.0136368194124137e-05,
"loss": 0.2141,
"step": 5075
},
{
"epoch": 1.3806172171088251,
"grad_norm": 13.90715217590332,
"learning_rate": 3.00110297804071e-05,
"loss": 0.1668,
"step": 5100
},
{
"epoch": 1.387384948565241,
"grad_norm": 11.602949142456055,
"learning_rate": 2.9885691366690066e-05,
"loss": 0.1902,
"step": 5125
},
{
"epoch": 1.3941526800216568,
"grad_norm": 0.09335369616746902,
"learning_rate": 2.976035295297303e-05,
"loss": 0.1735,
"step": 5150
},
{
"epoch": 1.4009204114780727,
"grad_norm": 1.5695838928222656,
"learning_rate": 2.9635014539255996e-05,
"loss": 0.2458,
"step": 5175
},
{
"epoch": 1.4076881429344883,
"grad_norm": 1.8779666423797607,
"learning_rate": 2.9509676125538954e-05,
"loss": 0.2006,
"step": 5200
},
{
"epoch": 1.4144558743909041,
"grad_norm": 10.377031326293945,
"learning_rate": 2.938433771182192e-05,
"loss": 0.2025,
"step": 5225
},
{
"epoch": 1.42122360584732,
"grad_norm": 10.321118354797363,
"learning_rate": 2.9258999298104883e-05,
"loss": 0.2157,
"step": 5250
},
{
"epoch": 1.4279913373037358,
"grad_norm": 0.4291195273399353,
"learning_rate": 2.9133660884387848e-05,
"loss": 0.1963,
"step": 5275
},
{
"epoch": 1.4347590687601515,
"grad_norm": 0.28830039501190186,
"learning_rate": 2.9008322470670813e-05,
"loss": 0.1947,
"step": 5300
},
{
"epoch": 1.4415268002165673,
"grad_norm": 0.1749316304922104,
"learning_rate": 2.8882984056953778e-05,
"loss": 0.2793,
"step": 5325
},
{
"epoch": 1.4482945316729832,
"grad_norm": 9.74176025390625,
"learning_rate": 2.8757645643236743e-05,
"loss": 0.2421,
"step": 5350
},
{
"epoch": 1.455062263129399,
"grad_norm": 0.9622665047645569,
"learning_rate": 2.8632307229519707e-05,
"loss": 0.2765,
"step": 5375
},
{
"epoch": 1.4618299945858149,
"grad_norm": 0.7690452933311462,
"learning_rate": 2.850696881580267e-05,
"loss": 0.2429,
"step": 5400
},
{
"epoch": 1.4685977260422307,
"grad_norm": 1.5192012786865234,
"learning_rate": 2.8381630402085634e-05,
"loss": 0.1464,
"step": 5425
},
{
"epoch": 1.4753654574986466,
"grad_norm": 0.5577375888824463,
"learning_rate": 2.8256291988368595e-05,
"loss": 0.1942,
"step": 5450
},
{
"epoch": 1.4821331889550622,
"grad_norm": 1.2777996063232422,
"learning_rate": 2.813095357465156e-05,
"loss": 0.1895,
"step": 5475
},
{
"epoch": 1.488900920411478,
"grad_norm": 8.725980758666992,
"learning_rate": 2.8005615160934525e-05,
"loss": 0.303,
"step": 5500
},
{
"epoch": 1.4956686518678939,
"grad_norm": 10.138091087341309,
"learning_rate": 2.7880276747217486e-05,
"loss": 0.2515,
"step": 5525
},
{
"epoch": 1.5024363833243097,
"grad_norm": 2.442488431930542,
"learning_rate": 2.775493833350045e-05,
"loss": 0.2725,
"step": 5550
},
{
"epoch": 1.5092041147807254,
"grad_norm": 2.7091565132141113,
"learning_rate": 2.7629599919783416e-05,
"loss": 0.2676,
"step": 5575
},
{
"epoch": 1.5159718462371412,
"grad_norm": 6.794680118560791,
"learning_rate": 2.750426150606638e-05,
"loss": 0.158,
"step": 5600
},
{
"epoch": 1.522739577693557,
"grad_norm": 1.2340929508209229,
"learning_rate": 2.7378923092349345e-05,
"loss": 0.2144,
"step": 5625
},
{
"epoch": 1.529507309149973,
"grad_norm": 0.2725580036640167,
"learning_rate": 2.725358467863231e-05,
"loss": 0.185,
"step": 5650
},
{
"epoch": 1.5362750406063888,
"grad_norm": 3.0790915489196777,
"learning_rate": 2.7128246264915275e-05,
"loss": 0.128,
"step": 5675
},
{
"epoch": 1.5430427720628046,
"grad_norm": 1.8269541263580322,
"learning_rate": 2.700290785119824e-05,
"loss": 0.1831,
"step": 5700
},
{
"epoch": 1.5498105035192205,
"grad_norm": 0.6843694448471069,
"learning_rate": 2.6877569437481198e-05,
"loss": 0.2506,
"step": 5725
},
{
"epoch": 1.5565782349756363,
"grad_norm": 2.2378416061401367,
"learning_rate": 2.6752231023764162e-05,
"loss": 0.1644,
"step": 5750
},
{
"epoch": 1.563345966432052,
"grad_norm": 11.299232482910156,
"learning_rate": 2.6626892610047127e-05,
"loss": 0.3624,
"step": 5775
},
{
"epoch": 1.5701136978884678,
"grad_norm": 0.1067349761724472,
"learning_rate": 2.6501554196330092e-05,
"loss": 0.2144,
"step": 5800
},
{
"epoch": 1.5768814293448836,
"grad_norm": 2.722107172012329,
"learning_rate": 2.6376215782613057e-05,
"loss": 0.2381,
"step": 5825
},
{
"epoch": 1.5836491608012992,
"grad_norm": 11.49809741973877,
"learning_rate": 2.625087736889602e-05,
"loss": 0.175,
"step": 5850
},
{
"epoch": 1.590416892257715,
"grad_norm": 16.60283088684082,
"learning_rate": 2.6125538955178986e-05,
"loss": 0.2896,
"step": 5875
},
{
"epoch": 1.597184623714131,
"grad_norm": 0.3614028990268707,
"learning_rate": 2.600020054146195e-05,
"loss": 0.2182,
"step": 5900
},
{
"epoch": 1.6039523551705468,
"grad_norm": 1.335888385772705,
"learning_rate": 2.5874862127744913e-05,
"loss": 0.1903,
"step": 5925
},
{
"epoch": 1.6107200866269626,
"grad_norm": 7.841146945953369,
"learning_rate": 2.5749523714027878e-05,
"loss": 0.2156,
"step": 5950
},
{
"epoch": 1.6174878180833785,
"grad_norm": 0.4461989402770996,
"learning_rate": 2.562418530031084e-05,
"loss": 0.1799,
"step": 5975
},
{
"epoch": 1.6242555495397943,
"grad_norm": 6.844948768615723,
"learning_rate": 2.5498846886593804e-05,
"loss": 0.1853,
"step": 6000
},
{
"epoch": 1.6310232809962102,
"grad_norm": 13.240145683288574,
"learning_rate": 2.537350847287677e-05,
"loss": 0.2664,
"step": 6025
},
{
"epoch": 1.637791012452626,
"grad_norm": 10.991958618164062,
"learning_rate": 2.524817005915973e-05,
"loss": 0.2736,
"step": 6050
},
{
"epoch": 1.6445587439090417,
"grad_norm": 18.210996627807617,
"learning_rate": 2.5122831645442695e-05,
"loss": 0.2818,
"step": 6075
},
{
"epoch": 1.6513264753654575,
"grad_norm": 7.5500006675720215,
"learning_rate": 2.499749323172566e-05,
"loss": 0.125,
"step": 6100
},
{
"epoch": 1.6580942068218734,
"grad_norm": 0.2722916305065155,
"learning_rate": 2.4872154818008624e-05,
"loss": 0.2471,
"step": 6125
},
{
"epoch": 1.664861938278289,
"grad_norm": 0.1690392643213272,
"learning_rate": 2.474681640429159e-05,
"loss": 0.2063,
"step": 6150
},
{
"epoch": 1.6716296697347048,
"grad_norm": 0.8183917999267578,
"learning_rate": 2.462147799057455e-05,
"loss": 0.2288,
"step": 6175
},
{
"epoch": 1.6783974011911207,
"grad_norm": 12.807232856750488,
"learning_rate": 2.4496139576857515e-05,
"loss": 0.1793,
"step": 6200
},
{
"epoch": 1.6851651326475365,
"grad_norm": 2.0582687854766846,
"learning_rate": 2.437080116314048e-05,
"loss": 0.1697,
"step": 6225
},
{
"epoch": 1.6919328641039524,
"grad_norm": 0.955332338809967,
"learning_rate": 2.4245462749423445e-05,
"loss": 0.1161,
"step": 6250
},
{
"epoch": 1.6987005955603682,
"grad_norm": 0.23503464460372925,
"learning_rate": 2.412513787225509e-05,
"loss": 0.1578,
"step": 6275
},
{
"epoch": 1.705468327016784,
"grad_norm": 0.7222716808319092,
"learning_rate": 2.399979945853805e-05,
"loss": 0.2072,
"step": 6300
},
{
"epoch": 1.7122360584732,
"grad_norm": 15.863499641418457,
"learning_rate": 2.3874461044821016e-05,
"loss": 0.2366,
"step": 6325
},
{
"epoch": 1.7190037899296156,
"grad_norm": 9.790959358215332,
"learning_rate": 2.374912263110398e-05,
"loss": 0.2673,
"step": 6350
},
{
"epoch": 1.7257715213860314,
"grad_norm": 8.514019012451172,
"learning_rate": 2.3623784217386946e-05,
"loss": 0.2244,
"step": 6375
},
{
"epoch": 1.7325392528424473,
"grad_norm": 14.102144241333008,
"learning_rate": 2.349844580366991e-05,
"loss": 0.1813,
"step": 6400
},
{
"epoch": 1.7393069842988629,
"grad_norm": 9.164491653442383,
"learning_rate": 2.3373107389952875e-05,
"loss": 0.2476,
"step": 6425
},
{
"epoch": 1.7460747157552787,
"grad_norm": 0.8914769887924194,
"learning_rate": 2.3247768976235837e-05,
"loss": 0.2295,
"step": 6450
},
{
"epoch": 1.7528424472116946,
"grad_norm": 12.076004981994629,
"learning_rate": 2.31224305625188e-05,
"loss": 0.1614,
"step": 6475
},
{
"epoch": 1.7596101786681104,
"grad_norm": 1.1903892755508423,
"learning_rate": 2.2997092148801766e-05,
"loss": 0.1336,
"step": 6500
},
{
"epoch": 1.7663779101245263,
"grad_norm": 0.8547431826591492,
"learning_rate": 2.287175373508473e-05,
"loss": 0.2748,
"step": 6525
},
{
"epoch": 1.7731456415809421,
"grad_norm": 10.832341194152832,
"learning_rate": 2.2746415321367696e-05,
"loss": 0.242,
"step": 6550
},
{
"epoch": 1.779913373037358,
"grad_norm": 0.6953740119934082,
"learning_rate": 2.2621076907650657e-05,
"loss": 0.1004,
"step": 6575
},
{
"epoch": 1.7866811044937738,
"grad_norm": 0.1302420198917389,
"learning_rate": 2.2495738493933622e-05,
"loss": 0.4184,
"step": 6600
},
{
"epoch": 1.7934488359501894,
"grad_norm": 7.436769962310791,
"learning_rate": 2.2370400080216587e-05,
"loss": 0.1858,
"step": 6625
},
{
"epoch": 1.8002165674066053,
"grad_norm": 20.91210174560547,
"learning_rate": 2.224506166649955e-05,
"loss": 0.284,
"step": 6650
},
{
"epoch": 1.8069842988630211,
"grad_norm": 0.46657705307006836,
"learning_rate": 2.2119723252782516e-05,
"loss": 0.1961,
"step": 6675
},
{
"epoch": 1.8137520303194368,
"grad_norm": 6.9242353439331055,
"learning_rate": 2.1994384839065478e-05,
"loss": 0.2108,
"step": 6700
},
{
"epoch": 1.8205197617758526,
"grad_norm": 13.766924858093262,
"learning_rate": 2.1869046425348443e-05,
"loss": 0.2072,
"step": 6725
},
{
"epoch": 1.8272874932322685,
"grad_norm": 2.7908565998077393,
"learning_rate": 2.1743708011631404e-05,
"loss": 0.0987,
"step": 6750
},
{
"epoch": 1.8340552246886843,
"grad_norm": 12.718364715576172,
"learning_rate": 2.161836959791437e-05,
"loss": 0.1816,
"step": 6775
},
{
"epoch": 1.8408229561451002,
"grad_norm": 12.46013069152832,
"learning_rate": 2.1493031184197334e-05,
"loss": 0.3094,
"step": 6800
},
{
"epoch": 1.847590687601516,
"grad_norm": 0.13040785491466522,
"learning_rate": 2.1367692770480295e-05,
"loss": 0.1224,
"step": 6825
},
{
"epoch": 1.8543584190579319,
"grad_norm": 1.2305707931518555,
"learning_rate": 2.124235435676326e-05,
"loss": 0.083,
"step": 6850
},
{
"epoch": 1.8611261505143477,
"grad_norm": 0.13893193006515503,
"learning_rate": 2.1117015943046225e-05,
"loss": 0.3004,
"step": 6875
},
{
"epoch": 1.8678938819707636,
"grad_norm": 11.187564849853516,
"learning_rate": 2.099167752932919e-05,
"loss": 0.2636,
"step": 6900
},
{
"epoch": 1.8746616134271792,
"grad_norm": 8.335643768310547,
"learning_rate": 2.0866339115612154e-05,
"loss": 0.1974,
"step": 6925
},
{
"epoch": 1.881429344883595,
"grad_norm": 4.112905502319336,
"learning_rate": 2.074100070189512e-05,
"loss": 0.1114,
"step": 6950
},
{
"epoch": 1.8881970763400109,
"grad_norm": 0.5131503939628601,
"learning_rate": 2.061566228817808e-05,
"loss": 0.2218,
"step": 6975
},
{
"epoch": 1.8949648077964265,
"grad_norm": 0.07644043117761612,
"learning_rate": 2.0490323874461045e-05,
"loss": 0.2306,
"step": 7000
},
{
"epoch": 1.9017325392528424,
"grad_norm": 2.5690276622772217,
"learning_rate": 2.036498546074401e-05,
"loss": 0.1069,
"step": 7025
},
{
"epoch": 1.9085002707092582,
"grad_norm": 5.832399368286133,
"learning_rate": 2.0239647047026975e-05,
"loss": 0.1663,
"step": 7050
},
{
"epoch": 1.915268002165674,
"grad_norm": 8.066961288452148,
"learning_rate": 2.011430863330994e-05,
"loss": 0.2029,
"step": 7075
},
{
"epoch": 1.92203573362209,
"grad_norm": 0.3773351013660431,
"learning_rate": 1.99889702195929e-05,
"loss": 0.0708,
"step": 7100
},
{
"epoch": 1.9288034650785058,
"grad_norm": 12.352056503295898,
"learning_rate": 1.9863631805875866e-05,
"loss": 0.2559,
"step": 7125
},
{
"epoch": 1.9355711965349216,
"grad_norm": 0.8700584173202515,
"learning_rate": 1.973829339215883e-05,
"loss": 0.2601,
"step": 7150
},
{
"epoch": 1.9423389279913374,
"grad_norm": 14.849401473999023,
"learning_rate": 1.9612954978441795e-05,
"loss": 0.2871,
"step": 7175
},
{
"epoch": 1.949106659447753,
"grad_norm": 1.086490511894226,
"learning_rate": 1.948761656472476e-05,
"loss": 0.3069,
"step": 7200
},
{
"epoch": 1.955874390904169,
"grad_norm": 0.1218922808766365,
"learning_rate": 1.9362278151007722e-05,
"loss": 0.2398,
"step": 7225
},
{
"epoch": 1.9626421223605848,
"grad_norm": 0.7988734841346741,
"learning_rate": 1.9236939737290687e-05,
"loss": 0.3118,
"step": 7250
},
{
"epoch": 1.9694098538170004,
"grad_norm": 0.1584845781326294,
"learning_rate": 1.911160132357365e-05,
"loss": 0.1434,
"step": 7275
},
{
"epoch": 1.9761775852734162,
"grad_norm": 0.6999651193618774,
"learning_rate": 1.8986262909856613e-05,
"loss": 0.1758,
"step": 7300
},
{
"epoch": 1.982945316729832,
"grad_norm": 0.11756038665771484,
"learning_rate": 1.8860924496139578e-05,
"loss": 0.1671,
"step": 7325
},
{
"epoch": 1.989713048186248,
"grad_norm": 1.217764139175415,
"learning_rate": 1.873558608242254e-05,
"loss": 0.1335,
"step": 7350
},
{
"epoch": 1.9964807796426638,
"grad_norm": 8.427165985107422,
"learning_rate": 1.8610247668705504e-05,
"loss": 0.2056,
"step": 7375
},
{
"epoch": 2.0,
"eval_accuracy": 0.935412134396906,
"eval_f1_macro": 0.9274157947563729,
"eval_f1_micro": 0.935412134396906,
"eval_f1_weighted": 0.9302654119193674,
"eval_loss": 0.21082843840122223,
"eval_precision_macro": 0.9405383300469721,
"eval_precision_micro": 0.935412134396906,
"eval_precision_weighted": 0.9403341909054184,
"eval_recall_macro": 0.9303843095679831,
"eval_recall_micro": 0.935412134396906,
"eval_recall_weighted": 0.935412134396906,
"eval_runtime": 21.7667,
"eval_samples_per_second": 950.305,
"eval_steps_per_second": 59.403,
"step": 7388
},
{
"epoch": 2.0032485110990796,
"grad_norm": 0.07127093523740768,
"learning_rate": 1.848490925498847e-05,
"loss": 0.1575,
"step": 7400
},
{
"epoch": 2.0100162425554955,
"grad_norm": 15.853127479553223,
"learning_rate": 1.8359570841271433e-05,
"loss": 0.1454,
"step": 7425
},
{
"epoch": 2.0167839740119113,
"grad_norm": 0.1277124136686325,
"learning_rate": 1.8234232427554398e-05,
"loss": 0.1873,
"step": 7450
},
{
"epoch": 2.023551705468327,
"grad_norm": 11.486383438110352,
"learning_rate": 1.8108894013837363e-05,
"loss": 0.2182,
"step": 7475
},
{
"epoch": 2.030319436924743,
"grad_norm": 6.678303241729736,
"learning_rate": 1.7983555600120324e-05,
"loss": 0.124,
"step": 7500
},
{
"epoch": 2.0370871683811584,
"grad_norm": 0.4219975173473358,
"learning_rate": 1.785821718640329e-05,
"loss": 0.2746,
"step": 7525
},
{
"epoch": 2.0438548998375743,
"grad_norm": 8.490753173828125,
"learning_rate": 1.7732878772686254e-05,
"loss": 0.1649,
"step": 7550
},
{
"epoch": 2.05062263129399,
"grad_norm": 0.30812859535217285,
"learning_rate": 1.760754035896922e-05,
"loss": 0.1879,
"step": 7575
},
{
"epoch": 2.057390362750406,
"grad_norm": 8.723641395568848,
"learning_rate": 1.7482201945252184e-05,
"loss": 0.1553,
"step": 7600
},
{
"epoch": 2.064158094206822,
"grad_norm": 0.0513090118765831,
"learning_rate": 1.7356863531535145e-05,
"loss": 0.091,
"step": 7625
},
{
"epoch": 2.0709258256632377,
"grad_norm": 0.18665984272956848,
"learning_rate": 1.723152511781811e-05,
"loss": 0.2645,
"step": 7650
},
{
"epoch": 2.0776935571196535,
"grad_norm": 7.360457420349121,
"learning_rate": 1.7106186704101075e-05,
"loss": 0.2385,
"step": 7675
},
{
"epoch": 2.0844612885760694,
"grad_norm": 0.102376289665699,
"learning_rate": 1.698084829038404e-05,
"loss": 0.1201,
"step": 7700
},
{
"epoch": 2.0912290200324852,
"grad_norm": 17.834001541137695,
"learning_rate": 1.6855509876667004e-05,
"loss": 0.1926,
"step": 7725
},
{
"epoch": 2.097996751488901,
"grad_norm": 0.3818954825401306,
"learning_rate": 1.6730171462949966e-05,
"loss": 0.1884,
"step": 7750
},
{
"epoch": 2.104764482945317,
"grad_norm": 7.972067356109619,
"learning_rate": 1.660483304923293e-05,
"loss": 0.1858,
"step": 7775
},
{
"epoch": 2.1115322144017323,
"grad_norm": 9.148263931274414,
"learning_rate": 1.6479494635515895e-05,
"loss": 0.1961,
"step": 7800
},
{
"epoch": 2.118299945858148,
"grad_norm": 10.137642860412598,
"learning_rate": 1.6354156221798857e-05,
"loss": 0.1774,
"step": 7825
},
{
"epoch": 2.125067677314564,
"grad_norm": 0.3626168370246887,
"learning_rate": 1.622881780808182e-05,
"loss": 0.1367,
"step": 7850
},
{
"epoch": 2.13183540877098,
"grad_norm": 0.12807676196098328,
"learning_rate": 1.6103479394364783e-05,
"loss": 0.1785,
"step": 7875
},
{
"epoch": 2.1386031402273957,
"grad_norm": 1.2243175506591797,
"learning_rate": 1.5978140980647748e-05,
"loss": 0.1499,
"step": 7900
},
{
"epoch": 2.1453708716838116,
"grad_norm": 11.758691787719727,
"learning_rate": 1.5852802566930712e-05,
"loss": 0.1778,
"step": 7925
},
{
"epoch": 2.1521386031402274,
"grad_norm": 0.7880843281745911,
"learning_rate": 1.5727464153213677e-05,
"loss": 0.2024,
"step": 7950
},
{
"epoch": 2.1589063345966433,
"grad_norm": 0.23943960666656494,
"learning_rate": 1.5602125739496642e-05,
"loss": 0.1409,
"step": 7975
},
{
"epoch": 2.165674066053059,
"grad_norm": 11.204683303833008,
"learning_rate": 1.5476787325779607e-05,
"loss": 0.2603,
"step": 8000
},
{
"epoch": 2.172441797509475,
"grad_norm": 8.875106811523438,
"learning_rate": 1.5351448912062568e-05,
"loss": 0.1979,
"step": 8025
},
{
"epoch": 2.179209528965891,
"grad_norm": 10.337849617004395,
"learning_rate": 1.5226110498345533e-05,
"loss": 0.0886,
"step": 8050
},
{
"epoch": 2.1859772604223062,
"grad_norm": 0.0444558709859848,
"learning_rate": 1.5100772084628498e-05,
"loss": 0.1459,
"step": 8075
},
{
"epoch": 2.192744991878722,
"grad_norm": 0.4095276892185211,
"learning_rate": 1.4975433670911463e-05,
"loss": 0.2745,
"step": 8100
},
{
"epoch": 2.199512723335138,
"grad_norm": 0.09346342086791992,
"learning_rate": 1.4850095257194427e-05,
"loss": 0.213,
"step": 8125
},
{
"epoch": 2.2062804547915538,
"grad_norm": 11.369955062866211,
"learning_rate": 1.4724756843477389e-05,
"loss": 0.2651,
"step": 8150
},
{
"epoch": 2.2130481862479696,
"grad_norm": 0.17222055792808533,
"learning_rate": 1.4599418429760354e-05,
"loss": 0.1258,
"step": 8175
},
{
"epoch": 2.2198159177043855,
"grad_norm": 0.5808836221694946,
"learning_rate": 1.4474080016043317e-05,
"loss": 0.2261,
"step": 8200
},
{
"epoch": 2.2265836491608013,
"grad_norm": 0.37860339879989624,
"learning_rate": 1.4348741602326282e-05,
"loss": 0.2356,
"step": 8225
},
{
"epoch": 2.233351380617217,
"grad_norm": 7.043012619018555,
"learning_rate": 1.4223403188609246e-05,
"loss": 0.1983,
"step": 8250
},
{
"epoch": 2.240119112073633,
"grad_norm": 0.04890386760234833,
"learning_rate": 1.4098064774892208e-05,
"loss": 0.1501,
"step": 8275
},
{
"epoch": 2.246886843530049,
"grad_norm": 1.6778485774993896,
"learning_rate": 1.3972726361175173e-05,
"loss": 0.2035,
"step": 8300
},
{
"epoch": 2.2536545749864647,
"grad_norm": 0.09249867498874664,
"learning_rate": 1.3847387947458137e-05,
"loss": 0.1831,
"step": 8325
},
{
"epoch": 2.26042230644288,
"grad_norm": 10.879770278930664,
"learning_rate": 1.3722049533741102e-05,
"loss": 0.1008,
"step": 8350
},
{
"epoch": 2.267190037899296,
"grad_norm": 0.2881366014480591,
"learning_rate": 1.3596711120024067e-05,
"loss": 0.0649,
"step": 8375
},
{
"epoch": 2.273957769355712,
"grad_norm": 0.30121418833732605,
"learning_rate": 1.3471372706307028e-05,
"loss": 0.141,
"step": 8400
},
{
"epoch": 2.2807255008121277,
"grad_norm": 0.31559237837791443,
"learning_rate": 1.3346034292589993e-05,
"loss": 0.1108,
"step": 8425
},
{
"epoch": 2.2874932322685435,
"grad_norm": 13.67086124420166,
"learning_rate": 1.3220695878872958e-05,
"loss": 0.1543,
"step": 8450
},
{
"epoch": 2.2942609637249594,
"grad_norm": 4.521094799041748,
"learning_rate": 1.3095357465155921e-05,
"loss": 0.1735,
"step": 8475
},
{
"epoch": 2.301028695181375,
"grad_norm": 1.390699028968811,
"learning_rate": 1.2970019051438886e-05,
"loss": 0.1303,
"step": 8500
},
{
"epoch": 2.307796426637791,
"grad_norm": 17.726560592651367,
"learning_rate": 1.284468063772185e-05,
"loss": 0.2959,
"step": 8525
},
{
"epoch": 2.314564158094207,
"grad_norm": 12.668703079223633,
"learning_rate": 1.2719342224004812e-05,
"loss": 0.1625,
"step": 8550
},
{
"epoch": 2.3213318895506228,
"grad_norm": 2.370819091796875,
"learning_rate": 1.2594003810287777e-05,
"loss": 0.1213,
"step": 8575
},
{
"epoch": 2.3280996210070386,
"grad_norm": 6.036921977996826,
"learning_rate": 1.2468665396570742e-05,
"loss": 0.1634,
"step": 8600
},
{
"epoch": 2.334867352463454,
"grad_norm": 0.8694545030593872,
"learning_rate": 1.2343326982853707e-05,
"loss": 0.1495,
"step": 8625
},
{
"epoch": 2.3416350839198703,
"grad_norm": 2.2144663333892822,
"learning_rate": 1.221798856913667e-05,
"loss": 0.1621,
"step": 8650
},
{
"epoch": 2.3484028153762857,
"grad_norm": 10.507080078125,
"learning_rate": 1.2092650155419635e-05,
"loss": 0.174,
"step": 8675
},
{
"epoch": 2.3551705468327016,
"grad_norm": 0.9843292236328125,
"learning_rate": 1.1967311741702598e-05,
"loss": 0.1339,
"step": 8700
},
{
"epoch": 2.3619382782891174,
"grad_norm": 0.3039487898349762,
"learning_rate": 1.1841973327985562e-05,
"loss": 0.1176,
"step": 8725
},
{
"epoch": 2.3687060097455332,
"grad_norm": 0.5395913124084473,
"learning_rate": 1.1716634914268526e-05,
"loss": 0.2338,
"step": 8750
},
{
"epoch": 2.375473741201949,
"grad_norm": 0.16517315804958344,
"learning_rate": 1.1591296500551489e-05,
"loss": 0.2132,
"step": 8775
},
{
"epoch": 2.382241472658365,
"grad_norm": 10.029488563537598,
"learning_rate": 1.1465958086834453e-05,
"loss": 0.188,
"step": 8800
},
{
"epoch": 2.389009204114781,
"grad_norm": 3.222883462905884,
"learning_rate": 1.1340619673117417e-05,
"loss": 0.2098,
"step": 8825
},
{
"epoch": 2.3957769355711966,
"grad_norm": 0.06654487550258636,
"learning_rate": 1.1215281259400381e-05,
"loss": 0.1275,
"step": 8850
},
{
"epoch": 2.4025446670276125,
"grad_norm": 2.666473388671875,
"learning_rate": 1.1089942845683346e-05,
"loss": 0.2144,
"step": 8875
},
{
"epoch": 2.4093123984840283,
"grad_norm": 10.859978675842285,
"learning_rate": 1.096460443196631e-05,
"loss": 0.1975,
"step": 8900
},
{
"epoch": 2.416080129940444,
"grad_norm": 1.037359356880188,
"learning_rate": 1.0839266018249274e-05,
"loss": 0.0893,
"step": 8925
},
{
"epoch": 2.4228478613968596,
"grad_norm": 17.77867889404297,
"learning_rate": 1.0713927604532239e-05,
"loss": 0.178,
"step": 8950
},
{
"epoch": 2.4296155928532754,
"grad_norm": 8.258838653564453,
"learning_rate": 1.0588589190815202e-05,
"loss": 0.1508,
"step": 8975
},
{
"epoch": 2.4363833243096913,
"grad_norm": 8.984355926513672,
"learning_rate": 1.0463250777098165e-05,
"loss": 0.1494,
"step": 9000
},
{
"epoch": 2.443151055766107,
"grad_norm": 1.9472849369049072,
"learning_rate": 1.0337912363381128e-05,
"loss": 0.1767,
"step": 9025
},
{
"epoch": 2.449918787222523,
"grad_norm": 0.1337762475013733,
"learning_rate": 1.0212573949664093e-05,
"loss": 0.1451,
"step": 9050
},
{
"epoch": 2.456686518678939,
"grad_norm": 0.7223402857780457,
"learning_rate": 1.0087235535947058e-05,
"loss": 0.1788,
"step": 9075
},
{
"epoch": 2.4634542501353547,
"grad_norm": 12.872135162353516,
"learning_rate": 9.961897122230021e-06,
"loss": 0.1807,
"step": 9100
},
{
"epoch": 2.4702219815917705,
"grad_norm": 14.778857231140137,
"learning_rate": 9.836558708512986e-06,
"loss": 0.1621,
"step": 9125
},
{
"epoch": 2.4769897130481864,
"grad_norm": 0.44684821367263794,
"learning_rate": 9.71122029479595e-06,
"loss": 0.1788,
"step": 9150
},
{
"epoch": 2.4837574445046022,
"grad_norm": 15.21567440032959,
"learning_rate": 9.585881881078914e-06,
"loss": 0.1615,
"step": 9175
},
{
"epoch": 2.490525175961018,
"grad_norm": 0.5438389182090759,
"learning_rate": 9.460543467361878e-06,
"loss": 0.0694,
"step": 9200
},
{
"epoch": 2.4972929074174335,
"grad_norm": 0.26495838165283203,
"learning_rate": 9.335205053644842e-06,
"loss": 0.2251,
"step": 9225
},
{
"epoch": 2.5040606388738493,
"grad_norm": 3.848076343536377,
"learning_rate": 9.209866639927806e-06,
"loss": 0.118,
"step": 9250
},
{
"epoch": 2.510828370330265,
"grad_norm": 0.0551062636077404,
"learning_rate": 9.08452822621077e-06,
"loss": 0.066,
"step": 9275
},
{
"epoch": 2.517596101786681,
"grad_norm": 8.600728034973145,
"learning_rate": 8.959189812493733e-06,
"loss": 0.1336,
"step": 9300
},
{
"epoch": 2.524363833243097,
"grad_norm": 6.382137298583984,
"learning_rate": 8.833851398776697e-06,
"loss": 0.2049,
"step": 9325
},
{
"epoch": 2.5311315646995127,
"grad_norm": 13.446625709533691,
"learning_rate": 8.70851298505966e-06,
"loss": 0.1743,
"step": 9350
},
{
"epoch": 2.5378992961559286,
"grad_norm": 6.327456951141357,
"learning_rate": 8.583174571342625e-06,
"loss": 0.2677,
"step": 9375
},
{
"epoch": 2.5446670276123444,
"grad_norm": 0.14797528088092804,
"learning_rate": 8.45783615762559e-06,
"loss": 0.1348,
"step": 9400
},
{
"epoch": 2.5514347590687603,
"grad_norm": 0.03272142633795738,
"learning_rate": 8.332497743908553e-06,
"loss": 0.1655,
"step": 9425
},
{
"epoch": 2.558202490525176,
"grad_norm": 0.09539608657360077,
"learning_rate": 8.207159330191518e-06,
"loss": 0.2066,
"step": 9450
},
{
"epoch": 2.564970221981592,
"grad_norm": 2.991002321243286,
"learning_rate": 8.081820916474483e-06,
"loss": 0.1664,
"step": 9475
},
{
"epoch": 2.5717379534380074,
"grad_norm": 0.05500922352075577,
"learning_rate": 7.956482502757446e-06,
"loss": 0.1241,
"step": 9500
},
{
"epoch": 2.5785056848944237,
"grad_norm": 11.698848724365234,
"learning_rate": 7.831144089040409e-06,
"loss": 0.2105,
"step": 9525
},
{
"epoch": 2.585273416350839,
"grad_norm": 0.4144781231880188,
"learning_rate": 7.705805675323372e-06,
"loss": 0.229,
"step": 9550
},
{
"epoch": 2.592041147807255,
"grad_norm": 15.266688346862793,
"learning_rate": 7.580467261606338e-06,
"loss": 0.1854,
"step": 9575
},
{
"epoch": 2.5988088792636708,
"grad_norm": 0.1844756007194519,
"learning_rate": 7.455128847889302e-06,
"loss": 0.082,
"step": 9600
},
{
"epoch": 2.6055766107200866,
"grad_norm": 7.458913326263428,
"learning_rate": 7.329790434172265e-06,
"loss": 0.1278,
"step": 9625
},
{
"epoch": 2.6123443421765025,
"grad_norm": 0.7331855893135071,
"learning_rate": 7.20445202045523e-06,
"loss": 0.1028,
"step": 9650
},
{
"epoch": 2.6191120736329183,
"grad_norm": 0.3585509657859802,
"learning_rate": 7.0791136067381944e-06,
"loss": 0.1163,
"step": 9675
},
{
"epoch": 2.625879805089334,
"grad_norm": 0.40765902400016785,
"learning_rate": 6.9537751930211575e-06,
"loss": 0.1065,
"step": 9700
},
{
"epoch": 2.63264753654575,
"grad_norm": 7.481261730194092,
"learning_rate": 6.8284367793041215e-06,
"loss": 0.1028,
"step": 9725
},
{
"epoch": 2.639415268002166,
"grad_norm": 1.0196110010147095,
"learning_rate": 6.703098365587085e-06,
"loss": 0.103,
"step": 9750
},
{
"epoch": 2.6461829994585813,
"grad_norm": 0.306159645318985,
"learning_rate": 6.577759951870049e-06,
"loss": 0.1617,
"step": 9775
},
{
"epoch": 2.6529507309149976,
"grad_norm": 11.561976432800293,
"learning_rate": 6.452421538153014e-06,
"loss": 0.1027,
"step": 9800
},
{
"epoch": 2.659718462371413,
"grad_norm": 0.021391283720731735,
"learning_rate": 6.327083124435977e-06,
"loss": 0.126,
"step": 9825
},
{
"epoch": 2.666486193827829,
"grad_norm": 0.036384038627147675,
"learning_rate": 6.201744710718941e-06,
"loss": 0.2274,
"step": 9850
},
{
"epoch": 2.6732539252842447,
"grad_norm": 0.39547139406204224,
"learning_rate": 6.076406297001905e-06,
"loss": 0.2437,
"step": 9875
},
{
"epoch": 2.6800216567406605,
"grad_norm": 1.0845611095428467,
"learning_rate": 5.951067883284869e-06,
"loss": 0.1372,
"step": 9900
},
{
"epoch": 2.6867893881970764,
"grad_norm": 0.6141884326934814,
"learning_rate": 5.825729469567833e-06,
"loss": 0.0986,
"step": 9925
},
{
"epoch": 2.693557119653492,
"grad_norm": 6.706904888153076,
"learning_rate": 5.700391055850798e-06,
"loss": 0.1353,
"step": 9950
},
{
"epoch": 2.700324851109908,
"grad_norm": 0.1707427203655243,
"learning_rate": 5.575052642133762e-06,
"loss": 0.1917,
"step": 9975
},
{
"epoch": 2.707092582566324,
"grad_norm": 0.16985374689102173,
"learning_rate": 5.449714228416725e-06,
"loss": 0.0994,
"step": 10000
},
{
"epoch": 2.7138603140227398,
"grad_norm": 10.607304573059082,
"learning_rate": 5.324375814699689e-06,
"loss": 0.1015,
"step": 10025
},
{
"epoch": 2.720628045479155,
"grad_norm": 0.6444892287254333,
"learning_rate": 5.199037400982654e-06,
"loss": 0.0935,
"step": 10050
},
{
"epoch": 2.7273957769355714,
"grad_norm": 0.8442856669425964,
"learning_rate": 5.073698987265618e-06,
"loss": 0.2081,
"step": 10075
},
{
"epoch": 2.734163508391987,
"grad_norm": 0.2734193205833435,
"learning_rate": 4.948360573548582e-06,
"loss": 0.169,
"step": 10100
},
{
"epoch": 2.7409312398484027,
"grad_norm": 0.19697026908397675,
"learning_rate": 4.823022159831545e-06,
"loss": 0.1624,
"step": 10125
},
{
"epoch": 2.7476989713048185,
"grad_norm": 12.665722846984863,
"learning_rate": 4.69768374611451e-06,
"loss": 0.1644,
"step": 10150
},
{
"epoch": 2.7544667027612344,
"grad_norm": 10.231285095214844,
"learning_rate": 4.5723453323974735e-06,
"loss": 0.1422,
"step": 10175
},
{
"epoch": 2.7612344342176502,
"grad_norm": 10.933349609375,
"learning_rate": 4.4470069186804375e-06,
"loss": 0.2038,
"step": 10200
},
{
"epoch": 2.768002165674066,
"grad_norm": 10.937248229980469,
"learning_rate": 4.3216685049634015e-06,
"loss": 0.1339,
"step": 10225
},
{
"epoch": 2.774769897130482,
"grad_norm": 0.07432160526514053,
"learning_rate": 4.196330091246365e-06,
"loss": 0.2031,
"step": 10250
},
{
"epoch": 2.781537628586898,
"grad_norm": 10.13500690460205,
"learning_rate": 4.070991677529329e-06,
"loss": 0.1778,
"step": 10275
},
{
"epoch": 2.7883053600433136,
"grad_norm": 0.058682914823293686,
"learning_rate": 3.945653263812293e-06,
"loss": 0.1036,
"step": 10300
},
{
"epoch": 2.795073091499729,
"grad_norm": 11.469184875488281,
"learning_rate": 3.820314850095257e-06,
"loss": 0.2036,
"step": 10325
},
{
"epoch": 2.8018408229561453,
"grad_norm": 0.17000257968902588,
"learning_rate": 3.6949764363782212e-06,
"loss": 0.1875,
"step": 10350
},
{
"epoch": 2.8086085544125607,
"grad_norm": 0.3760491907596588,
"learning_rate": 3.5696380226611856e-06,
"loss": 0.1494,
"step": 10375
},
{
"epoch": 2.8153762858689766,
"grad_norm": 0.10404614359140396,
"learning_rate": 3.4442996089441496e-06,
"loss": 0.0973,
"step": 10400
},
{
"epoch": 2.8221440173253924,
"grad_norm": 0.41150447726249695,
"learning_rate": 3.3239747317757947e-06,
"loss": 0.1634,
"step": 10425
},
{
"epoch": 2.8289117487818083,
"grad_norm": 0.061491526663303375,
"learning_rate": 3.1986363180587587e-06,
"loss": 0.184,
"step": 10450
},
{
"epoch": 2.835679480238224,
"grad_norm": 1.5745799541473389,
"learning_rate": 3.0732979043417226e-06,
"loss": 0.1135,
"step": 10475
},
{
"epoch": 2.84244721169464,
"grad_norm": 15.36170482635498,
"learning_rate": 2.947959490624687e-06,
"loss": 0.0998,
"step": 10500
},
{
"epoch": 2.849214943151056,
"grad_norm": 2.991931915283203,
"learning_rate": 2.8226210769076505e-06,
"loss": 0.1403,
"step": 10525
},
{
"epoch": 2.8559826746074717,
"grad_norm": 1.1434751749038696,
"learning_rate": 2.697282663190615e-06,
"loss": 0.1634,
"step": 10550
},
{
"epoch": 2.8627504060638875,
"grad_norm": 5.902674198150635,
"learning_rate": 2.571944249473579e-06,
"loss": 0.1235,
"step": 10575
},
{
"epoch": 2.869518137520303,
"grad_norm": 0.050640497356653214,
"learning_rate": 2.446605835756543e-06,
"loss": 0.1516,
"step": 10600
},
{
"epoch": 2.8762858689767192,
"grad_norm": 13.541975021362305,
"learning_rate": 2.3212674220395068e-06,
"loss": 0.2409,
"step": 10625
},
{
"epoch": 2.8830536004331346,
"grad_norm": 24.88682746887207,
"learning_rate": 2.1959290083224707e-06,
"loss": 0.1724,
"step": 10650
},
{
"epoch": 2.8898213318895505,
"grad_norm": 0.10497920215129852,
"learning_rate": 2.0705905946054347e-06,
"loss": 0.1563,
"step": 10675
},
{
"epoch": 2.8965890633459663,
"grad_norm": 3.943291187286377,
"learning_rate": 1.945252180888399e-06,
"loss": 0.1966,
"step": 10700
},
{
"epoch": 2.903356794802382,
"grad_norm": 0.061565861105918884,
"learning_rate": 1.8199137671713628e-06,
"loss": 0.1962,
"step": 10725
},
{
"epoch": 2.910124526258798,
"grad_norm": 0.08835487067699432,
"learning_rate": 1.694575353454327e-06,
"loss": 0.282,
"step": 10750
},
{
"epoch": 2.916892257715214,
"grad_norm": 2.9496688842773438,
"learning_rate": 1.5692369397372907e-06,
"loss": 0.1138,
"step": 10775
},
{
"epoch": 2.9236599891716297,
"grad_norm": 1.1299965381622314,
"learning_rate": 1.4438985260202547e-06,
"loss": 0.1773,
"step": 10800
},
{
"epoch": 2.9304277206280456,
"grad_norm": 11.417136192321777,
"learning_rate": 1.3185601123032186e-06,
"loss": 0.1358,
"step": 10825
},
{
"epoch": 2.9371954520844614,
"grad_norm": 0.06532655656337738,
"learning_rate": 1.1932216985861828e-06,
"loss": 0.1728,
"step": 10850
},
{
"epoch": 2.943963183540877,
"grad_norm": 5.278496265411377,
"learning_rate": 1.0678832848691468e-06,
"loss": 0.1416,
"step": 10875
},
{
"epoch": 2.950730914997293,
"grad_norm": 1.0812338590621948,
"learning_rate": 9.425448711521107e-07,
"loss": 0.1485,
"step": 10900
},
{
"epoch": 2.9574986464537085,
"grad_norm": 0.12115427106618881,
"learning_rate": 8.172064574350748e-07,
"loss": 0.1201,
"step": 10925
},
{
"epoch": 2.9642663779101244,
"grad_norm": 0.1515658050775528,
"learning_rate": 6.918680437180387e-07,
"loss": 0.1225,
"step": 10950
},
{
"epoch": 2.97103410936654,
"grad_norm": 0.056250348687171936,
"learning_rate": 5.665296300010028e-07,
"loss": 0.1442,
"step": 10975
},
{
"epoch": 2.977801840822956,
"grad_norm": 0.10962472856044769,
"learning_rate": 4.411912162839667e-07,
"loss": 0.0699,
"step": 11000
},
{
"epoch": 2.984569572279372,
"grad_norm": 0.3095192015171051,
"learning_rate": 3.1585280256693076e-07,
"loss": 0.171,
"step": 11025
},
{
"epoch": 2.9913373037357878,
"grad_norm": 0.24057000875473022,
"learning_rate": 1.9051438884989471e-07,
"loss": 0.0967,
"step": 11050
},
{
"epoch": 2.9981050351922036,
"grad_norm": 4.0788044929504395,
"learning_rate": 6.517597513285872e-08,
"loss": 0.1549,
"step": 11075
},
{
"epoch": 3.0,
"eval_accuracy": 0.9390862944162437,
"eval_f1_macro": 0.9327706813543745,
"eval_f1_micro": 0.9390862944162437,
"eval_f1_weighted": 0.9351383662112599,
"eval_loss": 0.15377455949783325,
"eval_precision_macro": 0.9422812840960479,
"eval_precision_micro": 0.9390862944162437,
"eval_precision_weighted": 0.9420489414415942,
"eval_recall_macro": 0.9346991783726477,
"eval_recall_micro": 0.9390862944162437,
"eval_recall_weighted": 0.9390862944162437,
"eval_runtime": 21.8517,
"eval_samples_per_second": 946.61,
"eval_steps_per_second": 59.172,
"step": 11082
}
],
"logging_steps": 25,
"max_steps": 11082,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2963923884403200.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}