Note: this trainer state is not strict JSON. The first logged "grad_norm" is Infinity, a token the JSON grammar does not allow, so strict parsers such as JavaScript's JSON.parse fail on it with: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON.
{
  "best_metric": 0.15377455949783325,
  "best_model_checkpoint": "date2format/checkpoint-11082",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 11082,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00676773145641581,
      "grad_norm": Infinity,
      "learning_rate": 1.0820559062218215e-06,
      "loss": 6.7854,
      "step": 25
    },
    {
      "epoch": 0.01353546291283162,
      "grad_norm": 18.809656143188477,
      "learning_rate": 2.2091974752028858e-06,
      "loss": 6.8207,
      "step": 50
    },
    {
      "epoch": 0.020303194369247428,
      "grad_norm": 18.936891555786133,
      "learning_rate": 3.3363390441839496e-06,
      "loss": 6.712,
      "step": 75
    },
    {
      "epoch": 0.02707092582566324,
      "grad_norm": 16.995830535888672,
      "learning_rate": 4.4634806131650134e-06,
      "loss": 6.6652,
      "step": 100
    },
    {
      "epoch": 0.03383865728207905,
      "grad_norm": 16.105716705322266,
      "learning_rate": 5.590622182146077e-06,
      "loss": 6.5538,
      "step": 125
    },
    {
      "epoch": 0.040606388738494856,
      "grad_norm": 16.41363525390625,
      "learning_rate": 6.717763751127142e-06,
      "loss": 6.5388,
      "step": 150
    },
    {
      "epoch": 0.04737412019491066,
      "grad_norm": 16.850101470947266,
      "learning_rate": 7.844905320108207e-06,
      "loss": 6.4123,
      "step": 175
    },
    {
      "epoch": 0.05414185165132648,
      "grad_norm": 15.117751121520996,
      "learning_rate": 8.972046889089269e-06,
      "loss": 6.3959,
      "step": 200
    },
    {
      "epoch": 0.060909583107742284,
      "grad_norm": 14.722877502441406,
      "learning_rate": 1.0099188458070334e-05,
      "loss": 6.2513,
      "step": 225
    },
    {
      "epoch": 0.0676773145641581,
      "grad_norm": 13.270645141601562,
      "learning_rate": 1.1226330027051398e-05,
      "loss": 6.2321,
      "step": 250
    },
    {
      "epoch": 0.0744450460205739,
      "grad_norm": 12.977484703063965,
      "learning_rate": 1.2353471596032462e-05,
      "loss": 6.129,
      "step": 275
    },
    {
      "epoch": 0.08121277747698971,
      "grad_norm": 12.7619047164917,
      "learning_rate": 1.3480613165013526e-05,
      "loss": 5.9896,
      "step": 300
    },
    {
      "epoch": 0.08798050893340552,
      "grad_norm": 12.988408088684082,
      "learning_rate": 1.4607754733994591e-05,
      "loss": 5.9501,
      "step": 325
    },
    {
      "epoch": 0.09474824038982133,
      "grad_norm": 13.49023723602295,
      "learning_rate": 1.5734896302975655e-05,
      "loss": 5.8552,
      "step": 350
    },
    {
      "epoch": 0.10151597184623715,
      "grad_norm": 13.501762390136719,
      "learning_rate": 1.686203787195672e-05,
      "loss": 5.7112,
      "step": 375
    },
    {
      "epoch": 0.10828370330265295,
      "grad_norm": 13.560358047485352,
      "learning_rate": 1.7989179440937783e-05,
      "loss": 5.5904,
      "step": 400
    },
    {
      "epoch": 0.11505143475906876,
      "grad_norm": 13.383050918579102,
      "learning_rate": 1.9116321009918847e-05,
      "loss": 5.5183,
      "step": 425
    },
    {
      "epoch": 0.12181916621548457,
      "grad_norm": 13.750153541564941,
      "learning_rate": 2.024346257889991e-05,
      "loss": 5.4114,
      "step": 450
    },
    {
      "epoch": 0.12858689767190037,
      "grad_norm": 14.731257438659668,
      "learning_rate": 2.1370604147880974e-05,
      "loss": 5.1422,
      "step": 475
    },
    {
      "epoch": 0.1353546291283162,
      "grad_norm": 13.863329887390137,
      "learning_rate": 2.2497745716862038e-05,
      "loss": 5.2576,
      "step": 500
    },
    {
      "epoch": 0.142122360584732,
      "grad_norm": 14.840801239013672,
      "learning_rate": 2.3624887285843102e-05,
      "loss": 4.9023,
      "step": 525
    },
    {
      "epoch": 0.1488900920411478,
      "grad_norm": 13.968029975891113,
      "learning_rate": 2.4752028854824166e-05,
      "loss": 4.8976,
      "step": 550
    },
    {
      "epoch": 0.15565782349756363,
      "grad_norm": 15.291852951049805,
      "learning_rate": 2.5879170423805233e-05,
      "loss": 4.7663,
      "step": 575
    },
    {
      "epoch": 0.16242555495397942,
      "grad_norm": 14.247614860534668,
      "learning_rate": 2.7006311992786293e-05,
      "loss": 4.7057,
      "step": 600
    },
    {
      "epoch": 0.16919328641039524,
      "grad_norm": 15.434479713439941,
      "learning_rate": 2.8133453561767357e-05,
      "loss": 4.5958,
      "step": 625
    },
    {
      "epoch": 0.17596101786681104,
      "grad_norm": 15.81191635131836,
      "learning_rate": 2.9260595130748425e-05,
      "loss": 4.4317,
      "step": 650
    },
    {
      "epoch": 0.18272874932322686,
      "grad_norm": 15.542362213134766,
      "learning_rate": 3.0387736699729485e-05,
      "loss": 4.3024,
      "step": 675
    },
    {
      "epoch": 0.18949648077964265,
      "grad_norm": 15.908782005310059,
      "learning_rate": 3.151487826871055e-05,
      "loss": 4.2023,
      "step": 700
    },
    {
      "epoch": 0.19626421223605847,
      "grad_norm": 17.07871437072754,
      "learning_rate": 3.264201983769162e-05,
      "loss": 4.1671,
      "step": 725
    },
    {
      "epoch": 0.2030319436924743,
      "grad_norm": 14.337630271911621,
      "learning_rate": 3.3769161406672676e-05,
      "loss": 3.8202,
      "step": 750
    },
    {
      "epoch": 0.20979967514889009,
      "grad_norm": 15.395954132080078,
      "learning_rate": 3.489630297565375e-05,
      "loss": 3.7002,
      "step": 775
    },
    {
      "epoch": 0.2165674066053059,
      "grad_norm": 17.821945190429688,
      "learning_rate": 3.602344454463481e-05,
      "loss": 3.7328,
      "step": 800
    },
    {
      "epoch": 0.2233351380617217,
      "grad_norm": 15.371402740478516,
      "learning_rate": 3.715058611361587e-05,
      "loss": 3.4769,
      "step": 825
    },
    {
      "epoch": 0.23010286951813752,
      "grad_norm": 18.089616775512695,
      "learning_rate": 3.827772768259694e-05,
      "loss": 3.5862,
      "step": 850
    },
    {
      "epoch": 0.23687060097455334,
      "grad_norm": 16.150415420532227,
      "learning_rate": 3.9404869251578e-05,
      "loss": 3.2755,
      "step": 875
    },
    {
      "epoch": 0.24363833243096913,
      "grad_norm": 16.139698028564453,
      "learning_rate": 4.0532010820559066e-05,
      "loss": 3.1737,
      "step": 900
    },
    {
      "epoch": 0.25040606388738496,
      "grad_norm": 16.604488372802734,
      "learning_rate": 4.165915238954013e-05,
      "loss": 3.1445,
      "step": 925
    },
    {
      "epoch": 0.25717379534380075,
      "grad_norm": 17.532072067260742,
      "learning_rate": 4.278629395852119e-05,
      "loss": 3.1923,
      "step": 950
    },
    {
      "epoch": 0.26394152680021654,
      "grad_norm": 17.041091918945312,
      "learning_rate": 4.391343552750226e-05,
      "loss": 2.8598,
      "step": 975
    },
    {
      "epoch": 0.2707092582566324,
      "grad_norm": 17.634092330932617,
      "learning_rate": 4.504057709648332e-05,
      "loss": 2.653,
      "step": 1000
    },
    {
      "epoch": 0.2774769897130482,
      "grad_norm": 14.8062105178833,
      "learning_rate": 4.6167718665464385e-05,
      "loss": 2.6197,
      "step": 1025
    },
    {
      "epoch": 0.284244721169464,
      "grad_norm": 14.701135635375977,
      "learning_rate": 4.729486023444545e-05,
      "loss": 2.4039,
      "step": 1050
    },
    {
      "epoch": 0.2910124526258798,
      "grad_norm": 14.139700889587402,
      "learning_rate": 4.842200180342651e-05,
      "loss": 2.3764,
      "step": 1075
    },
    {
      "epoch": 0.2977801840822956,
      "grad_norm": 15.656773567199707,
      "learning_rate": 4.954914337240758e-05,
      "loss": 2.3153,
      "step": 1100
    },
    {
      "epoch": 0.3045479155387114,
      "grad_norm": 15.101000785827637,
      "learning_rate": 4.992479695176978e-05,
      "loss": 2.0685,
      "step": 1125
    },
    {
      "epoch": 0.31131564699512726,
      "grad_norm": 15.826728820800781,
      "learning_rate": 4.9799458538052745e-05,
      "loss": 2.1117,
      "step": 1150
    },
    {
      "epoch": 0.31808337845154305,
      "grad_norm": 16.1218204498291,
      "learning_rate": 4.967412012433571e-05,
      "loss": 2.0013,
      "step": 1175
    },
    {
      "epoch": 0.32485110990795885,
      "grad_norm": 12.746655464172363,
      "learning_rate": 4.9548781710618674e-05,
      "loss": 1.8766,
      "step": 1200
    },
    {
      "epoch": 0.33161884136437464,
      "grad_norm": 11.957603454589844,
      "learning_rate": 4.942344329690164e-05,
      "loss": 1.7649,
      "step": 1225
    },
    {
      "epoch": 0.3383865728207905,
      "grad_norm": 14.868532180786133,
      "learning_rate": 4.92981048831846e-05,
      "loss": 1.6963,
      "step": 1250
    },
    {
      "epoch": 0.3451543042772063,
      "grad_norm": 12.839776992797852,
      "learning_rate": 4.917276646946756e-05,
      "loss": 1.4838,
      "step": 1275
    },
    {
      "epoch": 0.3519220357336221,
      "grad_norm": 13.293111801147461,
      "learning_rate": 4.904742805575053e-05,
      "loss": 1.393,
      "step": 1300
    },
    {
      "epoch": 0.3586897671900379,
      "grad_norm": 16.82228660583496,
      "learning_rate": 4.892208964203349e-05,
      "loss": 1.463,
      "step": 1325
    },
    {
      "epoch": 0.3654574986464537,
      "grad_norm": 13.664780616760254,
      "learning_rate": 4.8796751228316456e-05,
      "loss": 1.3157,
      "step": 1350
    },
    {
      "epoch": 0.3722252301028695,
      "grad_norm": 12.570382118225098,
      "learning_rate": 4.867141281459942e-05,
      "loss": 1.3402,
      "step": 1375
    },
    {
      "epoch": 0.3789929615592853,
      "grad_norm": 16.875078201293945,
      "learning_rate": 4.8546074400882386e-05,
      "loss": 1.3422,
      "step": 1400
    },
    {
      "epoch": 0.38576069301570115,
      "grad_norm": 12.442831993103027,
      "learning_rate": 4.842073598716535e-05,
      "loss": 1.1951,
      "step": 1425
    },
    {
      "epoch": 0.39252842447211694,
      "grad_norm": 13.85045051574707,
      "learning_rate": 4.8295397573448316e-05,
      "loss": 1.1216,
      "step": 1450
    },
    {
      "epoch": 0.39929615592853274,
      "grad_norm": 12.063750267028809,
      "learning_rate": 4.817005915973128e-05,
      "loss": 0.9869,
      "step": 1475
    },
    {
      "epoch": 0.4060638873849486,
      "grad_norm": 14.56391716003418,
      "learning_rate": 4.8044720746014245e-05,
      "loss": 1.0033,
      "step": 1500
    },
    {
      "epoch": 0.4128316188413644,
      "grad_norm": 12.925354957580566,
      "learning_rate": 4.79193823322972e-05,
      "loss": 0.8906,
      "step": 1525
    },
    {
      "epoch": 0.41959935029778017,
      "grad_norm": 19.873634338378906,
      "learning_rate": 4.779404391858017e-05,
      "loss": 0.9104,
      "step": 1550
    },
    {
      "epoch": 0.426367081754196,
      "grad_norm": 14.321996688842773,
      "learning_rate": 4.766870550486313e-05,
      "loss": 1.0049,
      "step": 1575
    },
    {
      "epoch": 0.4331348132106118,
      "grad_norm": 8.742544174194336,
      "learning_rate": 4.75433670911461e-05,
      "loss": 0.9338,
      "step": 1600
    },
    {
      "epoch": 0.4399025446670276,
      "grad_norm": 15.12094497680664,
      "learning_rate": 4.741802867742906e-05,
      "loss": 0.7763,
      "step": 1625
    },
    {
      "epoch": 0.4466702761234434,
      "grad_norm": 9.084162712097168,
      "learning_rate": 4.729269026371203e-05,
      "loss": 0.7404,
      "step": 1650
    },
    {
      "epoch": 0.45343800757985925,
      "grad_norm": 8.377363204956055,
      "learning_rate": 4.716735184999499e-05,
      "loss": 0.7405,
      "step": 1675
    },
    {
      "epoch": 0.46020573903627504,
      "grad_norm": 6.151582717895508,
      "learning_rate": 4.704201343627796e-05,
      "loss": 0.7003,
      "step": 1700
    },
    {
      "epoch": 0.46697347049269083,
      "grad_norm": 6.058241367340088,
      "learning_rate": 4.691667502256092e-05,
      "loss": 0.7211,
      "step": 1725
    },
    {
      "epoch": 0.4737412019491067,
      "grad_norm": 11.786967277526855,
      "learning_rate": 4.6791336608843886e-05,
      "loss": 0.6573,
      "step": 1750
    },
    {
      "epoch": 0.4805089334055225,
      "grad_norm": 7.774144649505615,
      "learning_rate": 4.6665998195126844e-05,
      "loss": 0.6648,
      "step": 1775
    },
    {
      "epoch": 0.48727666486193827,
      "grad_norm": 13.611273765563965,
      "learning_rate": 4.654065978140981e-05,
      "loss": 0.6778,
      "step": 1800
    },
    {
      "epoch": 0.49404439631835406,
      "grad_norm": 17.016263961791992,
      "learning_rate": 4.6415321367692774e-05,
      "loss": 0.6154,
      "step": 1825
    },
    {
      "epoch": 0.5008121277747699,
      "grad_norm": 13.737407684326172,
      "learning_rate": 4.628998295397574e-05,
      "loss": 0.6161,
      "step": 1850
    },
    {
      "epoch": 0.5075798592311858,
      "grad_norm": 10.071102142333984,
      "learning_rate": 4.61646445402587e-05,
      "loss": 0.596,
      "step": 1875
    },
    {
      "epoch": 0.5143475906876015,
      "grad_norm": 8.169589042663574,
      "learning_rate": 4.603930612654166e-05,
      "loss": 0.5347,
      "step": 1900
    },
    {
      "epoch": 0.5211153221440173,
      "grad_norm": 13.066163063049316,
      "learning_rate": 4.5913967712824627e-05,
      "loss": 0.544,
      "step": 1925
    },
    {
      "epoch": 0.5278830536004331,
      "grad_norm": 3.5582985877990723,
      "learning_rate": 4.578862929910759e-05,
      "loss": 0.5303,
      "step": 1950
    },
    {
      "epoch": 0.5346507850568489,
      "grad_norm": 7.025475978851318,
      "learning_rate": 4.5663290885390556e-05,
      "loss": 0.4748,
      "step": 1975
    },
    {
      "epoch": 0.5414185165132648,
      "grad_norm": 8.666425704956055,
      "learning_rate": 4.553795247167352e-05,
      "loss": 0.5852,
      "step": 2000
    },
    {
      "epoch": 0.5481862479696805,
      "grad_norm": 16.04596710205078,
      "learning_rate": 4.5412614057956486e-05,
      "loss": 0.4351,
      "step": 2025
    },
    {
      "epoch": 0.5549539794260964,
      "grad_norm": 15.767374038696289,
      "learning_rate": 4.529228918078813e-05,
      "loss": 0.6447,
      "step": 2050
    },
    {
      "epoch": 0.5617217108825122,
      "grad_norm": 8.484817504882812,
      "learning_rate": 4.516695076707109e-05,
      "loss": 0.4809,
      "step": 2075
    },
    {
      "epoch": 0.568489442338928,
      "grad_norm": 16.595365524291992,
      "learning_rate": 4.504161235335406e-05,
      "loss": 0.4824,
      "step": 2100
    },
    {
      "epoch": 0.5752571737953438,
      "grad_norm": 16.1405029296875,
      "learning_rate": 4.491627393963702e-05,
      "loss": 0.4274,
      "step": 2125
    },
    {
      "epoch": 0.5820249052517596,
      "grad_norm": 12.056056022644043,
      "learning_rate": 4.4790935525919986e-05,
      "loss": 0.4834,
      "step": 2150
    },
    {
      "epoch": 0.5887926367081754,
      "grad_norm": 4.0205841064453125,
      "learning_rate": 4.466559711220295e-05,
      "loss": 0.5808,
      "step": 2175
    },
    {
      "epoch": 0.5955603681645912,
      "grad_norm": 16.41112518310547,
      "learning_rate": 4.4540258698485916e-05,
      "loss": 0.5036,
      "step": 2200
    },
    {
      "epoch": 0.6023280996210071,
      "grad_norm": 10.883577346801758,
      "learning_rate": 4.441492028476888e-05,
      "loss": 0.4325,
      "step": 2225
    },
    {
      "epoch": 0.6090958310774228,
      "grad_norm": 15.038456916809082,
      "learning_rate": 4.4289581871051845e-05,
      "loss": 0.4418,
      "step": 2250
    },
    {
      "epoch": 0.6158635625338387,
      "grad_norm": 3.341290235519409,
      "learning_rate": 4.416424345733481e-05,
      "loss": 0.4403,
      "step": 2275
    },
    {
      "epoch": 0.6226312939902545,
      "grad_norm": 8.565878868103027,
      "learning_rate": 4.403890504361777e-05,
      "loss": 0.376,
      "step": 2300
    },
    {
      "epoch": 0.6293990254466703,
      "grad_norm": 8.767007827758789,
      "learning_rate": 4.391356662990073e-05,
      "loss": 0.3205,
      "step": 2325
    },
    {
      "epoch": 0.6361667569030861,
      "grad_norm": 8.68835163116455,
      "learning_rate": 4.37882282161837e-05,
      "loss": 0.338,
      "step": 2350
    },
    {
      "epoch": 0.6429344883595018,
      "grad_norm": 9.748613357543945,
      "learning_rate": 4.366288980246666e-05,
      "loss": 0.4474,
      "step": 2375
    },
    {
      "epoch": 0.6497022198159177,
      "grad_norm": 7.754514217376709,
      "learning_rate": 4.353755138874963e-05,
      "loss": 0.4478,
      "step": 2400
    },
    {
      "epoch": 0.6564699512723335,
      "grad_norm": 11.137701034545898,
      "learning_rate": 4.341221297503259e-05,
      "loss": 0.3163,
      "step": 2425
    },
    {
      "epoch": 0.6632376827287493,
      "grad_norm": 9.576991081237793,
      "learning_rate": 4.328687456131556e-05,
      "loss": 0.3521,
      "step": 2450
    },
    {
      "epoch": 0.6700054141851651,
      "grad_norm": 11.974344253540039,
      "learning_rate": 4.316153614759852e-05,
      "loss": 0.4591,
      "step": 2475
    },
    {
      "epoch": 0.676773145641581,
      "grad_norm": 15.265382766723633,
      "learning_rate": 4.303619773388149e-05,
      "loss": 0.3983,
      "step": 2500
    },
    {
      "epoch": 0.6835408770979967,
      "grad_norm": 12.016144752502441,
      "learning_rate": 4.291085932016445e-05,
      "loss": 0.5159,
      "step": 2525
    },
    {
      "epoch": 0.6903086085544126,
      "grad_norm": 12.998587608337402,
      "learning_rate": 4.278552090644741e-05,
      "loss": 0.4533,
      "step": 2550
    },
    {
      "epoch": 0.6970763400108284,
      "grad_norm": 21.76568031311035,
      "learning_rate": 4.2660182492730374e-05,
      "loss": 0.3456,
      "step": 2575
    },
    {
      "epoch": 0.7038440714672441,
      "grad_norm": 3.395463466644287,
      "learning_rate": 4.2539857615562016e-05,
      "loss": 0.3654,
      "step": 2600
    },
    {
      "epoch": 0.71061180292366,
      "grad_norm": 6.759268283843994,
      "learning_rate": 4.241451920184498e-05,
      "loss": 0.316,
      "step": 2625
    },
    {
      "epoch": 0.7173795343800758,
      "grad_norm": 19.425579071044922,
      "learning_rate": 4.2289180788127945e-05,
      "loss": 0.3431,
      "step": 2650
    },
    {
      "epoch": 0.7241472658364916,
      "grad_norm": 12.407275199890137,
      "learning_rate": 4.216384237441091e-05,
      "loss": 0.3912,
      "step": 2675
    },
    {
      "epoch": 0.7309149972929074,
      "grad_norm": 10.216941833496094,
      "learning_rate": 4.2038503960693875e-05,
      "loss": 0.2893,
      "step": 2700
    },
    {
      "epoch": 0.7376827287493233,
      "grad_norm": 8.958337783813477,
      "learning_rate": 4.191316554697684e-05,
      "loss": 0.4489,
      "step": 2725
    },
    {
      "epoch": 0.744450460205739,
      "grad_norm": 10.17128849029541,
      "learning_rate": 4.1787827133259804e-05,
      "loss": 0.2845,
      "step": 2750
    },
    {
      "epoch": 0.7512181916621549,
      "grad_norm": 6.733510494232178,
      "learning_rate": 4.166750225609145e-05,
      "loss": 0.3935,
      "step": 2775
    },
    {
      "epoch": 0.7579859231185706,
      "grad_norm": 9.230829238891602,
      "learning_rate": 4.154216384237442e-05,
      "loss": 0.3809,
      "step": 2800
    },
    {
      "epoch": 0.7647536545749865,
      "grad_norm": 2.8910205364227295,
      "learning_rate": 4.1416825428657375e-05,
      "loss": 0.2973,
      "step": 2825
    },
    {
      "epoch": 0.7715213860314023,
      "grad_norm": 3.628933906555176,
      "learning_rate": 4.129148701494034e-05,
      "loss": 0.2805,
      "step": 2850
    },
    {
      "epoch": 0.778289117487818,
      "grad_norm": 7.368860721588135,
      "learning_rate": 4.1166148601223305e-05,
      "loss": 0.3739,
      "step": 2875
    },
    {
      "epoch": 0.7850568489442339,
      "grad_norm": 8.461480140686035,
      "learning_rate": 4.104081018750627e-05,
      "loss": 0.334,
      "step": 2900
    },
    {
      "epoch": 0.7918245804006497,
      "grad_norm": 10.173233985900879,
      "learning_rate": 4.0915471773789235e-05,
      "loss": 0.427,
      "step": 2925
    },
    {
      "epoch": 0.7985923118570655,
      "grad_norm": 4.683242321014404,
      "learning_rate": 4.07901333600722e-05,
      "loss": 0.2991,
      "step": 2950
    },
    {
      "epoch": 0.8053600433134813,
      "grad_norm": 10.472857475280762,
      "learning_rate": 4.0664794946355164e-05,
      "loss": 0.3194,
      "step": 2975
    },
    {
      "epoch": 0.8121277747698972,
      "grad_norm": 5.410557746887207,
      "learning_rate": 4.053945653263813e-05,
      "loss": 0.3114,
      "step": 3000
    },
    {
      "epoch": 0.8188955062263129,
      "grad_norm": 12.810556411743164,
      "learning_rate": 4.041411811892109e-05,
      "loss": 0.2572,
      "step": 3025
    },
    {
      "epoch": 0.8256632376827288,
      "grad_norm": 4.909450054168701,
      "learning_rate": 4.028877970520405e-05,
      "loss": 0.2069,
      "step": 3050
    },
    {
      "epoch": 0.8324309691391446,
      "grad_norm": 4.909849643707275,
      "learning_rate": 4.016344129148702e-05,
      "loss": 0.3478,
      "step": 3075
    },
    {
      "epoch": 0.8391987005955603,
      "grad_norm": 13.538515090942383,
      "learning_rate": 4.003810287776998e-05,
      "loss": 0.3086,
      "step": 3100
    },
    {
      "epoch": 0.8459664320519762,
      "grad_norm": 10.3212251663208,
      "learning_rate": 3.9912764464052946e-05,
      "loss": 0.2613,
      "step": 3125
    },
    {
      "epoch": 0.852734163508392,
      "grad_norm": 7.68850040435791,
      "learning_rate": 3.9787426050335904e-05,
      "loss": 0.3387,
      "step": 3150
    },
    {
      "epoch": 0.8595018949648078,
      "grad_norm": 7.078841209411621,
      "learning_rate": 3.966208763661887e-05,
      "loss": 0.357,
      "step": 3175
    },
    {
      "epoch": 0.8662696264212236,
      "grad_norm": 4.790768146514893,
      "learning_rate": 3.9536749222901834e-05,
      "loss": 0.3459,
      "step": 3200
    },
    {
      "epoch": 0.8730373578776394,
      "grad_norm": 4.735093593597412,
      "learning_rate": 3.94114108091848e-05,
      "loss": 0.2948,
      "step": 3225
    },
    {
      "epoch": 0.8798050893340552,
      "grad_norm": 1.3540657758712769,
      "learning_rate": 3.9286072395467764e-05,
      "loss": 0.1944,
      "step": 3250
    },
    {
      "epoch": 0.8865728207904711,
      "grad_norm": 9.657829284667969,
      "learning_rate": 3.916073398175073e-05,
      "loss": 0.2671,
      "step": 3275
    },
    {
      "epoch": 0.8933405522468868,
      "grad_norm": 8.425637245178223,
      "learning_rate": 3.903539556803369e-05,
      "loss": 0.2251,
      "step": 3300
    },
    {
      "epoch": 0.9001082837033026,
      "grad_norm": 7.622613906860352,
      "learning_rate": 3.891005715431666e-05,
      "loss": 0.3632,
      "step": 3325
    },
    {
      "epoch": 0.9068760151597185,
      "grad_norm": 12.632335662841797,
      "learning_rate": 3.8784718740599616e-05,
      "loss": 0.207,
      "step": 3350
    },
    {
      "epoch": 0.9136437466161342,
      "grad_norm": 11.750454902648926,
      "learning_rate": 3.865938032688258e-05,
      "loss": 0.2652,
      "step": 3375
    },
    {
      "epoch": 0.9204114780725501,
      "grad_norm": 6.89017915725708,
      "learning_rate": 3.8534041913165546e-05,
      "loss": 0.2457,
      "step": 3400
    },
    {
      "epoch": 0.9271792095289659,
      "grad_norm": 4.333946704864502,
      "learning_rate": 3.840870349944851e-05,
      "loss": 0.3324,
      "step": 3425
    },
    {
      "epoch": 0.9339469409853817,
      "grad_norm": 1.0153127908706665,
      "learning_rate": 3.8283365085731475e-05,
      "loss": 0.1966,
      "step": 3450
    },
    {
      "epoch": 0.9407146724417975,
      "grad_norm": 1.8941410779953003,
      "learning_rate": 3.815802667201444e-05,
      "loss": 0.3098,
      "step": 3475
    },
    {
      "epoch": 0.9474824038982134,
      "grad_norm": 1.6257559061050415,
      "learning_rate": 3.8032688258297405e-05,
      "loss": 0.1872,
      "step": 3500
    },
    {
      "epoch": 0.9542501353546291,
      "grad_norm": 2.3211212158203125,
      "learning_rate": 3.790734984458037e-05,
      "loss": 0.2334,
      "step": 3525
    },
    {
      "epoch": 0.961017866811045,
      "grad_norm": 10.049856185913086,
      "learning_rate": 3.7782011430863334e-05,
      "loss": 0.3128,
      "step": 3550
    },
    {
      "epoch": 0.9677855982674608,
      "grad_norm": 10.843172073364258,
      "learning_rate": 3.76566730171463e-05,
      "loss": 0.3414,
      "step": 3575
    },
    {
      "epoch": 0.9745533297238765,
      "grad_norm": 0.46516045928001404,
      "learning_rate": 3.7531334603429264e-05,
      "loss": 0.2379,
      "step": 3600
    },
    {
      "epoch": 0.9813210611802924,
      "grad_norm": 15.376679420471191,
      "learning_rate": 3.740599618971222e-05,
      "loss": 0.2887,
      "step": 3625
    },
    {
      "epoch": 0.9880887926367081,
      "grad_norm": 2.3309133052825928,
      "learning_rate": 3.728065777599519e-05,
      "loss": 0.2105,
      "step": 3650
    },
    {
      "epoch": 0.994856524093124,
      "grad_norm": 7.93802547454834,
      "learning_rate": 3.715531936227815e-05,
      "loss": 0.3599,
      "step": 3675
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9213923132704859,
      "eval_f1_macro": 0.9069570851888077,
      "eval_f1_micro": 0.9213923132704859,
      "eval_f1_weighted": 0.911460261524371,
      "eval_loss": 0.2470918595790863,
      "eval_precision_macro": 0.9177198642319323,
      "eval_precision_micro": 0.9213923132704859,
      "eval_precision_weighted": 0.9195993135359931,
      "eval_recall_macro": 0.9154624966869864,
      "eval_recall_micro": 0.9213923132704859,
      "eval_recall_weighted": 0.9213923132704859,
      "eval_runtime": 21.8346,
      "eval_samples_per_second": 947.351,
      "eval_steps_per_second": 59.218,
      "step": 3694
    },
    {
      "epoch": 1.0016242555495398,
      "grad_norm": 11.75763988494873,
      "learning_rate": 3.7029980948561116e-05,
      "loss": 0.2012,
      "step": 3700
    },
    {
      "epoch": 1.0083919870059557,
      "grad_norm": 13.783013343811035,
      "learning_rate": 3.690464253484408e-05,
      "loss": 0.4173,
      "step": 3725
    },
    {
      "epoch": 1.0151597184623715,
      "grad_norm": 2.9924991130828857,
      "learning_rate": 3.6779304121127046e-05,
      "loss": 0.2121,
      "step": 3750
    },
    {
      "epoch": 1.0219274499187871,
      "grad_norm": 0.5149463415145874,
      "learning_rate": 3.665396570741001e-05,
      "loss": 0.2768,
      "step": 3775
    },
    {
      "epoch": 1.028695181375203,
      "grad_norm": 14.207648277282715,
      "learning_rate": 3.6528627293692976e-05,
      "loss": 0.2858,
      "step": 3800
    },
    {
      "epoch": 1.0354629128316188,
      "grad_norm": 0.8809079527854919,
      "learning_rate": 3.640328887997594e-05,
      "loss": 0.1731,
      "step": 3825
    },
    {
      "epoch": 1.0422306442880347,
      "grad_norm": 4.510576248168945,
      "learning_rate": 3.6277950466258905e-05,
      "loss": 0.2966,
      "step": 3850
    },
    {
      "epoch": 1.0489983757444505,
      "grad_norm": 17.010372161865234,
      "learning_rate": 3.615261205254186e-05,
      "loss": 0.2354,
      "step": 3875
    },
    {
      "epoch": 1.0557661072008662,
      "grad_norm": 2.4811925888061523,
      "learning_rate": 3.602727363882483e-05,
      "loss": 0.26,
      "step": 3900
    },
    {
      "epoch": 1.062533838657282,
      "grad_norm": 0.9241037368774414,
      "learning_rate": 3.590193522510779e-05,
      "loss": 0.1716,
      "step": 3925
    },
    {
      "epoch": 1.0693015701136979,
      "grad_norm": 11.593517303466797,
      "learning_rate": 3.577659681139076e-05,
      "loss": 0.255,
      "step": 3950
    },
    {
      "epoch": 1.0760693015701137,
      "grad_norm": 8.104696273803711,
      "learning_rate": 3.565125839767372e-05,
      "loss": 0.2273,
      "step": 3975
    },
    {
      "epoch": 1.0828370330265296,
      "grad_norm": 12.741314888000488,
      "learning_rate": 3.552591998395669e-05,
      "loss": 0.2807,
      "step": 4000
    },
    {
      "epoch": 1.0896047644829454,
      "grad_norm": 0.22231225669384003,
      "learning_rate": 3.540058157023965e-05,
      "loss": 0.2141,
      "step": 4025
    },
    {
      "epoch": 1.096372495939361,
      "grad_norm": 12.738525390625,
      "learning_rate": 3.527524315652262e-05,
      "loss": 0.2796,
      "step": 4050
    },
    {
      "epoch": 1.1031402273957769,
      "grad_norm": 9.309906005859375,
      "learning_rate": 3.514990474280558e-05,
      "loss": 0.2185,
      "step": 4075
    },
    {
      "epoch": 1.1099079588521927,
      "grad_norm": 11.775688171386719,
      "learning_rate": 3.502456632908854e-05,
      "loss": 0.3496,
      "step": 4100
    },
    {
      "epoch": 1.1166756903086086,
      "grad_norm": 6.333633899688721,
      "learning_rate": 3.4899227915371505e-05,
      "loss": 0.2659,
      "step": 4125
    },
    {
      "epoch": 1.1234434217650244,
      "grad_norm": 0.39873039722442627,
      "learning_rate": 3.477388950165447e-05,
      "loss": 0.2551,
      "step": 4150
    },
    {
      "epoch": 1.13021115322144,
      "grad_norm": 0.5979344844818115,
      "learning_rate": 3.4648551087937434e-05,
      "loss": 0.2102,
      "step": 4175
    },
    {
      "epoch": 1.136978884677856,
      "grad_norm": 12.985968589782715,
      "learning_rate": 3.452321267422039e-05,
      "loss": 0.2303,
      "step": 4200
    },
    {
      "epoch": 1.1437466161342718,
      "grad_norm": 2.175553560256958,
      "learning_rate": 3.439787426050336e-05,
      "loss": 0.2526,
      "step": 4225
    },
    {
      "epoch": 1.1505143475906876,
      "grad_norm": 0.49194416403770447,
      "learning_rate": 3.427253584678632e-05,
      "loss": 0.2483,
      "step": 4250
    },
    {
      "epoch": 1.1572820790471035,
      "grad_norm": 3.2816367149353027,
      "learning_rate": 3.4147197433069287e-05,
      "loss": 0.2854,
      "step": 4275
    },
    {
      "epoch": 1.1640498105035193,
      "grad_norm": 7.387673377990723,
      "learning_rate": 3.402185901935225e-05,
      "loss": 0.2106,
      "step": 4300
    },
    {
      "epoch": 1.1708175419599351,
      "grad_norm": 7.8965654373168945,
      "learning_rate": 3.3896520605635216e-05,
      "loss": 0.2578,
      "step": 4325
    },
    {
      "epoch": 1.1775852734163508,
      "grad_norm": 1.6988545656204224,
      "learning_rate": 3.377118219191818e-05,
      "loss": 0.1903,
      "step": 4350
    },
    {
      "epoch": 1.1843530048727666,
      "grad_norm": 6.279006481170654,
      "learning_rate": 3.3645843778201146e-05,
      "loss": 0.3026,
      "step": 4375
    },
    {
      "epoch": 1.1911207363291825,
      "grad_norm": 0.32076123356819153,
      "learning_rate": 3.3520505364484104e-05,
      "loss": 0.2804,
      "step": 4400
    },
    {
      "epoch": 1.1978884677855983,
      "grad_norm": 11.526758193969727,
      "learning_rate": 3.339516695076707e-05,
      "loss": 0.3756,
      "step": 4425
    },
    {
      "epoch": 1.2046561992420142,
      "grad_norm": 11.514225959777832,
      "learning_rate": 3.3269828537050033e-05,
      "loss": 0.2868,
      "step": 4450
    },
    {
      "epoch": 1.2114239306984298,
      "grad_norm": 10.091246604919434,
      "learning_rate": 3.3144490123333e-05,
      "loss": 0.225,
      "step": 4475
    },
    {
      "epoch": 1.2181916621548456,
      "grad_norm": 1.9780317544937134,
      "learning_rate": 3.301915170961596e-05,
      "loss": 0.1927,
      "step": 4500
    },
    {
      "epoch": 1.2249593936112615,
      "grad_norm": 14.720560073852539,
      "learning_rate": 3.289381329589893e-05,
      "loss": 0.3458,
      "step": 4525
    },
    {
      "epoch": 1.2317271250676773,
      "grad_norm": 10.85938835144043,
      "learning_rate": 3.276847488218189e-05,
      "loss": 0.1138,
      "step": 4550
    },
    {
      "epoch": 1.2384948565240932,
      "grad_norm": 3.7215845584869385,
      "learning_rate": 3.264313646846486e-05,
      "loss": 0.179,
      "step": 4575
    },
    {
      "epoch": 1.245262587980509,
      "grad_norm": 12.215106010437012,
      "learning_rate": 3.251779805474782e-05,
      "loss": 0.3369,
      "step": 4600
    },
    {
      "epoch": 1.2520303194369247,
      "grad_norm": 13.148759841918945,
      "learning_rate": 3.239245964103079e-05,
      "loss": 0.3266,
      "step": 4625
    },
    {
      "epoch": 1.2587980508933405,
      "grad_norm": 14.143242835998535,
      "learning_rate": 3.226712122731375e-05,
      "loss": 0.3503,
      "step": 4650
    },
    {
      "epoch": 1.2655657823497564,
      "grad_norm": 1.314339280128479,
      "learning_rate": 3.214178281359671e-05,
      "loss": 0.1588,
      "step": 4675
    },
    {
      "epoch": 1.2723335138061722,
      "grad_norm": 13.175312042236328,
      "learning_rate": 3.2016444399879675e-05,
      "loss": 0.1884,
      "step": 4700
    },
    {
      "epoch": 1.279101245262588,
      "grad_norm": 11.514117240905762,
      "learning_rate": 3.189110598616264e-05,
      "loss": 0.2922,
      "step": 4725
    },
    {
      "epoch": 1.2858689767190037,
      "grad_norm": 2.735069990158081,
      "learning_rate": 3.1765767572445604e-05,
      "loss": 0.2909,
      "step": 4750
    },
    {
      "epoch": 1.2926367081754195,
      "grad_norm": 7.173842430114746,
      "learning_rate": 3.164042915872857e-05,
      "loss": 0.1868,
      "step": 4775
    },
    {
      "epoch": 1.2994044396318354,
      "grad_norm": 16.41992950439453,
      "learning_rate": 3.1515090745011534e-05,
      "loss": 0.1977,
      "step": 4800
    },
    {
      "epoch": 1.3061721710882512,
      "grad_norm": 0.7331606149673462,
      "learning_rate": 3.13897523312945e-05,
      "loss": 0.3978,
      "step": 4825
    },
    {
      "epoch": 1.312939902544667,
      "grad_norm": 13.302403450012207,
      "learning_rate": 3.1264413917577463e-05,
      "loss": 0.2199,
      "step": 4850
    },
    {
      "epoch": 1.319707634001083,
      "grad_norm": 6.277172565460205,
      "learning_rate": 3.113907550386043e-05,
      "loss": 0.2211,
      "step": 4875
    },
    {
      "epoch": 1.3264753654574988,
      "grad_norm": 12.060029029846191,
      "learning_rate": 3.101373709014339e-05,
      "loss": 0.2111,
      "step": 4900
    },
    {
      "epoch": 1.3332430969139144,
      "grad_norm": 12.81723403930664,
      "learning_rate": 3.088839867642635e-05,
      "loss": 0.2522,
      "step": 4925
    },
    {
      "epoch": 1.3400108283703303,
      "grad_norm": 0.56070476770401,
      "learning_rate": 3.0763060262709316e-05,
      "loss": 0.1966,
      "step": 4950
    },
    {
      "epoch": 1.346778559826746,
      "grad_norm": 5.43617582321167,
      "learning_rate": 3.063772184899228e-05,
      "loss": 0.3197,
      "step": 4975
    },
    {
      "epoch": 1.353546291283162,
      "grad_norm": 3.4792237281799316,
      "learning_rate": 3.0512383435275242e-05,
      "loss": 0.2062,
      "step": 5000
    },
    {
      "epoch": 1.3603140227395776,
      "grad_norm": 9.568795204162598,
      "learning_rate": 3.0387045021558207e-05,
      "loss": 0.3434,
      "step": 5025
    },
    {
      "epoch": 1.3670817541959934,
      "grad_norm": 10.6992769241333,
      "learning_rate": 3.0261706607841172e-05,
      "loss": 0.2204,
      "step": 5050
    },
    {
      "epoch": 1.3738494856524093,
      "grad_norm": 0.5761290788650513,
      "learning_rate": 3.0136368194124137e-05,
      "loss": 0.2141,
      "step": 5075
    },
    {
      "epoch": 1.3806172171088251,
      "grad_norm": 13.90715217590332,
      "learning_rate": 3.00110297804071e-05,
      "loss": 0.1668,
      "step": 5100
    },
    {
      "epoch": 1.387384948565241,
      "grad_norm": 11.602949142456055,
      "learning_rate": 2.9885691366690066e-05,
      "loss": 0.1902,
      "step": 5125
    },
    {
      "epoch": 1.3941526800216568,
      "grad_norm": 0.09335369616746902,
      "learning_rate": 2.976035295297303e-05,
      "loss": 0.1735,
      "step": 5150
    },
    {
      "epoch": 1.4009204114780727,
      "grad_norm": 1.5695838928222656,
      "learning_rate": 2.9635014539255996e-05,
      "loss": 0.2458,
      "step": 5175
    },
    {
      "epoch": 1.4076881429344883,
      "grad_norm": 1.8779666423797607,
      "learning_rate": 2.9509676125538954e-05,
      "loss": 0.2006,
      "step": 5200
    },
    {
      "epoch": 1.4144558743909041,
      "grad_norm": 10.377031326293945,
      "learning_rate": 2.938433771182192e-05,
      "loss": 0.2025,
      "step": 5225
    },
    {
      "epoch": 1.42122360584732,
      "grad_norm": 10.321118354797363,
      "learning_rate": 2.9258999298104883e-05,
      "loss": 0.2157,
      "step": 5250
    },
    {
      "epoch": 1.4279913373037358,
      "grad_norm": 0.4291195273399353,
      "learning_rate": 2.9133660884387848e-05,
      "loss": 0.1963,
      "step": 5275
    },
    {
      "epoch": 1.4347590687601515,
      "grad_norm": 0.28830039501190186,
      "learning_rate": 2.9008322470670813e-05,
      "loss": 0.1947,
      "step": 5300
    },
    {
      "epoch": 1.4415268002165673,
      "grad_norm": 0.1749316304922104,
      "learning_rate": 2.8882984056953778e-05,
      "loss": 0.2793,
      "step": 5325
    },
    {
      "epoch": 1.4482945316729832,
      "grad_norm": 9.74176025390625,
      "learning_rate": 2.8757645643236743e-05,
      "loss": 0.2421,
      "step": 5350
    },
    {
      "epoch": 1.455062263129399,
      "grad_norm": 0.9622665047645569,
      "learning_rate": 2.8632307229519707e-05,
      "loss": 0.2765,
      "step": 5375
    },
    {
      "epoch": 1.4618299945858149,
      "grad_norm": 0.7690452933311462,
      "learning_rate": 2.850696881580267e-05,
      "loss": 0.2429,
      "step": 5400
    },
    {
      "epoch": 1.4685977260422307,
      "grad_norm": 1.5192012786865234,
      "learning_rate": 2.8381630402085634e-05,
      "loss": 0.1464,
      "step": 5425
    },
    {
      "epoch": 1.4753654574986466,
      "grad_norm": 0.5577375888824463,
      "learning_rate": 2.8256291988368595e-05,
      "loss": 0.1942,
      "step": 5450
    },
    {
      "epoch": 1.4821331889550622,
      "grad_norm": 1.2777996063232422,
      "learning_rate": 2.813095357465156e-05,
      "loss": 0.1895,
      "step": 5475
    },
    {
      "epoch": 1.488900920411478,
      "grad_norm": 8.725980758666992,
      "learning_rate": 2.8005615160934525e-05,
      "loss": 0.303,
      "step": 5500
    },
    {
      "epoch": 1.4956686518678939,
      "grad_norm": 10.138091087341309,
      "learning_rate": 2.7880276747217486e-05,
      "loss": 0.2515,
      "step": 5525
    },
    {
      "epoch": 1.5024363833243097,
      "grad_norm": 2.442488431930542,
      "learning_rate": 2.775493833350045e-05,
      "loss": 0.2725,
      "step": 5550
    },
    {
      "epoch": 1.5092041147807254,
      "grad_norm": 2.7091565132141113,
      "learning_rate": 2.7629599919783416e-05,
      "loss": 0.2676,
      "step": 5575
    },
    {
      "epoch": 1.5159718462371412,
      "grad_norm": 6.794680118560791,
      "learning_rate": 2.750426150606638e-05,
      "loss": 0.158,
      "step": 5600
    },
    {
      "epoch": 1.522739577693557,
      "grad_norm": 1.2340929508209229,
      "learning_rate": 2.7378923092349345e-05,
      "loss": 0.2144,
      "step": 5625
    },
    {
      "epoch": 1.529507309149973,
      "grad_norm": 0.2725580036640167,
      "learning_rate": 2.725358467863231e-05,
      "loss": 0.185,
      "step": 5650
    },
    {
      "epoch": 1.5362750406063888,
      "grad_norm": 3.0790915489196777,
      "learning_rate": 2.7128246264915275e-05,
      "loss": 0.128,
      "step": 5675
    },
    {
      "epoch": 1.5430427720628046,
      "grad_norm": 1.8269541263580322,
      "learning_rate": 2.700290785119824e-05,
      "loss": 0.1831,
      "step": 5700
    },
    {
      "epoch": 1.5498105035192205,
      "grad_norm": 0.6843694448471069,
      "learning_rate": 2.6877569437481198e-05,
      "loss": 0.2506,
      "step": 5725
    },
    {
      "epoch": 1.5565782349756363,
      "grad_norm": 2.2378416061401367,
      "learning_rate": 2.6752231023764162e-05,
      "loss": 0.1644,
      "step": 5750
    },
    {
      "epoch": 1.563345966432052,
      "grad_norm": 11.299232482910156,
      "learning_rate": 2.6626892610047127e-05,
      "loss": 0.3624,
      "step": 5775
    },
    {
      "epoch": 1.5701136978884678,
      "grad_norm": 0.1067349761724472,
      "learning_rate": 2.6501554196330092e-05,
      "loss": 0.2144,
      "step": 5800
    },
    {
      "epoch": 1.5768814293448836,
      "grad_norm": 2.722107172012329,
      "learning_rate": 2.6376215782613057e-05,
      "loss": 0.2381,
      "step": 5825
    },
    {
      "epoch": 1.5836491608012992,
      "grad_norm": 11.49809741973877,
      "learning_rate": 2.625087736889602e-05,
      "loss": 0.175,
      "step": 5850
    },
    {
      "epoch": 1.590416892257715,
      "grad_norm": 16.60283088684082,
      "learning_rate": 2.6125538955178986e-05,
      "loss": 0.2896,
      "step": 5875
    },
    {
      "epoch": 1.597184623714131,
      "grad_norm": 0.3614028990268707,
      "learning_rate": 2.600020054146195e-05,
      "loss": 0.2182,
      "step": 5900
    },
    {
      "epoch": 1.6039523551705468,
      "grad_norm": 1.335888385772705,
      "learning_rate": 2.5874862127744913e-05,
      "loss": 0.1903,
      "step": 5925
    },
    {
      "epoch": 1.6107200866269626,
      "grad_norm": 7.841146945953369,
      "learning_rate": 2.5749523714027878e-05,
      "loss": 0.2156,
      "step": 5950
    },
    {
      "epoch": 1.6174878180833785,
      "grad_norm": 0.4461989402770996,
      "learning_rate": 2.562418530031084e-05,
      "loss": 0.1799,
      "step": 5975
    },
    {
      "epoch": 1.6242555495397943,
      "grad_norm": 6.844948768615723,
      "learning_rate": 2.5498846886593804e-05,
      "loss": 0.1853,
      "step": 6000
    },
    {
      "epoch": 1.6310232809962102,
      "grad_norm": 13.240145683288574,
      "learning_rate": 2.537350847287677e-05,
      "loss": 0.2664,
      "step": 6025
    },
    {
      "epoch": 1.637791012452626,
      "grad_norm": 10.991958618164062,
      "learning_rate": 2.524817005915973e-05,
      "loss": 0.2736,
      "step": 6050
    },
    {
      "epoch": 1.6445587439090417,
      "grad_norm": 18.210996627807617,
      "learning_rate": 2.5122831645442695e-05,
      "loss": 0.2818,
      "step": 6075
    },
    {
      "epoch": 1.6513264753654575,
      "grad_norm": 7.5500006675720215,
      "learning_rate": 2.499749323172566e-05,
      "loss": 0.125,
      "step": 6100
    },
    {
      "epoch": 1.6580942068218734,
      "grad_norm": 0.2722916305065155,
      "learning_rate": 2.4872154818008624e-05,
      "loss": 0.2471,
      "step": 6125
    },
    {
      "epoch": 1.664861938278289,
      "grad_norm": 0.1690392643213272,
      "learning_rate": 2.474681640429159e-05,
      "loss": 0.2063,
      "step": 6150
    },
    {
      "epoch": 1.6716296697347048,
      "grad_norm": 0.8183917999267578,
      "learning_rate": 2.462147799057455e-05,
      "loss": 0.2288,
      "step": 6175
    },
    {
      "epoch": 1.6783974011911207,
      "grad_norm": 12.807232856750488,
      "learning_rate": 2.4496139576857515e-05,
      "loss": 0.1793,
      "step": 6200
    },
    {
      "epoch": 1.6851651326475365,
      "grad_norm": 2.0582687854766846,
      "learning_rate": 2.437080116314048e-05,
      "loss": 0.1697,
      "step": 6225
    },
    {
      "epoch": 1.6919328641039524,
      "grad_norm": 0.955332338809967,
      "learning_rate": 2.4245462749423445e-05,
      "loss": 0.1161,
      "step": 6250
    },
    {
      "epoch": 1.6987005955603682,
      "grad_norm": 0.23503464460372925,
      "learning_rate": 2.412513787225509e-05,
      "loss": 0.1578,
      "step": 6275
    },
    {
      "epoch": 1.705468327016784,
      "grad_norm": 0.7222716808319092,
      "learning_rate": 2.399979945853805e-05,
      "loss": 0.2072,
      "step": 6300
    },
    {
      "epoch": 1.7122360584732,
      "grad_norm": 15.863499641418457,
      "learning_rate": 2.3874461044821016e-05,
      "loss": 0.2366,
      "step": 6325
    },
    {
      "epoch": 1.7190037899296156,
      "grad_norm": 9.790959358215332,
      "learning_rate": 2.374912263110398e-05,
      "loss": 0.2673,
      "step": 6350
    },
    {
      "epoch": 1.7257715213860314,
      "grad_norm": 8.514019012451172,
      "learning_rate": 2.3623784217386946e-05,
      "loss": 0.2244,
      "step": 6375
    },
    {
      "epoch": 1.7325392528424473,
      "grad_norm": 14.102144241333008,
      "learning_rate": 2.349844580366991e-05,
      "loss": 0.1813,
      "step": 6400
    },
    {
      "epoch": 1.7393069842988629,
      "grad_norm": 9.164491653442383,
      "learning_rate": 2.3373107389952875e-05,
      "loss": 0.2476,
      "step": 6425
    },
    {
      "epoch": 1.7460747157552787,
      "grad_norm": 0.8914769887924194,
      "learning_rate": 2.3247768976235837e-05,
      "loss": 0.2295,
      "step": 6450
    },
    {
      "epoch": 1.7528424472116946,
      "grad_norm": 12.076004981994629,
      "learning_rate": 2.31224305625188e-05,
      "loss": 0.1614,
      "step": 6475
    },
    {
      "epoch": 1.7596101786681104,
      "grad_norm": 1.1903892755508423,
      "learning_rate": 2.2997092148801766e-05,
      "loss": 0.1336,
      "step": 6500
    },
    {
      "epoch": 1.7663779101245263,
      "grad_norm": 0.8547431826591492,
      "learning_rate": 2.287175373508473e-05,
      "loss": 0.2748,
      "step": 6525
    },
    {
      "epoch": 1.7731456415809421,
      "grad_norm": 10.832341194152832,
      "learning_rate": 2.2746415321367696e-05,
      "loss": 0.242,
      "step": 6550
    },
    {
      "epoch": 1.779913373037358,
      "grad_norm": 0.6953740119934082,
      "learning_rate": 2.2621076907650657e-05,
      "loss": 0.1004,
      "step": 6575
    },
    {
      "epoch": 1.7866811044937738,
      "grad_norm": 0.1302420198917389,
      "learning_rate": 2.2495738493933622e-05,
      "loss": 0.4184,
      "step": 6600
    },
    {
      "epoch": 1.7934488359501894,
      "grad_norm": 7.436769962310791,
      "learning_rate": 2.2370400080216587e-05,
      "loss": 0.1858,
      "step": 6625
    },
    {
      "epoch": 1.8002165674066053,
      "grad_norm": 20.91210174560547,
      "learning_rate": 2.224506166649955e-05,
      "loss": 0.284,
      "step": 6650
    },
    {
      "epoch": 1.8069842988630211,
      "grad_norm": 0.46657705307006836,
      "learning_rate": 2.2119723252782516e-05,
      "loss": 0.1961,
      "step": 6675
    },
    {
      "epoch": 1.8137520303194368,
      "grad_norm": 6.9242353439331055,
      "learning_rate": 2.1994384839065478e-05,
      "loss": 0.2108,
      "step": 6700
    },
    {
      "epoch": 1.8205197617758526,
      "grad_norm": 13.766924858093262,
      "learning_rate": 2.1869046425348443e-05,
      "loss": 0.2072,
      "step": 6725
    },
    {
      "epoch": 1.8272874932322685,
      "grad_norm": 2.7908565998077393,
      "learning_rate": 2.1743708011631404e-05,
      "loss": 0.0987,
      "step": 6750
    },
    {
      "epoch": 1.8340552246886843,
      "grad_norm": 12.718364715576172,
      "learning_rate": 2.161836959791437e-05,
      "loss": 0.1816,
      "step": 6775
    },
    {
      "epoch": 1.8408229561451002,
      "grad_norm": 12.46013069152832,
      "learning_rate": 2.1493031184197334e-05,
      "loss": 0.3094,
      "step": 6800
    },
    {
      "epoch": 1.847590687601516,
      "grad_norm": 0.13040785491466522,
      "learning_rate": 2.1367692770480295e-05,
      "loss": 0.1224,
      "step": 6825
    },
    {
      "epoch": 1.8543584190579319,
      "grad_norm": 1.2305707931518555,
      "learning_rate": 2.124235435676326e-05,
      "loss": 0.083,
      "step": 6850
    },
    {
      "epoch": 1.8611261505143477,
      "grad_norm": 0.13893193006515503,
      "learning_rate": 2.1117015943046225e-05,
      "loss": 0.3004,
      "step": 6875
    },
    {
      "epoch": 1.8678938819707636,
      "grad_norm": 11.187564849853516,
      "learning_rate": 2.099167752932919e-05,
      "loss": 0.2636,
      "step": 6900
    },
    {
      "epoch": 1.8746616134271792,
      "grad_norm": 8.335643768310547,
      "learning_rate": 2.0866339115612154e-05,
      "loss": 0.1974,
      "step": 6925
    },
    {
      "epoch": 1.881429344883595,
      "grad_norm": 4.112905502319336,
      "learning_rate": 2.074100070189512e-05,
      "loss": 0.1114,
      "step": 6950
    },
    {
      "epoch": 1.8881970763400109,
      "grad_norm": 0.5131503939628601,
      "learning_rate": 2.061566228817808e-05,
      "loss": 0.2218,
      "step": 6975
    },
    {
      "epoch": 1.8949648077964265,
      "grad_norm": 0.07644043117761612,
      "learning_rate": 2.0490323874461045e-05,
      "loss": 0.2306,
      "step": 7000
    },
    {
      "epoch": 1.9017325392528424,
      "grad_norm": 2.5690276622772217,
      "learning_rate": 2.036498546074401e-05,
      "loss": 0.1069,
      "step": 7025
    },
    {
      "epoch": 1.9085002707092582,
      "grad_norm": 5.832399368286133,
      "learning_rate": 2.0239647047026975e-05,
      "loss": 0.1663,
      "step": 7050
    },
    {
      "epoch": 1.915268002165674,
      "grad_norm": 8.066961288452148,
      "learning_rate": 2.011430863330994e-05,
      "loss": 0.2029,
      "step": 7075
    },
    {
      "epoch": 1.92203573362209,
      "grad_norm": 0.3773351013660431,
      "learning_rate": 1.99889702195929e-05,
      "loss": 0.0708,
      "step": 7100
    },
    {
      "epoch": 1.9288034650785058,
      "grad_norm": 12.352056503295898,
      "learning_rate": 1.9863631805875866e-05,
      "loss": 0.2559,
      "step": 7125
    },
    {
      "epoch": 1.9355711965349216,
      "grad_norm": 0.8700584173202515,
      "learning_rate": 1.973829339215883e-05,
      "loss": 0.2601,
      "step": 7150
    },
    {
      "epoch": 1.9423389279913374,
      "grad_norm": 14.849401473999023,
      "learning_rate": 1.9612954978441795e-05,
      "loss": 0.2871,
      "step": 7175
    },
    {
      "epoch": 1.949106659447753,
      "grad_norm": 1.086490511894226,
      "learning_rate": 1.948761656472476e-05,
      "loss": 0.3069,
      "step": 7200
    },
    {
      "epoch": 1.955874390904169,
      "grad_norm": 0.1218922808766365,
      "learning_rate": 1.9362278151007722e-05,
      "loss": 0.2398,
      "step": 7225
    },
    {
      "epoch": 1.9626421223605848,
      "grad_norm": 0.7988734841346741,
      "learning_rate": 1.9236939737290687e-05,
      "loss": 0.3118,
      "step": 7250
    },
    {
      "epoch": 1.9694098538170004,
      "grad_norm": 0.1584845781326294,
      "learning_rate": 1.911160132357365e-05,
      "loss": 0.1434,
      "step": 7275
    },
    {
      "epoch": 1.9761775852734162,
      "grad_norm": 0.6999651193618774,
      "learning_rate": 1.8986262909856613e-05,
      "loss": 0.1758,
      "step": 7300
    },
    {
      "epoch": 1.982945316729832,
      "grad_norm": 0.11756038665771484,
      "learning_rate": 1.8860924496139578e-05,
      "loss": 0.1671,
      "step": 7325
    },
    {
      "epoch": 1.989713048186248,
      "grad_norm": 1.217764139175415,
      "learning_rate": 1.873558608242254e-05,
      "loss": 0.1335,
      "step": 7350
    },
    {
      "epoch": 1.9964807796426638,
      "grad_norm": 8.427165985107422,
      "learning_rate": 1.8610247668705504e-05,
      "loss": 0.2056,
      "step": 7375
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.935412134396906,
      "eval_f1_macro": 0.9274157947563729,
      "eval_f1_micro": 0.935412134396906,
      "eval_f1_weighted": 0.9302654119193674,
      "eval_loss": 0.21082843840122223,
      "eval_precision_macro": 0.9405383300469721,
      "eval_precision_micro": 0.935412134396906,
      "eval_precision_weighted": 0.9403341909054184,
      "eval_recall_macro": 0.9303843095679831,
      "eval_recall_micro": 0.935412134396906,
      "eval_recall_weighted": 0.935412134396906,
      "eval_runtime": 21.7667,
      "eval_samples_per_second": 950.305,
      "eval_steps_per_second": 59.403,
      "step": 7388
    },
    {
      "epoch": 2.0032485110990796,
      "grad_norm": 0.07127093523740768,
      "learning_rate": 1.848490925498847e-05,
      "loss": 0.1575,
      "step": 7400
    },
    {
      "epoch": 2.0100162425554955,
      "grad_norm": 15.853127479553223,
      "learning_rate": 1.8359570841271433e-05,
      "loss": 0.1454,
      "step": 7425
    },
    {
      "epoch": 2.0167839740119113,
      "grad_norm": 0.1277124136686325,
      "learning_rate": 1.8234232427554398e-05,
      "loss": 0.1873,
      "step": 7450
    },
    {
      "epoch": 2.023551705468327,
      "grad_norm": 11.486383438110352,
      "learning_rate": 1.8108894013837363e-05,
      "loss": 0.2182,
      "step": 7475
    },
    {
      "epoch": 2.030319436924743,
      "grad_norm": 6.678303241729736,
      "learning_rate": 1.7983555600120324e-05,
      "loss": 0.124,
      "step": 7500
    },
    {
      "epoch": 2.0370871683811584,
      "grad_norm": 0.4219975173473358,
      "learning_rate": 1.785821718640329e-05,
      "loss": 0.2746,
      "step": 7525
    },
    {
      "epoch": 2.0438548998375743,
      "grad_norm": 8.490753173828125,
      "learning_rate": 1.7732878772686254e-05,
      "loss": 0.1649,
      "step": 7550
    },
    {
      "epoch": 2.05062263129399,
      "grad_norm": 0.30812859535217285,
      "learning_rate": 1.760754035896922e-05,
      "loss": 0.1879,
      "step": 7575
    },
    {
      "epoch": 2.057390362750406,
      "grad_norm": 8.723641395568848,
      "learning_rate": 1.7482201945252184e-05,
      "loss": 0.1553,
      "step": 7600
    },
    {
      "epoch": 2.064158094206822,
      "grad_norm": 0.0513090118765831,
      "learning_rate": 1.7356863531535145e-05,
      "loss": 0.091,
      "step": 7625
    },
    {
      "epoch": 2.0709258256632377,
      "grad_norm": 0.18665984272956848,
      "learning_rate": 1.723152511781811e-05,
      "loss": 0.2645,
      "step": 7650
    },
    {
      "epoch": 2.0776935571196535,
      "grad_norm": 7.360457420349121,
      "learning_rate": 1.7106186704101075e-05,
      "loss": 0.2385,
      "step": 7675
    },
    {
      "epoch": 2.0844612885760694,
      "grad_norm": 0.102376289665699,
      "learning_rate": 1.698084829038404e-05,
      "loss": 0.1201,
      "step": 7700
    },
    {
      "epoch": 2.0912290200324852,
      "grad_norm": 17.834001541137695,
      "learning_rate": 1.6855509876667004e-05,
      "loss": 0.1926,
      "step": 7725
    },
    {
      "epoch": 2.097996751488901,
      "grad_norm": 0.3818954825401306,
      "learning_rate": 1.6730171462949966e-05,
      "loss": 0.1884,
      "step": 7750
    },
    {
      "epoch": 2.104764482945317,
      "grad_norm": 7.972067356109619,
      "learning_rate": 1.660483304923293e-05,
      "loss": 0.1858,
      "step": 7775
    },
    {
      "epoch": 2.1115322144017323,
      "grad_norm": 9.148263931274414,
      "learning_rate": 1.6479494635515895e-05,
      "loss": 0.1961,
      "step": 7800
    },
    {
      "epoch": 2.118299945858148,
      "grad_norm": 10.137642860412598,
      "learning_rate": 1.6354156221798857e-05,
      "loss": 0.1774,
      "step": 7825
    },
    {
      "epoch": 2.125067677314564,
      "grad_norm": 0.3626168370246887,
      "learning_rate": 1.622881780808182e-05,
      "loss": 0.1367,
      "step": 7850
    },
    {
      "epoch": 2.13183540877098,
      "grad_norm": 0.12807676196098328,
      "learning_rate": 1.6103479394364783e-05,
      "loss": 0.1785,
      "step": 7875
    },
    {
      "epoch": 2.1386031402273957,
      "grad_norm": 1.2243175506591797,
      "learning_rate": 1.5978140980647748e-05,
      "loss": 0.1499,
      "step": 7900
    },
    {
      "epoch": 2.1453708716838116,
      "grad_norm": 11.758691787719727,
      "learning_rate": 1.5852802566930712e-05,
      "loss": 0.1778,
      "step": 7925
    },
    {
      "epoch": 2.1521386031402274,
      "grad_norm": 0.7880843281745911,
      "learning_rate": 1.5727464153213677e-05,
      "loss": 0.2024,
      "step": 7950
    },
    {
      "epoch": 2.1589063345966433,
      "grad_norm": 0.23943960666656494,
      "learning_rate": 1.5602125739496642e-05,
      "loss": 0.1409,
      "step": 7975
    },
    {
      "epoch": 2.165674066053059,
      "grad_norm": 11.204683303833008,
      "learning_rate": 1.5476787325779607e-05,
      "loss": 0.2603,
      "step": 8000
    },
    {
      "epoch": 2.172441797509475,
      "grad_norm": 8.875106811523438,
      "learning_rate": 1.5351448912062568e-05,
      "loss": 0.1979,
      "step": 8025
    },
    {
      "epoch": 2.179209528965891,
      "grad_norm": 10.337849617004395,
      "learning_rate": 1.5226110498345533e-05,
      "loss": 0.0886,
      "step": 8050
    },
    {
      "epoch": 2.1859772604223062,
      "grad_norm": 0.0444558709859848,
      "learning_rate": 1.5100772084628498e-05,
      "loss": 0.1459,
      "step": 8075
    },
    {
      "epoch": 2.192744991878722,
      "grad_norm": 0.4095276892185211,
      "learning_rate": 1.4975433670911463e-05,
      "loss": 0.2745,
      "step": 8100
    },
    {
      "epoch": 2.199512723335138,
      "grad_norm": 0.09346342086791992,
      "learning_rate": 1.4850095257194427e-05,
      "loss": 0.213,
      "step": 8125
    },
    {
      "epoch": 2.2062804547915538,
      "grad_norm": 11.369955062866211,
      "learning_rate": 1.4724756843477389e-05,
      "loss": 0.2651,
      "step": 8150
    },
    {
      "epoch": 2.2130481862479696,
      "grad_norm": 0.17222055792808533,
      "learning_rate": 1.4599418429760354e-05,
      "loss": 0.1258,
      "step": 8175
    },
    {
      "epoch": 2.2198159177043855,
      "grad_norm": 0.5808836221694946,
      "learning_rate": 1.4474080016043317e-05,
      "loss": 0.2261,
      "step": 8200
    },
    {
      "epoch": 2.2265836491608013,
      "grad_norm": 0.37860339879989624,
      "learning_rate": 1.4348741602326282e-05,
      "loss": 0.2356,
      "step": 8225
    },
    {
      "epoch": 2.233351380617217,
      "grad_norm": 7.043012619018555,
      "learning_rate": 1.4223403188609246e-05,
      "loss": 0.1983,
      "step": 8250
    },
    {
      "epoch": 2.240119112073633,
| "grad_norm": 0.04890386760234833, | |
| "learning_rate": 1.4098064774892208e-05, | |
| "loss": 0.1501, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 2.246886843530049, | |
| "grad_norm": 1.6778485774993896, | |
| "learning_rate": 1.3972726361175173e-05, | |
| "loss": 0.2035, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 2.2536545749864647, | |
| "grad_norm": 0.09249867498874664, | |
| "learning_rate": 1.3847387947458137e-05, | |
| "loss": 0.1831, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 2.26042230644288, | |
| "grad_norm": 10.879770278930664, | |
| "learning_rate": 1.3722049533741102e-05, | |
| "loss": 0.1008, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 2.267190037899296, | |
| "grad_norm": 0.2881366014480591, | |
| "learning_rate": 1.3596711120024067e-05, | |
| "loss": 0.0649, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 2.273957769355712, | |
| "grad_norm": 0.30121418833732605, | |
| "learning_rate": 1.3471372706307028e-05, | |
| "loss": 0.141, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.2807255008121277, | |
| "grad_norm": 0.31559237837791443, | |
| "learning_rate": 1.3346034292589993e-05, | |
| "loss": 0.1108, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 2.2874932322685435, | |
| "grad_norm": 13.67086124420166, | |
| "learning_rate": 1.3220695878872958e-05, | |
| "loss": 0.1543, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 2.2942609637249594, | |
| "grad_norm": 4.521094799041748, | |
| "learning_rate": 1.3095357465155921e-05, | |
| "loss": 0.1735, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 2.301028695181375, | |
| "grad_norm": 1.390699028968811, | |
| "learning_rate": 1.2970019051438886e-05, | |
| "loss": 0.1303, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.307796426637791, | |
| "grad_norm": 17.726560592651367, | |
| "learning_rate": 1.284468063772185e-05, | |
| "loss": 0.2959, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 2.314564158094207, | |
| "grad_norm": 12.668703079223633, | |
| "learning_rate": 1.2719342224004812e-05, | |
| "loss": 0.1625, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 2.3213318895506228, | |
| "grad_norm": 2.370819091796875, | |
| "learning_rate": 1.2594003810287777e-05, | |
| "loss": 0.1213, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 2.3280996210070386, | |
| "grad_norm": 6.036921977996826, | |
| "learning_rate": 1.2468665396570742e-05, | |
| "loss": 0.1634, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.334867352463454, | |
| "grad_norm": 0.8694545030593872, | |
| "learning_rate": 1.2343326982853707e-05, | |
| "loss": 0.1495, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 2.3416350839198703, | |
| "grad_norm": 2.2144663333892822, | |
| "learning_rate": 1.221798856913667e-05, | |
| "loss": 0.1621, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 2.3484028153762857, | |
| "grad_norm": 10.507080078125, | |
| "learning_rate": 1.2092650155419635e-05, | |
| "loss": 0.174, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 2.3551705468327016, | |
| "grad_norm": 0.9843292236328125, | |
| "learning_rate": 1.1967311741702598e-05, | |
| "loss": 0.1339, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.3619382782891174, | |
| "grad_norm": 0.3039487898349762, | |
| "learning_rate": 1.1841973327985562e-05, | |
| "loss": 0.1176, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 2.3687060097455332, | |
| "grad_norm": 0.5395913124084473, | |
| "learning_rate": 1.1716634914268526e-05, | |
| "loss": 0.2338, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 2.375473741201949, | |
| "grad_norm": 0.16517315804958344, | |
| "learning_rate": 1.1591296500551489e-05, | |
| "loss": 0.2132, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 2.382241472658365, | |
| "grad_norm": 10.029488563537598, | |
| "learning_rate": 1.1465958086834453e-05, | |
| "loss": 0.188, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.389009204114781, | |
| "grad_norm": 3.222883462905884, | |
| "learning_rate": 1.1340619673117417e-05, | |
| "loss": 0.2098, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 2.3957769355711966, | |
| "grad_norm": 0.06654487550258636, | |
| "learning_rate": 1.1215281259400381e-05, | |
| "loss": 0.1275, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 2.4025446670276125, | |
| "grad_norm": 2.666473388671875, | |
| "learning_rate": 1.1089942845683346e-05, | |
| "loss": 0.2144, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 2.4093123984840283, | |
| "grad_norm": 10.859978675842285, | |
| "learning_rate": 1.096460443196631e-05, | |
| "loss": 0.1975, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.416080129940444, | |
| "grad_norm": 1.037359356880188, | |
| "learning_rate": 1.0839266018249274e-05, | |
| "loss": 0.0893, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 2.4228478613968596, | |
| "grad_norm": 17.77867889404297, | |
| "learning_rate": 1.0713927604532239e-05, | |
| "loss": 0.178, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 2.4296155928532754, | |
| "grad_norm": 8.258838653564453, | |
| "learning_rate": 1.0588589190815202e-05, | |
| "loss": 0.1508, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 2.4363833243096913, | |
| "grad_norm": 8.984355926513672, | |
| "learning_rate": 1.0463250777098165e-05, | |
| "loss": 0.1494, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.443151055766107, | |
| "grad_norm": 1.9472849369049072, | |
| "learning_rate": 1.0337912363381128e-05, | |
| "loss": 0.1767, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 2.449918787222523, | |
| "grad_norm": 0.1337762475013733, | |
| "learning_rate": 1.0212573949664093e-05, | |
| "loss": 0.1451, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 2.456686518678939, | |
| "grad_norm": 0.7223402857780457, | |
| "learning_rate": 1.0087235535947058e-05, | |
| "loss": 0.1788, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 2.4634542501353547, | |
| "grad_norm": 12.872135162353516, | |
| "learning_rate": 9.961897122230021e-06, | |
| "loss": 0.1807, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.4702219815917705, | |
| "grad_norm": 14.778857231140137, | |
| "learning_rate": 9.836558708512986e-06, | |
| "loss": 0.1621, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 2.4769897130481864, | |
| "grad_norm": 0.44684821367263794, | |
| "learning_rate": 9.71122029479595e-06, | |
| "loss": 0.1788, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 2.4837574445046022, | |
| "grad_norm": 15.21567440032959, | |
| "learning_rate": 9.585881881078914e-06, | |
| "loss": 0.1615, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 2.490525175961018, | |
| "grad_norm": 0.5438389182090759, | |
| "learning_rate": 9.460543467361878e-06, | |
| "loss": 0.0694, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.4972929074174335, | |
| "grad_norm": 0.26495838165283203, | |
| "learning_rate": 9.335205053644842e-06, | |
| "loss": 0.2251, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 2.5040606388738493, | |
| "grad_norm": 3.848076343536377, | |
| "learning_rate": 9.209866639927806e-06, | |
| "loss": 0.118, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 2.510828370330265, | |
| "grad_norm": 0.0551062636077404, | |
| "learning_rate": 9.08452822621077e-06, | |
| "loss": 0.066, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 2.517596101786681, | |
| "grad_norm": 8.600728034973145, | |
| "learning_rate": 8.959189812493733e-06, | |
| "loss": 0.1336, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.524363833243097, | |
| "grad_norm": 6.382137298583984, | |
| "learning_rate": 8.833851398776697e-06, | |
| "loss": 0.2049, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 2.5311315646995127, | |
| "grad_norm": 13.446625709533691, | |
| "learning_rate": 8.70851298505966e-06, | |
| "loss": 0.1743, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 2.5378992961559286, | |
| "grad_norm": 6.327456951141357, | |
| "learning_rate": 8.583174571342625e-06, | |
| "loss": 0.2677, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 2.5446670276123444, | |
| "grad_norm": 0.14797528088092804, | |
| "learning_rate": 8.45783615762559e-06, | |
| "loss": 0.1348, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.5514347590687603, | |
| "grad_norm": 0.03272142633795738, | |
| "learning_rate": 8.332497743908553e-06, | |
| "loss": 0.1655, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 2.558202490525176, | |
| "grad_norm": 0.09539608657360077, | |
| "learning_rate": 8.207159330191518e-06, | |
| "loss": 0.2066, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 2.564970221981592, | |
| "grad_norm": 2.991002321243286, | |
| "learning_rate": 8.081820916474483e-06, | |
| "loss": 0.1664, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 2.5717379534380074, | |
| "grad_norm": 0.05500922352075577, | |
| "learning_rate": 7.956482502757446e-06, | |
| "loss": 0.1241, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.5785056848944237, | |
| "grad_norm": 11.698848724365234, | |
| "learning_rate": 7.831144089040409e-06, | |
| "loss": 0.2105, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 2.585273416350839, | |
| "grad_norm": 0.4144781231880188, | |
| "learning_rate": 7.705805675323372e-06, | |
| "loss": 0.229, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 2.592041147807255, | |
| "grad_norm": 15.266688346862793, | |
| "learning_rate": 7.580467261606338e-06, | |
| "loss": 0.1854, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 2.5988088792636708, | |
| "grad_norm": 0.1844756007194519, | |
| "learning_rate": 7.455128847889302e-06, | |
| "loss": 0.082, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.6055766107200866, | |
| "grad_norm": 7.458913326263428, | |
| "learning_rate": 7.329790434172265e-06, | |
| "loss": 0.1278, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 2.6123443421765025, | |
| "grad_norm": 0.7331855893135071, | |
| "learning_rate": 7.20445202045523e-06, | |
| "loss": 0.1028, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 2.6191120736329183, | |
| "grad_norm": 0.3585509657859802, | |
| "learning_rate": 7.0791136067381944e-06, | |
| "loss": 0.1163, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 2.625879805089334, | |
| "grad_norm": 0.40765902400016785, | |
| "learning_rate": 6.9537751930211575e-06, | |
| "loss": 0.1065, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.63264753654575, | |
| "grad_norm": 7.481261730194092, | |
| "learning_rate": 6.8284367793041215e-06, | |
| "loss": 0.1028, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 2.639415268002166, | |
| "grad_norm": 1.0196110010147095, | |
| "learning_rate": 6.703098365587085e-06, | |
| "loss": 0.103, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.6461829994585813, | |
| "grad_norm": 0.306159645318985, | |
| "learning_rate": 6.577759951870049e-06, | |
| "loss": 0.1617, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 2.6529507309149976, | |
| "grad_norm": 11.561976432800293, | |
| "learning_rate": 6.452421538153014e-06, | |
| "loss": 0.1027, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.659718462371413, | |
| "grad_norm": 0.021391283720731735, | |
| "learning_rate": 6.327083124435977e-06, | |
| "loss": 0.126, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 2.666486193827829, | |
| "grad_norm": 0.036384038627147675, | |
| "learning_rate": 6.201744710718941e-06, | |
| "loss": 0.2274, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 2.6732539252842447, | |
| "grad_norm": 0.39547139406204224, | |
| "learning_rate": 6.076406297001905e-06, | |
| "loss": 0.2437, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 2.6800216567406605, | |
| "grad_norm": 1.0845611095428467, | |
| "learning_rate": 5.951067883284869e-06, | |
| "loss": 0.1372, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.6867893881970764, | |
| "grad_norm": 0.6141884326934814, | |
| "learning_rate": 5.825729469567833e-06, | |
| "loss": 0.0986, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 2.693557119653492, | |
| "grad_norm": 6.706904888153076, | |
| "learning_rate": 5.700391055850798e-06, | |
| "loss": 0.1353, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 2.700324851109908, | |
| "grad_norm": 0.1707427203655243, | |
| "learning_rate": 5.575052642133762e-06, | |
| "loss": 0.1917, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 2.707092582566324, | |
| "grad_norm": 0.16985374689102173, | |
| "learning_rate": 5.449714228416725e-06, | |
| "loss": 0.0994, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.7138603140227398, | |
| "grad_norm": 10.607304573059082, | |
| "learning_rate": 5.324375814699689e-06, | |
| "loss": 0.1015, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 2.720628045479155, | |
| "grad_norm": 0.6444892287254333, | |
| "learning_rate": 5.199037400982654e-06, | |
| "loss": 0.0935, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 2.7273957769355714, | |
| "grad_norm": 0.8442856669425964, | |
| "learning_rate": 5.073698987265618e-06, | |
| "loss": 0.2081, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 2.734163508391987, | |
| "grad_norm": 0.2734193205833435, | |
| "learning_rate": 4.948360573548582e-06, | |
| "loss": 0.169, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 2.7409312398484027, | |
| "grad_norm": 0.19697026908397675, | |
| "learning_rate": 4.823022159831545e-06, | |
| "loss": 0.1624, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 2.7476989713048185, | |
| "grad_norm": 12.665722846984863, | |
| "learning_rate": 4.69768374611451e-06, | |
| "loss": 0.1644, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 2.7544667027612344, | |
| "grad_norm": 10.231285095214844, | |
| "learning_rate": 4.5723453323974735e-06, | |
| "loss": 0.1422, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 2.7612344342176502, | |
| "grad_norm": 10.933349609375, | |
| "learning_rate": 4.4470069186804375e-06, | |
| "loss": 0.2038, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.768002165674066, | |
| "grad_norm": 10.937248229980469, | |
| "learning_rate": 4.3216685049634015e-06, | |
| "loss": 0.1339, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 2.774769897130482, | |
| "grad_norm": 0.07432160526514053, | |
| "learning_rate": 4.196330091246365e-06, | |
| "loss": 0.2031, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 2.781537628586898, | |
| "grad_norm": 10.13500690460205, | |
| "learning_rate": 4.070991677529329e-06, | |
| "loss": 0.1778, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 2.7883053600433136, | |
| "grad_norm": 0.058682914823293686, | |
| "learning_rate": 3.945653263812293e-06, | |
| "loss": 0.1036, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.795073091499729, | |
| "grad_norm": 11.469184875488281, | |
| "learning_rate": 3.820314850095257e-06, | |
| "loss": 0.2036, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 2.8018408229561453, | |
| "grad_norm": 0.17000257968902588, | |
| "learning_rate": 3.6949764363782212e-06, | |
| "loss": 0.1875, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.8086085544125607, | |
| "grad_norm": 0.3760491907596588, | |
| "learning_rate": 3.5696380226611856e-06, | |
| "loss": 0.1494, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 2.8153762858689766, | |
| "grad_norm": 0.10404614359140396, | |
| "learning_rate": 3.4442996089441496e-06, | |
| "loss": 0.0973, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.8221440173253924, | |
| "grad_norm": 0.41150447726249695, | |
| "learning_rate": 3.3239747317757947e-06, | |
| "loss": 0.1634, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 2.8289117487818083, | |
| "grad_norm": 0.061491526663303375, | |
| "learning_rate": 3.1986363180587587e-06, | |
| "loss": 0.184, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 2.835679480238224, | |
| "grad_norm": 1.5745799541473389, | |
| "learning_rate": 3.0732979043417226e-06, | |
| "loss": 0.1135, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 2.84244721169464, | |
| "grad_norm": 15.36170482635498, | |
| "learning_rate": 2.947959490624687e-06, | |
| "loss": 0.0998, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.849214943151056, | |
| "grad_norm": 2.991931915283203, | |
| "learning_rate": 2.8226210769076505e-06, | |
| "loss": 0.1403, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 2.8559826746074717, | |
| "grad_norm": 1.1434751749038696, | |
| "learning_rate": 2.697282663190615e-06, | |
| "loss": 0.1634, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 2.8627504060638875, | |
| "grad_norm": 5.902674198150635, | |
| "learning_rate": 2.571944249473579e-06, | |
| "loss": 0.1235, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 2.869518137520303, | |
| "grad_norm": 0.050640497356653214, | |
| "learning_rate": 2.446605835756543e-06, | |
| "loss": 0.1516, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.8762858689767192, | |
| "grad_norm": 13.541975021362305, | |
| "learning_rate": 2.3212674220395068e-06, | |
| "loss": 0.2409, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 2.8830536004331346, | |
| "grad_norm": 24.88682746887207, | |
| "learning_rate": 2.1959290083224707e-06, | |
| "loss": 0.1724, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 2.8898213318895505, | |
| "grad_norm": 0.10497920215129852, | |
| "learning_rate": 2.0705905946054347e-06, | |
| "loss": 0.1563, | |
| "step": 10675 | |
| }, | |
| { | |
| "epoch": 2.8965890633459663, | |
| "grad_norm": 3.943291187286377, | |
| "learning_rate": 1.945252180888399e-06, | |
| "loss": 0.1966, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.903356794802382, | |
| "grad_norm": 0.061565861105918884, | |
| "learning_rate": 1.8199137671713628e-06, | |
| "loss": 0.1962, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 2.910124526258798, | |
| "grad_norm": 0.08835487067699432, | |
| "learning_rate": 1.694575353454327e-06, | |
| "loss": 0.282, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 2.916892257715214, | |
| "grad_norm": 2.9496688842773438, | |
| "learning_rate": 1.5692369397372907e-06, | |
| "loss": 0.1138, | |
| "step": 10775 | |
| }, | |
| { | |
| "epoch": 2.9236599891716297, | |
| "grad_norm": 1.1299965381622314, | |
| "learning_rate": 1.4438985260202547e-06, | |
| "loss": 0.1773, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.9304277206280456, | |
| "grad_norm": 11.417136192321777, | |
| "learning_rate": 1.3185601123032186e-06, | |
| "loss": 0.1358, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 2.9371954520844614, | |
| "grad_norm": 0.06532655656337738, | |
| "learning_rate": 1.1932216985861828e-06, | |
| "loss": 0.1728, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 2.943963183540877, | |
| "grad_norm": 5.278496265411377, | |
| "learning_rate": 1.0678832848691468e-06, | |
| "loss": 0.1416, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 2.950730914997293, | |
| "grad_norm": 1.0812338590621948, | |
| "learning_rate": 9.425448711521107e-07, | |
| "loss": 0.1485, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.9574986464537085, | |
| "grad_norm": 0.12115427106618881, | |
| "learning_rate": 8.172064574350748e-07, | |
| "loss": 0.1201, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 2.9642663779101244, | |
| "grad_norm": 0.1515658050775528, | |
| "learning_rate": 6.918680437180387e-07, | |
| "loss": 0.1225, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.97103410936654, | |
| "grad_norm": 0.056250348687171936, | |
| "learning_rate": 5.665296300010028e-07, | |
| "loss": 0.1442, | |
| "step": 10975 | |
| }, | |
| { | |
| "epoch": 2.977801840822956, | |
| "grad_norm": 0.10962472856044769, | |
| "learning_rate": 4.411912162839667e-07, | |
| "loss": 0.0699, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.984569572279372, | |
| "grad_norm": 0.3095192015171051, | |
| "learning_rate": 3.1585280256693076e-07, | |
| "loss": 0.171, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 2.9913373037357878, | |
| "grad_norm": 0.24057000875473022, | |
| "learning_rate": 1.9051438884989471e-07, | |
| "loss": 0.0967, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 2.9981050351922036, | |
| "grad_norm": 4.0788044929504395, | |
| "learning_rate": 6.517597513285872e-08, | |
| "loss": 0.1549, | |
| "step": 11075 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9390862944162437, | |
| "eval_f1_macro": 0.9327706813543745, | |
| "eval_f1_micro": 0.9390862944162437, | |
| "eval_f1_weighted": 0.9351383662112599, | |
| "eval_loss": 0.15377455949783325, | |
| "eval_precision_macro": 0.9422812840960479, | |
| "eval_precision_micro": 0.9390862944162437, | |
| "eval_precision_weighted": 0.9420489414415942, | |
| "eval_recall_macro": 0.9346991783726477, | |
| "eval_recall_micro": 0.9390862944162437, | |
| "eval_recall_weighted": 0.9390862944162437, | |
| "eval_runtime": 21.8517, | |
| "eval_samples_per_second": 946.61, | |
| "eval_steps_per_second": 59.172, | |
| "step": 11082 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 11082, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.01 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2963923884403200.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
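
This is the `trainer_state.json` format written by `transformers.Trainer`. The file is not strictly valid JSON, because a bare `Infinity` token appears as an early `grad_norm` value, which strict parsers such as JavaScript's `JSON.parse` reject (hence the parse error shown at the top). Python's `json` module, however, accepts `Infinity` and `NaN` by default, so the file can be loaded as-is and, if needed, sanitized for strict consumers. A minimal sketch, assuming the file is saved locally as `trainer_state.json`; the path, the output filename, and the `sanitize` helper are illustrative, not part of the original:

```python
import json
import math

# Python's json module tolerates the non-standard Infinity/NaN tokens
# that strict JSON parsers reject, so the raw trainer state loads as-is.
with open("trainer_state.json") as f:
    state = json.load(f)

def sanitize(obj):
    """Recursively replace non-finite floats with None so the result
    round-trips through a strict serializer (allow_nan=False)."""
    if isinstance(obj, float) and not math.isfinite(obj):
        return None
    if isinstance(obj, dict):
        return {k: sanitize(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [sanitize(v) for v in obj]
    return obj

# Write a strictly valid copy (hypothetical output name).
with open("trainer_state.clean.json", "w") as f:
    json.dump(sanitize(state), f, indent=2, sort_keys=True, allow_nan=False)
```

Once loaded, `state["log_history"]` is an ordinary list of dicts, so the per-step `loss` can be plotted against `step`, or the epoch-2 and epoch-3 eval entries recorded above compared directly.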