| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9994000299985002, |
| "eval_steps": 500, |
| "global_step": 4444, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00044997750112494374, |
| "grad_norm": 4.529098245180452, |
| "learning_rate": 2.2471910112359554e-08, |
| "loss": 2.198, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0022498875056247186, |
| "grad_norm": 4.089353425911041, |
| "learning_rate": 1.1235955056179776e-07, |
| "loss": 2.1345, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.004499775011249437, |
| "grad_norm": 3.871692712739612, |
| "learning_rate": 2.247191011235955e-07, |
| "loss": 2.1219, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006749662516874156, |
| "grad_norm": 3.48717161147707, |
| "learning_rate": 3.3707865168539325e-07, |
| "loss": 2.1269, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.008999550022498875, |
| "grad_norm": 2.757793504494978, |
| "learning_rate": 4.49438202247191e-07, |
| "loss": 1.9716, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011249437528123594, |
| "grad_norm": 1.888825489292899, |
| "learning_rate": 5.617977528089888e-07, |
| "loss": 1.8304, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.013499325033748313, |
| "grad_norm": 1.775898351624132, |
| "learning_rate": 6.741573033707865e-07, |
| "loss": 1.5734, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01574921253937303, |
| "grad_norm": 1.3767992538309424, |
| "learning_rate": 7.865168539325843e-07, |
| "loss": 1.3449, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01799910004499775, |
| "grad_norm": 0.640753938304731, |
| "learning_rate": 8.98876404494382e-07, |
| "loss": 1.033, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.020248987550622467, |
| "grad_norm": 0.5235162453528913, |
| "learning_rate": 1.01123595505618e-06, |
| "loss": 0.8949, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02249887505624719, |
| "grad_norm": 0.5107836396937878, |
| "learning_rate": 1.1235955056179777e-06, |
| "loss": 0.8114, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.024748762561871907, |
| "grad_norm": 0.4706639289305382, |
| "learning_rate": 1.2359550561797752e-06, |
| "loss": 0.7441, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.026998650067496625, |
| "grad_norm": 0.40589783510545635, |
| "learning_rate": 1.348314606741573e-06, |
| "loss": 0.69, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.029248537573121344, |
| "grad_norm": 0.35422872028585745, |
| "learning_rate": 1.4606741573033708e-06, |
| "loss": 0.6444, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03149842507874606, |
| "grad_norm": 0.29411265606808407, |
| "learning_rate": 1.5730337078651686e-06, |
| "loss": 0.6228, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03374831258437078, |
| "grad_norm": 0.26693653586148297, |
| "learning_rate": 1.6853932584269663e-06, |
| "loss": 0.5582, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0359982000899955, |
| "grad_norm": 0.2681533501424792, |
| "learning_rate": 1.797752808988764e-06, |
| "loss": 0.556, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03824808759562022, |
| "grad_norm": 0.25571380427813706, |
| "learning_rate": 1.910112359550562e-06, |
| "loss": 0.5195, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.040497975101244935, |
| "grad_norm": 0.2711200571248154, |
| "learning_rate": 2.02247191011236e-06, |
| "loss": 0.5143, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.042747862606869656, |
| "grad_norm": 0.24950121635592964, |
| "learning_rate": 2.1348314606741574e-06, |
| "loss": 0.494, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04499775011249438, |
| "grad_norm": 0.27150452317695684, |
| "learning_rate": 2.2471910112359554e-06, |
| "loss": 0.4789, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04724763761811909, |
| "grad_norm": 0.28474082113260213, |
| "learning_rate": 2.359550561797753e-06, |
| "loss": 0.4491, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.049497525123743814, |
| "grad_norm": 0.3015594402280247, |
| "learning_rate": 2.4719101123595505e-06, |
| "loss": 0.4403, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05174741262936853, |
| "grad_norm": 0.30222867324963326, |
| "learning_rate": 2.584269662921349e-06, |
| "loss": 0.4484, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.05399730013499325, |
| "grad_norm": 0.273012018841968, |
| "learning_rate": 2.696629213483146e-06, |
| "loss": 0.4261, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05624718764061797, |
| "grad_norm": 0.26965584672677667, |
| "learning_rate": 2.8089887640449444e-06, |
| "loss": 0.4322, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.05849707514624269, |
| "grad_norm": 0.26746073049580904, |
| "learning_rate": 2.9213483146067416e-06, |
| "loss": 0.4639, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06074696265186741, |
| "grad_norm": 0.23633644420426006, |
| "learning_rate": 3.03370786516854e-06, |
| "loss": 0.4599, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06299685015749212, |
| "grad_norm": 0.23444605189207685, |
| "learning_rate": 3.146067415730337e-06, |
| "loss": 0.3872, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06524673766311684, |
| "grad_norm": 0.2498458749808252, |
| "learning_rate": 3.258426966292135e-06, |
| "loss": 0.4033, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.06749662516874157, |
| "grad_norm": 0.20527616283239267, |
| "learning_rate": 3.3707865168539327e-06, |
| "loss": 0.4029, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06974651267436628, |
| "grad_norm": 0.1929264923191392, |
| "learning_rate": 3.4831460674157306e-06, |
| "loss": 0.3972, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.071996400179991, |
| "grad_norm": 0.2053416646605216, |
| "learning_rate": 3.595505617977528e-06, |
| "loss": 0.4048, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07424628768561573, |
| "grad_norm": 0.21703322812039422, |
| "learning_rate": 3.707865168539326e-06, |
| "loss": 0.4141, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.07649617519124044, |
| "grad_norm": 0.1891382535207117, |
| "learning_rate": 3.820224719101124e-06, |
| "loss": 0.4136, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07874606269686515, |
| "grad_norm": 0.147554774836894, |
| "learning_rate": 3.932584269662922e-06, |
| "loss": 0.375, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08099595020248987, |
| "grad_norm": 0.17905729415958477, |
| "learning_rate": 4.04494382022472e-06, |
| "loss": 0.4017, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0832458377081146, |
| "grad_norm": 0.1505640721404514, |
| "learning_rate": 4.157303370786518e-06, |
| "loss": 0.3805, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08549572521373931, |
| "grad_norm": 0.1583375307295741, |
| "learning_rate": 4.269662921348315e-06, |
| "loss": 0.3782, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.08774561271936403, |
| "grad_norm": 0.18866183228882213, |
| "learning_rate": 4.382022471910113e-06, |
| "loss": 0.4021, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.08999550022498876, |
| "grad_norm": 0.15247202386740694, |
| "learning_rate": 4.494382022471911e-06, |
| "loss": 0.3601, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09224538773061347, |
| "grad_norm": 0.15750805465065473, |
| "learning_rate": 4.606741573033709e-06, |
| "loss": 0.3642, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09449527523623819, |
| "grad_norm": 0.18964645355569257, |
| "learning_rate": 4.719101123595506e-06, |
| "loss": 0.381, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0967451627418629, |
| "grad_norm": 0.21688318162459963, |
| "learning_rate": 4.831460674157304e-06, |
| "loss": 0.3936, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.09899505024748763, |
| "grad_norm": 0.1626408315890611, |
| "learning_rate": 4.943820224719101e-06, |
| "loss": 0.3836, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.10124493775311234, |
| "grad_norm": 0.15324258338477442, |
| "learning_rate": 5.0561797752809e-06, |
| "loss": 0.3772, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10349482525873706, |
| "grad_norm": 0.14183725887256435, |
| "learning_rate": 5.168539325842698e-06, |
| "loss": 0.372, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10574471276436179, |
| "grad_norm": 0.16218789948449905, |
| "learning_rate": 5.280898876404494e-06, |
| "loss": 0.3571, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.1079946002699865, |
| "grad_norm": 0.1542954767008243, |
| "learning_rate": 5.393258426966292e-06, |
| "loss": 0.3773, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11024448777561122, |
| "grad_norm": 0.1480650113941815, |
| "learning_rate": 5.50561797752809e-06, |
| "loss": 0.3679, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11249437528123594, |
| "grad_norm": 0.14762694498356746, |
| "learning_rate": 5.617977528089889e-06, |
| "loss": 0.3766, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11474426278686066, |
| "grad_norm": 0.13168710830003866, |
| "learning_rate": 5.730337078651685e-06, |
| "loss": 0.3563, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.11699415029248537, |
| "grad_norm": 0.12994674295453762, |
| "learning_rate": 5.842696629213483e-06, |
| "loss": 0.3835, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11924403779811009, |
| "grad_norm": 0.15322453794019458, |
| "learning_rate": 5.955056179775281e-06, |
| "loss": 0.3523, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12149392530373482, |
| "grad_norm": 0.14048350395524586, |
| "learning_rate": 6.06741573033708e-06, |
| "loss": 0.3667, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12374381280935953, |
| "grad_norm": 0.12596877364547304, |
| "learning_rate": 6.179775280898876e-06, |
| "loss": 0.3675, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12599370031498425, |
| "grad_norm": 0.14354283317022593, |
| "learning_rate": 6.292134831460674e-06, |
| "loss": 0.4005, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.12824358782060896, |
| "grad_norm": 0.11630361105168144, |
| "learning_rate": 6.404494382022472e-06, |
| "loss": 0.3747, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13049347532623368, |
| "grad_norm": 0.13228040085041334, |
| "learning_rate": 6.51685393258427e-06, |
| "loss": 0.4077, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13274336283185842, |
| "grad_norm": 0.14738366545686615, |
| "learning_rate": 6.629213483146067e-06, |
| "loss": 0.3998, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.13499325033748313, |
| "grad_norm": 0.12791384531800185, |
| "learning_rate": 6.741573033707865e-06, |
| "loss": 0.3964, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13724313784310785, |
| "grad_norm": 0.14292143339244684, |
| "learning_rate": 6.853932584269663e-06, |
| "loss": 0.3665, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.13949302534873256, |
| "grad_norm": 0.13259028515993362, |
| "learning_rate": 6.966292134831461e-06, |
| "loss": 0.3648, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14174291285435728, |
| "grad_norm": 0.1292871978326336, |
| "learning_rate": 7.078651685393258e-06, |
| "loss": 0.3729, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.143992800359982, |
| "grad_norm": 0.11157572176169035, |
| "learning_rate": 7.191011235955056e-06, |
| "loss": 0.3746, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1462426878656067, |
| "grad_norm": 0.13354661592196232, |
| "learning_rate": 7.303370786516854e-06, |
| "loss": 0.3808, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.14849257537123145, |
| "grad_norm": 0.1306015247259671, |
| "learning_rate": 7.415730337078652e-06, |
| "loss": 0.3581, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15074246287685616, |
| "grad_norm": 0.14070851299418752, |
| "learning_rate": 7.5280898876404495e-06, |
| "loss": 0.3687, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.15299235038248088, |
| "grad_norm": 0.12379734686286958, |
| "learning_rate": 7.640449438202247e-06, |
| "loss": 0.3663, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1552422378881056, |
| "grad_norm": 0.10751947723272416, |
| "learning_rate": 7.752808988764046e-06, |
| "loss": 0.3852, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.1574921253937303, |
| "grad_norm": 0.1540237761883997, |
| "learning_rate": 7.865168539325843e-06, |
| "loss": 0.3717, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15974201289935502, |
| "grad_norm": 0.11180120270561197, |
| "learning_rate": 7.97752808988764e-06, |
| "loss": 0.3489, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.16199190040497974, |
| "grad_norm": 0.12094951392614195, |
| "learning_rate": 8.08988764044944e-06, |
| "loss": 0.3732, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16424178791060448, |
| "grad_norm": 0.09738867753978302, |
| "learning_rate": 8.202247191011237e-06, |
| "loss": 0.377, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1664916754162292, |
| "grad_norm": 0.19496877057584272, |
| "learning_rate": 8.314606741573035e-06, |
| "loss": 0.3804, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1687415629218539, |
| "grad_norm": 0.13099392604853372, |
| "learning_rate": 8.426966292134832e-06, |
| "loss": 0.3853, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.17099145042747863, |
| "grad_norm": 0.0929654538179481, |
| "learning_rate": 8.53932584269663e-06, |
| "loss": 0.3532, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17324133793310334, |
| "grad_norm": 0.12166228073135968, |
| "learning_rate": 8.651685393258428e-06, |
| "loss": 0.3629, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.17549122543872805, |
| "grad_norm": 0.10261562058829776, |
| "learning_rate": 8.764044943820226e-06, |
| "loss": 0.3662, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17774111294435277, |
| "grad_norm": 0.11229652632991165, |
| "learning_rate": 8.876404494382023e-06, |
| "loss": 0.366, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.1799910004499775, |
| "grad_norm": 0.11438088125544643, |
| "learning_rate": 8.988764044943822e-06, |
| "loss": 0.3766, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.18224088795560223, |
| "grad_norm": 0.11100159715996023, |
| "learning_rate": 9.101123595505619e-06, |
| "loss": 0.3149, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.18449077546122694, |
| "grad_norm": 0.09215304432977561, |
| "learning_rate": 9.213483146067417e-06, |
| "loss": 0.3505, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.18674066296685166, |
| "grad_norm": 0.11027407366862503, |
| "learning_rate": 9.325842696629213e-06, |
| "loss": 0.3239, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.18899055047247637, |
| "grad_norm": 0.10591097871093506, |
| "learning_rate": 9.438202247191012e-06, |
| "loss": 0.3763, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19124043797810109, |
| "grad_norm": 0.12014034899424982, |
| "learning_rate": 9.55056179775281e-06, |
| "loss": 0.387, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.1934903254837258, |
| "grad_norm": 0.09470189013776839, |
| "learning_rate": 9.662921348314608e-06, |
| "loss": 0.379, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.19574021298935054, |
| "grad_norm": 0.09501092189291689, |
| "learning_rate": 9.775280898876405e-06, |
| "loss": 0.3642, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.19799010049497526, |
| "grad_norm": 0.11021236791362751, |
| "learning_rate": 9.887640449438202e-06, |
| "loss": 0.353, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20023998800059997, |
| "grad_norm": 0.10184537613196046, |
| "learning_rate": 1e-05, |
| "loss": 0.365, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.2024898755062247, |
| "grad_norm": 0.08309643827704467, |
| "learning_rate": 9.999961427623602e-06, |
| "loss": 0.3575, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2047397630118494, |
| "grad_norm": 0.10037722707892065, |
| "learning_rate": 9.999845711089533e-06, |
| "loss": 0.3471, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.20698965051747412, |
| "grad_norm": 0.11715533210340626, |
| "learning_rate": 9.999652852183184e-06, |
| "loss": 0.3714, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.20923953802309886, |
| "grad_norm": 0.13060134335041548, |
| "learning_rate": 9.99938285388016e-06, |
| "loss": 0.3635, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.21148942552872357, |
| "grad_norm": 0.08745200641347264, |
| "learning_rate": 9.999035720346254e-06, |
| "loss": 0.3571, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2137393130343483, |
| "grad_norm": 0.08966989980740278, |
| "learning_rate": 9.998611456937373e-06, |
| "loss": 0.3639, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.215989200539973, |
| "grad_norm": 0.09956344072812717, |
| "learning_rate": 9.998110070199454e-06, |
| "loss": 0.3665, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.21823908804559772, |
| "grad_norm": 0.09656500042174225, |
| "learning_rate": 9.997531567868367e-06, |
| "loss": 0.3726, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.22048897555122243, |
| "grad_norm": 0.11853509152316483, |
| "learning_rate": 9.996875958869803e-06, |
| "loss": 0.3518, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22273886305684715, |
| "grad_norm": 0.10059970891055807, |
| "learning_rate": 9.996143253319113e-06, |
| "loss": 0.3624, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2249887505624719, |
| "grad_norm": 0.10793939615766127, |
| "learning_rate": 9.995333462521178e-06, |
| "loss": 0.3654, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2272386380680966, |
| "grad_norm": 0.10450237535025396, |
| "learning_rate": 9.99444659897022e-06, |
| "loss": 0.3663, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.22948852557372132, |
| "grad_norm": 0.09806526685924745, |
| "learning_rate": 9.993482676349612e-06, |
| "loss": 0.342, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23173841307934603, |
| "grad_norm": 0.0964872964969597, |
| "learning_rate": 9.992441709531671e-06, |
| "loss": 0.3705, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.23398830058497075, |
| "grad_norm": 0.09412389153440821, |
| "learning_rate": 9.991323714577421e-06, |
| "loss": 0.3541, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.23623818809059546, |
| "grad_norm": 0.08953556655443609, |
| "learning_rate": 9.99012870873635e-06, |
| "loss": 0.3521, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.23848807559622018, |
| "grad_norm": 0.08695345930804899, |
| "learning_rate": 9.988856710446143e-06, |
| "loss": 0.3505, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24073796310184492, |
| "grad_norm": 0.09543515501943514, |
| "learning_rate": 9.987507739332401e-06, |
| "loss": 0.3766, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.24298785060746964, |
| "grad_norm": 0.08926010106293578, |
| "learning_rate": 9.986081816208333e-06, |
| "loss": 0.329, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24523773811309435, |
| "grad_norm": 0.08598056512962657, |
| "learning_rate": 9.984578963074436e-06, |
| "loss": 0.3617, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.24748762561871906, |
| "grad_norm": 0.08952811194064599, |
| "learning_rate": 9.982999203118153e-06, |
| "loss": 0.3383, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.24973751312434378, |
| "grad_norm": 0.12658781787185433, |
| "learning_rate": 9.981342560713528e-06, |
| "loss": 0.3238, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2519874006299685, |
| "grad_norm": 0.09353150867243243, |
| "learning_rate": 9.979609061420812e-06, |
| "loss": 0.3545, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2542372881355932, |
| "grad_norm": 0.09177651257435882, |
| "learning_rate": 9.977798731986079e-06, |
| "loss": 0.3502, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2564871756412179, |
| "grad_norm": 0.09932686671141468, |
| "learning_rate": 9.975911600340814e-06, |
| "loss": 0.3468, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.25873706314684264, |
| "grad_norm": 0.08192603238892632, |
| "learning_rate": 9.973947695601477e-06, |
| "loss": 0.3324, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.26098695065246735, |
| "grad_norm": 0.07493334234921131, |
| "learning_rate": 9.971907048069058e-06, |
| "loss": 0.3795, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.26323683815809207, |
| "grad_norm": 0.08138918761115761, |
| "learning_rate": 9.969789689228606e-06, |
| "loss": 0.3385, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.26548672566371684, |
| "grad_norm": 0.08838566706763232, |
| "learning_rate": 9.967595651748745e-06, |
| "loss": 0.369, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.26773661316934155, |
| "grad_norm": 0.08784958596018687, |
| "learning_rate": 9.965324969481172e-06, |
| "loss": 0.3169, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.26998650067496627, |
| "grad_norm": 0.0896168468240925, |
| "learning_rate": 9.962977677460132e-06, |
| "loss": 0.3572, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.272236388180591, |
| "grad_norm": 0.0946662419061461, |
| "learning_rate": 9.960553811901879e-06, |
| "loss": 0.385, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.2744862756862157, |
| "grad_norm": 0.12115400639084788, |
| "learning_rate": 9.95805341020411e-06, |
| "loss": 0.3595, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2767361631918404, |
| "grad_norm": 0.0997799833296398, |
| "learning_rate": 9.955476510945401e-06, |
| "loss": 0.3317, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2789860506974651, |
| "grad_norm": 0.0996130660835657, |
| "learning_rate": 9.952823153884606e-06, |
| "loss": 0.3449, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.28123593820308984, |
| "grad_norm": 0.07835665128694007, |
| "learning_rate": 9.950093379960238e-06, |
| "loss": 0.3397, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.28348582570871456, |
| "grad_norm": 0.09391607163130151, |
| "learning_rate": 9.947287231289844e-06, |
| "loss": 0.3776, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.28573571321433927, |
| "grad_norm": 0.09176019196191011, |
| "learning_rate": 9.944404751169353e-06, |
| "loss": 0.3722, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.287985600719964, |
| "grad_norm": 0.08406446635852077, |
| "learning_rate": 9.941445984072408e-06, |
| "loss": 0.338, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2902354882255887, |
| "grad_norm": 0.08291987107240674, |
| "learning_rate": 9.938410975649681e-06, |
| "loss": 0.3742, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.2924853757312134, |
| "grad_norm": 0.08633205314263653, |
| "learning_rate": 9.935299772728166e-06, |
| "loss": 0.3611, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2947352632368382, |
| "grad_norm": 0.07331502093091126, |
| "learning_rate": 9.93211242331046e-06, |
| "loss": 0.3344, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2969851507424629, |
| "grad_norm": 0.08385804833550349, |
| "learning_rate": 9.92884897657402e-06, |
| "loss": 0.3557, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2992350382480876, |
| "grad_norm": 0.07435080817645906, |
| "learning_rate": 9.925509482870403e-06, |
| "loss": 0.3405, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.30148492575371233, |
| "grad_norm": 0.08168530188324026, |
| "learning_rate": 9.922093993724492e-06, |
| "loss": 0.3426, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.30373481325933704, |
| "grad_norm": 0.08035820133808234, |
| "learning_rate": 9.918602561833702e-06, |
| "loss": 0.3604, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.30598470076496176, |
| "grad_norm": 0.08271219072869937, |
| "learning_rate": 9.91503524106716e-06, |
| "loss": 0.348, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3082345882705865, |
| "grad_norm": 0.08921123968472987, |
| "learning_rate": 9.911392086464886e-06, |
| "loss": 0.3441, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.3104844757762112, |
| "grad_norm": 0.0839985353132867, |
| "learning_rate": 9.907673154236929e-06, |
| "loss": 0.3574, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3127343632818359, |
| "grad_norm": 0.10386954688768853, |
| "learning_rate": 9.903878501762511e-06, |
| "loss": 0.3286, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3149842507874606, |
| "grad_norm": 0.08778681814263677, |
| "learning_rate": 9.900008187589138e-06, |
| "loss": 0.3268, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.31723413829308533, |
| "grad_norm": 0.09027807015137441, |
| "learning_rate": 9.896062271431697e-06, |
| "loss": 0.3392, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.31948402579871005, |
| "grad_norm": 0.09567803807106381, |
| "learning_rate": 9.89204081417153e-06, |
| "loss": 0.3539, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.32173391330433476, |
| "grad_norm": 0.08574167684815145, |
| "learning_rate": 9.887943877855505e-06, |
| "loss": 0.3377, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3239838008099595, |
| "grad_norm": 0.09260863383057749, |
| "learning_rate": 9.883771525695052e-06, |
| "loss": 0.3449, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.32623368831558425, |
| "grad_norm": 0.08495447140601177, |
| "learning_rate": 9.879523822065181e-06, |
| "loss": 0.3219, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.32848357582120896, |
| "grad_norm": 0.07533141152453762, |
| "learning_rate": 9.875200832503505e-06, |
| "loss": 0.3568, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3307334633268337, |
| "grad_norm": 0.12247315370054979, |
| "learning_rate": 9.870802623709215e-06, |
| "loss": 0.3596, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3329833508324584, |
| "grad_norm": 0.08738003894579985, |
| "learning_rate": 9.866329263542055e-06, |
| "loss": 0.3638, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3352332383380831, |
| "grad_norm": 0.08338816245916761, |
| "learning_rate": 9.861780821021282e-06, |
| "loss": 0.3561, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3374831258437078, |
| "grad_norm": 0.08236575366096931, |
| "learning_rate": 9.857157366324587e-06, |
| "loss": 0.3332, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.33973301334933254, |
| "grad_norm": 0.06944440484574142, |
| "learning_rate": 9.852458970787027e-06, |
| "loss": 0.357, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.34198290085495725, |
| "grad_norm": 0.07253573063652108, |
| "learning_rate": 9.847685706899913e-06, |
| "loss": 0.3245, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.34423278836058196, |
| "grad_norm": 0.07122505571988245, |
| "learning_rate": 9.842837648309698e-06, |
| "loss": 0.3528, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.3464826758662067, |
| "grad_norm": 0.07767571294888054, |
| "learning_rate": 9.837914869816835e-06, |
| "loss": 0.3395, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3487325633718314, |
| "grad_norm": 0.07346709829835463, |
| "learning_rate": 9.832917447374637e-06, |
| "loss": 0.3648, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.3509824508774561, |
| "grad_norm": 0.08947827115200468, |
| "learning_rate": 9.827845458088082e-06, |
| "loss": 0.3521, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3532323383830808, |
| "grad_norm": 0.06534661518603589, |
| "learning_rate": 9.822698980212643e-06, |
| "loss": 0.3366, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.35548222588870554, |
| "grad_norm": 0.08012548677368805, |
| "learning_rate": 9.817478093153074e-06, |
| "loss": 0.3752, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3577321133943303, |
| "grad_norm": 0.08284074693974608, |
| "learning_rate": 9.812182877462182e-06, |
| "loss": 0.3337, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.359982000899955, |
| "grad_norm": 0.0962415375604297, |
| "learning_rate": 9.806813414839588e-06, |
| "loss": 0.3489, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36223188840557974, |
| "grad_norm": 0.08299125800356132, |
| "learning_rate": 9.801369788130468e-06, |
| "loss": 0.3466, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.36448177591120445, |
| "grad_norm": 0.07196876922608039, |
| "learning_rate": 9.795852081324266e-06, |
| "loss": 0.3424, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.36673166341682917, |
| "grad_norm": 0.07251364573785335, |
| "learning_rate": 9.79026037955341e-06, |
| "loss": 0.3578, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3689815509224539, |
| "grad_norm": 0.07779557103393991, |
| "learning_rate": 9.784594769091989e-06, |
| "loss": 0.3616, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3712314384280786, |
| "grad_norm": 0.07434071832631806, |
| "learning_rate": 9.778855337354426e-06, |
| "loss": 0.3572, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.3734813259337033, |
| "grad_norm": 0.0761276852235193, |
| "learning_rate": 9.77304217289413e-06, |
| "loss": 0.3147, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.375731213439328, |
| "grad_norm": 0.08043122828466166, |
| "learning_rate": 9.76715536540213e-06, |
| "loss": 0.377, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.37798110094495274, |
| "grad_norm": 0.07418765173136689, |
| "learning_rate": 9.761195005705685e-06, |
| "loss": 0.3198, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.38023098845057746, |
| "grad_norm": 0.08536316659010101, |
| "learning_rate": 9.755161185766891e-06, |
| "loss": 0.3324, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.38248087595620217, |
| "grad_norm": 0.07947600210593922, |
| "learning_rate": 9.74905399868126e-06, |
| "loss": 0.3618, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3847307634618269, |
| "grad_norm": 0.0744113590241544, |
| "learning_rate": 9.742873538676274e-06, |
| "loss": 0.3402, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.3869806509674516, |
| "grad_norm": 0.06709744864423575, |
| "learning_rate": 9.73661990110995e-06, |
| "loss": 0.3337, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.38923053847307637, |
| "grad_norm": 0.07721094678627155, |
| "learning_rate": 9.73029318246935e-06, |
| "loss": 0.3473, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3914804259787011, |
| "grad_norm": 0.07436794628188735, |
| "learning_rate": 9.723893480369106e-06, |
| "loss": 0.3227, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3937303134843258, |
| "grad_norm": 0.08184087425329187, |
| "learning_rate": 9.717420893549902e-06, |
| "loss": 0.3271, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.3959802009899505, |
| "grad_norm": 0.07323707936362174, |
| "learning_rate": 9.71087552187696e-06, |
| "loss": 0.3353, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.39823008849557523, |
| "grad_norm": 0.07407984516514123, |
| "learning_rate": 9.7042574663385e-06, |
| "loss": 0.3405, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.40047997600119994, |
| "grad_norm": 0.06705082859053621, |
| "learning_rate": 9.697566829044172e-06, |
| "loss": 0.3335, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.40272986350682466, |
| "grad_norm": 0.06417105200135667, |
| "learning_rate": 9.690803713223485e-06, |
| "loss": 0.3632, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.4049797510124494, |
| "grad_norm": 0.07661580482483403, |
| "learning_rate": 9.68396822322422e-06, |
| "loss": 0.341, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4072296385180741, |
| "grad_norm": 0.07783982481846635, |
| "learning_rate": 9.677060464510817e-06, |
| "loss": 0.3422, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4094795260236988, |
| "grad_norm": 0.07934781483289755, |
| "learning_rate": 9.670080543662742e-06, |
| "loss": 0.344, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4117294135293235, |
| "grad_norm": 0.07206722738626223, |
| "learning_rate": 9.663028568372845e-06, |
| "loss": 0.3563, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.41397930103494823, |
| "grad_norm": 0.06767347411319052, |
| "learning_rate": 9.655904647445711e-06, |
| "loss": 0.3231, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.41622918854057295, |
| "grad_norm": 0.07180782228261029, |
| "learning_rate": 9.64870889079596e-06, |
| "loss": 0.3287, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.4184790760461977, |
| "grad_norm": 0.07242610923174227, |
| "learning_rate": 9.641441409446563e-06, |
| "loss": 0.3487, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.42072896355182243, |
| "grad_norm": 0.06832390188318747, |
| "learning_rate": 9.634102315527136e-06, |
| "loss": 0.325, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.42297885105744715, |
| "grad_norm": 0.07856703769371849, |
| "learning_rate": 9.626691722272193e-06, |
| "loss": 0.3458, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.42522873856307186, |
| "grad_norm": 0.0663937348509602, |
| "learning_rate": 9.61920974401941e-06, |
| "loss": 0.3513, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.4274786260686966, |
| "grad_norm": 0.07114607462059036, |
| "learning_rate": 9.611656496207861e-06, |
| "loss": 0.3474, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.4297285135743213, |
| "grad_norm": 0.07603014864007235, |
| "learning_rate": 9.604032095376234e-06, |
| "loss": 0.3362, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.431978401079946, |
| "grad_norm": 0.0734531353849079, |
| "learning_rate": 9.596336659161031e-06, |
| "loss": 0.3445, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4342282885855707, |
| "grad_norm": 0.061596458285852376, |
| "learning_rate": 9.588570306294759e-06, |
| "loss": 0.3453, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.43647817609119544, |
| "grad_norm": 0.05885162798568731, |
| "learning_rate": 9.58073315660409e-06, |
| "loss": 0.3439, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.43872806359682015, |
| "grad_norm": 0.07082727968014366, |
| "learning_rate": 9.57282533100802e-06, |
| "loss": 0.3395, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.44097795110244487, |
| "grad_norm": 0.07316435404238263, |
| "learning_rate": 9.564846951515997e-06, |
| "loss": 0.3304, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4432278386080696, |
| "grad_norm": 0.07444841963108913, |
| "learning_rate": 9.55679814122605e-06, |
| "loss": 0.3298, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.4454777261136943, |
| "grad_norm": 0.07294271191699972, |
| "learning_rate": 9.548679024322866e-06, |
| "loss": 0.3463, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.447727613619319, |
| "grad_norm": 0.07031942249727262, |
| "learning_rate": 9.540489726075907e-06, |
| "loss": 0.3486, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4499775011249438, |
| "grad_norm": 0.07151326035389519, |
| "learning_rate": 9.532230372837446e-06, |
| "loss": 0.3537, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4522273886305685, |
| "grad_norm": 0.0671028535664748, |
| "learning_rate": 9.523901092040634e-06, |
| "loss": 0.3455, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.4544772761361932, |
| "grad_norm": 0.07197014184781744, |
| "learning_rate": 9.51550201219754e-06, |
| "loss": 0.3432, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4567271636418179, |
| "grad_norm": 0.07169196920459484, |
| "learning_rate": 9.507033262897142e-06, |
| "loss": 0.31, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.45897705114744264, |
| "grad_norm": 0.07109226686317548, |
| "learning_rate": 9.498494974803362e-06, |
| "loss": 0.3663, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.46122693865306735, |
| "grad_norm": 0.05804652011529642, |
| "learning_rate": 9.489887279653023e-06, |
| "loss": 0.3194, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.46347682615869207, |
| "grad_norm": 0.0700778438901929, |
| "learning_rate": 9.481210310253826e-06, |
| "loss": 0.3167, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.4657267136643168, |
| "grad_norm": 0.06244080013341172, |
| "learning_rate": 9.472464200482303e-06, |
| "loss": 0.3127, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.4679766011699415, |
| "grad_norm": 0.06903401204251029, |
| "learning_rate": 9.463649085281752e-06, |
| "loss": 0.3259, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.4702264886755662, |
| "grad_norm": 0.07317408098224049, |
| "learning_rate": 9.454765100660144e-06, |
| "loss": 0.3446, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.4724763761811909, |
| "grad_norm": 0.06487603568640564, |
| "learning_rate": 9.445812383688046e-06, |
| "loss": 0.3418, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.47472626368681564, |
| "grad_norm": 0.06587470603877191, |
| "learning_rate": 9.43679107249648e-06, |
| "loss": 0.3473, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.47697615119244036, |
| "grad_norm": 0.07107259617908306, |
| "learning_rate": 9.427701306274812e-06, |
| "loss": 0.337, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.47922603869806507, |
| "grad_norm": 0.06697594936792645, |
| "learning_rate": 9.418543225268598e-06, |
| "loss": 0.3429, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.48147592620368984, |
| "grad_norm": 0.0682858638376316, |
| "learning_rate": 9.40931697077741e-06, |
| "loss": 0.3358, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.48372581370931456, |
| "grad_norm": 0.07619891304792806, |
| "learning_rate": 9.400022685152683e-06, |
| "loss": 0.3333, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.48597570121493927, |
| "grad_norm": 0.07522989171574869, |
| "learning_rate": 9.390660511795481e-06, |
| "loss": 0.3587, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.488225588720564, |
| "grad_norm": 0.07244707737339262, |
| "learning_rate": 9.381230595154319e-06, |
| "loss": 0.3386, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4904754762261887, |
| "grad_norm": 0.0747628006572659, |
| "learning_rate": 9.371733080722911e-06, |
| "loss": 0.3457, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4927253637318134, |
| "grad_norm": 0.0687511407497147, |
| "learning_rate": 9.362168115037942e-06, |
| "loss": 0.3433, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.49497525123743813, |
| "grad_norm": 0.07512269519367433, |
| "learning_rate": 9.352535845676791e-06, |
| "loss": 0.3219, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.49722513874306284, |
| "grad_norm": 0.07246031317089945, |
| "learning_rate": 9.342836421255268e-06, |
| "loss": 0.322, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.49947502624868756, |
| "grad_norm": 0.07594536131369899, |
| "learning_rate": 9.333069991425313e-06, |
| "loss": 0.3589, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5017249137543123, |
| "grad_norm": 0.06689469633356987, |
| "learning_rate": 9.323236706872685e-06, |
| "loss": 0.357, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.503974801259937, |
| "grad_norm": 0.06930147639704634, |
| "learning_rate": 9.31333671931465e-06, |
| "loss": 0.3263, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5062246887655617, |
| "grad_norm": 0.06133351525533005, |
| "learning_rate": 9.303370181497623e-06, |
| "loss": 0.3422, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5084745762711864, |
| "grad_norm": 0.07604072880215484, |
| "learning_rate": 9.293337247194827e-06, |
| "loss": 0.359, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5107244637768111, |
| "grad_norm": 0.06955762934734898, |
| "learning_rate": 9.283238071203907e-06, |
| "loss": 0.3439, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5129743512824358, |
| "grad_norm": 0.0803346877614296, |
| "learning_rate": 9.27307280934455e-06, |
| "loss": 0.3471, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5152242387880606, |
| "grad_norm": 0.06622678263367843, |
| "learning_rate": 9.26284161845608e-06, |
| "loss": 0.3427, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.5174741262936853, |
| "grad_norm": 0.0709676185753263, |
| "learning_rate": 9.252544656395033e-06, |
| "loss": 0.3363, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.51972401379931, |
| "grad_norm": 0.08282162237795766, |
| "learning_rate": 9.242182082032729e-06, |
| "loss": 0.341, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5219739013049347, |
| "grad_norm": 0.06233679346455434, |
| "learning_rate": 9.231754055252817e-06, |
| "loss": 0.3308, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5242237888105594, |
| "grad_norm": 0.05949370637515577, |
| "learning_rate": 9.221260736948803e-06, |
| "loss": 0.3254, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.5264736763161841, |
| "grad_norm": 0.06102505633204194, |
| "learning_rate": 9.21070228902158e-06, |
| "loss": 0.327, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.528723563821809, |
| "grad_norm": 0.07096293590033853, |
| "learning_rate": 9.200078874376917e-06, |
| "loss": 0.3309, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.5309734513274337, |
| "grad_norm": 0.06374179754335971, |
| "learning_rate": 9.189390656922955e-06, |
| "loss": 0.3579, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5332233388330584, |
| "grad_norm": 0.09643830344296066, |
| "learning_rate": 9.17863780156767e-06, |
| "loss": 0.3466, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5354732263386831, |
| "grad_norm": 0.0652384061049577, |
| "learning_rate": 9.167820474216337e-06, |
| "loss": 0.3523, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5377231138443078, |
| "grad_norm": 0.06430574295906281, |
| "learning_rate": 9.156938841768965e-06, |
| "loss": 0.3722, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.5399730013499325, |
| "grad_norm": 0.07648802804062793, |
| "learning_rate": 9.145993072117724e-06, |
| "loss": 0.321, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5422228888555573, |
| "grad_norm": 0.06775418329662553, |
| "learning_rate": 9.134983334144352e-06, |
| "loss": 0.3549, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.544472776361182, |
| "grad_norm": 0.076334857238285, |
| "learning_rate": 9.123909797717551e-06, |
| "loss": 0.335, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5467226638668067, |
| "grad_norm": 0.06576432515389055, |
| "learning_rate": 9.112772633690368e-06, |
| "loss": 0.3239, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.5489725513724314, |
| "grad_norm": 0.06872638373228167, |
| "learning_rate": 9.101572013897555e-06, |
| "loss": 0.3141, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5512224388780561, |
| "grad_norm": 0.06158733598122966, |
| "learning_rate": 9.090308111152924e-06, |
| "loss": 0.3221, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.5534723263836808, |
| "grad_norm": 0.08097819934773681, |
| "learning_rate": 9.07898109924667e-06, |
| "loss": 0.3151, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5557222138893055, |
| "grad_norm": 0.06764912622152554, |
| "learning_rate": 9.067591152942701e-06, |
| "loss": 0.3332, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.5579721013949303, |
| "grad_norm": 0.07314176615388208, |
| "learning_rate": 9.056138447975936e-06, |
| "loss": 0.3415, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.560221988900555, |
| "grad_norm": 0.0717387579544613, |
| "learning_rate": 9.044623161049594e-06, |
| "loss": 0.3386, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.5624718764061797, |
| "grad_norm": 0.07552097065323739, |
| "learning_rate": 9.033045469832467e-06, |
| "loss": 0.3569, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5647217639118044, |
| "grad_norm": 0.06915693480180615, |
| "learning_rate": 9.02140555295618e-06, |
| "loss": 0.3222, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.5669716514174291, |
| "grad_norm": 0.07769020322155092, |
| "learning_rate": 9.009703590012434e-06, |
| "loss": 0.3185, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5692215389230538, |
| "grad_norm": 0.07598860570344396, |
| "learning_rate": 8.997939761550239e-06, |
| "loss": 0.3522, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.5714714264286785, |
| "grad_norm": 0.07073748495565614, |
| "learning_rate": 8.986114249073122e-06, |
| "loss": 0.3169, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.5737213139343033, |
| "grad_norm": 0.06866551274687982, |
| "learning_rate": 8.97422723503633e-06, |
| "loss": 0.3304, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.575971201439928, |
| "grad_norm": 0.07075202015965712, |
| "learning_rate": 8.962278902844016e-06, |
| "loss": 0.3309, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5782210889455527, |
| "grad_norm": 0.07165184953921011, |
| "learning_rate": 8.950269436846405e-06, |
| "loss": 0.331, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.5804709764511774, |
| "grad_norm": 0.06433134595791733, |
| "learning_rate": 8.938199022336956e-06, |
| "loss": 0.328, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5827208639568021, |
| "grad_norm": 0.07003765990675229, |
| "learning_rate": 8.926067845549495e-06, |
| "loss": 0.3297, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.5849707514624268, |
| "grad_norm": 0.06653035126789796, |
| "learning_rate": 8.913876093655351e-06, |
| "loss": 0.335, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5872206389680515, |
| "grad_norm": 0.06847091877632593, |
| "learning_rate": 8.90162395476046e-06, |
| "loss": 0.3279, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5894705264736764, |
| "grad_norm": 0.06903452581161729, |
| "learning_rate": 8.889311617902468e-06, |
| "loss": 0.3229, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5917204139793011, |
| "grad_norm": 0.09341128215879058, |
| "learning_rate": 8.876939273047813e-06, |
| "loss": 0.299, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5939703014849258, |
| "grad_norm": 0.07278846497904187, |
| "learning_rate": 8.86450711108879e-06, |
| "loss": 0.3226, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5962201889905505, |
| "grad_norm": 0.06262188507904164, |
| "learning_rate": 8.85201532384061e-06, |
| "loss": 0.3133, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.5984700764961752, |
| "grad_norm": 0.06880573696178596, |
| "learning_rate": 8.839464104038445e-06, |
| "loss": 0.2962, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6007199640017999, |
| "grad_norm": 0.06779234501270573, |
| "learning_rate": 8.826853645334441e-06, |
| "loss": 0.3124, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.6029698515074247, |
| "grad_norm": 0.06187208772150342, |
| "learning_rate": 8.814184142294744e-06, |
| "loss": 0.315, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6052197390130494, |
| "grad_norm": 0.07896028281919827, |
| "learning_rate": 8.80145579039649e-06, |
| "loss": 0.3432, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6074696265186741, |
| "grad_norm": 0.0751755215796296, |
| "learning_rate": 8.78866878602479e-06, |
| "loss": 0.3239, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6097195140242988, |
| "grad_norm": 0.07156093608605772, |
| "learning_rate": 8.775823326469703e-06, |
| "loss": 0.337, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.6119694015299235, |
| "grad_norm": 0.0727876886960586, |
| "learning_rate": 8.76291960992319e-06, |
| "loss": 0.3737, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.6142192890355482, |
| "grad_norm": 0.07239709772207241, |
| "learning_rate": 8.749957835476053e-06, |
| "loss": 0.333, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.616469176541173, |
| "grad_norm": 0.07320801736790428, |
| "learning_rate": 8.736938203114872e-06, |
| "loss": 0.344, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6187190640467977, |
| "grad_norm": 0.06549770224319154, |
| "learning_rate": 8.72386091371891e-06, |
| "loss": 0.3006, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.6209689515524224, |
| "grad_norm": 0.07264319355187582, |
| "learning_rate": 8.710726169057018e-06, |
| "loss": 0.3173, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6232188390580471, |
| "grad_norm": 0.07992193946978773, |
| "learning_rate": 8.697534171784523e-06, |
| "loss": 0.3467, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.6254687265636718, |
| "grad_norm": 0.06707456122943496, |
| "learning_rate": 8.684285125440099e-06, |
| "loss": 0.3297, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6277186140692965, |
| "grad_norm": 0.06483948438605809, |
| "learning_rate": 8.670979234442624e-06, |
| "loss": 0.3349, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.6299685015749212, |
| "grad_norm": 0.0705394359218232, |
| "learning_rate": 8.657616704088037e-06, |
| "loss": 0.33, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.632218389080546, |
| "grad_norm": 0.07985592647370479, |
| "learning_rate": 8.644197740546153e-06, |
| "loss": 0.3605, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.6344682765861707, |
| "grad_norm": 0.08150029983078208, |
| "learning_rate": 8.630722550857503e-06, |
| "loss": 0.3363, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6367181640917954, |
| "grad_norm": 0.07286616055279489, |
| "learning_rate": 8.617191342930118e-06, |
| "loss": 0.3441, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.6389680515974201, |
| "grad_norm": 0.06834780355739174, |
| "learning_rate": 8.603604325536338e-06, |
| "loss": 0.3298, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6412179391030448, |
| "grad_norm": 0.06360740971285378, |
| "learning_rate": 8.589961708309582e-06, |
| "loss": 0.308, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.6434678266086695, |
| "grad_norm": 0.06387011586281786, |
| "learning_rate": 8.576263701741115e-06, |
| "loss": 0.3102, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6457177141142942, |
| "grad_norm": 0.059287022702283844, |
| "learning_rate": 8.562510517176807e-06, |
| "loss": 0.333, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.647967601619919, |
| "grad_norm": 0.07076357545448068, |
| "learning_rate": 8.54870236681386e-06, |
| "loss": 0.3376, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6502174891255437, |
| "grad_norm": 0.08080237359735847, |
| "learning_rate": 8.534839463697541e-06, |
| "loss": 0.344, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.6524673766311685, |
| "grad_norm": 0.07301903865415799, |
| "learning_rate": 8.520922021717903e-06, |
| "loss": 0.3236, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6547172641367932, |
| "grad_norm": 0.06408247558471158, |
| "learning_rate": 8.506950255606466e-06, |
| "loss": 0.3119, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.6569671516424179, |
| "grad_norm": 0.07142979546900464, |
| "learning_rate": 8.492924380932919e-06, |
| "loss": 0.3235, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.6592170391480426, |
| "grad_norm": 0.06623934823982494, |
| "learning_rate": 8.478844614101792e-06, |
| "loss": 0.3127, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.6614669266536674, |
| "grad_norm": 0.064051288527217, |
| "learning_rate": 8.464711172349105e-06, |
| "loss": 0.3408, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6637168141592921, |
| "grad_norm": 0.07268808159369747, |
| "learning_rate": 8.450524273739036e-06, |
| "loss": 0.3406, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.6659667016649168, |
| "grad_norm": 0.06547939077675495, |
| "learning_rate": 8.436284137160544e-06, |
| "loss": 0.3404, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6682165891705415, |
| "grad_norm": 0.11515221274329139, |
| "learning_rate": 8.421990982323988e-06, |
| "loss": 0.3342, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.6704664766761662, |
| "grad_norm": 0.08213057865060075, |
| "learning_rate": 8.407645029757752e-06, |
| "loss": 0.3631, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.6727163641817909, |
| "grad_norm": 0.07445316775297253, |
| "learning_rate": 8.393246500804825e-06, |
| "loss": 0.362, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.6749662516874156, |
| "grad_norm": 0.0715773585848479, |
| "learning_rate": 8.3787956176194e-06, |
| "loss": 0.3377, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6772161391930404, |
| "grad_norm": 0.07245667708706742, |
| "learning_rate": 8.36429260316344e-06, |
| "loss": 0.2967, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.6794660266986651, |
| "grad_norm": 0.07191851967760118, |
| "learning_rate": 8.349737681203234e-06, |
| "loss": 0.3447, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.6817159142042898, |
| "grad_norm": 0.06475853768493092, |
| "learning_rate": 8.335131076305958e-06, |
| "loss": 0.3339, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.6839658017099145, |
| "grad_norm": 0.07263158877410257, |
| "learning_rate": 8.320473013836197e-06, |
| "loss": 0.3074, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.6862156892155392, |
| "grad_norm": 0.05942603663221257, |
| "learning_rate": 8.305763719952467e-06, |
| "loss": 0.2997, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.6884655767211639, |
| "grad_norm": 0.07938168227761808, |
| "learning_rate": 8.29100342160374e-06, |
| "loss": 0.3122, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6907154642267886, |
| "grad_norm": 0.07999223664242092, |
| "learning_rate": 8.27619234652593e-06, |
| "loss": 0.3138, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.6929653517324134, |
| "grad_norm": 0.06996270561203156, |
| "learning_rate": 8.261330723238381e-06, |
| "loss": 0.3321, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.6952152392380381, |
| "grad_norm": 0.09647113986832291, |
| "learning_rate": 8.246418781040345e-06, |
| "loss": 0.3269, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.6974651267436628, |
| "grad_norm": 0.07974144611519904, |
| "learning_rate": 8.231456750007436e-06, |
| "loss": 0.309, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6997150142492875, |
| "grad_norm": 0.06944041746000827, |
| "learning_rate": 8.216444860988098e-06, |
| "loss": 0.3347, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.7019649017549122, |
| "grad_norm": 0.06697853007490644, |
| "learning_rate": 8.20138334560002e-06, |
| "loss": 0.3432, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7042147892605369, |
| "grad_norm": 0.09163411149931353, |
| "learning_rate": 8.18627243622658e-06, |
| "loss": 0.3294, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.7064646767661616, |
| "grad_norm": 0.06745466757701833, |
| "learning_rate": 8.171112366013252e-06, |
| "loss": 0.3382, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7087145642717864, |
| "grad_norm": 0.06524545139947452, |
| "learning_rate": 8.155903368864008e-06, |
| "loss": 0.2894, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7109644517774111, |
| "grad_norm": 0.07357575023935092, |
| "learning_rate": 8.140645679437713e-06, |
| "loss": 0.345, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7132143392830359, |
| "grad_norm": 0.07032356069075725, |
| "learning_rate": 8.125339533144507e-06, |
| "loss": 0.3497, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.7154642267886606, |
| "grad_norm": 0.07305825316899144, |
| "learning_rate": 8.109985166142161e-06, |
| "loss": 0.3223, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7177141142942853, |
| "grad_norm": 0.07026921859976491, |
| "learning_rate": 8.09458281533244e-06, |
| "loss": 0.3271, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.71996400179991, |
| "grad_norm": 0.0783084169696169, |
| "learning_rate": 8.079132718357465e-06, |
| "loss": 0.311, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7222138893055348, |
| "grad_norm": 0.06977970059586212, |
| "learning_rate": 8.063635113596006e-06, |
| "loss": 0.3114, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.7244637768111595, |
| "grad_norm": 0.06695382649927473, |
| "learning_rate": 8.048090240159849e-06, |
| "loss": 0.3186, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7267136643167842, |
| "grad_norm": 0.07382767142740718, |
| "learning_rate": 8.032498337890073e-06, |
| "loss": 0.3115, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.7289635518224089, |
| "grad_norm": 0.0847147954522355, |
| "learning_rate": 8.01685964735337e-06, |
| "loss": 0.3313, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7312134393280336, |
| "grad_norm": 0.08710412831256738, |
| "learning_rate": 8.00117440983832e-06, |
| "loss": 0.3129, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7334633268336583, |
| "grad_norm": 0.07163605298015002, |
| "learning_rate": 7.985442867351682e-06, |
| "loss": 0.3197, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.735713214339283, |
| "grad_norm": 0.08693479896494097, |
| "learning_rate": 7.969665262614642e-06, |
| "loss": 0.3584, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7379631018449078, |
| "grad_norm": 0.07181692085074703, |
| "learning_rate": 7.953841839059086e-06, |
| "loss": 0.3024, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7402129893505325, |
| "grad_norm": 0.06762138099885763, |
| "learning_rate": 7.937972840823836e-06, |
| "loss": 0.3393, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7424628768561572, |
| "grad_norm": 0.06579548855422006, |
| "learning_rate": 7.922058512750876e-06, |
| "loss": 0.3415, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7447127643617819, |
| "grad_norm": 0.06780731913871438, |
| "learning_rate": 7.90609910038159e-06, |
| "loss": 0.326, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.7469626518674066, |
| "grad_norm": 0.07345224322730477, |
| "learning_rate": 7.890094849952964e-06, |
| "loss": 0.3579, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7492125393730313, |
| "grad_norm": 0.07643898702300285, |
| "learning_rate": 7.874046008393783e-06, |
| "loss": 0.3215, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.751462426878656, |
| "grad_norm": 0.08111641352223722, |
| "learning_rate": 7.857952823320833e-06, |
| "loss": 0.3396, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7537123143842808, |
| "grad_norm": 0.06433102937848656, |
| "learning_rate": 7.84181554303507e-06, |
| "loss": 0.3229, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.7559622018899055, |
| "grad_norm": 0.07066161687549372, |
| "learning_rate": 7.825634416517793e-06, |
| "loss": 0.3168, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7582120893955302, |
| "grad_norm": 0.07761037252783486, |
| "learning_rate": 7.809409693426803e-06, |
| "loss": 0.345, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.7604619769011549, |
| "grad_norm": 0.07514558565636438, |
| "learning_rate": 7.793141624092551e-06, |
| "loss": 0.3423, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7627118644067796, |
| "grad_norm": 0.08138341842898199, |
| "learning_rate": 7.776830459514275e-06, |
| "loss": 0.3153, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.7649617519124043, |
| "grad_norm": 0.07657999183778645, |
| "learning_rate": 7.760476451356123e-06, |
| "loss": 0.3568, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7672116394180291, |
| "grad_norm": 0.08932610854441203, |
| "learning_rate": 7.744079851943286e-06, |
| "loss": 0.3045, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.7694615269236538, |
| "grad_norm": 0.07788581856311123, |
| "learning_rate": 7.727640914258076e-06, |
| "loss": 0.322, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.7717114144292785, |
| "grad_norm": 0.07234842557571529, |
| "learning_rate": 7.711159891936059e-06, |
| "loss": 0.301, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.7739613019349032, |
| "grad_norm": 0.08103908033954604, |
| "learning_rate": 7.694637039262109e-06, |
| "loss": 0.2934, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.776211189440528, |
| "grad_norm": 0.08033609793206774, |
| "learning_rate": 7.678072611166503e-06, |
| "loss": 0.3281, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.7784610769461527, |
| "grad_norm": 0.08682473308042656, |
| "learning_rate": 7.661466863220982e-06, |
| "loss": 0.3377, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.7807109644517775, |
| "grad_norm": 0.07975724023981283, |
| "learning_rate": 7.644820051634813e-06, |
| "loss": 0.3312, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.7829608519574022, |
| "grad_norm": 0.08318610850328363, |
| "learning_rate": 7.628132433250828e-06, |
| "loss": 0.318, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.7852107394630269, |
| "grad_norm": 0.07470444210188223, |
| "learning_rate": 7.611404265541464e-06, |
| "loss": 0.3166, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.7874606269686516, |
| "grad_norm": 0.07633984339680623, |
| "learning_rate": 7.594635806604797e-06, |
| "loss": 0.3068, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7897105144742763, |
| "grad_norm": 0.08519611137288997, |
| "learning_rate": 7.57782731516055e-06, |
| "loss": 0.3465, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.791960401979901, |
| "grad_norm": 0.07125104461336126, |
| "learning_rate": 7.560979050546103e-06, |
| "loss": 0.311, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.7942102894855257, |
| "grad_norm": 0.08460045098046377, |
| "learning_rate": 7.544091272712501e-06, |
| "loss": 0.3036, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.7964601769911505, |
| "grad_norm": 0.07731671038628908, |
| "learning_rate": 7.527164242220434e-06, |
| "loss": 0.3214, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.7987100644967752, |
| "grad_norm": 0.07618452283812552, |
| "learning_rate": 7.510198220236217e-06, |
| "loss": 0.3412, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8009599520023999, |
| "grad_norm": 0.08122249298530079, |
| "learning_rate": 7.493193468527764e-06, |
| "loss": 0.3129, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8032098395080246, |
| "grad_norm": 0.08390625774458342, |
| "learning_rate": 7.476150249460549e-06, |
| "loss": 0.3168, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8054597270136493, |
| "grad_norm": 0.07518471851900174, |
| "learning_rate": 7.4590688259935554e-06, |
| "loss": 0.331, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.807709614519274, |
| "grad_norm": 0.07627971467235234, |
| "learning_rate": 7.441949461675223e-06, |
| "loss": 0.3471, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8099595020248987, |
| "grad_norm": 0.08879967466572108, |
| "learning_rate": 7.424792420639377e-06, |
| "loss": 0.323, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8122093895305235, |
| "grad_norm": 0.0858174200658171, |
| "learning_rate": 7.407597967601155e-06, |
| "loss": 0.3284, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.8144592770361482, |
| "grad_norm": 0.08665127583082709, |
| "learning_rate": 7.390366367852923e-06, |
| "loss": 0.3217, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8167091645417729, |
| "grad_norm": 0.08001080258785544, |
| "learning_rate": 7.3730978872601825e-06, |
| "loss": 0.3248, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.8189590520473976, |
| "grad_norm": 0.07815794847284734, |
| "learning_rate": 7.355792792257463e-06, |
| "loss": 0.3124, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8212089395530223, |
| "grad_norm": 0.0869139056537896, |
| "learning_rate": 7.338451349844225e-06, |
| "loss": 0.323, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.823458827058647, |
| "grad_norm": 0.09766019302119812, |
| "learning_rate": 7.3210738275807225e-06, |
| "loss": 0.3332, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8257087145642718, |
| "grad_norm": 0.08508749834617443, |
| "learning_rate": 7.303660493583889e-06, |
| "loss": 0.3285, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.8279586020698965, |
| "grad_norm": 0.10673197384722342, |
| "learning_rate": 7.286211616523193e-06, |
| "loss": 0.3169, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8302084895755212, |
| "grad_norm": 0.11681882774169298, |
| "learning_rate": 7.268727465616497e-06, |
| "loss": 0.331, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8324583770811459, |
| "grad_norm": 0.08970145688216963, |
| "learning_rate": 7.251208310625899e-06, |
| "loss": 0.3262, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8347082645867706, |
| "grad_norm": 0.08677453595649923, |
| "learning_rate": 7.2336544218535776e-06, |
| "loss": 0.2968, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8369581520923954, |
| "grad_norm": 0.08463356362517462, |
| "learning_rate": 7.216066070137614e-06, |
| "loss": 0.3408, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8392080395980202, |
| "grad_norm": 0.10768608728008885, |
| "learning_rate": 7.198443526847816e-06, |
| "loss": 0.3222, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.8414579271036449, |
| "grad_norm": 0.08293925088501428, |
| "learning_rate": 7.180787063881534e-06, |
| "loss": 0.3225, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8437078146092696, |
| "grad_norm": 0.09753175069029144, |
| "learning_rate": 7.163096953659462e-06, |
| "loss": 0.3249, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.8459577021148943, |
| "grad_norm": 0.10750990409191725, |
| "learning_rate": 7.145373469121435e-06, |
| "loss": 0.3248, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.848207589620519, |
| "grad_norm": 0.0713111477001828, |
| "learning_rate": 7.1276168837222215e-06, |
| "loss": 0.3262, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.8504574771261437, |
| "grad_norm": 0.08520099737279731, |
| "learning_rate": 7.109827471427299e-06, |
| "loss": 0.3248, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8527073646317684, |
| "grad_norm": 0.1007558956965131, |
| "learning_rate": 7.092005506708629e-06, |
| "loss": 0.3063, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.8549572521373932, |
| "grad_norm": 0.10076509216745107, |
| "learning_rate": 7.074151264540425e-06, |
| "loss": 0.3394, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8572071396430179, |
| "grad_norm": 0.1128171772187796, |
| "learning_rate": 7.056265020394908e-06, |
| "loss": 0.3353, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.8594570271486426, |
| "grad_norm": 0.07826929688060387, |
| "learning_rate": 7.038347050238052e-06, |
| "loss": 0.3313, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.8617069146542673, |
| "grad_norm": 0.09477827641455178, |
| "learning_rate": 7.020397630525336e-06, |
| "loss": 0.3094, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.863956802159892, |
| "grad_norm": 0.07996338853084985, |
| "learning_rate": 7.002417038197466e-06, |
| "loss": 0.3365, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.8662066896655167, |
| "grad_norm": 0.08681950662638242, |
| "learning_rate": 6.984405550676113e-06, |
| "loss": 0.2858, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.8684565771711414, |
| "grad_norm": 0.10147028587889259, |
| "learning_rate": 6.966363445859629e-06, |
| "loss": 0.3307, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.8707064646767662, |
| "grad_norm": 0.09778557000247115, |
| "learning_rate": 6.948291002118757e-06, |
| "loss": 0.3346, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.8729563521823909, |
| "grad_norm": 0.08335343107919917, |
| "learning_rate": 6.930188498292334e-06, |
| "loss": 0.3102, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.8752062396880156, |
| "grad_norm": 0.127528307390263, |
| "learning_rate": 6.912056213683001e-06, |
| "loss": 0.2772, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.8774561271936403, |
| "grad_norm": 0.08449830219805671, |
| "learning_rate": 6.893894428052881e-06, |
| "loss": 0.3331, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.879706014699265, |
| "grad_norm": 0.10290918076564952, |
| "learning_rate": 6.875703421619263e-06, |
| "loss": 0.3162, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.8819559022048897, |
| "grad_norm": 0.09666086595549915, |
| "learning_rate": 6.85748347505029e-06, |
| "loss": 0.3393, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.8842057897105144, |
| "grad_norm": 0.09126192537758601, |
| "learning_rate": 6.839234869460614e-06, |
| "loss": 0.3313, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.8864556772161392, |
| "grad_norm": 0.09213126718219308, |
| "learning_rate": 6.820957886407068e-06, |
| "loss": 0.3298, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.8887055647217639, |
| "grad_norm": 0.0893744576312266, |
| "learning_rate": 6.802652807884322e-06, |
| "loss": 0.3258, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.8909554522273886, |
| "grad_norm": 0.10520537204979115, |
| "learning_rate": 6.784319916320528e-06, |
| "loss": 0.3152, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.8932053397330133, |
| "grad_norm": 0.09224246726284402, |
| "learning_rate": 6.765959494572959e-06, |
| "loss": 0.3176, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.895455227238638, |
| "grad_norm": 0.09179694827419689, |
| "learning_rate": 6.74757182592366e-06, |
| "loss": 0.34, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.8977051147442627, |
| "grad_norm": 0.10131034789212955, |
| "learning_rate": 6.7291571940750575e-06, |
| "loss": 0.3171, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.8999550022498876, |
| "grad_norm": 0.11052424709399664, |
| "learning_rate": 6.710715883145599e-06, |
| "loss": 0.3084, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9022048897555123, |
| "grad_norm": 0.09523315367515199, |
| "learning_rate": 6.692248177665357e-06, |
| "loss": 0.3127, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.904454777261137, |
| "grad_norm": 0.09774145840636202, |
| "learning_rate": 6.673754362571646e-06, |
| "loss": 0.2866, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9067046647667617, |
| "grad_norm": 0.1231628868544864, |
| "learning_rate": 6.6552347232046255e-06, |
| "loss": 0.2926, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.9089545522723864, |
| "grad_norm": 0.09563379874509359, |
| "learning_rate": 6.636689545302898e-06, |
| "loss": 0.3128, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9112044397780111, |
| "grad_norm": 0.07820421786999905, |
| "learning_rate": 6.6181191149990905e-06, |
| "loss": 0.321, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.9134543272836358, |
| "grad_norm": 0.10476028051810904, |
| "learning_rate": 6.599523718815461e-06, |
| "loss": 0.2836, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9157042147892606, |
| "grad_norm": 0.11389284533738375, |
| "learning_rate": 6.580903643659453e-06, |
| "loss": 0.2934, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.9179541022948853, |
| "grad_norm": 0.10996849745288242, |
| "learning_rate": 6.5622591768192875e-06, |
| "loss": 0.3243, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.92020398980051, |
| "grad_norm": 0.09512165946660596, |
| "learning_rate": 6.5435906059595215e-06, |
| "loss": 0.3081, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.9224538773061347, |
| "grad_norm": 0.10421356775522515, |
| "learning_rate": 6.524898219116612e-06, |
| "loss": 0.2682, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9247037648117594, |
| "grad_norm": 0.10201698883401172, |
| "learning_rate": 6.5061823046944694e-06, |
| "loss": 0.2909, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.9269536523173841, |
| "grad_norm": 0.10974937304411288, |
| "learning_rate": 6.4874431514600146e-06, |
| "loss": 0.3072, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9292035398230089, |
| "grad_norm": 0.09276233118456312, |
| "learning_rate": 6.468681048538715e-06, |
| "loss": 0.2989, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.9314534273286336, |
| "grad_norm": 0.11862538493837348, |
| "learning_rate": 6.44989628541013e-06, |
| "loss": 0.3372, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9337033148342583, |
| "grad_norm": 0.10451521274212297, |
| "learning_rate": 6.431089151903439e-06, |
| "loss": 0.3188, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.935953202339883, |
| "grad_norm": 0.11422644044073009, |
| "learning_rate": 6.412259938192978e-06, |
| "loss": 0.307, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.9382030898455077, |
| "grad_norm": 0.14091820208432657, |
| "learning_rate": 6.393408934793752e-06, |
| "loss": 0.3546, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.9404529773511324, |
| "grad_norm": 0.11829750564224563, |
| "learning_rate": 6.374536432556963e-06, |
| "loss": 0.3267, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9427028648567571, |
| "grad_norm": 0.11528106197624186, |
| "learning_rate": 6.355642722665512e-06, |
| "loss": 0.3203, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.9449527523623819, |
| "grad_norm": 0.09372673822212164, |
| "learning_rate": 6.336728096629517e-06, |
| "loss": 0.3151, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9472026398680066, |
| "grad_norm": 0.10779896033185006, |
| "learning_rate": 6.317792846281805e-06, |
| "loss": 0.3052, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.9494525273736313, |
| "grad_norm": 0.09672862996353586, |
| "learning_rate": 6.298837263773423e-06, |
| "loss": 0.3033, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.951702414879256, |
| "grad_norm": 0.10872396340925997, |
| "learning_rate": 6.2798616415691095e-06, |
| "loss": 0.3002, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.9539523023848807, |
| "grad_norm": 0.11829489090483326, |
| "learning_rate": 6.260866272442807e-06, |
| "loss": 0.2929, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.9562021898905054, |
| "grad_norm": 0.11145672561455416, |
| "learning_rate": 6.2418514494731245e-06, |
| "loss": 0.2808, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.9584520773961301, |
| "grad_norm": 0.1056896163271936, |
| "learning_rate": 6.222817466038824e-06, |
| "loss": 0.2841, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.9607019649017549, |
| "grad_norm": 0.10666373036314321, |
| "learning_rate": 6.2037646158142975e-06, |
| "loss": 0.3005, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.9629518524073797, |
| "grad_norm": 0.10697096904271322, |
| "learning_rate": 6.184693192765028e-06, |
| "loss": 0.2894, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.9652017399130044, |
| "grad_norm": 0.17157045181184577, |
| "learning_rate": 6.165603491143057e-06, |
| "loss": 0.3298, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.9674516274186291, |
| "grad_norm": 0.1005745666451797, |
| "learning_rate": 6.146495805482451e-06, |
| "loss": 0.3196, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.9697015149242538, |
| "grad_norm": 0.139307317568223, |
| "learning_rate": 6.127370430594745e-06, |
| "loss": 0.2993, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.9719514024298785, |
| "grad_norm": 0.11791582586234053, |
| "learning_rate": 6.108227661564401e-06, |
| "loss": 0.3083, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.9742012899355033, |
| "grad_norm": 0.11233522118086736, |
| "learning_rate": 6.089067793744258e-06, |
| "loss": 0.3137, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.976451177441128, |
| "grad_norm": 0.12524898605746265, |
| "learning_rate": 6.069891122750971e-06, |
| "loss": 0.2825, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.9787010649467527, |
| "grad_norm": 0.09825541745527079, |
| "learning_rate": 6.050697944460444e-06, |
| "loss": 0.3146, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.9809509524523774, |
| "grad_norm": 0.11637412785681134, |
| "learning_rate": 6.0314885550032796e-06, |
| "loss": 0.2935, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.9832008399580021, |
| "grad_norm": 0.10398981333232891, |
| "learning_rate": 6.012263250760199e-06, |
| "loss": 0.28, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.9854507274636268, |
| "grad_norm": 0.1347409630178848, |
| "learning_rate": 5.993022328357466e-06, |
| "loss": 0.2899, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.9877006149692515, |
| "grad_norm": 0.136591408837683, |
| "learning_rate": 5.973766084662324e-06, |
| "loss": 0.2729, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.9899505024748763, |
| "grad_norm": 0.1032954692332516, |
| "learning_rate": 5.954494816778408e-06, |
| "loss": 0.3106, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.992200389980501, |
| "grad_norm": 0.12420490530861028, |
| "learning_rate": 5.935208822041152e-06, |
| "loss": 0.2699, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.9944502774861257, |
| "grad_norm": 0.10146757951487546, |
| "learning_rate": 5.915908398013217e-06, |
| "loss": 0.266, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.9967001649917504, |
| "grad_norm": 0.10690509046474422, |
| "learning_rate": 5.896593842479893e-06, |
| "loss": 0.2916, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.9989500524973751, |
| "grad_norm": 0.2098417588495756, |
| "learning_rate": 5.8772654534445e-06, |
| "loss": 0.3104, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.27543845772743225, |
| "eval_runtime": 55.028, |
| "eval_samples_per_second": 19.59, |
| "eval_steps_per_second": 4.907, |
| "step": 2223 |
| }, |
| { |
| "epoch": 1.00089995500225, |
| "grad_norm": 0.11100179968154768, |
| "learning_rate": 5.857923529123799e-06, |
| "loss": 0.2341, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.0031498425078746, |
| "grad_norm": 0.14616860643517418, |
| "learning_rate": 5.838568367943383e-06, |
| "loss": 0.2679, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.0053997300134994, |
| "grad_norm": 0.11313230544533252, |
| "learning_rate": 5.819200268533076e-06, |
| "loss": 0.2873, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.007649617519124, |
| "grad_norm": 0.1210465260044826, |
| "learning_rate": 5.7998195297223285e-06, |
| "loss": 0.2677, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.0098995050247488, |
| "grad_norm": 0.11722674843174795, |
| "learning_rate": 5.7804264505356e-06, |
| "loss": 0.2548, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.0121493925303735, |
| "grad_norm": 0.12390544554268877, |
| "learning_rate": 5.76102133018775e-06, |
| "loss": 0.2942, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.0143992800359982, |
| "grad_norm": 0.14215352813872506, |
| "learning_rate": 5.741604468079421e-06, |
| "loss": 0.3095, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.016649167541623, |
| "grad_norm": 0.13309421360381032, |
| "learning_rate": 5.72217616379242e-06, |
| "loss": 0.2794, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.0188990550472476, |
| "grad_norm": 0.1409784002692586, |
| "learning_rate": 5.702736717085093e-06, |
| "loss": 0.2998, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.0211489425528724, |
| "grad_norm": 0.12978570417210325, |
| "learning_rate": 5.6832864278876984e-06, |
| "loss": 0.2829, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.023398830058497, |
| "grad_norm": 0.10750959417123264, |
| "learning_rate": 5.663825596297794e-06, |
| "loss": 0.2902, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.0256487175641218, |
| "grad_norm": 0.137940819760974, |
| "learning_rate": 5.644354522575581e-06, |
| "loss": 0.2806, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.0278986050697465, |
| "grad_norm": 0.14563829553392096, |
| "learning_rate": 5.624873507139297e-06, |
| "loss": 0.277, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.0301484925753712, |
| "grad_norm": 0.12377796525725795, |
| "learning_rate": 5.605382850560565e-06, |
| "loss": 0.2943, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.032398380080996, |
| "grad_norm": 0.16984305955909604, |
| "learning_rate": 5.585882853559762e-06, |
| "loss": 0.2889, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.0346482675866207, |
| "grad_norm": 0.1281002826955631, |
| "learning_rate": 5.566373817001377e-06, |
| "loss": 0.293, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0368981550922454, |
| "grad_norm": 0.15524678076001608, |
| "learning_rate": 5.546856041889374e-06, |
| "loss": 0.2605, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.03914804259787, |
| "grad_norm": 0.14215571774039212, |
| "learning_rate": 5.527329829362534e-06, |
| "loss": 0.2786, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.0413979301034948, |
| "grad_norm": 0.1447748028005779, |
| "learning_rate": 5.5077954806898284e-06, |
| "loss": 0.2688, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.0436478176091195, |
| "grad_norm": 0.14426858307924748, |
| "learning_rate": 5.488253297265757e-06, |
| "loss": 0.2777, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.0458977051147442, |
| "grad_norm": 0.1272869099382178, |
| "learning_rate": 5.468703580605703e-06, |
| "loss": 0.2997, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.048147592620369, |
| "grad_norm": 0.133865100418296, |
| "learning_rate": 5.4491466323412745e-06, |
| "loss": 0.2839, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.0503974801259937, |
| "grad_norm": 0.12437130432718715, |
| "learning_rate": 5.429582754215664e-06, |
| "loss": 0.2843, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.0526473676316184, |
| "grad_norm": 0.1419352738893503, |
| "learning_rate": 5.410012248078975e-06, |
| "loss": 0.2677, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.054897255137243, |
| "grad_norm": 0.1639413029064359, |
| "learning_rate": 5.390435415883583e-06, |
| "loss": 0.2805, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.0571471426428678, |
| "grad_norm": 0.14750894149267404, |
| "learning_rate": 5.370852559679461e-06, |
| "loss": 0.2718, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.0593970301484925, |
| "grad_norm": 0.1418143669594509, |
| "learning_rate": 5.351263981609532e-06, |
| "loss": 0.2374, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.0616469176541172, |
| "grad_norm": 0.1467085192211227, |
| "learning_rate": 5.331669983904996e-06, |
| "loss": 0.278, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.063896805159742, |
| "grad_norm": 0.1686191463372291, |
| "learning_rate": 5.312070868880678e-06, |
| "loss": 0.2818, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.0661466926653667, |
| "grad_norm": 0.1844876464618337, |
| "learning_rate": 5.29246693893035e-06, |
| "loss": 0.2971, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.0683965801709914, |
| "grad_norm": 0.12521919673631507, |
| "learning_rate": 5.272858496522084e-06, |
| "loss": 0.2737, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.070646467676616, |
| "grad_norm": 0.15034047715143825, |
| "learning_rate": 5.253245844193564e-06, |
| "loss": 0.2858, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.0728963551822408, |
| "grad_norm": 0.1264075738033277, |
| "learning_rate": 5.233629284547435e-06, |
| "loss": 0.2564, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.0751462426878655, |
| "grad_norm": 0.1940932983786269, |
| "learning_rate": 5.214009120246623e-06, |
| "loss": 0.2722, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.0773961301934902, |
| "grad_norm": 0.16922904631843647, |
| "learning_rate": 5.1943856540096795e-06, |
| "loss": 0.2912, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.079646017699115, |
| "grad_norm": 0.23716139744779294, |
| "learning_rate": 5.174759188606087e-06, |
| "loss": 0.2885, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.0818959052047397, |
| "grad_norm": 0.11509288529342813, |
| "learning_rate": 5.155130026851616e-06, |
| "loss": 0.2575, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.0841457927103644, |
| "grad_norm": 0.17727493415132747, |
| "learning_rate": 5.135498471603629e-06, |
| "loss": 0.2639, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.0863956802159893, |
| "grad_norm": 0.13930766876349623, |
| "learning_rate": 5.1158648257564235e-06, |
| "loss": 0.2606, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.0886455677216138, |
| "grad_norm": 0.12454839412933186, |
| "learning_rate": 5.0962293922365495e-06, |
| "loss": 0.256, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.0908954552272387, |
| "grad_norm": 0.18809390149779476, |
| "learning_rate": 5.076592473998141e-06, |
| "loss": 0.2646, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.0931453427328635, |
| "grad_norm": 0.1508834503375353, |
| "learning_rate": 5.056954374018236e-06, |
| "loss": 0.2764, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.0953952302384882, |
| "grad_norm": 0.1491889266816844, |
| "learning_rate": 5.037315395292111e-06, |
| "loss": 0.2691, |
| "step": 2435 |
| }, |
| { |
| "epoch": 1.0976451177441129, |
| "grad_norm": 0.15633034297704468, |
| "learning_rate": 5.017675840828597e-06, |
| "loss": 0.2657, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.0998950052497376, |
| "grad_norm": 0.15782519717103635, |
| "learning_rate": 4.998036013645409e-06, |
| "loss": 0.2561, |
| "step": 2445 |
| }, |
| { |
| "epoch": 1.1021448927553623, |
| "grad_norm": 0.19449808917352213, |
| "learning_rate": 4.97839621676447e-06, |
| "loss": 0.2571, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.104394780260987, |
| "grad_norm": 0.16872055966750726, |
| "learning_rate": 4.958756753207234e-06, |
| "loss": 0.2459, |
| "step": 2455 |
| }, |
| { |
| "epoch": 1.1066446677666117, |
| "grad_norm": 0.17373438335912267, |
| "learning_rate": 4.939117925990013e-06, |
| "loss": 0.2805, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.1088945552722365, |
| "grad_norm": 0.15151992334964703, |
| "learning_rate": 4.919480038119302e-06, |
| "loss": 0.251, |
| "step": 2465 |
| }, |
| { |
| "epoch": 1.1111444427778612, |
| "grad_norm": 0.15554454267536397, |
| "learning_rate": 4.899843392587104e-06, |
| "loss": 0.2533, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.113394330283486, |
| "grad_norm": 0.16595294302301358, |
| "learning_rate": 4.880208292366247e-06, |
| "loss": 0.2864, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.1156442177891106, |
| "grad_norm": 0.15038201249362013, |
| "learning_rate": 4.860575040405726e-06, |
| "loss": 0.2744, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.1178941052947353, |
| "grad_norm": 0.16630054816008968, |
| "learning_rate": 4.840943939626012e-06, |
| "loss": 0.2362, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.12014399280036, |
| "grad_norm": 0.18269629542973387, |
| "learning_rate": 4.821315292914392e-06, |
| "loss": 0.2786, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.1223938803059847, |
| "grad_norm": 0.16417528180865418, |
| "learning_rate": 4.801689403120282e-06, |
| "loss": 0.2506, |
| "step": 2495 |
| }, |
| { |
| "epoch": 1.1246437678116095, |
| "grad_norm": 0.12251195240813534, |
| "learning_rate": 4.782066573050567e-06, |
| "loss": 0.2693, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.1268936553172342, |
| "grad_norm": 0.19913321021658195, |
| "learning_rate": 4.7624471054649216e-06, |
| "loss": 0.26, |
| "step": 2505 |
| }, |
| { |
| "epoch": 1.129143542822859, |
| "grad_norm": 0.16359478594452095, |
| "learning_rate": 4.742831303071143e-06, |
| "loss": 0.2507, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.1313934303284836, |
| "grad_norm": 0.20741074237045662, |
| "learning_rate": 4.723219468520474e-06, |
| "loss": 0.2678, |
| "step": 2515 |
| }, |
| { |
| "epoch": 1.1336433178341083, |
| "grad_norm": 0.16956816625653676, |
| "learning_rate": 4.703611904402939e-06, |
| "loss": 0.2795, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.135893205339733, |
| "grad_norm": 0.1818340434409631, |
| "learning_rate": 4.684008913242679e-06, |
| "loss": 0.2586, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.1381430928453578, |
| "grad_norm": 0.17749209313732456, |
| "learning_rate": 4.664410797493275e-06, |
| "loss": 0.2708, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.1403929803509825, |
| "grad_norm": 0.167827444506409, |
| "learning_rate": 4.644817859533083e-06, |
| "loss": 0.2717, |
| "step": 2535 |
| }, |
| { |
| "epoch": 1.1426428678566072, |
| "grad_norm": 0.17149191797141825, |
| "learning_rate": 4.625230401660578e-06, |
| "loss": 0.2444, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.144892755362232, |
| "grad_norm": 0.19053262323498327, |
| "learning_rate": 4.605648726089674e-06, |
| "loss": 0.2546, |
| "step": 2545 |
| }, |
| { |
| "epoch": 1.1471426428678566, |
| "grad_norm": 0.17029611567515032, |
| "learning_rate": 4.58607313494508e-06, |
| "loss": 0.2515, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.1493925303734813, |
| "grad_norm": 0.20535330778256622, |
| "learning_rate": 4.566503930257624e-06, |
| "loss": 0.2687, |
| "step": 2555 |
| }, |
| { |
| "epoch": 1.151642417879106, |
| "grad_norm": 0.17888453950166083, |
| "learning_rate": 4.546941413959595e-06, |
| "loss": 0.2582, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.1538923053847308, |
| "grad_norm": 0.17098481716726255, |
| "learning_rate": 4.5273858878800895e-06, |
| "loss": 0.2633, |
| "step": 2565 |
| }, |
| { |
| "epoch": 1.1561421928903555, |
| "grad_norm": 0.22394541422414396, |
| "learning_rate": 4.507837653740355e-06, |
| "loss": 0.2657, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.1583920803959802, |
| "grad_norm": 0.16148745686481833, |
| "learning_rate": 4.4882970131491286e-06, |
| "loss": 0.2469, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.160641967901605, |
| "grad_norm": 0.21762812124764483, |
| "learning_rate": 4.468764267597986e-06, |
| "loss": 0.2815, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.1628918554072296, |
| "grad_norm": 0.2041647572323139, |
| "learning_rate": 4.449239718456696e-06, |
| "loss": 0.253, |
| "step": 2585 |
| }, |
| { |
| "epoch": 1.1651417429128543, |
| "grad_norm": 0.1508182234886033, |
| "learning_rate": 4.429723666968559e-06, |
| "loss": 0.2532, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.167391630418479, |
| "grad_norm": 0.22173731592066487, |
| "learning_rate": 4.410216414245771e-06, |
| "loss": 0.2597, |
| "step": 2595 |
| }, |
| { |
| "epoch": 1.1696415179241038, |
| "grad_norm": 0.15334607029538722, |
| "learning_rate": 4.390718261264768e-06, |
| "loss": 0.2429, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1718914054297285, |
| "grad_norm": 0.17386719805484463, |
| "learning_rate": 4.371229508861588e-06, |
| "loss": 0.2718, |
| "step": 2605 |
| }, |
| { |
| "epoch": 1.1741412929353532, |
| "grad_norm": 0.255145373819277, |
| "learning_rate": 4.351750457727229e-06, |
| "loss": 0.2544, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.176391180440978, |
| "grad_norm": 0.19091868423027997, |
| "learning_rate": 4.332281408403011e-06, |
| "loss": 0.26, |
| "step": 2615 |
| }, |
| { |
| "epoch": 1.1786410679466026, |
| "grad_norm": 0.17031635023758315, |
| "learning_rate": 4.312822661275929e-06, |
| "loss": 0.2478, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.1808909554522273, |
| "grad_norm": 0.18810141305157912, |
| "learning_rate": 4.293374516574031e-06, |
| "loss": 0.2593, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.183140842957852, |
| "grad_norm": 0.20489249951929697, |
| "learning_rate": 4.273937274361782e-06, |
| "loss": 0.2226, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.1853907304634768, |
| "grad_norm": 0.18589998495363094, |
| "learning_rate": 4.254511234535432e-06, |
| "loss": 0.2313, |
| "step": 2635 |
| }, |
| { |
| "epoch": 1.1876406179691015, |
| "grad_norm": 0.1974695166475231, |
| "learning_rate": 4.235096696818385e-06, |
| "loss": 0.2782, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.1898905054747262, |
| "grad_norm": 0.15560807641673985, |
| "learning_rate": 4.215693960756586e-06, |
| "loss": 0.2461, |
| "step": 2645 |
| }, |
| { |
| "epoch": 1.192140392980351, |
| "grad_norm": 0.14168460680781833, |
| "learning_rate": 4.1963033257138904e-06, |
| "loss": 0.2323, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.1943902804859756, |
| "grad_norm": 0.19193101382035213, |
| "learning_rate": 4.176925090867449e-06, |
| "loss": 0.252, |
| "step": 2655 |
| }, |
| { |
| "epoch": 1.1966401679916003, |
| "grad_norm": 0.19059681316908272, |
| "learning_rate": 4.157559555203086e-06, |
| "loss": 0.2237, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.198890055497225, |
| "grad_norm": 0.18365584045782385, |
| "learning_rate": 4.138207017510696e-06, |
| "loss": 0.2498, |
| "step": 2665 |
| }, |
| { |
| "epoch": 1.2011399430028498, |
| "grad_norm": 0.17126185601849214, |
| "learning_rate": 4.118867776379624e-06, |
| "loss": 0.2121, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.2033898305084745, |
| "grad_norm": 0.23530086737062514, |
| "learning_rate": 4.099542130194069e-06, |
| "loss": 0.2369, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.2056397180140994, |
| "grad_norm": 0.1759441387313428, |
| "learning_rate": 4.0802303771284685e-06, |
| "loss": 0.2171, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.207889605519724, |
| "grad_norm": 0.19878924933956027, |
| "learning_rate": 4.060932815142904e-06, |
| "loss": 0.2631, |
| "step": 2685 |
| }, |
| { |
| "epoch": 1.2101394930253488, |
| "grad_norm": 0.1984620336427276, |
| "learning_rate": 4.041649741978508e-06, |
| "loss": 0.2408, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.2123893805309733, |
| "grad_norm": 0.16406158411947314, |
| "learning_rate": 4.022381455152863e-06, |
| "loss": 0.2204, |
| "step": 2695 |
| }, |
| { |
| "epoch": 1.2146392680365983, |
| "grad_norm": 0.18585134324802086, |
| "learning_rate": 4.003128251955412e-06, |
| "loss": 0.2254, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2168891555422228, |
| "grad_norm": 0.2028470417783533, |
| "learning_rate": 3.983890429442876e-06, |
| "loss": 0.2174, |
| "step": 2705 |
| }, |
| { |
| "epoch": 1.2191390430478477, |
| "grad_norm": 0.18306124060212872, |
| "learning_rate": 3.964668284434666e-06, |
| "loss": 0.2281, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.2213889305534724, |
| "grad_norm": 0.2261111639681813, |
| "learning_rate": 3.945462113508312e-06, |
| "loss": 0.2183, |
| "step": 2715 |
| }, |
| { |
| "epoch": 1.2236388180590971, |
| "grad_norm": 0.21171892163095699, |
| "learning_rate": 3.92627221299487e-06, |
| "loss": 0.2249, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.2258887055647218, |
| "grad_norm": 0.18554866614076224, |
| "learning_rate": 3.907098878974367e-06, |
| "loss": 0.2356, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.2281385930703466, |
| "grad_norm": 0.17740452807380613, |
| "learning_rate": 3.887942407271228e-06, |
| "loss": 0.2213, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.2303884805759713, |
| "grad_norm": 0.19628544094095077, |
| "learning_rate": 3.868803093449709e-06, |
| "loss": 0.2256, |
| "step": 2735 |
| }, |
| { |
| "epoch": 1.232638368081596, |
| "grad_norm": 0.1906710395370276, |
| "learning_rate": 3.8496812328093335e-06, |
| "loss": 0.2431, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.2348882555872207, |
| "grad_norm": 0.21739799246928065, |
| "learning_rate": 3.8305771203803434e-06, |
| "loss": 0.2053, |
| "step": 2745 |
| }, |
| { |
| "epoch": 1.2371381430928454, |
| "grad_norm": 0.20933189544262915, |
| "learning_rate": 3.8114910509191483e-06, |
| "loss": 0.2372, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2393880305984701, |
| "grad_norm": 0.16805864711967494, |
| "learning_rate": 3.7924233189037697e-06, |
| "loss": 0.2421, |
| "step": 2755 |
| }, |
| { |
| "epoch": 1.2416379181040949, |
| "grad_norm": 0.23407049517629622, |
| "learning_rate": 3.773374218529298e-06, |
| "loss": 0.2289, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.2438878056097196, |
| "grad_norm": 0.20043628906146582, |
| "learning_rate": 3.7543440437033656e-06, |
| "loss": 0.2197, |
| "step": 2765 |
| }, |
| { |
| "epoch": 1.2461376931153443, |
| "grad_norm": 0.1811301883423287, |
| "learning_rate": 3.7353330880415963e-06, |
| "loss": 0.2118, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.248387580620969, |
| "grad_norm": 0.20413664615759625, |
| "learning_rate": 3.7163416448630886e-06, |
| "loss": 0.2103, |
| "step": 2775 |
| }, |
| { |
| "epoch": 1.2506374681265937, |
| "grad_norm": 0.2053294418375065, |
| "learning_rate": 3.6973700071858764e-06, |
| "loss": 0.2265, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.2528873556322184, |
| "grad_norm": 0.17855437216730508, |
| "learning_rate": 3.6784184677224204e-06, |
| "loss": 0.2082, |
| "step": 2785 |
| }, |
| { |
| "epoch": 1.2551372431378431, |
| "grad_norm": 0.21204933584524724, |
| "learning_rate": 3.659487318875087e-06, |
| "loss": 0.2368, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.2573871306434679, |
| "grad_norm": 0.244934854739885, |
| "learning_rate": 3.6405768527316376e-06, |
| "loss": 0.2236, |
| "step": 2795 |
| }, |
| { |
| "epoch": 1.2596370181490926, |
| "grad_norm": 0.20352719384257717, |
| "learning_rate": 3.6216873610607155e-06, |
| "loss": 0.2127, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.2618869056547173, |
| "grad_norm": 0.21525625357885447, |
| "learning_rate": 3.602819135307355e-06, |
| "loss": 0.2026, |
| "step": 2805 |
| }, |
| { |
| "epoch": 1.264136793160342, |
| "grad_norm": 0.24886200931475094, |
| "learning_rate": 3.58397246658848e-06, |
| "loss": 0.2049, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.2663866806659667, |
| "grad_norm": 0.22213048059657176, |
| "learning_rate": 3.5651476456884103e-06, |
| "loss": 0.2149, |
| "step": 2815 |
| }, |
| { |
| "epoch": 1.2686365681715914, |
| "grad_norm": 0.24474792019196667, |
| "learning_rate": 3.5463449630543744e-06, |
| "loss": 0.2176, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.2708864556772161, |
| "grad_norm": 0.21959268792414904, |
| "learning_rate": 3.527564708792035e-06, |
| "loss": 0.2319, |
| "step": 2825 |
| }, |
| { |
| "epoch": 1.2731363431828409, |
| "grad_norm": 0.21285142665025264, |
| "learning_rate": 3.508807172661006e-06, |
| "loss": 0.2278, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.2753862306884656, |
| "grad_norm": 0.24872484432655345, |
| "learning_rate": 3.490072644070386e-06, |
| "loss": 0.2367, |
| "step": 2835 |
| }, |
| { |
| "epoch": 1.2776361181940903, |
| "grad_norm": 0.2446892197957464, |
| "learning_rate": 3.47136141207429e-06, |
| "loss": 0.2147, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.279886005699715, |
| "grad_norm": 0.3593552477933211, |
| "learning_rate": 3.452673765367389e-06, |
| "loss": 0.2471, |
| "step": 2845 |
| }, |
| { |
| "epoch": 1.2821358932053397, |
| "grad_norm": 0.18760658096432373, |
| "learning_rate": 3.4340099922804627e-06, |
| "loss": 0.2185, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.2843857807109644, |
| "grad_norm": 0.1746094898464911, |
| "learning_rate": 3.4153703807759432e-06, |
| "loss": 0.1939, |
| "step": 2855 |
| }, |
| { |
| "epoch": 1.2866356682165891, |
| "grad_norm": 0.2386232051443061, |
| "learning_rate": 3.3967552184434753e-06, |
| "loss": 0.2182, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.2888855557222139, |
| "grad_norm": 0.2147456869413775, |
| "learning_rate": 3.378164792495475e-06, |
| "loss": 0.2232, |
| "step": 2865 |
| }, |
| { |
| "epoch": 1.2911354432278386, |
| "grad_norm": 0.21939888824914258, |
| "learning_rate": 3.3595993897627098e-06, |
| "loss": 0.2059, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.2933853307334633, |
| "grad_norm": 0.20007621997926173, |
| "learning_rate": 3.3410592966898565e-06, |
| "loss": 0.2025, |
| "step": 2875 |
| }, |
| { |
| "epoch": 1.295635218239088, |
| "grad_norm": 0.22959303011889556, |
| "learning_rate": 3.3225447993310983e-06, |
| "loss": 0.2004, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.2978851057447127, |
| "grad_norm": 0.23309801112874845, |
| "learning_rate": 3.3040561833456964e-06, |
| "loss": 0.1914, |
| "step": 2885 |
| }, |
| { |
| "epoch": 1.3001349932503374, |
| "grad_norm": 0.22848735574436602, |
| "learning_rate": 3.2855937339935933e-06, |
| "loss": 0.1844, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.3023848807559621, |
| "grad_norm": 0.20570875834144497, |
| "learning_rate": 3.2671577361310087e-06, |
| "loss": 0.2132, |
| "step": 2895 |
| }, |
| { |
| "epoch": 1.3046347682615869, |
| "grad_norm": 0.24826968315533732, |
| "learning_rate": 3.2487484742060427e-06, |
| "loss": 0.2111, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.3068846557672116, |
| "grad_norm": 0.2410762961266627, |
| "learning_rate": 3.2303662322542835e-06, |
| "loss": 0.1948, |
| "step": 2905 |
| }, |
| { |
| "epoch": 1.3091345432728363, |
| "grad_norm": 0.23347593077480983, |
| "learning_rate": 3.212011293894436e-06, |
| "loss": 0.2008, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.311384430778461, |
| "grad_norm": 0.21360507603920142, |
| "learning_rate": 3.1936839423239376e-06, |
| "loss": 0.2042, |
| "step": 2915 |
| }, |
| { |
| "epoch": 1.3136343182840857, |
| "grad_norm": 0.21960761516089436, |
| "learning_rate": 3.1753844603145894e-06, |
| "loss": 0.2391, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.3158842057897104, |
| "grad_norm": 0.20203229318870164, |
| "learning_rate": 3.1571131302081916e-06, |
| "loss": 0.1876, |
| "step": 2925 |
| }, |
| { |
| "epoch": 1.3181340932953352, |
| "grad_norm": 0.24191918555495237, |
| "learning_rate": 3.138870233912197e-06, |
| "loss": 0.1962, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.32038398080096, |
| "grad_norm": 0.20921020378628946, |
| "learning_rate": 3.1206560528953467e-06, |
| "loss": 0.2058, |
| "step": 2935 |
| }, |
| { |
| "epoch": 1.3226338683065846, |
| "grad_norm": 0.20869954401470014, |
| "learning_rate": 3.102470868183344e-06, |
| "loss": 0.2064, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.3248837558122095, |
| "grad_norm": 0.2249649340119077, |
| "learning_rate": 3.084314960354501e-06, |
| "loss": 0.2046, |
| "step": 2945 |
| }, |
| { |
| "epoch": 1.327133643317834, |
| "grad_norm": 0.25182875069609073, |
| "learning_rate": 3.066188609535421e-06, |
| "loss": 0.2037, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.329383530823459, |
| "grad_norm": 0.21107658603026272, |
| "learning_rate": 3.0480920953966786e-06, |
| "loss": 0.2094, |
| "step": 2955 |
| }, |
| { |
| "epoch": 1.3316334183290834, |
| "grad_norm": 0.22913188913268076, |
| "learning_rate": 3.0300256971484943e-06, |
| "loss": 0.2162, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.3338833058347084, |
| "grad_norm": 0.2539850632246194, |
| "learning_rate": 3.0119896935364305e-06, |
| "loss": 0.1941, |
| "step": 2965 |
| }, |
| { |
| "epoch": 1.3361331933403329, |
| "grad_norm": 0.23858971814994895, |
| "learning_rate": 2.993984362837098e-06, |
| "loss": 0.1839, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.3383830808459578, |
| "grad_norm": 0.2721885562055672, |
| "learning_rate": 2.9760099828538545e-06, |
| "loss": 0.211, |
| "step": 2975 |
| }, |
| { |
| "epoch": 1.3406329683515823, |
| "grad_norm": 0.2482373958173057, |
| "learning_rate": 2.9580668309125203e-06, |
| "loss": 0.1998, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.3428828558572072, |
| "grad_norm": 0.2306376622557913, |
| "learning_rate": 2.940155183857096e-06, |
| "loss": 0.2196, |
| "step": 2985 |
| }, |
| { |
| "epoch": 1.3451327433628317, |
| "grad_norm": 0.26262612593731016, |
| "learning_rate": 2.922275318045502e-06, |
| "loss": 0.1882, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.3473826308684567, |
| "grad_norm": 0.20346419756531464, |
| "learning_rate": 2.9044275093453034e-06, |
| "loss": 0.193, |
| "step": 2995 |
| }, |
| { |
| "epoch": 1.3496325183740812, |
| "grad_norm": 0.22993902000452152, |
| "learning_rate": 2.8866120331294567e-06, |
| "loss": 0.1736, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.351882405879706, |
| "grad_norm": 0.27935117403868454, |
| "learning_rate": 2.8688291642720656e-06, |
| "loss": 0.1904, |
| "step": 3005 |
| }, |
| { |
| "epoch": 1.3541322933853308, |
| "grad_norm": 0.27365571988160076, |
| "learning_rate": 2.8510791771441327e-06, |
| "loss": 0.1853, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.3563821808909555, |
| "grad_norm": 0.23083646098925237, |
| "learning_rate": 2.8333623456093313e-06, |
| "loss": 0.1968, |
| "step": 3015 |
| }, |
| { |
| "epoch": 1.3586320683965802, |
| "grad_norm": 0.2704413392632432, |
| "learning_rate": 2.815678943019784e-06, |
| "loss": 0.1927, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.360881955902205, |
| "grad_norm": 0.24524940743516008, |
| "learning_rate": 2.7980292422118282e-06, |
| "loss": 0.2051, |
| "step": 3025 |
| }, |
| { |
| "epoch": 1.3631318434078297, |
| "grad_norm": 0.24409905829083706, |
| "learning_rate": 2.7804135155018307e-06, |
| "loss": 0.164, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.3653817309134544, |
| "grad_norm": 0.23814624701122666, |
| "learning_rate": 2.762832034681965e-06, |
| "loss": 0.1777, |
| "step": 3035 |
| }, |
| { |
| "epoch": 1.367631618419079, |
| "grad_norm": 0.24368144586742516, |
| "learning_rate": 2.7452850710160305e-06, |
| "loss": 0.1946, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.3698815059247038, |
| "grad_norm": 0.24657044578855591, |
| "learning_rate": 2.727772895235262e-06, |
| "loss": 0.2024, |
| "step": 3045 |
| }, |
| { |
| "epoch": 1.3721313934303285, |
| "grad_norm": 0.20668089980394588, |
| "learning_rate": 2.710295777534154e-06, |
| "loss": 0.1853, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.3743812809359532, |
| "grad_norm": 0.22601090907078772, |
| "learning_rate": 2.692853987566291e-06, |
| "loss": 0.1764, |
| "step": 3055 |
| }, |
| { |
| "epoch": 1.376631168441578, |
| "grad_norm": 0.2872809956397954, |
| "learning_rate": 2.675447794440188e-06, |
| "loss": 0.1609, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.3788810559472027, |
| "grad_norm": 0.2411498866283728, |
| "learning_rate": 2.658077466715138e-06, |
| "loss": 0.1813, |
| "step": 3065 |
| }, |
| { |
| "epoch": 1.3811309434528274, |
| "grad_norm": 0.19284001830686515, |
| "learning_rate": 2.6407432723970694e-06, |
| "loss": 0.1751, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.383380830958452, |
| "grad_norm": 0.23962546665483148, |
| "learning_rate": 2.6234454789344067e-06, |
| "loss": 0.164, |
| "step": 3075 |
| }, |
| { |
| "epoch": 1.3856307184640768, |
| "grad_norm": 0.25424115274026465, |
| "learning_rate": 2.6061843532139563e-06, |
| "loss": 0.1816, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.3878806059697015, |
| "grad_norm": 0.25313051025088457, |
| "learning_rate": 2.5889601615567657e-06, |
| "loss": 0.1813, |
| "step": 3085 |
| }, |
| { |
| "epoch": 1.3901304934753262, |
| "grad_norm": 0.2378148779779353, |
| "learning_rate": 2.5717731697140425e-06, |
| "loss": 0.1822, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.392380380980951, |
| "grad_norm": 0.24347808422412195, |
| "learning_rate": 2.554623642863031e-06, |
| "loss": 0.165, |
| "step": 3095 |
| }, |
| { |
| "epoch": 1.3946302684865757, |
| "grad_norm": 0.26898645682575706, |
| "learning_rate": 2.5375118456029345e-06, |
| "loss": 0.1834, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.3968801559922004, |
| "grad_norm": 0.20720842966338204, |
| "learning_rate": 2.520438041950827e-06, |
| "loss": 0.1638, |
| "step": 3105 |
| }, |
| { |
| "epoch": 1.399130043497825, |
| "grad_norm": 0.17953407898094143, |
| "learning_rate": 2.503402495337579e-06, |
| "loss": 0.1547, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.4013799310034498, |
| "grad_norm": 0.27527731902583097, |
| "learning_rate": 2.4864054686037993e-06, |
| "loss": 0.1771, |
| "step": 3115 |
| }, |
| { |
| "epoch": 1.4036298185090745, |
| "grad_norm": 0.2441103318991431, |
| "learning_rate": 2.469447223995772e-06, |
| "loss": 0.1866, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.4058797060146992, |
| "grad_norm": 0.22146555295977546, |
| "learning_rate": 2.452528023161414e-06, |
| "loss": 0.1658, |
| "step": 3125 |
| }, |
| { |
| "epoch": 1.408129593520324, |
| "grad_norm": 0.24036023292266545, |
| "learning_rate": 2.4356481271462396e-06, |
| "loss": 0.1951, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.4103794810259487, |
| "grad_norm": 0.2346343428333368, |
| "learning_rate": 2.4188077963893276e-06, |
| "loss": 0.1724, |
| "step": 3135 |
| }, |
| { |
| "epoch": 1.4126293685315734, |
| "grad_norm": 0.28341682436165366, |
| "learning_rate": 2.4020072907193123e-06, |
| "loss": 0.1786, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.414879256037198, |
| "grad_norm": 0.25526003142103276, |
| "learning_rate": 2.3852468693503635e-06, |
| "loss": 0.166, |
| "step": 3145 |
| }, |
| { |
| "epoch": 1.4171291435428228, |
| "grad_norm": 0.21169312639212728, |
| "learning_rate": 2.3685267908781934e-06, |
| "loss": 0.1651, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.4193790310484475, |
| "grad_norm": 0.2311680518850515, |
| "learning_rate": 2.3518473132760668e-06, |
| "loss": 0.1943, |
| "step": 3155 |
| }, |
| { |
| "epoch": 1.4216289185540723, |
| "grad_norm": 0.2466100225354571, |
| "learning_rate": 2.335208693890819e-06, |
| "loss": 0.1759, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.423878806059697, |
| "grad_norm": 0.25320147361029777, |
| "learning_rate": 2.318611189438884e-06, |
| "loss": 0.1741, |
| "step": 3165 |
| }, |
| { |
| "epoch": 1.4261286935653217, |
| "grad_norm": 0.20379034599277449, |
| "learning_rate": 2.30205505600234e-06, |
| "loss": 0.1515, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.4283785810709464, |
| "grad_norm": 0.2171396909921854, |
| "learning_rate": 2.2855405490249498e-06, |
| "loss": 0.17, |
| "step": 3175 |
| }, |
| { |
| "epoch": 1.430628468576571, |
| "grad_norm": 0.25025327769749556, |
| "learning_rate": 2.2690679233082237e-06, |
| "loss": 0.1442, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.4328783560821958, |
| "grad_norm": 0.20134411154173665, |
| "learning_rate": 2.2526374330074945e-06, |
| "loss": 0.172, |
| "step": 3185 |
| }, |
| { |
| "epoch": 1.4351282435878205, |
| "grad_norm": 0.20636498909519851, |
| "learning_rate": 2.23624933162798e-06, |
| "loss": 0.1663, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.4373781310934453, |
| "grad_norm": 0.24524283669244562, |
| "learning_rate": 2.219903872020885e-06, |
| "loss": 0.1726, |
| "step": 3195 |
| }, |
| { |
| "epoch": 1.43962801859907, |
| "grad_norm": 0.226380159375995, |
| "learning_rate": 2.2036013063795024e-06, |
| "loss": 0.1707, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.4418779061046947, |
| "grad_norm": 0.23433271109204132, |
| "learning_rate": 2.1873418862353095e-06, |
| "loss": 0.1885, |
| "step": 3205 |
| }, |
| { |
| "epoch": 1.4441277936103196, |
| "grad_norm": 0.27219913617478064, |
| "learning_rate": 2.1711258624540955e-06, |
| "loss": 0.1627, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.4463776811159441, |
| "grad_norm": 0.2720555642584958, |
| "learning_rate": 2.15495348523209e-06, |
| "loss": 0.1637, |
| "step": 3215 |
| }, |
| { |
| "epoch": 1.448627568621569, |
| "grad_norm": 0.26833619371988116, |
| "learning_rate": 2.1388250040921007e-06, |
| "loss": 0.1536, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.4508774561271935, |
| "grad_norm": 0.2603614911120465, |
| "learning_rate": 2.1227406678796664e-06, |
| "loss": 0.1608, |
| "step": 3225 |
| }, |
| { |
| "epoch": 1.4531273436328185, |
| "grad_norm": 0.21162975240993986, |
| "learning_rate": 2.1067007247592153e-06, |
| "loss": 0.1649, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.455377231138443, |
| "grad_norm": 0.240416434380939, |
| "learning_rate": 2.0907054222102367e-06, |
| "loss": 0.157, |
| "step": 3235 |
| }, |
| { |
| "epoch": 1.457627118644068, |
| "grad_norm": 0.22619962147371023, |
| "learning_rate": 2.074755007023461e-06, |
| "loss": 0.152, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.4598770061496924, |
| "grad_norm": 0.23122000932974787, |
| "learning_rate": 2.058849725297061e-06, |
| "loss": 0.1674, |
| "step": 3245 |
| }, |
| { |
| "epoch": 1.4621268936553173, |
| "grad_norm": 0.20088454332338435, |
| "learning_rate": 2.042989822432837e-06, |
| "loss": 0.1426, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.4643767811609418, |
| "grad_norm": 0.22446161891702227, |
| "learning_rate": 2.0271755431324456e-06, |
| "loss": 0.1431, |
| "step": 3255 |
| }, |
| { |
| "epoch": 1.4666266686665668, |
| "grad_norm": 0.25951357745647785, |
| "learning_rate": 2.011407131393624e-06, |
| "loss": 0.1521, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.4688765561721913, |
| "grad_norm": 0.23931332134441274, |
| "learning_rate": 1.9956848305064156e-06, |
| "loss": 0.1348, |
| "step": 3265 |
| }, |
| { |
| "epoch": 1.4711264436778162, |
| "grad_norm": 0.2443591943685552, |
| "learning_rate": 1.9800088830494233e-06, |
| "loss": 0.1616, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.4733763311834407, |
| "grad_norm": 0.2583750411808441, |
| "learning_rate": 1.964379530886066e-06, |
| "loss": 0.1568, |
| "step": 3275 |
| }, |
| { |
| "epoch": 1.4756262186890656, |
| "grad_norm": 0.23149031179647173, |
| "learning_rate": 1.948797015160845e-06, |
| "loss": 0.1628, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.4778761061946903, |
| "grad_norm": 0.24603772896490778, |
| "learning_rate": 1.9332615762956252e-06, |
| "loss": 0.172, |
| "step": 3285 |
| }, |
| { |
| "epoch": 1.480125993700315, |
| "grad_norm": 0.21815434321755453, |
| "learning_rate": 1.9177734539859246e-06, |
| "loss": 0.1412, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.4823758812059398, |
| "grad_norm": 0.22721807904843327, |
| "learning_rate": 1.9023328871972163e-06, |
| "loss": 0.1535, |
| "step": 3295 |
| }, |
| { |
| "epoch": 1.4846257687115645, |
| "grad_norm": 0.242196598571302, |
| "learning_rate": 1.886940114161241e-06, |
| "loss": 0.1418, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.4868756562171892, |
| "grad_norm": 0.31031701723743615, |
| "learning_rate": 1.8715953723723374e-06, |
| "loss": 0.152, |
| "step": 3305 |
| }, |
| { |
| "epoch": 1.489125543722814, |
| "grad_norm": 0.2768031393228084, |
| "learning_rate": 1.8562988985837632e-06, |
| "loss": 0.1333, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.4913754312284386, |
| "grad_norm": 0.20415150169731586, |
| "learning_rate": 1.8410509288040557e-06, |
| "loss": 0.1414, |
| "step": 3315 |
| }, |
| { |
| "epoch": 1.4936253187340633, |
| "grad_norm": 0.26667619653525043, |
| "learning_rate": 1.8258516982933905e-06, |
| "loss": 0.1468, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.495875206239688, |
| "grad_norm": 0.2944474959496459, |
| "learning_rate": 1.8107014415599416e-06, |
| "loss": 0.1191, |
| "step": 3325 |
| }, |
| { |
| "epoch": 1.4981250937453128, |
| "grad_norm": 0.2393057029593332, |
| "learning_rate": 1.7956003923562715e-06, |
| "loss": 0.1404, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.5003749812509375, |
| "grad_norm": 0.2714540493615385, |
| "learning_rate": 1.7805487836757224e-06, |
| "loss": 0.139, |
| "step": 3335 |
| }, |
| { |
| "epoch": 1.5026248687565622, |
| "grad_norm": 0.24328575957899592, |
| "learning_rate": 1.7655468477488191e-06, |
| "loss": 0.1388, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.504874756262187, |
| "grad_norm": 0.22919700731255085, |
| "learning_rate": 1.7505948160396901e-06, |
| "loss": 0.1238, |
| "step": 3345 |
| }, |
| { |
| "epoch": 1.5071246437678116, |
| "grad_norm": 0.2619372461489788, |
| "learning_rate": 1.7356929192424937e-06, |
| "loss": 0.1451, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.5093745312734363, |
| "grad_norm": 0.2086634801533094, |
| "learning_rate": 1.720841387277858e-06, |
| "loss": 0.1485, |
| "step": 3355 |
| }, |
| { |
| "epoch": 1.511624418779061, |
| "grad_norm": 0.22980427576347195, |
| "learning_rate": 1.7060404492893345e-06, |
| "loss": 0.1474, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.5138743062846858, |
| "grad_norm": 0.24590985962229212, |
| "learning_rate": 1.6912903336398677e-06, |
| "loss": 0.1375, |
| "step": 3365 |
| }, |
| { |
| "epoch": 1.5161241937903105, |
| "grad_norm": 0.25732491354780235, |
| "learning_rate": 1.6765912679082592e-06, |
| "loss": 0.1357, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.5183740812959352, |
| "grad_norm": 0.26500627710086616, |
| "learning_rate": 1.6619434788856664e-06, |
| "loss": 0.1419, |
| "step": 3375 |
| }, |
| { |
| "epoch": 1.52062396880156, |
| "grad_norm": 0.2928540052735079, |
| "learning_rate": 1.647347192572105e-06, |
| "loss": 0.1307, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.5228738563071846, |
| "grad_norm": 0.21671093221137389, |
| "learning_rate": 1.6328026341729547e-06, |
| "loss": 0.1269, |
| "step": 3385 |
| }, |
| { |
| "epoch": 1.5251237438128094, |
| "grad_norm": 0.20008846223184612, |
| "learning_rate": 1.618310028095486e-06, |
| "loss": 0.1229, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.527373631318434, |
| "grad_norm": 0.27258283595012933, |
| "learning_rate": 1.6038695979454033e-06, |
| "loss": 0.1291, |
| "step": 3395 |
| }, |
| { |
| "epoch": 1.5296235188240588, |
| "grad_norm": 0.1659681540695972, |
| "learning_rate": 1.589481566523388e-06, |
| "loss": 0.1132, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.5318734063296835, |
| "grad_norm": 0.29801173110625, |
| "learning_rate": 1.5751461558216662e-06, |
| "loss": 0.1557, |
| "step": 3405 |
| }, |
| { |
| "epoch": 1.5341232938353082, |
| "grad_norm": 0.21585855046664817, |
| "learning_rate": 1.5608635870205813e-06, |
| "loss": 0.1275, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.536373181340933, |
| "grad_norm": 0.22691724613303907, |
| "learning_rate": 1.546634080485181e-06, |
| "loss": 0.1263, |
| "step": 3415 |
| }, |
| { |
| "epoch": 1.5386230688465576, |
| "grad_norm": 0.3126367672650283, |
| "learning_rate": 1.5324578557618158e-06, |
| "loss": 0.1281, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.5408729563521824, |
| "grad_norm": 0.2605966357941338, |
| "learning_rate": 1.5183351315747618e-06, |
| "loss": 0.1334, |
| "step": 3425 |
| }, |
| { |
| "epoch": 1.543122843857807, |
| "grad_norm": 0.21630173206179193, |
| "learning_rate": 1.5042661258228268e-06, |
| "loss": 0.1372, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.5453727313634318, |
| "grad_norm": 0.26829878682557234, |
| "learning_rate": 1.4902510555760052e-06, |
| "loss": 0.1257, |
| "step": 3435 |
| }, |
| { |
| "epoch": 1.5476226188690565, |
| "grad_norm": 0.29544397855593446, |
| "learning_rate": 1.4762901370721266e-06, |
| "loss": 0.1396, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.5498725063746812, |
| "grad_norm": 0.24819039772864243, |
| "learning_rate": 1.4623835857135099e-06, |
| "loss": 0.1298, |
| "step": 3445 |
| }, |
| { |
| "epoch": 1.552122393880306, |
| "grad_norm": 0.20631029388339692, |
| "learning_rate": 1.4485316160636491e-06, |
| "loss": 0.1135, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.5543722813859309, |
| "grad_norm": 0.2588845660409855, |
| "learning_rate": 1.434734441843899e-06, |
| "loss": 0.119, |
| "step": 3455 |
| }, |
| { |
| "epoch": 1.5566221688915554, |
| "grad_norm": 0.27482300851220287, |
| "learning_rate": 1.420992275930178e-06, |
| "loss": 0.1228, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.5588720563971803, |
| "grad_norm": 0.23756828701782703, |
| "learning_rate": 1.4073053303496837e-06, |
| "loss": 0.1397, |
| "step": 3465 |
| }, |
| { |
| "epoch": 1.5611219439028048, |
| "grad_norm": 0.2565129570324179, |
| "learning_rate": 1.3936738162776269e-06, |
| "loss": 0.1171, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.5633718314084297, |
| "grad_norm": 0.23747517958547196, |
| "learning_rate": 1.3800979440339602e-06, |
| "loss": 0.1112, |
| "step": 3475 |
| }, |
| { |
| "epoch": 1.5656217189140542, |
| "grad_norm": 0.28612053881235616, |
| "learning_rate": 1.3665779230801452e-06, |
| "loss": 0.1214, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.5678716064196792, |
| "grad_norm": 0.21974317229860285, |
| "learning_rate": 1.353113962015919e-06, |
| "loss": 0.1133, |
| "step": 3485 |
| }, |
| { |
| "epoch": 1.5701214939253036, |
| "grad_norm": 0.2586485593990932, |
| "learning_rate": 1.3397062685760715e-06, |
| "loss": 0.131, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.5723713814309286, |
| "grad_norm": 0.24428264502478964, |
| "learning_rate": 1.326355049627238e-06, |
| "loss": 0.1239, |
| "step": 3495 |
| }, |
| { |
| "epoch": 1.574621268936553, |
| "grad_norm": 0.2440528536513943, |
| "learning_rate": 1.31306051116472e-06, |
| "loss": 0.1156, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.576871156442178, |
| "grad_norm": 0.25949845916894754, |
| "learning_rate": 1.299822858309292e-06, |
| "loss": 0.118, |
| "step": 3505 |
| }, |
| { |
| "epoch": 1.5791210439478025, |
| "grad_norm": 0.27496698683134035, |
| "learning_rate": 1.2866422953040458e-06, |
| "loss": 0.1364, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.5813709314534274, |
| "grad_norm": 0.22644679203329376, |
| "learning_rate": 1.273519025511236e-06, |
| "loss": 0.1242, |
| "step": 3515 |
| }, |
| { |
| "epoch": 1.583620818959052, |
| "grad_norm": 0.27460451136628766, |
| "learning_rate": 1.2604532514091444e-06, |
| "loss": 0.1179, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.5858707064646769, |
| "grad_norm": 0.20903077719719648, |
| "learning_rate": 1.2474451745889516e-06, |
| "loss": 0.1174, |
| "step": 3525 |
| }, |
| { |
| "epoch": 1.5881205939703014, |
| "grad_norm": 0.24497256736111866, |
| "learning_rate": 1.2344949957516356e-06, |
| "loss": 0.1119, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.5903704814759263, |
| "grad_norm": 0.2539761952183662, |
| "learning_rate": 1.221602914704862e-06, |
| "loss": 0.1219, |
| "step": 3535 |
| }, |
| { |
| "epoch": 1.5926203689815508, |
| "grad_norm": 0.23677806854402075, |
| "learning_rate": 1.2087691303599109e-06, |
| "loss": 0.1131, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.5948702564871757, |
| "grad_norm": 0.28943703222233913, |
| "learning_rate": 1.1959938407286099e-06, |
| "loss": 0.1265, |
| "step": 3545 |
| }, |
| { |
| "epoch": 1.5971201439928002, |
| "grad_norm": 0.26936033145050353, |
| "learning_rate": 1.1832772429202716e-06, |
| "loss": 0.1155, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.5993700314984252, |
| "grad_norm": 0.24859595390547068, |
| "learning_rate": 1.1706195331386494e-06, |
| "loss": 0.1319, |
| "step": 3555 |
| }, |
| { |
| "epoch": 1.6016199190040497, |
| "grad_norm": 0.21137786230401104, |
| "learning_rate": 1.1580209066789272e-06, |
| "loss": 0.0959, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.6038698065096746, |
| "grad_norm": 0.18826307259382147, |
| "learning_rate": 1.1454815579246874e-06, |
| "loss": 0.1162, |
| "step": 3565 |
| }, |
| { |
| "epoch": 1.606119694015299, |
| "grad_norm": 0.2059875775498964, |
| "learning_rate": 1.1330016803449224e-06, |
| "loss": 0.1079, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.608369581520924, |
| "grad_norm": 0.28679664201908944, |
| "learning_rate": 1.1205814664910464e-06, |
| "loss": 0.1323, |
| "step": 3575 |
| }, |
| { |
| "epoch": 1.6106194690265485, |
| "grad_norm": 0.23898491505271052, |
| "learning_rate": 1.1082211079939248e-06, |
| "loss": 0.1, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.6128693565321734, |
| "grad_norm": 0.2700410969754371, |
| "learning_rate": 1.0959207955609163e-06, |
| "loss": 0.107, |
| "step": 3585 |
| }, |
| { |
| "epoch": 1.6151192440377982, |
| "grad_norm": 0.21154102841364958, |
| "learning_rate": 1.083680718972938e-06, |
| "loss": 0.1126, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.6173691315434229, |
| "grad_norm": 0.19582804111079785, |
| "learning_rate": 1.0715010670815212e-06, |
| "loss": 0.1111, |
| "step": 3595 |
| }, |
| { |
| "epoch": 1.6196190190490476, |
| "grad_norm": 0.2154658262674778, |
| "learning_rate": 1.059382027805914e-06, |
| "loss": 0.1025, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.6218689065546723, |
| "grad_norm": 0.30677049526532074, |
| "learning_rate": 1.0473237881301763e-06, |
| "loss": 0.1201, |
| "step": 3605 |
| }, |
| { |
| "epoch": 1.624118794060297, |
| "grad_norm": 0.23614701148998188, |
| "learning_rate": 1.0353265341002916e-06, |
| "loss": 0.104, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.6263686815659217, |
| "grad_norm": 0.24690793627028748, |
| "learning_rate": 1.0233904508212955e-06, |
| "loss": 0.1078, |
| "step": 3615 |
| }, |
| { |
| "epoch": 1.6286185690715465, |
| "grad_norm": 0.31258122069910355, |
| "learning_rate": 1.0115157224544313e-06, |
| "loss": 0.1036, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.6308684565771712, |
| "grad_norm": 0.23164502515145138, |
| "learning_rate": 9.997025322142934e-07, |
| "loss": 0.1082, |
| "step": 3625 |
| }, |
| { |
| "epoch": 1.6331183440827959, |
| "grad_norm": 0.24106743301610264, |
| "learning_rate": 9.87951062366011e-07, |
| "loss": 0.1197, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.6353682315884206, |
| "grad_norm": 0.26573732918101894, |
| "learning_rate": 9.762614942224312e-07, |
| "loss": 0.1205, |
| "step": 3635 |
| }, |
| { |
| "epoch": 1.6376181190940453, |
| "grad_norm": 0.11482579343049812, |
| "learning_rate": 9.646340081413225e-07, |
| "loss": 0.0915, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.63986800659967, |
| "grad_norm": 0.21075003083613178, |
| "learning_rate": 9.530687835225916e-07, |
| "loss": 0.097, |
| "step": 3645 |
| }, |
| { |
| "epoch": 1.6421178941052947, |
| "grad_norm": 0.23753636897967206, |
| "learning_rate": 9.415659988055215e-07, |
| "loss": 0.1042, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.6443677816109195, |
| "grad_norm": 0.24065236956197258, |
| "learning_rate": 9.30125831466005e-07, |
| "loss": 0.1021, |
| "step": 3655 |
| }, |
| { |
| "epoch": 1.6466176691165442, |
| "grad_norm": 0.28653116996485667, |
| "learning_rate": 9.187484580138184e-07, |
| "loss": 0.1153, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.6488675566221689, |
| "grad_norm": 0.2563141158700858, |
| "learning_rate": 9.074340539898962e-07, |
| "loss": 0.1106, |
| "step": 3665 |
| }, |
| { |
| "epoch": 1.6511174441277936, |
| "grad_norm": 0.29385405444476315, |
| "learning_rate": 8.961827939636198e-07, |
| "loss": 0.1087, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.6533673316334183, |
| "grad_norm": 0.2631541250069433, |
| "learning_rate": 8.849948515301188e-07, |
| "loss": 0.0978, |
| "step": 3675 |
| }, |
| { |
| "epoch": 1.655617219139043, |
| "grad_norm": 0.2722585679724958, |
| "learning_rate": 8.738703993076087e-07, |
| "loss": 0.109, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.6578671066446677, |
| "grad_norm": 0.3065358825170482, |
| "learning_rate": 8.62809608934711e-07, |
| "loss": 0.1019, |
| "step": 3685 |
| }, |
| { |
| "epoch": 1.6601169941502925, |
| "grad_norm": 0.23763230365231583, |
| "learning_rate": 8.518126510678138e-07, |
| "loss": 0.1138, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.6623668816559172, |
| "grad_norm": 0.2532402386408982, |
| "learning_rate": 8.408796953784365e-07, |
| "loss": 0.1102, |
| "step": 3695 |
| }, |
| { |
| "epoch": 1.6646167691615419, |
| "grad_norm": 0.23005454958970656, |
| "learning_rate": 8.30010910550611e-07, |
| "loss": 0.1017, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.6668666566671666, |
| "grad_norm": 0.2194832160899072, |
| "learning_rate": 8.19206464278281e-07, |
| "loss": 0.0985, |
| "step": 3705 |
| }, |
| { |
| "epoch": 1.6691165441727913, |
| "grad_norm": 0.32261159442961446, |
| "learning_rate": 8.084665232627165e-07, |
| "loss": 0.1115, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.671366431678416, |
| "grad_norm": 0.28020915769071963, |
| "learning_rate": 7.977912532099336e-07, |
| "loss": 0.1072, |
| "step": 3715 |
| }, |
| { |
| "epoch": 1.6736163191840407, |
| "grad_norm": 0.2587579519713862, |
| "learning_rate": 7.871808188281461e-07, |
| "loss": 0.0884, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.6758662066896655, |
| "grad_norm": 0.25937560152984207, |
| "learning_rate": 7.766353838252227e-07, |
| "loss": 0.0963, |
| "step": 3725 |
| }, |
| { |
| "epoch": 1.6781160941952904, |
| "grad_norm": 0.2623209006276337, |
| "learning_rate": 7.661551109061593e-07, |
| "loss": 0.0945, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.680365981700915, |
| "grad_norm": 0.2734268883455671, |
| "learning_rate": 7.557401617705673e-07, |
| "loss": 0.0962, |
| "step": 3735 |
| }, |
| { |
| "epoch": 1.6826158692065398, |
| "grad_norm": 0.2600567478099387, |
| "learning_rate": 7.453906971101826e-07, |
| "loss": 0.0965, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.6848657567121643, |
| "grad_norm": 0.21549655386834185, |
| "learning_rate": 7.35106876606384e-07, |
| "loss": 0.0802, |
| "step": 3745 |
| }, |
| { |
| "epoch": 1.6871156442177893, |
| "grad_norm": 0.23243787528580465, |
| "learning_rate": 7.248888589277275e-07, |
| "loss": 0.0979, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.6893655317234137, |
| "grad_norm": 0.2724000087724297, |
| "learning_rate": 7.147368017275075e-07, |
| "loss": 0.0954, |
| "step": 3755 |
| }, |
| { |
| "epoch": 1.6916154192290387, |
| "grad_norm": 0.27067809227580786, |
| "learning_rate": 7.046508616413078e-07, |
| "loss": 0.0921, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.6938653067346632, |
| "grad_norm": 0.2211597100684428, |
| "learning_rate": 6.946311942846002e-07, |
| "loss": 0.1051, |
| "step": 3765 |
| }, |
| { |
| "epoch": 1.6961151942402881, |
| "grad_norm": 0.24002929405082607, |
| "learning_rate": 6.846779542503384e-07, |
| "loss": 0.0899, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.6983650817459126, |
| "grad_norm": 0.19994511733272957, |
| "learning_rate": 6.747912951065722e-07, |
| "loss": 0.0914, |
| "step": 3775 |
| }, |
| { |
| "epoch": 1.7006149692515375, |
| "grad_norm": 0.3127468963077912, |
| "learning_rate": 6.649713693940718e-07, |
| "loss": 0.1032, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.702864856757162, |
| "grad_norm": 0.23642047104133684, |
| "learning_rate": 6.552183286239899e-07, |
| "loss": 0.087, |
| "step": 3785 |
| }, |
| { |
| "epoch": 1.705114744262787, |
| "grad_norm": 0.2068805673647048, |
| "learning_rate": 6.455323232755095e-07, |
| "loss": 0.093, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.7073646317684115, |
| "grad_norm": 0.26466757083784725, |
| "learning_rate": 6.35913502793527e-07, |
| "loss": 0.0857, |
| "step": 3795 |
| }, |
| { |
| "epoch": 1.7096145192740364, |
| "grad_norm": 0.20726909159845547, |
| "learning_rate": 6.263620155863492e-07, |
| "loss": 0.0863, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.711864406779661, |
| "grad_norm": 0.21055369695393691, |
| "learning_rate": 6.168780090233994e-07, |
| "loss": 0.0916, |
| "step": 3805 |
| }, |
| { |
| "epoch": 1.7141142942852858, |
| "grad_norm": 0.2778245613335268, |
| "learning_rate": 6.07461629432945e-07, |
| "loss": 0.0917, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.7163641817909103, |
| "grad_norm": 0.1999858453456665, |
| "learning_rate": 5.981130220998444e-07, |
| "loss": 0.0746, |
| "step": 3815 |
| }, |
| { |
| "epoch": 1.7186140692965353, |
| "grad_norm": 0.19402830775044652, |
| "learning_rate": 5.888323312632948e-07, |
| "loss": 0.094, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.7208639568021598, |
| "grad_norm": 0.26795633448623635, |
| "learning_rate": 5.796197001146164e-07, |
| "loss": 0.0884, |
| "step": 3825 |
| }, |
| { |
| "epoch": 1.7231138443077847, |
| "grad_norm": 0.2390770570872304, |
| "learning_rate": 5.704752707950412e-07, |
| "loss": 0.0905, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.7253637318134092, |
| "grad_norm": 0.16942131967267335, |
| "learning_rate": 5.613991843935179e-07, |
| "loss": 0.0827, |
| "step": 3835 |
| }, |
| { |
| "epoch": 1.7276136193190341, |
| "grad_norm": 0.2027819269347922, |
| "learning_rate": 5.523915809445313e-07, |
| "loss": 0.0832, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.7298635068246586, |
| "grad_norm": 0.22046848444535852, |
| "learning_rate": 5.434525994259531e-07, |
| "loss": 0.0886, |
| "step": 3845 |
| }, |
| { |
| "epoch": 1.7321133943302836, |
| "grad_norm": 0.2595049287219421, |
| "learning_rate": 5.345823777568859e-07, |
| "loss": 0.0937, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.734363281835908, |
| "grad_norm": 0.24929926941084485, |
| "learning_rate": 5.25781052795541e-07, |
| "loss": 0.0787, |
| "step": 3855 |
| }, |
| { |
| "epoch": 1.736613169341533, |
| "grad_norm": 0.24888880504119226, |
| "learning_rate": 5.170487603371266e-07, |
| "loss": 0.0845, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.7388630568471577, |
| "grad_norm": 0.2604651193142029, |
| "learning_rate": 5.083856351117511e-07, |
| "loss": 0.0786, |
| "step": 3865 |
| }, |
| { |
| "epoch": 1.7411129443527824, |
| "grad_norm": 0.20003829357925593, |
| "learning_rate": 4.997918107823446e-07, |
| "loss": 0.08, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.7433628318584071, |
| "grad_norm": 0.2051972235262297, |
| "learning_rate": 4.912674199425999e-07, |
| "loss": 0.0853, |
| "step": 3875 |
| }, |
| { |
| "epoch": 1.7456127193640318, |
| "grad_norm": 0.2549934939375718, |
| "learning_rate": 4.828125941149197e-07, |
| "loss": 0.0844, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.7478626068696566, |
| "grad_norm": 0.3024383705811877, |
| "learning_rate": 4.7442746374839363e-07, |
| "loss": 0.0846, |
| "step": 3885 |
| }, |
| { |
| "epoch": 1.7501124943752813, |
| "grad_norm": 0.21907085756014216, |
| "learning_rate": 4.6611215821678546e-07, |
| "loss": 0.0839, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.752362381880906, |
| "grad_norm": 0.2241634174428953, |
| "learning_rate": 4.578668058165325e-07, |
| "loss": 0.0758, |
| "step": 3895 |
| }, |
| { |
| "epoch": 1.7546122693865307, |
| "grad_norm": 0.26223122192387566, |
| "learning_rate": 4.4969153376476726e-07, |
| "loss": 0.0814, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.7568621568921554, |
| "grad_norm": 0.3009548280743066, |
| "learning_rate": 4.415864681973608e-07, |
| "loss": 0.079, |
| "step": 3905 |
| }, |
| { |
| "epoch": 1.7591120443977801, |
| "grad_norm": 0.2282784959309563, |
| "learning_rate": 4.335517341669676e-07, |
| "loss": 0.084, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.7613619319034048, |
| "grad_norm": 0.2736169743202772, |
| "learning_rate": 4.255874556411016e-07, |
| "loss": 0.0845, |
| "step": 3915 |
| }, |
| { |
| "epoch": 1.7636118194090296, |
| "grad_norm": 0.2273826431677496, |
| "learning_rate": 4.176937555002231e-07, |
| "loss": 0.0789, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.7658617069146543, |
| "grad_norm": 0.2562359538975016, |
| "learning_rate": 4.098707555358411e-07, |
| "loss": 0.0841, |
| "step": 3925 |
| }, |
| { |
| "epoch": 1.768111594420279, |
| "grad_norm": 0.20875979878240594, |
| "learning_rate": 4.0211857644863404e-07, |
| "loss": 0.0868, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.7703614819259037, |
| "grad_norm": 0.23466550669048516, |
| "learning_rate": 3.9443733784659324e-07, |
| "loss": 0.0863, |
| "step": 3935 |
| }, |
| { |
| "epoch": 1.7726113694315284, |
| "grad_norm": 0.22510691667210447, |
| "learning_rate": 3.8682715824316594e-07, |
| "loss": 0.0966, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.7748612569371531, |
| "grad_norm": 0.20906650720115227, |
| "learning_rate": 3.792881550554373e-07, |
| "loss": 0.0792, |
| "step": 3945 |
| }, |
| { |
| "epoch": 1.7771111444427778, |
| "grad_norm": 0.27079014394170864, |
| "learning_rate": 3.7182044460231605e-07, |
| "loss": 0.0793, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.7793610319484026, |
| "grad_norm": 0.1857139907781371, |
| "learning_rate": 3.6442414210273834e-07, |
| "loss": 0.0798, |
| "step": 3955 |
| }, |
| { |
| "epoch": 1.7816109194540273, |
| "grad_norm": 0.2556479800636284, |
| "learning_rate": 3.570993616738866e-07, |
| "loss": 0.0848, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.783860806959652, |
| "grad_norm": 0.20166968130742072, |
| "learning_rate": 3.498462163294386e-07, |
| "loss": 0.0811, |
| "step": 3965 |
| }, |
| { |
| "epoch": 1.7861106944652767, |
| "grad_norm": 0.2536060487731229, |
| "learning_rate": 3.426648179778147e-07, |
| "loss": 0.0953, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.7883605819709014, |
| "grad_norm": 0.2112175288890015, |
| "learning_rate": 3.355552774204551e-07, |
| "loss": 0.0762, |
| "step": 3975 |
| }, |
| { |
| "epoch": 1.7906104694765261, |
| "grad_norm": 0.2295537149666403, |
| "learning_rate": 3.2851770435010864e-07, |
| "loss": 0.0767, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.7928603569821508, |
| "grad_norm": 0.2866042831708544, |
| "learning_rate": 3.215522073491434e-07, |
| "loss": 0.0822, |
| "step": 3985 |
| }, |
| { |
| "epoch": 1.7951102444877756, |
| "grad_norm": 0.21056995297624528, |
| "learning_rate": 3.1465889388786697e-07, |
| "loss": 0.0884, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.7973601319934003, |
| "grad_norm": 0.2722790864581489, |
| "learning_rate": 3.0783787032287407e-07, |
| "loss": 0.0881, |
| "step": 3995 |
| }, |
| { |
| "epoch": 1.799610019499025, |
| "grad_norm": 0.2828340673761126, |
| "learning_rate": 3.010892418953981e-07, |
| "loss": 0.0791, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.80185990700465, |
| "grad_norm": 0.2144007429401686, |
| "learning_rate": 2.9441311272969343e-07, |
| "loss": 0.067, |
| "step": 4005 |
| }, |
| { |
| "epoch": 1.8041097945102744, |
| "grad_norm": 0.21922591950990084, |
| "learning_rate": 2.878095858314278e-07, |
| "loss": 0.0702, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.8063596820158994, |
| "grad_norm": 0.20896747701719126, |
| "learning_rate": 2.812787630860919e-07, |
| "loss": 0.078, |
| "step": 4015 |
| }, |
| { |
| "epoch": 1.8086095695215239, |
| "grad_norm": 0.21476087685701412, |
| "learning_rate": 2.7482074525742477e-07, |
| "loss": 0.0688, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.8108594570271488, |
| "grad_norm": 0.24301133633323727, |
| "learning_rate": 2.6843563198586553e-07, |
| "loss": 0.0804, |
| "step": 4025 |
| }, |
| { |
| "epoch": 1.8131093445327733, |
| "grad_norm": 0.23191122306412676, |
| "learning_rate": 2.621235217870116e-07, |
| "loss": 0.0861, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.8153592320383982, |
| "grad_norm": 0.207067846018882, |
| "learning_rate": 2.55884512050098e-07, |
| "loss": 0.0886, |
| "step": 4035 |
| }, |
| { |
| "epoch": 1.8176091195440227, |
| "grad_norm": 0.22298796620779232, |
| "learning_rate": 2.4971869903649916e-07, |
| "loss": 0.0841, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.8198590070496476, |
| "grad_norm": 0.2933340830070678, |
| "learning_rate": 2.436261778782378e-07, |
| "loss": 0.0794, |
| "step": 4045 |
| }, |
| { |
| "epoch": 1.8221088945552721, |
| "grad_norm": 0.21972032956327708, |
| "learning_rate": 2.3760704257652145e-07, |
| "loss": 0.0774, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.824358782060897, |
| "grad_norm": 0.2597840708263632, |
| "learning_rate": 2.3166138600029198e-07, |
| "loss": 0.0772, |
| "step": 4055 |
| }, |
| { |
| "epoch": 1.8266086695665216, |
| "grad_norm": 0.24945833711183132, |
| "learning_rate": 2.257892998847916e-07, |
| "loss": 0.0758, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.8288585570721465, |
| "grad_norm": 0.2108872276998458, |
| "learning_rate": 2.1999087483014437e-07, |
| "loss": 0.0742, |
| "step": 4065 |
| }, |
| { |
| "epoch": 1.831108444577771, |
| "grad_norm": 0.2561087507310594, |
| "learning_rate": 2.1426620029996516e-07, |
| "loss": 0.078, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.833358332083396, |
| "grad_norm": 0.18787526118382977, |
| "learning_rate": 2.08615364619974e-07, |
| "loss": 0.0679, |
| "step": 4075 |
| }, |
| { |
| "epoch": 1.8356082195890204, |
| "grad_norm": 0.224194236181041, |
| "learning_rate": 2.0303845497663566e-07, |
| "loss": 0.0746, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.8378581070946454, |
| "grad_norm": 0.2392479281560687, |
| "learning_rate": 1.9753555741581277e-07, |
| "loss": 0.0764, |
| "step": 4085 |
| }, |
| { |
| "epoch": 1.8401079946002699, |
| "grad_norm": 0.21912472032156466, |
| "learning_rate": 1.921067568414403e-07, |
| "loss": 0.079, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.8423578821058948, |
| "grad_norm": 0.25670091205707113, |
| "learning_rate": 1.8675213701421223e-07, |
| "loss": 0.0835, |
| "step": 4095 |
| }, |
| { |
| "epoch": 1.8446077696115193, |
| "grad_norm": 0.2594177639066407, |
| "learning_rate": 1.814717805502958e-07, |
| "loss": 0.0803, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.8468576571171442, |
| "grad_norm": 0.21469179882031758, |
| "learning_rate": 1.762657689200481e-07, |
| "loss": 0.0764, |
| "step": 4105 |
| }, |
| { |
| "epoch": 1.8491075446227687, |
| "grad_norm": 0.2209544552641529, |
| "learning_rate": 1.7113418244676493e-07, |
| "loss": 0.0785, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.8513574321283937, |
| "grad_norm": 0.24145566509686753, |
| "learning_rate": 1.6607710030544122e-07, |
| "loss": 0.0719, |
| "step": 4115 |
| }, |
| { |
| "epoch": 1.8536073196340181, |
| "grad_norm": 0.2093424795846333, |
| "learning_rate": 1.6109460052154802e-07, |
| "loss": 0.0764, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.855857207139643, |
| "grad_norm": 0.24494002119656788, |
| "learning_rate": 1.561867599698258e-07, |
| "loss": 0.0798, |
| "step": 4125 |
| }, |
| { |
| "epoch": 1.8581070946452676, |
| "grad_norm": 0.22975170782618237, |
| "learning_rate": 1.5135365437310534e-07, |
| "loss": 0.0837, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.8603569821508925, |
| "grad_norm": 0.23509810998937047, |
| "learning_rate": 1.4659535830113368e-07, |
| "loss": 0.0784, |
| "step": 4135 |
| }, |
| { |
| "epoch": 1.8626068696565172, |
| "grad_norm": 0.21476403073025796, |
| "learning_rate": 1.419119451694262e-07, |
| "loss": 0.0735, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.864856757162142, |
| "grad_norm": 0.206525508501757, |
| "learning_rate": 1.3730348723813181e-07, |
| "loss": 0.0693, |
| "step": 4145 |
| }, |
| { |
| "epoch": 1.8671066446677667, |
| "grad_norm": 0.23677883602034755, |
| "learning_rate": 1.3277005561092016e-07, |
| "loss": 0.0765, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.8693565321733914, |
| "grad_norm": 0.22864240045396528, |
| "learning_rate": 1.2831172023388349e-07, |
| "loss": 0.0682, |
| "step": 4155 |
| }, |
| { |
| "epoch": 1.871606419679016, |
| "grad_norm": 0.23988380079630575, |
| "learning_rate": 1.2392854989445925e-07, |
| "loss": 0.0792, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.8738563071846408, |
| "grad_norm": 0.21420842660768485, |
| "learning_rate": 1.196206122203647e-07, |
| "loss": 0.0723, |
| "step": 4165 |
| }, |
| { |
| "epoch": 1.8761061946902655, |
| "grad_norm": 0.23599002078153936, |
| "learning_rate": 1.153879736785568e-07, |
| "loss": 0.0745, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.8783560821958902, |
| "grad_norm": 0.19939870757943454, |
| "learning_rate": 1.112306995742074e-07, |
| "loss": 0.0764, |
| "step": 4175 |
| }, |
| { |
| "epoch": 1.880605969701515, |
| "grad_norm": 0.3002967324611831, |
| "learning_rate": 1.0714885404969288e-07, |
| "loss": 0.0745, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.8828558572071397, |
| "grad_norm": 0.2534936155963215, |
| "learning_rate": 1.031425000836056e-07, |
| "loss": 0.0805, |
| "step": 4185 |
| }, |
| { |
| "epoch": 1.8851057447127644, |
| "grad_norm": 0.2099770583680312, |
| "learning_rate": 9.921169948978293e-08, |
| "loss": 0.0696, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.887355632218389, |
| "grad_norm": 0.21406906185927677, |
| "learning_rate": 9.535651291635362e-08, |
| "loss": 0.0695, |
| "step": 4195 |
| }, |
| { |
| "epoch": 1.8896055197240138, |
| "grad_norm": 0.222365141419886, |
| "learning_rate": 9.157699984480018e-08, |
| "loss": 0.0747, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.8918554072296385, |
| "grad_norm": 0.26953866043532604, |
| "learning_rate": 8.787321858904241e-08, |
| "loss": 0.0707, |
| "step": 4205 |
| }, |
| { |
| "epoch": 1.8941052947352632, |
| "grad_norm": 0.2625647283056072, |
| "learning_rate": 8.424522629453924e-08, |
| "loss": 0.0743, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.896355182240888, |
| "grad_norm": 0.23612000625927326, |
| "learning_rate": 8.06930789374033e-08, |
| "loss": 0.0763, |
| "step": 4215 |
| }, |
| { |
| "epoch": 1.8986050697465127, |
| "grad_norm": 0.20902097825740532, |
| "learning_rate": 7.721683132354163e-08, |
| "loss": 0.0744, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.9008549572521374, |
| "grad_norm": 0.24284466003504482, |
| "learning_rate": 7.381653708780578e-08, |
| "loss": 0.0741, |
| "step": 4225 |
| }, |
| { |
| "epoch": 1.903104844757762, |
| "grad_norm": 0.208669560652797, |
| "learning_rate": 7.049224869316807e-08, |
| "loss": 0.0711, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.9053547322633868, |
| "grad_norm": 0.2250655987088142, |
| "learning_rate": 6.724401742990993e-08, |
| "loss": 0.0689, |
| "step": 4235 |
| }, |
| { |
| "epoch": 1.9076046197690115, |
| "grad_norm": 0.21694307565283746, |
| "learning_rate": 6.407189341483044e-08, |
| "loss": 0.0761, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.9098545072746362, |
| "grad_norm": 0.19311480533839126, |
| "learning_rate": 6.097592559047405e-08, |
| "loss": 0.0743, |
| "step": 4245 |
| }, |
| { |
| "epoch": 1.912104394780261, |
| "grad_norm": 0.18525264406718234, |
| "learning_rate": 5.795616172437624e-08, |
| "loss": 0.0725, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.9143542822858857, |
| "grad_norm": 0.1651176792199085, |
| "learning_rate": 5.501264840832299e-08, |
| "loss": 0.0761, |
| "step": 4255 |
| }, |
| { |
| "epoch": 1.9166041697915104, |
| "grad_norm": 0.2260442730154628, |
| "learning_rate": 5.214543105763692e-08, |
| "loss": 0.0889, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.918854057297135, |
| "grad_norm": 0.18715741530378352, |
| "learning_rate": 4.935455391047228e-08, |
| "loss": 0.0663, |
| "step": 4265 |
| }, |
| { |
| "epoch": 1.9211039448027598, |
| "grad_norm": 0.27005890292615836, |
| "learning_rate": 4.664006002713495e-08, |
| "loss": 0.0728, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.9233538323083845, |
| "grad_norm": 0.2849847348297522, |
| "learning_rate": 4.400199128941573e-08, |
| "loss": 0.077, |
| "step": 4275 |
| }, |
| { |
| "epoch": 1.9256037198140095, |
| "grad_norm": 0.21209255590676854, |
| "learning_rate": 4.1440388399948686e-08, |
| "loss": 0.0668, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.927853607319634, |
| "grad_norm": 0.24917025560084188, |
| "learning_rate": 3.8955290881576566e-08, |
| "loss": 0.0731, |
| "step": 4285 |
| }, |
| { |
| "epoch": 1.930103494825259, |
| "grad_norm": 0.18031075827239754, |
| "learning_rate": 3.654673707674639e-08, |
| "loss": 0.0755, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.9323533823308834, |
| "grad_norm": 0.29159777908954887, |
| "learning_rate": 3.4214764146915936e-08, |
| "loss": 0.0771, |
| "step": 4295 |
| }, |
| { |
| "epoch": 1.9346032698365083, |
| "grad_norm": 0.2515730214349518, |
| "learning_rate": 3.195940807198039e-08, |
| "loss": 0.0718, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.9368531573421328, |
| "grad_norm": 0.2359716229508164, |
| "learning_rate": 2.9780703649716637e-08, |
| "loss": 0.075, |
| "step": 4305 |
| }, |
| { |
| "epoch": 1.9391030448477578, |
| "grad_norm": 0.2279891015303591, |
| "learning_rate": 2.767868449524813e-08, |
| "loss": 0.071, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.9413529323533822, |
| "grad_norm": 0.24505091349172803, |
| "learning_rate": 2.5653383040524228e-08, |
| "loss": 0.074, |
| "step": 4315 |
| }, |
| { |
| "epoch": 1.9436028198590072, |
| "grad_norm": 0.2478550376131261, |
| "learning_rate": 2.370483053382111e-08, |
| "loss": 0.0771, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.9458527073646317, |
| "grad_norm": 0.2193182868657674, |
| "learning_rate": 2.183305703925831e-08, |
| "loss": 0.0756, |
| "step": 4325 |
| }, |
| { |
| "epoch": 1.9481025948702566, |
| "grad_norm": 0.2746040123293314, |
| "learning_rate": 2.0038091436337392e-08, |
| "loss": 0.0684, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.950352482375881, |
| "grad_norm": 0.27001950154682536, |
| "learning_rate": 1.8319961419493436e-08, |
| "loss": 0.0776, |
| "step": 4335 |
| }, |
| { |
| "epoch": 1.952602369881506, |
| "grad_norm": 0.17878344688895553, |
| "learning_rate": 1.667869349766982e-08, |
| "loss": 0.0684, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.9548522573871305, |
| "grad_norm": 0.2242691348688971, |
| "learning_rate": 1.5114312993908532e-08, |
| "loss": 0.0775, |
| "step": 4345 |
| }, |
| { |
| "epoch": 1.9571021448927555, |
| "grad_norm": 0.30659381638740507, |
| "learning_rate": 1.3626844044957733e-08, |
| "loss": 0.0742, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.95935203239838, |
| "grad_norm": 0.22927922548010668, |
| "learning_rate": 1.2216309600903142e-08, |
| "loss": 0.0758, |
| "step": 4355 |
| }, |
| { |
| "epoch": 1.961601919904005, |
| "grad_norm": 0.24862858256846118, |
| "learning_rate": 1.088273142481111e-08, |
| "loss": 0.0716, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.9638518074096294, |
| "grad_norm": 0.18094046454860382, |
| "learning_rate": 9.626130092393326e-09, |
| "loss": 0.0708, |
| "step": 4365 |
| }, |
| { |
| "epoch": 1.9661016949152543, |
| "grad_norm": 0.22402544663253343, |
| "learning_rate": 8.446524991689298e-09, |
| "loss": 0.0724, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.9683515824208788, |
| "grad_norm": 0.1742440920374198, |
| "learning_rate": 7.343934322767699e-09, |
| "loss": 0.0776, |
| "step": 4375 |
| }, |
| { |
| "epoch": 1.9706014699265038, |
| "grad_norm": 0.22337791056104178, |
| "learning_rate": 6.318375097446039e-09, |
| "loss": 0.0662, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.9728513574321282, |
| "grad_norm": 0.20821619733680874, |
| "learning_rate": 5.369863139026432e-09, |
| "loss": 0.0823, |
| "step": 4385 |
| }, |
| { |
| "epoch": 1.9751012449377532, |
| "grad_norm": 0.28459129908034536, |
| "learning_rate": 4.498413082053566e-09, |
| "loss": 0.0847, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.9773511324433777, |
| "grad_norm": 0.2487528636409247, |
| "learning_rate": 3.704038372085994e-09, |
| "loss": 0.0812, |
| "step": 4395 |
| }, |
| { |
| "epoch": 1.9796010199490026, |
| "grad_norm": 0.20877793863085078, |
| "learning_rate": 2.986751265493526e-09, |
| "loss": 0.0726, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.981850907454627, |
| "grad_norm": 0.2843353007301253, |
| "learning_rate": 2.3465628292623776e-09, |
| "loss": 0.0696, |
| "step": 4405 |
| }, |
| { |
| "epoch": 1.984100794960252, |
| "grad_norm": 0.20964930087522637, |
| "learning_rate": 1.7834829408286402e-09, |
| "loss": 0.0669, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.9863506824658768, |
| "grad_norm": 0.18510001166373868, |
| "learning_rate": 1.297520287923404e-09, |
| "loss": 0.0732, |
| "step": 4415 |
| }, |
| { |
| "epoch": 1.9886005699715015, |
| "grad_norm": 0.1927246478831311, |
| "learning_rate": 8.886823684417512e-10, |
| "loss": 0.0798, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.9908504574771262, |
| "grad_norm": 0.1773427078092757, |
| "learning_rate": 5.56975490322853e-10, |
| "loss": 0.0759, |
| "step": 4425 |
| }, |
| { |
| "epoch": 1.993100344982751, |
| "grad_norm": 0.2305382589496537, |
| "learning_rate": 3.0240477145559997e-10, |
| "loss": 0.0862, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.9953502324883756, |
| "grad_norm": 0.28373308100373656, |
| "learning_rate": 1.24974139599221e-10, |
| "loss": 0.0769, |
| "step": 4435 |
| }, |
| { |
| "epoch": 1.9976001199940003, |
| "grad_norm": 0.19382710057534525, |
| "learning_rate": 2.4686332322221286e-11, |
| "loss": 0.0755, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.9994000299985002, |
| "eval_loss": 0.08069541305303574, |
| "eval_runtime": 54.013, |
| "eval_samples_per_second": 19.958, |
| "eval_steps_per_second": 4.999, |
| "step": 4444 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 4444, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2064109084748546e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|