{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9994000299985002,
  "eval_steps": 500,
  "global_step": 4444,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.00044997750112494374, "grad_norm": 4.529098245180452, "learning_rate": 2.2471910112359554e-08, "loss": 2.198, "step": 1},
    {"epoch": 0.0022498875056247186, "grad_norm": 4.089353425911041, "learning_rate": 1.1235955056179776e-07, "loss": 2.1345, "step": 5},
    {"epoch": 0.004499775011249437, "grad_norm": 3.871692712739612, "learning_rate": 2.247191011235955e-07, "loss": 2.1219, "step": 10},
    {"epoch": 0.006749662516874156, "grad_norm": 3.48717161147707, "learning_rate": 3.3707865168539325e-07, "loss": 2.1269, "step": 15},
    {"epoch": 0.008999550022498875, "grad_norm": 2.757793504494978, "learning_rate": 4.49438202247191e-07, "loss": 1.9716, "step": 20},
    {"epoch": 0.011249437528123594, "grad_norm": 1.888825489292899, "learning_rate": 5.617977528089888e-07, "loss": 1.8304, "step": 25},
    {"epoch": 0.013499325033748313, "grad_norm": 1.775898351624132, "learning_rate": 6.741573033707865e-07, "loss": 1.5734, "step": 30},
    {"epoch": 0.01574921253937303, "grad_norm": 1.3767992538309424, "learning_rate": 7.865168539325843e-07, "loss": 1.3449, "step": 35},
    {"epoch": 0.01799910004499775, "grad_norm": 0.640753938304731, "learning_rate": 8.98876404494382e-07, "loss": 1.033, "step": 40},
    {"epoch": 0.020248987550622467, "grad_norm": 0.5235162453528913, "learning_rate": 1.01123595505618e-06, "loss": 0.8949, "step": 45},
    {"epoch": 0.02249887505624719, "grad_norm": 0.5107836396937878, "learning_rate": 1.1235955056179777e-06, "loss": 0.8114, "step": 50},
    {"epoch": 0.024748762561871907, "grad_norm": 0.4706639289305382, "learning_rate": 1.2359550561797752e-06, "loss": 0.7441, "step": 55},
    {"epoch": 0.026998650067496625, "grad_norm": 0.40589783510545635, "learning_rate": 1.348314606741573e-06, "loss": 0.69, "step": 60},
    {"epoch": 0.029248537573121344, "grad_norm": 0.35422872028585745, "learning_rate": 1.4606741573033708e-06, "loss": 0.6444, "step": 65},
    {"epoch": 0.03149842507874606, "grad_norm": 0.29411265606808407, "learning_rate": 1.5730337078651686e-06, "loss": 0.6228, "step": 70},
    {"epoch": 0.03374831258437078, "grad_norm": 0.26693653586148297, "learning_rate": 1.6853932584269663e-06, "loss": 0.5582, "step": 75},
    {"epoch": 0.0359982000899955, "grad_norm": 0.2681533501424792, "learning_rate": 1.797752808988764e-06, "loss": 0.556, "step": 80},
    {"epoch": 0.03824808759562022, "grad_norm": 0.25571380427813706, "learning_rate": 1.910112359550562e-06, "loss": 0.5195, "step": 85},
    {"epoch": 0.040497975101244935, "grad_norm": 0.2711200571248154, "learning_rate": 2.02247191011236e-06, "loss": 0.5143, "step": 90},
    {"epoch": 0.042747862606869656, "grad_norm": 0.24950121635592964, "learning_rate": 2.1348314606741574e-06, "loss": 0.494, "step": 95},
    {"epoch": 0.04499775011249438, "grad_norm": 0.27150452317695684, "learning_rate": 2.2471910112359554e-06, "loss": 0.4789, "step": 100},
    {"epoch": 0.04724763761811909, "grad_norm": 0.28474082113260213, "learning_rate": 2.359550561797753e-06, "loss": 0.4491, "step": 105},
    {"epoch": 0.049497525123743814, "grad_norm": 0.3015594402280247, "learning_rate": 2.4719101123595505e-06, "loss": 0.4403, "step": 110},
    {"epoch": 0.05174741262936853, "grad_norm": 0.30222867324963326, "learning_rate": 2.584269662921349e-06, "loss": 0.4484, "step": 115},
    {"epoch": 0.05399730013499325, "grad_norm": 0.273012018841968, "learning_rate": 2.696629213483146e-06, "loss": 0.4261, "step": 120},
    {"epoch": 0.05624718764061797, "grad_norm": 0.26965584672677667, "learning_rate": 2.8089887640449444e-06, "loss": 0.4322, "step": 125},
    {"epoch": 0.05849707514624269, "grad_norm": 0.26746073049580904, "learning_rate": 2.9213483146067416e-06, "loss": 0.4639, "step": 130},
    {"epoch": 0.06074696265186741, "grad_norm": 0.23633644420426006, "learning_rate": 3.03370786516854e-06, "loss": 0.4599, "step": 135},
    {"epoch": 0.06299685015749212, "grad_norm": 0.23444605189207685, "learning_rate": 3.146067415730337e-06, "loss": 0.3872, "step": 140},
    {"epoch": 0.06524673766311684, "grad_norm": 0.2498458749808252, "learning_rate": 3.258426966292135e-06, "loss": 0.4033, "step": 145},
    {"epoch": 0.06749662516874157, "grad_norm": 0.20527616283239267, "learning_rate": 3.3707865168539327e-06, "loss": 0.4029, "step": 150},
    {"epoch": 0.06974651267436628, "grad_norm": 0.1929264923191392, "learning_rate": 3.4831460674157306e-06, "loss": 0.3972, "step": 155},
    {"epoch": 0.071996400179991, "grad_norm": 0.2053416646605216, "learning_rate": 3.595505617977528e-06, "loss": 0.4048, "step": 160},
    {"epoch": 0.07424628768561573, "grad_norm": 0.21703322812039422, "learning_rate": 3.707865168539326e-06, "loss": 0.4141, "step": 165},
    {"epoch": 0.07649617519124044, "grad_norm": 0.1891382535207117, "learning_rate": 3.820224719101124e-06, "loss": 0.4136, "step": 170},
    {"epoch": 0.07874606269686515, "grad_norm": 0.147554774836894, "learning_rate": 3.932584269662922e-06, "loss": 0.375, "step": 175},
    {"epoch": 0.08099595020248987, "grad_norm": 0.17905729415958477, "learning_rate": 4.04494382022472e-06, "loss": 0.4017, "step": 180},
    {"epoch": 0.0832458377081146, "grad_norm": 0.1505640721404514, "learning_rate": 4.157303370786518e-06, "loss": 0.3805, "step": 185},
    {"epoch": 0.08549572521373931, "grad_norm": 0.1583375307295741, "learning_rate": 4.269662921348315e-06, "loss": 0.3782, "step": 190},
    {"epoch": 0.08774561271936403, "grad_norm": 0.18866183228882213, "learning_rate": 4.382022471910113e-06, "loss": 0.4021, "step": 195},
    {"epoch": 0.08999550022498876, "grad_norm": 0.15247202386740694, "learning_rate": 4.494382022471911e-06, "loss": 0.3601, "step": 200},
    {"epoch": 0.09224538773061347, "grad_norm": 0.15750805465065473, "learning_rate": 4.606741573033709e-06, "loss": 0.3642, "step": 205},
    {"epoch": 0.09449527523623819, "grad_norm": 0.18964645355569257, "learning_rate": 4.719101123595506e-06, "loss": 0.381, "step": 210},
    {"epoch": 0.0967451627418629, "grad_norm": 0.21688318162459963, "learning_rate": 4.831460674157304e-06, "loss": 0.3936, "step": 215},
    {"epoch": 0.09899505024748763, "grad_norm": 0.1626408315890611, "learning_rate": 4.943820224719101e-06, "loss": 0.3836, "step": 220},
    {"epoch": 0.10124493775311234, "grad_norm": 0.15324258338477442, "learning_rate": 5.0561797752809e-06, "loss": 0.3772, "step": 225},
    {"epoch": 0.10349482525873706, "grad_norm": 0.14183725887256435, "learning_rate": 5.168539325842698e-06, "loss": 0.372, "step": 230},
    {"epoch": 0.10574471276436179, "grad_norm": 0.16218789948449905, "learning_rate": 5.280898876404494e-06, "loss": 0.3571, "step": 235},
    {"epoch": 0.1079946002699865, "grad_norm": 0.1542954767008243, "learning_rate": 5.393258426966292e-06, "loss": 0.3773, "step": 240},
    {"epoch": 0.11024448777561122, "grad_norm": 0.1480650113941815, "learning_rate": 5.50561797752809e-06, "loss": 0.3679, "step": 245},
    {"epoch": 0.11249437528123594, "grad_norm": 0.14762694498356746, "learning_rate": 5.617977528089889e-06, "loss": 0.3766, "step": 250},
    {"epoch": 0.11474426278686066, "grad_norm": 0.13168710830003866, "learning_rate": 5.730337078651685e-06, "loss": 0.3563, "step": 255},
    {"epoch": 0.11699415029248537, "grad_norm": 0.12994674295453762, "learning_rate": 5.842696629213483e-06, "loss": 0.3835, "step": 260},
    {"epoch": 0.11924403779811009, "grad_norm": 0.15322453794019458, "learning_rate": 5.955056179775281e-06, "loss": 0.3523, "step": 265},
    {"epoch": 0.12149392530373482, "grad_norm": 0.14048350395524586, "learning_rate": 6.06741573033708e-06, "loss": 0.3667, "step": 270},
    {"epoch": 0.12374381280935953, "grad_norm": 0.12596877364547304, "learning_rate": 6.179775280898876e-06, "loss": 0.3675, "step": 275},
    {"epoch": 0.12599370031498425, "grad_norm": 0.14354283317022593, "learning_rate": 6.292134831460674e-06, "loss": 0.4005, "step": 280},
    {"epoch": 0.12824358782060896, "grad_norm": 0.11630361105168144, "learning_rate": 6.404494382022472e-06, "loss": 0.3747, "step": 285},
    {"epoch": 0.13049347532623368, "grad_norm": 0.13228040085041334, "learning_rate": 6.51685393258427e-06, "loss": 0.4077, "step": 290},
    {"epoch": 0.13274336283185842, "grad_norm": 0.14738366545686615, "learning_rate": 6.629213483146067e-06, "loss": 0.3998, "step": 295},
    {"epoch": 0.13499325033748313, "grad_norm": 0.12791384531800185, "learning_rate": 6.741573033707865e-06, "loss": 0.3964, "step": 300},
    {"epoch": 0.13724313784310785, "grad_norm": 0.14292143339244684, "learning_rate": 6.853932584269663e-06, "loss": 0.3665, "step": 305},
    {"epoch": 0.13949302534873256, "grad_norm": 0.13259028515993362, "learning_rate": 6.966292134831461e-06, "loss": 0.3648, "step": 310},
    {"epoch": 0.14174291285435728, "grad_norm": 0.1292871978326336, "learning_rate": 7.078651685393258e-06, "loss": 0.3729, "step": 315},
    {"epoch": 0.143992800359982, "grad_norm": 0.11157572176169035, "learning_rate": 7.191011235955056e-06, "loss": 0.3746, "step": 320},
    {"epoch": 0.1462426878656067, "grad_norm": 0.13354661592196232, "learning_rate": 7.303370786516854e-06, "loss": 0.3808, "step": 325},
    {"epoch": 0.14849257537123145, "grad_norm": 0.1306015247259671, "learning_rate": 7.415730337078652e-06, "loss": 0.3581, "step": 330},
    {"epoch": 0.15074246287685616, "grad_norm": 0.14070851299418752, "learning_rate": 7.5280898876404495e-06, "loss": 0.3687, "step": 335},
    {"epoch": 0.15299235038248088, "grad_norm": 0.12379734686286958, "learning_rate": 7.640449438202247e-06, "loss": 0.3663, "step": 340},
    {"epoch": 0.1552422378881056, "grad_norm": 0.10751947723272416, "learning_rate": 7.752808988764046e-06, "loss": 0.3852, "step": 345},
    {"epoch": 0.1574921253937303, "grad_norm": 0.1540237761883997, "learning_rate": 7.865168539325843e-06, "loss": 0.3717, "step": 350},
    {"epoch": 0.15974201289935502, "grad_norm": 0.11180120270561197, "learning_rate": 7.97752808988764e-06, "loss": 0.3489, "step": 355},
    {"epoch": 0.16199190040497974, "grad_norm": 0.12094951392614195, "learning_rate": 8.08988764044944e-06, "loss": 0.3732, "step": 360},
    {"epoch": 0.16424178791060448, "grad_norm": 0.09738867753978302, "learning_rate": 8.202247191011237e-06, "loss": 0.377, "step": 365},
    {"epoch": 0.1664916754162292, "grad_norm": 0.19496877057584272, "learning_rate": 8.314606741573035e-06, "loss": 0.3804, "step": 370},
    {"epoch": 0.1687415629218539, "grad_norm": 0.13099392604853372, "learning_rate": 8.426966292134832e-06, "loss": 0.3853, "step": 375},
    {"epoch": 0.17099145042747863, "grad_norm": 0.0929654538179481, "learning_rate": 8.53932584269663e-06, "loss": 0.3532, "step": 380},
    {"epoch": 0.17324133793310334, "grad_norm": 0.12166228073135968, "learning_rate": 8.651685393258428e-06, "loss": 0.3629, "step": 385},
    {"epoch": 0.17549122543872805, "grad_norm": 0.10261562058829776, "learning_rate": 8.764044943820226e-06, "loss": 0.3662, "step": 390},
    {"epoch": 0.17774111294435277, "grad_norm": 0.11229652632991165, "learning_rate": 8.876404494382023e-06, "loss": 0.366, "step": 395},
    {"epoch": 0.1799910004499775, "grad_norm": 0.11438088125544643, "learning_rate": 8.988764044943822e-06, "loss": 0.3766, "step": 400},
    {"epoch": 0.18224088795560223, "grad_norm": 0.11100159715996023, "learning_rate": 9.101123595505619e-06, "loss": 0.3149, "step": 405},
    {"epoch": 0.18449077546122694, "grad_norm": 0.09215304432977561, "learning_rate": 9.213483146067417e-06, "loss": 0.3505, "step": 410},
    {"epoch": 0.18674066296685166, "grad_norm": 0.11027407366862503, "learning_rate": 9.325842696629213e-06, "loss": 0.3239, "step": 415},
    {"epoch": 0.18899055047247637, "grad_norm": 0.10591097871093506, "learning_rate": 9.438202247191012e-06, "loss": 0.3763, "step": 420},
    {"epoch": 0.19124043797810109, "grad_norm": 0.12014034899424982, "learning_rate": 9.55056179775281e-06, "loss": 0.387, "step": 425},
    {"epoch": 0.1934903254837258, "grad_norm": 0.09470189013776839, "learning_rate": 9.662921348314608e-06, "loss": 0.379, "step": 430},
    {"epoch": 0.19574021298935054, "grad_norm": 0.09501092189291689, "learning_rate": 9.775280898876405e-06, "loss": 0.3642, "step": 435},
    {"epoch": 0.19799010049497526, "grad_norm": 0.11021236791362751, "learning_rate": 9.887640449438202e-06, "loss": 0.353, "step": 440},
    {"epoch": 0.20023998800059997, "grad_norm": 0.10184537613196046, "learning_rate": 1e-05, "loss": 0.365, "step": 445},
    {"epoch": 0.2024898755062247, "grad_norm": 0.08309643827704467, "learning_rate": 9.999961427623602e-06, "loss": 0.3575, "step": 450},
    {"epoch": 0.2047397630118494, "grad_norm": 0.10037722707892065, "learning_rate": 9.999845711089533e-06, "loss": 0.3471, "step": 455},
    {"epoch": 0.20698965051747412, "grad_norm": 0.11715533210340626, "learning_rate": 9.999652852183184e-06, "loss": 0.3714, "step": 460},
    {"epoch": 0.20923953802309886, "grad_norm": 0.13060134335041548, "learning_rate": 9.99938285388016e-06, "loss": 0.3635, "step": 465},
    {"epoch": 0.21148942552872357, "grad_norm": 0.08745200641347264, "learning_rate": 9.999035720346254e-06, "loss": 0.3571, "step": 470},
    {"epoch": 0.2137393130343483, "grad_norm": 0.08966989980740278, "learning_rate": 9.998611456937373e-06, "loss": 0.3639, "step": 475},
    {"epoch": 0.215989200539973, "grad_norm": 0.09956344072812717, "learning_rate": 9.998110070199454e-06, "loss": 0.3665, "step": 480},
    {"epoch": 0.21823908804559772, "grad_norm": 0.09656500042174225, "learning_rate": 9.997531567868367e-06, "loss": 0.3726, "step": 485},
    {"epoch": 0.22048897555122243, "grad_norm": 0.11853509152316483, "learning_rate": 9.996875958869803e-06, "loss": 0.3518, "step": 490},
    {"epoch": 0.22273886305684715, "grad_norm": 0.10059970891055807, "learning_rate": 9.996143253319113e-06, "loss": 0.3624, "step": 495},
    {"epoch": 0.2249887505624719, "grad_norm": 0.10793939615766127, "learning_rate": 9.995333462521178e-06, "loss": 0.3654, "step": 500},
    {"epoch": 0.2272386380680966, "grad_norm": 0.10450237535025396, "learning_rate": 9.99444659897022e-06, "loss": 0.3663, "step": 505},
    {"epoch": 0.22948852557372132, "grad_norm": 0.09806526685924745, "learning_rate": 9.993482676349612e-06, "loss": 0.342, "step": 510},
    {"epoch": 0.23173841307934603, "grad_norm": 0.0964872964969597, "learning_rate": 9.992441709531671e-06, "loss": 0.3705, "step": 515},
    {"epoch": 0.23398830058497075, "grad_norm": 0.09412389153440821, "learning_rate": 9.991323714577421e-06, "loss": 0.3541, "step": 520},
    {"epoch": 0.23623818809059546, "grad_norm": 0.08953556655443609, "learning_rate": 9.99012870873635e-06, "loss": 0.3521, "step": 525},
    {"epoch": 0.23848807559622018, "grad_norm": 0.08695345930804899, "learning_rate": 9.988856710446143e-06, "loss": 0.3505, "step": 530},
    {"epoch": 0.24073796310184492, "grad_norm": 0.09543515501943514, "learning_rate": 9.987507739332401e-06, "loss": 0.3766, "step": 535},
    {"epoch": 0.24298785060746964, "grad_norm": 0.08926010106293578, "learning_rate": 9.986081816208333e-06, "loss": 0.329, "step": 540},
    {"epoch": 0.24523773811309435, "grad_norm": 0.08598056512962657, "learning_rate": 9.984578963074436e-06, "loss": 0.3617, "step": 545},
    {"epoch": 0.24748762561871906, "grad_norm": 0.08952811194064599, "learning_rate": 9.982999203118153e-06, "loss": 0.3383, "step": 550},
    {"epoch": 0.24973751312434378, "grad_norm": 0.12658781787185433, "learning_rate": 9.981342560713528e-06, "loss": 0.3238, "step": 555},
    {"epoch": 0.2519874006299685, "grad_norm": 0.09353150867243243, "learning_rate": 9.979609061420812e-06, "loss": 0.3545, "step": 560},
    {"epoch": 0.2542372881355932, "grad_norm": 0.09177651257435882, "learning_rate": 9.977798731986079e-06, "loss": 0.3502, "step": 565},
    {"epoch": 0.2564871756412179, "grad_norm": 0.09932686671141468, "learning_rate": 9.975911600340814e-06, "loss": 0.3468, "step": 570},
    {"epoch": 0.25873706314684264, "grad_norm": 0.08192603238892632, "learning_rate": 9.973947695601477e-06, "loss": 0.3324, "step": 575},
    {"epoch": 0.26098695065246735, "grad_norm": 0.07493334234921131, "learning_rate": 9.971907048069058e-06, "loss": 0.3795, "step": 580},
    {"epoch": 0.26323683815809207, "grad_norm": 0.08138918761115761, "learning_rate": 9.969789689228606e-06, "loss": 0.3385, "step": 585},
    {"epoch": 0.26548672566371684, "grad_norm": 0.08838566706763232, "learning_rate": 9.967595651748745e-06, "loss": 0.369, "step": 590},
    {"epoch": 0.26773661316934155, "grad_norm": 0.08784958596018687, "learning_rate": 9.965324969481172e-06, "loss": 0.3169, "step": 595},
    {"epoch": 0.26998650067496627, "grad_norm": 0.0896168468240925, "learning_rate": 9.962977677460132e-06, "loss": 0.3572, "step": 600},
    {"epoch": 0.272236388180591, "grad_norm": 0.0946662419061461, "learning_rate": 9.960553811901879e-06, "loss": 0.385, "step": 605},
    {"epoch": 0.2744862756862157, "grad_norm": 0.12115400639084788, "learning_rate": 9.95805341020411e-06, "loss": 0.3595, "step": 610},
    {"epoch": 0.2767361631918404, "grad_norm": 0.0997799833296398, "learning_rate": 9.955476510945401e-06, "loss": 0.3317, "step": 615},
    {"epoch": 0.2789860506974651, "grad_norm": 0.0996130660835657, "learning_rate": 9.952823153884606e-06, "loss": 0.3449, "step": 620},
    {"epoch": 0.28123593820308984, "grad_norm": 0.07835665128694007, "learning_rate": 9.950093379960238e-06, "loss": 0.3397, "step": 625},
    {"epoch": 0.28348582570871456, "grad_norm": 0.09391607163130151, "learning_rate": 9.947287231289844e-06, "loss": 0.3776, "step": 630},
    {"epoch": 0.28573571321433927, "grad_norm": 0.09176019196191011, "learning_rate": 9.944404751169353e-06, "loss": 0.3722, "step": 635},
    {"epoch": 0.287985600719964, "grad_norm": 0.08406446635852077, "learning_rate": 9.941445984072408e-06, "loss": 0.338, "step": 640},
    {"epoch": 0.2902354882255887, "grad_norm": 0.08291987107240674, "learning_rate": 9.938410975649681e-06, "loss": 0.3742, "step": 645},
    {"epoch": 0.2924853757312134, "grad_norm": 0.08633205314263653, "learning_rate": 9.935299772728166e-06, "loss": 0.3611, "step": 650},
    {"epoch": 0.2947352632368382, "grad_norm": 0.07331502093091126, "learning_rate": 9.93211242331046e-06, "loss": 0.3344, "step": 655},
    {"epoch": 0.2969851507424629, "grad_norm": 0.08385804833550349, "learning_rate": 9.92884897657402e-06, "loss": 0.3557, "step": 660},
    {"epoch": 0.2992350382480876, "grad_norm": 0.07435080817645906, "learning_rate": 9.925509482870403e-06, "loss": 0.3405, "step": 665},
    {"epoch": 0.30148492575371233, "grad_norm": 0.08168530188324026, "learning_rate": 9.922093993724492e-06, "loss": 0.3426, "step": 670},
    {"epoch": 0.30373481325933704, "grad_norm": 0.08035820133808234, "learning_rate": 9.918602561833702e-06, "loss": 0.3604, "step": 675},
    {"epoch": 0.30598470076496176, "grad_norm": 0.08271219072869937, "learning_rate": 9.91503524106716e-06, "loss": 0.348, "step": 680},
    {"epoch": 0.3082345882705865, "grad_norm": 0.08921123968472987, "learning_rate": 9.911392086464886e-06, "loss": 0.3441, "step": 685},
    {"epoch": 0.3104844757762112, "grad_norm": 0.0839985353132867, "learning_rate": 9.907673154236929e-06, "loss": 0.3574, "step": 690},
    {"epoch": 0.3127343632818359, "grad_norm": 0.10386954688768853, "learning_rate": 9.903878501762511e-06, "loss": 0.3286, "step": 695},
    {"epoch": 0.3149842507874606, "grad_norm": 0.08778681814263677, "learning_rate": 9.900008187589138e-06, "loss": 0.3268, "step": 700},
    {"epoch": 0.31723413829308533, "grad_norm": 0.09027807015137441, "learning_rate": 9.896062271431697e-06, "loss": 0.3392, "step": 705},
    {"epoch": 0.31948402579871005, "grad_norm": 0.09567803807106381, "learning_rate": 9.89204081417153e-06, "loss": 0.3539, "step": 710},
    {"epoch": 0.32173391330433476, "grad_norm": 0.08574167684815145, "learning_rate": 9.887943877855505e-06, "loss": 0.3377, "step": 715},
    {"epoch": 0.3239838008099595, "grad_norm": 0.09260863383057749, "learning_rate": 9.883771525695052e-06, "loss": 0.3449, "step": 720},
    {"epoch": 0.32623368831558425, "grad_norm": 0.08495447140601177, "learning_rate": 9.879523822065181e-06, "loss": 0.3219, "step": 725},
    {"epoch": 0.32848357582120896, "grad_norm": 0.07533141152453762, "learning_rate": 9.875200832503505e-06, "loss": 0.3568, "step": 730},
    {"epoch": 0.3307334633268337, "grad_norm": 0.12247315370054979, "learning_rate": 9.870802623709215e-06, "loss": 0.3596, "step": 735},
    {"epoch": 0.3329833508324584, "grad_norm": 0.08738003894579985, "learning_rate": 9.866329263542055e-06, "loss": 0.3638, "step": 740},
    {"epoch": 0.3352332383380831, "grad_norm": 0.08338816245916761, "learning_rate": 9.861780821021282e-06, "loss": 0.3561, "step": 745},
    {"epoch": 0.3374831258437078, "grad_norm": 0.08236575366096931, "learning_rate": 9.857157366324587e-06, "loss": 0.3332, "step": 750},
    {"epoch": 0.33973301334933254, "grad_norm": 0.06944440484574142, "learning_rate": 9.852458970787027e-06, "loss": 0.357, "step": 755},
    {"epoch": 0.34198290085495725, "grad_norm": 0.07253573063652108, "learning_rate": 9.847685706899913e-06, "loss": 0.3245, "step": 760},
    {"epoch": 0.34423278836058196, "grad_norm": 0.07122505571988245, "learning_rate": 9.842837648309698e-06, "loss": 0.3528, "step": 765},
    {"epoch": 0.3464826758662067, "grad_norm": 0.07767571294888054, "learning_rate": 9.837914869816835e-06, "loss": 0.3395, "step": 770},
    {"epoch": 0.3487325633718314, "grad_norm": 0.07346709829835463, "learning_rate": 9.832917447374637e-06, "loss": 0.3648, "step": 775},
    {"epoch": 0.3509824508774561, "grad_norm": 0.08947827115200468, "learning_rate": 9.827845458088082e-06, "loss": 0.3521, "step": 780},
    {"epoch": 0.3532323383830808, "grad_norm": 0.06534661518603589, "learning_rate": 9.822698980212643e-06, "loss": 0.3366, "step": 785},
    {"epoch": 0.35548222588870554, "grad_norm": 0.08012548677368805, "learning_rate": 9.817478093153074e-06, "loss": 0.3752, "step": 790},
    {"epoch": 0.3577321133943303, "grad_norm": 0.08284074693974608, "learning_rate": 9.812182877462182e-06, "loss": 0.3337, "step": 795},
    {"epoch": 0.359982000899955, "grad_norm": 0.0962415375604297, "learning_rate": 9.806813414839588e-06, "loss": 0.3489, "step": 800},
    {"epoch": 0.36223188840557974, "grad_norm": 0.08299125800356132, "learning_rate": 9.801369788130468e-06, "loss": 0.3466, "step": 805},
    {"epoch": 0.36448177591120445, "grad_norm": 0.07196876922608039, "learning_rate": 9.795852081324266e-06, "loss": 0.3424, "step": 810},
    {"epoch": 0.36673166341682917, "grad_norm": 0.07251364573785335, "learning_rate": 9.79026037955341e-06, "loss": 0.3578, "step": 815},
    {"epoch": 0.3689815509224539, "grad_norm": 0.07779557103393991, "learning_rate": 9.784594769091989e-06, "loss": 0.3616, "step": 820},
    {"epoch": 0.3712314384280786, "grad_norm": 0.07434071832631806, "learning_rate": 9.778855337354426e-06, "loss": 0.3572, "step": 825},
    {"epoch": 0.3734813259337033, "grad_norm": 0.0761276852235193, "learning_rate": 9.77304217289413e-06, "loss": 0.3147, "step": 830},
    {"epoch": 0.375731213439328, "grad_norm": 0.08043122828466166, "learning_rate": 9.76715536540213e-06, "loss": 0.377, "step": 835},
    {"epoch": 0.37798110094495274, "grad_norm": 0.07418765173136689, "learning_rate": 9.761195005705685e-06, "loss": 0.3198, "step": 840},
    {"epoch": 0.38023098845057746, "grad_norm": 0.08536316659010101, "learning_rate": 9.755161185766891e-06, "loss": 0.3324, "step": 845},
    {"epoch": 0.38248087595620217, "grad_norm": 0.07947600210593922, "learning_rate": 9.74905399868126e-06, "loss": 0.3618, "step": 850},
    {"epoch": 0.3847307634618269, "grad_norm": 0.0744113590241544, "learning_rate": 9.742873538676274e-06, "loss": 0.3402, "step": 855},
    {"epoch": 0.3869806509674516, "grad_norm": 0.06709744864423575, "learning_rate": 9.73661990110995e-06, "loss": 0.3337, "step": 860},
    {"epoch": 0.38923053847307637, "grad_norm": 0.07721094678627155, "learning_rate": 9.73029318246935e-06, "loss": 0.3473, "step": 865},
    {"epoch": 0.3914804259787011, "grad_norm": 0.07436794628188735, "learning_rate": 9.723893480369106e-06, "loss": 0.3227, "step": 870},
    {"epoch": 0.3937303134843258, "grad_norm": 0.08184087425329187, "learning_rate": 9.717420893549902e-06, "loss": 0.3271, "step": 875},
    {"epoch": 0.3959802009899505, "grad_norm": 0.07323707936362174, "learning_rate": 9.71087552187696e-06, "loss": 0.3353, "step": 880},
    {"epoch": 0.39823008849557523, "grad_norm": 0.07407984516514123, "learning_rate": 9.7042574663385e-06, "loss": 0.3405, "step": 885},
    {"epoch": 0.40047997600119994, "grad_norm": 0.06705082859053621, "learning_rate": 9.697566829044172e-06, "loss": 0.3335, "step": 890},
    {"epoch": 0.40272986350682466, "grad_norm": 0.06417105200135667, "learning_rate": 9.690803713223485e-06, "loss": 0.3632, "step": 895},
    {"epoch": 0.4049797510124494, "grad_norm": 0.07661580482483403, "learning_rate": 9.68396822322422e-06, "loss": 0.341, "step": 900},
    {"epoch": 0.4072296385180741, "grad_norm": 0.07783982481846635, "learning_rate": 9.677060464510817e-06, "loss": 0.3422, "step": 905},
    {"epoch": 0.4094795260236988, "grad_norm": 0.07934781483289755, "learning_rate": 9.670080543662742e-06, "loss": 0.344, "step": 910},
    {"epoch": 0.4117294135293235, "grad_norm": 0.07206722738626223, "learning_rate": 9.663028568372845e-06, "loss": 0.3563, "step": 915},
    {"epoch": 0.41397930103494823, "grad_norm": 0.06767347411319052, "learning_rate": 9.655904647445711e-06, "loss": 0.3231, "step": 920},
    {"epoch": 0.41622918854057295, "grad_norm": 0.07180782228261029, "learning_rate": 9.64870889079596e-06, "loss": 0.3287, "step": 925},
    {"epoch": 0.4184790760461977, "grad_norm": 0.07242610923174227, "learning_rate": 9.641441409446563e-06, "loss": 0.3487, "step": 930},
    {"epoch": 0.42072896355182243, "grad_norm": 0.06832390188318747, "learning_rate": 9.634102315527136e-06, "loss": 0.325, "step": 935},
    {"epoch": 0.42297885105744715, "grad_norm": 0.07856703769371849, "learning_rate": 9.626691722272193e-06, "loss": 0.3458, "step": 940},
    {"epoch": 0.42522873856307186, "grad_norm": 0.0663937348509602, "learning_rate": 9.61920974401941e-06, "loss": 0.3513, "step": 945},
    {"epoch": 0.4274786260686966, "grad_norm": 0.07114607462059036, "learning_rate": 9.611656496207861e-06, "loss": 0.3474, "step": 950},
    {"epoch": 0.4297285135743213, "grad_norm": 0.07603014864007235, "learning_rate": 9.604032095376234e-06, "loss": 0.3362, "step": 955},
    {"epoch": 0.431978401079946, "grad_norm": 0.0734531353849079, "learning_rate": 9.596336659161031e-06, "loss": 0.3445, "step": 960},
    {"epoch": 0.4342282885855707, "grad_norm": 0.061596458285852376, "learning_rate": 9.588570306294759e-06, "loss": 0.3453, "step": 965},
    {"epoch": 0.43647817609119544, "grad_norm": 0.05885162798568731, "learning_rate": 9.58073315660409e-06, "loss": 0.3439, "step": 970},
    {"epoch": 0.43872806359682015, "grad_norm": 0.07082727968014366, "learning_rate": 9.57282533100802e-06, "loss": 0.3395, "step": 975},
    {"epoch": 0.44097795110244487, "grad_norm": 0.07316435404238263, "learning_rate": 9.564846951515997e-06, "loss": 0.3304, "step": 980},
    {"epoch": 0.4432278386080696, "grad_norm": 0.07444841963108913, "learning_rate": 9.55679814122605e-06, "loss": 0.3298, "step": 985},
    {"epoch": 0.4454777261136943, "grad_norm": 0.07294271191699972, "learning_rate": 9.548679024322866e-06, "loss": 0.3463, "step": 990},
    {"epoch": 0.447727613619319, "grad_norm": 0.07031942249727262, "learning_rate": 9.540489726075907e-06, "loss": 0.3486, "step": 995},
    {"epoch": 0.4499775011249438, "grad_norm": 0.07151326035389519, "learning_rate": 9.532230372837446e-06, "loss": 0.3537, "step": 1000},
    {"epoch": 0.4522273886305685, "grad_norm": 0.0671028535664748, "learning_rate": 9.523901092040634e-06, "loss": 0.3455, "step": 1005},
    {"epoch": 0.4544772761361932, "grad_norm": 0.07197014184781744, "learning_rate": 9.51550201219754e-06, "loss": 0.3432, "step": 1010},
    {"epoch": 0.4567271636418179, "grad_norm": 0.07169196920459484, "learning_rate": 9.507033262897142e-06, "loss": 0.31, "step": 1015},
    {"epoch": 0.45897705114744264, "grad_norm": 0.07109226686317548, "learning_rate": 9.498494974803362e-06, "loss": 0.3663, "step": 1020},
    {"epoch": 0.46122693865306735, "grad_norm": 0.05804652011529642, "learning_rate": 9.489887279653023e-06, "loss": 0.3194, "step": 1025},
    {"epoch": 0.46347682615869207, "grad_norm": 0.0700778438901929, "learning_rate": 9.481210310253826e-06, "loss": 0.3167, "step": 1030},
    {"epoch": 0.4657267136643168, "grad_norm": 0.06244080013341172, "learning_rate": 9.472464200482303e-06, "loss": 0.3127, "step": 1035},
    {"epoch": 0.4679766011699415, "grad_norm": 0.06903401204251029, "learning_rate": 9.463649085281752e-06, "loss": 0.3259, "step": 1040},
    {"epoch": 0.4702264886755662, "grad_norm": 0.07317408098224049, "learning_rate": 9.454765100660144e-06, "loss": 0.3446, "step": 1045},
    {"epoch": 0.4724763761811909, "grad_norm": 0.06487603568640564, "learning_rate": 9.445812383688046e-06, "loss": 0.3418, "step": 1050},
    {"epoch": 0.47472626368681564, "grad_norm": 0.06587470603877191, "learning_rate": 9.43679107249648e-06, "loss": 0.3473, "step": 1055},
    {"epoch": 0.47697615119244036, "grad_norm": 0.07107259617908306, "learning_rate": 9.427701306274812e-06, "loss": 0.337, "step": 1060},
    {"epoch": 0.47922603869806507, "grad_norm": 0.06697594936792645, "learning_rate": 9.418543225268598e-06, "loss": 0.3429, "step": 1065},
    {"epoch": 0.48147592620368984, "grad_norm": 0.0682858638376316, "learning_rate": 9.40931697077741e-06, "loss": 0.3358, "step": 1070},
    {"epoch": 0.48372581370931456, "grad_norm": 0.07619891304792806, "learning_rate": 9.400022685152683e-06, "loss": 0.3333, "step": 1075},
    {"epoch": 0.48597570121493927, "grad_norm": 0.07522989171574869, "learning_rate": 9.390660511795481e-06, "loss": 0.3587, "step": 1080},
    {"epoch": 0.488225588720564, "grad_norm": 0.07244707737339262, "learning_rate": 9.381230595154319e-06, "loss": 0.3386, "step": 1085},
    {"epoch": 0.4904754762261887, "grad_norm": 0.0747628006572659, "learning_rate": 9.371733080722911e-06, "loss": 0.3457, "step": 1090},
    {"epoch": 0.4927253637318134, "grad_norm": 0.0687511407497147, "learning_rate": 9.362168115037942e-06, "loss": 0.3433, "step": 1095},
    {"epoch": 0.49497525123743813, "grad_norm": 0.07512269519367433, "learning_rate": 9.352535845676791e-06, "loss": 0.3219, "step": 1100},
    {"epoch": 0.49722513874306284, "grad_norm": 0.07246031317089945, "learning_rate": 9.342836421255268e-06, "loss": 0.322, "step": 1105},
    {"epoch": 0.49947502624868756, "grad_norm": 0.07594536131369899, "learning_rate": 9.333069991425313e-06, "loss": 0.3589, "step": 1110},
    {"epoch": 0.5017249137543123, "grad_norm": 0.06689469633356987, "learning_rate": 9.323236706872685e-06, "loss": 0.357, "step": 1115},
    {"epoch": 0.503974801259937, "grad_norm": 0.06930147639704634, "learning_rate": 9.31333671931465e-06, "loss": 0.3263, "step": 1120},
    {"epoch": 0.5062246887655617, "grad_norm": 0.06133351525533005, "learning_rate": 9.303370181497623e-06, "loss": 0.3422, "step": 1125},
    {"epoch": 0.5084745762711864, "grad_norm": 0.07604072880215484, "learning_rate": 9.293337247194827e-06, "loss": 0.359, "step": 1130},
    {"epoch": 0.5107244637768111, "grad_norm": 0.06955762934734898, "learning_rate": 9.283238071203907e-06, "loss": 0.3439, "step": 1135},
    {"epoch": 0.5129743512824358, "grad_norm": 0.0803346877614296, "learning_rate": 9.27307280934455e-06, "loss": 0.3471, "step": 1140},
    {"epoch": 0.5152242387880606, "grad_norm": 0.06622678263367843, "learning_rate": 9.26284161845608e-06, "loss": 0.3427, "step": 1145},
    {"epoch": 0.5174741262936853, "grad_norm": 0.0709676185753263, "learning_rate": 9.252544656395033e-06, "loss": 0.3363, "step": 1150},
    {"epoch": 0.51972401379931, "grad_norm": 0.08282162237795766, "learning_rate": 9.242182082032729e-06, "loss": 0.341, "step": 1155},
    {"epoch": 0.5219739013049347, "grad_norm": 0.06233679346455434, "learning_rate": 9.231754055252817e-06, "loss": 0.3308, "step": 1160},
    {"epoch": 0.5242237888105594, "grad_norm": 0.05949370637515577, "learning_rate": 9.221260736948803e-06, "loss": 0.3254, "step": 1165},
    {"epoch": 0.5264736763161841, "grad_norm": 0.06102505633204194, "learning_rate": 9.21070228902158e-06, "loss": 0.327, "step": 1170},
    {"epoch": 0.528723563821809, "grad_norm": 0.07096293590033853, "learning_rate": 9.200078874376917e-06, "loss": 0.3309, "step": 1175},
    {"epoch": 0.5309734513274337, "grad_norm": 0.06374179754335971, "learning_rate": 9.189390656922955e-06, "loss": 0.3579, "step": 1180},
    {"epoch": 0.5332233388330584, "grad_norm": 0.09643830344296066, "learning_rate": 9.17863780156767e-06, "loss": 0.3466, "step": 1185},
    {"epoch": 0.5354732263386831, "grad_norm": 0.0652384061049577, "learning_rate": 9.167820474216337e-06, "loss": 0.3523, "step": 1190},
    {"epoch": 0.5377231138443078, "grad_norm": 0.06430574295906281, "learning_rate": 9.156938841768965e-06, "loss": 0.3722, "step": 1195},
    {"epoch": 0.5399730013499325, "grad_norm": 0.07648802804062793, "learning_rate": 9.145993072117724e-06, "loss": 0.321, "step": 1200},
    {"epoch": 0.5422228888555573, "grad_norm": 0.06775418329662553, "learning_rate": 9.134983334144352e-06, "loss": 0.3549, "step": 1205},
    {"epoch": 0.544472776361182, "grad_norm": 0.076334857238285, "learning_rate": 9.123909797717551e-06, "loss": 0.335, "step": 1210},
    {"epoch": 0.5467226638668067, "grad_norm": 0.06576432515389055, "learning_rate": 9.112772633690368e-06, "loss": 0.3239, "step": 1215},
    {"epoch": 0.5489725513724314, "grad_norm": 0.06872638373228167, "learning_rate": 9.101572013897555e-06, "loss": 0.3141, "step": 1220},
    {"epoch": 0.5512224388780561, "grad_norm": 0.06158733598122966, "learning_rate": 9.090308111152924e-06, "loss": 0.3221, "step": 1225},
    {"epoch": 0.5534723263836808, "grad_norm": 0.08097819934773681, "learning_rate": 9.07898109924667e-06, "loss": 0.3151, "step": 1230},
    {"epoch": 0.5557222138893055, "grad_norm": 0.06764912622152554, "learning_rate": 9.067591152942701e-06, "loss": 0.3332, "step": 1235},
    {"epoch": 0.5579721013949303, "grad_norm": 0.07314176615388208, "learning_rate": 9.056138447975936e-06, "loss": 0.3415, "step": 1240},
    {"epoch": 0.560221988900555, "grad_norm": 0.0717387579544613, "learning_rate": 9.044623161049594e-06, "loss": 0.3386, "step": 1245},
    {"epoch": 0.5624718764061797, "grad_norm": 0.07552097065323739, "learning_rate": 9.033045469832467e-06, "loss": 0.3569, "step": 1250},
    {"epoch": 0.5647217639118044, "grad_norm": 0.06915693480180615, "learning_rate": 9.02140555295618e-06, "loss": 0.3222, "step": 1255},
    {"epoch": 0.5669716514174291, "grad_norm": 0.07769020322155092, "learning_rate": 9.009703590012434e-06, "loss": 0.3185, "step": 1260},
    {"epoch": 0.5692215389230538, "grad_norm": 0.07598860570344396, "learning_rate": 8.997939761550239e-06, "loss": 0.3522, "step": 1265},
    {"epoch": 0.5714714264286785, "grad_norm": 0.07073748495565614, "learning_rate": 8.986114249073122e-06, "loss": 0.3169, "step": 1270},
    {"epoch": 0.5737213139343033, "grad_norm": 0.06866551274687982, "learning_rate": 8.97422723503633e-06, "loss": 0.3304, "step": 1275},
    {"epoch": 0.575971201439928, "grad_norm": 0.07075202015965712, "learning_rate": 8.962278902844016e-06, "loss": 0.3309, "step": 1280},
    {"epoch": 0.5782210889455527, "grad_norm": 0.07165184953921011, "learning_rate": 8.950269436846405e-06, "loss": 0.331, "step": 1285},
    {"epoch": 0.5804709764511774, "grad_norm": 0.06433134595791733, "learning_rate": 8.938199022336956e-06, "loss": 0.328, "step": 1290},
    {"epoch": 0.5827208639568021, "grad_norm": 0.07003765990675229, "learning_rate": 8.926067845549495e-06, "loss": 0.3297, "step": 1295},
    {"epoch": 0.5849707514624268, "grad_norm": 0.06653035126789796, "learning_rate": 8.913876093655351e-06, "loss": 0.335, "step": 1300},
    {"epoch": 0.5872206389680515, "grad_norm": 0.06847091877632593, "learning_rate": 8.90162395476046e-06, "loss": 0.3279, "step": 1305},
    {"epoch": 0.5894705264736764, "grad_norm": 0.06903452581161729, "learning_rate": 8.889311617902468e-06, "loss": 0.3229, "step": 1310},
    {"epoch": 0.5917204139793011, "grad_norm": 0.09341128215879058, "learning_rate": 8.876939273047813e-06, "loss": 0.299, "step": 1315},
    {"epoch": 0.5939703014849258, "grad_norm": 0.07278846497904187, "learning_rate": 8.86450711108879e-06, "loss": 0.3226, "step": 1320},
    {"epoch": 0.5962201889905505, "grad_norm": 0.06262188507904164, "learning_rate": 8.85201532384061e-06, "loss": 0.3133, "step": 1325},
    {"epoch": 0.5984700764961752, "grad_norm": 0.06880573696178596, "learning_rate": 8.839464104038445e-06, "loss": 0.2962, "step": 1330},
    {"epoch": 0.6007199640017999, "grad_norm": 0.06779234501270573, "learning_rate": 8.826853645334441e-06, "loss": 0.3124, "step": 1335},
    {"epoch": 0.6029698515074247, "grad_norm": 0.06187208772150342, "learning_rate": 8.814184142294744e-06, "loss": 0.315, "step": 1340},
    {"epoch": 0.6052197390130494, "grad_norm": 0.07896028281919827, "learning_rate": 8.80145579039649e-06, "loss": 0.3432, "step": 1345},
    {"epoch": 0.6074696265186741, "grad_norm": 0.0751755215796296, "learning_rate": 8.78866878602479e-06, "loss": 0.3239, "step": 1350},
    {"epoch": 0.6097195140242988, "grad_norm": 0.07156093608605772, "learning_rate": 8.775823326469703e-06, "loss": 0.337, "step": 1355},
    {"epoch": 0.6119694015299235, "grad_norm": 0.0727876886960586, "learning_rate": 8.76291960992319e-06, "loss": 0.3737, "step": 1360},
    {"epoch": 0.6142192890355482, "grad_norm": 0.07239709772207241, "learning_rate": 8.749957835476053e-06, "loss": 0.333, "step": 1365},
    {"epoch": 0.616469176541173, "grad_norm": 0.07320801736790428, "learning_rate": 8.736938203114872e-06, "loss": 0.344, "step": 1370},
    {"epoch": 0.6187190640467977, "grad_norm": 0.06549770224319154, "learning_rate": 8.72386091371891e-06, "loss": 0.3006, "step": 1375},
    {"epoch": 0.6209689515524224, "grad_norm": 0.07264319355187582, "learning_rate": 8.710726169057018e-06, "loss": 0.3173, "step": 1380},
    {"epoch": 0.6232188390580471, "grad_norm": 0.07992193946978773, "learning_rate": 8.697534171784523e-06, "loss": 0.3467, "step": 1385},
    {"epoch": 0.6254687265636718, "grad_norm": 0.06707456122943496, "learning_rate": 8.684285125440099e-06, "loss": 0.3297, "step": 1390},
    {"epoch": 0.6277186140692965, "grad_norm": 0.06483948438605809, "learning_rate": 8.670979234442624e-06, "loss": 0.3349, "step": 1395},
    {"epoch": 0.6299685015749212, "grad_norm": 0.0705394359218232, "learning_rate": 8.657616704088037e-06, "loss": 0.33, "step": 1400},
    {"epoch": 0.632218389080546, "grad_norm": 0.07985592647370479, "learning_rate": 8.644197740546153e-06, "loss": 0.3605, "step": 1405},
    {"epoch": 0.6344682765861707, "grad_norm": 0.08150029983078208, "learning_rate": 8.630722550857503e-06, "loss": 0.3363, "step": 1410},
    {"epoch": 0.6367181640917954, "grad_norm": 0.07286616055279489, "learning_rate": 8.617191342930118e-06, "loss": 0.3441, "step": 1415},
    {"epoch": 0.6389680515974201, "grad_norm": 0.06834780355739174, "learning_rate": 8.603604325536338e-06, "loss": 0.3298, "step": 1420},
    {"epoch": 0.6412179391030448, "grad_norm": 0.06360740971285378, "learning_rate": 8.589961708309582e-06, "loss": 0.308, "step": 1425},
    {"epoch": 0.6434678266086695, "grad_norm": 0.06387011586281786, "learning_rate": 8.576263701741115e-06, "loss": 0.3102, "step": 1430},
    {"epoch": 0.6457177141142942, "grad_norm": 0.059287022702283844, "learning_rate": 8.562510517176807e-06, "loss": 0.333, "step": 1435},
    {"epoch": 0.647967601619919, "grad_norm": 0.07076357545448068, "learning_rate": 8.54870236681386e-06, "loss": 0.3376, "step": 1440},
    {"epoch": 0.6502174891255437, "grad_norm": 0.08080237359735847, "learning_rate": 8.534839463697541e-06, "loss": 0.344, "step": 1445},
    {"epoch": 0.6524673766311685, "grad_norm": 0.07301903865415799, "learning_rate": 8.520922021717903e-06, "loss": 0.3236, "step": 1450},
    {"epoch": 0.6547172641367932, "grad_norm": 0.06408247558471158, "learning_rate": 8.506950255606466e-06, "loss": 0.3119, "step": 1455},
    {"epoch": 0.6569671516424179, "grad_norm": 0.07142979546900464, "learning_rate": 8.492924380932919e-06, "loss": 0.3235, "step": 1460},
    {"epoch": 0.6592170391480426, "grad_norm": 0.06623934823982494, "learning_rate": 8.478844614101792e-06, "loss": 0.3127, "step": 1465},
    {"epoch": 0.6614669266536674, "grad_norm": 0.064051288527217, "learning_rate": 8.464711172349105e-06, "loss": 0.3408, "step": 1470},
    {"epoch": 0.6637168141592921, "grad_norm": 0.07268808159369747, "learning_rate": 8.450524273739036e-06, "loss": 0.3406, "step": 1475},
    {"epoch": 0.6659667016649168, "grad_norm": 0.06547939077675495, "learning_rate": 8.436284137160544e-06, "loss": 0.3404, "step": 1480},
    {"epoch": 0.6682165891705415, "grad_norm": 0.11515221274329139, "learning_rate": 8.421990982323988e-06, "loss": 0.3342, "step": 1485},
    {"epoch": 0.6704664766761662, "grad_norm": 0.08213057865060075, "learning_rate": 8.407645029757752e-06, "loss": 0.3631, "step": 1490},
    {"epoch": 0.6727163641817909, "grad_norm": 0.07445316775297253, "learning_rate": 8.393246500804825e-06, "loss": 0.362, "step": 1495},
    {"epoch": 0.6749662516874156, "grad_norm": 0.0715773585848479, "learning_rate": 8.3787956176194e-06, "loss": 0.3377, "step": 1500},
    {"epoch": 0.6772161391930404, "grad_norm": 0.07245667708706742, "learning_rate": 8.36429260316344e-06, "loss": 0.2967, "step": 1505},
    {"epoch": 0.6794660266986651, "grad_norm": 0.07191851967760118, "learning_rate": 8.349737681203234e-06, "loss": 0.3447, "step": 1510},
    {"epoch": 0.6817159142042898, "grad_norm": 0.06475853768493092, "learning_rate": 8.335131076305958e-06, "loss": 0.3339, "step": 1515},
    {"epoch": 0.6839658017099145, "grad_norm": 0.07263158877410257, "learning_rate": 8.320473013836197e-06, "loss": 0.3074, "step": 1520},
    {"epoch": 0.6862156892155392, "grad_norm": 0.05942603663221257, "learning_rate": 8.305763719952467e-06, "loss": 0.2997, "step": 1525},
    {"epoch": 0.6884655767211639, "grad_norm": 0.07938168227761808, "learning_rate": 8.29100342160374e-06, "loss": 0.3122, "step": 1530},
    {"epoch": 0.6907154642267886, "grad_norm": 0.07999223664242092, "learning_rate": 8.27619234652593e-06, "loss": 0.3138, "step": 1535},
    {"epoch": 0.6929653517324134, "grad_norm": 0.06996270561203156, "learning_rate": 8.261330723238381e-06, "loss": 0.3321, "step": 1540},
    {"epoch": 0.6952152392380381, "grad_norm": 0.09647113986832291, "learning_rate": 8.246418781040345e-06, "loss": 0.3269, "step": 1545},
    {"epoch": 0.6974651267436628, "grad_norm": 0.07974144611519904, "learning_rate": 8.231456750007436e-06, "loss": 0.309, "step": 1550},
    {"epoch": 0.6997150142492875, "grad_norm": 0.06944041746000827, "learning_rate": 8.216444860988098e-06, "loss": 0.3347, "step": 1555},
    {"epoch": 0.7019649017549122, "grad_norm": 0.06697853007490644, "learning_rate": 8.20138334560002e-06, "loss": 0.3432, "step": 1560},
    {"epoch": 0.7042147892605369, "grad_norm": 0.09163411149931353, "learning_rate": 8.18627243622658e-06, "loss": 0.3294, "step": 1565},
    {"epoch": 0.7064646767661616, "grad_norm": 0.06745466757701833, "learning_rate": 8.171112366013252e-06, "loss": 0.3382, "step": 1570},
    {"epoch": 0.7087145642717864, "grad_norm": 0.06524545139947452, "learning_rate": 8.155903368864008e-06, "loss": 0.2894, "step": 1575},
    {"epoch": 0.7109644517774111, "grad_norm": 0.07357575023935092, "learning_rate": 8.140645679437713e-06, "loss": 0.345, "step": 1580},
    {"epoch": 0.7132143392830359, "grad_norm": 0.07032356069075725, "learning_rate": 8.125339533144507e-06, "loss": 0.3497, "step": 1585},
    {"epoch": 0.7154642267886606, "grad_norm": 0.07305825316899144, "learning_rate": 8.109985166142161e-06, "loss": 0.3223, "step": 1590},
    {"epoch": 0.7177141142942853, "grad_norm": 0.07026921859976491, "learning_rate": 8.09458281533244e-06, "loss": 0.3271, "step": 1595},
    {"epoch": 0.71996400179991, "grad_norm": 0.0783084169696169, "learning_rate": 8.079132718357465e-06, "loss": 0.311, "step": 1600},
    {"epoch": 0.7222138893055348, "grad_norm": 0.06977970059586212, "learning_rate": 8.063635113596006e-06, "loss": 0.3114, "step": 1605},
    {"epoch": 0.7244637768111595, "grad_norm": 0.06695382649927473, "learning_rate": 8.048090240159849e-06, "loss": 0.3186, "step": 1610},
    {"epoch": 0.7267136643167842, "grad_norm": 0.07382767142740718, "learning_rate": 8.032498337890073e-06, "loss": 0.3115, "step": 1615},
    {"epoch": 0.7289635518224089, "grad_norm": 0.0847147954522355, "learning_rate": 8.01685964735337e-06, "loss": 0.3313, "step": 1620},
    {"epoch": 0.7312134393280336, "grad_norm": 0.08710412831256738, "learning_rate": 8.00117440983832e-06, "loss": 0.3129, "step": 1625},
    {"epoch": 0.7334633268336583, "grad_norm": 0.07163605298015002, "learning_rate": 7.985442867351682e-06, "loss": 0.3197, "step": 1630},
    {"epoch": 0.735713214339283, "grad_norm": 0.08693479896494097, "learning_rate": 7.969665262614642e-06, "loss": 0.3584, "step": 1635},
    {"epoch": 0.7379631018449078, "grad_norm": 0.07181692085074703, "learning_rate": 7.953841839059086e-06, "loss": 0.3024, "step": 1640},
    {"epoch": 0.7402129893505325, "grad_norm": 0.06762138099885763, "learning_rate": 7.937972840823836e-06, "loss": 0.3393, "step": 1645},
    {"epoch": 0.7424628768561572, "grad_norm": 0.06579548855422006, "learning_rate": 7.922058512750876e-06, "loss": 0.3415, "step": 1650},
    {"epoch": 0.7447127643617819, "grad_norm": 0.06780731913871438, "learning_rate": 7.90609910038159e-06, "loss": 0.326, "step": 1655},
    {"epoch": 0.7469626518674066, "grad_norm": 0.07345224322730477, "learning_rate": 7.890094849952964e-06, "loss": 0.3579, "step": 1660},
    {"epoch": 0.7492125393730313, "grad_norm": 0.07643898702300285, "learning_rate": 7.874046008393783e-06, "loss": 0.3215, "step": 1665},
    {"epoch": 0.751462426878656, "grad_norm": 0.08111641352223722, "learning_rate": 7.857952823320833e-06, "loss": 0.3396, "step": 1670},
    {"epoch": 0.7537123143842808, "grad_norm": 0.06433102937848656, "learning_rate": 7.84181554303507e-06, "loss": 0.3229, "step": 1675},
    {"epoch": 0.7559622018899055, "grad_norm": 0.07066161687549372, "learning_rate": 7.825634416517793e-06, "loss": 0.3168, "step": 1680},
| { | |
| "epoch": 0.7582120893955302, | |
| "grad_norm": 0.07761037252783486, | |
| "learning_rate": 7.809409693426803e-06, | |
| "loss": 0.345, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.7604619769011549, | |
| "grad_norm": 0.07514558565636438, | |
| "learning_rate": 7.793141624092551e-06, | |
| "loss": 0.3423, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.7627118644067796, | |
| "grad_norm": 0.08138341842898199, | |
| "learning_rate": 7.776830459514275e-06, | |
| "loss": 0.3153, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.7649617519124043, | |
| "grad_norm": 0.07657999183778645, | |
| "learning_rate": 7.760476451356123e-06, | |
| "loss": 0.3568, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.7672116394180291, | |
| "grad_norm": 0.08932610854441203, | |
| "learning_rate": 7.744079851943286e-06, | |
| "loss": 0.3045, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.7694615269236538, | |
| "grad_norm": 0.07788581856311123, | |
| "learning_rate": 7.727640914258076e-06, | |
| "loss": 0.322, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.7717114144292785, | |
| "grad_norm": 0.07234842557571529, | |
| "learning_rate": 7.711159891936059e-06, | |
| "loss": 0.301, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.7739613019349032, | |
| "grad_norm": 0.08103908033954604, | |
| "learning_rate": 7.694637039262109e-06, | |
| "loss": 0.2934, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.776211189440528, | |
| "grad_norm": 0.08033609793206774, | |
| "learning_rate": 7.678072611166503e-06, | |
| "loss": 0.3281, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.7784610769461527, | |
| "grad_norm": 0.08682473308042656, | |
| "learning_rate": 7.661466863220982e-06, | |
| "loss": 0.3377, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.7807109644517775, | |
| "grad_norm": 0.07975724023981283, | |
| "learning_rate": 7.644820051634813e-06, | |
| "loss": 0.3312, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.7829608519574022, | |
| "grad_norm": 0.08318610850328363, | |
| "learning_rate": 7.628132433250828e-06, | |
| "loss": 0.318, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7852107394630269, | |
| "grad_norm": 0.07470444210188223, | |
| "learning_rate": 7.611404265541464e-06, | |
| "loss": 0.3166, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.7874606269686516, | |
| "grad_norm": 0.07633984339680623, | |
| "learning_rate": 7.594635806604797e-06, | |
| "loss": 0.3068, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7897105144742763, | |
| "grad_norm": 0.08519611137288997, | |
| "learning_rate": 7.57782731516055e-06, | |
| "loss": 0.3465, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.791960401979901, | |
| "grad_norm": 0.07125104461336126, | |
| "learning_rate": 7.560979050546103e-06, | |
| "loss": 0.311, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.7942102894855257, | |
| "grad_norm": 0.08460045098046377, | |
| "learning_rate": 7.544091272712501e-06, | |
| "loss": 0.3036, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.7964601769911505, | |
| "grad_norm": 0.07731671038628908, | |
| "learning_rate": 7.527164242220434e-06, | |
| "loss": 0.3214, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.7987100644967752, | |
| "grad_norm": 0.07618452283812552, | |
| "learning_rate": 7.510198220236217e-06, | |
| "loss": 0.3412, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.8009599520023999, | |
| "grad_norm": 0.08122249298530079, | |
| "learning_rate": 7.493193468527764e-06, | |
| "loss": 0.3129, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8032098395080246, | |
| "grad_norm": 0.08390625774458342, | |
| "learning_rate": 7.476150249460549e-06, | |
| "loss": 0.3168, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.8054597270136493, | |
| "grad_norm": 0.07518471851900174, | |
| "learning_rate": 7.4590688259935554e-06, | |
| "loss": 0.331, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.807709614519274, | |
| "grad_norm": 0.07627971467235234, | |
| "learning_rate": 7.441949461675223e-06, | |
| "loss": 0.3471, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.8099595020248987, | |
| "grad_norm": 0.08879967466572108, | |
| "learning_rate": 7.424792420639377e-06, | |
| "loss": 0.323, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8122093895305235, | |
| "grad_norm": 0.0858174200658171, | |
| "learning_rate": 7.407597967601155e-06, | |
| "loss": 0.3284, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.8144592770361482, | |
| "grad_norm": 0.08665127583082709, | |
| "learning_rate": 7.390366367852923e-06, | |
| "loss": 0.3217, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.8167091645417729, | |
| "grad_norm": 0.08001080258785544, | |
| "learning_rate": 7.3730978872601825e-06, | |
| "loss": 0.3248, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.8189590520473976, | |
| "grad_norm": 0.07815794847284734, | |
| "learning_rate": 7.355792792257463e-06, | |
| "loss": 0.3124, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.8212089395530223, | |
| "grad_norm": 0.0869139056537896, | |
| "learning_rate": 7.338451349844225e-06, | |
| "loss": 0.323, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.823458827058647, | |
| "grad_norm": 0.09766019302119812, | |
| "learning_rate": 7.3210738275807225e-06, | |
| "loss": 0.3332, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.8257087145642718, | |
| "grad_norm": 0.08508749834617443, | |
| "learning_rate": 7.303660493583889e-06, | |
| "loss": 0.3285, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.8279586020698965, | |
| "grad_norm": 0.10673197384722342, | |
| "learning_rate": 7.286211616523193e-06, | |
| "loss": 0.3169, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.8302084895755212, | |
| "grad_norm": 0.11681882774169298, | |
| "learning_rate": 7.268727465616497e-06, | |
| "loss": 0.331, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.8324583770811459, | |
| "grad_norm": 0.08970145688216963, | |
| "learning_rate": 7.251208310625899e-06, | |
| "loss": 0.3262, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.8347082645867706, | |
| "grad_norm": 0.08677453595649923, | |
| "learning_rate": 7.2336544218535776e-06, | |
| "loss": 0.2968, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.8369581520923954, | |
| "grad_norm": 0.08463356362517462, | |
| "learning_rate": 7.216066070137614e-06, | |
| "loss": 0.3408, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.8392080395980202, | |
| "grad_norm": 0.10768608728008885, | |
| "learning_rate": 7.198443526847816e-06, | |
| "loss": 0.3222, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.8414579271036449, | |
| "grad_norm": 0.08293925088501428, | |
| "learning_rate": 7.180787063881534e-06, | |
| "loss": 0.3225, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.8437078146092696, | |
| "grad_norm": 0.09753175069029144, | |
| "learning_rate": 7.163096953659462e-06, | |
| "loss": 0.3249, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.8459577021148943, | |
| "grad_norm": 0.10750990409191725, | |
| "learning_rate": 7.145373469121435e-06, | |
| "loss": 0.3248, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.848207589620519, | |
| "grad_norm": 0.0713111477001828, | |
| "learning_rate": 7.1276168837222215e-06, | |
| "loss": 0.3262, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.8504574771261437, | |
| "grad_norm": 0.08520099737279731, | |
| "learning_rate": 7.109827471427299e-06, | |
| "loss": 0.3248, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.8527073646317684, | |
| "grad_norm": 0.1007558956965131, | |
| "learning_rate": 7.092005506708629e-06, | |
| "loss": 0.3063, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.8549572521373932, | |
| "grad_norm": 0.10076509216745107, | |
| "learning_rate": 7.074151264540425e-06, | |
| "loss": 0.3394, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.8572071396430179, | |
| "grad_norm": 0.1128171772187796, | |
| "learning_rate": 7.056265020394908e-06, | |
| "loss": 0.3353, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.8594570271486426, | |
| "grad_norm": 0.07826929688060387, | |
| "learning_rate": 7.038347050238052e-06, | |
| "loss": 0.3313, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.8617069146542673, | |
| "grad_norm": 0.09477827641455178, | |
| "learning_rate": 7.020397630525336e-06, | |
| "loss": 0.3094, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.863956802159892, | |
| "grad_norm": 0.07996338853084985, | |
| "learning_rate": 7.002417038197466e-06, | |
| "loss": 0.3365, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.8662066896655167, | |
| "grad_norm": 0.08681950662638242, | |
| "learning_rate": 6.984405550676113e-06, | |
| "loss": 0.2858, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.8684565771711414, | |
| "grad_norm": 0.10147028587889259, | |
| "learning_rate": 6.966363445859629e-06, | |
| "loss": 0.3307, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.8707064646767662, | |
| "grad_norm": 0.09778557000247115, | |
| "learning_rate": 6.948291002118757e-06, | |
| "loss": 0.3346, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.8729563521823909, | |
| "grad_norm": 0.08335343107919917, | |
| "learning_rate": 6.930188498292334e-06, | |
| "loss": 0.3102, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.8752062396880156, | |
| "grad_norm": 0.127528307390263, | |
| "learning_rate": 6.912056213683001e-06, | |
| "loss": 0.2772, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.8774561271936403, | |
| "grad_norm": 0.08449830219805671, | |
| "learning_rate": 6.893894428052881e-06, | |
| "loss": 0.3331, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.879706014699265, | |
| "grad_norm": 0.10290918076564952, | |
| "learning_rate": 6.875703421619263e-06, | |
| "loss": 0.3162, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.8819559022048897, | |
| "grad_norm": 0.09666086595549915, | |
| "learning_rate": 6.85748347505029e-06, | |
| "loss": 0.3393, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.8842057897105144, | |
| "grad_norm": 0.09126192537758601, | |
| "learning_rate": 6.839234869460614e-06, | |
| "loss": 0.3313, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.8864556772161392, | |
| "grad_norm": 0.09213126718219308, | |
| "learning_rate": 6.820957886407068e-06, | |
| "loss": 0.3298, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.8887055647217639, | |
| "grad_norm": 0.0893744576312266, | |
| "learning_rate": 6.802652807884322e-06, | |
| "loss": 0.3258, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.8909554522273886, | |
| "grad_norm": 0.10520537204979115, | |
| "learning_rate": 6.784319916320528e-06, | |
| "loss": 0.3152, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.8932053397330133, | |
| "grad_norm": 0.09224246726284402, | |
| "learning_rate": 6.765959494572959e-06, | |
| "loss": 0.3176, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.895455227238638, | |
| "grad_norm": 0.09179694827419689, | |
| "learning_rate": 6.74757182592366e-06, | |
| "loss": 0.34, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8977051147442627, | |
| "grad_norm": 0.10131034789212955, | |
| "learning_rate": 6.7291571940750575e-06, | |
| "loss": 0.3171, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.8999550022498876, | |
| "grad_norm": 0.11052424709399664, | |
| "learning_rate": 6.710715883145599e-06, | |
| "loss": 0.3084, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9022048897555123, | |
| "grad_norm": 0.09523315367515199, | |
| "learning_rate": 6.692248177665357e-06, | |
| "loss": 0.3127, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.904454777261137, | |
| "grad_norm": 0.09774145840636202, | |
| "learning_rate": 6.673754362571646e-06, | |
| "loss": 0.2866, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.9067046647667617, | |
| "grad_norm": 0.1231628868544864, | |
| "learning_rate": 6.6552347232046255e-06, | |
| "loss": 0.2926, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.9089545522723864, | |
| "grad_norm": 0.09563379874509359, | |
| "learning_rate": 6.636689545302898e-06, | |
| "loss": 0.3128, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.9112044397780111, | |
| "grad_norm": 0.07820421786999905, | |
| "learning_rate": 6.6181191149990905e-06, | |
| "loss": 0.321, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.9134543272836358, | |
| "grad_norm": 0.10476028051810904, | |
| "learning_rate": 6.599523718815461e-06, | |
| "loss": 0.2836, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.9157042147892606, | |
| "grad_norm": 0.11389284533738375, | |
| "learning_rate": 6.580903643659453e-06, | |
| "loss": 0.2934, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.9179541022948853, | |
| "grad_norm": 0.10996849745288242, | |
| "learning_rate": 6.5622591768192875e-06, | |
| "loss": 0.3243, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.92020398980051, | |
| "grad_norm": 0.09512165946660596, | |
| "learning_rate": 6.5435906059595215e-06, | |
| "loss": 0.3081, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.9224538773061347, | |
| "grad_norm": 0.10421356775522515, | |
| "learning_rate": 6.524898219116612e-06, | |
| "loss": 0.2682, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.9247037648117594, | |
| "grad_norm": 0.10201698883401172, | |
| "learning_rate": 6.5061823046944694e-06, | |
| "loss": 0.2909, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.9269536523173841, | |
| "grad_norm": 0.10974937304411288, | |
| "learning_rate": 6.4874431514600146e-06, | |
| "loss": 0.3072, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.9292035398230089, | |
| "grad_norm": 0.09276233118456312, | |
| "learning_rate": 6.468681048538715e-06, | |
| "loss": 0.2989, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.9314534273286336, | |
| "grad_norm": 0.11862538493837348, | |
| "learning_rate": 6.44989628541013e-06, | |
| "loss": 0.3372, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.9337033148342583, | |
| "grad_norm": 0.10451521274212297, | |
| "learning_rate": 6.431089151903439e-06, | |
| "loss": 0.3188, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.935953202339883, | |
| "grad_norm": 0.11422644044073009, | |
| "learning_rate": 6.412259938192978e-06, | |
| "loss": 0.307, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.9382030898455077, | |
| "grad_norm": 0.14091820208432657, | |
| "learning_rate": 6.393408934793752e-06, | |
| "loss": 0.3546, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.9404529773511324, | |
| "grad_norm": 0.11829750564224563, | |
| "learning_rate": 6.374536432556963e-06, | |
| "loss": 0.3267, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.9427028648567571, | |
| "grad_norm": 0.11528106197624186, | |
| "learning_rate": 6.355642722665512e-06, | |
| "loss": 0.3203, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.9449527523623819, | |
| "grad_norm": 0.09372673822212164, | |
| "learning_rate": 6.336728096629517e-06, | |
| "loss": 0.3151, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.9472026398680066, | |
| "grad_norm": 0.10779896033185006, | |
| "learning_rate": 6.317792846281805e-06, | |
| "loss": 0.3052, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.9494525273736313, | |
| "grad_norm": 0.09672862996353586, | |
| "learning_rate": 6.298837263773423e-06, | |
| "loss": 0.3033, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.951702414879256, | |
| "grad_norm": 0.10872396340925997, | |
| "learning_rate": 6.2798616415691095e-06, | |
| "loss": 0.3002, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.9539523023848807, | |
| "grad_norm": 0.11829489090483326, | |
| "learning_rate": 6.260866272442807e-06, | |
| "loss": 0.2929, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.9562021898905054, | |
| "grad_norm": 0.11145672561455416, | |
| "learning_rate": 6.2418514494731245e-06, | |
| "loss": 0.2808, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.9584520773961301, | |
| "grad_norm": 0.1056896163271936, | |
| "learning_rate": 6.222817466038824e-06, | |
| "loss": 0.2841, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.9607019649017549, | |
| "grad_norm": 0.10666373036314321, | |
| "learning_rate": 6.2037646158142975e-06, | |
| "loss": 0.3005, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.9629518524073797, | |
| "grad_norm": 0.10697096904271322, | |
| "learning_rate": 6.184693192765028e-06, | |
| "loss": 0.2894, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.9652017399130044, | |
| "grad_norm": 0.17157045181184577, | |
| "learning_rate": 6.165603491143057e-06, | |
| "loss": 0.3298, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.9674516274186291, | |
| "grad_norm": 0.1005745666451797, | |
| "learning_rate": 6.146495805482451e-06, | |
| "loss": 0.3196, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.9697015149242538, | |
| "grad_norm": 0.139307317568223, | |
| "learning_rate": 6.127370430594745e-06, | |
| "loss": 0.2993, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.9719514024298785, | |
| "grad_norm": 0.11791582586234053, | |
| "learning_rate": 6.108227661564401e-06, | |
| "loss": 0.3083, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.9742012899355033, | |
| "grad_norm": 0.11233522118086736, | |
| "learning_rate": 6.089067793744258e-06, | |
| "loss": 0.3137, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.976451177441128, | |
| "grad_norm": 0.12524898605746265, | |
| "learning_rate": 6.069891122750971e-06, | |
| "loss": 0.2825, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.9787010649467527, | |
| "grad_norm": 0.09825541745527079, | |
| "learning_rate": 6.050697944460444e-06, | |
| "loss": 0.3146, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.9809509524523774, | |
| "grad_norm": 0.11637412785681134, | |
| "learning_rate": 6.0314885550032796e-06, | |
| "loss": 0.2935, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.9832008399580021, | |
| "grad_norm": 0.10398981333232891, | |
| "learning_rate": 6.012263250760199e-06, | |
| "loss": 0.28, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.9854507274636268, | |
| "grad_norm": 0.1347409630178848, | |
| "learning_rate": 5.993022328357466e-06, | |
| "loss": 0.2899, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.9877006149692515, | |
| "grad_norm": 0.136591408837683, | |
| "learning_rate": 5.973766084662324e-06, | |
| "loss": 0.2729, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.9899505024748763, | |
| "grad_norm": 0.1032954692332516, | |
| "learning_rate": 5.954494816778408e-06, | |
| "loss": 0.3106, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.992200389980501, | |
| "grad_norm": 0.12420490530861028, | |
| "learning_rate": 5.935208822041152e-06, | |
| "loss": 0.2699, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.9944502774861257, | |
| "grad_norm": 0.10146757951487546, | |
| "learning_rate": 5.915908398013217e-06, | |
| "loss": 0.266, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.9967001649917504, | |
| "grad_norm": 0.10690509046474422, | |
| "learning_rate": 5.896593842479893e-06, | |
| "loss": 0.2916, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.9989500524973751, | |
| "grad_norm": 0.2098417588495756, | |
| "learning_rate": 5.8772654534445e-06, | |
| "loss": 0.3104, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.27543845772743225, | |
| "eval_runtime": 55.028, | |
| "eval_samples_per_second": 19.59, | |
| "eval_steps_per_second": 4.907, | |
| "step": 2223 | |
| }, | |
| { | |
| "epoch": 1.00089995500225, | |
| "grad_norm": 0.11100179968154768, | |
| "learning_rate": 5.857923529123799e-06, | |
| "loss": 0.2341, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.0031498425078746, | |
| "grad_norm": 0.14616860643517418, | |
| "learning_rate": 5.838568367943383e-06, | |
| "loss": 0.2679, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.0053997300134994, | |
| "grad_norm": 0.11313230544533252, | |
| "learning_rate": 5.819200268533076e-06, | |
| "loss": 0.2873, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 1.007649617519124, | |
| "grad_norm": 0.1210465260044826, | |
| "learning_rate": 5.7998195297223285e-06, | |
| "loss": 0.2677, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.0098995050247488, | |
| "grad_norm": 0.11722674843174795, | |
| "learning_rate": 5.7804264505356e-06, | |
| "loss": 0.2548, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 1.0121493925303735, | |
| "grad_norm": 0.12390544554268877, | |
| "learning_rate": 5.76102133018775e-06, | |
| "loss": 0.2942, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.0143992800359982, | |
| "grad_norm": 0.14215352813872506, | |
| "learning_rate": 5.741604468079421e-06, | |
| "loss": 0.3095, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 1.016649167541623, | |
| "grad_norm": 0.13309421360381032, | |
| "learning_rate": 5.72217616379242e-06, | |
| "loss": 0.2794, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.0188990550472476, | |
| "grad_norm": 0.1409784002692586, | |
| "learning_rate": 5.702736717085093e-06, | |
| "loss": 0.2998, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 1.0211489425528724, | |
| "grad_norm": 0.12978570417210325, | |
| "learning_rate": 5.6832864278876984e-06, | |
| "loss": 0.2829, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.023398830058497, | |
| "grad_norm": 0.10750959417123264, | |
| "learning_rate": 5.663825596297794e-06, | |
| "loss": 0.2902, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.0256487175641218, | |
| "grad_norm": 0.137940819760974, | |
| "learning_rate": 5.644354522575581e-06, | |
| "loss": 0.2806, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.0278986050697465, | |
| "grad_norm": 0.14563829553392096, | |
| "learning_rate": 5.624873507139297e-06, | |
| "loss": 0.277, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 1.0301484925753712, | |
| "grad_norm": 0.12377796525725795, | |
| "learning_rate": 5.605382850560565e-06, | |
| "loss": 0.2943, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.032398380080996, | |
| "grad_norm": 0.16984305955909604, | |
| "learning_rate": 5.585882853559762e-06, | |
| "loss": 0.2889, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 1.0346482675866207, | |
| "grad_norm": 0.1281002826955631, | |
| "learning_rate": 5.566373817001377e-06, | |
| "loss": 0.293, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.0368981550922454, | |
| "grad_norm": 0.15524678076001608, | |
| "learning_rate": 5.546856041889374e-06, | |
| "loss": 0.2605, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 1.03914804259787, | |
| "grad_norm": 0.14215571774039212, | |
| "learning_rate": 5.527329829362534e-06, | |
| "loss": 0.2786, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.0413979301034948, | |
| "grad_norm": 0.1447748028005779, | |
| "learning_rate": 5.5077954806898284e-06, | |
| "loss": 0.2688, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 1.0436478176091195, | |
| "grad_norm": 0.14426858307924748, | |
| "learning_rate": 5.488253297265757e-06, | |
| "loss": 0.2777, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.0458977051147442, | |
| "grad_norm": 0.1272869099382178, | |
| "learning_rate": 5.468703580605703e-06, | |
| "loss": 0.2997, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.048147592620369, | |
| "grad_norm": 0.133865100418296, | |
| "learning_rate": 5.4491466323412745e-06, | |
| "loss": 0.2839, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.0503974801259937, | |
| "grad_norm": 0.12437130432718715, | |
| "learning_rate": 5.429582754215664e-06, | |
| "loss": 0.2843, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 1.0526473676316184, | |
| "grad_norm": 0.1419352738893503, | |
| "learning_rate": 5.410012248078975e-06, | |
| "loss": 0.2677, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.054897255137243, | |
| "grad_norm": 0.1639413029064359, | |
| "learning_rate": 5.390435415883583e-06, | |
| "loss": 0.2805, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 1.0571471426428678, | |
| "grad_norm": 0.14750894149267404, | |
| "learning_rate": 5.370852559679461e-06, | |
| "loss": 0.2718, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.0593970301484925, | |
| "grad_norm": 0.1418143669594509, | |
| "learning_rate": 5.351263981609532e-06, | |
| "loss": 0.2374, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 1.0616469176541172, | |
| "grad_norm": 0.1467085192211227, | |
| "learning_rate": 5.331669983904996e-06, | |
| "loss": 0.278, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.063896805159742, | |
| "grad_norm": 0.1686191463372291, | |
| "learning_rate": 5.312070868880678e-06, | |
| "loss": 0.2818, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 1.0661466926653667, | |
| "grad_norm": 0.1844876464618337, | |
| "learning_rate": 5.29246693893035e-06, | |
| "loss": 0.2971, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.0683965801709914, | |
| "grad_norm": 0.12521919673631507, | |
| "learning_rate": 5.272858496522084e-06, | |
| "loss": 0.2737, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.070646467676616, | |
| "grad_norm": 0.15034047715143825, | |
| "learning_rate": 5.253245844193564e-06, | |
| "loss": 0.2858, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.0728963551822408, | |
| "grad_norm": 0.1264075738033277, | |
| "learning_rate": 5.233629284547435e-06, | |
| "loss": 0.2564, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 1.0751462426878655, | |
| "grad_norm": 0.1940932983786269, | |
| "learning_rate": 5.214009120246623e-06, | |
| "loss": 0.2722, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.0773961301934902, | |
| "grad_norm": 0.16922904631843647, | |
| "learning_rate": 5.1943856540096795e-06, | |
| "loss": 0.2912, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 1.079646017699115, | |
| "grad_norm": 0.23716139744779294, | |
| "learning_rate": 5.174759188606087e-06, | |
| "loss": 0.2885, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.0818959052047397, | |
| "grad_norm": 0.11509288529342813, | |
| "learning_rate": 5.155130026851616e-06, | |
| "loss": 0.2575, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 1.0841457927103644, | |
| "grad_norm": 0.17727493415132747, | |
| "learning_rate": 5.135498471603629e-06, | |
| "loss": 0.2639, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.0863956802159893, | |
| "grad_norm": 0.13930766876349623, | |
| "learning_rate": 5.1158648257564235e-06, | |
| "loss": 0.2606, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 1.0886455677216138, | |
| "grad_norm": 0.12454839412933186, | |
| "learning_rate": 5.0962293922365495e-06, | |
| "loss": 0.256, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.0908954552272387, | |
| "grad_norm": 0.18809390149779476, | |
| "learning_rate": 5.076592473998141e-06, | |
| "loss": 0.2646, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.0931453427328635, | |
| "grad_norm": 0.1508834503375353, | |
| "learning_rate": 5.056954374018236e-06, | |
| "loss": 0.2764, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.0953952302384882, | |
| "grad_norm": 0.1491889266816844, | |
| "learning_rate": 5.037315395292111e-06, | |
| "loss": 0.2691, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 1.0976451177441129, | |
| "grad_norm": 0.15633034297704468, | |
| "learning_rate": 5.017675840828597e-06, | |
| "loss": 0.2657, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.0998950052497376, | |
| "grad_norm": 0.15782519717103635, | |
| "learning_rate": 4.998036013645409e-06, | |
| "loss": 0.2561, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 1.1021448927553623, | |
| "grad_norm": 0.19449808917352213, | |
| "learning_rate": 4.97839621676447e-06, | |
| "loss": 0.2571, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.104394780260987, | |
| "grad_norm": 0.16872055966750726, | |
| "learning_rate": 4.958756753207234e-06, | |
| "loss": 0.2459, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 1.1066446677666117, | |
| "grad_norm": 0.17373438335912267, | |
| "learning_rate": 4.939117925990013e-06, | |
| "loss": 0.2805, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.1088945552722365, | |
| "grad_norm": 0.15151992334964703, | |
| "learning_rate": 4.919480038119302e-06, | |
| "loss": 0.251, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 1.1111444427778612, | |
| "grad_norm": 0.15554454267536397, | |
| "learning_rate": 4.899843392587104e-06, | |
| "loss": 0.2533, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.113394330283486, | |
| "grad_norm": 0.16595294302301358, | |
| "learning_rate": 4.880208292366247e-06, | |
| "loss": 0.2864, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.1156442177891106, | |
| "grad_norm": 0.15038201249362013, | |
| "learning_rate": 4.860575040405726e-06, | |
| "loss": 0.2744, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.1178941052947353, | |
| "grad_norm": 0.16630054816008968, | |
| "learning_rate": 4.840943939626012e-06, | |
| "loss": 0.2362, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 1.12014399280036, | |
| "grad_norm": 0.18269629542973387, | |
| "learning_rate": 4.821315292914392e-06, | |
| "loss": 0.2786, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.1223938803059847, | |
| "grad_norm": 0.16417528180865418, | |
| "learning_rate": 4.801689403120282e-06, | |
| "loss": 0.2506, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 1.1246437678116095, | |
| "grad_norm": 0.12251195240813534, | |
| "learning_rate": 4.782066573050567e-06, | |
| "loss": 0.2693, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.1268936553172342, | |
| "grad_norm": 0.19913321021658195, | |
| "learning_rate": 4.7624471054649216e-06, | |
| "loss": 0.26, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 1.129143542822859, | |
| "grad_norm": 0.16359478594452095, | |
| "learning_rate": 4.742831303071143e-06, | |
| "loss": 0.2507, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.1313934303284836, | |
| "grad_norm": 0.20741074237045662, | |
| "learning_rate": 4.723219468520474e-06, | |
| "loss": 0.2678, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 1.1336433178341083, | |
| "grad_norm": 0.16956816625653676, | |
| "learning_rate": 4.703611904402939e-06, | |
| "loss": 0.2795, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.135893205339733, | |
| "grad_norm": 0.1818340434409631, | |
| "learning_rate": 4.684008913242679e-06, | |
| "loss": 0.2586, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.1381430928453578, | |
| "grad_norm": 0.17749209313732456, | |
| "learning_rate": 4.664410797493275e-06, | |
| "loss": 0.2708, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.1403929803509825, | |
| "grad_norm": 0.167827444506409, | |
| "learning_rate": 4.644817859533083e-06, | |
| "loss": 0.2717, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 1.1426428678566072, | |
| "grad_norm": 0.17149191797141825, | |
| "learning_rate": 4.625230401660578e-06, | |
| "loss": 0.2444, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.144892755362232, | |
| "grad_norm": 0.19053262323498327, | |
| "learning_rate": 4.605648726089674e-06, | |
| "loss": 0.2546, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 1.1471426428678566, | |
| "grad_norm": 0.17029611567515032, | |
| "learning_rate": 4.58607313494508e-06, | |
| "loss": 0.2515, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.1493925303734813, | |
| "grad_norm": 0.20535330778256622, | |
| "learning_rate": 4.566503930257624e-06, | |
| "loss": 0.2687, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 1.151642417879106, | |
| "grad_norm": 0.17888453950166083, | |
| "learning_rate": 4.546941413959595e-06, | |
| "loss": 0.2582, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.1538923053847308, | |
| "grad_norm": 0.17098481716726255, | |
| "learning_rate": 4.5273858878800895e-06, | |
| "loss": 0.2633, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 1.1561421928903555, | |
| "grad_norm": 0.22394541422414396, | |
| "learning_rate": 4.507837653740355e-06, | |
| "loss": 0.2657, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.1583920803959802, | |
| "grad_norm": 0.16148745686481833, | |
| "learning_rate": 4.4882970131491286e-06, | |
| "loss": 0.2469, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.160641967901605, | |
| "grad_norm": 0.21762812124764483, | |
| "learning_rate": 4.468764267597986e-06, | |
| "loss": 0.2815, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.1628918554072296, | |
| "grad_norm": 0.2041647572323139, | |
| "learning_rate": 4.449239718456696e-06, | |
| "loss": 0.253, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 1.1651417429128543, | |
| "grad_norm": 0.1508182234886033, | |
| "learning_rate": 4.429723666968559e-06, | |
| "loss": 0.2532, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.167391630418479, | |
| "grad_norm": 0.22173731592066487, | |
| "learning_rate": 4.410216414245771e-06, | |
| "loss": 0.2597, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 1.1696415179241038, | |
| "grad_norm": 0.15334607029538722, | |
| "learning_rate": 4.390718261264768e-06, | |
| "loss": 0.2429, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.1718914054297285, | |
| "grad_norm": 0.17386719805484463, | |
| "learning_rate": 4.371229508861588e-06, | |
| "loss": 0.2718, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 1.1741412929353532, | |
| "grad_norm": 0.255145373819277, | |
| "learning_rate": 4.351750457727229e-06, | |
| "loss": 0.2544, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.176391180440978, | |
| "grad_norm": 0.19091868423027997, | |
| "learning_rate": 4.332281408403011e-06, | |
| "loss": 0.26, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 1.1786410679466026, | |
| "grad_norm": 0.17031635023758315, | |
| "learning_rate": 4.312822661275929e-06, | |
| "loss": 0.2478, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.1808909554522273, | |
| "grad_norm": 0.18810141305157912, | |
| "learning_rate": 4.293374516574031e-06, | |
| "loss": 0.2593, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.183140842957852, | |
| "grad_norm": 0.20489249951929697, | |
| "learning_rate": 4.273937274361782e-06, | |
| "loss": 0.2226, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.1853907304634768, | |
| "grad_norm": 0.18589998495363094, | |
| "learning_rate": 4.254511234535432e-06, | |
| "loss": 0.2313, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 1.1876406179691015, | |
| "grad_norm": 0.1974695166475231, | |
| "learning_rate": 4.235096696818385e-06, | |
| "loss": 0.2782, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.1898905054747262, | |
| "grad_norm": 0.15560807641673985, | |
| "learning_rate": 4.215693960756586e-06, | |
| "loss": 0.2461, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 1.192140392980351, | |
| "grad_norm": 0.14168460680781833, | |
| "learning_rate": 4.1963033257138904e-06, | |
| "loss": 0.2323, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.1943902804859756, | |
| "grad_norm": 0.19193101382035213, | |
| "learning_rate": 4.176925090867449e-06, | |
| "loss": 0.252, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 1.1966401679916003, | |
| "grad_norm": 0.19059681316908272, | |
| "learning_rate": 4.157559555203086e-06, | |
| "loss": 0.2237, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.198890055497225, | |
| "grad_norm": 0.18365584045782385, | |
| "learning_rate": 4.138207017510696e-06, | |
| "loss": 0.2498, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 1.2011399430028498, | |
| "grad_norm": 0.17126185601849214, | |
| "learning_rate": 4.118867776379624e-06, | |
| "loss": 0.2121, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.2033898305084745, | |
| "grad_norm": 0.23530086737062514, | |
| "learning_rate": 4.099542130194069e-06, | |
| "loss": 0.2369, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.2056397180140994, | |
| "grad_norm": 0.1759441387313428, | |
| "learning_rate": 4.0802303771284685e-06, | |
| "loss": 0.2171, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.207889605519724, | |
| "grad_norm": 0.19878924933956027, | |
| "learning_rate": 4.060932815142904e-06, | |
| "loss": 0.2631, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 1.2101394930253488, | |
| "grad_norm": 0.1984620336427276, | |
| "learning_rate": 4.041649741978508e-06, | |
| "loss": 0.2408, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.2123893805309733, | |
| "grad_norm": 0.16406158411947314, | |
| "learning_rate": 4.022381455152863e-06, | |
| "loss": 0.2204, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 1.2146392680365983, | |
| "grad_norm": 0.18585134324802086, | |
| "learning_rate": 4.003128251955412e-06, | |
| "loss": 0.2254, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.2168891555422228, | |
| "grad_norm": 0.2028470417783533, | |
| "learning_rate": 3.983890429442876e-06, | |
| "loss": 0.2174, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 1.2191390430478477, | |
| "grad_norm": 0.18306124060212872, | |
| "learning_rate": 3.964668284434666e-06, | |
| "loss": 0.2281, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.2213889305534724, | |
| "grad_norm": 0.2261111639681813, | |
| "learning_rate": 3.945462113508312e-06, | |
| "loss": 0.2183, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 1.2236388180590971, | |
| "grad_norm": 0.21171892163095699, | |
| "learning_rate": 3.92627221299487e-06, | |
| "loss": 0.2249, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.2258887055647218, | |
| "grad_norm": 0.18554866614076224, | |
| "learning_rate": 3.907098878974367e-06, | |
| "loss": 0.2356, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.2281385930703466, | |
| "grad_norm": 0.17740452807380613, | |
| "learning_rate": 3.887942407271228e-06, | |
| "loss": 0.2213, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.2303884805759713, | |
| "grad_norm": 0.19628544094095077, | |
| "learning_rate": 3.868803093449709e-06, | |
| "loss": 0.2256, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 1.232638368081596, | |
| "grad_norm": 0.1906710395370276, | |
| "learning_rate": 3.8496812328093335e-06, | |
| "loss": 0.2431, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.2348882555872207, | |
| "grad_norm": 0.21739799246928065, | |
| "learning_rate": 3.8305771203803434e-06, | |
| "loss": 0.2053, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 1.2371381430928454, | |
| "grad_norm": 0.20933189544262915, | |
| "learning_rate": 3.8114910509191483e-06, | |
| "loss": 0.2372, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.2393880305984701, | |
| "grad_norm": 0.16805864711967494, | |
| "learning_rate": 3.7924233189037697e-06, | |
| "loss": 0.2421, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 1.2416379181040949, | |
| "grad_norm": 0.23407049517629622, | |
| "learning_rate": 3.773374218529298e-06, | |
| "loss": 0.2289, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.2438878056097196, | |
| "grad_norm": 0.20043628906146582, | |
| "learning_rate": 3.7543440437033656e-06, | |
| "loss": 0.2197, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 1.2461376931153443, | |
| "grad_norm": 0.1811301883423287, | |
| "learning_rate": 3.7353330880415963e-06, | |
| "loss": 0.2118, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.248387580620969, | |
| "grad_norm": 0.20413664615759625, | |
| "learning_rate": 3.7163416448630886e-06, | |
| "loss": 0.2103, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.2506374681265937, | |
| "grad_norm": 0.2053294418375065, | |
| "learning_rate": 3.6973700071858764e-06, | |
| "loss": 0.2265, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.2528873556322184, | |
| "grad_norm": 0.17855437216730508, | |
| "learning_rate": 3.6784184677224204e-06, | |
| "loss": 0.2082, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 1.2551372431378431, | |
| "grad_norm": 0.21204933584524724, | |
| "learning_rate": 3.659487318875087e-06, | |
| "loss": 0.2368, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.2573871306434679, | |
| "grad_norm": 0.244934854739885, | |
| "learning_rate": 3.6405768527316376e-06, | |
| "loss": 0.2236, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 1.2596370181490926, | |
| "grad_norm": 0.20352719384257717, | |
| "learning_rate": 3.6216873610607155e-06, | |
| "loss": 0.2127, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.2618869056547173, | |
| "grad_norm": 0.21525625357885447, | |
| "learning_rate": 3.602819135307355e-06, | |
| "loss": 0.2026, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 1.264136793160342, | |
| "grad_norm": 0.24886200931475094, | |
| "learning_rate": 3.58397246658848e-06, | |
| "loss": 0.2049, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.2663866806659667, | |
| "grad_norm": 0.22213048059657176, | |
| "learning_rate": 3.5651476456884103e-06, | |
| "loss": 0.2149, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 1.2686365681715914, | |
| "grad_norm": 0.24474792019196667, | |
| "learning_rate": 3.5463449630543744e-06, | |
| "loss": 0.2176, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.2708864556772161, | |
| "grad_norm": 0.21959268792414904, | |
| "learning_rate": 3.527564708792035e-06, | |
| "loss": 0.2319, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.2731363431828409, | |
| "grad_norm": 0.21285142665025264, | |
| "learning_rate": 3.508807172661006e-06, | |
| "loss": 0.2278, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.2753862306884656, | |
| "grad_norm": 0.24872484432655345, | |
| "learning_rate": 3.490072644070386e-06, | |
| "loss": 0.2367, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 1.2776361181940903, | |
| "grad_norm": 0.2446892197957464, | |
| "learning_rate": 3.47136141207429e-06, | |
| "loss": 0.2147, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.279886005699715, | |
| "grad_norm": 0.3593552477933211, | |
| "learning_rate": 3.452673765367389e-06, | |
| "loss": 0.2471, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 1.2821358932053397, | |
| "grad_norm": 0.18760658096432373, | |
| "learning_rate": 3.4340099922804627e-06, | |
| "loss": 0.2185, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.2843857807109644, | |
| "grad_norm": 0.1746094898464911, | |
| "learning_rate": 3.4153703807759432e-06, | |
| "loss": 0.1939, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 1.2866356682165891, | |
| "grad_norm": 0.2386232051443061, | |
| "learning_rate": 3.3967552184434753e-06, | |
| "loss": 0.2182, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.2888855557222139, | |
| "grad_norm": 0.2147456869413775, | |
| "learning_rate": 3.378164792495475e-06, | |
| "loss": 0.2232, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 1.2911354432278386, | |
| "grad_norm": 0.21939888824914258, | |
| "learning_rate": 3.3595993897627098e-06, | |
| "loss": 0.2059, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.2933853307334633, | |
| "grad_norm": 0.20007621997926173, | |
| "learning_rate": 3.3410592966898565e-06, | |
| "loss": 0.2025, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.295635218239088, | |
| "grad_norm": 0.22959303011889556, | |
| "learning_rate": 3.3225447993310983e-06, | |
| "loss": 0.2004, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.2978851057447127, | |
| "grad_norm": 0.23309801112874845, | |
| "learning_rate": 3.3040561833456964e-06, | |
| "loss": 0.1914, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 1.3001349932503374, | |
| "grad_norm": 0.22848735574436602, | |
| "learning_rate": 3.2855937339935933e-06, | |
| "loss": 0.1844, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.3023848807559621, | |
| "grad_norm": 0.20570875834144497, | |
| "learning_rate": 3.2671577361310087e-06, | |
| "loss": 0.2132, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 1.3046347682615869, | |
| "grad_norm": 0.24826968315533732, | |
| "learning_rate": 3.2487484742060427e-06, | |
| "loss": 0.2111, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.3068846557672116, | |
| "grad_norm": 0.2410762961266627, | |
| "learning_rate": 3.2303662322542835e-06, | |
| "loss": 0.1948, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 1.3091345432728363, | |
| "grad_norm": 0.23347593077480983, | |
| "learning_rate": 3.212011293894436e-06, | |
| "loss": 0.2008, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.311384430778461, | |
| "grad_norm": 0.21360507603920142, | |
| "learning_rate": 3.1936839423239376e-06, | |
| "loss": 0.2042, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 1.3136343182840857, | |
| "grad_norm": 0.21960761516089436, | |
| "learning_rate": 3.1753844603145894e-06, | |
| "loss": 0.2391, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.3158842057897104, | |
| "grad_norm": 0.20203229318870164, | |
| "learning_rate": 3.1571131302081916e-06, | |
| "loss": 0.1876, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.3181340932953352, | |
| "grad_norm": 0.24191918555495237, | |
| "learning_rate": 3.138870233912197e-06, | |
| "loss": 0.1962, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.32038398080096, | |
| "grad_norm": 0.20921020378628946, | |
| "learning_rate": 3.1206560528953467e-06, | |
| "loss": 0.2058, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 1.3226338683065846, | |
| "grad_norm": 0.20869954401470014, | |
| "learning_rate": 3.102470868183344e-06, | |
| "loss": 0.2064, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.3248837558122095, | |
| "grad_norm": 0.2249649340119077, | |
| "learning_rate": 3.084314960354501e-06, | |
| "loss": 0.2046, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 1.327133643317834, | |
| "grad_norm": 0.25182875069609073, | |
| "learning_rate": 3.066188609535421e-06, | |
| "loss": 0.2037, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.329383530823459, | |
| "grad_norm": 0.21107658603026272, | |
| "learning_rate": 3.0480920953966786e-06, | |
| "loss": 0.2094, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 1.3316334183290834, | |
| "grad_norm": 0.22913188913268076, | |
| "learning_rate": 3.0300256971484943e-06, | |
| "loss": 0.2162, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.3338833058347084, | |
| "grad_norm": 0.2539850632246194, | |
| "learning_rate": 3.0119896935364305e-06, | |
| "loss": 0.1941, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 1.3361331933403329, | |
| "grad_norm": 0.23858971814994895, | |
| "learning_rate": 2.993984362837098e-06, | |
| "loss": 0.1839, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.3383830808459578, | |
| "grad_norm": 0.2721885562055672, | |
| "learning_rate": 2.9760099828538545e-06, | |
| "loss": 0.211, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.3406329683515823, | |
| "grad_norm": 0.2482373958173057, | |
| "learning_rate": 2.9580668309125203e-06, | |
| "loss": 0.1998, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.3428828558572072, | |
| "grad_norm": 0.2306376622557913, | |
| "learning_rate": 2.940155183857096e-06, | |
| "loss": 0.2196, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 1.3451327433628317, | |
| "grad_norm": 0.26262612593731016, | |
| "learning_rate": 2.922275318045502e-06, | |
| "loss": 0.1882, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.3473826308684567, | |
| "grad_norm": 0.20346419756531464, | |
| "learning_rate": 2.9044275093453034e-06, | |
| "loss": 0.193, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 1.3496325183740812, | |
| "grad_norm": 0.22993902000452152, | |
| "learning_rate": 2.8866120331294567e-06, | |
| "loss": 0.1736, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.351882405879706, | |
| "grad_norm": 0.27935117403868454, | |
| "learning_rate": 2.8688291642720656e-06, | |
| "loss": 0.1904, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 1.3541322933853308, | |
| "grad_norm": 0.27365571988160076, | |
| "learning_rate": 2.8510791771441327e-06, | |
| "loss": 0.1853, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.3563821808909555, | |
| "grad_norm": 0.23083646098925237, | |
| "learning_rate": 2.8333623456093313e-06, | |
| "loss": 0.1968, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 1.3586320683965802, | |
| "grad_norm": 0.2704413392632432, | |
| "learning_rate": 2.815678943019784e-06, | |
| "loss": 0.1927, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.360881955902205, | |
| "grad_norm": 0.24524940743516008, | |
| "learning_rate": 2.7980292422118282e-06, | |
| "loss": 0.2051, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.3631318434078297, | |
| "grad_norm": 0.24409905829083706, | |
| "learning_rate": 2.7804135155018307e-06, | |
| "loss": 0.164, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.3653817309134544, | |
| "grad_norm": 0.23814624701122666, | |
| "learning_rate": 2.762832034681965e-06, | |
| "loss": 0.1777, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 1.367631618419079, | |
| "grad_norm": 0.24368144586742516, | |
| "learning_rate": 2.7452850710160305e-06, | |
| "loss": 0.1946, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.3698815059247038, | |
| "grad_norm": 0.24657044578855591, | |
| "learning_rate": 2.727772895235262e-06, | |
| "loss": 0.2024, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 1.3721313934303285, | |
| "grad_norm": 0.20668089980394588, | |
| "learning_rate": 2.710295777534154e-06, | |
| "loss": 0.1853, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.3743812809359532, | |
| "grad_norm": 0.22601090907078772, | |
| "learning_rate": 2.692853987566291e-06, | |
| "loss": 0.1764, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 1.376631168441578, | |
| "grad_norm": 0.2872809956397954, | |
| "learning_rate": 2.675447794440188e-06, | |
| "loss": 0.1609, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.3788810559472027, | |
| "grad_norm": 0.2411498866283728, | |
| "learning_rate": 2.658077466715138e-06, | |
| "loss": 0.1813, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 1.3811309434528274, | |
| "grad_norm": 0.19284001830686515, | |
| "learning_rate": 2.6407432723970694e-06, | |
| "loss": 0.1751, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.383380830958452, | |
| "grad_norm": 0.23962546665483148, | |
| "learning_rate": 2.6234454789344067e-06, | |
| "loss": 0.164, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.3856307184640768, | |
| "grad_norm": 0.25424115274026465, | |
| "learning_rate": 2.6061843532139563e-06, | |
| "loss": 0.1816, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.3878806059697015, | |
| "grad_norm": 0.25313051025088457, | |
| "learning_rate": 2.5889601615567657e-06, | |
| "loss": 0.1813, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 1.3901304934753262, | |
| "grad_norm": 0.2378148779779353, | |
| "learning_rate": 2.5717731697140425e-06, | |
| "loss": 0.1822, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.392380380980951, | |
| "grad_norm": 0.24347808422412195, | |
| "learning_rate": 2.554623642863031e-06, | |
| "loss": 0.165, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 1.3946302684865757, | |
| "grad_norm": 0.26898645682575706, | |
| "learning_rate": 2.5375118456029345e-06, | |
| "loss": 0.1834, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.3968801559922004, | |
| "grad_norm": 0.20720842966338204, | |
| "learning_rate": 2.520438041950827e-06, | |
| "loss": 0.1638, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 1.399130043497825, | |
| "grad_norm": 0.17953407898094143, | |
| "learning_rate": 2.503402495337579e-06, | |
| "loss": 0.1547, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.4013799310034498, | |
| "grad_norm": 0.27527731902583097, | |
| "learning_rate": 2.4864054686037993e-06, | |
| "loss": 0.1771, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 1.4036298185090745, | |
| "grad_norm": 0.2441103318991431, | |
| "learning_rate": 2.469447223995772e-06, | |
| "loss": 0.1866, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.4058797060146992, | |
| "grad_norm": 0.22146555295977546, | |
| "learning_rate": 2.452528023161414e-06, | |
| "loss": 0.1658, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.408129593520324, | |
| "grad_norm": 0.24036023292266545, | |
| "learning_rate": 2.4356481271462396e-06, | |
| "loss": 0.1951, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.4103794810259487, | |
| "grad_norm": 0.2346343428333368, | |
| "learning_rate": 2.4188077963893276e-06, | |
| "loss": 0.1724, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 1.4126293685315734, | |
| "grad_norm": 0.28341682436165366, | |
| "learning_rate": 2.4020072907193123e-06, | |
| "loss": 0.1786, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.414879256037198, | |
| "grad_norm": 0.25526003142103276, | |
| "learning_rate": 2.3852468693503635e-06, | |
| "loss": 0.166, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 1.4171291435428228, | |
| "grad_norm": 0.21169312639212728, | |
| "learning_rate": 2.3685267908781934e-06, | |
| "loss": 0.1651, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.4193790310484475, | |
| "grad_norm": 0.2311680518850515, | |
| "learning_rate": 2.3518473132760668e-06, | |
| "loss": 0.1943, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 1.4216289185540723, | |
| "grad_norm": 0.2466100225354571, | |
| "learning_rate": 2.335208693890819e-06, | |
| "loss": 0.1759, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.423878806059697, | |
| "grad_norm": 0.25320147361029777, | |
| "learning_rate": 2.318611189438884e-06, | |
| "loss": 0.1741, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 1.4261286935653217, | |
| "grad_norm": 0.20379034599277449, | |
| "learning_rate": 2.30205505600234e-06, | |
| "loss": 0.1515, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.4283785810709464, | |
| "grad_norm": 0.2171396909921854, | |
| "learning_rate": 2.2855405490249498e-06, | |
| "loss": 0.17, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.430628468576571, | |
| "grad_norm": 0.25025327769749556, | |
| "learning_rate": 2.2690679233082237e-06, | |
| "loss": 0.1442, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.4328783560821958, | |
| "grad_norm": 0.20134411154173665, | |
| "learning_rate": 2.2526374330074945e-06, | |
| "loss": 0.172, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 1.4351282435878205, | |
| "grad_norm": 0.20636498909519851, | |
| "learning_rate": 2.23624933162798e-06, | |
| "loss": 0.1663, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.4373781310934453, | |
| "grad_norm": 0.24524283669244562, | |
| "learning_rate": 2.219903872020885e-06, | |
| "loss": 0.1726, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 1.43962801859907, | |
| "grad_norm": 0.226380159375995, | |
| "learning_rate": 2.2036013063795024e-06, | |
| "loss": 0.1707, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.4418779061046947, | |
| "grad_norm": 0.23433271109204132, | |
| "learning_rate": 2.1873418862353095e-06, | |
| "loss": 0.1885, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 1.4441277936103196, | |
| "grad_norm": 0.27219913617478064, | |
| "learning_rate": 2.1711258624540955e-06, | |
| "loss": 0.1627, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.4463776811159441, | |
| "grad_norm": 0.2720555642584958, | |
| "learning_rate": 2.15495348523209e-06, | |
| "loss": 0.1637, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 1.448627568621569, | |
| "grad_norm": 0.26833619371988116, | |
| "learning_rate": 2.1388250040921007e-06, | |
| "loss": 0.1536, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.4508774561271935, | |
| "grad_norm": 0.2603614911120465, | |
| "learning_rate": 2.1227406678796664e-06, | |
| "loss": 0.1608, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.4531273436328185, | |
| "grad_norm": 0.21162975240993986, | |
| "learning_rate": 2.1067007247592153e-06, | |
| "loss": 0.1649, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.455377231138443, | |
| "grad_norm": 0.240416434380939, | |
| "learning_rate": 2.0907054222102367e-06, | |
| "loss": 0.157, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 1.457627118644068, | |
| "grad_norm": 0.22619962147371023, | |
| "learning_rate": 2.074755007023461e-06, | |
| "loss": 0.152, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.4598770061496924, | |
| "grad_norm": 0.23122000932974787, | |
| "learning_rate": 2.058849725297061e-06, | |
| "loss": 0.1674, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 1.4621268936553173, | |
| "grad_norm": 0.20088454332338435, | |
| "learning_rate": 2.042989822432837e-06, | |
| "loss": 0.1426, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.4643767811609418, | |
| "grad_norm": 0.22446161891702227, | |
| "learning_rate": 2.0271755431324456e-06, | |
| "loss": 0.1431, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 1.4666266686665668, | |
| "grad_norm": 0.25951357745647785, | |
| "learning_rate": 2.011407131393624e-06, | |
| "loss": 0.1521, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.4688765561721913, | |
| "grad_norm": 0.23931332134441274, | |
| "learning_rate": 1.9956848305064156e-06, | |
| "loss": 0.1348, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 1.4711264436778162, | |
| "grad_norm": 0.2443591943685552, | |
| "learning_rate": 1.9800088830494233e-06, | |
| "loss": 0.1616, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.4733763311834407, | |
| "grad_norm": 0.2583750411808441, | |
| "learning_rate": 1.964379530886066e-06, | |
| "loss": 0.1568, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.4756262186890656, | |
| "grad_norm": 0.23149031179647173, | |
| "learning_rate": 1.948797015160845e-06, | |
| "loss": 0.1628, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.4778761061946903, | |
| "grad_norm": 0.24603772896490778, | |
| "learning_rate": 1.9332615762956252e-06, | |
| "loss": 0.172, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 1.480125993700315, | |
| "grad_norm": 0.21815434321755453, | |
| "learning_rate": 1.9177734539859246e-06, | |
| "loss": 0.1412, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.4823758812059398, | |
| "grad_norm": 0.22721807904843327, | |
| "learning_rate": 1.9023328871972163e-06, | |
| "loss": 0.1535, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 1.4846257687115645, | |
| "grad_norm": 0.242196598571302, | |
| "learning_rate": 1.886940114161241e-06, | |
| "loss": 0.1418, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.4868756562171892, | |
| "grad_norm": 0.31031701723743615, | |
| "learning_rate": 1.8715953723723374e-06, | |
| "loss": 0.152, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 1.489125543722814, | |
| "grad_norm": 0.2768031393228084, | |
| "learning_rate": 1.8562988985837632e-06, | |
| "loss": 0.1333, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.4913754312284386, | |
| "grad_norm": 0.20415150169731586, | |
| "learning_rate": 1.8410509288040557e-06, | |
| "loss": 0.1414, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 1.4936253187340633, | |
| "grad_norm": 0.26667619653525043, | |
| "learning_rate": 1.8258516982933905e-06, | |
| "loss": 0.1468, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.495875206239688, | |
| "grad_norm": 0.2944474959496459, | |
| "learning_rate": 1.8107014415599416e-06, | |
| "loss": 0.1191, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.4981250937453128, | |
| "grad_norm": 0.2393057029593332, | |
| "learning_rate": 1.7956003923562715e-06, | |
| "loss": 0.1404, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.5003749812509375, | |
| "grad_norm": 0.2714540493615385, | |
| "learning_rate": 1.7805487836757224e-06, | |
| "loss": 0.139, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 1.5026248687565622, | |
| "grad_norm": 0.24328575957899592, | |
| "learning_rate": 1.7655468477488191e-06, | |
| "loss": 0.1388, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.504874756262187, | |
| "grad_norm": 0.22919700731255085, | |
| "learning_rate": 1.7505948160396901e-06, | |
| "loss": 0.1238, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 1.5071246437678116, | |
| "grad_norm": 0.2619372461489788, | |
| "learning_rate": 1.7356929192424937e-06, | |
| "loss": 0.1451, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.5093745312734363, | |
| "grad_norm": 0.2086634801533094, | |
| "learning_rate": 1.720841387277858e-06, | |
| "loss": 0.1485, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 1.511624418779061, | |
| "grad_norm": 0.22980427576347195, | |
| "learning_rate": 1.7060404492893345e-06, | |
| "loss": 0.1474, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.5138743062846858, | |
| "grad_norm": 0.24590985962229212, | |
| "learning_rate": 1.6912903336398677e-06, | |
| "loss": 0.1375, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 1.5161241937903105, | |
| "grad_norm": 0.25732491354780235, | |
| "learning_rate": 1.6765912679082592e-06, | |
| "loss": 0.1357, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.5183740812959352, | |
| "grad_norm": 0.26500627710086616, | |
| "learning_rate": 1.6619434788856664e-06, | |
| "loss": 0.1419, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.52062396880156, | |
| "grad_norm": 0.2928540052735079, | |
| "learning_rate": 1.647347192572105e-06, | |
| "loss": 0.1307, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.5228738563071846, | |
| "grad_norm": 0.21671093221137389, | |
| "learning_rate": 1.6328026341729547e-06, | |
| "loss": 0.1269, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 1.5251237438128094, | |
| "grad_norm": 0.20008846223184612, | |
| "learning_rate": 1.618310028095486e-06, | |
| "loss": 0.1229, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.527373631318434, | |
| "grad_norm": 0.27258283595012933, | |
| "learning_rate": 1.6038695979454033e-06, | |
| "loss": 0.1291, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 1.5296235188240588, | |
| "grad_norm": 0.1659681540695972, | |
| "learning_rate": 1.589481566523388e-06, | |
| "loss": 0.1132, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.5318734063296835, | |
| "grad_norm": 0.29801173110625, | |
| "learning_rate": 1.5751461558216662e-06, | |
| "loss": 0.1557, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 1.5341232938353082, | |
| "grad_norm": 0.21585855046664817, | |
| "learning_rate": 1.5608635870205813e-06, | |
| "loss": 0.1275, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.536373181340933, | |
| "grad_norm": 0.22691724613303907, | |
| "learning_rate": 1.546634080485181e-06, | |
| "loss": 0.1263, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 1.5386230688465576, | |
| "grad_norm": 0.3126367672650283, | |
| "learning_rate": 1.5324578557618158e-06, | |
| "loss": 0.1281, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.5408729563521824, | |
| "grad_norm": 0.2605966357941338, | |
| "learning_rate": 1.5183351315747618e-06, | |
| "loss": 0.1334, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.543122843857807, | |
| "grad_norm": 0.21630173206179193, | |
| "learning_rate": 1.5042661258228268e-06, | |
| "loss": 0.1372, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.5453727313634318, | |
| "grad_norm": 0.26829878682557234, | |
| "learning_rate": 1.4902510555760052e-06, | |
| "loss": 0.1257, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 1.5476226188690565, | |
| "grad_norm": 0.29544397855593446, | |
| "learning_rate": 1.4762901370721266e-06, | |
| "loss": 0.1396, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.5498725063746812, | |
| "grad_norm": 0.24819039772864243, | |
| "learning_rate": 1.4623835857135099e-06, | |
| "loss": 0.1298, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 1.552122393880306, | |
| "grad_norm": 0.20631029388339692, | |
| "learning_rate": 1.4485316160636491e-06, | |
| "loss": 0.1135, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.5543722813859309, | |
| "grad_norm": 0.2588845660409855, | |
| "learning_rate": 1.434734441843899e-06, | |
| "loss": 0.119, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 1.5566221688915554, | |
| "grad_norm": 0.27482300851220287, | |
| "learning_rate": 1.420992275930178e-06, | |
| "loss": 0.1228, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.5588720563971803, | |
| "grad_norm": 0.23756828701782703, | |
| "learning_rate": 1.4073053303496837e-06, | |
| "loss": 0.1397, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 1.5611219439028048, | |
| "grad_norm": 0.2565129570324179, | |
| "learning_rate": 1.3936738162776269e-06, | |
| "loss": 0.1171, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.5633718314084297, | |
| "grad_norm": 0.23747517958547196, | |
| "learning_rate": 1.3800979440339602e-06, | |
| "loss": 0.1112, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.5656217189140542, | |
| "grad_norm": 0.28612053881235616, | |
| "learning_rate": 1.3665779230801452e-06, | |
| "loss": 0.1214, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.5678716064196792, | |
| "grad_norm": 0.21974317229860285, | |
| "learning_rate": 1.353113962015919e-06, | |
| "loss": 0.1133, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 1.5701214939253036, | |
| "grad_norm": 0.2586485593990932, | |
| "learning_rate": 1.3397062685760715e-06, | |
| "loss": 0.131, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.5723713814309286, | |
| "grad_norm": 0.24428264502478964, | |
| "learning_rate": 1.326355049627238e-06, | |
| "loss": 0.1239, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 1.574621268936553, | |
| "grad_norm": 0.2440528536513943, | |
| "learning_rate": 1.31306051116472e-06, | |
| "loss": 0.1156, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.576871156442178, | |
| "grad_norm": 0.25949845916894754, | |
| "learning_rate": 1.299822858309292e-06, | |
| "loss": 0.118, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 1.5791210439478025, | |
| "grad_norm": 0.27496698683134035, | |
| "learning_rate": 1.2866422953040458e-06, | |
| "loss": 0.1364, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.5813709314534274, | |
| "grad_norm": 0.22644679203329376, | |
| "learning_rate": 1.273519025511236e-06, | |
| "loss": 0.1242, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 1.583620818959052, | |
| "grad_norm": 0.27460451136628766, | |
| "learning_rate": 1.2604532514091444e-06, | |
| "loss": 0.1179, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.5858707064646769, | |
| "grad_norm": 0.20903077719719648, | |
| "learning_rate": 1.2474451745889516e-06, | |
| "loss": 0.1174, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.5881205939703014, | |
| "grad_norm": 0.24497256736111866, | |
| "learning_rate": 1.2344949957516356e-06, | |
| "loss": 0.1119, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.5903704814759263, | |
| "grad_norm": 0.2539761952183662, | |
| "learning_rate": 1.221602914704862e-06, | |
| "loss": 0.1219, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 1.5926203689815508, | |
| "grad_norm": 0.23677806854402075, | |
| "learning_rate": 1.2087691303599109e-06, | |
| "loss": 0.1131, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.5948702564871757, | |
| "grad_norm": 0.28943703222233913, | |
| "learning_rate": 1.1959938407286099e-06, | |
| "loss": 0.1265, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 1.5971201439928002, | |
| "grad_norm": 0.26936033145050353, | |
| "learning_rate": 1.1832772429202716e-06, | |
| "loss": 0.1155, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.5993700314984252, | |
| "grad_norm": 0.24859595390547068, | |
| "learning_rate": 1.1706195331386494e-06, | |
| "loss": 0.1319, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 1.6016199190040497, | |
| "grad_norm": 0.21137786230401104, | |
| "learning_rate": 1.1580209066789272e-06, | |
| "loss": 0.0959, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.6038698065096746, | |
| "grad_norm": 0.18826307259382147, | |
| "learning_rate": 1.1454815579246874e-06, | |
| "loss": 0.1162, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 1.606119694015299, | |
| "grad_norm": 0.2059875775498964, | |
| "learning_rate": 1.1330016803449224e-06, | |
| "loss": 0.1079, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.608369581520924, | |
| "grad_norm": 0.28679664201908944, | |
| "learning_rate": 1.1205814664910464e-06, | |
| "loss": 0.1323, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.6106194690265485, | |
| "grad_norm": 0.23898491505271052, | |
| "learning_rate": 1.1082211079939248e-06, | |
| "loss": 0.1, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.6128693565321734, | |
| "grad_norm": 0.2700410969754371, | |
| "learning_rate": 1.0959207955609163e-06, | |
| "loss": 0.107, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 1.6151192440377982, | |
| "grad_norm": 0.21154102841364958, | |
| "learning_rate": 1.083680718972938e-06, | |
| "loss": 0.1126, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.6173691315434229, | |
| "grad_norm": 0.19582804111079785, | |
| "learning_rate": 1.0715010670815212e-06, | |
| "loss": 0.1111, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 1.6196190190490476, | |
| "grad_norm": 0.2154658262674778, | |
| "learning_rate": 1.059382027805914e-06, | |
| "loss": 0.1025, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.6218689065546723, | |
| "grad_norm": 0.30677049526532074, | |
| "learning_rate": 1.0473237881301763e-06, | |
| "loss": 0.1201, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 1.624118794060297, | |
| "grad_norm": 0.23614701148998188, | |
| "learning_rate": 1.0353265341002916e-06, | |
| "loss": 0.104, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.6263686815659217, | |
| "grad_norm": 0.24690793627028748, | |
| "learning_rate": 1.0233904508212955e-06, | |
| "loss": 0.1078, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 1.6286185690715465, | |
| "grad_norm": 0.31258122069910355, | |
| "learning_rate": 1.0115157224544313e-06, | |
| "loss": 0.1036, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.6308684565771712, | |
| "grad_norm": 0.23164502515145138, | |
| "learning_rate": 9.997025322142934e-07, | |
| "loss": 0.1082, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.6331183440827959, | |
| "grad_norm": 0.24106743301610264, | |
| "learning_rate": 9.87951062366011e-07, | |
| "loss": 0.1197, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.6353682315884206, | |
| "grad_norm": 0.26573732918101894, | |
| "learning_rate": 9.762614942224312e-07, | |
| "loss": 0.1205, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 1.6376181190940453, | |
| "grad_norm": 0.11482579343049812, | |
| "learning_rate": 9.646340081413225e-07, | |
| "loss": 0.0915, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.63986800659967, | |
| "grad_norm": 0.21075003083613178, | |
| "learning_rate": 9.530687835225916e-07, | |
| "loss": 0.097, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 1.6421178941052947, | |
| "grad_norm": 0.23753636897967206, | |
| "learning_rate": 9.415659988055215e-07, | |
| "loss": 0.1042, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.6443677816109195, | |
| "grad_norm": 0.24065236956197258, | |
| "learning_rate": 9.30125831466005e-07, | |
| "loss": 0.1021, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 1.6466176691165442, | |
| "grad_norm": 0.28653116996485667, | |
| "learning_rate": 9.187484580138184e-07, | |
| "loss": 0.1153, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.6488675566221689, | |
| "grad_norm": 0.2563141158700858, | |
| "learning_rate": 9.074340539898962e-07, | |
| "loss": 0.1106, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 1.6511174441277936, | |
| "grad_norm": 0.29385405444476315, | |
| "learning_rate": 8.961827939636198e-07, | |
| "loss": 0.1087, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.6533673316334183, | |
| "grad_norm": 0.2631541250069433, | |
| "learning_rate": 8.849948515301188e-07, | |
| "loss": 0.0978, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.655617219139043, | |
| "grad_norm": 0.2722585679724958, | |
| "learning_rate": 8.738703993076087e-07, | |
| "loss": 0.109, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.6578671066446677, | |
| "grad_norm": 0.3065358825170482, | |
| "learning_rate": 8.62809608934711e-07, | |
| "loss": 0.1019, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 1.6601169941502925, | |
| "grad_norm": 0.23763230365231583, | |
| "learning_rate": 8.518126510678138e-07, | |
| "loss": 0.1138, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.6623668816559172, | |
| "grad_norm": 0.2532402386408982, | |
| "learning_rate": 8.408796953784365e-07, | |
| "loss": 0.1102, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 1.6646167691615419, | |
| "grad_norm": 0.23005454958970656, | |
| "learning_rate": 8.30010910550611e-07, | |
| "loss": 0.1017, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.6668666566671666, | |
| "grad_norm": 0.2194832160899072, | |
| "learning_rate": 8.19206464278281e-07, | |
| "loss": 0.0985, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 1.6691165441727913, | |
| "grad_norm": 0.32261159442961446, | |
| "learning_rate": 8.084665232627165e-07, | |
| "loss": 0.1115, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.671366431678416, | |
| "grad_norm": 0.28020915769071963, | |
| "learning_rate": 7.977912532099336e-07, | |
| "loss": 0.1072, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 1.6736163191840407, | |
| "grad_norm": 0.2587579519713862, | |
| "learning_rate": 7.871808188281461e-07, | |
| "loss": 0.0884, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.6758662066896655, | |
| "grad_norm": 0.25937560152984207, | |
| "learning_rate": 7.766353838252227e-07, | |
| "loss": 0.0963, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.6781160941952904, | |
| "grad_norm": 0.2623209006276337, | |
| "learning_rate": 7.661551109061593e-07, | |
| "loss": 0.0945, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.680365981700915, | |
| "grad_norm": 0.2734268883455671, | |
| "learning_rate": 7.557401617705673e-07, | |
| "loss": 0.0962, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 1.6826158692065398, | |
| "grad_norm": 0.2600567478099387, | |
| "learning_rate": 7.453906971101826e-07, | |
| "loss": 0.0965, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.6848657567121643, | |
| "grad_norm": 0.21549655386834185, | |
| "learning_rate": 7.35106876606384e-07, | |
| "loss": 0.0802, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 1.6871156442177893, | |
| "grad_norm": 0.23243787528580465, | |
| "learning_rate": 7.248888589277275e-07, | |
| "loss": 0.0979, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.6893655317234137, | |
| "grad_norm": 0.2724000087724297, | |
| "learning_rate": 7.147368017275075e-07, | |
| "loss": 0.0954, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 1.6916154192290387, | |
| "grad_norm": 0.27067809227580786, | |
| "learning_rate": 7.046508616413078e-07, | |
| "loss": 0.0921, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.6938653067346632, | |
| "grad_norm": 0.2211597100684428, | |
| "learning_rate": 6.946311942846002e-07, | |
| "loss": 0.1051, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 1.6961151942402881, | |
| "grad_norm": 0.24002929405082607, | |
| "learning_rate": 6.846779542503384e-07, | |
| "loss": 0.0899, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.6983650817459126, | |
| "grad_norm": 0.19994511733272957, | |
| "learning_rate": 6.747912951065722e-07, | |
| "loss": 0.0914, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.7006149692515375, | |
| "grad_norm": 0.3127468963077912, | |
| "learning_rate": 6.649713693940718e-07, | |
| "loss": 0.1032, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.702864856757162, | |
| "grad_norm": 0.23642047104133684, | |
| "learning_rate": 6.552183286239899e-07, | |
| "loss": 0.087, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 1.705114744262787, | |
| "grad_norm": 0.2068805673647048, | |
| "learning_rate": 6.455323232755095e-07, | |
| "loss": 0.093, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.7073646317684115, | |
| "grad_norm": 0.26466757083784725, | |
| "learning_rate": 6.35913502793527e-07, | |
| "loss": 0.0857, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 1.7096145192740364, | |
| "grad_norm": 0.20726909159845547, | |
| "learning_rate": 6.263620155863492e-07, | |
| "loss": 0.0863, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.711864406779661, | |
| "grad_norm": 0.21055369695393691, | |
| "learning_rate": 6.168780090233994e-07, | |
| "loss": 0.0916, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 1.7141142942852858, | |
| "grad_norm": 0.2778245613335268, | |
| "learning_rate": 6.07461629432945e-07, | |
| "loss": 0.0917, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.7163641817909103, | |
| "grad_norm": 0.1999858453456665, | |
| "learning_rate": 5.981130220998444e-07, | |
| "loss": 0.0746, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 1.7186140692965353, | |
| "grad_norm": 0.19402830775044652, | |
| "learning_rate": 5.888323312632948e-07, | |
| "loss": 0.094, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.7208639568021598, | |
| "grad_norm": 0.26795633448623635, | |
| "learning_rate": 5.796197001146164e-07, | |
| "loss": 0.0884, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.7231138443077847, | |
| "grad_norm": 0.2390770570872304, | |
| "learning_rate": 5.704752707950412e-07, | |
| "loss": 0.0905, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.7253637318134092, | |
| "grad_norm": 0.16942131967267335, | |
| "learning_rate": 5.613991843935179e-07, | |
| "loss": 0.0827, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 1.7276136193190341, | |
| "grad_norm": 0.2027819269347922, | |
| "learning_rate": 5.523915809445313e-07, | |
| "loss": 0.0832, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.7298635068246586, | |
| "grad_norm": 0.22046848444535852, | |
| "learning_rate": 5.434525994259531e-07, | |
| "loss": 0.0886, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 1.7321133943302836, | |
| "grad_norm": 0.2595049287219421, | |
| "learning_rate": 5.345823777568859e-07, | |
| "loss": 0.0937, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.734363281835908, | |
| "grad_norm": 0.24929926941084485, | |
| "learning_rate": 5.25781052795541e-07, | |
| "loss": 0.0787, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 1.736613169341533, | |
| "grad_norm": 0.24888880504119226, | |
| "learning_rate": 5.170487603371266e-07, | |
| "loss": 0.0845, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.7388630568471577, | |
| "grad_norm": 0.2604651193142029, | |
| "learning_rate": 5.083856351117511e-07, | |
| "loss": 0.0786, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 1.7411129443527824, | |
| "grad_norm": 0.20003829357925593, | |
| "learning_rate": 4.997918107823446e-07, | |
| "loss": 0.08, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.7433628318584071, | |
| "grad_norm": 0.2051972235262297, | |
| "learning_rate": 4.912674199425999e-07, | |
| "loss": 0.0853, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.7456127193640318, | |
| "grad_norm": 0.2549934939375718, | |
| "learning_rate": 4.828125941149197e-07, | |
| "loss": 0.0844, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.7478626068696566, | |
| "grad_norm": 0.3024383705811877, | |
| "learning_rate": 4.7442746374839363e-07, | |
| "loss": 0.0846, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 1.7501124943752813, | |
| "grad_norm": 0.21907085756014216, | |
| "learning_rate": 4.6611215821678546e-07, | |
| "loss": 0.0839, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.752362381880906, | |
| "grad_norm": 0.2241634174428953, | |
| "learning_rate": 4.578668058165325e-07, | |
| "loss": 0.0758, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 1.7546122693865307, | |
| "grad_norm": 0.26223122192387566, | |
| "learning_rate": 4.4969153376476726e-07, | |
| "loss": 0.0814, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.7568621568921554, | |
| "grad_norm": 0.3009548280743066, | |
| "learning_rate": 4.415864681973608e-07, | |
| "loss": 0.079, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 1.7591120443977801, | |
| "grad_norm": 0.2282784959309563, | |
| "learning_rate": 4.335517341669676e-07, | |
| "loss": 0.084, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.7613619319034048, | |
| "grad_norm": 0.2736169743202772, | |
| "learning_rate": 4.255874556411016e-07, | |
| "loss": 0.0845, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 1.7636118194090296, | |
| "grad_norm": 0.2273826431677496, | |
| "learning_rate": 4.176937555002231e-07, | |
| "loss": 0.0789, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.7658617069146543, | |
| "grad_norm": 0.2562359538975016, | |
| "learning_rate": 4.098707555358411e-07, | |
| "loss": 0.0841, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.768111594420279, | |
| "grad_norm": 0.20875979878240594, | |
| "learning_rate": 4.0211857644863404e-07, | |
| "loss": 0.0868, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.7703614819259037, | |
| "grad_norm": 0.23466550669048516, | |
| "learning_rate": 3.9443733784659324e-07, | |
| "loss": 0.0863, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 1.7726113694315284, | |
| "grad_norm": 0.22510691667210447, | |
| "learning_rate": 3.8682715824316594e-07, | |
| "loss": 0.0966, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.7748612569371531, | |
| "grad_norm": 0.20906650720115227, | |
| "learning_rate": 3.792881550554373e-07, | |
| "loss": 0.0792, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 1.7771111444427778, | |
| "grad_norm": 0.27079014394170864, | |
| "learning_rate": 3.7182044460231605e-07, | |
| "loss": 0.0793, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.7793610319484026, | |
| "grad_norm": 0.1857139907781371, | |
| "learning_rate": 3.6442414210273834e-07, | |
| "loss": 0.0798, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 1.7816109194540273, | |
| "grad_norm": 0.2556479800636284, | |
| "learning_rate": 3.570993616738866e-07, | |
| "loss": 0.0848, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.783860806959652, | |
| "grad_norm": 0.20166968130742072, | |
| "learning_rate": 3.498462163294386e-07, | |
| "loss": 0.0811, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 1.7861106944652767, | |
| "grad_norm": 0.2536060487731229, | |
| "learning_rate": 3.426648179778147e-07, | |
| "loss": 0.0953, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.7883605819709014, | |
| "grad_norm": 0.2112175288890015, | |
| "learning_rate": 3.355552774204551e-07, | |
| "loss": 0.0762, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.7906104694765261, | |
| "grad_norm": 0.2295537149666403, | |
| "learning_rate": 3.2851770435010864e-07, | |
| "loss": 0.0767, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.7928603569821508, | |
| "grad_norm": 0.2866042831708544, | |
| "learning_rate": 3.215522073491434e-07, | |
| "loss": 0.0822, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 1.7951102444877756, | |
| "grad_norm": 0.21056995297624528, | |
| "learning_rate": 3.1465889388786697e-07, | |
| "loss": 0.0884, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.7973601319934003, | |
| "grad_norm": 0.2722790864581489, | |
| "learning_rate": 3.0783787032287407e-07, | |
| "loss": 0.0881, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 1.799610019499025, | |
| "grad_norm": 0.2828340673761126, | |
| "learning_rate": 3.010892418953981e-07, | |
| "loss": 0.0791, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.80185990700465, | |
| "grad_norm": 0.2144007429401686, | |
| "learning_rate": 2.9441311272969343e-07, | |
| "loss": 0.067, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 1.8041097945102744, | |
| "grad_norm": 0.21922591950990084, | |
| "learning_rate": 2.878095858314278e-07, | |
| "loss": 0.0702, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.8063596820158994, | |
| "grad_norm": 0.20896747701719126, | |
| "learning_rate": 2.812787630860919e-07, | |
| "loss": 0.078, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 1.8086095695215239, | |
| "grad_norm": 0.21476087685701412, | |
| "learning_rate": 2.7482074525742477e-07, | |
| "loss": 0.0688, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.8108594570271488, | |
| "grad_norm": 0.24301133633323727, | |
| "learning_rate": 2.6843563198586553e-07, | |
| "loss": 0.0804, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.8131093445327733, | |
| "grad_norm": 0.23191122306412676, | |
| "learning_rate": 2.621235217870116e-07, | |
| "loss": 0.0861, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.8153592320383982, | |
| "grad_norm": 0.207067846018882, | |
| "learning_rate": 2.55884512050098e-07, | |
| "loss": 0.0886, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 1.8176091195440227, | |
| "grad_norm": 0.22298796620779232, | |
| "learning_rate": 2.4971869903649916e-07, | |
| "loss": 0.0841, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.8198590070496476, | |
| "grad_norm": 0.2933340830070678, | |
| "learning_rate": 2.436261778782378e-07, | |
| "loss": 0.0794, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 1.8221088945552721, | |
| "grad_norm": 0.21972032956327708, | |
| "learning_rate": 2.3760704257652145e-07, | |
| "loss": 0.0774, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.824358782060897, | |
| "grad_norm": 0.2597840708263632, | |
| "learning_rate": 2.3166138600029198e-07, | |
| "loss": 0.0772, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 1.8266086695665216, | |
| "grad_norm": 0.24945833711183132, | |
| "learning_rate": 2.257892998847916e-07, | |
| "loss": 0.0758, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.8288585570721465, | |
| "grad_norm": 0.2108872276998458, | |
| "learning_rate": 2.1999087483014437e-07, | |
| "loss": 0.0742, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 1.831108444577771, | |
| "grad_norm": 0.2561087507310594, | |
| "learning_rate": 2.1426620029996516e-07, | |
| "loss": 0.078, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.833358332083396, | |
| "grad_norm": 0.18787526118382977, | |
| "learning_rate": 2.08615364619974e-07, | |
| "loss": 0.0679, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.8356082195890204, | |
| "grad_norm": 0.224194236181041, | |
| "learning_rate": 2.0303845497663566e-07, | |
| "loss": 0.0746, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.8378581070946454, | |
| "grad_norm": 0.2392479281560687, | |
| "learning_rate": 1.9753555741581277e-07, | |
| "loss": 0.0764, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 1.8401079946002699, | |
| "grad_norm": 0.21912472032156466, | |
| "learning_rate": 1.921067568414403e-07, | |
| "loss": 0.079, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.8423578821058948, | |
| "grad_norm": 0.25670091205707113, | |
| "learning_rate": 1.8675213701421223e-07, | |
| "loss": 0.0835, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 1.8446077696115193, | |
| "grad_norm": 0.2594177639066407, | |
| "learning_rate": 1.814717805502958e-07, | |
| "loss": 0.0803, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.8468576571171442, | |
| "grad_norm": 0.21469179882031758, | |
| "learning_rate": 1.762657689200481e-07, | |
| "loss": 0.0764, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 1.8491075446227687, | |
| "grad_norm": 0.2209544552641529, | |
| "learning_rate": 1.7113418244676493e-07, | |
| "loss": 0.0785, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.8513574321283937, | |
| "grad_norm": 0.24145566509686753, | |
| "learning_rate": 1.6607710030544122e-07, | |
| "loss": 0.0719, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 1.8536073196340181, | |
| "grad_norm": 0.2093424795846333, | |
| "learning_rate": 1.6109460052154802e-07, | |
| "loss": 0.0764, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.855857207139643, | |
| "grad_norm": 0.24494002119656788, | |
| "learning_rate": 1.561867599698258e-07, | |
| "loss": 0.0798, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.8581070946452676, | |
| "grad_norm": 0.22975170782618237, | |
| "learning_rate": 1.5135365437310534e-07, | |
| "loss": 0.0837, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.8603569821508925, | |
| "grad_norm": 0.23509810998937047, | |
| "learning_rate": 1.4659535830113368e-07, | |
| "loss": 0.0784, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 1.8626068696565172, | |
| "grad_norm": 0.21476403073025796, | |
| "learning_rate": 1.419119451694262e-07, | |
| "loss": 0.0735, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.864856757162142, | |
| "grad_norm": 0.206525508501757, | |
| "learning_rate": 1.3730348723813181e-07, | |
| "loss": 0.0693, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 1.8671066446677667, | |
| "grad_norm": 0.23677883602034755, | |
| "learning_rate": 1.3277005561092016e-07, | |
| "loss": 0.0765, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.8693565321733914, | |
| "grad_norm": 0.22864240045396528, | |
| "learning_rate": 1.2831172023388349e-07, | |
| "loss": 0.0682, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 1.871606419679016, | |
| "grad_norm": 0.23988380079630575, | |
| "learning_rate": 1.2392854989445925e-07, | |
| "loss": 0.0792, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.8738563071846408, | |
| "grad_norm": 0.21420842660768485, | |
| "learning_rate": 1.196206122203647e-07, | |
| "loss": 0.0723, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 1.8761061946902655, | |
| "grad_norm": 0.23599002078153936, | |
| "learning_rate": 1.153879736785568e-07, | |
| "loss": 0.0745, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.8783560821958902, | |
| "grad_norm": 0.19939870757943454, | |
| "learning_rate": 1.112306995742074e-07, | |
| "loss": 0.0764, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.880605969701515, | |
| "grad_norm": 0.3002967324611831, | |
| "learning_rate": 1.0714885404969288e-07, | |
| "loss": 0.0745, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.8828558572071397, | |
| "grad_norm": 0.2534936155963215, | |
| "learning_rate": 1.031425000836056e-07, | |
| "loss": 0.0805, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 1.8851057447127644, | |
| "grad_norm": 0.2099770583680312, | |
| "learning_rate": 9.921169948978293e-08, | |
| "loss": 0.0696, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.887355632218389, | |
| "grad_norm": 0.21406906185927677, | |
| "learning_rate": 9.535651291635362e-08, | |
| "loss": 0.0695, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 1.8896055197240138, | |
| "grad_norm": 0.222365141419886, | |
| "learning_rate": 9.157699984480018e-08, | |
| "loss": 0.0747, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.8918554072296385, | |
| "grad_norm": 0.26953866043532604, | |
| "learning_rate": 8.787321858904241e-08, | |
| "loss": 0.0707, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 1.8941052947352632, | |
| "grad_norm": 0.2625647283056072, | |
| "learning_rate": 8.424522629453924e-08, | |
| "loss": 0.0743, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.896355182240888, | |
| "grad_norm": 0.23612000625927326, | |
| "learning_rate": 8.06930789374033e-08, | |
| "loss": 0.0763, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 1.8986050697465127, | |
| "grad_norm": 0.20902097825740532, | |
| "learning_rate": 7.721683132354163e-08, | |
| "loss": 0.0744, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.9008549572521374, | |
| "grad_norm": 0.24284466003504482, | |
| "learning_rate": 7.381653708780578e-08, | |
| "loss": 0.0741, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.903104844757762, | |
| "grad_norm": 0.208669560652797, | |
| "learning_rate": 7.049224869316807e-08, | |
| "loss": 0.0711, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.9053547322633868, | |
| "grad_norm": 0.2250655987088142, | |
| "learning_rate": 6.724401742990993e-08, | |
| "loss": 0.0689, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 1.9076046197690115, | |
| "grad_norm": 0.21694307565283746, | |
| "learning_rate": 6.407189341483044e-08, | |
| "loss": 0.0761, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.9098545072746362, | |
| "grad_norm": 0.19311480533839126, | |
| "learning_rate": 6.097592559047405e-08, | |
| "loss": 0.0743, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 1.912104394780261, | |
| "grad_norm": 0.18525264406718234, | |
| "learning_rate": 5.795616172437624e-08, | |
| "loss": 0.0725, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.9143542822858857, | |
| "grad_norm": 0.1651176792199085, | |
| "learning_rate": 5.501264840832299e-08, | |
| "loss": 0.0761, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 1.9166041697915104, | |
| "grad_norm": 0.2260442730154628, | |
| "learning_rate": 5.214543105763692e-08, | |
| "loss": 0.0889, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.918854057297135, | |
| "grad_norm": 0.18715741530378352, | |
| "learning_rate": 4.935455391047228e-08, | |
| "loss": 0.0663, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 1.9211039448027598, | |
| "grad_norm": 0.27005890292615836, | |
| "learning_rate": 4.664006002713495e-08, | |
| "loss": 0.0728, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.9233538323083845, | |
| "grad_norm": 0.2849847348297522, | |
| "learning_rate": 4.400199128941573e-08, | |
| "loss": 0.077, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.9256037198140095, | |
| "grad_norm": 0.21209255590676854, | |
| "learning_rate": 4.1440388399948686e-08, | |
| "loss": 0.0668, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.927853607319634, | |
| "grad_norm": 0.24917025560084188, | |
| "learning_rate": 3.8955290881576566e-08, | |
| "loss": 0.0731, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 1.930103494825259, | |
| "grad_norm": 0.18031075827239754, | |
| "learning_rate": 3.654673707674639e-08, | |
| "loss": 0.0755, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.9323533823308834, | |
| "grad_norm": 0.29159777908954887, | |
| "learning_rate": 3.4214764146915936e-08, | |
| "loss": 0.0771, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 1.9346032698365083, | |
| "grad_norm": 0.2515730214349518, | |
| "learning_rate": 3.195940807198039e-08, | |
| "loss": 0.0718, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.9368531573421328, | |
| "grad_norm": 0.2359716229508164, | |
| "learning_rate": 2.9780703649716637e-08, | |
| "loss": 0.075, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 1.9391030448477578, | |
| "grad_norm": 0.2279891015303591, | |
| "learning_rate": 2.767868449524813e-08, | |
| "loss": 0.071, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.9413529323533822, | |
| "grad_norm": 0.24505091349172803, | |
| "learning_rate": 2.5653383040524228e-08, | |
| "loss": 0.074, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 1.9436028198590072, | |
| "grad_norm": 0.2478550376131261, | |
| "learning_rate": 2.370483053382111e-08, | |
| "loss": 0.0771, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.9458527073646317, | |
| "grad_norm": 0.2193182868657674, | |
| "learning_rate": 2.183305703925831e-08, | |
| "loss": 0.0756, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.9481025948702566, | |
| "grad_norm": 0.2746040123293314, | |
| "learning_rate": 2.0038091436337392e-08, | |
| "loss": 0.0684, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.950352482375881, | |
| "grad_norm": 0.27001950154682536, | |
| "learning_rate": 1.8319961419493436e-08, | |
| "loss": 0.0776, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 1.952602369881506, | |
| "grad_norm": 0.17878344688895553, | |
| "learning_rate": 1.667869349766982e-08, | |
| "loss": 0.0684, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.9548522573871305, | |
| "grad_norm": 0.2242691348688971, | |
| "learning_rate": 1.5114312993908532e-08, | |
| "loss": 0.0775, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 1.9571021448927555, | |
| "grad_norm": 0.30659381638740507, | |
| "learning_rate": 1.3626844044957733e-08, | |
| "loss": 0.0742, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.95935203239838, | |
| "grad_norm": 0.22927922548010668, | |
| "learning_rate": 1.2216309600903142e-08, | |
| "loss": 0.0758, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 1.961601919904005, | |
| "grad_norm": 0.24862858256846118, | |
| "learning_rate": 1.088273142481111e-08, | |
| "loss": 0.0716, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.9638518074096294, | |
| "grad_norm": 0.18094046454860382, | |
| "learning_rate": 9.626130092393326e-09, | |
| "loss": 0.0708, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 1.9661016949152543, | |
| "grad_norm": 0.22402544663253343, | |
| "learning_rate": 8.446524991689298e-09, | |
| "loss": 0.0724, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.9683515824208788, | |
| "grad_norm": 0.1742440920374198, | |
| "learning_rate": 7.343934322767699e-09, | |
| "loss": 0.0776, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.9706014699265038, | |
| "grad_norm": 0.22337791056104178, | |
| "learning_rate": 6.318375097446039e-09, | |
| "loss": 0.0662, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.9728513574321282, | |
| "grad_norm": 0.20821619733680874, | |
| "learning_rate": 5.369863139026432e-09, | |
| "loss": 0.0823, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 1.9751012449377532, | |
| "grad_norm": 0.28459129908034536, | |
| "learning_rate": 4.498413082053566e-09, | |
| "loss": 0.0847, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.9773511324433777, | |
| "grad_norm": 0.2487528636409247, | |
| "learning_rate": 3.704038372085994e-09, | |
| "loss": 0.0812, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 1.9796010199490026, | |
| "grad_norm": 0.20877793863085078, | |
| "learning_rate": 2.986751265493526e-09, | |
| "loss": 0.0726, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.981850907454627, | |
| "grad_norm": 0.2843353007301253, | |
| "learning_rate": 2.3465628292623776e-09, | |
| "loss": 0.0696, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 1.984100794960252, | |
| "grad_norm": 0.20964930087522637, | |
| "learning_rate": 1.7834829408286402e-09, | |
| "loss": 0.0669, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.9863506824658768, | |
| "grad_norm": 0.18510001166373868, | |
| "learning_rate": 1.297520287923404e-09, | |
| "loss": 0.0732, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 1.9886005699715015, | |
| "grad_norm": 0.1927246478831311, | |
| "learning_rate": 8.886823684417512e-10, | |
| "loss": 0.0798, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.9908504574771262, | |
| "grad_norm": 0.1773427078092757, | |
| "learning_rate": 5.56975490322853e-10, | |
| "loss": 0.0759, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.993100344982751, | |
| "grad_norm": 0.2305382589496537, | |
| "learning_rate": 3.0240477145559997e-10, | |
| "loss": 0.0862, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.9953502324883756, | |
| "grad_norm": 0.28373308100373656, | |
| "learning_rate": 1.24974139599221e-10, | |
| "loss": 0.0769, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 1.9976001199940003, | |
| "grad_norm": 0.19382710057534525, | |
| "learning_rate": 2.4686332322221286e-11, | |
| "loss": 0.0755, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.9994000299985002, | |
| "eval_loss": 0.08069541305303574, | |
| "eval_runtime": 54.013, | |
| "eval_samples_per_second": 19.958, | |
| "eval_steps_per_second": 4.999, | |
| "step": 4444 | |
| }, | |
| { | |
| "epoch": 1.9994000299985002, | |
| "step": 4444, | |
| "total_flos": 1.2064109084748546e+19, | |
| "train_loss": 0.2652757017731559, | |
| "train_runtime": 35452.6785, | |
| "train_samples_per_second": 6.017, | |
| "train_steps_per_second": 0.125 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4444, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2064109084748546e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |