{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9989708404802744,
  "eval_steps": 500,
  "global_step": 728,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 9.090909090909091e-07,
      "loss": 1.4022,
      "step": 3
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.8181818181818183e-06,
      "loss": 1.4239,
      "step": 6
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.7272727272727272e-06,
      "loss": 1.3843,
      "step": 9
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.6363636363636366e-06,
      "loss": 1.3722,
      "step": 12
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 1.3411,
      "step": 15
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.4545454545454545e-06,
      "loss": 1.3187,
      "step": 18
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.363636363636364e-06,
      "loss": 1.284,
      "step": 21
    },
    {
      "epoch": 0.03,
      "learning_rate": 7.272727272727273e-06,
      "loss": 1.2492,
      "step": 24
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.181818181818183e-06,
      "loss": 1.2658,
      "step": 27
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.090909090909091e-06,
      "loss": 1.2173,
      "step": 30
    },
    {
      "epoch": 0.05,
      "learning_rate": 1e-05,
      "loss": 1.2302,
      "step": 33
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.0909090909090909e-05,
      "loss": 1.2301,
      "step": 36
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.181818181818182e-05,
      "loss": 1.1855,
      "step": 39
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.2727272727272728e-05,
      "loss": 1.2094,
      "step": 42
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3636363636363637e-05,
      "loss": 1.1788,
      "step": 45
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.4545454545454546e-05,
      "loss": 1.1804,
      "step": 48
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.5454545454545454e-05,
      "loss": 1.166,
      "step": 51
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.6363636363636366e-05,
      "loss": 1.1256,
      "step": 54
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.7272727272727274e-05,
      "loss": 1.1289,
      "step": 57
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 1.1392,
      "step": 60
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9090909090909094e-05,
      "loss": 1.131,
      "step": 63
    },
    {
      "epoch": 0.09,
      "learning_rate": 2e-05,
      "loss": 1.1288,
      "step": 66
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9999900994429424e-05,
      "loss": 1.1198,
      "step": 69
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.999960397967811e-05,
      "loss": 1.1281,
      "step": 72
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9999108961627284e-05,
      "loss": 1.134,
      "step": 75
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9998415950078858e-05,
      "loss": 1.1148,
      "step": 78
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9997524958755226e-05,
      "loss": 1.1162,
      "step": 81
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9996436005299013e-05,
      "loss": 1.12,
      "step": 84
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.999514911127271e-05,
      "loss": 1.12,
      "step": 87
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9993664302158255e-05,
      "loss": 1.0938,
      "step": 90
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9991981607356517e-05,
      "loss": 1.0838,
      "step": 93
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9990101060186732e-05,
      "loss": 1.1078,
      "step": 96
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.998802269788583e-05,
      "loss": 1.1037,
      "step": 99
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9985746561607696e-05,
      "loss": 1.0804,
      "step": 102
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.998327269642237e-05,
      "loss": 1.0977,
      "step": 105
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.998060115131513e-05,
      "loss": 1.1036,
      "step": 108
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9977731979185556e-05,
      "loss": 1.1109,
      "step": 111
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9974665236846443e-05,
      "loss": 1.0937,
      "step": 114
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9971400985022712e-05,
      "loss": 1.0834,
      "step": 117
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9967939288350184e-05,
      "loss": 1.1002,
      "step": 120
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9964280215374312e-05,
      "loss": 1.0847,
      "step": 123
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9960423838548814e-05,
      "loss": 1.0845,
      "step": 126
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.995637023423425e-05,
      "loss": 1.0984,
      "step": 129
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9952119482696504e-05,
      "loss": 1.0836,
      "step": 132
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9947671668105185e-05,
      "loss": 1.082,
      "step": 135
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9943026878531985e-05,
      "loss": 1.0707,
      "step": 138
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9938185205948906e-05,
      "loss": 1.0545,
      "step": 141
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.993314674622646e-05,
      "loss": 1.0618,
      "step": 144
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.992791159913177e-05,
      "loss": 1.0514,
      "step": 147
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.992247986832658e-05,
      "loss": 1.0733,
      "step": 150
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.99168516613652e-05,
      "loss": 1.0712,
      "step": 153
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.991102708969241e-05,
      "loss": 1.0788,
      "step": 156
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9905006268641212e-05,
      "loss": 1.0744,
      "step": 159
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9898789317430577e-05,
      "loss": 1.0621,
      "step": 162
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9892376359163058e-05,
      "loss": 1.0598,
      "step": 165
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9885767520822377e-05,
      "loss": 1.095,
      "step": 168
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9878962933270896e-05,
      "loss": 1.0666,
      "step": 171
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.987196273124703e-05,
      "loss": 1.0657,
      "step": 174
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.986476705336258e-05,
      "loss": 1.0691,
      "step": 177
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9857376042099982e-05,
      "loss": 1.0663,
      "step": 180
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9849789843809496e-05,
      "loss": 1.0476,
      "step": 183
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9842008608706295e-05,
      "loss": 1.0509,
      "step": 186
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.983403249086751e-05,
      "loss": 1.0622,
      "step": 189
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9825861648229154e-05,
      "loss": 1.0708,
      "step": 192
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.981749624258302e-05,
      "loss": 1.0672,
      "step": 195
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9808936439573455e-05,
      "loss": 1.0627,
      "step": 198
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9800182408694096e-05,
      "loss": 1.0726,
      "step": 201
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9791234323284515e-05,
      "loss": 1.0558,
      "step": 204
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9782092360526763e-05,
      "loss": 1.0677,
      "step": 207
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.977275670144189e-05,
      "loss": 1.0422,
      "step": 210
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9763227530886348e-05,
      "loss": 1.0364,
      "step": 213
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.9753505037548334e-05,
      "loss": 1.0475,
      "step": 216
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.974358941394404e-05,
      "loss": 1.0508,
      "step": 219
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.973348085641387e-05,
      "loss": 1.0595,
      "step": 222
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.972317956511852e-05,
      "loss": 1.0528,
      "step": 225
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.971268574403503e-05,
      "loss": 1.0562,
      "step": 228
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.970199960095276e-05,
      "loss": 1.0329,
      "step": 231
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9691121347469235e-05,
      "loss": 1.045,
      "step": 234
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9680051198986004e-05,
      "loss": 1.0561,
      "step": 237
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9668789374704337e-05,
      "loss": 1.0449,
      "step": 240
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9657336097620904e-05,
      "loss": 1.0359,
      "step": 243
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.964569159452335e-05,
      "loss": 1.0359,
      "step": 246
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.963385609598581e-05,
      "loss": 1.0271,
      "step": 249
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9621829836364335e-05,
      "loss": 1.0563,
      "step": 252
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9609613053792276e-05,
      "loss": 1.0416,
      "step": 255
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.9597205990175528e-05,
      "loss": 1.0578,
      "step": 258
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.958460889118778e-05,
      "loss": 1.0461,
      "step": 261
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.9571822006265623e-05,
      "loss": 1.0262,
      "step": 264
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9558845588603625e-05,
      "loss": 1.0254,
      "step": 267
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9545679895149315e-05,
      "loss": 1.0642,
      "step": 270
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9532325186598093e-05,
      "loss": 1.0456,
      "step": 273
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.951878172738806e-05,
      "loss": 1.0358,
      "step": 276
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.9505049785694803e-05,
      "loss": 1.0409,
      "step": 279
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9491129633426068e-05,
      "loss": 1.0382,
      "step": 282
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9477021546216376e-05,
      "loss": 1.0415,
      "step": 285
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9462725803421566e-05,
      "loss": 1.0308,
      "step": 288
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9448242688113286e-05,
      "loss": 1.0376,
      "step": 291
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9433572487073343e-05,
      "loss": 1.0259,
      "step": 294
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.9418715490788066e-05,
      "loss": 1.0496,
      "step": 297
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.9403671993442534e-05,
      "loss": 1.0519,
      "step": 300
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.9388442292914754e-05,
      "loss": 1.0418,
      "step": 303
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.937302669076976e-05,
      "loss": 1.0372,
      "step": 306
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.9357425492253662e-05,
      "loss": 1.0347,
      "step": 309
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.934163900628756e-05,
      "loss": 1.0253,
      "step": 312
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.9325667545461466e-05,
      "loss": 1.0401,
      "step": 315
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9309511426028105e-05,
      "loss": 1.0282,
      "step": 318
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9293170967896632e-05,
      "loss": 1.0306,
      "step": 321
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.9276646494626333e-05,
      "loss": 1.0313,
      "step": 324
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.9259938333420183e-05,
      "loss": 1.0433,
      "step": 327
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.9243046815118387e-05,
      "loss": 1.0232,
      "step": 330
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.922597227419183e-05,
      "loss": 1.0222,
      "step": 333
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.9208715048735446e-05,
      "loss": 1.0186,
      "step": 336
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9191275480461525e-05,
      "loss": 1.033,
      "step": 339
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9173653914692947e-05,
      "loss": 1.0342,
      "step": 342
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9155850700356345e-05,
      "loss": 1.035,
      "step": 345
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.91378661899752e-05,
      "loss": 1.0206,
      "step": 348
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.9119700739662857e-05,
      "loss": 1.0435,
      "step": 351
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.910135470911547e-05,
      "loss": 1.0181,
      "step": 354
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.908282846160488e-05,
      "loss": 1.0267,
      "step": 357
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.9064122363971426e-05,
      "loss": 1.0365,
      "step": 360
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.904523678661669e-05,
      "loss": 1.0381,
      "step": 363
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.9026172103496138e-05,
      "loss": 1.0048,
      "step": 366
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.900692869211174e-05,
      "loss": 1.0392,
      "step": 369
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.898750693350447e-05,
      "loss": 1.0278,
      "step": 372
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.8967907212246803e-05,
      "loss": 1.013,
      "step": 375
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.8948129916435048e-05,
      "loss": 1.0385,
      "step": 378
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.8928175437681698e-05,
      "loss": 1.0168,
      "step": 381
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8908044171107658e-05,
      "loss": 1.0123,
      "step": 384
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.8887736515334443e-05,
      "loss": 1.015,
      "step": 387
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.8867252872476255e-05,
      "loss": 1.0265,
      "step": 390
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.884659364813205e-05,
      "loss": 0.9997,
      "step": 393
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.8825759251377484e-05,
      "loss": 1.0109,
      "step": 396
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.8804750094756827e-05,
      "loss": 1.0199,
      "step": 399
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.8783566594274783e-05,
      "loss": 0.9998,
      "step": 402
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.8762209169388262e-05,
      "loss": 1.0088,
      "step": 405
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.8740678242998077e-05,
      "loss": 1.0022,
      "step": 408
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.8718974241440552e-05,
      "loss": 1.0216,
      "step": 411
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.8697097594479103e-05,
      "loss": 1.0248,
      "step": 414
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.867504873529571e-05,
      "loss": 0.9974,
      "step": 417
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.865282810048235e-05,
      "loss": 1.0138,
      "step": 420
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.8630436130032353e-05,
      "loss": 1.0004,
      "step": 423
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.860787326733168e-05,
      "loss": 1.0081,
      "step": 426
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.8585139959150144e-05,
      "loss": 1.0238,
      "step": 429
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.856223665563258e-05,
      "loss": 1.0328,
      "step": 432
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.8539163810289914e-05,
      "loss": 1.0071,
      "step": 435
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.8515921879990187e-05,
      "loss": 1.0134,
      "step": 438
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8492511324949516e-05,
      "loss": 1.0181,
      "step": 441
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8468932608722975e-05,
      "loss": 1.0363,
      "step": 444
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.8445186198195406e-05,
      "loss": 1.0011,
      "step": 447
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8421272563572202e-05,
      "loss": 0.9993,
      "step": 450
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8397192178369965e-05,
      "loss": 1.0201,
      "step": 453
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.837294551940716e-05,
      "loss": 0.987,
      "step": 456
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.834853306679464e-05,
      "loss": 1.0106,
      "step": 459
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8323955303926165e-05,
      "loss": 1.0034,
      "step": 462
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.8299212717468825e-05,
      "loss": 1.0095,
      "step": 465
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.8274305797353397e-05,
      "loss": 0.9921,
      "step": 468
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.824923503676465e-05,
      "loss": 0.9859,
      "step": 471
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.822400093213157e-05,
      "loss": 1.017,
      "step": 474
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.8198603983117546e-05,
      "loss": 1.0118,
      "step": 477
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.8173044692610466e-05,
      "loss": 0.9912,
      "step": 480
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.8147323566712755e-05,
      "loss": 1.0162,
      "step": 483
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.8121441114731366e-05,
      "loss": 1.0089,
      "step": 486
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.809539784916768e-05,
      "loss": 0.9752,
      "step": 489
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.806919428570737e-05,
      "loss": 1.007,
      "step": 492
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.804283094321019e-05,
      "loss": 1.0145,
      "step": 495
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.8016308343699686e-05,
      "loss": 1.0008,
      "step": 498
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.798962701235289e-05,
      "loss": 1.0067,
      "step": 501
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.796278747748988e-05,
      "loss": 1.0017,
      "step": 504
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.7935790270563345e-05,
      "loss": 1.0086,
      "step": 507
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.790863592614807e-05,
      "loss": 0.9884,
      "step": 510
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.788132498193032e-05,
      "loss": 1.0028,
      "step": 513
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.7853857978697223e-05,
      "loss": 1.0055,
      "step": 516
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.7826235460326043e-05,
      "loss": 1.005,
      "step": 519
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.7798457973773418e-05,
      "loss": 1.002,
      "step": 522
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.7770526069064525e-05,
      "loss": 0.9838,
      "step": 525
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.7742440299282203e-05,
      "loss": 1.001,
      "step": 528
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.7714201220555982e-05,
      "loss": 0.9984,
      "step": 531
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.7685809392051084e-05,
      "loss": 1.0035,
      "step": 534
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.765726537595734e-05,
      "loss": 1.0076,
      "step": 537
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.7628569737478076e-05,
      "loss": 0.9936,
      "step": 540
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7599723044818898e-05,
      "loss": 1.0053,
      "step": 543
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7570725869176468e-05,
      "loss": 0.9968,
      "step": 546
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.7541578784727163e-05,
      "loss": 1.0059,
      "step": 549
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.751228236861573e-05,
      "loss": 1.0059,
      "step": 552
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.7482837200943845e-05,
      "loss": 1.0081,
      "step": 555
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.7453243864758638e-05,
      "loss": 1.0215,
      "step": 558
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.7423502946041133e-05,
      "loss": 0.9935,
      "step": 561
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.739361503369466e-05,
      "loss": 0.9945,
      "step": 564
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.7363580719533173e-05,
      "loss": 0.9926,
      "step": 567
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.733340059826956e-05,
      "loss": 0.9946,
      "step": 570
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.7303075267503845e-05,
      "loss": 1.0079,
      "step": 573
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.7272605327711364e-05,
      "loss": 1.0212,
      "step": 576
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.7241991382230872e-05,
      "loss": 0.993,
      "step": 579
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.72112340372526e-05,
      "loss": 0.9843,
      "step": 582
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.718033390180624e-05,
      "loss": 0.9837,
      "step": 585
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.71492915877489e-05,
      "loss": 0.959,
      "step": 588
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.7118107709752986e-05,
      "loss": 0.9895,
      "step": 591
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7086782885294026e-05,
      "loss": 0.99,
      "step": 594
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.7055317734638444e-05,
      "loss": 1.006,
      "step": 597
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.702371288083127e-05,
      "loss": 1.0009,
      "step": 600
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6991968949683835e-05,
      "loss": 0.9758,
      "step": 603
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6960086569761332e-05,
      "loss": 0.9801,
      "step": 606
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.6928066372370407e-05,
      "loss": 0.9833,
      "step": 609
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.689590899154664e-05,
      "loss": 0.9846,
      "step": 612
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.6863615064042003e-05,
      "loss": 0.9752,
      "step": 615
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.6831185229312237e-05,
      "loss": 0.9869,
      "step": 618
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.67986201295042e-05,
      "loss": 0.9869,
      "step": 621
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.676592040944315e-05,
      "loss": 0.9878,
      "step": 624
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.6733086716619976e-05,
      "loss": 0.9938,
      "step": 627
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.6700119701178378e-05,
      "loss": 1.0045,
      "step": 630
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.666702001590199e-05,
      "loss": 1.0088,
      "step": 633
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.6633788316201455e-05,
      "loss": 0.998,
      "step": 636
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.6600425260101453e-05,
      "loss": 1.0017,
      "step": 639
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.6566931508227663e-05,
      "loss": 0.9995,
      "step": 642
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.6533307723793688e-05,
      "loss": 1.0012,
      "step": 645
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.649955457258792e-05,
      "loss": 0.9807,
      "step": 648
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.6465672722960365e-05,
      "loss": 0.9664,
      "step": 651
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.6431662845809388e-05,
      "loss": 0.9707,
      "step": 654
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.6397525614568446e-05,
      "loss": 0.983,
      "step": 657
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.6363261705192757e-05,
      "loss": 1.0061,
      "step": 660
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.6328871796145894e-05,
      "loss": 0.9899,
      "step": 663
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.629435656838637e-05,
      "loss": 0.9795,
      "step": 666
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6259716705354154e-05,
      "loss": 1.0002,
      "step": 669
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6224952892957122e-05,
      "loss": 0.9837,
      "step": 672
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.6190065819557496e-05,
      "loss": 0.9872,
      "step": 675
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.615505617595819e-05,
      "loss": 0.9797,
      "step": 678
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.6119924655389158e-05,
      "loss": 0.9926,
      "step": 681
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.6084671953493645e-05,
      "loss": 0.9884,
      "step": 684
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.6049298768314425e-05,
      "loss": 0.9918,
      "step": 687
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.6013805800279977e-05,
      "loss": 0.9829,
      "step": 690
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.5978193752190607e-05,
      "loss": 0.9854,
      "step": 693
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.5942463329204546e-05,
      "loss": 0.9751,
      "step": 696
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.5906615238823974e-05,
      "loss": 0.9945,
      "step": 699
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.5870650190881023e-05,
      "loss": 0.9957,
      "step": 702
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.583456889752371e-05,
      "loss": 1.0047,
      "step": 705
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.579837207320184e-05,
      "loss": 0.9921,
      "step": 708
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.5762060434652863e-05,
      "loss": 0.9839,
      "step": 711
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.572563470088768e-05,
      "loss": 0.9922,
      "step": 714
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.56890955931764e-05,
      "loss": 0.9752,
      "step": 717
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.565244383503407e-05,
      "loss": 0.9778,
      "step": 720
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.5615680152206324e-05,
      "loss": 0.9795,
      "step": 723
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.557880527265505e-05,
      "loss": 0.9774,
      "step": 726
    }
  ],
  "logging_steps": 3,
  "max_steps": 2184,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500.0,
  "total_flos": 4.694048596218085e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}