| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 16.402405686167306, | |
| "global_step": 120000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 5e-05, | |
| "loss": 3.1294, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.999658259859204e-05, | |
| "loss": 3.1422, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.999316519718406e-05, | |
| "loss": 3.1479, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.998974779577609e-05, | |
| "loss": 3.1412, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.998633039436813e-05, | |
| "loss": 3.1387, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.9982912992960154e-05, | |
| "loss": 3.1344, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.997949559155218e-05, | |
| "loss": 3.1353, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.9976078190144217e-05, | |
| "loss": 3.1283, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.9972660788736244e-05, | |
| "loss": 3.1291, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.996924338732828e-05, | |
| "loss": 3.1226, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.9965825985920306e-05, | |
| "loss": 3.1173, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.996240858451234e-05, | |
| "loss": 3.1126, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.995899118310437e-05, | |
| "loss": 3.1117, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.99555737816964e-05, | |
| "loss": 3.1037, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.995215638028843e-05, | |
| "loss": 3.0981, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.994873897888046e-05, | |
| "loss": 3.0682, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.994532157747249e-05, | |
| "loss": 3.0697, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.994190417606453e-05, | |
| "loss": 3.0673, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.993848677465655e-05, | |
| "loss": 3.0703, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.993506937324858e-05, | |
| "loss": 3.0624, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.993165197184062e-05, | |
| "loss": 3.0618, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 4.9928234570432645e-05, | |
| "loss": 3.0612, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 4.992481716902467e-05, | |
| "loss": 3.0651, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 4.992139976761671e-05, | |
| "loss": 3.0555, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 4.9917982366208735e-05, | |
| "loss": 3.0609, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 4.991456496480077e-05, | |
| "loss": 3.0564, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 4.99111475633928e-05, | |
| "loss": 3.0581, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 4.990773016198483e-05, | |
| "loss": 3.054, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 4.990431276057686e-05, | |
| "loss": 3.0559, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 4.990089535916889e-05, | |
| "loss": 3.0273, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 4.989747795776092e-05, | |
| "loss": 3.0221, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 4.989406055635295e-05, | |
| "loss": 3.0218, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 4.989064315494498e-05, | |
| "loss": 3.0245, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 4.988722575353702e-05, | |
| "loss": 3.0237, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 4.988380835212904e-05, | |
| "loss": 3.0224, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 4.988039095072107e-05, | |
| "loss": 3.0183, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 4.987697354931311e-05, | |
| "loss": 3.0113, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 4.9873556147905135e-05, | |
| "loss": 3.0211, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 4.987013874649716e-05, | |
| "loss": 3.0253, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.98667213450892e-05, | |
| "loss": 3.0143, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 4.9863303943681225e-05, | |
| "loss": 3.0192, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 4.985988654227326e-05, | |
| "loss": 3.012, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 4.985646914086529e-05, | |
| "loss": 3.016, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 4.985305173945732e-05, | |
| "loss": 3.0087, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 4.984963433804935e-05, | |
| "loss": 2.9676, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 4.9846216936641384e-05, | |
| "loss": 2.9644, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 4.984279953523341e-05, | |
| "loss": 2.9714, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 4.983938213382544e-05, | |
| "loss": 2.9816, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 4.9835964732417474e-05, | |
| "loss": 2.9759, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 4.983254733100951e-05, | |
| "loss": 2.977, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 4.982912992960153e-05, | |
| "loss": 2.9841, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 4.982571252819356e-05, | |
| "loss": 2.9731, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 4.98222951267856e-05, | |
| "loss": 2.9716, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 4.9818877725377625e-05, | |
| "loss": 2.9762, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 4.981546032396965e-05, | |
| "loss": 2.9774, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 4.981204292256169e-05, | |
| "loss": 2.9743, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 4.9808625521153715e-05, | |
| "loss": 2.9737, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 4.980520811974575e-05, | |
| "loss": 2.9701, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 4.980179071833778e-05, | |
| "loss": 2.9589, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 4.9798373316929805e-05, | |
| "loss": 2.926, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 4.979495591552184e-05, | |
| "loss": 2.9327, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 4.9791538514113874e-05, | |
| "loss": 2.9401, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 4.97881211127059e-05, | |
| "loss": 2.9345, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 4.978470371129793e-05, | |
| "loss": 2.9348, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 4.9781286309889964e-05, | |
| "loss": 2.9386, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 4.977786890848199e-05, | |
| "loss": 2.9312, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 4.977445150707402e-05, | |
| "loss": 2.9361, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 4.9771034105666054e-05, | |
| "loss": 2.9316, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 4.976761670425809e-05, | |
| "loss": 2.9397, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 4.9764199302850116e-05, | |
| "loss": 2.9377, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 4.9760781901442144e-05, | |
| "loss": 2.9378, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 4.975736450003418e-05, | |
| "loss": 2.9306, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 4.9753947098626206e-05, | |
| "loss": 2.9339, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 4.975052969721824e-05, | |
| "loss": 2.8964, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 4.974711229581027e-05, | |
| "loss": 2.8929, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 4.9743694894402296e-05, | |
| "loss": 2.901, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 4.974027749299433e-05, | |
| "loss": 2.8934, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 4.9736860091586364e-05, | |
| "loss": 2.9019, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 4.973344269017839e-05, | |
| "loss": 2.9037, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 4.973002528877042e-05, | |
| "loss": 2.8915, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 4.9726607887362454e-05, | |
| "loss": 2.899, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 4.972319048595448e-05, | |
| "loss": 2.9026, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 4.971977308454651e-05, | |
| "loss": 2.9062, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 4.9716355683138544e-05, | |
| "loss": 2.8974, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 4.971293828173058e-05, | |
| "loss": 2.8969, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 4.9709520880322606e-05, | |
| "loss": 2.8972, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 4.9706103478914634e-05, | |
| "loss": 2.8994, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 4.970268607750667e-05, | |
| "loss": 2.8925, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 4.9699268676098696e-05, | |
| "loss": 2.8637, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 4.969585127469073e-05, | |
| "loss": 2.859, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 4.969243387328276e-05, | |
| "loss": 2.8624, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 4.9689016471874786e-05, | |
| "loss": 2.8595, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 4.968559907046682e-05, | |
| "loss": 2.8632, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 4.9682181669058855e-05, | |
| "loss": 2.8609, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 4.9678764267650876e-05, | |
| "loss": 2.8702, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 4.967534686624291e-05, | |
| "loss": 2.8714, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 4.9671929464834945e-05, | |
| "loss": 2.8636, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 4.966851206342697e-05, | |
| "loss": 2.8666, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 4.9665094662019e-05, | |
| "loss": 2.8701, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 4.9661677260611034e-05, | |
| "loss": 2.87, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 4.965825985920307e-05, | |
| "loss": 2.8608, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 4.96548424577951e-05, | |
| "loss": 2.867, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 4.9651425056387124e-05, | |
| "loss": 2.8444, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 4.964800765497916e-05, | |
| "loss": 2.8337, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 4.9644590253571186e-05, | |
| "loss": 2.8337, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 4.964117285216322e-05, | |
| "loss": 2.8319, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 4.963775545075525e-05, | |
| "loss": 2.8317, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 4.9634338049347276e-05, | |
| "loss": 2.8342, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 4.963092064793931e-05, | |
| "loss": 2.8273, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 4.962750324653134e-05, | |
| "loss": 2.8347, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 4.9624085845123366e-05, | |
| "loss": 2.8383, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 4.96206684437154e-05, | |
| "loss": 2.8313, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 4.9617251042307435e-05, | |
| "loss": 2.8387, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 4.961383364089946e-05, | |
| "loss": 2.834, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 4.961041623949149e-05, | |
| "loss": 2.8377, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 4.9606998838083525e-05, | |
| "loss": 2.8365, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 4.960358143667555e-05, | |
| "loss": 2.8338, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 4.960016403526758e-05, | |
| "loss": 2.7898, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 4.9596746633859615e-05, | |
| "loss": 2.7955, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 4.959332923245165e-05, | |
| "loss": 2.7901, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 4.958991183104368e-05, | |
| "loss": 2.7974, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 4.9586494429635705e-05, | |
| "loss": 2.8055, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 4.958307702822774e-05, | |
| "loss": 2.8088, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 4.957965962681977e-05, | |
| "loss": 2.8061, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 4.95762422254118e-05, | |
| "loss": 2.8005, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 4.957282482400383e-05, | |
| "loss": 2.8056, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 4.9569407422595856e-05, | |
| "loss": 2.8096, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 4.956599002118789e-05, | |
| "loss": 2.8077, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "learning_rate": 4.9562572619779925e-05, | |
| "loss": 2.8107, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 4.955915521837195e-05, | |
| "loss": 2.8123, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 4.955573781696398e-05, | |
| "loss": 2.8121, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 4.9552320415556015e-05, | |
| "loss": 2.7953, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 4.954890301414804e-05, | |
| "loss": 2.7705, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 4.954548561274007e-05, | |
| "loss": 2.7682, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 4.9542068211332105e-05, | |
| "loss": 2.7656, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 4.953865080992414e-05, | |
| "loss": 2.7742, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 4.953523340851617e-05, | |
| "loss": 2.7677, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 4.9531816007108195e-05, | |
| "loss": 2.7772, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 4.952839860570023e-05, | |
| "loss": 2.7756, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 4.952498120429226e-05, | |
| "loss": 2.7735, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 4.952156380288429e-05, | |
| "loss": 2.7794, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 9.7, | |
| "learning_rate": 4.951814640147632e-05, | |
| "loss": 2.7803, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 4.951472900006835e-05, | |
| "loss": 2.7819, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "learning_rate": 4.951131159866038e-05, | |
| "loss": 2.7798, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 4.9507894197252416e-05, | |
| "loss": 2.7858, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "learning_rate": 4.950447679584444e-05, | |
| "loss": 2.7831, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "learning_rate": 4.950105939443647e-05, | |
| "loss": 2.7485, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "learning_rate": 4.9497641993028506e-05, | |
| "loss": 2.7361, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 10.18, | |
| "learning_rate": 4.949422459162053e-05, | |
| "loss": 2.739, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 10.25, | |
| "learning_rate": 4.949080719021256e-05, | |
| "loss": 2.7434, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "learning_rate": 4.9487389788804595e-05, | |
| "loss": 2.7427, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 10.39, | |
| "learning_rate": 4.948397238739663e-05, | |
| "loss": 2.7424, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 10.46, | |
| "learning_rate": 4.948055498598866e-05, | |
| "loss": 2.756, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 10.52, | |
| "learning_rate": 4.9477137584580685e-05, | |
| "loss": 2.7434, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "learning_rate": 4.947372018317272e-05, | |
| "loss": 2.7512, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "learning_rate": 4.947030278176475e-05, | |
| "loss": 2.7527, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 10.73, | |
| "learning_rate": 4.946688538035678e-05, | |
| "loss": 2.7605, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "learning_rate": 4.946346797894881e-05, | |
| "loss": 2.7624, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "learning_rate": 4.946005057754084e-05, | |
| "loss": 2.7563, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "learning_rate": 4.945663317613287e-05, | |
| "loss": 2.7531, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 4.9453215774724906e-05, | |
| "loss": 2.7539, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "learning_rate": 4.944979837331693e-05, | |
| "loss": 2.7114, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 11.14, | |
| "learning_rate": 4.944638097190896e-05, | |
| "loss": 2.7128, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "learning_rate": 4.9442963570500996e-05, | |
| "loss": 2.7177, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 11.28, | |
| "learning_rate": 4.9439546169093024e-05, | |
| "loss": 2.7212, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 11.34, | |
| "learning_rate": 4.943612876768505e-05, | |
| "loss": 2.7224, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 11.41, | |
| "learning_rate": 4.9432711366277086e-05, | |
| "loss": 2.7234, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 11.48, | |
| "learning_rate": 4.9429293964869113e-05, | |
| "loss": 2.7233, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "learning_rate": 4.942587656346115e-05, | |
| "loss": 2.7297, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 11.62, | |
| "learning_rate": 4.9422459162053176e-05, | |
| "loss": 2.7211, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 11.69, | |
| "learning_rate": 4.941904176064521e-05, | |
| "loss": 2.7282, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "learning_rate": 4.941562435923724e-05, | |
| "loss": 2.7272, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 11.82, | |
| "learning_rate": 4.941220695782927e-05, | |
| "loss": 2.7325, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 11.89, | |
| "learning_rate": 4.94087895564213e-05, | |
| "loss": 2.7275, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "learning_rate": 4.940537215501333e-05, | |
| "loss": 2.7303, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 12.03, | |
| "learning_rate": 4.940195475360536e-05, | |
| "loss": 2.7091, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "learning_rate": 4.9398537352197397e-05, | |
| "loss": 2.6863, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "learning_rate": 4.939511995078942e-05, | |
| "loss": 2.6867, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "learning_rate": 4.939170254938145e-05, | |
| "loss": 2.6934, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 12.3, | |
| "learning_rate": 4.9388285147973486e-05, | |
| "loss": 2.6936, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 12.37, | |
| "learning_rate": 4.9384867746565514e-05, | |
| "loss": 2.6953, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "learning_rate": 4.938145034515754e-05, | |
| "loss": 2.6973, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 12.51, | |
| "learning_rate": 4.9378032943749576e-05, | |
| "loss": 2.7022, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 12.58, | |
| "learning_rate": 4.9374615542341604e-05, | |
| "loss": 2.6977, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 12.64, | |
| "learning_rate": 4.937119814093364e-05, | |
| "loss": 2.6996, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "learning_rate": 4.9367780739525666e-05, | |
| "loss": 2.7008, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 12.78, | |
| "learning_rate": 4.93643633381177e-05, | |
| "loss": 2.7091, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 12.85, | |
| "learning_rate": 4.936094593670973e-05, | |
| "loss": 2.7057, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "learning_rate": 4.935752853530176e-05, | |
| "loss": 2.7084, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "learning_rate": 4.935411113389379e-05, | |
| "loss": 2.7114, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 13.05, | |
| "learning_rate": 4.935069373248582e-05, | |
| "loss": 2.6727, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 13.12, | |
| "learning_rate": 4.934727633107785e-05, | |
| "loss": 2.6586, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 13.19, | |
| "learning_rate": 4.934385892966989e-05, | |
| "loss": 2.6631, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "learning_rate": 4.934044152826191e-05, | |
| "loss": 2.6632, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 4.933702412685394e-05, | |
| "loss": 2.6727, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 13.4, | |
| "learning_rate": 4.933360672544598e-05, | |
| "loss": 2.6671, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "learning_rate": 4.9330189324038004e-05, | |
| "loss": 2.686, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 13.53, | |
| "learning_rate": 4.932677192263003e-05, | |
| "loss": 2.6796, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "learning_rate": 4.9323354521222067e-05, | |
| "loss": 2.6745, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 13.67, | |
| "learning_rate": 4.9319937119814094e-05, | |
| "loss": 2.6825, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 13.74, | |
| "learning_rate": 4.931651971840613e-05, | |
| "loss": 2.6857, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 13.81, | |
| "learning_rate": 4.9313102316998156e-05, | |
| "loss": 2.6812, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 13.87, | |
| "learning_rate": 4.9309684915590184e-05, | |
| "loss": 2.6805, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "learning_rate": 4.930626751418222e-05, | |
| "loss": 2.6857, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "learning_rate": 4.930285011277425e-05, | |
| "loss": 2.6768, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "learning_rate": 4.929943271136628e-05, | |
| "loss": 2.6435, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 14.15, | |
| "learning_rate": 4.929601530995831e-05, | |
| "loss": 2.6508, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "learning_rate": 4.929259790855034e-05, | |
| "loss": 2.6444, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 14.28, | |
| "learning_rate": 4.928918050714238e-05, | |
| "loss": 2.6396, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "learning_rate": 4.92857631057344e-05, | |
| "loss": 2.6452, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 14.42, | |
| "learning_rate": 4.928234570432643e-05, | |
| "loss": 2.6535, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 14.49, | |
| "learning_rate": 4.927892830291847e-05, | |
| "loss": 2.6482, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "learning_rate": 4.9275510901510495e-05, | |
| "loss": 2.6567, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 14.63, | |
| "learning_rate": 4.927209350010252e-05, | |
| "loss": 2.6589, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 14.69, | |
| "learning_rate": 4.926867609869456e-05, | |
| "loss": 2.6564, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 14.76, | |
| "learning_rate": 4.9265258697286585e-05, | |
| "loss": 2.6579, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 14.83, | |
| "learning_rate": 4.926184129587861e-05, | |
| "loss": 2.658, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 14.9, | |
| "learning_rate": 4.925842389447065e-05, | |
| "loss": 2.6658, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 14.97, | |
| "learning_rate": 4.9255006493062674e-05, | |
| "loss": 2.6535, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 15.04, | |
| "learning_rate": 4.925158909165471e-05, | |
| "loss": 2.6404, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 15.1, | |
| "learning_rate": 4.9248171690246737e-05, | |
| "loss": 2.6185, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 15.17, | |
| "learning_rate": 4.924475428883877e-05, | |
| "loss": 2.6158, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 15.24, | |
| "learning_rate": 4.92413368874308e-05, | |
| "loss": 2.621, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 15.31, | |
| "learning_rate": 4.923791948602283e-05, | |
| "loss": 2.6296, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "learning_rate": 4.923450208461486e-05, | |
| "loss": 2.6284, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 15.45, | |
| "learning_rate": 4.923108468320689e-05, | |
| "loss": 2.6252, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 15.51, | |
| "learning_rate": 4.922766728179892e-05, | |
| "loss": 2.6355, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 15.58, | |
| "learning_rate": 4.922424988039096e-05, | |
| "loss": 2.6337, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 15.65, | |
| "learning_rate": 4.922083247898298e-05, | |
| "loss": 2.6337, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 15.72, | |
| "learning_rate": 4.921741507757501e-05, | |
| "loss": 2.6354, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 15.79, | |
| "learning_rate": 4.921399767616705e-05, | |
| "loss": 2.6371, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 15.86, | |
| "learning_rate": 4.9210580274759075e-05, | |
| "loss": 2.6427, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 15.92, | |
| "learning_rate": 4.92071628733511e-05, | |
| "loss": 2.639, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "learning_rate": 4.920374547194314e-05, | |
| "loss": 2.6426, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 16.06, | |
| "learning_rate": 4.9200328070535165e-05, | |
| "loss": 2.5977, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 16.13, | |
| "learning_rate": 4.91969106691272e-05, | |
| "loss": 2.5961, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 16.2, | |
| "learning_rate": 4.919349326771923e-05, | |
| "loss": 2.6007, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 16.27, | |
| "learning_rate": 4.919007586631126e-05, | |
| "loss": 2.6015, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 16.33, | |
| "learning_rate": 4.918665846490329e-05, | |
| "loss": 2.6064, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 16.4, | |
| "learning_rate": 4.9183241063495324e-05, | |
| "loss": 2.6125, | |
| "step": 120000 | |
| } | |
| ], | |
| "max_steps": 7316000, | |
| "num_train_epochs": 1000, | |
| "total_flos": 241553911465377792, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |