{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9875000000000006e-05, "loss": 6.535, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.975e-05, "loss": 5.1906, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.962500000000001e-05, "loss": 4.6323, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9500000000000004e-05, "loss": 3.96, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.937500000000001e-05, "loss": 3.6713, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.9250000000000004e-05, "loss": 3.4882, "step": 3000 }, { "epoch": 0.02, "learning_rate": 4.9125e-05, "loss": 3.3843, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.9e-05, "loss": 3.3546, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.8875e-05, "loss": 3.223, "step": 4500 }, { "epoch": 0.03, "learning_rate": 4.875e-05, "loss": 3.1837, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.8625e-05, "loss": 3.063, "step": 5500 }, { "epoch": 0.03, "learning_rate": 4.85e-05, "loss": 3.092, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.8375000000000004e-05, "loss": 3.0056, "step": 6500 }, { "epoch": 0.04, "learning_rate": 4.825e-05, "loss": 2.9768, "step": 7000 }, { "epoch": 0.04, "learning_rate": 4.8125000000000004e-05, "loss": 2.9547, "step": 7500 }, { "epoch": 0.04, "learning_rate": 4.8e-05, "loss": 2.8948, "step": 8000 }, { "epoch": 0.04, "learning_rate": 4.7875000000000005e-05, "loss": 2.9324, "step": 8500 }, { "epoch": 0.04, "learning_rate": 4.775e-05, "loss": 2.8575, "step": 9000 }, { "epoch": 0.05, "learning_rate": 4.7625000000000006e-05, "loss": 2.8774, "step": 9500 }, { "epoch": 0.05, "learning_rate": 4.75e-05, "loss": 2.8511, "step": 10000 }, { "epoch": 0.05, "learning_rate": 4.7375e-05, "loss": 2.8635, "step": 10500 }, { "epoch": 0.06, "learning_rate": 4.7249999999999997e-05, "loss": 2.7233, "step": 11000 }, { "epoch": 0.06, "learning_rate": 4.7125e-05, "loss": 2.7953, "step": 11500 }, { "epoch": 0.06, "learning_rate": 4.7e-05, "loss": 2.7092, "step": 12000 }, { "epoch": 0.06, "learning_rate": 4.6875e-05, "loss": 2.7372, "step": 12500 }, { "epoch": 0.07, "learning_rate": 4.6750000000000005e-05, "loss": 2.7834, "step": 13000 }, { "epoch": 0.07, "learning_rate": 4.6625e-05, "loss": 2.7742, "step": 13500 }, { "epoch": 0.07, "learning_rate": 4.6500000000000005e-05, "loss": 2.7567, "step": 14000 }, { "epoch": 0.07, "learning_rate": 4.6375e-05, "loss": 2.7159, "step": 14500 }, { "epoch": 0.07, "learning_rate": 4.6250000000000006e-05, "loss": 2.6708, "step": 15000 }, { "epoch": 0.08, "learning_rate": 4.6125e-05, "loss": 2.7847, "step": 15500 }, { "epoch": 0.08, "learning_rate": 4.600000000000001e-05, "loss": 2.7013, "step": 16000 }, { "epoch": 0.08, "learning_rate": 4.5875000000000004e-05, "loss": 2.6901, "step": 16500 }, { "epoch": 0.09, "learning_rate": 4.575e-05, "loss": 2.6959, "step": 17000 }, { "epoch": 0.09, "learning_rate": 4.5625e-05, "loss": 2.6235, "step": 17500 }, { "epoch": 0.09, "learning_rate": 4.55e-05, "loss": 2.698, "step": 18000 }, { "epoch": 0.09, "learning_rate": 4.5375e-05, "loss": 2.635, "step": 18500 }, { "epoch": 0.1, "learning_rate": 4.525e-05, "loss": 2.5864, "step": 19000 }, { "epoch": 0.1, "learning_rate": 4.5125e-05, "loss": 2.7273, "step": 19500 }, { "epoch": 0.1, "learning_rate": 4.5e-05, "loss": 2.628, "step": 20000 }, { "epoch": 0.1, "learning_rate": 4.4875e-05, "loss": 2.6253, "step": 20500 }, { "epoch": 0.1, "learning_rate": 4.4750000000000004e-05, "loss": 2.5907, "step": 21000 }, { "epoch": 0.11, "learning_rate": 4.4625e-05, "loss": 2.6367, "step": 21500 }, { "epoch": 0.11, "learning_rate": 4.4500000000000004e-05, "loss": 2.6509, "step": 22000 }, { "epoch": 0.11, "learning_rate": 4.4375e-05, "loss": 2.5594, "step": 22500 }, { "epoch": 0.12, "learning_rate": 4.4250000000000005e-05, "loss": 2.6131, "step": 23000 }, { "epoch": 0.12, "learning_rate": 4.4125e-05, "loss": 2.5891, "step": 23500 }, { "epoch": 0.12, "learning_rate": 4.4000000000000006e-05, "loss": 2.5567, "step": 24000 }, { "epoch": 0.12, "learning_rate": 4.3875e-05, "loss": 2.5828, "step": 24500 }, { "epoch": 0.12, "learning_rate": 4.375e-05, "loss": 2.5276, "step": 25000 }, { "epoch": 0.13, "learning_rate": 4.3625e-05, "loss": 2.6037, "step": 25500 }, { "epoch": 0.13, "learning_rate": 4.35e-05, "loss": 2.5522, "step": 26000 }, { "epoch": 0.13, "learning_rate": 4.3375000000000004e-05, "loss": 2.612, "step": 26500 }, { "epoch": 0.14, "learning_rate": 4.325e-05, "loss": 2.5103, "step": 27000 }, { "epoch": 0.14, "learning_rate": 4.3125000000000005e-05, "loss": 2.5565, "step": 27500 }, { "epoch": 0.14, "learning_rate": 4.3e-05, "loss": 2.5448, "step": 28000 }, { "epoch": 0.14, "learning_rate": 4.2875000000000005e-05, "loss": 2.5231, "step": 28500 }, { "epoch": 0.14, "learning_rate": 4.275e-05, "loss": 2.5115, "step": 29000 }, { "epoch": 0.15, "learning_rate": 4.2625000000000006e-05, "loss": 2.5543, "step": 29500 }, { "epoch": 0.15, "learning_rate": 4.25e-05, "loss": 2.5185, "step": 30000 }, { "epoch": 0.15, "learning_rate": 4.237500000000001e-05, "loss": 2.5097, "step": 30500 }, { "epoch": 0.15, "learning_rate": 4.2250000000000004e-05, "loss": 2.4648, "step": 31000 }, { "epoch": 0.16, "learning_rate": 4.2125e-05, "loss": 2.5455, "step": 31500 }, { "epoch": 0.16, "learning_rate": 4.2e-05, "loss": 2.4929, "step": 32000 }, { "epoch": 0.16, "learning_rate": 4.1875e-05, "loss": 2.5141, "step": 32500 }, { "epoch": 0.17, "learning_rate": 4.175e-05, "loss": 2.5016, "step": 33000 }, { "epoch": 0.17, "learning_rate": 4.1625e-05, "loss": 2.515, "step": 33500 }, { "epoch": 0.17, "learning_rate": 4.15e-05, "loss": 2.4849, "step": 34000 }, { "epoch": 0.17, "learning_rate": 4.1375e-05, "loss": 2.4865, "step": 34500 }, { "epoch": 0.17, "learning_rate": 4.125e-05, "loss": 2.4996, "step": 35000 }, { "epoch": 0.18, "learning_rate": 4.1125000000000004e-05, "loss": 2.5202, "step": 35500 }, { "epoch": 0.18, "learning_rate": 4.1e-05, "loss": 2.491, "step": 36000 }, { "epoch": 0.18, "learning_rate": 4.0875000000000004e-05, "loss": 2.4586, "step": 36500 }, { "epoch": 0.18, "learning_rate": 4.075e-05, "loss": 2.4067, "step": 37000 }, { "epoch": 0.19, "learning_rate": 4.0625000000000005e-05, "loss": 2.4727, "step": 37500 }, { "epoch": 0.19, "learning_rate": 4.05e-05, "loss": 2.473, "step": 38000 }, { "epoch": 0.19, "learning_rate": 4.0375e-05, "loss": 2.5031, "step": 38500 }, { "epoch": 0.2, "learning_rate": 4.025e-05, "loss": 2.4446, "step": 39000 }, { "epoch": 0.2, "learning_rate": 4.0125e-05, "loss": 2.4294, "step": 39500 }, { "epoch": 0.2, "learning_rate": 4e-05, "loss": 2.4211, "step": 40000 }, { "epoch": 0.2, "learning_rate": 3.9875e-05, "loss": 2.4458, "step": 40500 }, { "epoch": 0.2, "learning_rate": 3.9750000000000004e-05, "loss": 2.4372, "step": 41000 }, { "epoch": 0.21, "learning_rate": 3.9625e-05, "loss": 2.4291, "step": 41500 }, { "epoch": 0.21, "learning_rate": 3.9500000000000005e-05, "loss": 2.4459, "step": 42000 }, { "epoch": 0.21, "learning_rate": 3.9375e-05, "loss": 2.4384, "step": 42500 }, { "epoch": 0.21, "learning_rate": 3.9250000000000005e-05, "loss": 2.3212, "step": 43000 }, { "epoch": 0.22, "learning_rate": 3.9125e-05, "loss": 2.4252, "step": 43500 }, { "epoch": 0.22, "learning_rate": 3.9000000000000006e-05, "loss": 2.447, "step": 44000 }, { "epoch": 0.22, "learning_rate": 3.8875e-05, "loss": 2.3701, "step": 44500 }, { "epoch": 0.23, "learning_rate": 3.875e-05, "loss": 2.4841, "step": 45000 }, { "epoch": 0.23, "learning_rate": 3.8625e-05, "loss": 2.4248, "step": 45500 }, { "epoch": 0.23, "learning_rate": 3.85e-05, "loss": 2.4825, "step": 46000 }, { "epoch": 0.23, "learning_rate": 3.8375e-05, "loss": 2.4443, "step": 46500 }, { "epoch": 0.23, "learning_rate": 3.825e-05, "loss": 2.4231, "step": 47000 }, { "epoch": 0.24, "learning_rate": 3.8125e-05, "loss": 2.4251, "step": 47500 }, { "epoch": 0.24, "learning_rate": 3.8e-05, "loss": 2.3801, "step": 48000 }, { "epoch": 0.24, "learning_rate": 3.7875e-05, "loss": 2.3641, "step": 48500 }, { "epoch": 0.24, "learning_rate": 3.775e-05, "loss": 2.391, "step": 49000 }, { "epoch": 0.25, "learning_rate": 3.7625e-05, "loss": 2.3917, "step": 49500 }, { "epoch": 0.25, "learning_rate": 3.7500000000000003e-05, "loss": 2.4489, "step": 50000 }, { "epoch": 0.25, "learning_rate": 3.737500000000001e-05, "loss": 2.413, "step": 50500 }, { "epoch": 0.26, "learning_rate": 3.7250000000000004e-05, "loss": 2.424, "step": 51000 }, { "epoch": 0.26, "learning_rate": 3.7125e-05, "loss": 2.3752, "step": 51500 }, { "epoch": 0.26, "learning_rate": 3.7e-05, "loss": 2.418, "step": 52000 }, { "epoch": 0.26, "learning_rate": 3.6875e-05, "loss": 2.3733, "step": 52500 }, { "epoch": 0.27, "learning_rate": 3.675e-05, "loss": 2.4311, "step": 53000 }, { "epoch": 0.27, "learning_rate": 3.6625e-05, "loss": 2.4121, "step": 53500 }, { "epoch": 0.27, "learning_rate": 3.65e-05, "loss": 2.3514, "step": 54000 }, { "epoch": 0.27, "learning_rate": 3.6375e-05, "loss": 2.3905, "step": 54500 }, { "epoch": 0.28, "learning_rate": 3.625e-05, "loss": 2.288, "step": 55000 }, { "epoch": 0.28, "learning_rate": 3.6125000000000004e-05, "loss": 2.3938, "step": 55500 }, { "epoch": 0.28, "learning_rate": 3.6e-05, "loss": 2.3689, "step": 56000 }, { "epoch": 0.28, "learning_rate": 3.5875000000000005e-05, "loss": 2.417, "step": 56500 }, { "epoch": 0.28, "learning_rate": 3.575e-05, "loss": 2.3249, "step": 57000 }, { "epoch": 0.29, "learning_rate": 3.5625000000000005e-05, "loss": 2.2635, "step": 57500 }, { "epoch": 0.29, "learning_rate": 3.55e-05, "loss": 2.3325, "step": 58000 }, { "epoch": 0.29, "learning_rate": 3.5375e-05, "loss": 2.3574, "step": 58500 }, { "epoch": 0.29, "learning_rate": 3.525e-05, "loss": 2.3943, "step": 59000 }, { "epoch": 0.3, "learning_rate": 3.5125e-05, "loss": 2.3558, "step": 59500 }, { "epoch": 0.3, "learning_rate": 3.5e-05, "loss": 2.3984, "step": 60000 } ], "max_steps": 200000, "num_train_epochs": 9223372036854775807, "total_flos": 8.31675319713792e+18, "trial_name": null, "trial_params": null }