| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9141551206970433, | |
| "global_step": 800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.2499999999999998e-05, | |
| "loss": 5.1881, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 5.518892288208008, | |
| "eval_runtime": 281.7614, | |
| "eval_samples_per_second": 16.539, | |
| "eval_steps_per_second": 16.539, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.75e-05, | |
| "loss": 3.8191, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 2.910094738006592, | |
| "eval_runtime": 282.3915, | |
| "eval_samples_per_second": 16.502, | |
| "eval_steps_per_second": 16.502, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 7.25e-05, | |
| "loss": 2.5718, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 2.240684986114502, | |
| "eval_runtime": 283.3137, | |
| "eval_samples_per_second": 16.448, | |
| "eval_steps_per_second": 16.448, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.750000000000001e-05, | |
| "loss": 2.3477, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 2.1633121967315674, | |
| "eval_runtime": 282.161, | |
| "eval_samples_per_second": 16.515, | |
| "eval_steps_per_second": 16.515, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001225, | |
| "loss": 2.3472, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 2.118901252746582, | |
| "eval_runtime": 281.7032, | |
| "eval_samples_per_second": 16.542, | |
| "eval_steps_per_second": 16.542, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001475, | |
| "loss": 2.2188, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 2.1062021255493164, | |
| "eval_runtime": 282.3706, | |
| "eval_samples_per_second": 16.503, | |
| "eval_steps_per_second": 16.503, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001725, | |
| "loss": 2.2085, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 2.0873425006866455, | |
| "eval_runtime": 282.0216, | |
| "eval_samples_per_second": 16.524, | |
| "eval_steps_per_second": 16.524, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001975, | |
| "loss": 2.1271, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 2.0874459743499756, | |
| "eval_runtime": 283.0992, | |
| "eval_samples_per_second": 16.461, | |
| "eval_steps_per_second": 16.461, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00022250000000000001, | |
| "loss": 2.1834, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 2.0584352016448975, | |
| "eval_runtime": 281.7518, | |
| "eval_samples_per_second": 16.539, | |
| "eval_steps_per_second": 16.539, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0002475, | |
| "loss": 2.1927, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 2.050870656967163, | |
| "eval_runtime": 281.7194, | |
| "eval_samples_per_second": 16.541, | |
| "eval_steps_per_second": 16.541, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0002725, | |
| "loss": 2.1816, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 2.039118766784668, | |
| "eval_runtime": 282.0613, | |
| "eval_samples_per_second": 16.521, | |
| "eval_steps_per_second": 16.521, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00029749999999999997, | |
| "loss": 2.2131, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_loss": 2.0333147048950195, | |
| "eval_runtime": 282.4675, | |
| "eval_samples_per_second": 16.497, | |
| "eval_steps_per_second": 16.497, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00032250000000000003, | |
| "loss": 2.2322, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_loss": 2.0386910438537598, | |
| "eval_runtime": 282.3106, | |
| "eval_samples_per_second": 16.507, | |
| "eval_steps_per_second": 16.507, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0003475, | |
| "loss": 2.2614, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 2.025144577026367, | |
| "eval_runtime": 285.8974, | |
| "eval_samples_per_second": 16.3, | |
| "eval_steps_per_second": 16.3, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0003725, | |
| "loss": 2.186, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_loss": 2.0244803428649902, | |
| "eval_runtime": 284.3127, | |
| "eval_samples_per_second": 16.39, | |
| "eval_steps_per_second": 16.39, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0003975, | |
| "loss": 2.1677, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_loss": 2.0197227001190186, | |
| "eval_runtime": 284.0212, | |
| "eval_samples_per_second": 16.407, | |
| "eval_steps_per_second": 16.407, | |
| "step": 800 | |
| } | |
| ], | |
| "max_steps": 875, | |
| "num_train_epochs": 1, | |
| "total_flos": 4.324339205829427e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |