{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.38807321648017595, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.973158269193455e-05, "loss": 1.1361, "step": 500 }, { "epoch": 0.03, "learning_rate": 4.9462087402712206e-05, "loss": 1.0853, "step": 1000 }, { "epoch": 0.05, "learning_rate": 4.919259211348986e-05, "loss": 1.0513, "step": 1500 }, { "epoch": 0.06, "learning_rate": 4.892309682426751e-05, "loss": 1.0224, "step": 2000 }, { "epoch": 0.08, "learning_rate": 4.8654140525623614e-05, "loss": 1.0188, "step": 2500 }, { "epoch": 0.1, "learning_rate": 4.838464523640127e-05, "loss": 0.9882, "step": 3000 }, { "epoch": 0.11, "learning_rate": 4.811514994717893e-05, "loss": 0.9726, "step": 3500 }, { "epoch": 0.13, "learning_rate": 4.784565465795658e-05, "loss": 0.9692, "step": 4000 }, { "epoch": 0.15, "learning_rate": 4.757615936873423e-05, "loss": 0.9499, "step": 4500 }, { "epoch": 0.16, "learning_rate": 4.730666407951189e-05, "loss": 0.9558, "step": 5000 }, { "epoch": 0.18, "learning_rate": 4.703770778086799e-05, "loss": 0.9387, "step": 5500 }, { "epoch": 0.19, "learning_rate": 4.676821249164565e-05, "loss": 0.924, "step": 6000 }, { "epoch": 0.21, "learning_rate": 4.6498717202423306e-05, "loss": 0.9163, "step": 6500 }, { "epoch": 0.23, "learning_rate": 4.622922191320096e-05, "loss": 0.9206, "step": 7000 }, { "epoch": 0.24, "learning_rate": 4.5959726623978616e-05, "loss": 0.9196, "step": 7500 }, { "epoch": 0.26, "learning_rate": 4.5690231334756274e-05, "loss": 0.9009, "step": 8000 }, { "epoch": 0.27, "learning_rate": 4.5420736045533925e-05, "loss": 0.9045, "step": 8500 }, { "epoch": 0.29, "learning_rate": 4.5151240756311583e-05, "loss": 0.9058, "step": 9000 }, { "epoch": 0.31, "learning_rate": 4.488282344824612e-05, "loss": 0.8905, "step": 9500 }, { "epoch": 0.32, "learning_rate": 4.461332815902378e-05, "loss": 0.8932, "step": 10000 }, { "epoch": 0.34, "learning_rate": 4.434383286980144e-05, "loss": 0.8844, "step": 10500 }, { "epoch": 0.36, "learning_rate": 4.4074337580579096e-05, "loss": 0.8951, "step": 11000 }, { "epoch": 0.37, "learning_rate": 4.3804842291356755e-05, "loss": 0.8806, "step": 11500 }, { "epoch": 0.39, "learning_rate": 4.35353470021344e-05, "loss": 0.8887, "step": 12000 } ], "max_steps": 92766, "num_train_epochs": 3, "total_flos": 3.31877922352704e+16, "trial_name": null, "trial_params": null }