| { | |
| "best_metric": 1.818081021308899, | |
| "best_model_checkpoint": "math/checkpoint-17040", | |
| "epoch": 5.0, | |
| "global_step": 17040, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.853286384976526e-05, | |
| "loss": 2.4525, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.706572769953052e-05, | |
| "loss": 2.3303, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.559859154929578e-05, | |
| "loss": 2.2788, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.413145539906103e-05, | |
| "loss": 2.2405, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.26643192488263e-05, | |
| "loss": 2.2089, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.119718309859155e-05, | |
| "loss": 2.1796, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.94922935962677, | |
| "eval_runtime": 38.2996, | |
| "eval_samples_per_second": 79.113, | |
| "eval_steps_per_second": 9.896, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.973004694835681e-05, | |
| "loss": 2.1575, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.826291079812207e-05, | |
| "loss": 2.1208, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.679577464788733e-05, | |
| "loss": 2.0958, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.532863849765258e-05, | |
| "loss": 2.0928, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.386150234741784e-05, | |
| "loss": 2.0898, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.23943661971831e-05, | |
| "loss": 2.0751, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.092723004694836e-05, | |
| "loss": 2.0669, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.8773746490478516, | |
| "eval_runtime": 38.3673, | |
| "eval_samples_per_second": 78.973, | |
| "eval_steps_per_second": 9.878, | |
| "step": 6816 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 2.9460093896713615e-05, | |
| "loss": 2.0433, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 2.7992957746478874e-05, | |
| "loss": 2.0275, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 2.6525821596244134e-05, | |
| "loss": 2.0168, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 2.505868544600939e-05, | |
| "loss": 2.0208, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.359154929577465e-05, | |
| "loss": 2.0111, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.2124413145539908e-05, | |
| "loss": 2.0058, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.0657276995305167e-05, | |
| "loss": 2.0019, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 1.8432753086090088, | |
| "eval_runtime": 38.3414, | |
| "eval_samples_per_second": 79.027, | |
| "eval_steps_per_second": 9.885, | |
| "step": 10224 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.9190140845070423e-05, | |
| "loss": 1.9746, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 1.7723004694835683e-05, | |
| "loss": 1.9727, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 1.625586854460094e-05, | |
| "loss": 1.9677, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 1.4788732394366198e-05, | |
| "loss": 1.9785, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 1.3321596244131457e-05, | |
| "loss": 1.9694, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 1.1854460093896715e-05, | |
| "loss": 1.9653, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 1.0387323943661972e-05, | |
| "loss": 1.9702, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 1.8244620561599731, | |
| "eval_runtime": 38.37, | |
| "eval_samples_per_second": 78.968, | |
| "eval_steps_per_second": 9.878, | |
| "step": 13632 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 8.92018779342723e-06, | |
| "loss": 1.9433, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 7.453051643192488e-06, | |
| "loss": 1.9495, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 5.9859154929577465e-06, | |
| "loss": 1.9531, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 4.518779342723005e-06, | |
| "loss": 1.9457, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 3.051643192488263e-06, | |
| "loss": 1.9329, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 1.5845070422535212e-06, | |
| "loss": 1.9425, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 1.1737089201877935e-07, | |
| "loss": 1.9458, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 1.818081021308899, | |
| "eval_runtime": 38.3351, | |
| "eval_samples_per_second": 79.04, | |
| "eval_steps_per_second": 9.886, | |
| "step": 17040 | |
| } | |
| ], | |
| "max_steps": 17040, | |
| "num_train_epochs": 5, | |
| "total_flos": 1.312631697641472e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |