| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.303030303030305, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.6655681133270264, | |
| "eval_runtime": 31.9535, | |
| "eval_samples_per_second": 16.242, | |
| "eval_steps_per_second": 1.033, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.34202826023101807, | |
| "eval_runtime": 32.6272, | |
| "eval_samples_per_second": 15.907, | |
| "eval_steps_per_second": 1.011, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.1993253082036972, | |
| "eval_runtime": 34.1947, | |
| "eval_samples_per_second": 15.178, | |
| "eval_steps_per_second": 0.965, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.12108779698610306, | |
| "eval_runtime": 33.5003, | |
| "eval_samples_per_second": 15.492, | |
| "eval_steps_per_second": 0.985, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.08059267699718475, | |
| "eval_runtime": 33.0363, | |
| "eval_samples_per_second": 15.71, | |
| "eval_steps_per_second": 0.999, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.0539543516933918, | |
| "eval_runtime": 33.3191, | |
| "eval_samples_per_second": 15.577, | |
| "eval_steps_per_second": 0.99, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.033514220267534256, | |
| "eval_runtime": 33.9315, | |
| "eval_samples_per_second": 15.296, | |
| "eval_steps_per_second": 0.973, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.02793893776834011, | |
| "eval_runtime": 33.1021, | |
| "eval_samples_per_second": 15.679, | |
| "eval_steps_per_second": 0.997, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.017579322680830956, | |
| "eval_runtime": 33.1929, | |
| "eval_samples_per_second": 15.636, | |
| "eval_steps_per_second": 0.994, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.017992401495575905, | |
| "eval_runtime": 33.5989, | |
| "eval_samples_per_second": 15.447, | |
| "eval_steps_per_second": 0.982, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.011186002753674984, | |
| "eval_runtime": 33.2114, | |
| "eval_samples_per_second": 15.627, | |
| "eval_steps_per_second": 0.994, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.009413644671440125, | |
| "eval_runtime": 33.6973, | |
| "eval_samples_per_second": 15.402, | |
| "eval_steps_per_second": 0.979, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.008357277140021324, | |
| "eval_runtime": 34.2116, | |
| "eval_samples_per_second": 15.17, | |
| "eval_steps_per_second": 0.965, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.006698057986795902, | |
| "eval_runtime": 32.3567, | |
| "eval_samples_per_second": 16.04, | |
| "eval_steps_per_second": 1.02, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.005631112959235907, | |
| "eval_runtime": 30.3383, | |
| "eval_samples_per_second": 17.107, | |
| "eval_steps_per_second": 1.088, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 15.15, | |
| "learning_rate": 1.2424242424242425e-05, | |
| "loss": 0.1575, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.00457022013142705, | |
| "eval_runtime": 30.6053, | |
| "eval_samples_per_second": 16.958, | |
| "eval_steps_per_second": 1.078, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.005157523322850466, | |
| "eval_runtime": 30.0767, | |
| "eval_samples_per_second": 17.256, | |
| "eval_steps_per_second": 1.097, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.0044335490092635155, | |
| "eval_runtime": 29.0299, | |
| "eval_samples_per_second": 17.878, | |
| "eval_steps_per_second": 1.137, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.003722449066117406, | |
| "eval_runtime": 28.4937, | |
| "eval_samples_per_second": 18.215, | |
| "eval_steps_per_second": 1.158, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.004425578285008669, | |
| "eval_runtime": 32.1425, | |
| "eval_samples_per_second": 16.147, | |
| "eval_steps_per_second": 1.027, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.0040681445971131325, | |
| "eval_runtime": 28.8069, | |
| "eval_samples_per_second": 18.017, | |
| "eval_steps_per_second": 1.146, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.003019771073013544, | |
| "eval_runtime": 28.6404, | |
| "eval_samples_per_second": 18.121, | |
| "eval_steps_per_second": 1.152, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.002829624805599451, | |
| "eval_runtime": 29.787, | |
| "eval_samples_per_second": 17.424, | |
| "eval_steps_per_second": 1.108, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.002751641208305955, | |
| "eval_runtime": 28.377, | |
| "eval_samples_per_second": 18.289, | |
| "eval_steps_per_second": 1.163, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.002945221494883299, | |
| "eval_runtime": 29.5958, | |
| "eval_samples_per_second": 17.536, | |
| "eval_steps_per_second": 1.115, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.0026160639245063066, | |
| "eval_runtime": 29.0161, | |
| "eval_samples_per_second": 17.887, | |
| "eval_steps_per_second": 1.137, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.002537393243983388, | |
| "eval_runtime": 28.4904, | |
| "eval_samples_per_second": 18.217, | |
| "eval_steps_per_second": 1.158, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.00242584478110075, | |
| "eval_runtime": 29.437, | |
| "eval_samples_per_second": 17.631, | |
| "eval_steps_per_second": 1.121, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.0026495754718780518, | |
| "eval_runtime": 28.3889, | |
| "eval_samples_per_second": 18.282, | |
| "eval_steps_per_second": 1.162, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.0023259243462234735, | |
| "eval_runtime": 28.1977, | |
| "eval_samples_per_second": 18.406, | |
| "eval_steps_per_second": 1.17, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 30.3, | |
| "learning_rate": 4.848484848484849e-06, | |
| "loss": 0.0065, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1320, | |
| "num_train_epochs": 40, | |
| "save_steps": 500, | |
| "total_flos": 278856790097838.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |