| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.996539792387544, | |
| "global_step": 3600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8460490703582764, | |
| "eval_loss": 0.5365713834762573, | |
| "eval_runtime": 8.0333, | |
| "eval_samples_per_second": 91.369, | |
| "eval_steps_per_second": 45.685, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8092643022537231, | |
| "eval_loss": 0.5189609527587891, | |
| "eval_runtime": 8.0124, | |
| "eval_samples_per_second": 91.608, | |
| "eval_steps_per_second": 45.804, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 4.309722222222222e-05, | |
| "loss": 0.4021, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8283378481864929, | |
| "eval_loss": 0.6708077788352966, | |
| "eval_runtime": 8.0551, | |
| "eval_samples_per_second": 91.122, | |
| "eval_steps_per_second": 45.561, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8542234301567078, | |
| "eval_loss": 0.516476571559906, | |
| "eval_runtime": 8.0611, | |
| "eval_samples_per_second": 91.054, | |
| "eval_steps_per_second": 45.527, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8188011050224304, | |
| "eval_loss": 0.6029361486434937, | |
| "eval_runtime": 8.0681, | |
| "eval_samples_per_second": 90.975, | |
| "eval_steps_per_second": 45.488, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 3.6166666666666674e-05, | |
| "loss": 0.2576, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8487738370895386, | |
| "eval_loss": 0.6060934066772461, | |
| "eval_runtime": 8.0522, | |
| "eval_samples_per_second": 91.155, | |
| "eval_steps_per_second": 45.578, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8514986634254456, | |
| "eval_loss": 0.748849630355835, | |
| "eval_runtime": 8.0692, | |
| "eval_samples_per_second": 90.963, | |
| "eval_steps_per_second": 45.481, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8651226162910461, | |
| "eval_loss": 0.6119422912597656, | |
| "eval_runtime": 8.0635, | |
| "eval_samples_per_second": 91.028, | |
| "eval_steps_per_second": 45.514, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 2.9236111111111115e-05, | |
| "loss": 0.1738, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8542234301567078, | |
| "eval_loss": 0.6864181160926819, | |
| "eval_runtime": 8.212, | |
| "eval_samples_per_second": 89.382, | |
| "eval_steps_per_second": 44.691, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8446866273880005, | |
| "eval_loss": 0.7817405462265015, | |
| "eval_runtime": 8.0215, | |
| "eval_samples_per_second": 91.505, | |
| "eval_steps_per_second": 45.752, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8514986634254456, | |
| "eval_loss": 0.6188392043113708, | |
| "eval_runtime": 8.0857, | |
| "eval_samples_per_second": 90.777, | |
| "eval_steps_per_second": 45.389, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "learning_rate": 2.2305555555555556e-05, | |
| "loss": 0.1303, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8569482564926147, | |
| "eval_loss": 0.5936163663864136, | |
| "eval_runtime": 8.0618, | |
| "eval_samples_per_second": 91.047, | |
| "eval_steps_per_second": 45.523, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.859673023223877, | |
| "eval_loss": 0.6109394431114197, | |
| "eval_runtime": 8.0512, | |
| "eval_samples_per_second": 91.167, | |
| "eval_steps_per_second": 45.583, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "learning_rate": 1.5375e-05, | |
| "loss": 0.1226, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8501362204551697, | |
| "eval_loss": 0.7600889205932617, | |
| "eval_runtime": 8.0176, | |
| "eval_samples_per_second": 91.549, | |
| "eval_steps_per_second": 45.774, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8501362204551697, | |
| "eval_loss": 0.6596993803977966, | |
| "eval_runtime": 8.0565, | |
| "eval_samples_per_second": 91.107, | |
| "eval_steps_per_second": 45.553, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8460490703582764, | |
| "eval_loss": 0.712175190448761, | |
| "eval_runtime": 8.0456, | |
| "eval_samples_per_second": 91.23, | |
| "eval_steps_per_second": 45.615, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 16.66, | |
| "learning_rate": 8.430555555555556e-06, | |
| "loss": 0.1261, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8514986634254456, | |
| "eval_loss": 0.7294248938560486, | |
| "eval_runtime": 8.0369, | |
| "eval_samples_per_second": 91.329, | |
| "eval_steps_per_second": 45.664, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.863760232925415, | |
| "eval_loss": 0.6875426173210144, | |
| "eval_runtime": 8.0764, | |
| "eval_samples_per_second": 90.882, | |
| "eval_steps_per_second": 45.441, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8664849996566772, | |
| "eval_loss": 0.6823601126670837, | |
| "eval_runtime": 8.0997, | |
| "eval_samples_per_second": 90.62, | |
| "eval_steps_per_second": 45.31, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "learning_rate": 1.4861111111111113e-06, | |
| "loss": 0.1044, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8623978495597839, | |
| "eval_loss": 0.6752045750617981, | |
| "eval_runtime": 8.0488, | |
| "eval_samples_per_second": 91.193, | |
| "eval_steps_per_second": 45.597, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 3600, | |
| "total_flos": 1.3459697175036672e+16, | |
| "train_loss": 0.18581116994222005, | |
| "train_runtime": 2514.2374, | |
| "train_samples_per_second": 22.981, | |
| "train_steps_per_second": 1.432 | |
| } | |
| ], | |
| "max_steps": 3600, | |
| "num_train_epochs": 20, | |
| "total_flos": 1.3459697175036672e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |