| { | |
| "best_metric": 0.9394112188195306, | |
| "best_model_checkpoint": "model_saves/roberta-large_lemon_10k_3_p3/checkpoint-536", | |
| "epoch": 8.0, | |
| "global_step": 2144, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9393497243931618, | |
| "eval_loss": 0.4459913671016693, | |
| "eval_runtime": 3.4398, | |
| "eval_samples_per_second": 1273.909, | |
| "eval_steps_per_second": 10.175, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9394112188195306, | |
| "eval_loss": 0.46163710951805115, | |
| "eval_runtime": 3.4301, | |
| "eval_samples_per_second": 1277.515, | |
| "eval_steps_per_second": 10.204, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.938237234316126, | |
| "eval_loss": 0.5016078352928162, | |
| "eval_runtime": 3.4314, | |
| "eval_samples_per_second": 1277.037, | |
| "eval_steps_per_second": 10.2, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2628, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9388969017989915, | |
| "eval_loss": 0.5514333248138428, | |
| "eval_runtime": 4.6434, | |
| "eval_samples_per_second": 943.704, | |
| "eval_steps_per_second": 7.538, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9377396885027784, | |
| "eval_loss": 0.6031810641288757, | |
| "eval_runtime": 3.437, | |
| "eval_samples_per_second": 1274.931, | |
| "eval_steps_per_second": 10.183, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9375440244188776, | |
| "eval_loss": 0.6418653130531311, | |
| "eval_runtime": 3.5192, | |
| "eval_samples_per_second": 1245.154, | |
| "eval_steps_per_second": 9.945, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9377396885027784, | |
| "eval_loss": 0.7208025455474854, | |
| "eval_runtime": 3.4571, | |
| "eval_samples_per_second": 1267.546, | |
| "eval_steps_per_second": 10.124, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1093, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9376390612596295, | |
| "eval_loss": 0.7791202068328857, | |
| "eval_runtime": 3.4736, | |
| "eval_samples_per_second": 1261.504, | |
| "eval_steps_per_second": 10.076, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 2144, | |
| "total_flos": 2.8204045344702464e+16, | |
| "train_loss": 0.039562054534456624, | |
| "train_runtime": 261.1202, | |
| "train_samples_per_second": 1970.587, | |
| "train_steps_per_second": 15.395 | |
| } | |
| ], | |
| "max_steps": 4020, | |
| "num_train_epochs": 15, | |
| "total_flos": 2.8204045344702464e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |