| { |
| "best_metric": 0.9395286172698711, |
| "best_model_checkpoint": "model_saves/xlnet-large_lemon_5k_3_p3/checkpoint-536", |
| "epoch": 5.0, |
| "global_step": 1340, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.939120517894878, |
| "eval_loss": 0.4418571889400482, |
| "eval_runtime": 10.7128, |
| "eval_samples_per_second": 409.043, |
| "eval_steps_per_second": 3.267, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9395286172698711, |
| "eval_loss": 0.46262186765670776, |
| "eval_runtime": 5.4443, |
| "eval_samples_per_second": 804.875, |
| "eval_steps_per_second": 6.429, |
| "step": 536 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.938913673006183, |
| "eval_loss": 0.49879011511802673, |
| "eval_runtime": 4.7783, |
| "eval_samples_per_second": 917.056, |
| "eval_steps_per_second": 7.325, |
| "step": 804 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 1e-05, |
| "loss": 0.2989, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9388801305918, |
| "eval_loss": 0.5438826084136963, |
| "eval_runtime": 4.8062, |
| "eval_samples_per_second": 911.742, |
| "eval_steps_per_second": 7.282, |
| "step": 1072 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9391261082972753, |
| "eval_loss": 0.599226176738739, |
| "eval_runtime": 4.7421, |
| "eval_samples_per_second": 924.067, |
| "eval_steps_per_second": 7.381, |
| "step": 1340 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 1340, |
| "total_flos": 2.127979812277453e+16, |
| "train_loss": 0.2707222041799061, |
| "train_runtime": 595.1865, |
| "train_samples_per_second": 864.536, |
| "train_steps_per_second": 6.754 |
| } |
| ], |
| "max_steps": 4020, |
| "num_train_epochs": 15, |
| "total_flos": 2.127979812277453e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|