| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 16290, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.37, |
| "learning_rate": 2.6316758747697975e-05, |
| "loss": 1.7533, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_loss": 1.023402452468872, |
| "eval_runtime": 20.2636, |
| "eval_samples_per_second": 57.69, |
| "eval_steps_per_second": 3.652, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.2633517495395946e-05, |
| "loss": 1.2356, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_loss": 0.9132137298583984, |
| "eval_runtime": 20.2624, |
| "eval_samples_per_second": 57.693, |
| "eval_steps_per_second": 3.652, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 1.8950276243093924e-05, |
| "loss": 1.0926, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_loss": 0.9214878082275391, |
| "eval_runtime": 20.2606, |
| "eval_samples_per_second": 57.698, |
| "eval_steps_per_second": 3.652, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.52670349907919e-05, |
| "loss": 0.8857, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_loss": 0.948037326335907, |
| "eval_runtime": 20.263, |
| "eval_samples_per_second": 57.691, |
| "eval_steps_per_second": 3.652, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.1583793738489871e-05, |
| "loss": 0.8726, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_loss": 0.9162865877151489, |
| "eval_runtime": 20.2461, |
| "eval_samples_per_second": 57.739, |
| "eval_steps_per_second": 3.655, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 7.900552486187846e-06, |
| "loss": 0.7355, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_loss": 0.9980245232582092, |
| "eval_runtime": 20.2431, |
| "eval_samples_per_second": 57.748, |
| "eval_steps_per_second": 3.656, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 4.21731123388582e-06, |
| "loss": 0.6529, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_loss": 1.0005152225494385, |
| "eval_runtime": 20.2501, |
| "eval_samples_per_second": 57.728, |
| "eval_steps_per_second": 3.654, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 5.340699815837937e-07, |
| "loss": 0.6474, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.95, |
| "eval_loss": 0.9974539875984192, |
| "eval_runtime": 20.2537, |
| "eval_samples_per_second": 57.718, |
| "eval_steps_per_second": 3.654, |
| "step": 16000 |
| } |
| ], |
| "max_steps": 16290, |
| "num_train_epochs": 3, |
| "total_flos": 6.809654777296896e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|