| { |
| "best_metric": 0.32135966420173645, |
| "best_model_checkpoint": "out/checkpoint-700", |
| "epoch": 0.6548175865294668, |
| "global_step": 700, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09, |
| "eval_loss": 0.5697824358940125, |
| "eval_runtime": 1.9806, |
| "eval_samples_per_second": 266.081, |
| "eval_steps_per_second": 33.323, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_loss": 0.4737452268600464, |
| "eval_runtime": 2.3251, |
| "eval_samples_per_second": 226.658, |
| "eval_steps_per_second": 28.386, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_loss": 0.4208296537399292, |
| "eval_runtime": 2.1099, |
| "eval_samples_per_second": 249.779, |
| "eval_steps_per_second": 31.282, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_loss": 0.4499533176422119, |
| "eval_runtime": 1.9863, |
| "eval_samples_per_second": 265.321, |
| "eval_steps_per_second": 33.228, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.234932144954725e-06, |
| "loss": 0.4817, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_loss": 0.37633195519447327, |
| "eval_runtime": 1.9692, |
| "eval_samples_per_second": 267.62, |
| "eval_steps_per_second": 33.516, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_loss": 0.3510819375514984, |
| "eval_runtime": 2.019, |
| "eval_samples_per_second": 261.021, |
| "eval_steps_per_second": 32.69, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_loss": 0.32135966420173645, |
| "eval_runtime": 1.9717, |
| "eval_samples_per_second": 267.278, |
| "eval_steps_per_second": 33.473, |
| "step": 700 |
| } |
| ], |
| "max_steps": 2138, |
| "num_train_epochs": 2, |
| "total_flos": 32716802017344.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|