| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "global_step": 9793, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9149052043977e-05, |
| "loss": 1.6648, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.8298104087953985e-05, |
| "loss": 1.3545, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.7447156131930974e-05, |
| "loss": 1.2345, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.659620817590796e-05, |
| "loss": 1.1511, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.574526021988496e-05, |
| "loss": 1.1304, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.4894312263861945e-05, |
| "loss": 1.0994, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.404336430783893e-05, |
| "loss": 1.0345, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.319241635181592e-05, |
| "loss": 1.0472, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.2341468395792916e-05, |
| "loss": 0.9727, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.1490520439769904e-05, |
| "loss": 0.9465, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.06395724837469e-05, |
| "loss": 0.9456, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.978862452772389e-05, |
| "loss": 0.9414, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.8937676571700875e-05, |
| "loss": 0.8902, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.808672861567787e-05, |
| "loss": 0.8932, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.723578065965486e-05, |
| "loss": 0.8493, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 3.638483270363185e-05, |
| "loss": 0.8771, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.5533884747608835e-05, |
| "loss": 0.8472, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.468293679158583e-05, |
| "loss": 0.8341, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.383198883556282e-05, |
| "loss": 0.8426, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.5429383516311646, |
| "eval_runtime": 3.13, |
| "eval_samples_per_second": 449.846, |
| "eval_steps_per_second": 56.231, |
| "step": 9793 |
| } |
| ], |
| "max_steps": 29379, |
| "num_train_epochs": 3, |
| "total_flos": 1251329143528800.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|