| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.996168582375478, |
| "global_step": 650, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.615384615384616e-05, |
| "loss": 9.0082, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 8.411425590515137, |
| "eval_runtime": 12.7191, |
| "eval_samples_per_second": 45.601, |
| "eval_steps_per_second": 2.909, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 4.230769230769231e-05, |
| "loss": 8.0104, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_loss": 7.781517505645752, |
| "eval_runtime": 12.722, |
| "eval_samples_per_second": 45.59, |
| "eval_steps_per_second": 2.908, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 3.846153846153846e-05, |
| "loss": 7.6542, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.31, |
| "eval_loss": 7.551076889038086, |
| "eval_runtime": 12.717, |
| "eval_samples_per_second": 45.608, |
| "eval_steps_per_second": 2.909, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 7.4951, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.08, |
| "eval_loss": 7.532299995422363, |
| "eval_runtime": 12.7257, |
| "eval_samples_per_second": 45.577, |
| "eval_steps_per_second": 2.908, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 3.0769230769230774e-05, |
| "loss": 7.3489, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.84, |
| "eval_loss": 7.389176845550537, |
| "eval_runtime": 12.723, |
| "eval_samples_per_second": 45.587, |
| "eval_steps_per_second": 2.908, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 2.6923076923076923e-05, |
| "loss": 7.349, |
| "step": 300 |
| }, |
| { |
| "epoch": 4.61, |
| "eval_loss": 7.34345006942749, |
| "eval_runtime": 12.7274, |
| "eval_samples_per_second": 45.571, |
| "eval_steps_per_second": 2.907, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 2.307692307692308e-05, |
| "loss": 7.2625, |
| "step": 350 |
| }, |
| { |
| "epoch": 5.38, |
| "eval_loss": 7.265749454498291, |
| "eval_runtime": 12.7257, |
| "eval_samples_per_second": 45.577, |
| "eval_steps_per_second": 2.907, |
| "step": 350 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": 7.1982, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.15, |
| "eval_loss": 7.229588985443115, |
| "eval_runtime": 12.7294, |
| "eval_samples_per_second": 45.564, |
| "eval_steps_per_second": 2.907, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.92, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 7.1324, |
| "step": 450 |
| }, |
| { |
| "epoch": 6.92, |
| "eval_loss": 7.157928466796875, |
| "eval_runtime": 12.7187, |
| "eval_samples_per_second": 45.602, |
| "eval_steps_per_second": 2.909, |
| "step": 450 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 1.153846153846154e-05, |
| "loss": 7.1505, |
| "step": 500 |
| }, |
| { |
| "epoch": 7.69, |
| "eval_loss": 7.151326656341553, |
| "eval_runtime": 12.7324, |
| "eval_samples_per_second": 45.553, |
| "eval_steps_per_second": 2.906, |
| "step": 500 |
| }, |
| { |
| "epoch": 8.46, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 7.1079, |
| "step": 550 |
| }, |
| { |
| "epoch": 8.46, |
| "eval_loss": 7.1157402992248535, |
| "eval_runtime": 12.7267, |
| "eval_samples_per_second": 45.573, |
| "eval_steps_per_second": 2.907, |
| "step": 550 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 7.1039, |
| "step": 600 |
| }, |
| { |
| "epoch": 9.23, |
| "eval_loss": 7.14491605758667, |
| "eval_runtime": 12.7279, |
| "eval_samples_per_second": 45.569, |
| "eval_steps_per_second": 2.907, |
| "step": 600 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 0.0, |
| "loss": 7.046, |
| "step": 650 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 7.087325096130371, |
| "eval_runtime": 12.7263, |
| "eval_samples_per_second": 45.575, |
| "eval_steps_per_second": 2.907, |
| "step": 650 |
| } |
| ], |
| "max_steps": 650, |
| "num_train_epochs": 10, |
| "total_flos": 1.3722182915328e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|