{
  "best_metric": 2.578360080718994,
  "best_model_checkpoint": "output/sum-41/checkpoint-108",
  "epoch": 6.0,
  "global_step": 108,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.28,
      "learning_rate": 0.00011269523002449659,
      "loss": 3.268,
      "step": 5
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.668773501204858e-05,
      "loss": 2.9209,
      "step": 10
    },
    {
      "epoch": 0.83,
      "learning_rate": 9.190657300387505e-06,
      "loss": 2.9717,
      "step": 15
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.928959369659424,
      "eval_runtime": 1.1458,
      "eval_samples_per_second": 22.691,
      "eval_steps_per_second": 3.491,
      "step": 18
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.137086214086682e-06,
      "loss": 2.7011,
      "step": 20
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.513741816785908e-05,
      "loss": 2.8934,
      "step": 25
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00010290000000000001,
      "loss": 2.793,
      "step": 30
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.00013615781185663748,
      "loss": 2.7355,
      "step": 35
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.6775753498077393,
      "eval_runtime": 1.2274,
      "eval_samples_per_second": 22.813,
      "eval_steps_per_second": 3.259,
      "step": 36
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.0001211506487979619,
      "loss": 2.6552,
      "step": 40
    },
    {
      "epoch": 2.5,
      "learning_rate": 6.860000000000001e-05,
      "loss": 2.4943,
      "step": 45
    },
    {
      "epoch": 2.78,
      "learning_rate": 1.6049351202038163e-05,
      "loss": 2.2804,
      "step": 50
    },
    {
      "epoch": 3.0,
      "eval_loss": 2.632376194000244,
      "eval_runtime": 1.2528,
      "eval_samples_per_second": 22.349,
      "eval_steps_per_second": 3.193,
      "step": 54
    },
    {
      "epoch": 3.06,
      "learning_rate": 1.0421881433625223e-06,
      "loss": 2.6341,
      "step": 55
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.4300000000000014e-05,
      "loss": 2.3676,
      "step": 60
    },
    {
      "epoch": 3.61,
      "learning_rate": 9.206258183214083e-05,
      "loss": 2.2182,
      "step": 65
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.00013306291378591332,
      "loss": 2.4212,
      "step": 70
    },
    {
      "epoch": 4.0,
      "eval_loss": 2.5963516235351562,
      "eval_runtime": 1.2572,
      "eval_samples_per_second": 22.272,
      "eval_steps_per_second": 3.182,
      "step": 72
    },
    {
      "epoch": 4.17,
      "learning_rate": 0.00012800934269961248,
      "loss": 2.2587,
      "step": 75
    },
    {
      "epoch": 4.44,
      "learning_rate": 8.051226498795145e-05,
      "loss": 2.1767,
      "step": 80
    },
    {
      "epoch": 4.72,
      "learning_rate": 2.4504769975503385e-05,
      "loss": 2.1842,
      "step": 85
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 2.2178,
      "step": 90
    },
    {
      "epoch": 5.0,
      "eval_loss": 2.578657388687134,
      "eval_runtime": 1.2377,
      "eval_samples_per_second": 22.622,
      "eval_steps_per_second": 3.232,
      "step": 90
    },
    {
      "epoch": 5.28,
      "learning_rate": 2.4504769975503317e-05,
      "loss": 2.0438,
      "step": 95
    },
    {
      "epoch": 5.56,
      "learning_rate": 8.051226498795124e-05,
      "loss": 2.1724,
      "step": 100
    },
    {
      "epoch": 5.83,
      "learning_rate": 0.00012800934269961248,
      "loss": 1.9973,
      "step": 105
    },
    {
      "epoch": 6.0,
      "eval_loss": 2.578360080718994,
      "eval_runtime": 1.2443,
      "eval_samples_per_second": 22.503,
      "eval_steps_per_second": 3.215,
      "step": 108
    }
  ],
  "max_steps": 198,
  "num_train_epochs": 11,
  "total_flos": 110787821568000.0,
  "trial_name": null,
  "trial_params": null
}