{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": null,
  "eval_steps": 500,
  "global_step": 310,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.29,
      "learning_rate": 2.8125e-06,
      "loss": 0.5093,
      "reward": 3.7168,
      "step": 9
    },
    {
      "epoch": 0.61,
      "learning_rate": 5.9375e-06,
      "loss": 0.3901,
      "reward": 3.9072,
      "step": 19
    },
    {
      "epoch": 0.94,
      "learning_rate": 9.0625e-06,
      "loss": 0.3463,
      "reward": 3.795,
      "step": 29
    },
    {
      "epoch": 1.26,
      "learning_rate": 9.985430661522333e-06,
      "loss": 0.1907,
      "reward": 4.8141,
      "step": 39
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.914274958326507e-06,
      "loss": 0.1381,
      "reward": 4.9855,
      "step": 49
    },
    {
      "epoch": 1.9,
      "learning_rate": 9.784701678661045e-06,
      "loss": 0.1166,
      "reward": 5.1631,
      "step": 59
    },
    {
      "epoch": 2.23,
      "learning_rate": 9.598251102025463e-06,
      "loss": 0.1019,
      "reward": 5.3393,
      "step": 69
    },
    {
      "epoch": 2.55,
      "learning_rate": 9.357139626751308e-06,
      "loss": 0.0751,
      "reward": 5.4161,
      "step": 79
    },
    {
      "epoch": 2.87,
      "learning_rate": 9.064233422958078e-06,
      "loss": 0.1013,
      "reward": 5.4679,
      "step": 89
    },
    {
      "epoch": 3.19,
      "learning_rate": 8.723014361461633e-06,
      "loss": 0.0473,
      "reward": 5.523,
      "step": 99
    },
    {
      "epoch": 3.52,
      "learning_rate": 8.337538623649237e-06,
      "loss": 0.0388,
      "reward": 5.5629,
      "step": 109
    },
    {
      "epoch": 3.84,
      "learning_rate": 7.912388484339012e-06,
      "loss": 0.0874,
      "reward": 5.3349,
      "step": 119
    },
    {
      "epoch": 4.16,
      "learning_rate": 7.4526178407965396e-06,
      "loss": 0.0367,
      "reward": 5.4507,
      "step": 129
    },
    {
      "epoch": 4.48,
      "learning_rate": 6.963692135422872e-06,
      "loss": 0.0387,
      "reward": 5.6017,
      "step": 139
    },
    {
      "epoch": 4.81,
      "learning_rate": 6.451423386272312e-06,
      "loss": 0.0302,
      "reward": 5.798,
      "step": 149
    },
    {
      "epoch": 5.13,
      "learning_rate": 5.921901097713317e-06,
      "loss": 0.0424,
      "reward": 5.7266,
      "step": 159
    },
    {
      "epoch": 5.45,
      "learning_rate": 5.381419872519763e-06,
      "loss": 0.0397,
      "reward": 5.762,
      "step": 169
    },
    {
      "epoch": 5.77,
      "learning_rate": 4.83640458589112e-06,
      "loss": 0.0327,
      "reward": 5.7574,
      "step": 179
    },
    {
      "epoch": 6.1,
      "learning_rate": 4.293334010882164e-06,
      "loss": 0.0301,
      "reward": 5.7441,
      "step": 189
    },
    {
      "epoch": 6.42,
      "learning_rate": 3.7586638031314182e-06,
      "loss": 0.0149,
      "reward": 5.8132,
      "step": 199
    },
    {
      "epoch": 6.74,
      "learning_rate": 3.2387497603938327e-06,
      "loss": 0.0367,
      "reward": 5.7579,
      "step": 209
    },
    {
      "epoch": 7.06,
      "learning_rate": 2.739772269116402e-06,
      "loss": 0.0369,
      "reward": 5.7852,
      "step": 219
    },
    {
      "epoch": 7.39,
      "learning_rate": 2.2676628361847834e-06,
      "loss": 0.038,
      "reward": 5.7178,
      "step": 229
    },
    {
      "epoch": 7.71,
      "learning_rate": 1.8280335791817733e-06,
      "loss": 0.0198,
      "reward": 5.8608,
      "step": 239
    },
    {
      "epoch": 8.03,
      "learning_rate": 1.4261105133297693e-06,
      "loss": 0.0323,
      "reward": 5.7764,
      "step": 249
    },
    {
      "epoch": 8.35,
      "learning_rate": 1.0666714281569152e-06,
      "loss": 0.0215,
      "reward": 5.8996,
      "step": 259
    },
    {
      "epoch": 8.68,
      "learning_rate": 7.539890923671061e-07,
      "loss": 0.0206,
      "reward": 5.8818,
      "step": 269
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.917804620559202e-07,
      "loss": 0.0445,
      "reward": 5.689,
      "step": 279
    },
    {
      "epoch": 9.32,
      "learning_rate": 2.8316249605087386e-07,
      "loss": 0.0234,
      "reward": 6.027,
      "step": 289
    },
    {
      "epoch": 9.65,
      "learning_rate": 1.3061510361333186e-07,
      "loss": 0.0142,
      "reward": 5.8128,
      "step": 299
    },
    {
      "epoch": 9.97,
      "learning_rate": 3.59516649547248e-08,
      "loss": 0.0158,
      "reward": 5.8429,
      "step": 309
    }
  ],
  "logging_steps": 500,
  "max_steps": 310,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10.0,
  "save_steps": 500,
  "stateful_callbacks": {},
  "total_flos": 0,
  "train_batch_size": null,
  "trial_name": null,
  "trial_params": null
}