{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": null,
  "eval_steps": 500,
  "global_step": 310,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.29,
      "learning_rate": 2.8125e-06,
      "loss": 2.9065,
      "reward": 11.2264,
      "step": 9
    },
    {
      "epoch": 0.61,
      "learning_rate": 5.9375e-06,
      "loss": 2.6597,
      "reward": 11.0573,
      "step": 19
    },
    {
      "epoch": 0.94,
      "learning_rate": 9.0625e-06,
      "loss": 1.7609,
      "reward": 11.9648,
      "step": 29
    },
    {
      "epoch": 1.26,
      "learning_rate": 9.985430661522333e-06,
      "loss": 1.2947,
      "reward": 13.177,
      "step": 39
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.914274958326507e-06,
      "loss": 0.7901,
      "reward": 13.7785,
      "step": 49
    },
    {
      "epoch": 1.9,
      "learning_rate": 9.784701678661045e-06,
      "loss": 0.4549,
      "reward": 14.603,
      "step": 59
    },
    {
      "epoch": 2.23,
      "learning_rate": 9.598251102025463e-06,
      "loss": 0.5224,
      "reward": 15.594,
      "step": 69
    },
    {
      "epoch": 2.55,
      "learning_rate": 9.357139626751308e-06,
      "loss": 0.3896,
      "reward": 15.697,
      "step": 79
    },
    {
      "epoch": 2.87,
      "learning_rate": 9.064233422958078e-06,
      "loss": 0.305,
      "reward": 16.4374,
      "step": 89
    },
    {
      "epoch": 3.19,
      "learning_rate": 8.723014361461633e-06,
      "loss": 0.3697,
      "reward": 16.539,
      "step": 99
    },
    {
      "epoch": 3.52,
      "learning_rate": 8.337538623649237e-06,
      "loss": 0.3504,
      "reward": 17.218,
      "step": 109
    },
    {
      "epoch": 3.84,
      "learning_rate": 7.912388484339012e-06,
      "loss": 0.3387,
      "reward": 16.595,
      "step": 119
    },
    {
      "epoch": 4.16,
      "learning_rate": 7.4526178407965396e-06,
      "loss": 0.3369,
      "reward": 17.3314,
      "step": 129
    },
    {
      "epoch": 4.48,
      "learning_rate": 6.963692135422872e-06,
      "loss": 0.1907,
      "reward": 17.0773,
      "step": 139
    },
    {
      "epoch": 4.81,
      "learning_rate": 6.451423386272312e-06,
      "loss": 0.2781,
      "reward": 17.3675,
      "step": 149
    },
    {
      "epoch": 5.13,
      "learning_rate": 5.921901097713317e-06,
      "loss": 0.2708,
      "reward": 17.4558,
      "step": 159
    },
    {
      "epoch": 5.45,
      "learning_rate": 5.381419872519763e-06,
      "loss": 0.2548,
      "reward": 18.2967,
      "step": 169
    },
    {
      "epoch": 5.77,
      "learning_rate": 4.83640458589112e-06,
      "loss": 0.1266,
      "reward": 18.4324,
      "step": 179
    },
    {
      "epoch": 6.1,
      "learning_rate": 4.293334010882164e-06,
      "loss": 0.2178,
      "reward": 17.8528,
      "step": 189
    },
    {
      "epoch": 6.42,
      "learning_rate": 3.7586638031314182e-06,
      "loss": 0.184,
      "reward": 18.424,
      "step": 199
    },
    {
      "epoch": 6.74,
      "learning_rate": 3.2387497603938327e-06,
      "loss": 0.2045,
      "reward": 18.8729,
      "step": 209
    },
    {
      "epoch": 7.06,
      "learning_rate": 2.739772269116402e-06,
      "loss": 0.1521,
      "reward": 19.1409,
      "step": 219
    },
    {
      "epoch": 7.39,
      "learning_rate": 2.2676628361847834e-06,
      "loss": 0.1096,
      "reward": 18.8351,
      "step": 229
    },
    {
      "epoch": 7.71,
      "learning_rate": 1.8280335791817733e-06,
      "loss": 0.1408,
      "reward": 18.8548,
      "step": 239
    },
    {
      "epoch": 8.03,
      "learning_rate": 1.4261105133297693e-06,
      "loss": 0.1674,
      "reward": 18.8099,
      "step": 249
    },
    {
      "epoch": 8.35,
      "learning_rate": 1.0666714281569152e-06,
      "loss": 0.1583,
      "reward": 18.7122,
      "step": 259
    },
    {
      "epoch": 8.68,
      "learning_rate": 7.539890923671061e-07,
      "loss": 0.1407,
      "reward": 18.8339,
      "step": 269
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.917804620559202e-07,
      "loss": 0.1214,
      "reward": 18.7366,
      "step": 279
    },
    {
      "epoch": 9.32,
      "learning_rate": 2.8316249605087386e-07,
      "loss": 0.093,
      "reward": 18.8959,
      "step": 289
    },
    {
      "epoch": 9.65,
      "learning_rate": 1.3061510361333186e-07,
      "loss": 0.1226,
      "reward": 18.4177,
      "step": 299
    },
    {
      "epoch": 9.97,
      "learning_rate": 3.59516649547248e-08,
      "loss": 0.1234,
      "reward": 18.9226,
      "step": 309
    }
  ],
  "logging_steps": 500,
  "max_steps": 310,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10.0,
  "save_steps": 500,
  "stateful_callbacks": {},
  "total_flos": 0,
  "train_batch_size": null,
  "trial_name": null,
  "trial_params": null
}