{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4331348132106118,
  "eval_steps": 500,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02707092582566324,
      "grad_norm": 0.8431992530822754,
      "learning_rate": 9.910124526258799e-05,
      "loss": 4.3576,
      "step": 250
    },
    {
      "epoch": 0.05414185165132648,
      "grad_norm": 0.8061181306838989,
      "learning_rate": 9.819888106839921e-05,
      "loss": 4.2775,
      "step": 500
    },
    {
      "epoch": 0.05414185165132648,
      "eval_loss": 3.5462777614593506,
      "eval_runtime": 87.734,
      "eval_samples_per_second": 112.271,
      "eval_steps_per_second": 7.021,
      "step": 500
    },
    {
      "epoch": 0.08121277747698971,
      "grad_norm": 0.7875335812568665,
      "learning_rate": 9.729651687421044e-05,
      "loss": 4.1927,
      "step": 750
    },
    {
      "epoch": 0.10828370330265295,
      "grad_norm": 1.0330173969268799,
      "learning_rate": 9.639415268002166e-05,
      "loss": 4.1178,
      "step": 1000
    },
    {
      "epoch": 0.10828370330265295,
      "eval_loss": 3.3765828609466553,
      "eval_runtime": 87.6227,
      "eval_samples_per_second": 112.414,
      "eval_steps_per_second": 7.03,
      "step": 1000
    },
    {
      "epoch": 0.1353546291283162,
      "grad_norm": 1.041638731956482,
      "learning_rate": 9.549178848583288e-05,
      "loss": 4.0201,
      "step": 1250
    },
    {
      "epoch": 0.16242555495397942,
      "grad_norm": 0.8976061940193176,
      "learning_rate": 9.458942429164411e-05,
      "loss": 3.9802,
      "step": 1500
    },
    {
      "epoch": 0.16242555495397942,
      "eval_loss": 3.2597038745880127,
      "eval_runtime": 87.5921,
      "eval_samples_per_second": 112.453,
      "eval_steps_per_second": 7.033,
      "step": 1500
    },
    {
      "epoch": 0.18949648077964265,
      "grad_norm": 1.0333911180496216,
      "learning_rate": 9.368706009745533e-05,
      "loss": 3.9335,
      "step": 1750
    },
    {
      "epoch": 0.2165674066053059,
      "grad_norm": 0.9179081320762634,
      "learning_rate": 9.278469590326656e-05,
      "loss": 3.8709,
      "step": 2000
    },
    {
      "epoch": 0.2165674066053059,
      "eval_loss": 3.1846394538879395,
      "eval_runtime": 87.5421,
      "eval_samples_per_second": 112.517,
      "eval_steps_per_second": 7.037,
      "step": 2000
    },
    {
      "epoch": 0.24363833243096913,
      "grad_norm": 1.0419113636016846,
      "learning_rate": 9.188233170907778e-05,
      "loss": 3.8416,
      "step": 2250
    },
    {
      "epoch": 0.2707092582566324,
      "grad_norm": 0.9652225375175476,
      "learning_rate": 9.0979967514889e-05,
      "loss": 3.807,
      "step": 2500
    },
    {
      "epoch": 0.2707092582566324,
      "eval_loss": 3.119335174560547,
      "eval_runtime": 87.4769,
      "eval_samples_per_second": 112.601,
      "eval_steps_per_second": 7.042,
      "step": 2500
    },
    {
      "epoch": 0.2977801840822956,
      "grad_norm": 0.8792561888694763,
      "learning_rate": 9.007760332070024e-05,
      "loss": 3.7754,
      "step": 2750
    },
    {
      "epoch": 0.32485110990795885,
      "grad_norm": 0.9625837206840515,
      "learning_rate": 8.917523912651147e-05,
      "loss": 3.7471,
      "step": 3000
    },
    {
      "epoch": 0.32485110990795885,
      "eval_loss": 3.0782463550567627,
      "eval_runtime": 87.7273,
      "eval_samples_per_second": 112.28,
      "eval_steps_per_second": 7.022,
      "step": 3000
    },
    {
      "epoch": 0.3519220357336221,
      "grad_norm": 1.0289523601531982,
      "learning_rate": 8.827287493232269e-05,
      "loss": 3.7326,
      "step": 3250
    },
    {
      "epoch": 0.3789929615592853,
      "grad_norm": 0.9764179587364197,
      "learning_rate": 8.737051073813391e-05,
      "loss": 3.6939,
      "step": 3500
    },
    {
      "epoch": 0.3789929615592853,
      "eval_loss": 3.052320718765259,
      "eval_runtime": 87.6344,
      "eval_samples_per_second": 112.399,
      "eval_steps_per_second": 7.029,
      "step": 3500
    },
    {
      "epoch": 0.4060638873849486,
      "grad_norm": 0.9247903227806091,
      "learning_rate": 8.646814654394514e-05,
      "loss": 3.6782,
      "step": 3750
    },
    {
      "epoch": 0.4331348132106118,
      "grad_norm": 0.9769233465194702,
      "learning_rate": 8.556578234975636e-05,
      "loss": 3.6654,
      "step": 4000
    },
    {
      "epoch": 0.4331348132106118,
      "eval_loss": 3.0321156978607178,
      "eval_runtime": 87.6462,
      "eval_samples_per_second": 112.384,
      "eval_steps_per_second": 7.028,
      "step": 4000
    }
  ],
  "logging_steps": 250,
  "max_steps": 27705,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1444216307712000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}