| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 1000.0, | |
| "global_step": 7494, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.200160128102482, | |
| "grad_norm": 6.359441757202148, | |
| "learning_rate": 4.6663997864958635e-05, | |
| "loss": 6.6546, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.400320256204964, | |
| "grad_norm": 5.609431743621826, | |
| "learning_rate": 4.332799572991727e-05, | |
| "loss": 6.1195, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.600480384307446, | |
| "grad_norm": 8.655993461608887, | |
| "learning_rate": 3.99919935948759e-05, | |
| "loss": 5.8641, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.800640512409928, | |
| "grad_norm": 5.4739837646484375, | |
| "learning_rate": 3.665599145983454e-05, | |
| "loss": 5.7106, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.00080064051241, | |
| "grad_norm": 5.922306060791016, | |
| "learning_rate": 3.3319989324793174e-05, | |
| "loss": 5.5891, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.200960768614892, | |
| "grad_norm": 7.8872504234313965, | |
| "learning_rate": 2.9983987189751807e-05, | |
| "loss": 5.1944, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.4011208967173738, | |
| "grad_norm": 5.556982040405273, | |
| "learning_rate": 2.6647985054710433e-05, | |
| "loss": 5.1804, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.601281024819856, | |
| "grad_norm": 6.548351287841797, | |
| "learning_rate": 2.331198291966907e-05, | |
| "loss": 5.1859, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.801441152922338, | |
| "grad_norm": 7.128460884094238, | |
| "learning_rate": 1.9975980784627705e-05, | |
| "loss": 5.0938, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.00160128102482, | |
| "grad_norm": 5.632880210876465, | |
| "learning_rate": 1.6639978649586335e-05, | |
| "loss": 5.0563, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.2017614091273017, | |
| "grad_norm": 6.655033588409424, | |
| "learning_rate": 1.330397651454497e-05, | |
| "loss": 4.68, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.401921537229784, | |
| "grad_norm": 8.128520965576172, | |
| "learning_rate": 9.967974379503602e-06, | |
| "loss": 4.7047, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.602081665332266, | |
| "grad_norm": 8.819676399230957, | |
| "learning_rate": 6.631972244462237e-06, | |
| "loss": 4.6252, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.8022417934347477, | |
| "grad_norm": 6.872320175170898, | |
| "learning_rate": 3.29597010942087e-06, | |
| "loss": 4.656, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 7494, | |
| "total_flos": 3.246076355120333e+16, | |
| "train_loss": 5.265000151162597, | |
| "train_runtime": 4607.9912, | |
| "train_samples_per_second": 3.252, | |
| "train_steps_per_second": 1.626 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 7494, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.246076355120333e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |