{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 1000.0, "global_step": 7494, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.200160128102482, "grad_norm": 6.359441757202148, "learning_rate": 4.6663997864958635e-05, "loss": 6.6546, "step": 500 }, { "epoch": 0.400320256204964, "grad_norm": 5.609431743621826, "learning_rate": 4.332799572991727e-05, "loss": 6.1195, "step": 1000 }, { "epoch": 0.600480384307446, "grad_norm": 8.655993461608887, "learning_rate": 3.99919935948759e-05, "loss": 5.8641, "step": 1500 }, { "epoch": 0.800640512409928, "grad_norm": 5.4739837646484375, "learning_rate": 3.665599145983454e-05, "loss": 5.7106, "step": 2000 }, { "epoch": 1.00080064051241, "grad_norm": 5.922306060791016, "learning_rate": 3.3319989324793174e-05, "loss": 5.5891, "step": 2500 }, { "epoch": 1.200960768614892, "grad_norm": 7.8872504234313965, "learning_rate": 2.9983987189751807e-05, "loss": 5.1944, "step": 3000 }, { "epoch": 1.4011208967173738, "grad_norm": 5.556982040405273, "learning_rate": 2.6647985054710433e-05, "loss": 5.1804, "step": 3500 }, { "epoch": 1.601281024819856, "grad_norm": 6.548351287841797, "learning_rate": 2.331198291966907e-05, "loss": 5.1859, "step": 4000 }, { "epoch": 1.801441152922338, "grad_norm": 7.128460884094238, "learning_rate": 1.9975980784627705e-05, "loss": 5.0938, "step": 4500 }, { "epoch": 2.00160128102482, "grad_norm": 5.632880210876465, "learning_rate": 1.6639978649586335e-05, "loss": 5.0563, "step": 5000 }, { "epoch": 2.2017614091273017, "grad_norm": 6.655033588409424, "learning_rate": 1.330397651454497e-05, "loss": 4.68, "step": 5500 }, { "epoch": 2.401921537229784, "grad_norm": 8.128520965576172, "learning_rate": 9.967974379503602e-06, "loss": 4.7047, "step": 6000 }, { "epoch": 2.602081665332266, "grad_norm": 8.819676399230957, "learning_rate": 6.631972244462237e-06, "loss": 4.6252, "step": 6500 }, { "epoch": 2.8022417934347477, "grad_norm": 6.872320175170898, "learning_rate": 3.29597010942087e-06, "loss": 4.656, "step": 7000 }, { "epoch": 3.0, "step": 7494, "total_flos": 3.246076355120333e+16, "train_loss": 5.265000151162597, "train_runtime": 4607.9912, "train_samples_per_second": 3.252, "train_steps_per_second": 1.626 } ], "logging_steps": 500, "max_steps": 7494, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.246076355120333e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }