{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.957983193277311, "eval_steps": 500, "global_step": 177, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16806722689075632, "grad_norm": 1.64210754465846, "learning_rate": 5.555555555555557e-06, "loss": 0.8834, "step": 10 }, { "epoch": 0.33613445378151263, "grad_norm": 1.6216633128454179, "learning_rate": 9.99609654676786e-06, "loss": 0.832, "step": 20 }, { "epoch": 0.5042016806722689, "grad_norm": 1.0890026008565015, "learning_rate": 9.860114570402055e-06, "loss": 0.7929, "step": 30 }, { "epoch": 0.6722689075630253, "grad_norm": 0.9852931548778409, "learning_rate": 9.535012074008688e-06, "loss": 0.7613, "step": 40 }, { "epoch": 0.8403361344537815, "grad_norm": 1.0010829198190254, "learning_rate": 9.033439696227966e-06, "loss": 0.7369, "step": 50 }, { "epoch": 1.0, "grad_norm": 0.9912119135873784, "learning_rate": 8.374915007591053e-06, "loss": 0.6853, "step": 60 }, { "epoch": 1.1680672268907564, "grad_norm": 0.9598316653212201, "learning_rate": 7.58506302778873e-06, "loss": 0.6288, "step": 70 }, { "epoch": 1.3361344537815127, "grad_norm": 0.9969526888332303, "learning_rate": 6.694619085176159e-06, "loss": 0.5971, "step": 80 }, { "epoch": 1.504201680672269, "grad_norm": 5.712426354685314, "learning_rate": 5.738232820012407e-06, "loss": 0.5633, "step": 90 }, { "epoch": 1.6722689075630253, "grad_norm": 0.8053877541293872, "learning_rate": 4.753119870981486e-06, "loss": 0.5586, "step": 100 }, { "epoch": 1.8403361344537816, "grad_norm": 0.9085557253419428, "learning_rate": 3.777613711607087e-06, "loss": 0.5633, "step": 110 }, { "epoch": 2.0, "grad_norm": 0.9125623622985397, "learning_rate": 2.8496739886173994e-06, "loss": 0.5249, "step": 120 }, { "epoch": 2.168067226890756, "grad_norm": 0.9796403145730364, "learning_rate": 2.005409406946e-06, "loss": 0.4774, "step": 130 }, { "epoch": 2.3361344537815127, "grad_norm": 0.8379009664030831, "learning_rate": 1.277672640004936e-06, "loss": 0.4569, "step": 140 }, { "epoch": 2.504201680672269, "grad_norm": 0.912579146579012, "learning_rate": 6.947819411632223e-07, "loss": 0.4598, "step": 150 }, { "epoch": 2.6722689075630255, "grad_norm": 0.8570497984949336, "learning_rate": 2.7941920206915443e-07, "loss": 0.4533, "step": 160 }, { "epoch": 2.8403361344537816, "grad_norm": 0.913025927962694, "learning_rate": 4.774733741942206e-08, "loss": 0.4608, "step": 170 }, { "epoch": 2.957983193277311, "step": 177, "total_flos": 92447203917824.0, "train_loss": 0.6085002004763501, "train_runtime": 1135.371, "train_samples_per_second": 20.124, "train_steps_per_second": 0.156 } ], "logging_steps": 10, "max_steps": 177, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 92447203917824.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }