{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.88, "eval_steps": 500, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.096, "grad_norm": 5.814898920253824, "learning_rate": 1.6666666666666667e-06, "loss": 0.8387, "step": 1 }, { "epoch": 0.192, "grad_norm": 6.057292675599317, "learning_rate": 3.3333333333333333e-06, "loss": 0.8858, "step": 2 }, { "epoch": 0.288, "grad_norm": 5.644663837222156, "learning_rate": 5e-06, "loss": 0.8602, "step": 3 }, { "epoch": 0.384, "grad_norm": 4.386875194981092, "learning_rate": 4.983095894354858e-06, "loss": 0.8439, "step": 4 }, { "epoch": 0.48, "grad_norm": 2.4847785324465694, "learning_rate": 4.93261217644956e-06, "loss": 0.7638, "step": 5 }, { "epoch": 0.576, "grad_norm": 2.0150513534004237, "learning_rate": 4.849231551964771e-06, "loss": 0.7787, "step": 6 }, { "epoch": 0.672, "grad_norm": 4.310739824230349, "learning_rate": 4.734081600808531e-06, "loss": 0.7736, "step": 7 }, { "epoch": 0.768, "grad_norm": 4.593835357339227, "learning_rate": 4.588719528532342e-06, "loss": 0.794, "step": 8 }, { "epoch": 0.864, "grad_norm": 4.297514689606173, "learning_rate": 4.415111107797445e-06, "loss": 0.7967, "step": 9 }, { "epoch": 0.96, "grad_norm": 3.5868480239959792, "learning_rate": 4.215604094671835e-06, "loss": 0.7584, "step": 10 }, { "epoch": 1.056, "grad_norm": 4.7658908684279835, "learning_rate": 3.992896479256966e-06, "loss": 1.0823, "step": 11 }, { "epoch": 1.152, "grad_norm": 2.4559603231588087, "learning_rate": 3.7500000000000005e-06, "loss": 0.6939, "step": 12 }, { "epoch": 1.248, "grad_norm": 1.972442848963337, "learning_rate": 3.4901994150978926e-06, "loss": 0.7301, "step": 13 }, { "epoch": 1.3439999999999999, "grad_norm": 1.3858986042175938, "learning_rate": 3.217008081777726e-06, "loss": 0.7026, "step": 14 }, { "epoch": 1.44, "grad_norm": 1.1730441100334783, "learning_rate": 2.9341204441673267e-06, "loss": 0.6786, "step": 15 }, { "epoch": 1.536, "grad_norm": 1.119242637007583, "learning_rate": 2.6453620722761897e-06, "loss": 0.6457, "step": 16 }, { "epoch": 1.6320000000000001, "grad_norm": 1.242070894051599, "learning_rate": 2.3546379277238107e-06, "loss": 0.6852, "step": 17 }, { "epoch": 1.728, "grad_norm": 1.0986457895290331, "learning_rate": 2.0658795558326745e-06, "loss": 0.6334, "step": 18 }, { "epoch": 1.8239999999999998, "grad_norm": 0.9075839495380478, "learning_rate": 1.7829919182222752e-06, "loss": 0.679, "step": 19 }, { "epoch": 1.92, "grad_norm": 0.7221290672453798, "learning_rate": 1.509800584902108e-06, "loss": 0.6066, "step": 20 }, { "epoch": 2.016, "grad_norm": 1.091018170812889, "learning_rate": 1.2500000000000007e-06, "loss": 1.0308, "step": 21 }, { "epoch": 2.112, "grad_norm": 0.7565398447709, "learning_rate": 1.0071035207430352e-06, "loss": 0.6308, "step": 22 }, { "epoch": 2.208, "grad_norm": 0.6252631187849726, "learning_rate": 7.843959053281663e-07, "loss": 0.6039, "step": 23 }, { "epoch": 2.304, "grad_norm": 0.6592522345972539, "learning_rate": 5.848888922025553e-07, "loss": 0.6632, "step": 24 }, { "epoch": 2.4, "grad_norm": 0.6078000962674466, "learning_rate": 4.1128047146765936e-07, "loss": 0.5926, "step": 25 }, { "epoch": 2.496, "grad_norm": 0.643894665051632, "learning_rate": 2.6591839919146963e-07, "loss": 0.6525, "step": 26 }, { "epoch": 2.592, "grad_norm": 0.5811315948755351, "learning_rate": 1.507684480352292e-07, "loss": 0.6727, "step": 27 }, { "epoch": 2.6879999999999997, "grad_norm": 0.574044192546292, "learning_rate": 6.738782355044048e-08, "loss": 0.6145, "step": 28 }, { "epoch": 2.784, "grad_norm": 0.538050743523217, "learning_rate": 1.6904105645142443e-08, "loss": 0.5905, "step": 29 }, { "epoch": 2.88, "grad_norm": 0.5778834721689959, "learning_rate": 0.0, "loss": 0.6567, "step": 30 }, { "epoch": 2.88, "step": 30, "total_flos": 5.991907970357658e+16, "train_loss": 0.7313097993532817, "train_runtime": 1870.0735, "train_samples_per_second": 1.598, "train_steps_per_second": 0.016 } ], "logging_steps": 1.0, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.991907970357658e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }