{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.9984, "eval_steps": 500, "global_step": 7810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32, "grad_norm": 114357.3359375, "learning_rate": 4.740596627756161e-05, "loss": 3.7821, "step": 500 }, { "epoch": 0.64, "grad_norm": 114484.1171875, "learning_rate": 4.4163424124513617e-05, "loss": 1.3466, "step": 1000 }, { "epoch": 0.96, "grad_norm": 74623.7109375, "learning_rate": 4.092088197146563e-05, "loss": 1.2312, "step": 1500 }, { "epoch": 1.28, "grad_norm": 90289.9375, "learning_rate": 3.767833981841764e-05, "loss": 1.1712, "step": 2000 }, { "epoch": 1.6, "grad_norm": 48473.625, "learning_rate": 3.4435797665369654e-05, "loss": 1.1258, "step": 2500 }, { "epoch": 1.92, "grad_norm": 48174.4140625, "learning_rate": 3.119325551232166e-05, "loss": 1.1166, "step": 3000 }, { "epoch": 2.24, "grad_norm": 48501.65234375, "learning_rate": 2.7950713359273672e-05, "loss": 1.0865, "step": 3500 }, { "epoch": 2.56, "grad_norm": 51178.6875, "learning_rate": 2.4708171206225684e-05, "loss": 1.0675, "step": 4000 }, { "epoch": 2.88, "grad_norm": 90527.828125, "learning_rate": 2.146562905317769e-05, "loss": 1.0602, "step": 4500 }, { "epoch": 3.2, "grad_norm": 54371.5234375, "learning_rate": 1.8223086900129702e-05, "loss": 1.043, "step": 5000 }, { "epoch": 3.52, "grad_norm": 159429.1875, "learning_rate": 1.4980544747081713e-05, "loss": 1.0311, "step": 5500 }, { "epoch": 3.84, "grad_norm": 55272.26171875, "learning_rate": 1.1738002594033724e-05, "loss": 1.0335, "step": 6000 }, { "epoch": 4.16, "grad_norm": 42917.234375, "learning_rate": 8.495460440985733e-06, "loss": 1.0229, "step": 6500 }, { "epoch": 4.48, "grad_norm": 53693.88671875, "learning_rate": 5.2529182879377435e-06, "loss": 1.0176, "step": 7000 }, { "epoch": 4.8, "grad_norm": 48436.2265625, "learning_rate": 2.0103761348897538e-06, "loss": 1.0157, "step": 7500 } ], "logging_steps": 500, "max_steps": 7810, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 30000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.026686829985792e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }