{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.9984,
  "eval_steps": 500,
  "global_step": 7810,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "grad_norm": 114357.3359375,
      "learning_rate": 4.740596627756161e-05,
      "loss": 3.7821,
      "step": 500
    },
    {
      "epoch": 0.64,
      "grad_norm": 114484.1171875,
      "learning_rate": 4.4163424124513617e-05,
      "loss": 1.3466,
      "step": 1000
    },
    {
      "epoch": 0.96,
      "grad_norm": 74623.7109375,
      "learning_rate": 4.092088197146563e-05,
      "loss": 1.2312,
      "step": 1500
    },
    {
      "epoch": 1.28,
      "grad_norm": 90289.9375,
      "learning_rate": 3.767833981841764e-05,
      "loss": 1.1712,
      "step": 2000
    },
    {
      "epoch": 1.6,
      "grad_norm": 48473.625,
      "learning_rate": 3.4435797665369654e-05,
      "loss": 1.1258,
      "step": 2500
    },
    {
      "epoch": 1.92,
      "grad_norm": 48174.4140625,
      "learning_rate": 3.119325551232166e-05,
      "loss": 1.1166,
      "step": 3000
    },
    {
      "epoch": 2.24,
      "grad_norm": 48501.65234375,
      "learning_rate": 2.7950713359273672e-05,
      "loss": 1.0865,
      "step": 3500
    },
    {
      "epoch": 2.56,
      "grad_norm": 51178.6875,
      "learning_rate": 2.4708171206225684e-05,
      "loss": 1.0675,
      "step": 4000
    },
    {
      "epoch": 2.88,
      "grad_norm": 90527.828125,
      "learning_rate": 2.146562905317769e-05,
      "loss": 1.0602,
      "step": 4500
    },
    {
      "epoch": 3.2,
      "grad_norm": 54371.5234375,
      "learning_rate": 1.8223086900129702e-05,
      "loss": 1.043,
      "step": 5000
    },
    {
      "epoch": 3.52,
      "grad_norm": 159429.1875,
      "learning_rate": 1.4980544747081713e-05,
      "loss": 1.0311,
      "step": 5500
    },
    {
      "epoch": 3.84,
      "grad_norm": 55272.26171875,
      "learning_rate": 1.1738002594033724e-05,
      "loss": 1.0335,
      "step": 6000
    },
    {
      "epoch": 4.16,
      "grad_norm": 42917.234375,
      "learning_rate": 8.495460440985733e-06,
      "loss": 1.0229,
      "step": 6500
    },
    {
      "epoch": 4.48,
      "grad_norm": 53693.88671875,
      "learning_rate": 5.2529182879377435e-06,
      "loss": 1.0176,
      "step": 7000
    },
    {
      "epoch": 4.8,
      "grad_norm": 48436.2265625,
      "learning_rate": 2.0103761348897538e-06,
      "loss": 1.0157,
      "step": 7500
    }
  ],
  "logging_steps": 500,
  "max_steps": 7810,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 30000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.026686829985792e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}