| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 23.076923076923077, | |
| "eval_steps": 6, | |
| "global_step": 150, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 9.26979654579933, | |
| "learning_rate": 9.930555555555557e-06, | |
| "loss": 2.3448, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 5.210528714230263, | |
| "learning_rate": 9.444444444444445e-06, | |
| "loss": 1.1491, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 3.230769230769231, | |
| "grad_norm": 2.9442662904253956, | |
| "learning_rate": 8.958333333333334e-06, | |
| "loss": 0.8672, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 4.3076923076923075, | |
| "grad_norm": 3.5593465552558414, | |
| "learning_rate": 8.472222222222223e-06, | |
| "loss": 0.5821, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 5.384615384615385, | |
| "grad_norm": 1.8226707401714455, | |
| "learning_rate": 7.986111111111112e-06, | |
| "loss": 0.3871, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 6.461538461538462, | |
| "grad_norm": 1.7198913738921449, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.2406, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 7.538461538461538, | |
| "grad_norm": 1.2945005159302418, | |
| "learning_rate": 7.013888888888889e-06, | |
| "loss": 0.1497, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 8.615384615384615, | |
| "grad_norm": 1.2289370515669946, | |
| "learning_rate": 6.5277777777777784e-06, | |
| "loss": 0.0934, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 9.692307692307692, | |
| "grad_norm": 0.9150002193228371, | |
| "learning_rate": 6.041666666666667e-06, | |
| "loss": 0.0654, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 10.76923076923077, | |
| "grad_norm": 0.9131175489790094, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.0536, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 11.846153846153847, | |
| "grad_norm": 1.8523335548758657, | |
| "learning_rate": 5.069444444444445e-06, | |
| "loss": 0.0486, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 12.923076923076923, | |
| "grad_norm": 0.6575134033150337, | |
| "learning_rate": 4.583333333333333e-06, | |
| "loss": 0.0448, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.6767754837248606, | |
| "learning_rate": 4.097222222222222e-06, | |
| "loss": 0.0429, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 15.076923076923077, | |
| "grad_norm": 0.7630021804463145, | |
| "learning_rate": 3.6111111111111115e-06, | |
| "loss": 0.0428, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 16.153846153846153, | |
| "grad_norm": 0.4151377369372317, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.0411, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 17.23076923076923, | |
| "grad_norm": 0.34776591719448274, | |
| "learning_rate": 2.6388888888888893e-06, | |
| "loss": 0.0399, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 18.307692307692307, | |
| "grad_norm": 0.3123846039887391, | |
| "learning_rate": 2.152777777777778e-06, | |
| "loss": 0.0401, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 19.384615384615383, | |
| "grad_norm": 0.34698952138584394, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0389, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 20.46153846153846, | |
| "grad_norm": 0.24525450335189472, | |
| "learning_rate": 1.1805555555555556e-06, | |
| "loss": 0.0393, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 21.53846153846154, | |
| "grad_norm": 0.12542494453259936, | |
| "learning_rate": 6.944444444444446e-07, | |
| "loss": 0.0358, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 22.615384615384617, | |
| "grad_norm": 0.12124244421139624, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "loss": 0.0388, | |
| "step": 147 | |
| } | |
| ], | |
| "logging_steps": 7, | |
| "max_steps": 150, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 150, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |