| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 10240, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.48828125, | |
| "grad_norm": 14.348142623901367, | |
| "learning_rate": 2e-05, | |
| "loss": 0.718, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9765625, | |
| "grad_norm": 12.407145500183105, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6899, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.46484375, | |
| "grad_norm": 9.278654098510742, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3397, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.953125, | |
| "grad_norm": 8.198233604431152, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3419, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.44140625, | |
| "grad_norm": 4.592423915863037, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1915, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.9296875, | |
| "grad_norm": 8.893723487854004, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1922, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.41796875, | |
| "grad_norm": 8.089862823486328, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1263, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.90625, | |
| "grad_norm": 6.3830485343933105, | |
| "learning_rate": 2e-05, | |
| "loss": 0.121, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.39453125, | |
| "grad_norm": 7.365416049957275, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0897, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.8828125, | |
| "grad_norm": 7.9196600914001465, | |
| "learning_rate": 2e-05, | |
| "loss": 0.087, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.37109375, | |
| "grad_norm": 3.7600364685058594, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0665, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.859375, | |
| "grad_norm": 4.597715854644775, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0639, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.34765625, | |
| "grad_norm": 4.1985182762146, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0543, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.8359375, | |
| "grad_norm": 5.26850700378418, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0506, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.32421875, | |
| "grad_norm": 3.8958706855773926, | |
| "learning_rate": 2e-05, | |
| "loss": 0.049, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.8125, | |
| "grad_norm": 6.896234035491943, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0451, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.30078125, | |
| "grad_norm": 3.6532912254333496, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0431, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 8.7890625, | |
| "grad_norm": 4.900750160217285, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0402, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.27734375, | |
| "grad_norm": 1.2737749814987183, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0367, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 9.765625, | |
| "grad_norm": 5.822772026062012, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0391, | |
| "step": 10000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 10240, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3193555713038746e+17, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |