{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.10416666666666667, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 1.7594966888427734, "eval_runtime": 8.6735, "eval_samples_per_second": 4.727, "eval_steps_per_second": 2.421, "step": 0 }, { "epoch": 0.010416666666666666, "grad_norm": 0.7218555212020874, "learning_rate": 0.0, "loss": 1.7446, "step": 1 }, { "epoch": 0.020833333333333332, "grad_norm": 0.7277606129646301, "learning_rate": 2e-05, "loss": 1.5541, "step": 2 }, { "epoch": 0.03125, "grad_norm": 0.780460774898529, "learning_rate": 4e-05, "loss": 2.0334, "step": 3 }, { "epoch": 0.03125, "eval_loss": 1.7596561908721924, "eval_runtime": 8.4033, "eval_samples_per_second": 4.879, "eval_steps_per_second": 2.499, "step": 3 }, { "epoch": 0.041666666666666664, "grad_norm": 0.6424224376678467, "learning_rate": 6e-05, "loss": 1.279, "step": 4 }, { "epoch": 0.052083333333333336, "grad_norm": 0.7212741374969482, "learning_rate": 8e-05, "loss": 1.7382, "step": 5 }, { "epoch": 0.0625, "grad_norm": 0.6658374071121216, "learning_rate": 0.0001, "loss": 1.4699, "step": 6 }, { "epoch": 0.0625, "eval_loss": 1.7542871236801147, "eval_runtime": 8.3903, "eval_samples_per_second": 4.887, "eval_steps_per_second": 2.503, "step": 6 }, { "epoch": 0.07291666666666667, "grad_norm": 0.6790383458137512, "learning_rate": 0.00012, "loss": 1.4455, "step": 7 }, { "epoch": 0.08333333333333333, "grad_norm": 0.5029568076133728, "learning_rate": 0.00014, "loss": 1.3662, "step": 8 }, { "epoch": 0.09375, "grad_norm": 0.958249032497406, "learning_rate": 0.00016, "loss": 1.4307, "step": 9 }, { "epoch": 0.09375, "eval_loss": 1.7402998208999634, "eval_runtime": 8.3041, "eval_samples_per_second": 4.937, "eval_steps_per_second": 2.529, "step": 9 }, { "epoch": 0.10416666666666667, "grad_norm": 0.7801684141159058, "learning_rate": 0.00018, "loss": 2.183, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 240545515438080.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }