{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.968, "eval_steps": 500, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1.9230769230769231e-07, "loss": 1.9369, "step": 5 }, { "epoch": 0.32, "learning_rate": 3.8461538461538463e-07, "loss": 1.9166, "step": 10 }, { "epoch": 0.48, "learning_rate": 4.995995873155957e-07, "loss": 1.8806, "step": 15 }, { "epoch": 0.64, "learning_rate": 4.951096619903317e-07, "loss": 1.7313, "step": 20 }, { "epoch": 0.8, "learning_rate": 4.85719361365271e-07, "loss": 1.5518, "step": 25 }, { "epoch": 0.96, "learning_rate": 4.7161642180652463e-07, "loss": 1.2057, "step": 30 }, { "epoch": 0.99, "eval_loss": 0.9854549169540405, "eval_runtime": 79.0664, "eval_samples_per_second": 1.391, "eval_steps_per_second": 0.177, "step": 31 }, { "epoch": 1.12, "learning_rate": 4.5308279750597144e-07, "loss": 0.9383, "step": 35 }, { "epoch": 1.28, "learning_rate": 4.3048902348863106e-07, "loss": 0.6736, "step": 40 }, { "epoch": 1.44, "learning_rate": 4.042868076603638e-07, "loss": 0.6114, "step": 45 }, { "epoch": 1.6, "learning_rate": 3.75e-07, "loss": 0.5736, "step": 50 }, { "epoch": 1.76, "learning_rate": 3.4321411944507714e-07, "loss": 0.5836, "step": 55 }, { "epoch": 1.92, "learning_rate": 3.095646478557912e-07, "loss": 0.5676, "step": 60 }, { "epoch": 1.98, "eval_loss": 0.569873571395874, "eval_runtime": 79.9371, "eval_samples_per_second": 1.376, "eval_steps_per_second": 0.175, "step": 62 }, { "epoch": 2.08, "learning_rate": 2.747243250910625e-07, "loss": 0.5831, "step": 65 }, { "epoch": 2.24, "learning_rate": 2.3938969920096296e-07, "loss": 0.5583, "step": 70 }, { "epoch": 2.4, "learning_rate": 2.0426720063190333e-07, "loss": 0.5599, "step": 75 }, { "epoch": 2.56, "learning_rate": 1.7005901885718867e-07, "loss": 0.5424, "step": 80 }, { "epoch": 2.72, "learning_rate": 1.3744906379558164e-07, "loss": 0.5574, "step": 85 }, { "epoch": 2.88, "learning_rate": 1.0708929268538034e-07, "loss": 0.5608, "step": 90 }, { "epoch": 2.98, "eval_loss": 0.5596022605895996, "eval_runtime": 80.1059, "eval_samples_per_second": 1.373, "eval_steps_per_second": 0.175, "step": 93 }, { "epoch": 3.04, "learning_rate": 7.958667577511683e-08, "loss": 0.5648, "step": 95 }, { "epoch": 3.2, "learning_rate": 5.5491061420390174e-08, "loss": 0.551, "step": 100 }, { "epoch": 3.36, "learning_rate": 3.528418319489348e-08, "loss": 0.5471, "step": 105 }, { "epoch": 3.52, "learning_rate": 1.9370028791882847e-08, "loss": 0.5572, "step": 110 }, { "epoch": 3.68, "learning_rate": 8.066763266625282e-09, "loss": 0.5416, "step": 115 }, { "epoch": 3.84, "learning_rate": 1.6003680950742726e-09, "loss": 0.5576, "step": 120 }, { "epoch": 3.97, "eval_loss": 0.5581246018409729, "eval_runtime": 79.6401, "eval_samples_per_second": 1.381, "eval_steps_per_second": 0.176, "step": 124 }, { "epoch": 3.97, "step": 124, "total_flos": 23406615330816.0, "train_loss": 0.8594164925236856, "train_runtime": 9510.8221, "train_samples_per_second": 0.418, "train_steps_per_second": 0.013 } ], "logging_steps": 5, "max_steps": 124, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 23406615330816.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }