| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.968, | |
| "eval_steps": 500, | |
| "global_step": 124, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.9230769230769231e-07, | |
| "loss": 1.9369, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.8461538461538463e-07, | |
| "loss": 1.9166, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.995995873155957e-07, | |
| "loss": 1.8806, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.951096619903317e-07, | |
| "loss": 1.7313, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.85719361365271e-07, | |
| "loss": 1.5518, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.7161642180652463e-07, | |
| "loss": 1.2057, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 0.9854549169540405, | |
| "eval_runtime": 79.0664, | |
| "eval_samples_per_second": 1.391, | |
| "eval_steps_per_second": 0.177, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.5308279750597144e-07, | |
| "loss": 0.9383, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.3048902348863106e-07, | |
| "loss": 0.6736, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.042868076603638e-07, | |
| "loss": 0.6114, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.75e-07, | |
| "loss": 0.5736, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.4321411944507714e-07, | |
| "loss": 0.5836, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.095646478557912e-07, | |
| "loss": 0.5676, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 0.569873571395874, | |
| "eval_runtime": 79.9371, | |
| "eval_samples_per_second": 1.376, | |
| "eval_steps_per_second": 0.175, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 2.747243250910625e-07, | |
| "loss": 0.5831, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 2.3938969920096296e-07, | |
| "loss": 0.5583, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 2.0426720063190333e-07, | |
| "loss": 0.5599, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.7005901885718867e-07, | |
| "loss": 0.5424, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.3744906379558164e-07, | |
| "loss": 0.5574, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.0708929268538034e-07, | |
| "loss": 0.5608, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 0.5596022605895996, | |
| "eval_runtime": 80.1059, | |
| "eval_samples_per_second": 1.373, | |
| "eval_steps_per_second": 0.175, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 7.958667577511683e-08, | |
| "loss": 0.5648, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 5.5491061420390174e-08, | |
| "loss": 0.551, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 3.528418319489348e-08, | |
| "loss": 0.5471, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 1.9370028791882847e-08, | |
| "loss": 0.5572, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 8.066763266625282e-09, | |
| "loss": 0.5416, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 1.6003680950742726e-09, | |
| "loss": 0.5576, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "eval_loss": 0.5581246018409729, | |
| "eval_runtime": 79.6401, | |
| "eval_samples_per_second": 1.381, | |
| "eval_steps_per_second": 0.176, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "step": 124, | |
| "total_flos": 23406615330816.0, | |
| "train_loss": 0.8594164925236856, | |
| "train_runtime": 9510.8221, | |
| "train_samples_per_second": 0.418, | |
| "train_steps_per_second": 0.013 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 124, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "total_flos": 23406615330816.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |