| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.88, | |
| "global_step": 9000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 6.805111821086262e-05, | |
| "loss": 0.8467, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 11.306694030761719, | |
| "eval_meteor": 0, | |
| "eval_runtime": 15.5559, | |
| "eval_samples_per_second": 6.428, | |
| "eval_steps_per_second": 0.45, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.610223642172524e-05, | |
| "loss": 0.2373, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 11.511818885803223, | |
| "eval_meteor": 0, | |
| "eval_runtime": 13.8771, | |
| "eval_samples_per_second": 7.206, | |
| "eval_steps_per_second": 0.504, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 4.153354632587859e-06, | |
| "loss": 0.2084, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 11.402015686035156, | |
| "eval_meteor": 0, | |
| "eval_runtime": 13.9339, | |
| "eval_samples_per_second": 7.177, | |
| "eval_steps_per_second": 0.502, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.933333333333334e-05, | |
| "loss": 0.2015, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 10.56881332397461, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 14.4237, | |
| "eval_samples_per_second": 6.933, | |
| "eval_steps_per_second": 0.485, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 8.4e-05, | |
| "loss": 0.1958, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_loss": 11.447636604309082, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 12.9601, | |
| "eval_samples_per_second": 7.716, | |
| "eval_steps_per_second": 0.54, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 7.866666666666666e-05, | |
| "loss": 0.1921, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 11.779410362243652, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 12.9944, | |
| "eval_samples_per_second": 7.696, | |
| "eval_steps_per_second": 0.539, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 7.333333333333333e-05, | |
| "loss": 0.189, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 11.277334213256836, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.0206, | |
| "eval_samples_per_second": 7.68, | |
| "eval_steps_per_second": 0.538, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 6.800000000000001e-05, | |
| "loss": 0.1831, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 12.441211700439453, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.1556, | |
| "eval_samples_per_second": 7.601, | |
| "eval_steps_per_second": 0.532, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 6.266666666666667e-05, | |
| "loss": 0.1679, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 11.94626235961914, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.0053, | |
| "eval_samples_per_second": 7.689, | |
| "eval_steps_per_second": 0.538, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 5.7333333333333336e-05, | |
| "loss": 0.1653, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_loss": 11.814840316772461, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.0463, | |
| "eval_samples_per_second": 7.665, | |
| "eval_steps_per_second": 0.537, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 5.2000000000000004e-05, | |
| "loss": 0.1644, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_loss": 12.166290283203125, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.0378, | |
| "eval_samples_per_second": 7.67, | |
| "eval_steps_per_second": 0.537, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 0.1642, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 11.968828201293945, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 12.9985, | |
| "eval_samples_per_second": 7.693, | |
| "eval_steps_per_second": 0.539, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 4.133333333333333e-05, | |
| "loss": 0.1641, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 12.146567344665527, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.0609, | |
| "eval_samples_per_second": 7.656, | |
| "eval_steps_per_second": 0.536, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.1633, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 12.101709365844727, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.0856, | |
| "eval_samples_per_second": 7.642, | |
| "eval_steps_per_second": 0.535, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.066666666666667e-05, | |
| "loss": 0.1525, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_loss": 12.363394737243652, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.5503, | |
| "eval_samples_per_second": 7.38, | |
| "eval_steps_per_second": 0.517, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 2.5333333333333337e-05, | |
| "loss": 0.1434, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_loss": 12.543970108032227, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.9628, | |
| "eval_samples_per_second": 7.162, | |
| "eval_steps_per_second": 0.501, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1456, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_loss": 12.349944114685059, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.1423, | |
| "eval_samples_per_second": 7.609, | |
| "eval_steps_per_second": 0.533, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.4666666666666668e-05, | |
| "loss": 0.1437, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_loss": 12.369497299194336, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.3304, | |
| "eval_samples_per_second": 7.502, | |
| "eval_steps_per_second": 0.525, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 9.333333333333334e-06, | |
| "loss": 0.1414, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 12.623024940490723, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.0594, | |
| "eval_samples_per_second": 7.657, | |
| "eval_steps_per_second": 0.536, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.1443, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 12.473442077636719, | |
| "eval_meteor": 0.0, | |
| "eval_runtime": 13.0574, | |
| "eval_samples_per_second": 7.659, | |
| "eval_steps_per_second": 0.536, | |
| "step": 9000 | |
| } | |
| ], | |
| "max_steps": 9375, | |
| "num_train_epochs": 3, | |
| "total_flos": 3.11732169867264e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |