{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.88, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.96, "learning_rate": 6.805111821086262e-05, "loss": 0.8467, "step": 300 }, { "epoch": 0.96, "eval_loss": 11.306694030761719, "eval_meteor": 0, "eval_runtime": 15.5559, "eval_samples_per_second": 6.428, "eval_steps_per_second": 0.45, "step": 300 }, { "epoch": 1.92, "learning_rate": 3.610223642172524e-05, "loss": 0.2373, "step": 600 }, { "epoch": 1.92, "eval_loss": 11.511818885803223, "eval_meteor": 0, "eval_runtime": 13.8771, "eval_samples_per_second": 7.206, "eval_steps_per_second": 0.504, "step": 600 }, { "epoch": 2.88, "learning_rate": 4.153354632587859e-06, "loss": 0.2084, "step": 900 }, { "epoch": 2.88, "eval_loss": 11.402015686035156, "eval_meteor": 0, "eval_runtime": 13.9339, "eval_samples_per_second": 7.177, "eval_steps_per_second": 0.502, "step": 900 }, { "epoch": 0.32, "learning_rate": 8.933333333333334e-05, "loss": 0.2015, "step": 1000 }, { "epoch": 0.32, "eval_loss": 10.56881332397461, "eval_meteor": 0.0, "eval_runtime": 14.4237, "eval_samples_per_second": 6.933, "eval_steps_per_second": 0.485, "step": 1000 }, { "epoch": 0.48, "learning_rate": 8.4e-05, "loss": 0.1958, "step": 1500 }, { "epoch": 0.48, "eval_loss": 11.447636604309082, "eval_meteor": 0.0, "eval_runtime": 12.9601, "eval_samples_per_second": 7.716, "eval_steps_per_second": 0.54, "step": 1500 }, { "epoch": 0.64, "learning_rate": 7.866666666666666e-05, "loss": 0.1921, "step": 2000 }, { "epoch": 0.64, "eval_loss": 11.779410362243652, "eval_meteor": 0.0, "eval_runtime": 12.9944, "eval_samples_per_second": 7.696, "eval_steps_per_second": 0.539, "step": 2000 }, { "epoch": 0.8, "learning_rate": 7.333333333333333e-05, "loss": 0.189, "step": 2500 }, { "epoch": 0.8, "eval_loss": 11.277334213256836, "eval_meteor": 0.0, "eval_runtime": 13.0206, "eval_samples_per_second": 7.68, "eval_steps_per_second": 0.538, "step": 2500 }, { "epoch": 0.96, "learning_rate": 6.800000000000001e-05, "loss": 0.1831, "step": 3000 }, { "epoch": 0.96, "eval_loss": 12.441211700439453, "eval_meteor": 0.0, "eval_runtime": 13.1556, "eval_samples_per_second": 7.601, "eval_steps_per_second": 0.532, "step": 3000 }, { "epoch": 1.12, "learning_rate": 6.266666666666667e-05, "loss": 0.1679, "step": 3500 }, { "epoch": 1.12, "eval_loss": 11.94626235961914, "eval_meteor": 0.0, "eval_runtime": 13.0053, "eval_samples_per_second": 7.689, "eval_steps_per_second": 0.538, "step": 3500 }, { "epoch": 1.28, "learning_rate": 5.7333333333333336e-05, "loss": 0.1653, "step": 4000 }, { "epoch": 1.28, "eval_loss": 11.814840316772461, "eval_meteor": 0.0, "eval_runtime": 13.0463, "eval_samples_per_second": 7.665, "eval_steps_per_second": 0.537, "step": 4000 }, { "epoch": 1.44, "learning_rate": 5.2000000000000004e-05, "loss": 0.1644, "step": 4500 }, { "epoch": 1.44, "eval_loss": 12.166290283203125, "eval_meteor": 0.0, "eval_runtime": 13.0378, "eval_samples_per_second": 7.67, "eval_steps_per_second": 0.537, "step": 4500 }, { "epoch": 1.6, "learning_rate": 4.666666666666667e-05, "loss": 0.1642, "step": 5000 }, { "epoch": 1.6, "eval_loss": 11.968828201293945, "eval_meteor": 0.0, "eval_runtime": 12.9985, "eval_samples_per_second": 7.693, "eval_steps_per_second": 0.539, "step": 5000 }, { "epoch": 1.76, "learning_rate": 4.133333333333333e-05, "loss": 0.1641, "step": 5500 }, { "epoch": 1.76, "eval_loss": 12.146567344665527, "eval_meteor": 0.0, "eval_runtime": 13.0609, "eval_samples_per_second": 7.656, "eval_steps_per_second": 0.536, "step": 5500 }, { "epoch": 1.92, "learning_rate": 3.6e-05, "loss": 0.1633, "step": 6000 }, { "epoch": 1.92, "eval_loss": 12.101709365844727, "eval_meteor": 0.0, "eval_runtime": 13.0856, "eval_samples_per_second": 7.642, "eval_steps_per_second": 0.535, "step": 6000 }, { "epoch": 2.08, "learning_rate": 3.066666666666667e-05, "loss": 0.1525, "step": 6500 }, { "epoch": 2.08, "eval_loss": 12.363394737243652, "eval_meteor": 0.0, "eval_runtime": 13.5503, "eval_samples_per_second": 7.38, "eval_steps_per_second": 0.517, "step": 6500 }, { "epoch": 2.24, "learning_rate": 2.5333333333333337e-05, "loss": 0.1434, "step": 7000 }, { "epoch": 2.24, "eval_loss": 12.543970108032227, "eval_meteor": 0.0, "eval_runtime": 13.9628, "eval_samples_per_second": 7.162, "eval_steps_per_second": 0.501, "step": 7000 }, { "epoch": 2.4, "learning_rate": 2e-05, "loss": 0.1456, "step": 7500 }, { "epoch": 2.4, "eval_loss": 12.349944114685059, "eval_meteor": 0.0, "eval_runtime": 13.1423, "eval_samples_per_second": 7.609, "eval_steps_per_second": 0.533, "step": 7500 }, { "epoch": 2.56, "learning_rate": 1.4666666666666668e-05, "loss": 0.1437, "step": 8000 }, { "epoch": 2.56, "eval_loss": 12.369497299194336, "eval_meteor": 0.0, "eval_runtime": 13.3304, "eval_samples_per_second": 7.502, "eval_steps_per_second": 0.525, "step": 8000 }, { "epoch": 2.72, "learning_rate": 9.333333333333334e-06, "loss": 0.1414, "step": 8500 }, { "epoch": 2.72, "eval_loss": 12.623024940490723, "eval_meteor": 0.0, "eval_runtime": 13.0594, "eval_samples_per_second": 7.657, "eval_steps_per_second": 0.536, "step": 8500 }, { "epoch": 2.88, "learning_rate": 4.000000000000001e-06, "loss": 0.1443, "step": 9000 }, { "epoch": 2.88, "eval_loss": 12.473442077636719, "eval_meteor": 0.0, "eval_runtime": 13.0574, "eval_samples_per_second": 7.659, "eval_steps_per_second": 0.536, "step": 9000 } ], "max_steps": 9375, "num_train_epochs": 3, "total_flos": 3.11732169867264e+17, "trial_name": null, "trial_params": null }