{ "best_metric": 0.15142391622066498, "best_model_checkpoint": "./outputs/50_50/checkpoint-468", "epoch": 10.0, "eval_steps": 500, "global_step": 4680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.943939393939394, "eval_loss": 0.15142391622066498, "eval_runtime": 2.5713, "eval_samples_per_second": 256.684, "eval_steps_per_second": 32.28, "step": 468 }, { "epoch": 1.07, "learning_rate": 1.7863247863247866e-05, "loss": 0.2863, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.9303030303030303, "eval_loss": 0.1917150467634201, "eval_runtime": 2.7645, "eval_samples_per_second": 238.743, "eval_steps_per_second": 30.024, "step": 936 }, { "epoch": 2.14, "learning_rate": 1.5726495726495726e-05, "loss": 0.2377, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.9333333333333333, "eval_loss": 0.17250454425811768, "eval_runtime": 2.6638, "eval_samples_per_second": 247.77, "eval_steps_per_second": 31.159, "step": 1404 }, { "epoch": 3.21, "learning_rate": 1.3589743589743592e-05, "loss": 0.2142, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.9287878787878788, "eval_loss": 0.17816253006458282, "eval_runtime": 2.5386, "eval_samples_per_second": 259.984, "eval_steps_per_second": 32.695, "step": 1872 }, { "epoch": 4.27, "learning_rate": 1.1452991452991454e-05, "loss": 0.2058, "step": 2000 }, { "epoch": 5.0, "eval_accuracy": 0.9272727272727272, "eval_loss": 0.17876102030277252, "eval_runtime": 2.5719, "eval_samples_per_second": 256.62, "eval_steps_per_second": 32.272, "step": 2340 }, { "epoch": 5.34, "learning_rate": 9.316239316239318e-06, "loss": 0.1899, "step": 2500 }, { "epoch": 6.0, "eval_accuracy": 0.9318181818181818, "eval_loss": 0.18244825303554535, "eval_runtime": 2.5621, "eval_samples_per_second": 257.6, "eval_steps_per_second": 32.395, "step": 2808 }, { "epoch": 6.41, "learning_rate": 7.17948717948718e-06, "loss": 0.1838, "step": 3000 }, { "epoch": 7.0, "eval_accuracy": 0.9333333333333333, "eval_loss": 0.1878737211227417, "eval_runtime": 2.7257, "eval_samples_per_second": 242.138, "eval_steps_per_second": 30.451, "step": 3276 }, { "epoch": 7.48, "learning_rate": 5.042735042735043e-06, "loss": 0.1757, "step": 3500 }, { "epoch": 8.0, "eval_accuracy": 0.9333333333333333, "eval_loss": 0.23907029628753662, "eval_runtime": 2.5302, "eval_samples_per_second": 260.853, "eval_steps_per_second": 32.804, "step": 3744 }, { "epoch": 8.55, "learning_rate": 2.9059829059829063e-06, "loss": 0.1852, "step": 4000 }, { "epoch": 9.0, "eval_accuracy": 0.9409090909090909, "eval_loss": 0.17251791059970856, "eval_runtime": 2.7072, "eval_samples_per_second": 243.796, "eval_steps_per_second": 30.659, "step": 4212 }, { "epoch": 9.62, "learning_rate": 7.692307692307694e-07, "loss": 0.1634, "step": 4500 }, { "epoch": 10.0, "eval_accuracy": 0.9393939393939394, "eval_loss": 0.17622749507427216, "eval_runtime": 2.5223, "eval_samples_per_second": 261.67, "eval_steps_per_second": 32.907, "step": 4680 }, { "epoch": 10.0, "step": 4680, "total_flos": 2.898200411585741e+18, "train_loss": 0.20278241247193426, "train_runtime": 515.941, "train_samples_per_second": 72.489, "train_steps_per_second": 9.071 } ], "logging_steps": 500, "max_steps": 4680, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2.898200411585741e+18, "trial_name": null, "trial_params": null }