{ "best_metric": 0.1858806610107422, "best_model_checkpoint": "./models/results_one_liners_977/checkpoint-325", "epoch": 0.9374436632413917, "global_step": 325, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 0.6981, "step": 8 }, { "epoch": 0.05, "learning_rate": 8.000000000000001e-06, "loss": 0.6942, "step": 16 }, { "epoch": 0.07, "learning_rate": 1.2e-05, "loss": 0.6922, "step": 24 }, { "epoch": 0.07, "eval_accuracy": 0.5154574132492113, "eval_f1": 0.6695352839931153, "eval_loss": 0.6819674968719482, "eval_precision": 0.5032341526520052, "eval_recall": 1.0, "eval_runtime": 20.0554, "eval_samples_per_second": 158.062, "eval_steps_per_second": 19.795, "step": 25 }, { "epoch": 0.09, "learning_rate": 1.6000000000000003e-05, "loss": 0.6758, "step": 32 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 0.6063, "step": 40 }, { "epoch": 0.14, "learning_rate": 2.4e-05, "loss": 0.4676, "step": 48 }, { "epoch": 0.14, "eval_accuracy": 0.8643533123028391, "eval_f1": 0.8701690821256038, "eval_loss": 0.32924458384513855, "eval_precision": 0.820615034168565, "eval_recall": 0.9260925449871465, "eval_runtime": 19.9782, "eval_samples_per_second": 158.673, "eval_steps_per_second": 19.872, "step": 50 }, { "epoch": 0.16, "learning_rate": 2.8000000000000003e-05, "loss": 0.3596, "step": 56 }, { "epoch": 0.18, "learning_rate": 3.2000000000000005e-05, "loss": 0.3063, "step": 64 }, { "epoch": 0.21, "learning_rate": 3.6e-05, "loss": 0.3641, "step": 72 }, { "epoch": 0.22, "eval_accuracy": 0.886435331230284, "eval_f1": 0.8778833107191316, "eval_loss": 0.28779253363609314, "eval_precision": 0.9295977011494253, "eval_recall": 0.8316195372750642, "eval_runtime": 20.0332, "eval_samples_per_second": 158.237, "eval_steps_per_second": 19.817, "step": 75 }, { "epoch": 0.23, "learning_rate": 4e-05, "loss": 0.3659, "step": 80 }, { "epoch": 0.25, "learning_rate": 4.4000000000000006e-05, "loss": 0.256, "step": 88 }, { "epoch": 0.28, "learning_rate": 4.8e-05, "loss": 0.2936, "step": 96 }, { "epoch": 0.29, "eval_accuracy": 0.8936908517350157, "eval_f1": 0.8964669738863287, "eval_loss": 0.26408979296684265, "eval_precision": 0.8587404355503238, "eval_recall": 0.9376606683804627, "eval_runtime": 19.9923, "eval_samples_per_second": 158.561, "eval_steps_per_second": 19.858, "step": 100 }, { "epoch": 0.3, "learning_rate": 4.9186991869918704e-05, "loss": 0.3169, "step": 104 }, { "epoch": 0.32, "learning_rate": 4.75609756097561e-05, "loss": 0.2808, "step": 112 }, { "epoch": 0.35, "learning_rate": 4.59349593495935e-05, "loss": 0.2615, "step": 120 }, { "epoch": 0.36, "eval_accuracy": 0.9066246056782334, "eval_f1": 0.9083591331269351, "eval_loss": 0.2332078069448471, "eval_precision": 0.8763440860215054, "eval_recall": 0.9428020565552699, "eval_runtime": 20.0114, "eval_samples_per_second": 158.41, "eval_steps_per_second": 19.839, "step": 125 }, { "epoch": 0.37, "learning_rate": 4.43089430894309e-05, "loss": 0.2801, "step": 128 }, { "epoch": 0.39, "learning_rate": 4.26829268292683e-05, "loss": 0.2454, "step": 136 }, { "epoch": 0.42, "learning_rate": 4.105691056910569e-05, "loss": 0.2407, "step": 144 }, { "epoch": 0.43, "eval_accuracy": 0.8965299684542587, "eval_f1": 0.9015606242496998, "eval_loss": 0.25834545493125916, "eval_precision": 0.8457207207207207, "eval_recall": 0.9652956298200515, "eval_runtime": 19.9859, "eval_samples_per_second": 158.611, "eval_steps_per_second": 19.864, "step": 150 }, { "epoch": 0.44, "learning_rate": 3.943089430894309e-05, "loss": 0.2835, "step": 152 }, { "epoch": 0.46, "learning_rate": 3.780487804878049e-05, "loss": 0.2684, "step": 160 }, { "epoch": 0.48, "learning_rate": 3.617886178861789e-05, "loss": 0.2493, "step": 168 }, { "epoch": 0.5, "eval_accuracy": 0.9018927444794953, "eval_f1": 0.895180316818335, "eval_loss": 0.25140437483787537, "eval_precision": 0.9411764705882353, "eval_recall": 0.8534704370179949, "eval_runtime": 19.9806, "eval_samples_per_second": 158.654, "eval_steps_per_second": 19.869, "step": 175 }, { "epoch": 0.51, "learning_rate": 3.4552845528455286e-05, "loss": 0.2957, "step": 176 }, { "epoch": 0.53, "learning_rate": 3.292682926829269e-05, "loss": 0.2412, "step": 184 }, { "epoch": 0.55, "learning_rate": 3.150406504065041e-05, "loss": 0.2066, "step": 192 }, { "epoch": 0.58, "learning_rate": 2.9878048780487805e-05, "loss": 0.2657, "step": 200 }, { "epoch": 0.58, "eval_accuracy": 0.9201892744479495, "eval_f1": 0.9199620373299588, "eval_loss": 0.2050713449716568, "eval_precision": 0.9059190031152647, "eval_recall": 0.9344473007712082, "eval_runtime": 20.0006, "eval_samples_per_second": 158.495, "eval_steps_per_second": 19.849, "step": 200 }, { "epoch": 0.6, "learning_rate": 2.8252032520325205e-05, "loss": 0.2702, "step": 208 }, { "epoch": 0.62, "learning_rate": 2.66260162601626e-05, "loss": 0.2435, "step": 216 }, { "epoch": 0.65, "learning_rate": 2.5e-05, "loss": 0.2281, "step": 224 }, { "epoch": 0.65, "eval_accuracy": 0.905993690851735, "eval_f1": 0.9101326899879373, "eval_loss": 0.23607869446277618, "eval_precision": 0.8573863636363637, "eval_recall": 0.9697943444730077, "eval_runtime": 19.9795, "eval_samples_per_second": 158.663, "eval_steps_per_second": 19.87, "step": 225 }, { "epoch": 0.67, "learning_rate": 2.3373983739837398e-05, "loss": 0.233, "step": 232 }, { "epoch": 0.69, "learning_rate": 2.1747967479674798e-05, "loss": 0.2294, "step": 240 }, { "epoch": 0.72, "learning_rate": 2.0121951219512197e-05, "loss": 0.1933, "step": 248 }, { "epoch": 0.72, "eval_accuracy": 0.9186119873817035, "eval_f1": 0.9201732673267328, "eval_loss": 0.206766277551651, "eval_precision": 0.8872315035799523, "eval_recall": 0.955655526992288, "eval_runtime": 19.9867, "eval_samples_per_second": 158.605, "eval_steps_per_second": 19.863, "step": 250 }, { "epoch": 0.74, "learning_rate": 1.8495934959349594e-05, "loss": 0.2417, "step": 256 }, { "epoch": 0.76, "learning_rate": 1.6869918699186994e-05, "loss": 0.213, "step": 264 }, { "epoch": 0.78, "learning_rate": 1.524390243902439e-05, "loss": 0.1522, "step": 272 }, { "epoch": 0.79, "eval_accuracy": 0.9277602523659306, "eval_f1": 0.9273707580082461, "eval_loss": 0.21532806754112244, "eval_precision": 0.915466499686913, "eval_recall": 0.9395886889460154, "eval_runtime": 20.029, "eval_samples_per_second": 158.27, "eval_steps_per_second": 19.821, "step": 275 }, { "epoch": 0.81, "learning_rate": 1.3617886178861788e-05, "loss": 0.2011, "step": 280 }, { "epoch": 0.83, "learning_rate": 1.1991869918699188e-05, "loss": 0.2349, "step": 288 }, { "epoch": 0.85, "learning_rate": 1.0365853658536585e-05, "loss": 0.2403, "step": 296 }, { "epoch": 0.87, "eval_accuracy": 0.9271293375394322, "eval_f1": 0.9257473481195757, "eval_loss": 0.1873028576374054, "eval_precision": 0.9260450160771704, "eval_recall": 0.9254498714652957, "eval_runtime": 19.9617, "eval_samples_per_second": 158.804, "eval_steps_per_second": 19.888, "step": 300 }, { "epoch": 0.88, "learning_rate": 8.739837398373985e-06, "loss": 0.1999, "step": 304 }, { "epoch": 0.9, "learning_rate": 7.113821138211382e-06, "loss": 0.1742, "step": 312 }, { "epoch": 0.92, "learning_rate": 5.487804878048781e-06, "loss": 0.1809, "step": 320 }, { "epoch": 0.94, "eval_accuracy": 0.9277602523659306, "eval_f1": 0.9287713841368586, "eval_loss": 0.1858806610107422, "eval_precision": 0.8999397227245328, "eval_recall": 0.9595115681233933, "eval_runtime": 20.0389, "eval_samples_per_second": 158.192, "eval_steps_per_second": 19.811, "step": 325 } ], "max_steps": 346, "num_train_epochs": 1, "total_flos": 619955424192000.0, "trial_name": null, "trial_params": null }