{ "best_metric": 0.1847076267004013, "best_model_checkpoint": "./models/results_one_liners_23/checkpoint-325", "epoch": 0.9374436632413917, "global_step": 325, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 0.694, "step": 8 }, { "epoch": 0.05, "learning_rate": 8.000000000000001e-06, "loss": 0.6942, "step": 16 }, { "epoch": 0.07, "learning_rate": 1.2e-05, "loss": 0.6865, "step": 24 }, { "epoch": 0.07, "eval_accuracy": 0.4908517350157729, "eval_f1": 0.6584849767245028, "eval_loss": 0.6858492493629456, "eval_precision": 0.4908517350157729, "eval_recall": 1.0, "eval_runtime": 15.5866, "eval_samples_per_second": 203.38, "eval_steps_per_second": 25.471, "step": 25 }, { "epoch": 0.09, "learning_rate": 1.6000000000000003e-05, "loss": 0.6792, "step": 32 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 0.6488, "step": 40 }, { "epoch": 0.14, "learning_rate": 2.4e-05, "loss": 0.4778, "step": 48 }, { "epoch": 0.14, "eval_accuracy": 0.8091482649842271, "eval_f1": 0.8317107093184979, "eval_loss": 0.4457879066467285, "eval_precision": 0.73320255026974, "eval_recall": 0.9607969151670951, "eval_runtime": 15.5131, "eval_samples_per_second": 204.343, "eval_steps_per_second": 25.591, "step": 50 }, { "epoch": 0.16, "learning_rate": 2.8000000000000003e-05, "loss": 0.44, "step": 56 }, { "epoch": 0.18, "learning_rate": 3.2000000000000005e-05, "loss": 0.3451, "step": 64 }, { "epoch": 0.21, "learning_rate": 3.6e-05, "loss": 0.3646, "step": 72 }, { "epoch": 0.22, "eval_accuracy": 0.8700315457413249, "eval_f1": 0.8557422969187675, "eval_loss": 0.30672192573547363, "eval_precision": 0.94, "eval_recall": 0.7853470437017995, "eval_runtime": 15.2999, "eval_samples_per_second": 207.191, "eval_steps_per_second": 25.948, "step": 75 }, { "epoch": 0.23, "learning_rate": 4e-05, "loss": 0.3381, "step": 80 }, { "epoch": 0.25, "learning_rate": 4.4000000000000006e-05, "loss": 0.2767, "step": 88 }, { "epoch": 0.28, "learning_rate": 4.75e-05, "loss": 0.3318, "step": 96 }, { "epoch": 0.29, "eval_accuracy": 0.9009463722397476, "eval_f1": 0.9018750000000001, "eval_loss": 0.2531206011772156, "eval_precision": 0.8777372262773723, "eval_recall": 0.9273778920308483, "eval_runtime": 15.1416, "eval_samples_per_second": 209.357, "eval_steps_per_second": 26.219, "step": 100 }, { "epoch": 0.3, "learning_rate": 4.9390243902439024e-05, "loss": 0.2726, "step": 104 }, { "epoch": 0.32, "learning_rate": 4.776422764227643e-05, "loss": 0.3775, "step": 112 }, { "epoch": 0.35, "learning_rate": 4.613821138211382e-05, "loss": 0.4153, "step": 120 }, { "epoch": 0.36, "eval_accuracy": 0.898422712933754, "eval_f1": 0.9005558987029031, "eval_loss": 0.258443683385849, "eval_precision": 0.8668252080856124, "eval_recall": 0.9370179948586118, "eval_runtime": 15.1593, "eval_samples_per_second": 209.112, "eval_steps_per_second": 26.189, "step": 125 }, { "epoch": 0.37, "learning_rate": 4.451219512195122e-05, "loss": 0.3625, "step": 128 }, { "epoch": 0.39, "learning_rate": 4.2886178861788616e-05, "loss": 0.3033, "step": 136 }, { "epoch": 0.42, "learning_rate": 4.126016260162602e-05, "loss": 0.3392, "step": 144 }, { "epoch": 0.43, "eval_accuracy": 0.91198738170347, "eval_f1": 0.9094449853943525, "eval_loss": 0.22280997037887573, "eval_precision": 0.9186885245901639, "eval_recall": 0.9003856041131105, "eval_runtime": 14.7479, "eval_samples_per_second": 214.945, "eval_steps_per_second": 26.919, "step": 150 }, { "epoch": 0.44, "learning_rate": 3.9634146341463416e-05, "loss": 0.2677, "step": 152 }, { "epoch": 0.46, "learning_rate": 3.800813008130081e-05, "loss": 0.2286, "step": 160 }, { "epoch": 0.48, "learning_rate": 3.6382113821138216e-05, "loss": 0.284, "step": 168 }, { "epoch": 0.5, "eval_accuracy": 0.8958990536277602, "eval_f1": 0.9009603841536614, "eval_loss": 0.2512595057487488, "eval_precision": 0.8451576576576577, "eval_recall": 0.9646529562982005, "eval_runtime": 14.9616, "eval_samples_per_second": 211.876, "eval_steps_per_second": 26.535, "step": 175 }, { "epoch": 0.51, "learning_rate": 3.475609756097561e-05, "loss": 0.2726, "step": 176 }, { "epoch": 0.53, "learning_rate": 3.313008130081301e-05, "loss": 0.2476, "step": 184 }, { "epoch": 0.55, "learning_rate": 3.150406504065041e-05, "loss": 0.2403, "step": 192 }, { "epoch": 0.58, "learning_rate": 2.9878048780487805e-05, "loss": 0.1909, "step": 200 }, { "epoch": 0.58, "eval_accuracy": 0.9148264984227129, "eval_f1": 0.9126778783958602, "eval_loss": 0.2199183851480484, "eval_precision": 0.9186197916666666, "eval_recall": 0.9068123393316195, "eval_runtime": 15.3053, "eval_samples_per_second": 207.118, "eval_steps_per_second": 25.939, "step": 200 }, { "epoch": 0.6, "learning_rate": 2.8252032520325205e-05, "loss": 0.214, "step": 208 }, { "epoch": 0.62, "learning_rate": 2.66260162601626e-05, "loss": 0.2807, "step": 216 }, { "epoch": 0.65, "learning_rate": 2.5e-05, "loss": 0.2116, "step": 224 }, { "epoch": 0.65, "eval_accuracy": 0.917981072555205, "eval_f1": 0.9144736842105263, "eval_loss": 0.2060399204492569, "eval_precision": 0.9366576819407008, "eval_recall": 0.8933161953727506, "eval_runtime": 15.179, "eval_samples_per_second": 208.841, "eval_steps_per_second": 26.155, "step": 225 }, { "epoch": 0.67, "learning_rate": 2.3373983739837398e-05, "loss": 0.2214, "step": 232 }, { "epoch": 0.69, "learning_rate": 2.1747967479674798e-05, "loss": 0.2164, "step": 240 }, { "epoch": 0.72, "learning_rate": 2.0121951219512197e-05, "loss": 0.2329, "step": 248 }, { "epoch": 0.72, "eval_accuracy": 0.9097791798107255, "eval_f1": 0.9134382566585957, "eval_loss": 0.2206372767686844, "eval_precision": 0.8632723112128147, "eval_recall": 0.9697943444730077, "eval_runtime": 14.8474, "eval_samples_per_second": 213.506, "eval_steps_per_second": 26.739, "step": 250 }, { "epoch": 0.74, "learning_rate": 1.8495934959349594e-05, "loss": 0.1998, "step": 256 }, { "epoch": 0.76, "learning_rate": 1.6869918699186994e-05, "loss": 0.2386, "step": 264 }, { "epoch": 0.78, "learning_rate": 1.524390243902439e-05, "loss": 0.1922, "step": 272 }, { "epoch": 0.79, "eval_accuracy": 0.9287066246056782, "eval_f1": 0.9282083862770013, "eval_loss": 0.19086162745952606, "eval_precision": 0.917713567839196, "eval_recall": 0.9389460154241646, "eval_runtime": 14.7603, "eval_samples_per_second": 214.766, "eval_steps_per_second": 26.896, "step": 275 }, { "epoch": 0.81, "learning_rate": 1.3617886178861788e-05, "loss": 0.1785, "step": 280 }, { "epoch": 0.83, "learning_rate": 1.1991869918699188e-05, "loss": 0.2006, "step": 288 }, { "epoch": 0.85, "learning_rate": 1.0365853658536585e-05, "loss": 0.2012, "step": 296 }, { "epoch": 0.87, "eval_accuracy": 0.9160883280757098, "eval_f1": 0.9183046683046682, "eval_loss": 0.21321891248226166, "eval_precision": 0.8794117647058823, "eval_recall": 0.9607969151670951, "eval_runtime": 14.8364, "eval_samples_per_second": 213.663, "eval_steps_per_second": 26.758, "step": 300 }, { "epoch": 0.88, "learning_rate": 8.739837398373985e-06, "loss": 0.205, "step": 304 }, { "epoch": 0.9, "learning_rate": 7.113821138211382e-06, "loss": 0.1995, "step": 312 }, { "epoch": 0.92, "learning_rate": 5.487804878048781e-06, "loss": 0.1804, "step": 320 }, { "epoch": 0.94, "eval_accuracy": 0.9264984227129337, "eval_f1": 0.9260552205649001, "eval_loss": 0.1847076267004013, "eval_precision": 0.9147335423197492, "eval_recall": 0.9376606683804627, "eval_runtime": 15.3041, "eval_samples_per_second": 207.135, "eval_steps_per_second": 25.941, "step": 325 } ], "max_steps": 346, "num_train_epochs": 1, "total_flos": 619955424192000.0, "trial_name": null, "trial_params": null }