| { | |
| "best_metric": 0.1847076267004013, | |
| "best_model_checkpoint": "./models/results_one_liners_23/checkpoint-325", | |
| "epoch": 0.9374436632413917, | |
| "global_step": 325, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.694, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.6942, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.6865, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.4908517350157729, | |
| "eval_f1": 0.6584849767245028, | |
| "eval_loss": 0.6858492493629456, | |
| "eval_precision": 0.4908517350157729, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 15.5866, | |
| "eval_samples_per_second": 203.38, | |
| "eval_steps_per_second": 25.471, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.6792, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6488, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.4778, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.8091482649842271, | |
| "eval_f1": 0.8317107093184979, | |
| "eval_loss": 0.4457879066467285, | |
| "eval_precision": 0.73320255026974, | |
| "eval_recall": 0.9607969151670951, | |
| "eval_runtime": 15.5131, | |
| "eval_samples_per_second": 204.343, | |
| "eval_steps_per_second": 25.591, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.44, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.3451, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.3646, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.8700315457413249, | |
| "eval_f1": 0.8557422969187675, | |
| "eval_loss": 0.30672192573547363, | |
| "eval_precision": 0.94, | |
| "eval_recall": 0.7853470437017995, | |
| "eval_runtime": 15.2999, | |
| "eval_samples_per_second": 207.191, | |
| "eval_steps_per_second": 25.948, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4e-05, | |
| "loss": 0.3381, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.2767, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.3318, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.9009463722397476, | |
| "eval_f1": 0.9018750000000001, | |
| "eval_loss": 0.2531206011772156, | |
| "eval_precision": 0.8777372262773723, | |
| "eval_recall": 0.9273778920308483, | |
| "eval_runtime": 15.1416, | |
| "eval_samples_per_second": 209.357, | |
| "eval_steps_per_second": 26.219, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.9390243902439024e-05, | |
| "loss": 0.2726, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.776422764227643e-05, | |
| "loss": 0.3775, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.613821138211382e-05, | |
| "loss": 0.4153, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.898422712933754, | |
| "eval_f1": 0.9005558987029031, | |
| "eval_loss": 0.258443683385849, | |
| "eval_precision": 0.8668252080856124, | |
| "eval_recall": 0.9370179948586118, | |
| "eval_runtime": 15.1593, | |
| "eval_samples_per_second": 209.112, | |
| "eval_steps_per_second": 26.189, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.451219512195122e-05, | |
| "loss": 0.3625, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.2886178861788616e-05, | |
| "loss": 0.3033, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.126016260162602e-05, | |
| "loss": 0.3392, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.91198738170347, | |
| "eval_f1": 0.9094449853943525, | |
| "eval_loss": 0.22280997037887573, | |
| "eval_precision": 0.9186885245901639, | |
| "eval_recall": 0.9003856041131105, | |
| "eval_runtime": 14.7479, | |
| "eval_samples_per_second": 214.945, | |
| "eval_steps_per_second": 26.919, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.9634146341463416e-05, | |
| "loss": 0.2677, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.800813008130081e-05, | |
| "loss": 0.2286, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.6382113821138216e-05, | |
| "loss": 0.284, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.8958990536277602, | |
| "eval_f1": 0.9009603841536614, | |
| "eval_loss": 0.2512595057487488, | |
| "eval_precision": 0.8451576576576577, | |
| "eval_recall": 0.9646529562982005, | |
| "eval_runtime": 14.9616, | |
| "eval_samples_per_second": 211.876, | |
| "eval_steps_per_second": 26.535, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 3.475609756097561e-05, | |
| "loss": 0.2726, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 3.313008130081301e-05, | |
| "loss": 0.2476, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 3.150406504065041e-05, | |
| "loss": 0.2403, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.9878048780487805e-05, | |
| "loss": 0.1909, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.9148264984227129, | |
| "eval_f1": 0.9126778783958602, | |
| "eval_loss": 0.2199183851480484, | |
| "eval_precision": 0.9186197916666666, | |
| "eval_recall": 0.9068123393316195, | |
| "eval_runtime": 15.3053, | |
| "eval_samples_per_second": 207.118, | |
| "eval_steps_per_second": 25.939, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.8252032520325205e-05, | |
| "loss": 0.214, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.66260162601626e-05, | |
| "loss": 0.2807, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.2116, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.917981072555205, | |
| "eval_f1": 0.9144736842105263, | |
| "eval_loss": 0.2060399204492569, | |
| "eval_precision": 0.9366576819407008, | |
| "eval_recall": 0.8933161953727506, | |
| "eval_runtime": 15.179, | |
| "eval_samples_per_second": 208.841, | |
| "eval_steps_per_second": 26.155, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.3373983739837398e-05, | |
| "loss": 0.2214, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.1747967479674798e-05, | |
| "loss": 0.2164, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.0121951219512197e-05, | |
| "loss": 0.2329, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.9097791798107255, | |
| "eval_f1": 0.9134382566585957, | |
| "eval_loss": 0.2206372767686844, | |
| "eval_precision": 0.8632723112128147, | |
| "eval_recall": 0.9697943444730077, | |
| "eval_runtime": 14.8474, | |
| "eval_samples_per_second": 213.506, | |
| "eval_steps_per_second": 26.739, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.8495934959349594e-05, | |
| "loss": 0.1998, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.6869918699186994e-05, | |
| "loss": 0.2386, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.524390243902439e-05, | |
| "loss": 0.1922, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.9287066246056782, | |
| "eval_f1": 0.9282083862770013, | |
| "eval_loss": 0.19086162745952606, | |
| "eval_precision": 0.917713567839196, | |
| "eval_recall": 0.9389460154241646, | |
| "eval_runtime": 14.7603, | |
| "eval_samples_per_second": 214.766, | |
| "eval_steps_per_second": 26.896, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.3617886178861788e-05, | |
| "loss": 0.1785, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.1991869918699188e-05, | |
| "loss": 0.2006, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.0365853658536585e-05, | |
| "loss": 0.2012, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.9160883280757098, | |
| "eval_f1": 0.9183046683046682, | |
| "eval_loss": 0.21321891248226166, | |
| "eval_precision": 0.8794117647058823, | |
| "eval_recall": 0.9607969151670951, | |
| "eval_runtime": 14.8364, | |
| "eval_samples_per_second": 213.663, | |
| "eval_steps_per_second": 26.758, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.739837398373985e-06, | |
| "loss": 0.205, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 7.113821138211382e-06, | |
| "loss": 0.1995, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 5.487804878048781e-06, | |
| "loss": 0.1804, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.9264984227129337, | |
| "eval_f1": 0.9260552205649001, | |
| "eval_loss": 0.1847076267004013, | |
| "eval_precision": 0.9147335423197492, | |
| "eval_recall": 0.9376606683804627, | |
| "eval_runtime": 15.3041, | |
| "eval_samples_per_second": 207.135, | |
| "eval_steps_per_second": 25.941, | |
| "step": 325 | |
| } | |
| ], | |
| "max_steps": 346, | |
| "num_train_epochs": 1, | |
| "total_flos": 619955424192000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |