{ "best_metric": 0.1803661435842514, "best_model_checkpoint": "./models/results_one_liners_47/checkpoint-250", "epoch": 0.721110510185686, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 0.7024, "step": 8 }, { "epoch": 0.05, "learning_rate": 8.000000000000001e-06, "loss": 0.6948, "step": 16 }, { "epoch": 0.07, "learning_rate": 1.2e-05, "loss": 0.6917, "step": 24 }, { "epoch": 0.07, "eval_accuracy": 0.5394321766561514, "eval_f1": 0.6806649168853892, "eval_loss": 0.6824995279312134, "eval_precision": 0.5159151193633952, "eval_recall": 1.0, "eval_runtime": 14.6819, "eval_samples_per_second": 215.912, "eval_steps_per_second": 27.04, "step": 25 }, { "epoch": 0.09, "learning_rate": 1.6000000000000003e-05, "loss": 0.6767, "step": 32 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 0.6376, "step": 40 }, { "epoch": 0.14, "learning_rate": 2.4e-05, "loss": 0.4677, "step": 48 }, { "epoch": 0.14, "eval_accuracy": 0.8384858044164037, "eval_f1": 0.8515942028985507, "eval_loss": 0.37244707345962524, "eval_precision": 0.7756071805702217, "eval_recall": 0.9440874035989717, "eval_runtime": 14.4869, "eval_samples_per_second": 218.818, "eval_steps_per_second": 27.404, "step": 50 }, { "epoch": 0.16, "learning_rate": 2.8000000000000003e-05, "loss": 0.4309, "step": 56 }, { "epoch": 0.18, "learning_rate": 3.2000000000000005e-05, "loss": 0.3928, "step": 64 }, { "epoch": 0.21, "learning_rate": 3.6e-05, "loss": 0.4007, "step": 72 }, { "epoch": 0.22, "eval_accuracy": 0.8406940063091483, "eval_f1": 0.8573043232551568, "eval_loss": 0.36749163269996643, "eval_precision": 0.7650025214321735, "eval_recall": 0.9749357326478149, "eval_runtime": 14.6324, "eval_samples_per_second": 216.643, "eval_steps_per_second": 27.132, "step": 75 }, { "epoch": 0.23, "learning_rate": 4e-05, "loss": 0.3881, "step": 80 }, { "epoch": 0.25, "learning_rate": 4.4000000000000006e-05, "loss": 0.3478, "step": 88 }, { "epoch": 0.28, "learning_rate": 4.8e-05, "loss": 0.3324, "step": 96 }, { "epoch": 0.29, "eval_accuracy": 0.9094637223974763, "eval_f1": 0.9070294784580499, "eval_loss": 0.2337632179260254, "eval_precision": 0.914435009797518, "eval_recall": 0.8997429305912596, "eval_runtime": 14.2433, "eval_samples_per_second": 222.561, "eval_steps_per_second": 27.873, "step": 100 }, { "epoch": 0.3, "learning_rate": 4.9186991869918704e-05, "loss": 0.2844, "step": 104 }, { "epoch": 0.32, "learning_rate": 4.75609756097561e-05, "loss": 0.3181, "step": 112 }, { "epoch": 0.35, "learning_rate": 4.59349593495935e-05, "loss": 0.3595, "step": 120 }, { "epoch": 0.36, "eval_accuracy": 0.8353312302839117, "eval_f1": 0.8033157498116051, "eval_loss": 0.3661021888256073, "eval_precision": 0.970856102003643, "eval_recall": 0.6850899742930592, "eval_runtime": 15.0232, "eval_samples_per_second": 211.007, "eval_steps_per_second": 26.426, "step": 125 }, { "epoch": 0.37, "learning_rate": 4.451219512195122e-05, "loss": 0.3999, "step": 128 }, { "epoch": 0.39, "learning_rate": 4.2886178861788616e-05, "loss": 0.2809, "step": 136 }, { "epoch": 0.42, "learning_rate": 4.126016260162602e-05, "loss": 0.2538, "step": 144 }, { "epoch": 0.43, "eval_accuracy": 0.8905362776025236, "eval_f1": 0.8962630792227205, "eval_loss": 0.2805730998516083, "eval_precision": 0.8378982671883733, "eval_recall": 0.9633676092544987, "eval_runtime": 15.3124, "eval_samples_per_second": 207.021, "eval_steps_per_second": 25.927, "step": 150 }, { "epoch": 0.44, "learning_rate": 3.9634146341463416e-05, "loss": 0.2226, "step": 152 }, { "epoch": 0.46, "learning_rate": 3.800813008130081e-05, "loss": 0.2722, "step": 160 }, { "epoch": 0.48, "learning_rate": 3.6382113821138216e-05, "loss": 0.2625, "step": 168 }, { "epoch": 0.5, "eval_accuracy": 0.9123028391167193, "eval_f1": 0.9135572139303482, "eval_loss": 0.2242085486650467, "eval_precision": 0.8849397590361445, "eval_recall": 0.9440874035989717, "eval_runtime": 15.1268, "eval_samples_per_second": 209.561, "eval_steps_per_second": 26.245, "step": 175 }, { "epoch": 0.51, "learning_rate": 3.475609756097561e-05, "loss": 0.2097, "step": 176 }, { "epoch": 0.53, "learning_rate": 3.313008130081301e-05, "loss": 0.2623, "step": 184 }, { "epoch": 0.55, "learning_rate": 3.150406504065041e-05, "loss": 0.2493, "step": 192 }, { "epoch": 0.58, "learning_rate": 2.9878048780487805e-05, "loss": 0.2118, "step": 200 }, { "epoch": 0.58, "eval_accuracy": 0.9242902208201893, "eval_f1": 0.9222797927461142, "eval_loss": 0.19594888389110565, "eval_precision": 0.9295039164490861, "eval_recall": 0.9151670951156813, "eval_runtime": 15.3657, "eval_samples_per_second": 206.304, "eval_steps_per_second": 25.837, "step": 200 }, { "epoch": 0.6, "learning_rate": 2.8252032520325205e-05, "loss": 0.267, "step": 208 }, { "epoch": 0.62, "learning_rate": 2.66260162601626e-05, "loss": 0.2188, "step": 216 }, { "epoch": 0.65, "learning_rate": 2.5e-05, "loss": 0.2165, "step": 224 }, { "epoch": 0.65, "eval_accuracy": 0.9293375394321767, "eval_f1": 0.9271307742355238, "eval_loss": 0.191427543759346, "eval_precision": 0.9387351778656127, "eval_recall": 0.9158097686375322, "eval_runtime": 14.3922, "eval_samples_per_second": 220.258, "eval_steps_per_second": 27.584, "step": 225 }, { "epoch": 0.67, "learning_rate": 2.3373983739837398e-05, "loss": 0.1911, "step": 232 }, { "epoch": 0.69, "learning_rate": 2.1747967479674798e-05, "loss": 0.2288, "step": 240 }, { "epoch": 0.72, "learning_rate": 2.0121951219512197e-05, "loss": 0.2024, "step": 248 }, { "epoch": 0.72, "eval_accuracy": 0.9296529968454259, "eval_f1": 0.9278550630863798, "eval_loss": 0.1803661435842514, "eval_precision": 0.9342019543973942, "eval_recall": 0.9215938303341902, "eval_runtime": 14.3698, "eval_samples_per_second": 220.602, "eval_steps_per_second": 27.627, "step": 250 } ], "max_steps": 346, "num_train_epochs": 1, "total_flos": 476888787840000.0, "trial_name": null, "trial_params": null }