| { | |
| "best_metric": 0.1858806610107422, | |
| "best_model_checkpoint": "./models/results_one_liners_977/checkpoint-325", | |
| "epoch": 0.9374436632413917, | |
| "global_step": 325, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.6981, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.6942, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.6922, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.5154574132492113, | |
| "eval_f1": 0.6695352839931153, | |
| "eval_loss": 0.6819674968719482, | |
| "eval_precision": 0.5032341526520052, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 20.0554, | |
| "eval_samples_per_second": 158.062, | |
| "eval_steps_per_second": 19.795, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.6758, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6063, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.4676, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.8643533123028391, | |
| "eval_f1": 0.8701690821256038, | |
| "eval_loss": 0.32924458384513855, | |
| "eval_precision": 0.820615034168565, | |
| "eval_recall": 0.9260925449871465, | |
| "eval_runtime": 19.9782, | |
| "eval_samples_per_second": 158.673, | |
| "eval_steps_per_second": 19.872, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.3596, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.3063, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.3641, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.886435331230284, | |
| "eval_f1": 0.8778833107191316, | |
| "eval_loss": 0.28779253363609314, | |
| "eval_precision": 0.9295977011494253, | |
| "eval_recall": 0.8316195372750642, | |
| "eval_runtime": 20.0332, | |
| "eval_samples_per_second": 158.237, | |
| "eval_steps_per_second": 19.817, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4e-05, | |
| "loss": 0.3659, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.256, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.2936, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.8936908517350157, | |
| "eval_f1": 0.8964669738863287, | |
| "eval_loss": 0.26408979296684265, | |
| "eval_precision": 0.8587404355503238, | |
| "eval_recall": 0.9376606683804627, | |
| "eval_runtime": 19.9923, | |
| "eval_samples_per_second": 158.561, | |
| "eval_steps_per_second": 19.858, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.9186991869918704e-05, | |
| "loss": 0.3169, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.75609756097561e-05, | |
| "loss": 0.2808, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.59349593495935e-05, | |
| "loss": 0.2615, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.9066246056782334, | |
| "eval_f1": 0.9083591331269351, | |
| "eval_loss": 0.2332078069448471, | |
| "eval_precision": 0.8763440860215054, | |
| "eval_recall": 0.9428020565552699, | |
| "eval_runtime": 20.0114, | |
| "eval_samples_per_second": 158.41, | |
| "eval_steps_per_second": 19.839, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.43089430894309e-05, | |
| "loss": 0.2801, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.26829268292683e-05, | |
| "loss": 0.2454, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.105691056910569e-05, | |
| "loss": 0.2407, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.8965299684542587, | |
| "eval_f1": 0.9015606242496998, | |
| "eval_loss": 0.25834545493125916, | |
| "eval_precision": 0.8457207207207207, | |
| "eval_recall": 0.9652956298200515, | |
| "eval_runtime": 19.9859, | |
| "eval_samples_per_second": 158.611, | |
| "eval_steps_per_second": 19.864, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.943089430894309e-05, | |
| "loss": 0.2835, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.780487804878049e-05, | |
| "loss": 0.2684, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.617886178861789e-05, | |
| "loss": 0.2493, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.9018927444794953, | |
| "eval_f1": 0.895180316818335, | |
| "eval_loss": 0.25140437483787537, | |
| "eval_precision": 0.9411764705882353, | |
| "eval_recall": 0.8534704370179949, | |
| "eval_runtime": 19.9806, | |
| "eval_samples_per_second": 158.654, | |
| "eval_steps_per_second": 19.869, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 3.4552845528455286e-05, | |
| "loss": 0.2957, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 3.292682926829269e-05, | |
| "loss": 0.2412, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 3.150406504065041e-05, | |
| "loss": 0.2066, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.9878048780487805e-05, | |
| "loss": 0.2657, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.9201892744479495, | |
| "eval_f1": 0.9199620373299588, | |
| "eval_loss": 0.2050713449716568, | |
| "eval_precision": 0.9059190031152647, | |
| "eval_recall": 0.9344473007712082, | |
| "eval_runtime": 20.0006, | |
| "eval_samples_per_second": 158.495, | |
| "eval_steps_per_second": 19.849, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.8252032520325205e-05, | |
| "loss": 0.2702, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.66260162601626e-05, | |
| "loss": 0.2435, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.2281, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.905993690851735, | |
| "eval_f1": 0.9101326899879373, | |
| "eval_loss": 0.23607869446277618, | |
| "eval_precision": 0.8573863636363637, | |
| "eval_recall": 0.9697943444730077, | |
| "eval_runtime": 19.9795, | |
| "eval_samples_per_second": 158.663, | |
| "eval_steps_per_second": 19.87, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.3373983739837398e-05, | |
| "loss": 0.233, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.1747967479674798e-05, | |
| "loss": 0.2294, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.0121951219512197e-05, | |
| "loss": 0.1933, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.9186119873817035, | |
| "eval_f1": 0.9201732673267328, | |
| "eval_loss": 0.206766277551651, | |
| "eval_precision": 0.8872315035799523, | |
| "eval_recall": 0.955655526992288, | |
| "eval_runtime": 19.9867, | |
| "eval_samples_per_second": 158.605, | |
| "eval_steps_per_second": 19.863, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.8495934959349594e-05, | |
| "loss": 0.2417, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.6869918699186994e-05, | |
| "loss": 0.213, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.524390243902439e-05, | |
| "loss": 0.1522, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.9277602523659306, | |
| "eval_f1": 0.9273707580082461, | |
| "eval_loss": 0.21532806754112244, | |
| "eval_precision": 0.915466499686913, | |
| "eval_recall": 0.9395886889460154, | |
| "eval_runtime": 20.029, | |
| "eval_samples_per_second": 158.27, | |
| "eval_steps_per_second": 19.821, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.3617886178861788e-05, | |
| "loss": 0.2011, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.1991869918699188e-05, | |
| "loss": 0.2349, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.0365853658536585e-05, | |
| "loss": 0.2403, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.9271293375394322, | |
| "eval_f1": 0.9257473481195757, | |
| "eval_loss": 0.1873028576374054, | |
| "eval_precision": 0.9260450160771704, | |
| "eval_recall": 0.9254498714652957, | |
| "eval_runtime": 19.9617, | |
| "eval_samples_per_second": 158.804, | |
| "eval_steps_per_second": 19.888, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.739837398373985e-06, | |
| "loss": 0.1999, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 7.113821138211382e-06, | |
| "loss": 0.1742, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 5.487804878048781e-06, | |
| "loss": 0.1809, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.9277602523659306, | |
| "eval_f1": 0.9287713841368586, | |
| "eval_loss": 0.1858806610107422, | |
| "eval_precision": 0.8999397227245328, | |
| "eval_recall": 0.9595115681233933, | |
| "eval_runtime": 20.0389, | |
| "eval_samples_per_second": 158.192, | |
| "eval_steps_per_second": 19.811, | |
| "step": 325 | |
| } | |
| ], | |
| "max_steps": 346, | |
| "num_train_epochs": 1, | |
| "total_flos": 619955424192000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |