diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100755--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,11257 @@ +{ + "best_metric": 0.4657398212512413, + "best_model_checkpoint": "/mnt/chenzhi/dialogzoo/finetune/txt2sql_picard_cosql/checkpoint-6848", + "epoch": 855.9933373712902, + "global_step": 6848, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 0.0001, + "loss": 8.9198, + "step": 1 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001, + "loss": 4.4541, + "step": 4 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001, + "loss": 2.0276, + "step": 8 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001, + "loss": 1.1989, + "step": 12 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001, + "loss": 0.8306, + "step": 16 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001, + "loss": 0.6585, + "step": 20 + }, + { + "epoch": 2.99, + "learning_rate": 0.0001, + "loss": 0.5417, + "step": 24 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001, + "loss": 0.4681, + "step": 28 + }, + { + "epoch": 3.99, + "learning_rate": 0.0001, + "loss": 0.4011, + "step": 32 + }, + { + "epoch": 4.5, + "learning_rate": 0.0001, + "loss": 0.3661, + "step": 36 + }, + { + "epoch": 4.99, + "learning_rate": 0.0001, + "loss": 0.3404, + "step": 40 + }, + { + "epoch": 5.5, + "learning_rate": 0.0001, + "loss": 0.3268, + "step": 44 + }, + { + "epoch": 5.99, + "learning_rate": 0.0001, + "loss": 0.2935, + "step": 48 + }, + { + "epoch": 6.5, + "learning_rate": 0.0001, + "loss": 0.2853, + "step": 52 + }, + { + "epoch": 6.99, + "learning_rate": 0.0001, + "loss": 0.2694, + "step": 56 + }, + { + "epoch": 7.5, + "learning_rate": 0.0001, + "loss": 0.2601, + "step": 60 + }, + { + "epoch": 7.99, + "learning_rate": 0.0001, + "loss": 0.2487, + "step": 64 + }, + { + "epoch": 7.99, + "eval_exact_match": 0.24726911618669314, + "eval_exec": 0.30883813306852037, + "eval_loss": 0.32532989978790283, + "eval_runtime": 219.0487, + "eval_samples_per_second": 5.935, + "step": 64 + }, + { + "epoch": 8.5, + "learning_rate": 0.0001, + "loss": 0.2352, + "step": 68 + }, + { + "epoch": 8.99, + "learning_rate": 0.0001, + "loss": 0.2257, + "step": 72 + }, + { + "epoch": 9.5, + "learning_rate": 0.0001, + "loss": 0.222, + "step": 76 + }, + { + "epoch": 9.99, + "learning_rate": 0.0001, + "loss": 0.2146, + "step": 80 + }, + { + "epoch": 10.5, + "learning_rate": 0.0001, + "loss": 0.2115, + "step": 84 + }, + { + "epoch": 10.99, + "learning_rate": 0.0001, + "loss": 0.2032, + "step": 88 + }, + { + "epoch": 11.5, + "learning_rate": 0.0001, + "loss": 0.1954, + "step": 92 + }, + { + "epoch": 11.99, + "learning_rate": 0.0001, + "loss": 0.186, + "step": 96 + }, + { + "epoch": 12.5, + "learning_rate": 0.0001, + "loss": 0.1808, + "step": 100 + }, + { + "epoch": 12.99, + "learning_rate": 0.0001, + "loss": 0.18, + "step": 104 + }, + { + "epoch": 13.5, + "learning_rate": 0.0001, + "loss": 0.177, + "step": 108 + }, + { + "epoch": 13.99, + "learning_rate": 0.0001, + "loss": 0.1714, + "step": 112 + }, + { + "epoch": 14.5, + "learning_rate": 0.0001, + "loss": 0.1714, + "step": 116 + }, + { + "epoch": 14.99, + "learning_rate": 0.0001, + "loss": 0.1615, + "step": 120 + }, + { + "epoch": 15.5, + "learning_rate": 0.0001, + "loss": 0.1599, + "step": 124 + }, + { + "epoch": 15.99, + "learning_rate": 0.0001, + "loss": 0.1565, + "step": 128 + }, + { + "epoch": 15.99, + "eval_exact_match": 0.33068520357497516, + "eval_exec": 0.3843098311817279, + "eval_loss": 0.29061898589134216, + "eval_runtime": 200.5643, + "eval_samples_per_second": 6.482, + "step": 128 + }, + { + "epoch": 16.5, + "learning_rate": 0.0001, + "loss": 0.1513, + "step": 132 + }, + { + "epoch": 16.99, + "learning_rate": 0.0001, + "loss": 0.143, + "step": 136 + }, + { + "epoch": 17.5, + "learning_rate": 0.0001, + "loss": 0.1426, + "step": 140 + }, + { + "epoch": 17.99, + "learning_rate": 0.0001, + "loss": 0.1403, + "step": 144 + }, + { + "epoch": 18.5, + "learning_rate": 0.0001, + "loss": 0.1441, + "step": 148 + }, + { + "epoch": 18.99, + "learning_rate": 0.0001, + "loss": 0.1378, + "step": 152 + }, + { + "epoch": 19.5, + "learning_rate": 0.0001, + "loss": 0.1344, + "step": 156 + }, + { + "epoch": 19.99, + "learning_rate": 0.0001, + "loss": 0.1293, + "step": 160 + }, + { + "epoch": 20.5, + "learning_rate": 0.0001, + "loss": 0.1314, + "step": 164 + }, + { + "epoch": 20.99, + "learning_rate": 0.0001, + "loss": 0.1219, + "step": 168 + }, + { + "epoch": 21.5, + "learning_rate": 0.0001, + "loss": 0.1196, + "step": 172 + }, + { + "epoch": 21.99, + "learning_rate": 0.0001, + "loss": 0.1192, + "step": 176 + }, + { + "epoch": 22.5, + "learning_rate": 0.0001, + "loss": 0.1203, + "step": 180 + }, + { + "epoch": 22.99, + "learning_rate": 0.0001, + "loss": 0.1189, + "step": 184 + }, + { + "epoch": 23.5, + "learning_rate": 0.0001, + "loss": 0.1154, + "step": 188 + }, + { + "epoch": 23.99, + "learning_rate": 0.0001, + "loss": 0.1142, + "step": 192 + }, + { + "epoch": 23.99, + "eval_exact_match": 0.3426017874875869, + "eval_exec": 0.4011916583912612, + "eval_loss": 0.28066790103912354, + "eval_runtime": 235.2994, + "eval_samples_per_second": 5.525, + "step": 192 + }, + { + "epoch": 24.5, + "learning_rate": 0.0001, + "loss": 0.1104, + "step": 196 + }, + { + "epoch": 24.99, + "learning_rate": 0.0001, + "loss": 0.1092, + "step": 200 + }, + { + "epoch": 25.5, + "learning_rate": 0.0001, + "loss": 0.1079, + "step": 204 + }, + { + "epoch": 25.99, + "learning_rate": 0.0001, + "loss": 0.1043, + "step": 208 + }, + { + "epoch": 26.5, + "learning_rate": 0.0001, + "loss": 0.1068, + "step": 212 + }, + { + "epoch": 26.99, + "learning_rate": 0.0001, + "loss": 0.1009, + "step": 216 + }, + { + "epoch": 27.5, + "learning_rate": 0.0001, + "loss": 0.1033, + "step": 220 + }, + { + "epoch": 27.99, + "learning_rate": 0.0001, + "loss": 0.1013, + "step": 224 + }, + { + "epoch": 28.5, + "learning_rate": 0.0001, + "loss": 0.0986, + "step": 228 + }, + { + "epoch": 28.99, + "learning_rate": 0.0001, + "loss": 0.0951, + "step": 232 + }, + { + "epoch": 29.5, + "learning_rate": 0.0001, + "loss": 0.0947, + "step": 236 + }, + { + "epoch": 29.99, + "learning_rate": 0.0001, + "loss": 0.0917, + "step": 240 + }, + { + "epoch": 30.5, + "learning_rate": 0.0001, + "loss": 0.0959, + "step": 244 + }, + { + "epoch": 30.99, + "learning_rate": 0.0001, + "loss": 0.0922, + "step": 248 + }, + { + "epoch": 31.5, + "learning_rate": 0.0001, + "loss": 0.0892, + "step": 252 + }, + { + "epoch": 31.99, + "learning_rate": 0.0001, + "loss": 0.0885, + "step": 256 + }, + { + "epoch": 31.99, + "eval_exact_match": 0.36742800397219466, + "eval_exec": 0.41509433962264153, + "eval_loss": 0.28681814670562744, + "eval_runtime": 236.9193, + "eval_samples_per_second": 5.487, + "step": 256 + }, + { + "epoch": 32.5, + "learning_rate": 0.0001, + "loss": 0.087, + "step": 260 + }, + { + "epoch": 32.99, + "learning_rate": 0.0001, + "loss": 0.0853, + "step": 264 + }, + { + "epoch": 33.5, + "learning_rate": 0.0001, + "loss": 0.0847, + "step": 268 + }, + { + "epoch": 33.99, + "learning_rate": 0.0001, + "loss": 0.0821, + "step": 272 + }, + { + "epoch": 34.5, + "learning_rate": 0.0001, + "loss": 0.0802, + "step": 276 + }, + { + "epoch": 34.99, + "learning_rate": 0.0001, + "loss": 0.084, + "step": 280 + }, + { + "epoch": 35.5, + "learning_rate": 0.0001, + "loss": 0.0844, + "step": 284 + }, + { + "epoch": 35.99, + "learning_rate": 0.0001, + "loss": 0.0803, + "step": 288 + }, + { + "epoch": 36.5, + "learning_rate": 0.0001, + "loss": 0.0786, + "step": 292 + }, + { + "epoch": 36.99, + "learning_rate": 0.0001, + "loss": 0.0735, + "step": 296 + }, + { + "epoch": 37.5, + "learning_rate": 0.0001, + "loss": 0.0784, + "step": 300 + }, + { + "epoch": 37.99, + "learning_rate": 0.0001, + "loss": 0.0733, + "step": 304 + }, + { + "epoch": 38.5, + "learning_rate": 0.0001, + "loss": 0.075, + "step": 308 + }, + { + "epoch": 38.99, + "learning_rate": 0.0001, + "loss": 0.0747, + "step": 312 + }, + { + "epoch": 39.5, + "learning_rate": 0.0001, + "loss": 0.0733, + "step": 316 + }, + { + "epoch": 39.99, + "learning_rate": 0.0001, + "loss": 0.0712, + "step": 320 + }, + { + "epoch": 39.99, + "eval_exact_match": 0.407149950347567, + "eval_exec": 0.44885799404170806, + "eval_loss": 0.29831913113594055, + "eval_runtime": 183.5809, + "eval_samples_per_second": 7.081, + "step": 320 + }, + { + "epoch": 40.5, + "learning_rate": 0.0001, + "loss": 0.0706, + "step": 324 + }, + { + "epoch": 40.99, + "learning_rate": 0.0001, + "loss": 0.071, + "step": 328 + }, + { + "epoch": 41.5, + "learning_rate": 0.0001, + "loss": 0.0675, + "step": 332 + }, + { + "epoch": 41.99, + "learning_rate": 0.0001, + "loss": 0.0663, + "step": 336 + }, + { + "epoch": 42.5, + "learning_rate": 0.0001, + "loss": 0.0652, + "step": 340 + }, + { + "epoch": 42.99, + "learning_rate": 0.0001, + "loss": 0.068, + "step": 344 + }, + { + "epoch": 43.5, + "learning_rate": 0.0001, + "loss": 0.066, + "step": 348 + }, + { + "epoch": 43.99, + "learning_rate": 0.0001, + "loss": 0.0658, + "step": 352 + }, + { + "epoch": 44.5, + "learning_rate": 0.0001, + "loss": 0.0628, + "step": 356 + }, + { + "epoch": 44.99, + "learning_rate": 0.0001, + "loss": 0.063, + "step": 360 + }, + { + "epoch": 45.5, + "learning_rate": 0.0001, + "loss": 0.0607, + "step": 364 + }, + { + "epoch": 45.99, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 368 + }, + { + "epoch": 46.5, + "learning_rate": 0.0001, + "loss": 0.06, + "step": 372 + }, + { + "epoch": 46.99, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 376 + }, + { + "epoch": 47.5, + "learning_rate": 0.0001, + "loss": 0.0605, + "step": 380 + }, + { + "epoch": 47.99, + "learning_rate": 0.0001, + "loss": 0.0586, + "step": 384 + }, + { + "epoch": 47.99, + "eval_exact_match": 0.423038728897716, + "eval_exec": 0.4637537239324727, + "eval_loss": 0.31259259581565857, + "eval_runtime": 192.9699, + "eval_samples_per_second": 6.737, + "step": 384 + }, + { + "epoch": 48.5, + "learning_rate": 0.0001, + "loss": 0.058, + "step": 388 + }, + { + "epoch": 48.99, + "learning_rate": 0.0001, + "loss": 0.0573, + "step": 392 + }, + { + "epoch": 49.5, + "learning_rate": 0.0001, + "loss": 0.0594, + "step": 396 + }, + { + "epoch": 49.99, + "learning_rate": 0.0001, + "loss": 0.0552, + "step": 400 + }, + { + "epoch": 50.5, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 404 + }, + { + "epoch": 50.99, + "learning_rate": 0.0001, + "loss": 0.0537, + "step": 408 + }, + { + "epoch": 51.5, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 412 + }, + { + "epoch": 51.99, + "learning_rate": 0.0001, + "loss": 0.0555, + "step": 416 + }, + { + "epoch": 52.5, + "learning_rate": 0.0001, + "loss": 0.0515, + "step": 420 + }, + { + "epoch": 52.99, + "learning_rate": 0.0001, + "loss": 0.0522, + "step": 424 + }, + { + "epoch": 53.5, + "learning_rate": 0.0001, + "loss": 0.0507, + "step": 428 + }, + { + "epoch": 53.99, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 432 + }, + { + "epoch": 54.5, + "learning_rate": 0.0001, + "loss": 0.0492, + "step": 436 + }, + { + "epoch": 54.99, + "learning_rate": 0.0001, + "loss": 0.0503, + "step": 440 + }, + { + "epoch": 55.5, + "learning_rate": 0.0001, + "loss": 0.0484, + "step": 444 + }, + { + "epoch": 55.99, + "learning_rate": 0.0001, + "loss": 0.0486, + "step": 448 + }, + { + "epoch": 55.99, + "eval_exact_match": 0.4270109235352532, + "eval_exec": 0.4657398212512413, + "eval_loss": 0.32657375931739807, + "eval_runtime": 181.0806, + "eval_samples_per_second": 7.179, + "step": 448 + }, + { + "epoch": 56.5, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 452 + }, + { + "epoch": 56.99, + "learning_rate": 0.0001, + "loss": 0.0505, + "step": 456 + }, + { + "epoch": 57.5, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 460 + }, + { + "epoch": 57.99, + "learning_rate": 0.0001, + "loss": 0.0487, + "step": 464 + }, + { + "epoch": 58.5, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 468 + }, + { + "epoch": 58.99, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 472 + }, + { + "epoch": 59.5, + "learning_rate": 0.0001, + "loss": 0.0449, + "step": 476 + }, + { + "epoch": 59.99, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 480 + }, + { + "epoch": 60.5, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 484 + }, + { + "epoch": 60.99, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 488 + }, + { + "epoch": 61.5, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 492 + }, + { + "epoch": 61.99, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 496 + }, + { + "epoch": 62.5, + "learning_rate": 0.0001, + "loss": 0.0429, + "step": 500 + }, + { + "epoch": 62.99, + "learning_rate": 0.0001, + "loss": 0.0412, + "step": 504 + }, + { + "epoch": 63.5, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 508 + }, + { + "epoch": 63.99, + "learning_rate": 0.0001, + "loss": 0.0412, + "step": 512 + }, + { + "epoch": 63.99, + "eval_exact_match": 0.40913604766633566, + "eval_exec": 0.45878848063555117, + "eval_loss": 0.3272022306919098, + "eval_runtime": 205.684, + "eval_samples_per_second": 6.32, + "step": 512 + }, + { + "epoch": 64.5, + "learning_rate": 0.0001, + "loss": 0.0397, + "step": 516 + }, + { + "epoch": 64.99, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 520 + }, + { + "epoch": 65.5, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 524 + }, + { + "epoch": 65.99, + "learning_rate": 0.0001, + "loss": 0.0418, + "step": 528 + }, + { + "epoch": 66.5, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 532 + }, + { + "epoch": 66.99, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 536 + }, + { + "epoch": 67.5, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 540 + }, + { + "epoch": 67.99, + "learning_rate": 0.0001, + "loss": 0.0384, + "step": 544 + }, + { + "epoch": 68.5, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 548 + }, + { + "epoch": 68.99, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 552 + }, + { + "epoch": 69.5, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 556 + }, + { + "epoch": 69.99, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 560 + }, + { + "epoch": 70.5, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 564 + }, + { + "epoch": 70.99, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 568 + }, + { + "epoch": 71.5, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 572 + }, + { + "epoch": 71.99, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 576 + }, + { + "epoch": 71.99, + "eval_exact_match": 0.435948361469712, + "eval_exec": 0.4766633565044687, + "eval_loss": 0.3480900824069977, + "eval_runtime": 174.2765, + "eval_samples_per_second": 7.459, + "step": 576 + }, + { + "epoch": 72.5, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 580 + }, + { + "epoch": 72.99, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 584 + }, + { + "epoch": 73.5, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 588 + }, + { + "epoch": 73.99, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 592 + }, + { + "epoch": 74.5, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 596 + }, + { + "epoch": 74.99, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 600 + }, + { + "epoch": 75.5, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 604 + }, + { + "epoch": 75.99, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 608 + }, + { + "epoch": 76.5, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 612 + }, + { + "epoch": 76.99, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 616 + }, + { + "epoch": 77.5, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 620 + }, + { + "epoch": 77.99, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 624 + }, + { + "epoch": 78.5, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 628 + }, + { + "epoch": 78.99, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 632 + }, + { + "epoch": 79.5, + "learning_rate": 0.0001, + "loss": 0.0297, + "step": 636 + }, + { + "epoch": 79.99, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 640 + }, + { + "epoch": 79.99, + "eval_exact_match": 0.42899702085402186, + "eval_exec": 0.46871896722939427, + "eval_loss": 0.3477668762207031, + "eval_runtime": 228.958, + "eval_samples_per_second": 5.678, + "step": 640 + }, + { + "epoch": 80.5, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 644 + }, + { + "epoch": 80.99, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 648 + }, + { + "epoch": 81.5, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 652 + }, + { + "epoch": 81.99, + "learning_rate": 0.0001, + "loss": 0.0286, + "step": 656 + }, + { + "epoch": 82.5, + "learning_rate": 0.0001, + "loss": 0.0299, + "step": 660 + }, + { + "epoch": 82.99, + "learning_rate": 0.0001, + "loss": 0.0294, + "step": 664 + }, + { + "epoch": 83.5, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 668 + }, + { + "epoch": 83.99, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 672 + }, + { + "epoch": 84.5, + "learning_rate": 0.0001, + "loss": 0.0267, + "step": 676 + }, + { + "epoch": 84.99, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 680 + }, + { + "epoch": 85.5, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 684 + }, + { + "epoch": 85.99, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 688 + }, + { + "epoch": 86.5, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 692 + }, + { + "epoch": 86.99, + "learning_rate": 0.0001, + "loss": 0.0259, + "step": 696 + }, + { + "epoch": 87.5, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 700 + }, + { + "epoch": 87.99, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 704 + }, + { + "epoch": 87.99, + "eval_exact_match": 0.423038728897716, + "eval_exec": 0.46971201588877853, + "eval_loss": 0.36298030614852905, + "eval_runtime": 237.4093, + "eval_samples_per_second": 5.476, + "step": 704 + }, + { + "epoch": 88.5, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 708 + }, + { + "epoch": 88.99, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 712 + }, + { + "epoch": 89.5, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 716 + }, + { + "epoch": 89.99, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 720 + }, + { + "epoch": 90.5, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 724 + }, + { + "epoch": 90.99, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 728 + }, + { + "epoch": 91.5, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 732 + }, + { + "epoch": 91.99, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 736 + }, + { + "epoch": 92.5, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 740 + }, + { + "epoch": 92.99, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 744 + }, + { + "epoch": 93.5, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 748 + }, + { + "epoch": 93.99, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 752 + }, + { + "epoch": 94.5, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 756 + }, + { + "epoch": 94.99, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 760 + }, + { + "epoch": 95.5, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 764 + }, + { + "epoch": 95.99, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 768 + }, + { + "epoch": 95.99, + "eval_exact_match": 0.4329692154915591, + "eval_exec": 0.46871896722939427, + "eval_loss": 0.3830316960811615, + "eval_runtime": 229.6043, + "eval_samples_per_second": 5.662, + "step": 768 + }, + { + "epoch": 96.5, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 772 + }, + { + "epoch": 96.99, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 776 + }, + { + "epoch": 97.5, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 780 + }, + { + "epoch": 97.99, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 784 + }, + { + "epoch": 98.5, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 788 + }, + { + "epoch": 98.99, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 792 + }, + { + "epoch": 99.5, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 796 + }, + { + "epoch": 99.99, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 800 + }, + { + "epoch": 100.5, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 804 + }, + { + "epoch": 100.99, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 808 + }, + { + "epoch": 101.5, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 812 + }, + { + "epoch": 101.99, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 816 + }, + { + "epoch": 102.5, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 820 + }, + { + "epoch": 102.99, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 824 + }, + { + "epoch": 103.5, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 828 + }, + { + "epoch": 103.99, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 832 + }, + { + "epoch": 103.99, + "eval_exact_match": 0.41807348560079444, + "eval_exec": 0.4667328699106256, + "eval_loss": 0.388680100440979, + "eval_runtime": 220.6138, + "eval_samples_per_second": 5.893, + "step": 832 + }, + { + "epoch": 104.5, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 836 + }, + { + "epoch": 104.99, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 840 + }, + { + "epoch": 105.5, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 844 + }, + { + "epoch": 105.99, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 848 + }, + { + "epoch": 106.5, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 852 + }, + { + "epoch": 106.99, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 856 + }, + { + "epoch": 107.5, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 860 + }, + { + "epoch": 107.99, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 864 + }, + { + "epoch": 108.5, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 868 + }, + { + "epoch": 108.99, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 872 + }, + { + "epoch": 109.5, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 876 + }, + { + "epoch": 109.99, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 880 + }, + { + "epoch": 110.5, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 884 + }, + { + "epoch": 110.99, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 888 + }, + { + "epoch": 111.5, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 892 + }, + { + "epoch": 111.99, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 896 + }, + { + "epoch": 111.99, + "eval_exact_match": 0.41012909632572, + "eval_exec": 0.464746772591857, + "eval_loss": 0.3992396891117096, + "eval_runtime": 240.1288, + "eval_samples_per_second": 5.414, + "step": 896 + }, + { + "epoch": 112.5, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 900 + }, + { + "epoch": 112.99, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 904 + }, + { + "epoch": 113.5, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 908 + }, + { + "epoch": 113.99, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 912 + }, + { + "epoch": 114.5, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 916 + }, + { + "epoch": 114.99, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 920 + }, + { + "epoch": 115.5, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 924 + }, + { + "epoch": 115.99, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 928 + }, + { + "epoch": 116.5, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 932 + }, + { + "epoch": 116.99, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 936 + }, + { + "epoch": 117.5, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 940 + }, + { + "epoch": 117.99, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 944 + }, + { + "epoch": 118.5, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 948 + }, + { + "epoch": 118.99, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 952 + }, + { + "epoch": 119.5, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 956 + }, + { + "epoch": 119.99, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 960 + }, + { + "epoch": 119.99, + "eval_exact_match": 0.4329692154915591, + "eval_exec": 0.4746772591857001, + "eval_loss": 0.42190492153167725, + "eval_runtime": 202.7706, + "eval_samples_per_second": 6.411, + "step": 960 + }, + { + "epoch": 120.5, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 964 + }, + { + "epoch": 120.99, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 968 + }, + { + "epoch": 121.5, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 972 + }, + { + "epoch": 121.99, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 976 + }, + { + "epoch": 122.5, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 980 + }, + { + "epoch": 122.99, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 984 + }, + { + "epoch": 123.5, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 988 + }, + { + "epoch": 123.99, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 992 + }, + { + "epoch": 124.5, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 996 + }, + { + "epoch": 124.99, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 1000 + }, + { + "epoch": 125.5, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 1004 + }, + { + "epoch": 125.99, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 1008 + }, + { + "epoch": 126.5, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 1012 + }, + { + "epoch": 126.99, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 1016 + }, + { + "epoch": 127.5, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 1020 + }, + { + "epoch": 127.99, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 1024 + }, + { + "epoch": 127.99, + "eval_exact_match": 0.43892750744786496, + "eval_exec": 0.4856007944389275, + "eval_loss": 0.4193364083766937, + "eval_runtime": 203.6399, + "eval_samples_per_second": 6.384, + "step": 1024 + }, + { + "epoch": 128.5, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 1028 + }, + { + "epoch": 128.99, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 1032 + }, + { + "epoch": 129.5, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 1036 + }, + { + "epoch": 129.99, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 1040 + }, + { + "epoch": 130.5, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 1044 + }, + { + "epoch": 130.99, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 1048 + }, + { + "epoch": 131.5, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 1052 + }, + { + "epoch": 131.99, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 1056 + }, + { + "epoch": 132.5, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 1060 + }, + { + "epoch": 132.99, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 1064 + }, + { + "epoch": 133.5, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 1068 + }, + { + "epoch": 133.99, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 1072 + }, + { + "epoch": 134.5, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 1076 + }, + { + "epoch": 134.99, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 1080 + }, + { + "epoch": 135.5, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 1084 + }, + { + "epoch": 135.99, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 1088 + }, + { + "epoch": 135.99, + "eval_exact_match": 0.4240317775571003, + "eval_exec": 0.4726911618669315, + "eval_loss": 0.4311941862106323, + "eval_runtime": 209.696, + "eval_samples_per_second": 6.199, + "step": 1088 + }, + { + "epoch": 136.5, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 1092 + }, + { + "epoch": 136.99, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 1096 + }, + { + "epoch": 137.5, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 1100 + }, + { + "epoch": 137.99, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 1104 + }, + { + "epoch": 138.5, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 1108 + }, + { + "epoch": 138.99, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 1112 + }, + { + "epoch": 139.5, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 1116 + }, + { + "epoch": 139.99, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 1120 + }, + { + "epoch": 140.5, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 1124 + }, + { + "epoch": 140.99, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 1128 + }, + { + "epoch": 141.5, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 1132 + }, + { + "epoch": 141.99, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 1136 + }, + { + "epoch": 142.5, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 1140 + }, + { + "epoch": 142.99, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 1144 + }, + { + "epoch": 143.5, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 1148 + }, + { + "epoch": 143.99, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 1152 + }, + { + "epoch": 143.99, + "eval_exact_match": 0.42105263157894735, + "eval_exec": 0.47070506454816285, + "eval_loss": 0.4453062117099762, + "eval_runtime": 205.5286, + "eval_samples_per_second": 6.325, + "step": 1152 + }, + { + "epoch": 144.5, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 1156 + }, + { + "epoch": 144.99, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 1160 + }, + { + "epoch": 145.5, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 1164 + }, + { + "epoch": 145.99, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 1168 + }, + { + "epoch": 146.5, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 1172 + }, + { + "epoch": 146.99, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 1176 + }, + { + "epoch": 147.5, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 1180 + }, + { + "epoch": 147.99, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 1184 + }, + { + "epoch": 148.5, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 1188 + }, + { + "epoch": 148.99, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 1192 + }, + { + "epoch": 149.5, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 1196 + }, + { + "epoch": 149.99, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 1200 + }, + { + "epoch": 150.5, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 1204 + }, + { + "epoch": 150.99, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 1208 + }, + { + "epoch": 151.5, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 1212 + }, + { + "epoch": 151.99, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 1216 + }, + { + "epoch": 151.99, + "eval_exact_match": 0.43892750744786496, + "eval_exec": 0.47765640516385305, + "eval_loss": 0.45582684874534607, + "eval_runtime": 213.7344, + "eval_samples_per_second": 6.082, + "step": 1216 + }, + { + "epoch": 152.5, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 1220 + }, + { + "epoch": 152.99, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 1224 + }, + { + "epoch": 153.5, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 1228 + }, + { + "epoch": 153.99, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 1232 + }, + { + "epoch": 154.5, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 1236 + }, + { + "epoch": 154.99, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 1240 + }, + { + "epoch": 155.5, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 1244 + }, + { + "epoch": 155.99, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 1248 + }, + { + "epoch": 156.5, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 1252 + }, + { + "epoch": 156.99, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 1256 + }, + { + "epoch": 157.5, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 1260 + }, + { + "epoch": 157.99, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 1264 + }, + { + "epoch": 158.5, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 1268 + }, + { + "epoch": 158.99, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 1272 + }, + { + "epoch": 159.5, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 1276 + }, + { + "epoch": 159.99, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 1280 + }, + { + "epoch": 159.99, + "eval_exact_match": 0.4200595829195631, + "eval_exec": 0.47070506454816285, + "eval_loss": 0.45270583033561707, + "eval_runtime": 204.4816, + "eval_samples_per_second": 6.358, + "step": 1280 + }, + { + "epoch": 160.5, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 1284 + }, + { + "epoch": 160.99, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 1288 + }, + { + "epoch": 161.5, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 1292 + }, + { + "epoch": 161.99, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 1296 + }, + { + "epoch": 162.5, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 1300 + }, + { + "epoch": 162.99, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 1304 + }, + { + "epoch": 163.5, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 1308 + }, + { + "epoch": 163.99, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 1312 + }, + { + "epoch": 164.5, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 1316 + }, + { + "epoch": 164.99, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 1320 + }, + { + "epoch": 165.5, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 1324 + }, + { + "epoch": 165.99, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 1328 + }, + { + "epoch": 166.5, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 1332 + }, + { + "epoch": 166.99, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 1336 + }, + { + "epoch": 167.5, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 1340 + }, + { + "epoch": 167.99, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 1344 + }, + { + "epoch": 167.99, + "eval_exact_match": 0.42502482621648463, + "eval_exec": 0.4766633565044687, + "eval_loss": 0.46810275316238403, + "eval_runtime": 209.333, + "eval_samples_per_second": 6.21, + "step": 1344 + }, + { + "epoch": 168.5, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 1348 + }, + { + "epoch": 168.99, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 1352 + }, + { + "epoch": 169.5, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 1356 + }, + { + "epoch": 169.99, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 1360 + }, + { + "epoch": 170.5, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 1364 + }, + { + "epoch": 170.99, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 1368 + }, + { + "epoch": 171.5, + "learning_rate": 0.0001, + "loss": 0.0065, + "step": 1372 + }, + { + "epoch": 171.99, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 1376 + }, + { + "epoch": 172.5, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 1380 + }, + { + "epoch": 172.99, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 1384 + }, + { + "epoch": 173.5, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 1388 + }, + { + "epoch": 173.99, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 1392 + }, + { + "epoch": 174.5, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 1396 + }, + { + "epoch": 174.99, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 1400 + }, + { + "epoch": 175.5, + "learning_rate": 0.0001, + "loss": 0.0067, + "step": 1404 + }, + { + "epoch": 175.99, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 1408 + }, + { + "epoch": 175.99, + "eval_exact_match": 0.423038728897716, + "eval_exec": 0.47070506454816285, + "eval_loss": 0.48861581087112427, + "eval_runtime": 209.5612, + "eval_samples_per_second": 6.203, + "step": 1408 + }, + { + "epoch": 176.5, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 1412 + }, + { + "epoch": 176.99, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 1416 + }, + { + "epoch": 177.5, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 1420 + }, + { + "epoch": 177.99, + "learning_rate": 0.0001, + "loss": 0.0065, + "step": 1424 + }, + { + "epoch": 178.5, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 1428 + }, + { + "epoch": 178.99, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 1432 + }, + { + "epoch": 179.5, + "learning_rate": 0.0001, + "loss": 0.0064, + "step": 1436 + }, + { + "epoch": 179.99, + "learning_rate": 0.0001, + "loss": 0.0062, + "step": 1440 + }, + { + "epoch": 180.5, + "learning_rate": 0.0001, + "loss": 0.0063, + "step": 1444 + }, + { + "epoch": 180.99, + "learning_rate": 0.0001, + "loss": 0.0063, + "step": 1448 + }, + { + "epoch": 181.5, + "learning_rate": 0.0001, + "loss": 0.0063, + "step": 1452 + }, + { + "epoch": 181.99, + "learning_rate": 0.0001, + "loss": 0.0058, + "step": 1456 + }, + { + "epoch": 182.5, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 1460 + }, + { + "epoch": 182.99, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 1464 + }, + { + "epoch": 183.5, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 1468 + }, + { + "epoch": 183.99, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 1472 + }, + { + "epoch": 183.99, + "eval_exact_match": 0.4399205561072492, + "eval_exec": 0.4856007944389275, + "eval_loss": 0.46796470880508423, + "eval_runtime": 198.2198, + "eval_samples_per_second": 6.558, + "step": 1472 + }, + { + "epoch": 184.5, + "learning_rate": 0.0001, + "loss": 0.0065, + "step": 1476 + }, + { + "epoch": 184.99, + "learning_rate": 0.0001, + "loss": 0.0059, + "step": 1480 + }, + { + "epoch": 185.5, + "learning_rate": 0.0001, + "loss": 0.006, + "step": 1484 + }, + { + "epoch": 185.99, + "learning_rate": 0.0001, + "loss": 0.0061, + "step": 1488 + }, + { + "epoch": 186.5, + "learning_rate": 0.0001, + "loss": 0.006, + "step": 1492 + }, + { + "epoch": 186.99, + "learning_rate": 0.0001, + "loss": 0.0061, + "step": 1496 + }, + { + "epoch": 187.5, + "learning_rate": 0.0001, + "loss": 0.0064, + "step": 1500 + }, + { + "epoch": 187.99, + "learning_rate": 0.0001, + "loss": 0.0062, + "step": 1504 + }, + { + "epoch": 188.5, + "learning_rate": 0.0001, + "loss": 0.006, + "step": 1508 + }, + { + "epoch": 188.99, + "learning_rate": 0.0001, + "loss": 0.0059, + "step": 1512 + }, + { + "epoch": 189.5, + "learning_rate": 0.0001, + "loss": 0.0062, + "step": 1516 + }, + { + "epoch": 189.99, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 1520 + }, + { + "epoch": 190.5, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 1524 + }, + { + "epoch": 190.99, + "learning_rate": 0.0001, + "loss": 0.0057, + "step": 1528 + }, + { + "epoch": 191.5, + "learning_rate": 0.0001, + "loss": 0.0055, + "step": 1532 + }, + { + "epoch": 191.99, + "learning_rate": 0.0001, + "loss": 0.0059, + "step": 1536 + }, + { + "epoch": 191.99, + "eval_exact_match": 0.43197616683217477, + "eval_exec": 0.4746772591857001, + "eval_loss": 0.4912528097629547, + "eval_runtime": 222.179, + "eval_samples_per_second": 5.851, + "step": 1536 + }, + { + "epoch": 192.5, + "learning_rate": 0.0001, + "loss": 0.006, + "step": 1540 + }, + { + "epoch": 192.99, + "learning_rate": 0.0001, + "loss": 0.0055, + "step": 1544 + }, + { + "epoch": 193.5, + "learning_rate": 0.0001, + "loss": 0.0052, + "step": 1548 + }, + { + "epoch": 193.99, + "learning_rate": 0.0001, + "loss": 0.0055, + "step": 1552 + }, + { + "epoch": 194.5, + "learning_rate": 0.0001, + "loss": 0.0056, + "step": 1556 + }, + { + "epoch": 194.99, + "learning_rate": 0.0001, + "loss": 0.0055, + "step": 1560 + }, + { + "epoch": 195.5, + "learning_rate": 0.0001, + "loss": 0.0052, + "step": 1564 + }, + { + "epoch": 195.99, + "learning_rate": 0.0001, + "loss": 0.0054, + "step": 1568 + }, + { + "epoch": 196.5, + "learning_rate": 0.0001, + "loss": 0.0054, + "step": 1572 + }, + { + "epoch": 196.99, + "learning_rate": 0.0001, + "loss": 0.0052, + "step": 1576 + }, + { + "epoch": 197.5, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 1580 + }, + { + "epoch": 197.99, + "learning_rate": 0.0001, + "loss": 0.0053, + "step": 1584 + }, + { + "epoch": 198.5, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 1588 + }, + { + "epoch": 198.99, + "learning_rate": 0.0001, + "loss": 0.0054, + "step": 1592 + }, + { + "epoch": 199.5, + "learning_rate": 0.0001, + "loss": 0.0051, + "step": 1596 + }, + { + "epoch": 199.99, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 1600 + }, + { + "epoch": 199.99, + "eval_exact_match": 0.4329692154915591, + "eval_exec": 0.48659384309831183, + "eval_loss": 0.4948062002658844, + "eval_runtime": 223.792, + "eval_samples_per_second": 5.809, + "step": 1600 + }, + { + "epoch": 200.5, + "learning_rate": 0.0001, + "loss": 0.0046, + "step": 1604 + }, + { + "epoch": 200.99, + "learning_rate": 0.0001, + "loss": 0.0046, + "step": 1608 + }, + { + "epoch": 201.5, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 1612 + }, + { + "epoch": 201.99, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 1616 + }, + { + "epoch": 202.5, + "learning_rate": 0.0001, + "loss": 0.0049, + "step": 1620 + }, + { + "epoch": 202.99, + "learning_rate": 0.0001, + "loss": 0.0048, + "step": 1624 + }, + { + "epoch": 203.5, + "learning_rate": 0.0001, + "loss": 0.0051, + "step": 1628 + }, + { + "epoch": 203.99, + "learning_rate": 0.0001, + "loss": 0.0046, + "step": 1632 + }, + { + "epoch": 204.5, + "learning_rate": 0.0001, + "loss": 0.0051, + "step": 1636 + }, + { + "epoch": 204.99, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 1640 + }, + { + "epoch": 205.5, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 1644 + }, + { + "epoch": 205.99, + "learning_rate": 0.0001, + "loss": 0.0049, + "step": 1648 + }, + { + "epoch": 206.5, + "learning_rate": 0.0001, + "loss": 0.0045, + "step": 1652 + }, + { + "epoch": 206.99, + "learning_rate": 0.0001, + "loss": 0.0048, + "step": 1656 + }, + { + "epoch": 207.5, + "learning_rate": 0.0001, + "loss": 0.0048, + "step": 1660 + }, + { + "epoch": 207.99, + "learning_rate": 0.0001, + "loss": 0.0047, + "step": 1664 + }, + { + "epoch": 207.99, + "eval_exact_match": 0.42502482621648463, + "eval_exec": 0.48063555114200596, + "eval_loss": 0.4956875443458557, + "eval_runtime": 203.094, + "eval_samples_per_second": 6.401, + "step": 1664 + }, + { + "epoch": 208.5, + "learning_rate": 0.0001, + "loss": 0.0046, + "step": 1668 + }, + { + "epoch": 208.99, + "learning_rate": 0.0001, + "loss": 0.0044, + "step": 1672 + }, + { + "epoch": 209.5, + "learning_rate": 0.0001, + "loss": 0.0047, + "step": 1676 + }, + { + "epoch": 209.99, + "learning_rate": 0.0001, + "loss": 0.0042, + "step": 1680 + }, + { + "epoch": 210.5, + "learning_rate": 0.0001, + "loss": 0.0043, + "step": 1684 + }, + { + "epoch": 210.99, + "learning_rate": 0.0001, + "loss": 0.0047, + "step": 1688 + }, + { + "epoch": 211.5, + "learning_rate": 0.0001, + "loss": 0.0051, + "step": 1692 + }, + { + "epoch": 211.99, + "learning_rate": 0.0001, + "loss": 0.0049, + "step": 1696 + }, + { + "epoch": 212.5, + "learning_rate": 0.0001, + "loss": 0.0049, + "step": 1700 + }, + { + "epoch": 212.99, + "learning_rate": 0.0001, + "loss": 0.0044, + "step": 1704 + }, + { + "epoch": 213.5, + "learning_rate": 0.0001, + "loss": 0.0044, + "step": 1708 + }, + { + "epoch": 213.99, + "learning_rate": 0.0001, + "loss": 0.0045, + "step": 1712 + }, + { + "epoch": 214.5, + "learning_rate": 0.0001, + "loss": 0.0042, + "step": 1716 + }, + { + "epoch": 214.99, + "learning_rate": 0.0001, + "loss": 0.0043, + "step": 1720 + }, + { + "epoch": 215.5, + "learning_rate": 0.0001, + "loss": 0.0047, + "step": 1724 + }, + { + "epoch": 215.99, + "learning_rate": 0.0001, + "loss": 0.0045, + "step": 1728 + }, + { + "epoch": 215.99, + "eval_exact_match": 0.4339622641509434, + "eval_exec": 0.4726911618669315, + "eval_loss": 0.4982646703720093, + "eval_runtime": 200.9709, + "eval_samples_per_second": 6.469, + "step": 1728 + }, + { + "epoch": 216.5, + "learning_rate": 0.0001, + "loss": 0.0045, + "step": 1732 + }, + { + "epoch": 216.99, + "learning_rate": 0.0001, + "loss": 0.0046, + "step": 1736 + }, + { + "epoch": 217.5, + "learning_rate": 0.0001, + "loss": 0.0042, + "step": 1740 + }, + { + "epoch": 217.99, + "learning_rate": 0.0001, + "loss": 0.0046, + "step": 1744 + }, + { + "epoch": 218.5, + "learning_rate": 0.0001, + "loss": 0.0038, + "step": 1748 + }, + { + "epoch": 218.99, + "learning_rate": 0.0001, + "loss": 0.0045, + "step": 1752 + }, + { + "epoch": 219.5, + "learning_rate": 0.0001, + "loss": 0.0042, + "step": 1756 + }, + { + "epoch": 219.99, + "learning_rate": 0.0001, + "loss": 0.0041, + "step": 1760 + }, + { + "epoch": 220.5, + "learning_rate": 0.0001, + "loss": 0.0046, + "step": 1764 + }, + { + "epoch": 220.99, + "learning_rate": 0.0001, + "loss": 0.004, + "step": 1768 + }, + { + "epoch": 221.5, + "learning_rate": 0.0001, + "loss": 0.0042, + "step": 1772 + }, + { + "epoch": 221.99, + "learning_rate": 0.0001, + "loss": 0.0039, + "step": 1776 + }, + { + "epoch": 222.5, + "learning_rate": 0.0001, + "loss": 0.0043, + "step": 1780 + }, + { + "epoch": 222.99, + "learning_rate": 0.0001, + "loss": 0.0041, + "step": 1784 + }, + { + "epoch": 223.5, + "learning_rate": 0.0001, + "loss": 0.0038, + "step": 1788 + }, + { + "epoch": 223.99, + "learning_rate": 0.0001, + "loss": 0.0042, + "step": 1792 + }, + { + "epoch": 223.99, + "eval_exact_match": 0.43793445878848064, + "eval_exec": 0.4756703078450844, + "eval_loss": 0.5064935088157654, + "eval_runtime": 200.6112, + "eval_samples_per_second": 6.48, + "step": 1792 + }, + { + "epoch": 224.5, + "learning_rate": 0.0001, + "loss": 0.005, + "step": 1796 + }, + { + "epoch": 224.99, + "learning_rate": 0.0001, + "loss": 0.0046, + "step": 1800 + }, + { + "epoch": 225.5, + "learning_rate": 0.0001, + "loss": 0.004, + "step": 1804 + }, + { + "epoch": 225.99, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1808 + }, + { + "epoch": 226.5, + "learning_rate": 0.0001, + "loss": 0.0039, + "step": 1812 + }, + { + "epoch": 226.99, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1816 + }, + { + "epoch": 227.5, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1820 + }, + { + "epoch": 227.99, + "learning_rate": 0.0001, + "loss": 0.0039, + "step": 1824 + }, + { + "epoch": 228.5, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1828 + }, + { + "epoch": 228.99, + "learning_rate": 0.0001, + "loss": 0.0038, + "step": 1832 + }, + { + "epoch": 229.5, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1836 + }, + { + "epoch": 229.99, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1840 + }, + { + "epoch": 230.5, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1844 + }, + { + "epoch": 230.99, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1848 + }, + { + "epoch": 231.5, + "learning_rate": 0.0001, + "loss": 0.0038, + "step": 1852 + }, + { + "epoch": 231.99, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1856 + }, + { + "epoch": 231.99, + "eval_exact_match": 0.43793445878848064, + "eval_exec": 0.4856007944389275, + "eval_loss": 0.5247978568077087, + "eval_runtime": 207.1906, + "eval_samples_per_second": 6.274, + "step": 1856 + }, + { + "epoch": 232.5, + "learning_rate": 0.0001, + "loss": 0.0034, + "step": 1860 + }, + { + "epoch": 232.99, + "learning_rate": 0.0001, + "loss": 0.0039, + "step": 1864 + }, + { + "epoch": 233.5, + "learning_rate": 0.0001, + "loss": 0.0039, + "step": 1868 + }, + { + "epoch": 233.99, + "learning_rate": 0.0001, + "loss": 0.0034, + "step": 1872 + }, + { + "epoch": 234.5, + "learning_rate": 0.0001, + "loss": 0.0042, + "step": 1876 + }, + { + "epoch": 234.99, + "learning_rate": 0.0001, + "loss": 0.004, + "step": 1880 + }, + { + "epoch": 235.5, + "learning_rate": 0.0001, + "loss": 0.0047, + "step": 1884 + }, + { + "epoch": 235.99, + "learning_rate": 0.0001, + "loss": 0.0065, + "step": 1888 + }, + { + "epoch": 236.5, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1892 + }, + { + "epoch": 236.99, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1896 + }, + { + "epoch": 237.5, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1900 + }, + { + "epoch": 237.99, + "learning_rate": 0.0001, + "loss": 0.0041, + "step": 1904 + }, + { + "epoch": 238.5, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1908 + }, + { + "epoch": 238.99, + "learning_rate": 0.0001, + "loss": 0.0035, + "step": 1912 + }, + { + "epoch": 239.5, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1916 + }, + { + "epoch": 239.99, + "learning_rate": 0.0001, + "loss": 0.0035, + "step": 1920 + }, + { + "epoch": 239.99, + "eval_exact_match": 0.43892750744786496, + "eval_exec": 0.48659384309831183, + "eval_loss": 0.5224528908729553, + "eval_runtime": 194.9847, + "eval_samples_per_second": 6.667, + "step": 1920 + }, + { + "epoch": 240.5, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 1924 + }, + { + "epoch": 240.99, + "learning_rate": 0.0001, + "loss": 0.0054, + "step": 1928 + }, + { + "epoch": 241.5, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1932 + }, + { + "epoch": 241.99, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1936 + }, + { + "epoch": 242.5, + "learning_rate": 0.0001, + "loss": 0.0035, + "step": 1940 + }, + { + "epoch": 242.99, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 1944 + }, + { + "epoch": 243.5, + "learning_rate": 0.0001, + "loss": 0.0035, + "step": 1948 + }, + { + "epoch": 243.99, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 1952 + }, + { + "epoch": 244.5, + "learning_rate": 0.0001, + "loss": 0.0034, + "step": 1956 + }, + { + "epoch": 244.99, + "learning_rate": 0.0001, + "loss": 0.0035, + "step": 1960 + }, + { + "epoch": 245.5, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 1964 + }, + { + "epoch": 245.99, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 1968 + }, + { + "epoch": 246.5, + "learning_rate": 0.0001, + "loss": 0.0038, + "step": 1972 + }, + { + "epoch": 246.99, + "learning_rate": 0.0001, + "loss": 0.0035, + "step": 1976 + }, + { + "epoch": 247.5, + "learning_rate": 0.0001, + "loss": 0.0033, + "step": 1980 + }, + { + "epoch": 247.99, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 1984 + }, + { + "epoch": 247.99, + "eval_exact_match": 0.4299900695134062, + "eval_exec": 0.4856007944389275, + "eval_loss": 0.521920382976532, + "eval_runtime": 197.9426, + "eval_samples_per_second": 6.568, + "step": 1984 + }, + { + "epoch": 248.5, + "learning_rate": 0.0001, + "loss": 0.0034, + "step": 1988 + }, + { + "epoch": 248.99, + "learning_rate": 0.0001, + "loss": 0.0033, + "step": 1992 + }, + { + "epoch": 249.5, + "learning_rate": 0.0001, + "loss": 0.0034, + "step": 1996 + }, + { + "epoch": 249.99, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 2000 + }, + { + "epoch": 250.5, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 2004 + }, + { + "epoch": 250.99, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2008 + }, + { + "epoch": 251.5, + "learning_rate": 0.0001, + "loss": 0.0033, + "step": 2012 + }, + { + "epoch": 251.99, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2016 + }, + { + "epoch": 252.5, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2020 + }, + { + "epoch": 252.99, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2024 + }, + { + "epoch": 253.5, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2028 + }, + { + "epoch": 253.99, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2032 + }, + { + "epoch": 254.5, + "learning_rate": 0.0001, + "loss": 0.0033, + "step": 2036 + }, + { + "epoch": 254.99, + "learning_rate": 0.0001, + "loss": 0.0039, + "step": 2040 + }, + { + "epoch": 255.5, + "learning_rate": 0.0001, + "loss": 0.0064, + "step": 2044 + }, + { + "epoch": 255.99, + "learning_rate": 0.0001, + "loss": 0.0035, + "step": 2048 + }, + { + "epoch": 255.99, + "eval_exact_match": 0.4399205561072492, + "eval_exec": 0.49056603773584906, + "eval_loss": 0.516386091709137, + "eval_runtime": 193.5596, + "eval_samples_per_second": 6.716, + "step": 2048 + }, + { + "epoch": 256.5, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2052 + }, + { + "epoch": 256.99, + "learning_rate": 0.0001, + "loss": 0.0033, + "step": 2056 + }, + { + "epoch": 257.5, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2060 + }, + { + "epoch": 257.99, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 2064 + }, + { + "epoch": 258.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2068 + }, + { + "epoch": 258.99, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2072 + }, + { + "epoch": 259.5, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2076 + }, + { + "epoch": 259.99, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2080 + }, + { + "epoch": 260.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2084 + }, + { + "epoch": 260.99, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2088 + }, + { + "epoch": 261.5, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 2092 + }, + { + "epoch": 261.99, + "learning_rate": 0.0001, + "loss": 0.0033, + "step": 2096 + }, + { + "epoch": 262.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2100 + }, + { + "epoch": 262.99, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 2104 + }, + { + "epoch": 263.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2108 + }, + { + "epoch": 263.99, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2112 + }, + { + "epoch": 263.99, + "eval_exact_match": 0.43793445878848064, + "eval_exec": 0.48758689175769615, + "eval_loss": 0.5402066707611084, + "eval_runtime": 203.0032, + "eval_samples_per_second": 6.404, + "step": 2112 + }, + { + "epoch": 264.5, + "learning_rate": 0.0001, + "loss": 0.0028, + "step": 2116 + }, + { + "epoch": 264.99, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2120 + }, + { + "epoch": 265.5, + "learning_rate": 0.0001, + "loss": 0.0028, + "step": 2124 + }, + { + "epoch": 265.99, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2128 + }, + { + "epoch": 266.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2132 + }, + { + "epoch": 266.99, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 2136 + }, + { + "epoch": 267.5, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 2140 + }, + { + "epoch": 267.99, + "learning_rate": 0.0001, + "loss": 0.0028, + "step": 2144 + }, + { + "epoch": 268.5, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2148 + }, + { + "epoch": 268.99, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2152 + }, + { + "epoch": 269.5, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 2156 + }, + { + "epoch": 269.99, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2160 + }, + { + "epoch": 270.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2164 + }, + { + "epoch": 270.99, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2168 + }, + { + "epoch": 271.5, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2172 + }, + { + "epoch": 271.99, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2176 + }, + { + "epoch": 271.99, + "eval_exact_match": 0.43495531281032773, + "eval_exec": 0.4816285998013903, + "eval_loss": 0.5360086560249329, + "eval_runtime": 197.8829, + "eval_samples_per_second": 6.57, + "step": 2176 + }, + { + "epoch": 272.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2180 + }, + { + "epoch": 272.99, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2184 + }, + { + "epoch": 273.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2188 + }, + { + "epoch": 273.99, + "learning_rate": 0.0001, + "loss": 0.0028, + "step": 2192 + }, + { + "epoch": 274.5, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2196 + }, + { + "epoch": 274.99, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2200 + }, + { + "epoch": 275.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2204 + }, + { + "epoch": 275.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2208 + }, + { + "epoch": 276.5, + "learning_rate": 0.0001, + "loss": 0.0028, + "step": 2212 + }, + { + "epoch": 276.99, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2216 + }, + { + "epoch": 277.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2220 + }, + { + "epoch": 277.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2224 + }, + { + "epoch": 278.5, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2228 + }, + { + "epoch": 278.99, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2232 + }, + { + "epoch": 279.5, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2236 + }, + { + "epoch": 279.99, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2240 + }, + { + "epoch": 279.99, + "eval_exact_match": 0.4369414101290963, + "eval_exec": 0.48063555114200596, + "eval_loss": 0.5520691871643066, + "eval_runtime": 204.4424, + "eval_samples_per_second": 6.359, + "step": 2240 + }, + { + "epoch": 280.5, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2244 + }, + { + "epoch": 280.99, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2248 + }, + { + "epoch": 281.5, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2252 + }, + { + "epoch": 281.99, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2256 + }, + { + "epoch": 282.5, + "learning_rate": 0.0001, + "loss": 0.0028, + "step": 2260 + }, + { + "epoch": 282.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2264 + }, + { + "epoch": 283.5, + "learning_rate": 0.0001, + "loss": 0.0028, + "step": 2268 + }, + { + "epoch": 283.99, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2272 + }, + { + "epoch": 284.5, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2276 + }, + { + "epoch": 284.99, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2280 + }, + { + "epoch": 285.5, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2284 + }, + { + "epoch": 285.99, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2288 + }, + { + "epoch": 286.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2292 + }, + { + "epoch": 286.99, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2296 + }, + { + "epoch": 287.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2300 + }, + { + "epoch": 287.99, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2304 + }, + { + "epoch": 287.99, + "eval_exact_match": 0.4438927507447865, + "eval_exec": 0.48659384309831183, + "eval_loss": 0.5534113645553589, + "eval_runtime": 202.8208, + "eval_samples_per_second": 6.41, + "step": 2304 + }, + { + "epoch": 288.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2308 + }, + { + "epoch": 288.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2312 + }, + { + "epoch": 289.5, + "learning_rate": 0.0001, + "loss": 0.0027, + "step": 2316 + }, + { + "epoch": 289.99, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2320 + }, + { + "epoch": 290.5, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2324 + }, + { + "epoch": 290.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2328 + }, + { + "epoch": 291.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2332 + }, + { + "epoch": 291.99, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2336 + }, + { + "epoch": 292.5, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2340 + }, + { + "epoch": 292.99, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2344 + }, + { + "epoch": 293.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2348 + }, + { + "epoch": 293.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2352 + }, + { + "epoch": 294.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2356 + }, + { + "epoch": 294.99, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2360 + }, + { + "epoch": 295.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2364 + }, + { + "epoch": 295.99, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2368 + }, + { + "epoch": 295.99, + "eval_exact_match": 0.4428997020854022, + "eval_exec": 0.4816285998013903, + "eval_loss": 0.557854413986206, + "eval_runtime": 203.9919, + "eval_samples_per_second": 6.373, + "step": 2368 + }, + { + "epoch": 296.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2372 + }, + { + "epoch": 296.99, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2376 + }, + { + "epoch": 297.5, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2380 + }, + { + "epoch": 297.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2384 + }, + { + "epoch": 298.5, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2388 + }, + { + "epoch": 298.99, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2392 + }, + { + "epoch": 299.5, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2396 + }, + { + "epoch": 299.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2400 + }, + { + "epoch": 300.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2404 + }, + { + "epoch": 300.99, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2408 + }, + { + "epoch": 301.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2412 + }, + { + "epoch": 301.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2416 + }, + { + "epoch": 302.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2420 + }, + { + "epoch": 302.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2424 + }, + { + "epoch": 303.5, + "learning_rate": 0.0001, + "loss": 0.0028, + "step": 2428 + }, + { + "epoch": 303.99, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 2432 + }, + { + "epoch": 303.99, + "eval_exact_match": 0.44985104270109233, + "eval_exec": 0.49056603773584906, + "eval_loss": 0.5580935478210449, + "eval_runtime": 200.2593, + "eval_samples_per_second": 6.492, + "step": 2432 + }, + { + "epoch": 304.5, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2436 + }, + { + "epoch": 304.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2440 + }, + { + "epoch": 305.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2444 + }, + { + "epoch": 305.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2448 + }, + { + "epoch": 306.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2452 + }, + { + "epoch": 306.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2456 + }, + { + "epoch": 307.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2460 + }, + { + "epoch": 307.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2464 + }, + { + "epoch": 308.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2468 + }, + { + "epoch": 308.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2472 + }, + { + "epoch": 309.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2476 + }, + { + "epoch": 309.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2480 + }, + { + "epoch": 310.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2484 + }, + { + "epoch": 310.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2488 + }, + { + "epoch": 311.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2492 + }, + { + "epoch": 311.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2496 + }, + { + "epoch": 311.99, + "eval_exact_match": 0.4200595829195631, + "eval_exec": 0.47070506454816285, + "eval_loss": 0.560897946357727, + "eval_runtime": 202.502, + "eval_samples_per_second": 6.42, + "step": 2496 + }, + { + "epoch": 312.5, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2500 + }, + { + "epoch": 312.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2504 + }, + { + "epoch": 313.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2508 + }, + { + "epoch": 313.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2512 + }, + { + "epoch": 314.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2516 + }, + { + "epoch": 314.99, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 2520 + }, + { + "epoch": 315.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2524 + }, + { + "epoch": 315.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2528 + }, + { + "epoch": 316.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2532 + }, + { + "epoch": 316.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2536 + }, + { + "epoch": 317.5, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2540 + }, + { + "epoch": 317.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2544 + }, + { + "epoch": 318.5, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2548 + }, + { + "epoch": 318.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2552 + }, + { + "epoch": 319.5, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2556 + }, + { + "epoch": 319.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2560 + }, + { + "epoch": 319.99, + "eval_exact_match": 0.42800397219463754, + "eval_exec": 0.46971201588877853, + "eval_loss": 0.5524822473526001, + "eval_runtime": 204.4122, + "eval_samples_per_second": 6.36, + "step": 2560 + }, + { + "epoch": 320.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2564 + }, + { + "epoch": 320.99, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 2568 + }, + { + "epoch": 321.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2572 + }, + { + "epoch": 321.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2576 + }, + { + "epoch": 322.5, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2580 + }, + { + "epoch": 322.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2584 + }, + { + "epoch": 323.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2588 + }, + { + "epoch": 323.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2592 + }, + { + "epoch": 324.5, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2596 + }, + { + "epoch": 324.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2600 + }, + { + "epoch": 325.5, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2604 + }, + { + "epoch": 325.99, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2608 + }, + { + "epoch": 326.5, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2612 + }, + { + "epoch": 326.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2616 + }, + { + "epoch": 327.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2620 + }, + { + "epoch": 327.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2624 + }, + { + "epoch": 327.99, + "eval_exact_match": 0.4260178748758689, + "eval_exec": 0.48361469712015887, + "eval_loss": 0.5746508240699768, + "eval_runtime": 194.402, + "eval_samples_per_second": 6.687, + "step": 2624 + }, + { + "epoch": 328.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2628 + }, + { + "epoch": 328.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2632 + }, + { + "epoch": 329.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2636 + }, + { + "epoch": 329.99, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2640 + }, + { + "epoch": 330.5, + "learning_rate": 0.0001, + "loss": 0.0025, + "step": 2644 + }, + { + "epoch": 330.99, + "learning_rate": 0.0001, + "loss": 0.003, + "step": 2648 + }, + { + "epoch": 331.5, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2652 + }, + { + "epoch": 331.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2656 + }, + { + "epoch": 332.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2660 + }, + { + "epoch": 332.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2664 + }, + { + "epoch": 333.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2668 + }, + { + "epoch": 333.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2672 + }, + { + "epoch": 334.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2676 + }, + { + "epoch": 334.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2680 + }, + { + "epoch": 335.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2684 + }, + { + "epoch": 335.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2688 + }, + { + "epoch": 335.99, + "eval_exact_match": 0.4369414101290963, + "eval_exec": 0.4786494538232373, + "eval_loss": 0.5734978914260864, + "eval_runtime": 199.4394, + "eval_samples_per_second": 6.518, + "step": 2688 + }, + { + "epoch": 336.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2692 + }, + { + "epoch": 336.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2696 + }, + { + "epoch": 337.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2700 + }, + { + "epoch": 337.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2704 + }, + { + "epoch": 338.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2708 + }, + { + "epoch": 338.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2712 + }, + { + "epoch": 339.5, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2716 + }, + { + "epoch": 339.99, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2720 + }, + { + "epoch": 340.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2724 + }, + { + "epoch": 340.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2728 + }, + { + "epoch": 341.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2732 + }, + { + "epoch": 341.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 2736 + }, + { + "epoch": 342.5, + "learning_rate": 0.0001, + "loss": 0.0034, + "step": 2740 + }, + { + "epoch": 342.99, + "learning_rate": 0.0001, + "loss": 0.0041, + "step": 2744 + }, + { + "epoch": 343.5, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2748 + }, + { + "epoch": 343.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2752 + }, + { + "epoch": 343.99, + "eval_exact_match": 0.4369414101290963, + "eval_exec": 0.48063555114200596, + "eval_loss": 0.5549472570419312, + "eval_runtime": 209.2028, + "eval_samples_per_second": 6.214, + "step": 2752 + }, + { + "epoch": 344.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2756 + }, + { + "epoch": 344.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2760 + }, + { + "epoch": 345.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2764 + }, + { + "epoch": 345.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2768 + }, + { + "epoch": 346.5, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 2772 + }, + { + "epoch": 346.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2776 + }, + { + "epoch": 347.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2780 + }, + { + "epoch": 347.99, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2784 + }, + { + "epoch": 348.5, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2788 + }, + { + "epoch": 348.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2792 + }, + { + "epoch": 349.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2796 + }, + { + "epoch": 349.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2800 + }, + { + "epoch": 350.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2804 + }, + { + "epoch": 350.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2808 + }, + { + "epoch": 351.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 2812 + }, + { + "epoch": 351.99, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2816 + }, + { + "epoch": 351.99, + "eval_exact_match": 0.4448857994041708, + "eval_exec": 0.48758689175769615, + "eval_loss": 0.5706749558448792, + "eval_runtime": 208.8886, + "eval_samples_per_second": 6.223, + "step": 2816 + }, + { + "epoch": 352.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2820 + }, + { + "epoch": 352.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2824 + }, + { + "epoch": 353.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2828 + }, + { + "epoch": 353.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2832 + }, + { + "epoch": 354.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2836 + }, + { + "epoch": 354.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2840 + }, + { + "epoch": 355.5, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2844 + }, + { + "epoch": 355.99, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2848 + }, + { + "epoch": 356.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2852 + }, + { + "epoch": 356.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2856 + }, + { + "epoch": 357.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2860 + }, + { + "epoch": 357.99, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2864 + }, + { + "epoch": 358.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 2868 + }, + { + "epoch": 358.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2872 + }, + { + "epoch": 359.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2876 + }, + { + "epoch": 359.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2880 + }, + { + "epoch": 359.99, + "eval_exact_match": 0.4468718967229394, + "eval_exec": 0.48957298907646474, + "eval_loss": 0.5861152410507202, + "eval_runtime": 206.5997, + "eval_samples_per_second": 6.292, + "step": 2880 + }, + { + "epoch": 360.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2884 + }, + { + "epoch": 360.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2888 + }, + { + "epoch": 361.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2892 + }, + { + "epoch": 361.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2896 + }, + { + "epoch": 362.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2900 + }, + { + "epoch": 362.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2904 + }, + { + "epoch": 363.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 2908 + }, + { + "epoch": 363.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2912 + }, + { + "epoch": 364.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2916 + }, + { + "epoch": 364.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2920 + }, + { + "epoch": 365.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2924 + }, + { + "epoch": 365.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2928 + }, + { + "epoch": 366.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2932 + }, + { + "epoch": 366.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2936 + }, + { + "epoch": 367.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 2940 + }, + { + "epoch": 367.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2944 + }, + { + "epoch": 367.99, + "eval_exact_match": 0.44786494538232374, + "eval_exec": 0.4955312810327706, + "eval_loss": 0.5812374949455261, + "eval_runtime": 213.3063, + "eval_samples_per_second": 6.095, + "step": 2944 + }, + { + "epoch": 368.5, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2948 + }, + { + "epoch": 368.99, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 2952 + }, + { + "epoch": 369.5, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2956 + }, + { + "epoch": 369.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2960 + }, + { + "epoch": 370.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 2964 + }, + { + "epoch": 370.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 2968 + }, + { + "epoch": 371.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2972 + }, + { + "epoch": 371.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2976 + }, + { + "epoch": 372.5, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 2980 + }, + { + "epoch": 372.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2984 + }, + { + "epoch": 373.5, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 2988 + }, + { + "epoch": 373.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 2992 + }, + { + "epoch": 374.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 2996 + }, + { + "epoch": 374.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3000 + }, + { + "epoch": 375.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3004 + }, + { + "epoch": 375.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 3008 + }, + { + "epoch": 375.99, + "eval_exact_match": 0.4438927507447865, + "eval_exec": 0.48659384309831183, + "eval_loss": 0.5652831792831421, + "eval_runtime": 210.776, + "eval_samples_per_second": 6.168, + "step": 3008 + }, + { + "epoch": 376.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 3012 + }, + { + "epoch": 376.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 3016 + }, + { + "epoch": 377.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3020 + }, + { + "epoch": 377.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3024 + }, + { + "epoch": 378.5, + "learning_rate": 0.0001, + "loss": 0.0021, + "step": 3028 + }, + { + "epoch": 378.99, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 3032 + }, + { + "epoch": 379.5, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 3036 + }, + { + "epoch": 379.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3040 + }, + { + "epoch": 380.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3044 + }, + { + "epoch": 380.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3048 + }, + { + "epoch": 381.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3052 + }, + { + "epoch": 381.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3056 + }, + { + "epoch": 382.5, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 3060 + }, + { + "epoch": 382.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3064 + }, + { + "epoch": 383.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3068 + }, + { + "epoch": 383.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3072 + }, + { + "epoch": 383.99, + "eval_exact_match": 0.44985104270109233, + "eval_exec": 0.4915590863952334, + "eval_loss": 0.5784198641777039, + "eval_runtime": 204.8823, + "eval_samples_per_second": 6.345, + "step": 3072 + }, + { + "epoch": 384.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3076 + }, + { + "epoch": 384.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3080 + }, + { + "epoch": 385.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3084 + }, + { + "epoch": 385.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3088 + }, + { + "epoch": 386.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3092 + }, + { + "epoch": 386.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 3096 + }, + { + "epoch": 387.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3100 + }, + { + "epoch": 387.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 3104 + }, + { + "epoch": 388.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3108 + }, + { + "epoch": 388.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3112 + }, + { + "epoch": 389.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3116 + }, + { + "epoch": 389.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3120 + }, + { + "epoch": 390.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3124 + }, + { + "epoch": 390.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3128 + }, + { + "epoch": 391.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3132 + }, + { + "epoch": 391.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3136 + }, + { + "epoch": 391.99, + "eval_exact_match": 0.4528301886792453, + "eval_exec": 0.4925521350546177, + "eval_loss": 0.5775428414344788, + "eval_runtime": 211.2569, + "eval_samples_per_second": 6.154, + "step": 3136 + }, + { + "epoch": 392.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3140 + }, + { + "epoch": 392.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3144 + }, + { + "epoch": 393.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3148 + }, + { + "epoch": 393.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3152 + }, + { + "epoch": 394.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3156 + }, + { + "epoch": 394.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3160 + }, + { + "epoch": 395.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3164 + }, + { + "epoch": 395.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3168 + }, + { + "epoch": 396.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3172 + }, + { + "epoch": 396.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3176 + }, + { + "epoch": 397.5, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 3180 + }, + { + "epoch": 397.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3184 + }, + { + "epoch": 398.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3188 + }, + { + "epoch": 398.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3192 + }, + { + "epoch": 399.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3196 + }, + { + "epoch": 399.99, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 3200 + }, + { + "epoch": 399.99, + "eval_exact_match": 0.4538232373386296, + "eval_exec": 0.49751737835153925, + "eval_loss": 0.5791714191436768, + "eval_runtime": 202.6045, + "eval_samples_per_second": 6.416, + "step": 3200 + }, + { + "epoch": 400.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3204 + }, + { + "epoch": 400.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3208 + }, + { + "epoch": 401.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3212 + }, + { + "epoch": 401.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3216 + }, + { + "epoch": 402.5, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3220 + }, + { + "epoch": 402.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3224 + }, + { + "epoch": 403.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3228 + }, + { + "epoch": 403.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3232 + }, + { + "epoch": 404.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3236 + }, + { + "epoch": 404.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3240 + }, + { + "epoch": 405.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3244 + }, + { + "epoch": 405.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3248 + }, + { + "epoch": 406.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3252 + }, + { + "epoch": 406.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3256 + }, + { + "epoch": 407.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3260 + }, + { + "epoch": 407.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3264 + }, + { + "epoch": 407.99, + "eval_exact_match": 0.44190665342601787, + "eval_exec": 0.48361469712015887, + "eval_loss": 0.58585524559021, + "eval_runtime": 208.6202, + "eval_samples_per_second": 6.231, + "step": 3264 + }, + { + "epoch": 408.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3268 + }, + { + "epoch": 408.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3272 + }, + { + "epoch": 409.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3276 + }, + { + "epoch": 409.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3280 + }, + { + "epoch": 410.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3284 + }, + { + "epoch": 410.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3288 + }, + { + "epoch": 411.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3292 + }, + { + "epoch": 411.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3296 + }, + { + "epoch": 412.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3300 + }, + { + "epoch": 412.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3304 + }, + { + "epoch": 413.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3308 + }, + { + "epoch": 413.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3312 + }, + { + "epoch": 414.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3316 + }, + { + "epoch": 414.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3320 + }, + { + "epoch": 415.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3324 + }, + { + "epoch": 415.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3328 + }, + { + "epoch": 415.99, + "eval_exact_match": 0.44786494538232374, + "eval_exec": 0.48361469712015887, + "eval_loss": 0.5857390761375427, + "eval_runtime": 199.7986, + "eval_samples_per_second": 6.507, + "step": 3328 + }, + { + "epoch": 416.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3332 + }, + { + "epoch": 416.99, + "learning_rate": 0.0001, + "loss": 0.0023, + "step": 3336 + }, + { + "epoch": 417.5, + "learning_rate": 0.0001, + "loss": 0.0024, + "step": 3340 + }, + { + "epoch": 417.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3344 + }, + { + "epoch": 418.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3348 + }, + { + "epoch": 418.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3352 + }, + { + "epoch": 419.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3356 + }, + { + "epoch": 419.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3360 + }, + { + "epoch": 420.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3364 + }, + { + "epoch": 420.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3368 + }, + { + "epoch": 421.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3372 + }, + { + "epoch": 421.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3376 + }, + { + "epoch": 422.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3380 + }, + { + "epoch": 422.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3384 + }, + { + "epoch": 423.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3388 + }, + { + "epoch": 423.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3392 + }, + { + "epoch": 423.99, + "eval_exact_match": 0.4468718967229394, + "eval_exec": 0.48758689175769615, + "eval_loss": 0.5896801948547363, + "eval_runtime": 211.8567, + "eval_samples_per_second": 6.136, + "step": 3392 + }, + { + "epoch": 424.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3396 + }, + { + "epoch": 424.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3400 + }, + { + "epoch": 425.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3404 + }, + { + "epoch": 425.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3408 + }, + { + "epoch": 426.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3412 + }, + { + "epoch": 426.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3416 + }, + { + "epoch": 427.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3420 + }, + { + "epoch": 427.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3424 + }, + { + "epoch": 428.5, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3428 + }, + { + "epoch": 428.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3432 + }, + { + "epoch": 429.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3436 + }, + { + "epoch": 429.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3440 + }, + { + "epoch": 430.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3444 + }, + { + "epoch": 430.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3448 + }, + { + "epoch": 431.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3452 + }, + { + "epoch": 431.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3456 + }, + { + "epoch": 431.99, + "eval_exact_match": 0.44885799404170806, + "eval_exec": 0.4846077457795432, + "eval_loss": 0.5921575427055359, + "eval_runtime": 197.9512, + "eval_samples_per_second": 6.567, + "step": 3456 + }, + { + "epoch": 432.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3460 + }, + { + "epoch": 432.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3464 + }, + { + "epoch": 433.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3468 + }, + { + "epoch": 433.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3472 + }, + { + "epoch": 434.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3476 + }, + { + "epoch": 434.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3480 + }, + { + "epoch": 435.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3484 + }, + { + "epoch": 435.99, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 3488 + }, + { + "epoch": 436.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3492 + }, + { + "epoch": 436.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3496 + }, + { + "epoch": 437.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3500 + }, + { + "epoch": 437.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3504 + }, + { + "epoch": 438.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3508 + }, + { + "epoch": 438.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3512 + }, + { + "epoch": 439.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3516 + }, + { + "epoch": 439.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3520 + }, + { + "epoch": 439.99, + "eval_exact_match": 0.4428997020854022, + "eval_exec": 0.49056603773584906, + "eval_loss": 0.5778002142906189, + "eval_runtime": 200.4589, + "eval_samples_per_second": 6.485, + "step": 3520 + }, + { + "epoch": 440.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3524 + }, + { + "epoch": 440.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3528 + }, + { + "epoch": 441.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3532 + }, + { + "epoch": 441.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3536 + }, + { + "epoch": 442.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3540 + }, + { + "epoch": 442.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3544 + }, + { + "epoch": 443.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3548 + }, + { + "epoch": 443.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3552 + }, + { + "epoch": 444.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3556 + }, + { + "epoch": 444.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3560 + }, + { + "epoch": 445.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3564 + }, + { + "epoch": 445.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3568 + }, + { + "epoch": 446.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3572 + }, + { + "epoch": 446.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3576 + }, + { + "epoch": 447.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3580 + }, + { + "epoch": 447.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3584 + }, + { + "epoch": 447.99, + "eval_exact_match": 0.4438927507447865, + "eval_exec": 0.47765640516385305, + "eval_loss": 0.584464967250824, + "eval_runtime": 195.0196, + "eval_samples_per_second": 6.666, + "step": 3584 + }, + { + "epoch": 448.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3588 + }, + { + "epoch": 448.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3592 + }, + { + "epoch": 449.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3596 + }, + { + "epoch": 449.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3600 + }, + { + "epoch": 450.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3604 + }, + { + "epoch": 450.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3608 + }, + { + "epoch": 451.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3612 + }, + { + "epoch": 451.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3616 + }, + { + "epoch": 452.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3620 + }, + { + "epoch": 452.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3624 + }, + { + "epoch": 453.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3628 + }, + { + "epoch": 453.99, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 3632 + }, + { + "epoch": 454.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3636 + }, + { + "epoch": 454.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3640 + }, + { + "epoch": 455.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3644 + }, + { + "epoch": 455.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3648 + }, + { + "epoch": 455.99, + "eval_exact_match": 0.4438927507447865, + "eval_exec": 0.4846077457795432, + "eval_loss": 0.6006260514259338, + "eval_runtime": 207.0142, + "eval_samples_per_second": 6.28, + "step": 3648 + }, + { + "epoch": 456.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3652 + }, + { + "epoch": 456.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3656 + }, + { + "epoch": 457.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3660 + }, + { + "epoch": 457.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3664 + }, + { + "epoch": 458.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3668 + }, + { + "epoch": 458.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3672 + }, + { + "epoch": 459.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3676 + }, + { + "epoch": 459.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3680 + }, + { + "epoch": 460.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3684 + }, + { + "epoch": 460.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3688 + }, + { + "epoch": 461.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3692 + }, + { + "epoch": 461.99, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3696 + }, + { + "epoch": 462.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3700 + }, + { + "epoch": 462.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3704 + }, + { + "epoch": 463.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3708 + }, + { + "epoch": 463.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3712 + }, + { + "epoch": 463.99, + "eval_exact_match": 0.4448857994041708, + "eval_exec": 0.47765640516385305, + "eval_loss": 0.6055679321289062, + "eval_runtime": 203.8538, + "eval_samples_per_second": 6.377, + "step": 3712 + }, + { + "epoch": 464.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3716 + }, + { + "epoch": 464.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3720 + }, + { + "epoch": 465.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3724 + }, + { + "epoch": 465.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3728 + }, + { + "epoch": 466.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3732 + }, + { + "epoch": 466.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3736 + }, + { + "epoch": 467.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3740 + }, + { + "epoch": 467.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3744 + }, + { + "epoch": 468.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3748 + }, + { + "epoch": 468.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3752 + }, + { + "epoch": 469.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3756 + }, + { + "epoch": 469.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3760 + }, + { + "epoch": 470.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3764 + }, + { + "epoch": 470.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3768 + }, + { + "epoch": 471.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3772 + }, + { + "epoch": 471.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3776 + }, + { + "epoch": 471.99, + "eval_exact_match": 0.44190665342601787, + "eval_exec": 0.47368421052631576, + "eval_loss": 0.6094422340393066, + "eval_runtime": 200.6337, + "eval_samples_per_second": 6.479, + "step": 3776 + }, + { + "epoch": 472.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3780 + }, + { + "epoch": 472.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 3784 + }, + { + "epoch": 473.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3788 + }, + { + "epoch": 473.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3792 + }, + { + "epoch": 474.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3796 + }, + { + "epoch": 474.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3800 + }, + { + "epoch": 475.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 3804 + }, + { + "epoch": 475.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3808 + }, + { + "epoch": 476.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3812 + }, + { + "epoch": 476.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3816 + }, + { + "epoch": 477.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3820 + }, + { + "epoch": 477.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3824 + }, + { + "epoch": 478.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3828 + }, + { + "epoch": 478.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3832 + }, + { + "epoch": 479.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3836 + }, + { + "epoch": 479.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3840 + }, + { + "epoch": 479.99, + "eval_exact_match": 0.4458788480635551, + "eval_exec": 0.49354518371400197, + "eval_loss": 0.6145819425582886, + "eval_runtime": 190.4305, + "eval_samples_per_second": 6.827, + "step": 3840 + }, + { + "epoch": 480.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3844 + }, + { + "epoch": 480.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3848 + }, + { + "epoch": 481.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3852 + }, + { + "epoch": 481.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3856 + }, + { + "epoch": 482.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3860 + }, + { + "epoch": 482.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3864 + }, + { + "epoch": 483.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3868 + }, + { + "epoch": 483.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3872 + }, + { + "epoch": 484.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3876 + }, + { + "epoch": 484.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 3880 + }, + { + "epoch": 485.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3884 + }, + { + "epoch": 485.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3888 + }, + { + "epoch": 486.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3892 + }, + { + "epoch": 486.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 3896 + }, + { + "epoch": 487.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3900 + }, + { + "epoch": 487.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3904 + }, + { + "epoch": 487.99, + "eval_exact_match": 0.44190665342601787, + "eval_exec": 0.4846077457795432, + "eval_loss": 0.6196692585945129, + "eval_runtime": 197.0844, + "eval_samples_per_second": 6.596, + "step": 3904 + }, + { + "epoch": 488.5, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 3908 + }, + { + "epoch": 488.99, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3912 + }, + { + "epoch": 489.5, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 3916 + }, + { + "epoch": 489.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3920 + }, + { + "epoch": 490.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3924 + }, + { + "epoch": 490.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3928 + }, + { + "epoch": 491.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3932 + }, + { + "epoch": 491.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3936 + }, + { + "epoch": 492.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3940 + }, + { + "epoch": 492.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3944 + }, + { + "epoch": 493.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3948 + }, + { + "epoch": 493.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3952 + }, + { + "epoch": 494.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3956 + }, + { + "epoch": 494.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3960 + }, + { + "epoch": 495.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3964 + }, + { + "epoch": 495.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3968 + }, + { + "epoch": 495.99, + "eval_exact_match": 0.4339622641509434, + "eval_exec": 0.4756703078450844, + "eval_loss": 0.6161912679672241, + "eval_runtime": 201.1763, + "eval_samples_per_second": 6.462, + "step": 3968 + }, + { + "epoch": 496.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3972 + }, + { + "epoch": 496.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3976 + }, + { + "epoch": 497.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 3980 + }, + { + "epoch": 497.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 3984 + }, + { + "epoch": 498.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3988 + }, + { + "epoch": 498.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 3992 + }, + { + "epoch": 499.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 3996 + }, + { + "epoch": 499.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4000 + }, + { + "epoch": 500.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4004 + }, + { + "epoch": 500.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4008 + }, + { + "epoch": 501.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4012 + }, + { + "epoch": 501.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4016 + }, + { + "epoch": 502.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 4020 + }, + { + "epoch": 502.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4024 + }, + { + "epoch": 503.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4028 + }, + { + "epoch": 503.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4032 + }, + { + "epoch": 503.99, + "eval_exact_match": 0.4399205561072492, + "eval_exec": 0.4846077457795432, + "eval_loss": 0.6102380156517029, + "eval_runtime": 199.2958, + "eval_samples_per_second": 6.523, + "step": 4032 + }, + { + "epoch": 504.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4036 + }, + { + "epoch": 504.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4040 + }, + { + "epoch": 505.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4044 + }, + { + "epoch": 505.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4048 + }, + { + "epoch": 506.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4052 + }, + { + "epoch": 506.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4056 + }, + { + "epoch": 507.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4060 + }, + { + "epoch": 507.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4064 + }, + { + "epoch": 508.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4068 + }, + { + "epoch": 508.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4072 + }, + { + "epoch": 509.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4076 + }, + { + "epoch": 509.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4080 + }, + { + "epoch": 510.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4084 + }, + { + "epoch": 510.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4088 + }, + { + "epoch": 511.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4092 + }, + { + "epoch": 511.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4096 + }, + { + "epoch": 511.99, + "eval_exact_match": 0.4399205561072492, + "eval_exec": 0.48659384309831183, + "eval_loss": 0.619750440120697, + "eval_runtime": 211.0292, + "eval_samples_per_second": 6.16, + "step": 4096 + }, + { + "epoch": 512.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4100 + }, + { + "epoch": 512.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4104 + }, + { + "epoch": 513.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4108 + }, + { + "epoch": 513.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4112 + }, + { + "epoch": 514.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4116 + }, + { + "epoch": 514.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4120 + }, + { + "epoch": 515.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4124 + }, + { + "epoch": 515.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4128 + }, + { + "epoch": 516.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4132 + }, + { + "epoch": 516.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4136 + }, + { + "epoch": 517.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4140 + }, + { + "epoch": 517.99, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 4144 + }, + { + "epoch": 518.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4148 + }, + { + "epoch": 518.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4152 + }, + { + "epoch": 519.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4156 + }, + { + "epoch": 519.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4160 + }, + { + "epoch": 519.99, + "eval_exact_match": 0.4428997020854022, + "eval_exec": 0.4856007944389275, + "eval_loss": 0.6127471327781677, + "eval_runtime": 203.9094, + "eval_samples_per_second": 6.375, + "step": 4160 + }, + { + "epoch": 520.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4164 + }, + { + "epoch": 520.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4168 + }, + { + "epoch": 521.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4172 + }, + { + "epoch": 521.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4176 + }, + { + "epoch": 522.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4180 + }, + { + "epoch": 522.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4184 + }, + { + "epoch": 523.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4188 + }, + { + "epoch": 523.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4192 + }, + { + "epoch": 524.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4196 + }, + { + "epoch": 524.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4200 + }, + { + "epoch": 525.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4204 + }, + { + "epoch": 525.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4208 + }, + { + "epoch": 526.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4212 + }, + { + "epoch": 526.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4216 + }, + { + "epoch": 527.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4220 + }, + { + "epoch": 527.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4224 + }, + { + "epoch": 527.99, + "eval_exact_match": 0.4458788480635551, + "eval_exec": 0.48758689175769615, + "eval_loss": 0.6248003244400024, + "eval_runtime": 204.1177, + "eval_samples_per_second": 6.369, + "step": 4224 + }, + { + "epoch": 528.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4228 + }, + { + "epoch": 528.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4232 + }, + { + "epoch": 529.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4236 + }, + { + "epoch": 529.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4240 + }, + { + "epoch": 530.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4244 + }, + { + "epoch": 530.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4248 + }, + { + "epoch": 531.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4252 + }, + { + "epoch": 531.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4256 + }, + { + "epoch": 532.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4260 + }, + { + "epoch": 532.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4264 + }, + { + "epoch": 533.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4268 + }, + { + "epoch": 533.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4272 + }, + { + "epoch": 534.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4276 + }, + { + "epoch": 534.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4280 + }, + { + "epoch": 535.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4284 + }, + { + "epoch": 535.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4288 + }, + { + "epoch": 535.99, + "eval_exact_match": 0.4468718967229394, + "eval_exec": 0.48063555114200596, + "eval_loss": 0.6122114658355713, + "eval_runtime": 196.9407, + "eval_samples_per_second": 6.601, + "step": 4288 + }, + { + "epoch": 536.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4292 + }, + { + "epoch": 536.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4296 + }, + { + "epoch": 537.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4300 + }, + { + "epoch": 537.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4304 + }, + { + "epoch": 538.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 4308 + }, + { + "epoch": 538.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4312 + }, + { + "epoch": 539.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4316 + }, + { + "epoch": 539.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4320 + }, + { + "epoch": 540.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4324 + }, + { + "epoch": 540.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4328 + }, + { + "epoch": 541.5, + "learning_rate": 0.0001, + "loss": 0.0013, + "step": 4332 + }, + { + "epoch": 541.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4336 + }, + { + "epoch": 542.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4340 + }, + { + "epoch": 542.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4344 + }, + { + "epoch": 543.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4348 + }, + { + "epoch": 543.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4352 + }, + { + "epoch": 543.99, + "eval_exact_match": 0.43495531281032773, + "eval_exec": 0.4816285998013903, + "eval_loss": 0.605417013168335, + "eval_runtime": 200.1247, + "eval_samples_per_second": 6.496, + "step": 4352 + }, + { + "epoch": 544.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4356 + }, + { + "epoch": 544.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4360 + }, + { + "epoch": 545.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4364 + }, + { + "epoch": 545.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4368 + }, + { + "epoch": 546.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4372 + }, + { + "epoch": 546.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4376 + }, + { + "epoch": 547.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4380 + }, + { + "epoch": 547.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4384 + }, + { + "epoch": 548.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4388 + }, + { + "epoch": 548.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4392 + }, + { + "epoch": 549.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4396 + }, + { + "epoch": 549.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4400 + }, + { + "epoch": 550.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4404 + }, + { + "epoch": 550.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4408 + }, + { + "epoch": 551.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4412 + }, + { + "epoch": 551.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4416 + }, + { + "epoch": 551.99, + "eval_exact_match": 0.4369414101290963, + "eval_exec": 0.4746772591857001, + "eval_loss": 0.6194772124290466, + "eval_runtime": 195.0605, + "eval_samples_per_second": 6.665, + "step": 4416 + }, + { + "epoch": 552.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4420 + }, + { + "epoch": 552.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4424 + }, + { + "epoch": 553.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4428 + }, + { + "epoch": 553.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4432 + }, + { + "epoch": 554.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4436 + }, + { + "epoch": 554.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4440 + }, + { + "epoch": 555.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4444 + }, + { + "epoch": 555.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4448 + }, + { + "epoch": 556.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4452 + }, + { + "epoch": 556.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4456 + }, + { + "epoch": 557.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4460 + }, + { + "epoch": 557.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4464 + }, + { + "epoch": 558.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4468 + }, + { + "epoch": 558.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4472 + }, + { + "epoch": 559.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4476 + }, + { + "epoch": 559.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4480 + }, + { + "epoch": 559.99, + "eval_exact_match": 0.44885799404170806, + "eval_exec": 0.4925521350546177, + "eval_loss": 0.6179357767105103, + "eval_runtime": 194.9028, + "eval_samples_per_second": 6.67, + "step": 4480 + }, + { + "epoch": 560.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4484 + }, + { + "epoch": 560.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4488 + }, + { + "epoch": 561.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4492 + }, + { + "epoch": 561.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4496 + }, + { + "epoch": 562.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4500 + }, + { + "epoch": 562.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4504 + }, + { + "epoch": 563.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4508 + }, + { + "epoch": 563.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4512 + }, + { + "epoch": 564.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4516 + }, + { + "epoch": 564.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4520 + }, + { + "epoch": 565.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4524 + }, + { + "epoch": 565.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4528 + }, + { + "epoch": 566.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4532 + }, + { + "epoch": 566.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4536 + }, + { + "epoch": 567.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4540 + }, + { + "epoch": 567.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4544 + }, + { + "epoch": 567.99, + "eval_exact_match": 0.45084409136047665, + "eval_exec": 0.4915590863952334, + "eval_loss": 0.6036345958709717, + "eval_runtime": 196.9122, + "eval_samples_per_second": 6.602, + "step": 4544 + }, + { + "epoch": 568.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4548 + }, + { + "epoch": 568.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4552 + }, + { + "epoch": 569.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4556 + }, + { + "epoch": 569.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4560 + }, + { + "epoch": 570.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4564 + }, + { + "epoch": 570.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4568 + }, + { + "epoch": 571.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4572 + }, + { + "epoch": 571.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4576 + }, + { + "epoch": 572.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4580 + }, + { + "epoch": 572.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4584 + }, + { + "epoch": 573.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4588 + }, + { + "epoch": 573.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4592 + }, + { + "epoch": 574.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4596 + }, + { + "epoch": 574.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4600 + }, + { + "epoch": 575.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4604 + }, + { + "epoch": 575.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4608 + }, + { + "epoch": 575.99, + "eval_exact_match": 0.45183714001986097, + "eval_exec": 0.49354518371400197, + "eval_loss": 0.6121585369110107, + "eval_runtime": 216.7301, + "eval_samples_per_second": 5.998, + "step": 4608 + }, + { + "epoch": 576.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4612 + }, + { + "epoch": 576.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4616 + }, + { + "epoch": 577.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4620 + }, + { + "epoch": 577.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4624 + }, + { + "epoch": 578.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4628 + }, + { + "epoch": 578.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4632 + }, + { + "epoch": 579.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4636 + }, + { + "epoch": 579.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4640 + }, + { + "epoch": 580.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4644 + }, + { + "epoch": 580.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4648 + }, + { + "epoch": 581.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4652 + }, + { + "epoch": 581.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4656 + }, + { + "epoch": 582.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4660 + }, + { + "epoch": 582.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4664 + }, + { + "epoch": 583.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4668 + }, + { + "epoch": 583.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4672 + }, + { + "epoch": 583.99, + "eval_exact_match": 0.4438927507447865, + "eval_exec": 0.48063555114200596, + "eval_loss": 0.6172407269477844, + "eval_runtime": 196.291, + "eval_samples_per_second": 6.623, + "step": 4672 + }, + { + "epoch": 584.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4676 + }, + { + "epoch": 584.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4680 + }, + { + "epoch": 585.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4684 + }, + { + "epoch": 585.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4688 + }, + { + "epoch": 586.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4692 + }, + { + "epoch": 586.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4696 + }, + { + "epoch": 587.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4700 + }, + { + "epoch": 587.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4704 + }, + { + "epoch": 588.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4708 + }, + { + "epoch": 588.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4712 + }, + { + "epoch": 589.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4716 + }, + { + "epoch": 589.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4720 + }, + { + "epoch": 590.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4724 + }, + { + "epoch": 590.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4728 + }, + { + "epoch": 591.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4732 + }, + { + "epoch": 591.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4736 + }, + { + "epoch": 591.99, + "eval_exact_match": 0.43793445878848064, + "eval_exec": 0.4766633565044687, + "eval_loss": 0.6108298301696777, + "eval_runtime": 203.2897, + "eval_samples_per_second": 6.395, + "step": 4736 + }, + { + "epoch": 592.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4740 + }, + { + "epoch": 592.99, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4744 + }, + { + "epoch": 593.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4748 + }, + { + "epoch": 593.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4752 + }, + { + "epoch": 594.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 4756 + }, + { + "epoch": 594.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4760 + }, + { + "epoch": 595.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4764 + }, + { + "epoch": 595.99, + "learning_rate": 0.0001, + "loss": 0.0022, + "step": 4768 + }, + { + "epoch": 596.5, + "learning_rate": 0.0001, + "loss": 0.0012, + "step": 4772 + }, + { + "epoch": 596.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4776 + }, + { + "epoch": 597.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4780 + }, + { + "epoch": 597.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4784 + }, + { + "epoch": 598.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4788 + }, + { + "epoch": 598.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4792 + }, + { + "epoch": 599.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4796 + }, + { + "epoch": 599.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4800 + }, + { + "epoch": 599.99, + "eval_exact_match": 0.4369414101290963, + "eval_exec": 0.4746772591857001, + "eval_loss": 0.6420004367828369, + "eval_runtime": 195.6774, + "eval_samples_per_second": 6.644, + "step": 4800 + }, + { + "epoch": 600.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4804 + }, + { + "epoch": 600.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4808 + }, + { + "epoch": 601.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4812 + }, + { + "epoch": 601.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4816 + }, + { + "epoch": 602.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4820 + }, + { + "epoch": 602.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4824 + }, + { + "epoch": 603.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4828 + }, + { + "epoch": 603.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4832 + }, + { + "epoch": 604.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4836 + }, + { + "epoch": 604.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4840 + }, + { + "epoch": 605.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4844 + }, + { + "epoch": 605.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4848 + }, + { + "epoch": 606.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4852 + }, + { + "epoch": 606.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4856 + }, + { + "epoch": 607.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4860 + }, + { + "epoch": 607.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4864 + }, + { + "epoch": 607.99, + "eval_exact_match": 0.4438927507447865, + "eval_exec": 0.4726911618669315, + "eval_loss": 0.6424113512039185, + "eval_runtime": 202.6801, + "eval_samples_per_second": 6.414, + "step": 4864 + }, + { + "epoch": 608.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4868 + }, + { + "epoch": 608.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4872 + }, + { + "epoch": 609.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4876 + }, + { + "epoch": 609.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4880 + }, + { + "epoch": 610.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 4884 + }, + { + "epoch": 610.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4888 + }, + { + "epoch": 611.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4892 + }, + { + "epoch": 611.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4896 + }, + { + "epoch": 612.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4900 + }, + { + "epoch": 612.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4904 + }, + { + "epoch": 613.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4908 + }, + { + "epoch": 613.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4912 + }, + { + "epoch": 614.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4916 + }, + { + "epoch": 614.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4920 + }, + { + "epoch": 615.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4924 + }, + { + "epoch": 615.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4928 + }, + { + "epoch": 615.99, + "eval_exact_match": 0.4339622641509434, + "eval_exec": 0.46871896722939427, + "eval_loss": 0.630379319190979, + "eval_runtime": 198.8917, + "eval_samples_per_second": 6.536, + "step": 4928 + }, + { + "epoch": 616.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4932 + }, + { + "epoch": 616.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4936 + }, + { + "epoch": 617.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4940 + }, + { + "epoch": 617.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4944 + }, + { + "epoch": 618.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4948 + }, + { + "epoch": 618.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4952 + }, + { + "epoch": 619.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4956 + }, + { + "epoch": 619.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4960 + }, + { + "epoch": 620.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4964 + }, + { + "epoch": 620.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4968 + }, + { + "epoch": 621.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 4972 + }, + { + "epoch": 621.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4976 + }, + { + "epoch": 622.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4980 + }, + { + "epoch": 622.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4984 + }, + { + "epoch": 623.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4988 + }, + { + "epoch": 623.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 4992 + }, + { + "epoch": 623.99, + "eval_exact_match": 0.44190665342601787, + "eval_exec": 0.48361469712015887, + "eval_loss": 0.6309102177619934, + "eval_runtime": 191.856, + "eval_samples_per_second": 6.776, + "step": 4992 + }, + { + "epoch": 624.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 4996 + }, + { + "epoch": 624.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5000 + }, + { + "epoch": 625.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 5004 + }, + { + "epoch": 625.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5008 + }, + { + "epoch": 626.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5012 + }, + { + "epoch": 626.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5016 + }, + { + "epoch": 627.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5020 + }, + { + "epoch": 627.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5024 + }, + { + "epoch": 628.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5028 + }, + { + "epoch": 628.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5032 + }, + { + "epoch": 629.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5036 + }, + { + "epoch": 629.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5040 + }, + { + "epoch": 630.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5044 + }, + { + "epoch": 630.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5048 + }, + { + "epoch": 631.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5052 + }, + { + "epoch": 631.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5056 + }, + { + "epoch": 631.99, + "eval_exact_match": 0.4369414101290963, + "eval_exec": 0.46772591857000995, + "eval_loss": 0.6289202570915222, + "eval_runtime": 198.0458, + "eval_samples_per_second": 6.564, + "step": 5056 + }, + { + "epoch": 632.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5060 + }, + { + "epoch": 632.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5064 + }, + { + "epoch": 633.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5068 + }, + { + "epoch": 633.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5072 + }, + { + "epoch": 634.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5076 + }, + { + "epoch": 634.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5080 + }, + { + "epoch": 635.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5084 + }, + { + "epoch": 635.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5088 + }, + { + "epoch": 636.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5092 + }, + { + "epoch": 636.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5096 + }, + { + "epoch": 637.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5100 + }, + { + "epoch": 637.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5104 + }, + { + "epoch": 638.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5108 + }, + { + "epoch": 638.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5112 + }, + { + "epoch": 639.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5116 + }, + { + "epoch": 639.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5120 + }, + { + "epoch": 639.99, + "eval_exact_match": 0.44985104270109233, + "eval_exec": 0.4885799404170804, + "eval_loss": 0.6289829015731812, + "eval_runtime": 208.2718, + "eval_samples_per_second": 6.242, + "step": 5120 + }, + { + "epoch": 640.5, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 5124 + }, + { + "epoch": 640.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5128 + }, + { + "epoch": 641.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5132 + }, + { + "epoch": 641.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5136 + }, + { + "epoch": 642.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5140 + }, + { + "epoch": 642.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5144 + }, + { + "epoch": 643.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5148 + }, + { + "epoch": 643.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5152 + }, + { + "epoch": 644.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5156 + }, + { + "epoch": 644.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5160 + }, + { + "epoch": 645.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5164 + }, + { + "epoch": 645.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5168 + }, + { + "epoch": 646.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5172 + }, + { + "epoch": 646.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5176 + }, + { + "epoch": 647.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5180 + }, + { + "epoch": 647.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5184 + }, + { + "epoch": 647.99, + "eval_exact_match": 0.45084409136047665, + "eval_exec": 0.4846077457795432, + "eval_loss": 0.6306817531585693, + "eval_runtime": 212.6718, + "eval_samples_per_second": 6.113, + "step": 5184 + }, + { + "epoch": 648.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5188 + }, + { + "epoch": 648.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5192 + }, + { + "epoch": 649.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5196 + }, + { + "epoch": 649.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5200 + }, + { + "epoch": 650.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5204 + }, + { + "epoch": 650.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5208 + }, + { + "epoch": 651.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5212 + }, + { + "epoch": 651.99, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 5216 + }, + { + "epoch": 652.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5220 + }, + { + "epoch": 652.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5224 + }, + { + "epoch": 653.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5228 + }, + { + "epoch": 653.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5232 + }, + { + "epoch": 654.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5236 + }, + { + "epoch": 654.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5240 + }, + { + "epoch": 655.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5244 + }, + { + "epoch": 655.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5248 + }, + { + "epoch": 655.99, + "eval_exact_match": 0.43495531281032773, + "eval_exec": 0.4746772591857001, + "eval_loss": 0.6379679441452026, + "eval_runtime": 203.8275, + "eval_samples_per_second": 6.378, + "step": 5248 + }, + { + "epoch": 656.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5252 + }, + { + "epoch": 656.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5256 + }, + { + "epoch": 657.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5260 + }, + { + "epoch": 657.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5264 + }, + { + "epoch": 658.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5268 + }, + { + "epoch": 658.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5272 + }, + { + "epoch": 659.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5276 + }, + { + "epoch": 659.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5280 + }, + { + "epoch": 660.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5284 + }, + { + "epoch": 660.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5288 + }, + { + "epoch": 661.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5292 + }, + { + "epoch": 661.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5296 + }, + { + "epoch": 662.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5300 + }, + { + "epoch": 662.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5304 + }, + { + "epoch": 663.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5308 + }, + { + "epoch": 663.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5312 + }, + { + "epoch": 663.99, + "eval_exact_match": 0.43793445878848064, + "eval_exec": 0.4766633565044687, + "eval_loss": 0.6149209141731262, + "eval_runtime": 205.2947, + "eval_samples_per_second": 6.332, + "step": 5312 + }, + { + "epoch": 664.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5316 + }, + { + "epoch": 664.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5320 + }, + { + "epoch": 665.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5324 + }, + { + "epoch": 665.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5328 + }, + { + "epoch": 666.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5332 + }, + { + "epoch": 666.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5336 + }, + { + "epoch": 667.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5340 + }, + { + "epoch": 667.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5344 + }, + { + "epoch": 668.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5348 + }, + { + "epoch": 668.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5352 + }, + { + "epoch": 669.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5356 + }, + { + "epoch": 669.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5360 + }, + { + "epoch": 670.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5364 + }, + { + "epoch": 670.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5368 + }, + { + "epoch": 671.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5372 + }, + { + "epoch": 671.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5376 + }, + { + "epoch": 671.99, + "eval_exact_match": 0.4458788480635551, + "eval_exec": 0.47765640516385305, + "eval_loss": 0.6341748237609863, + "eval_runtime": 200.2501, + "eval_samples_per_second": 6.492, + "step": 5376 + }, + { + "epoch": 672.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5380 + }, + { + "epoch": 672.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5384 + }, + { + "epoch": 673.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5388 + }, + { + "epoch": 673.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5392 + }, + { + "epoch": 674.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5396 + }, + { + "epoch": 674.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5400 + }, + { + "epoch": 675.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5404 + }, + { + "epoch": 675.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5408 + }, + { + "epoch": 676.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5412 + }, + { + "epoch": 676.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5416 + }, + { + "epoch": 677.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5420 + }, + { + "epoch": 677.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5424 + }, + { + "epoch": 678.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5428 + }, + { + "epoch": 678.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5432 + }, + { + "epoch": 679.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5436 + }, + { + "epoch": 679.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5440 + }, + { + "epoch": 679.99, + "eval_exact_match": 0.43892750744786496, + "eval_exec": 0.4766633565044687, + "eval_loss": 0.6424917578697205, + "eval_runtime": 214.8147, + "eval_samples_per_second": 6.052, + "step": 5440 + }, + { + "epoch": 680.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5444 + }, + { + "epoch": 680.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5448 + }, + { + "epoch": 681.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5452 + }, + { + "epoch": 681.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5456 + }, + { + "epoch": 682.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5460 + }, + { + "epoch": 682.99, + "learning_rate": 0.0001, + "loss": 0.0029, + "step": 5464 + }, + { + "epoch": 683.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5468 + }, + { + "epoch": 683.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5472 + }, + { + "epoch": 684.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5476 + }, + { + "epoch": 684.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5480 + }, + { + "epoch": 685.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5484 + }, + { + "epoch": 685.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5488 + }, + { + "epoch": 686.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5492 + }, + { + "epoch": 686.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5496 + }, + { + "epoch": 687.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5500 + }, + { + "epoch": 687.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5504 + }, + { + "epoch": 687.99, + "eval_exact_match": 0.44786494538232374, + "eval_exec": 0.4826216484607746, + "eval_loss": 0.6195693612098694, + "eval_runtime": 205.7504, + "eval_samples_per_second": 6.318, + "step": 5504 + }, + { + "epoch": 688.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5508 + }, + { + "epoch": 688.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5512 + }, + { + "epoch": 689.5, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5516 + }, + { + "epoch": 689.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5520 + }, + { + "epoch": 690.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5524 + }, + { + "epoch": 690.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5528 + }, + { + "epoch": 691.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5532 + }, + { + "epoch": 691.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5536 + }, + { + "epoch": 692.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5540 + }, + { + "epoch": 692.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5544 + }, + { + "epoch": 693.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5548 + }, + { + "epoch": 693.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5552 + }, + { + "epoch": 694.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5556 + }, + { + "epoch": 694.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5560 + }, + { + "epoch": 695.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5564 + }, + { + "epoch": 695.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5568 + }, + { + "epoch": 695.99, + "eval_exact_match": 0.4448857994041708, + "eval_exec": 0.48361469712015887, + "eval_loss": 0.6261533498764038, + "eval_runtime": 213.1489, + "eval_samples_per_second": 6.099, + "step": 5568 + }, + { + "epoch": 696.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5572 + }, + { + "epoch": 696.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5576 + }, + { + "epoch": 697.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5580 + }, + { + "epoch": 697.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5584 + }, + { + "epoch": 698.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5588 + }, + { + "epoch": 698.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5592 + }, + { + "epoch": 699.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5596 + }, + { + "epoch": 699.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5600 + }, + { + "epoch": 700.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5604 + }, + { + "epoch": 700.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5608 + }, + { + "epoch": 701.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5612 + }, + { + "epoch": 701.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5616 + }, + { + "epoch": 702.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5620 + }, + { + "epoch": 702.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5624 + }, + { + "epoch": 703.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5628 + }, + { + "epoch": 703.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5632 + }, + { + "epoch": 703.99, + "eval_exact_match": 0.43892750744786496, + "eval_exec": 0.4756703078450844, + "eval_loss": 0.6435733437538147, + "eval_runtime": 208.5122, + "eval_samples_per_second": 6.235, + "step": 5632 + }, + { + "epoch": 704.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5636 + }, + { + "epoch": 704.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5640 + }, + { + "epoch": 705.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5644 + }, + { + "epoch": 705.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5648 + }, + { + "epoch": 706.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5652 + }, + { + "epoch": 706.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5656 + }, + { + "epoch": 707.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5660 + }, + { + "epoch": 707.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5664 + }, + { + "epoch": 708.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5668 + }, + { + "epoch": 708.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5672 + }, + { + "epoch": 709.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5676 + }, + { + "epoch": 709.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5680 + }, + { + "epoch": 710.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5684 + }, + { + "epoch": 710.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5688 + }, + { + "epoch": 711.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5692 + }, + { + "epoch": 711.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5696 + }, + { + "epoch": 711.99, + "eval_exact_match": 0.44985104270109233, + "eval_exec": 0.48659384309831183, + "eval_loss": 0.6287506818771362, + "eval_runtime": 206.1374, + "eval_samples_per_second": 6.306, + "step": 5696 + }, + { + "epoch": 712.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5700 + }, + { + "epoch": 712.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5704 + }, + { + "epoch": 713.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5708 + }, + { + "epoch": 713.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5712 + }, + { + "epoch": 714.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5716 + }, + { + "epoch": 714.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5720 + }, + { + "epoch": 715.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5724 + }, + { + "epoch": 715.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5728 + }, + { + "epoch": 716.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5732 + }, + { + "epoch": 716.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5736 + }, + { + "epoch": 717.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5740 + }, + { + "epoch": 717.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5744 + }, + { + "epoch": 718.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5748 + }, + { + "epoch": 718.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5752 + }, + { + "epoch": 719.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5756 + }, + { + "epoch": 719.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5760 + }, + { + "epoch": 719.99, + "eval_exact_match": 0.4448857994041708, + "eval_exec": 0.4856007944389275, + "eval_loss": 0.641159176826477, + "eval_runtime": 206.6444, + "eval_samples_per_second": 6.291, + "step": 5760 + }, + { + "epoch": 720.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5764 + }, + { + "epoch": 720.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5768 + }, + { + "epoch": 721.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5772 + }, + { + "epoch": 721.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5776 + }, + { + "epoch": 722.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5780 + }, + { + "epoch": 722.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5784 + }, + { + "epoch": 723.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5788 + }, + { + "epoch": 723.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 5792 + }, + { + "epoch": 724.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5796 + }, + { + "epoch": 724.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5800 + }, + { + "epoch": 725.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5804 + }, + { + "epoch": 725.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5808 + }, + { + "epoch": 726.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5812 + }, + { + "epoch": 726.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5816 + }, + { + "epoch": 727.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5820 + }, + { + "epoch": 727.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5824 + }, + { + "epoch": 727.99, + "eval_exact_match": 0.4458788480635551, + "eval_exec": 0.48659384309831183, + "eval_loss": 0.647663414478302, + "eval_runtime": 211.128, + "eval_samples_per_second": 6.157, + "step": 5824 + }, + { + "epoch": 728.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5828 + }, + { + "epoch": 728.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5832 + }, + { + "epoch": 729.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5836 + }, + { + "epoch": 729.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5840 + }, + { + "epoch": 730.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5844 + }, + { + "epoch": 730.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5848 + }, + { + "epoch": 731.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 5852 + }, + { + "epoch": 731.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5856 + }, + { + "epoch": 732.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5860 + }, + { + "epoch": 732.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5864 + }, + { + "epoch": 733.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 5868 + }, + { + "epoch": 733.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5872 + }, + { + "epoch": 734.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5876 + }, + { + "epoch": 734.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5880 + }, + { + "epoch": 735.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5884 + }, + { + "epoch": 735.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5888 + }, + { + "epoch": 735.99, + "eval_exact_match": 0.4448857994041708, + "eval_exec": 0.4846077457795432, + "eval_loss": 0.6436724662780762, + "eval_runtime": 200.2646, + "eval_samples_per_second": 6.491, + "step": 5888 + }, + { + "epoch": 736.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5892 + }, + { + "epoch": 736.99, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5896 + }, + { + "epoch": 737.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5900 + }, + { + "epoch": 737.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5904 + }, + { + "epoch": 738.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5908 + }, + { + "epoch": 738.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5912 + }, + { + "epoch": 739.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5916 + }, + { + "epoch": 739.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5920 + }, + { + "epoch": 740.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5924 + }, + { + "epoch": 740.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5928 + }, + { + "epoch": 741.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5932 + }, + { + "epoch": 741.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5936 + }, + { + "epoch": 742.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 5940 + }, + { + "epoch": 742.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 5944 + }, + { + "epoch": 743.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 5948 + }, + { + "epoch": 743.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5952 + }, + { + "epoch": 743.99, + "eval_exact_match": 0.44786494538232374, + "eval_exec": 0.4826216484607746, + "eval_loss": 0.648475706577301, + "eval_runtime": 203.6491, + "eval_samples_per_second": 6.384, + "step": 5952 + }, + { + "epoch": 744.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5956 + }, + { + "epoch": 744.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5960 + }, + { + "epoch": 745.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5964 + }, + { + "epoch": 745.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 5968 + }, + { + "epoch": 746.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 5972 + }, + { + "epoch": 746.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5976 + }, + { + "epoch": 747.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5980 + }, + { + "epoch": 747.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5984 + }, + { + "epoch": 748.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5988 + }, + { + "epoch": 748.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 5992 + }, + { + "epoch": 749.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 5996 + }, + { + "epoch": 749.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6000 + }, + { + "epoch": 750.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6004 + }, + { + "epoch": 750.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6008 + }, + { + "epoch": 751.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6012 + }, + { + "epoch": 751.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6016 + }, + { + "epoch": 751.99, + "eval_exact_match": 0.4468718967229394, + "eval_exec": 0.48063555114200596, + "eval_loss": 0.6662933826446533, + "eval_runtime": 196.1389, + "eval_samples_per_second": 6.628, + "step": 6016 + }, + { + "epoch": 752.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6020 + }, + { + "epoch": 752.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6024 + }, + { + "epoch": 753.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6028 + }, + { + "epoch": 753.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6032 + }, + { + "epoch": 754.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6036 + }, + { + "epoch": 754.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6040 + }, + { + "epoch": 755.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6044 + }, + { + "epoch": 755.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6048 + }, + { + "epoch": 756.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6052 + }, + { + "epoch": 756.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6056 + }, + { + "epoch": 757.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6060 + }, + { + "epoch": 757.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6064 + }, + { + "epoch": 758.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6068 + }, + { + "epoch": 758.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6072 + }, + { + "epoch": 759.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6076 + }, + { + "epoch": 759.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6080 + }, + { + "epoch": 759.99, + "eval_exact_match": 0.43793445878848064, + "eval_exec": 0.46971201588877853, + "eval_loss": 0.6573625206947327, + "eval_runtime": 203.6771, + "eval_samples_per_second": 6.383, + "step": 6080 + }, + { + "epoch": 760.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6084 + }, + { + "epoch": 760.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6088 + }, + { + "epoch": 761.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6092 + }, + { + "epoch": 761.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6096 + }, + { + "epoch": 762.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6100 + }, + { + "epoch": 762.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6104 + }, + { + "epoch": 763.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6108 + }, + { + "epoch": 763.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6112 + }, + { + "epoch": 764.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6116 + }, + { + "epoch": 764.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6120 + }, + { + "epoch": 765.5, + "learning_rate": 0.0001, + "loss": 0.0009, + "step": 6124 + }, + { + "epoch": 765.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6128 + }, + { + "epoch": 766.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6132 + }, + { + "epoch": 766.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6136 + }, + { + "epoch": 767.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6140 + }, + { + "epoch": 767.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6144 + }, + { + "epoch": 767.99, + "eval_exact_match": 0.45183714001986097, + "eval_exec": 0.48957298907646474, + "eval_loss": 0.6423913240432739, + "eval_runtime": 196.8271, + "eval_samples_per_second": 6.605, + "step": 6144 + }, + { + "epoch": 768.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6148 + }, + { + "epoch": 768.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6152 + }, + { + "epoch": 769.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6156 + }, + { + "epoch": 769.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6160 + }, + { + "epoch": 770.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6164 + }, + { + "epoch": 770.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6168 + }, + { + "epoch": 771.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6172 + }, + { + "epoch": 771.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6176 + }, + { + "epoch": 772.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6180 + }, + { + "epoch": 772.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6184 + }, + { + "epoch": 773.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6188 + }, + { + "epoch": 773.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6192 + }, + { + "epoch": 774.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6196 + }, + { + "epoch": 774.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6200 + }, + { + "epoch": 775.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6204 + }, + { + "epoch": 775.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6208 + }, + { + "epoch": 775.99, + "eval_exact_match": 0.4558093346573982, + "eval_exec": 0.49652432969215493, + "eval_loss": 0.6396003365516663, + "eval_runtime": 214.4091, + "eval_samples_per_second": 6.063, + "step": 6208 + }, + { + "epoch": 776.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6212 + }, + { + "epoch": 776.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6216 + }, + { + "epoch": 777.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6220 + }, + { + "epoch": 777.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6224 + }, + { + "epoch": 778.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6228 + }, + { + "epoch": 778.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6232 + }, + { + "epoch": 779.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6236 + }, + { + "epoch": 779.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6240 + }, + { + "epoch": 780.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6244 + }, + { + "epoch": 780.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6248 + }, + { + "epoch": 781.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6252 + }, + { + "epoch": 781.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6256 + }, + { + "epoch": 782.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6260 + }, + { + "epoch": 782.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6264 + }, + { + "epoch": 783.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6268 + }, + { + "epoch": 783.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6272 + }, + { + "epoch": 783.99, + "eval_exact_match": 0.43892750744786496, + "eval_exec": 0.4856007944389275, + "eval_loss": 0.6399450898170471, + "eval_runtime": 226.2412, + "eval_samples_per_second": 5.746, + "step": 6272 + }, + { + "epoch": 784.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6276 + }, + { + "epoch": 784.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6280 + }, + { + "epoch": 785.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6284 + }, + { + "epoch": 785.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6288 + }, + { + "epoch": 786.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6292 + }, + { + "epoch": 786.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6296 + }, + { + "epoch": 787.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6300 + }, + { + "epoch": 787.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6304 + }, + { + "epoch": 788.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6308 + }, + { + "epoch": 788.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6312 + }, + { + "epoch": 789.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6316 + }, + { + "epoch": 789.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6320 + }, + { + "epoch": 790.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6324 + }, + { + "epoch": 790.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6328 + }, + { + "epoch": 791.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6332 + }, + { + "epoch": 791.99, + "learning_rate": 0.0001, + "loss": 0.001, + "step": 6336 + }, + { + "epoch": 791.99, + "eval_exact_match": 0.44091360476663355, + "eval_exec": 0.47964250248262164, + "eval_loss": 0.6275960803031921, + "eval_runtime": 209.1576, + "eval_samples_per_second": 6.215, + "step": 6336 + }, + { + "epoch": 792.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6340 + }, + { + "epoch": 792.99, + "learning_rate": 0.0001, + "loss": 0.0005, + "step": 6344 + }, + { + "epoch": 793.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6348 + }, + { + "epoch": 793.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6352 + }, + { + "epoch": 794.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6356 + }, + { + "epoch": 794.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6360 + }, + { + "epoch": 795.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6364 + }, + { + "epoch": 795.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6368 + }, + { + "epoch": 796.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6372 + }, + { + "epoch": 796.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6376 + }, + { + "epoch": 797.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6380 + }, + { + "epoch": 797.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6384 + }, + { + "epoch": 798.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6388 + }, + { + "epoch": 798.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6392 + }, + { + "epoch": 799.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6396 + }, + { + "epoch": 799.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6400 + }, + { + "epoch": 799.99, + "eval_exact_match": 0.4468718967229394, + "eval_exec": 0.48659384309831183, + "eval_loss": 0.641415536403656, + "eval_runtime": 208.4431, + "eval_samples_per_second": 6.237, + "step": 6400 + }, + { + "epoch": 800.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6404 + }, + { + "epoch": 800.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6408 + }, + { + "epoch": 801.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6412 + }, + { + "epoch": 801.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6416 + }, + { + "epoch": 802.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6420 + }, + { + "epoch": 802.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6424 + }, + { + "epoch": 803.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6428 + }, + { + "epoch": 803.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6432 + }, + { + "epoch": 804.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6436 + }, + { + "epoch": 804.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6440 + }, + { + "epoch": 805.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6444 + }, + { + "epoch": 805.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6448 + }, + { + "epoch": 806.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6452 + }, + { + "epoch": 806.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6456 + }, + { + "epoch": 807.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6460 + }, + { + "epoch": 807.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6464 + }, + { + "epoch": 807.99, + "eval_exact_match": 0.44786494538232374, + "eval_exec": 0.4846077457795432, + "eval_loss": 0.6324633359909058, + "eval_runtime": 216.057, + "eval_samples_per_second": 6.017, + "step": 6464 + }, + { + "epoch": 808.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6468 + }, + { + "epoch": 808.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6472 + }, + { + "epoch": 809.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6476 + }, + { + "epoch": 809.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6480 + }, + { + "epoch": 810.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6484 + }, + { + "epoch": 810.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6488 + }, + { + "epoch": 811.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6492 + }, + { + "epoch": 811.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6496 + }, + { + "epoch": 812.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6500 + }, + { + "epoch": 812.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6504 + }, + { + "epoch": 813.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6508 + }, + { + "epoch": 813.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6512 + }, + { + "epoch": 814.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6516 + }, + { + "epoch": 814.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6520 + }, + { + "epoch": 815.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6524 + }, + { + "epoch": 815.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6528 + }, + { + "epoch": 815.99, + "eval_exact_match": 0.44190665342601787, + "eval_exec": 0.4885799404170804, + "eval_loss": 0.6281804442405701, + "eval_runtime": 213.8722, + "eval_samples_per_second": 6.078, + "step": 6528 + }, + { + "epoch": 816.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6532 + }, + { + "epoch": 816.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6536 + }, + { + "epoch": 817.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6540 + }, + { + "epoch": 817.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6544 + }, + { + "epoch": 818.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6548 + }, + { + "epoch": 818.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6552 + }, + { + "epoch": 819.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6556 + }, + { + "epoch": 819.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6560 + }, + { + "epoch": 820.5, + "learning_rate": 0.0001, + "loss": 0.0005, + "step": 6564 + }, + { + "epoch": 820.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6568 + }, + { + "epoch": 821.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6572 + }, + { + "epoch": 821.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6576 + }, + { + "epoch": 822.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6580 + }, + { + "epoch": 822.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6584 + }, + { + "epoch": 823.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6588 + }, + { + "epoch": 823.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6592 + }, + { + "epoch": 823.99, + "eval_exact_match": 0.45779543197616684, + "eval_exec": 0.4915590863952334, + "eval_loss": 0.6452751159667969, + "eval_runtime": 223.4105, + "eval_samples_per_second": 5.819, + "step": 6592 + }, + { + "epoch": 824.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6596 + }, + { + "epoch": 824.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6600 + }, + { + "epoch": 825.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6604 + }, + { + "epoch": 825.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6608 + }, + { + "epoch": 826.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6612 + }, + { + "epoch": 826.99, + "learning_rate": 0.0001, + "loss": 0.0005, + "step": 6616 + }, + { + "epoch": 827.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6620 + }, + { + "epoch": 827.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6624 + }, + { + "epoch": 828.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6628 + }, + { + "epoch": 828.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6632 + }, + { + "epoch": 829.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6636 + }, + { + "epoch": 829.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6640 + }, + { + "epoch": 830.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6644 + }, + { + "epoch": 830.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6648 + }, + { + "epoch": 831.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6652 + }, + { + "epoch": 831.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6656 + }, + { + "epoch": 831.99, + "eval_exact_match": 0.4637537239324727, + "eval_exec": 0.49751737835153925, + "eval_loss": 0.6585542559623718, + "eval_runtime": 209.9114, + "eval_samples_per_second": 6.193, + "step": 6656 + }, + { + "epoch": 832.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6660 + }, + { + "epoch": 832.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6664 + }, + { + "epoch": 833.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6668 + }, + { + "epoch": 833.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6672 + }, + { + "epoch": 834.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6676 + }, + { + "epoch": 834.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6680 + }, + { + "epoch": 835.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6684 + }, + { + "epoch": 835.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6688 + }, + { + "epoch": 836.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6692 + }, + { + "epoch": 836.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6696 + }, + { + "epoch": 837.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6700 + }, + { + "epoch": 837.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6704 + }, + { + "epoch": 838.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6708 + }, + { + "epoch": 838.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6712 + }, + { + "epoch": 839.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6716 + }, + { + "epoch": 839.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6720 + }, + { + "epoch": 839.99, + "eval_exact_match": 0.45978152929493543, + "eval_exec": 0.4945382323733863, + "eval_loss": 0.660439670085907, + "eval_runtime": 216.7543, + "eval_samples_per_second": 5.998, + "step": 6720 + }, + { + "epoch": 840.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6724 + }, + { + "epoch": 840.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6728 + }, + { + "epoch": 841.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6732 + }, + { + "epoch": 841.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6736 + }, + { + "epoch": 842.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6740 + }, + { + "epoch": 842.99, + "learning_rate": 0.0001, + "loss": 0.0005, + "step": 6744 + }, + { + "epoch": 843.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6748 + }, + { + "epoch": 843.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6752 + }, + { + "epoch": 844.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6756 + }, + { + "epoch": 844.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6760 + }, + { + "epoch": 845.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6764 + }, + { + "epoch": 845.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6768 + }, + { + "epoch": 846.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6772 + }, + { + "epoch": 846.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6776 + }, + { + "epoch": 847.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6780 + }, + { + "epoch": 847.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6784 + }, + { + "epoch": 847.99, + "eval_exact_match": 0.4468718967229394, + "eval_exec": 0.4885799404170804, + "eval_loss": 0.6388683319091797, + "eval_runtime": 225.744, + "eval_samples_per_second": 5.759, + "step": 6784 + }, + { + "epoch": 848.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6788 + }, + { + "epoch": 848.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6792 + }, + { + "epoch": 849.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6796 + }, + { + "epoch": 849.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6800 + }, + { + "epoch": 850.5, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6804 + }, + { + "epoch": 850.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6808 + }, + { + "epoch": 851.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6812 + }, + { + "epoch": 851.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6816 + }, + { + "epoch": 852.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6820 + }, + { + "epoch": 852.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6824 + }, + { + "epoch": 853.5, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6828 + }, + { + "epoch": 853.99, + "learning_rate": 0.0001, + "loss": 0.0008, + "step": 6832 + }, + { + "epoch": 854.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6836 + }, + { + "epoch": 854.99, + "learning_rate": 0.0001, + "loss": 0.0007, + "step": 6840 + }, + { + "epoch": 855.5, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6844 + }, + { + "epoch": 855.99, + "learning_rate": 0.0001, + "loss": 0.0006, + "step": 6848 + }, + { + "epoch": 855.99, + "eval_exact_match": 0.4657398212512413, + "eval_exec": 0.4925521350546177, + "eval_loss": 0.6665723323822021, + "eval_runtime": 199.9726, + "eval_samples_per_second": 6.501, + "step": 6848 + } + ], + "max_steps": 24576, + "num_train_epochs": 3072, + "total_flos": 7.45169724254251e+18, + "trial_name": null, + "trial_params": null +}