{ "best_metric": 0.4657398212512413, "best_model_checkpoint": "/mnt/chenzhi/dialogzoo/finetune/txt2sql_picard_cosql/checkpoint-6848", "epoch": 855.9933373712902, "global_step": 6848, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 0.0001, "loss": 8.9198, "step": 1 }, { "epoch": 0.5, "learning_rate": 0.0001, "loss": 4.4541, "step": 4 }, { "epoch": 0.99, "learning_rate": 0.0001, "loss": 2.0276, "step": 8 }, { "epoch": 1.5, "learning_rate": 0.0001, "loss": 1.1989, "step": 12 }, { "epoch": 1.99, "learning_rate": 0.0001, "loss": 0.8306, "step": 16 }, { "epoch": 2.5, "learning_rate": 0.0001, "loss": 0.6585, "step": 20 }, { "epoch": 2.99, "learning_rate": 0.0001, "loss": 0.5417, "step": 24 }, { "epoch": 3.5, "learning_rate": 0.0001, "loss": 0.4681, "step": 28 }, { "epoch": 3.99, "learning_rate": 0.0001, "loss": 0.4011, "step": 32 }, { "epoch": 4.5, "learning_rate": 0.0001, "loss": 0.3661, "step": 36 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 0.3404, "step": 40 }, { "epoch": 5.5, "learning_rate": 0.0001, "loss": 0.3268, "step": 44 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 0.2935, "step": 48 }, { "epoch": 6.5, "learning_rate": 0.0001, "loss": 0.2853, "step": 52 }, { "epoch": 6.99, "learning_rate": 0.0001, "loss": 0.2694, "step": 56 }, { "epoch": 7.5, "learning_rate": 0.0001, "loss": 0.2601, "step": 60 }, { "epoch": 7.99, "learning_rate": 0.0001, "loss": 0.2487, "step": 64 }, { "epoch": 7.99, "eval_exact_match": 0.24726911618669314, "eval_exec": 0.30883813306852037, "eval_loss": 0.32532989978790283, "eval_runtime": 219.0487, "eval_samples_per_second": 5.935, "step": 64 }, { "epoch": 8.5, "learning_rate": 0.0001, "loss": 0.2352, "step": 68 }, { "epoch": 8.99, "learning_rate": 0.0001, "loss": 0.2257, "step": 72 }, { "epoch": 9.5, "learning_rate": 0.0001, "loss": 0.222, "step": 76 }, { "epoch": 9.99, "learning_rate": 0.0001, "loss": 0.2146, "step": 80 }, { "epoch": 10.5, "learning_rate": 0.0001, "loss": 0.2115, "step": 84 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 0.2032, "step": 88 }, { "epoch": 11.5, "learning_rate": 0.0001, "loss": 0.1954, "step": 92 }, { "epoch": 11.99, "learning_rate": 0.0001, "loss": 0.186, "step": 96 }, { "epoch": 12.5, "learning_rate": 0.0001, "loss": 0.1808, "step": 100 }, { "epoch": 12.99, "learning_rate": 0.0001, "loss": 0.18, "step": 104 }, { "epoch": 13.5, "learning_rate": 0.0001, "loss": 0.177, "step": 108 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 0.1714, "step": 112 }, { "epoch": 14.5, "learning_rate": 0.0001, "loss": 0.1714, "step": 116 }, { "epoch": 14.99, "learning_rate": 0.0001, "loss": 0.1615, "step": 120 }, { "epoch": 15.5, "learning_rate": 0.0001, "loss": 0.1599, "step": 124 }, { "epoch": 15.99, "learning_rate": 0.0001, "loss": 0.1565, "step": 128 }, { "epoch": 15.99, "eval_exact_match": 0.33068520357497516, "eval_exec": 0.3843098311817279, "eval_loss": 0.29061898589134216, "eval_runtime": 200.5643, "eval_samples_per_second": 6.482, "step": 128 }, { "epoch": 16.5, "learning_rate": 0.0001, "loss": 0.1513, "step": 132 }, { "epoch": 16.99, "learning_rate": 0.0001, "loss": 0.143, "step": 136 }, { "epoch": 17.5, "learning_rate": 0.0001, "loss": 0.1426, "step": 140 }, { "epoch": 17.99, "learning_rate": 0.0001, "loss": 0.1403, "step": 144 }, { "epoch": 18.5, "learning_rate": 0.0001, "loss": 0.1441, "step": 148 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 0.1378, "step": 152 }, { "epoch": 19.5, "learning_rate": 0.0001, "loss": 0.1344, "step": 156 }, { "epoch": 19.99, "learning_rate": 0.0001, "loss": 0.1293, "step": 160 }, { "epoch": 20.5, "learning_rate": 0.0001, "loss": 0.1314, "step": 164 }, { "epoch": 20.99, "learning_rate": 0.0001, "loss": 0.1219, "step": 168 }, { "epoch": 21.5, "learning_rate": 0.0001, "loss": 0.1196, "step": 172 }, { "epoch": 21.99, "learning_rate": 0.0001, "loss": 0.1192, "step": 176 }, { "epoch": 22.5, "learning_rate": 0.0001, "loss": 0.1203, "step": 180 }, { "epoch": 22.99, "learning_rate": 0.0001, "loss": 0.1189, "step": 184 }, { "epoch": 23.5, "learning_rate": 0.0001, "loss": 0.1154, "step": 188 }, { "epoch": 23.99, "learning_rate": 0.0001, "loss": 0.1142, "step": 192 }, { "epoch": 23.99, "eval_exact_match": 0.3426017874875869, "eval_exec": 0.4011916583912612, "eval_loss": 0.28066790103912354, "eval_runtime": 235.2994, "eval_samples_per_second": 5.525, "step": 192 }, { "epoch": 24.5, "learning_rate": 0.0001, "loss": 0.1104, "step": 196 }, { "epoch": 24.99, "learning_rate": 0.0001, "loss": 0.1092, "step": 200 }, { "epoch": 25.5, "learning_rate": 0.0001, "loss": 0.1079, "step": 204 }, { "epoch": 25.99, "learning_rate": 0.0001, "loss": 0.1043, "step": 208 }, { "epoch": 26.5, "learning_rate": 0.0001, "loss": 0.1068, "step": 212 }, { "epoch": 26.99, "learning_rate": 0.0001, "loss": 0.1009, "step": 216 }, { "epoch": 27.5, "learning_rate": 0.0001, "loss": 0.1033, "step": 220 }, { "epoch": 27.99, "learning_rate": 0.0001, "loss": 0.1013, "step": 224 }, { "epoch": 28.5, "learning_rate": 0.0001, "loss": 0.0986, "step": 228 }, { "epoch": 28.99, "learning_rate": 0.0001, "loss": 0.0951, "step": 232 }, { "epoch": 29.5, "learning_rate": 0.0001, "loss": 0.0947, "step": 236 }, { "epoch": 29.99, "learning_rate": 0.0001, "loss": 0.0917, "step": 240 }, { "epoch": 30.5, "learning_rate": 0.0001, "loss": 0.0959, "step": 244 }, { "epoch": 30.99, "learning_rate": 0.0001, "loss": 0.0922, "step": 248 }, { "epoch": 31.5, "learning_rate": 0.0001, "loss": 0.0892, "step": 252 }, { "epoch": 31.99, "learning_rate": 0.0001, "loss": 0.0885, "step": 256 }, { "epoch": 31.99, "eval_exact_match": 0.36742800397219466, "eval_exec": 0.41509433962264153, "eval_loss": 0.28681814670562744, "eval_runtime": 236.9193, "eval_samples_per_second": 5.487, "step": 256 }, { "epoch": 32.5, "learning_rate": 0.0001, "loss": 0.087, "step": 260 }, { "epoch": 32.99, "learning_rate": 0.0001, "loss": 0.0853, "step": 264 }, { "epoch": 33.5, "learning_rate": 0.0001, "loss": 0.0847, "step": 268 }, { "epoch": 33.99, "learning_rate": 0.0001, "loss": 0.0821, "step": 272 }, { "epoch": 34.5, "learning_rate": 0.0001, "loss": 0.0802, "step": 276 }, { "epoch": 34.99, "learning_rate": 0.0001, "loss": 0.084, "step": 280 }, { "epoch": 35.5, "learning_rate": 0.0001, "loss": 0.0844, "step": 284 }, { "epoch": 35.99, "learning_rate": 0.0001, "loss": 0.0803, "step": 288 }, { "epoch": 36.5, "learning_rate": 0.0001, "loss": 0.0786, "step": 292 }, { "epoch": 36.99, "learning_rate": 0.0001, "loss": 0.0735, "step": 296 }, { "epoch": 37.5, "learning_rate": 0.0001, "loss": 0.0784, "step": 300 }, { "epoch": 37.99, "learning_rate": 0.0001, "loss": 0.0733, "step": 304 }, { "epoch": 38.5, "learning_rate": 0.0001, "loss": 0.075, "step": 308 }, { "epoch": 38.99, "learning_rate": 0.0001, "loss": 0.0747, "step": 312 }, { "epoch": 39.5, "learning_rate": 0.0001, "loss": 0.0733, "step": 316 }, { "epoch": 39.99, "learning_rate": 0.0001, "loss": 0.0712, "step": 320 }, { "epoch": 39.99, "eval_exact_match": 0.407149950347567, "eval_exec": 0.44885799404170806, "eval_loss": 0.29831913113594055, "eval_runtime": 183.5809, "eval_samples_per_second": 7.081, "step": 320 }, { "epoch": 40.5, "learning_rate": 0.0001, "loss": 0.0706, "step": 324 }, { "epoch": 40.99, "learning_rate": 0.0001, "loss": 0.071, "step": 328 }, { "epoch": 41.5, "learning_rate": 0.0001, "loss": 0.0675, "step": 332 }, { "epoch": 41.99, "learning_rate": 0.0001, "loss": 0.0663, "step": 336 }, { "epoch": 42.5, "learning_rate": 0.0001, "loss": 0.0652, "step": 340 }, { "epoch": 42.99, "learning_rate": 0.0001, "loss": 0.068, "step": 344 }, { "epoch": 43.5, "learning_rate": 0.0001, "loss": 0.066, "step": 348 }, { "epoch": 43.99, "learning_rate": 0.0001, "loss": 0.0658, "step": 352 }, { "epoch": 44.5, "learning_rate": 0.0001, "loss": 0.0628, "step": 356 }, { "epoch": 44.99, "learning_rate": 0.0001, "loss": 0.063, "step": 360 }, { "epoch": 45.5, "learning_rate": 0.0001, "loss": 0.0607, "step": 364 }, { "epoch": 45.99, "learning_rate": 0.0001, "loss": 0.0605, "step": 368 }, { "epoch": 46.5, "learning_rate": 0.0001, "loss": 0.06, "step": 372 }, { "epoch": 46.99, "learning_rate": 0.0001, "loss": 0.0618, "step": 376 }, { "epoch": 47.5, "learning_rate": 0.0001, "loss": 0.0605, "step": 380 }, { "epoch": 47.99, "learning_rate": 0.0001, "loss": 0.0586, "step": 384 }, { "epoch": 47.99, "eval_exact_match": 0.423038728897716, "eval_exec": 0.4637537239324727, "eval_loss": 0.31259259581565857, "eval_runtime": 192.9699, "eval_samples_per_second": 6.737, "step": 384 }, { "epoch": 48.5, "learning_rate": 0.0001, "loss": 0.058, "step": 388 }, { "epoch": 48.99, "learning_rate": 0.0001, "loss": 0.0573, "step": 392 }, { "epoch": 49.5, "learning_rate": 0.0001, "loss": 0.0594, "step": 396 }, { "epoch": 49.99, "learning_rate": 0.0001, "loss": 0.0552, "step": 400 }, { "epoch": 50.5, "learning_rate": 0.0001, "loss": 0.056, "step": 404 }, { "epoch": 50.99, "learning_rate": 0.0001, "loss": 0.0537, "step": 408 }, { "epoch": 51.5, "learning_rate": 0.0001, "loss": 0.054, "step": 412 }, { "epoch": 51.99, "learning_rate": 0.0001, "loss": 0.0555, "step": 416 }, { "epoch": 52.5, "learning_rate": 0.0001, "loss": 0.0515, "step": 420 }, { "epoch": 52.99, "learning_rate": 0.0001, "loss": 0.0522, "step": 424 }, { "epoch": 53.5, "learning_rate": 0.0001, "loss": 0.0507, "step": 428 }, { "epoch": 53.99, "learning_rate": 0.0001, "loss": 0.051, "step": 432 }, { "epoch": 54.5, "learning_rate": 0.0001, "loss": 0.0492, "step": 436 }, { "epoch": 54.99, "learning_rate": 0.0001, "loss": 0.0503, "step": 440 }, { "epoch": 55.5, "learning_rate": 0.0001, "loss": 0.0484, "step": 444 }, { "epoch": 55.99, "learning_rate": 0.0001, "loss": 0.0486, "step": 448 }, { "epoch": 55.99, "eval_exact_match": 0.4270109235352532, "eval_exec": 0.4657398212512413, "eval_loss": 0.32657375931739807, "eval_runtime": 181.0806, "eval_samples_per_second": 7.179, "step": 448 }, { "epoch": 56.5, "learning_rate": 0.0001, "loss": 0.0491, "step": 452 }, { "epoch": 56.99, "learning_rate": 0.0001, "loss": 0.0505, "step": 456 }, { "epoch": 57.5, "learning_rate": 0.0001, "loss": 0.0491, "step": 460 }, { "epoch": 57.99, "learning_rate": 0.0001, "loss": 0.0487, "step": 464 }, { "epoch": 58.5, "learning_rate": 0.0001, "loss": 0.0456, "step": 468 }, { "epoch": 58.99, "learning_rate": 0.0001, "loss": 0.0454, "step": 472 }, { "epoch": 59.5, "learning_rate": 0.0001, "loss": 0.0449, "step": 476 }, { "epoch": 59.99, "learning_rate": 0.0001, "loss": 0.0429, "step": 480 }, { "epoch": 60.5, "learning_rate": 0.0001, "loss": 0.0441, "step": 484 }, { "epoch": 60.99, "learning_rate": 0.0001, "loss": 0.0451, "step": 488 }, { "epoch": 61.5, "learning_rate": 0.0001, "loss": 0.0447, "step": 492 }, { "epoch": 61.99, "learning_rate": 0.0001, "loss": 0.0423, "step": 496 }, { "epoch": 62.5, "learning_rate": 0.0001, "loss": 0.0429, "step": 500 }, { "epoch": 62.99, "learning_rate": 0.0001, "loss": 0.0412, "step": 504 }, { "epoch": 63.5, "learning_rate": 0.0001, "loss": 0.0423, "step": 508 }, { "epoch": 63.99, "learning_rate": 0.0001, "loss": 0.0412, "step": 512 }, { "epoch": 63.99, "eval_exact_match": 0.40913604766633566, "eval_exec": 0.45878848063555117, "eval_loss": 0.3272022306919098, "eval_runtime": 205.684, "eval_samples_per_second": 6.32, "step": 512 }, { "epoch": 64.5, "learning_rate": 0.0001, "loss": 0.0397, "step": 516 }, { "epoch": 64.99, "learning_rate": 0.0001, "loss": 0.0394, "step": 520 }, { "epoch": 65.5, "learning_rate": 0.0001, "loss": 0.0411, "step": 524 }, { "epoch": 65.99, "learning_rate": 0.0001, "loss": 0.0418, "step": 528 }, { "epoch": 66.5, "learning_rate": 0.0001, "loss": 0.038, "step": 532 }, { "epoch": 66.99, "learning_rate": 0.0001, "loss": 0.0388, "step": 536 }, { "epoch": 67.5, "learning_rate": 0.0001, "loss": 0.0383, "step": 540 }, { "epoch": 67.99, "learning_rate": 0.0001, "loss": 0.0384, "step": 544 }, { "epoch": 68.5, "learning_rate": 0.0001, "loss": 0.039, "step": 548 }, { "epoch": 68.99, "learning_rate": 0.0001, "loss": 0.0366, "step": 552 }, { "epoch": 69.5, "learning_rate": 0.0001, "loss": 0.0364, "step": 556 }, { "epoch": 69.99, "learning_rate": 0.0001, "loss": 0.0363, "step": 560 }, { "epoch": 70.5, "learning_rate": 0.0001, "loss": 0.036, "step": 564 }, { "epoch": 70.99, "learning_rate": 0.0001, "loss": 0.0358, "step": 568 }, { "epoch": 71.5, "learning_rate": 0.0001, "loss": 0.0339, "step": 572 }, { "epoch": 71.99, "learning_rate": 0.0001, "loss": 0.0374, "step": 576 }, { "epoch": 71.99, "eval_exact_match": 0.435948361469712, "eval_exec": 0.4766633565044687, "eval_loss": 0.3480900824069977, "eval_runtime": 174.2765, "eval_samples_per_second": 7.459, "step": 576 }, { "epoch": 72.5, "learning_rate": 0.0001, "loss": 0.0376, "step": 580 }, { "epoch": 72.99, "learning_rate": 0.0001, "loss": 0.0341, "step": 584 }, { "epoch": 73.5, "learning_rate": 0.0001, "loss": 0.0329, "step": 588 }, { "epoch": 73.99, "learning_rate": 0.0001, "loss": 0.0329, "step": 592 }, { "epoch": 74.5, "learning_rate": 0.0001, "loss": 0.0334, "step": 596 }, { "epoch": 74.99, "learning_rate": 0.0001, "loss": 0.0334, "step": 600 }, { "epoch": 75.5, "learning_rate": 0.0001, "loss": 0.0327, "step": 604 }, { "epoch": 75.99, "learning_rate": 0.0001, "loss": 0.0328, "step": 608 }, { "epoch": 76.5, "learning_rate": 0.0001, "loss": 0.0321, "step": 612 }, { "epoch": 76.99, "learning_rate": 0.0001, "loss": 0.0327, "step": 616 }, { "epoch": 77.5, "learning_rate": 0.0001, "loss": 0.0321, "step": 620 }, { "epoch": 77.99, "learning_rate": 0.0001, "loss": 0.03, "step": 624 }, { "epoch": 78.5, "learning_rate": 0.0001, "loss": 0.0313, "step": 628 }, { "epoch": 78.99, "learning_rate": 0.0001, "loss": 0.0335, "step": 632 }, { "epoch": 79.5, "learning_rate": 0.0001, "loss": 0.0297, "step": 636 }, { "epoch": 79.99, "learning_rate": 0.0001, "loss": 0.0293, "step": 640 }, { "epoch": 79.99, "eval_exact_match": 0.42899702085402186, "eval_exec": 0.46871896722939427, "eval_loss": 0.3477668762207031, "eval_runtime": 228.958, "eval_samples_per_second": 5.678, "step": 640 }, { "epoch": 80.5, "learning_rate": 0.0001, "loss": 0.0284, "step": 644 }, { "epoch": 80.99, "learning_rate": 0.0001, "loss": 0.028, "step": 648 }, { "epoch": 81.5, "learning_rate": 0.0001, "loss": 0.0311, "step": 652 }, { "epoch": 81.99, "learning_rate": 0.0001, "loss": 0.0286, "step": 656 }, { "epoch": 82.5, "learning_rate": 0.0001, "loss": 0.0299, "step": 660 }, { "epoch": 82.99, "learning_rate": 0.0001, "loss": 0.0294, "step": 664 }, { "epoch": 83.5, "learning_rate": 0.0001, "loss": 0.0265, "step": 668 }, { "epoch": 83.99, "learning_rate": 0.0001, "loss": 0.0269, "step": 672 }, { "epoch": 84.5, "learning_rate": 0.0001, "loss": 0.0267, "step": 676 }, { "epoch": 84.99, "learning_rate": 0.0001, "loss": 0.0269, "step": 680 }, { "epoch": 85.5, "learning_rate": 0.0001, "loss": 0.027, "step": 684 }, { "epoch": 85.99, "learning_rate": 0.0001, "loss": 0.0269, "step": 688 }, { "epoch": 86.5, "learning_rate": 0.0001, "loss": 0.026, "step": 692 }, { "epoch": 86.99, "learning_rate": 0.0001, "loss": 0.0259, "step": 696 }, { "epoch": 87.5, "learning_rate": 0.0001, "loss": 0.0251, "step": 700 }, { "epoch": 87.99, "learning_rate": 0.0001, "loss": 0.0253, "step": 704 }, { "epoch": 87.99, "eval_exact_match": 0.423038728897716, "eval_exec": 0.46971201588877853, "eval_loss": 0.36298030614852905, "eval_runtime": 237.4093, "eval_samples_per_second": 5.476, "step": 704 }, { "epoch": 88.5, "learning_rate": 0.0001, "loss": 0.0252, "step": 708 }, { "epoch": 88.99, "learning_rate": 0.0001, "loss": 0.0254, "step": 712 }, { "epoch": 89.5, "learning_rate": 0.0001, "loss": 0.0262, "step": 716 }, { "epoch": 89.99, "learning_rate": 0.0001, "loss": 0.0261, "step": 720 }, { "epoch": 90.5, "learning_rate": 0.0001, "loss": 0.0236, "step": 724 }, { "epoch": 90.99, "learning_rate": 0.0001, "loss": 0.024, "step": 728 }, { "epoch": 91.5, "learning_rate": 0.0001, "loss": 0.0231, "step": 732 }, { "epoch": 91.99, "learning_rate": 0.0001, "loss": 0.0241, "step": 736 }, { "epoch": 92.5, "learning_rate": 0.0001, "loss": 0.0244, "step": 740 }, { "epoch": 92.99, "learning_rate": 0.0001, "loss": 0.0242, "step": 744 }, { "epoch": 93.5, "learning_rate": 0.0001, "loss": 0.023, "step": 748 }, { "epoch": 93.99, "learning_rate": 0.0001, "loss": 0.025, "step": 752 }, { "epoch": 94.5, "learning_rate": 0.0001, "loss": 0.0236, "step": 756 }, { "epoch": 94.99, "learning_rate": 0.0001, "loss": 0.022, "step": 760 }, { "epoch": 95.5, "learning_rate": 0.0001, "loss": 0.0207, "step": 764 }, { "epoch": 95.99, "learning_rate": 0.0001, "loss": 0.0223, "step": 768 }, { "epoch": 95.99, "eval_exact_match": 0.4329692154915591, "eval_exec": 0.46871896722939427, "eval_loss": 0.3830316960811615, "eval_runtime": 229.6043, "eval_samples_per_second": 5.662, "step": 768 }, { "epoch": 96.5, "learning_rate": 0.0001, "loss": 0.0226, "step": 772 }, { "epoch": 96.99, "learning_rate": 0.0001, "loss": 0.0215, "step": 776 }, { "epoch": 97.5, "learning_rate": 0.0001, "loss": 0.0213, "step": 780 }, { "epoch": 97.99, "learning_rate": 0.0001, "loss": 0.0209, "step": 784 }, { "epoch": 98.5, "learning_rate": 0.0001, "loss": 0.0199, "step": 788 }, { "epoch": 98.99, "learning_rate": 0.0001, "loss": 0.0208, "step": 792 }, { "epoch": 99.5, "learning_rate": 0.0001, "loss": 0.0205, "step": 796 }, { "epoch": 99.99, "learning_rate": 0.0001, "loss": 0.0209, "step": 800 }, { "epoch": 100.5, "learning_rate": 0.0001, "loss": 0.0217, "step": 804 }, { "epoch": 100.99, "learning_rate": 0.0001, "loss": 0.02, "step": 808 }, { "epoch": 101.5, "learning_rate": 0.0001, "loss": 0.0192, "step": 812 }, { "epoch": 101.99, "learning_rate": 0.0001, "loss": 0.0195, "step": 816 }, { "epoch": 102.5, "learning_rate": 0.0001, "loss": 0.0194, "step": 820 }, { "epoch": 102.99, "learning_rate": 0.0001, "loss": 0.0193, "step": 824 }, { "epoch": 103.5, "learning_rate": 0.0001, "loss": 0.0212, "step": 828 }, { "epoch": 103.99, "learning_rate": 0.0001, "loss": 0.0195, "step": 832 }, { "epoch": 103.99, "eval_exact_match": 0.41807348560079444, "eval_exec": 0.4667328699106256, "eval_loss": 0.388680100440979, "eval_runtime": 220.6138, "eval_samples_per_second": 5.893, "step": 832 }, { "epoch": 104.5, "learning_rate": 0.0001, "loss": 0.018, "step": 836 }, { "epoch": 104.99, "learning_rate": 0.0001, "loss": 0.0185, "step": 840 }, { "epoch": 105.5, "learning_rate": 0.0001, "loss": 0.0195, "step": 844 }, { "epoch": 105.99, "learning_rate": 0.0001, "loss": 0.0196, "step": 848 }, { "epoch": 106.5, "learning_rate": 0.0001, "loss": 0.0189, "step": 852 }, { "epoch": 106.99, "learning_rate": 0.0001, "loss": 0.0182, "step": 856 }, { "epoch": 107.5, "learning_rate": 0.0001, "loss": 0.0168, "step": 860 }, { "epoch": 107.99, "learning_rate": 0.0001, "loss": 0.018, "step": 864 }, { "epoch": 108.5, "learning_rate": 0.0001, "loss": 0.0181, "step": 868 }, { "epoch": 108.99, "learning_rate": 0.0001, "loss": 0.0179, "step": 872 }, { "epoch": 109.5, "learning_rate": 0.0001, "loss": 0.017, "step": 876 }, { "epoch": 109.99, "learning_rate": 0.0001, "loss": 0.0187, "step": 880 }, { "epoch": 110.5, "learning_rate": 0.0001, "loss": 0.0178, "step": 884 }, { "epoch": 110.99, "learning_rate": 0.0001, "loss": 0.0161, "step": 888 }, { "epoch": 111.5, "learning_rate": 0.0001, "loss": 0.0164, "step": 892 }, { "epoch": 111.99, "learning_rate": 0.0001, "loss": 0.0164, "step": 896 }, { "epoch": 111.99, "eval_exact_match": 0.41012909632572, "eval_exec": 0.464746772591857, "eval_loss": 0.3992396891117096, "eval_runtime": 240.1288, "eval_samples_per_second": 5.414, "step": 896 }, { "epoch": 112.5, "learning_rate": 0.0001, "loss": 0.0172, "step": 900 }, { "epoch": 112.99, "learning_rate": 0.0001, "loss": 0.0173, "step": 904 }, { "epoch": 113.5, "learning_rate": 0.0001, "loss": 0.0163, "step": 908 }, { "epoch": 113.99, "learning_rate": 0.0001, "loss": 0.0153, "step": 912 }, { "epoch": 114.5, "learning_rate": 0.0001, "loss": 0.0157, "step": 916 }, { "epoch": 114.99, "learning_rate": 0.0001, "loss": 0.0159, "step": 920 }, { "epoch": 115.5, "learning_rate": 0.0001, "loss": 0.016, "step": 924 }, { "epoch": 115.99, "learning_rate": 0.0001, "loss": 0.0152, "step": 928 }, { "epoch": 116.5, "learning_rate": 0.0001, "loss": 0.0159, "step": 932 }, { "epoch": 116.99, "learning_rate": 0.0001, "loss": 0.0161, "step": 936 }, { "epoch": 117.5, "learning_rate": 0.0001, "loss": 0.0152, "step": 940 }, { "epoch": 117.99, "learning_rate": 0.0001, "loss": 0.0149, "step": 944 }, { "epoch": 118.5, "learning_rate": 0.0001, "loss": 0.0145, "step": 948 }, { "epoch": 118.99, "learning_rate": 0.0001, "loss": 0.0151, "step": 952 }, { "epoch": 119.5, "learning_rate": 0.0001, "loss": 0.0165, "step": 956 }, { "epoch": 119.99, "learning_rate": 0.0001, "loss": 0.0179, "step": 960 }, { "epoch": 119.99, "eval_exact_match": 0.4329692154915591, "eval_exec": 0.4746772591857001, "eval_loss": 0.42190492153167725, "eval_runtime": 202.7706, "eval_samples_per_second": 6.411, "step": 960 }, { "epoch": 120.5, "learning_rate": 0.0001, "loss": 0.017, "step": 964 }, { "epoch": 120.99, "learning_rate": 0.0001, "loss": 0.014, "step": 968 }, { "epoch": 121.5, "learning_rate": 0.0001, "loss": 0.0144, "step": 972 }, { "epoch": 121.99, "learning_rate": 0.0001, "loss": 0.0141, "step": 976 }, { "epoch": 122.5, "learning_rate": 0.0001, "loss": 0.0137, "step": 980 }, { "epoch": 122.99, "learning_rate": 0.0001, "loss": 0.0143, "step": 984 }, { "epoch": 123.5, "learning_rate": 0.0001, "loss": 0.015, "step": 988 }, { "epoch": 123.99, "learning_rate": 0.0001, "loss": 0.0157, "step": 992 }, { "epoch": 124.5, "learning_rate": 0.0001, "loss": 0.0137, "step": 996 }, { "epoch": 124.99, "learning_rate": 0.0001, "loss": 0.0131, "step": 1000 }, { "epoch": 125.5, "learning_rate": 0.0001, "loss": 0.0135, "step": 1004 }, { "epoch": 125.99, "learning_rate": 0.0001, "loss": 0.0133, "step": 1008 }, { "epoch": 126.5, "learning_rate": 0.0001, "loss": 0.0128, "step": 1012 }, { "epoch": 126.99, "learning_rate": 0.0001, "loss": 0.0134, "step": 1016 }, { "epoch": 127.5, "learning_rate": 0.0001, "loss": 0.0125, "step": 1020 }, { "epoch": 127.99, "learning_rate": 0.0001, "loss": 0.012, "step": 1024 }, { "epoch": 127.99, "eval_exact_match": 0.43892750744786496, "eval_exec": 0.4856007944389275, "eval_loss": 0.4193364083766937, "eval_runtime": 203.6399, "eval_samples_per_second": 6.384, "step": 1024 }, { "epoch": 128.5, "learning_rate": 0.0001, "loss": 0.012, "step": 1028 }, { "epoch": 128.99, "learning_rate": 0.0001, "loss": 0.0129, "step": 1032 }, { "epoch": 129.5, "learning_rate": 0.0001, "loss": 0.0136, "step": 1036 }, { "epoch": 129.99, "learning_rate": 0.0001, "loss": 0.0123, "step": 1040 }, { "epoch": 130.5, "learning_rate": 0.0001, "loss": 0.0122, "step": 1044 }, { "epoch": 130.99, "learning_rate": 0.0001, "loss": 0.0126, "step": 1048 }, { "epoch": 131.5, "learning_rate": 0.0001, "loss": 0.0111, "step": 1052 }, { "epoch": 131.99, "learning_rate": 0.0001, "loss": 0.0129, "step": 1056 }, { "epoch": 132.5, "learning_rate": 0.0001, "loss": 0.0135, "step": 1060 }, { "epoch": 132.99, "learning_rate": 0.0001, "loss": 0.012, "step": 1064 }, { "epoch": 133.5, "learning_rate": 0.0001, "loss": 0.0119, "step": 1068 }, { "epoch": 133.99, "learning_rate": 0.0001, "loss": 0.0115, "step": 1072 }, { "epoch": 134.5, "learning_rate": 0.0001, "loss": 0.0113, "step": 1076 }, { "epoch": 134.99, "learning_rate": 0.0001, "loss": 0.0131, "step": 1080 }, { "epoch": 135.5, "learning_rate": 0.0001, "loss": 0.0126, "step": 1084 }, { "epoch": 135.99, "learning_rate": 0.0001, "loss": 0.0114, "step": 1088 }, { "epoch": 135.99, "eval_exact_match": 0.4240317775571003, "eval_exec": 0.4726911618669315, "eval_loss": 0.4311941862106323, "eval_runtime": 209.696, "eval_samples_per_second": 6.199, "step": 1088 }, { "epoch": 136.5, "learning_rate": 0.0001, "loss": 0.0111, "step": 1092 }, { "epoch": 136.99, "learning_rate": 0.0001, "loss": 0.0109, "step": 1096 }, { "epoch": 137.5, "learning_rate": 0.0001, "loss": 0.0105, "step": 1100 }, { "epoch": 137.99, "learning_rate": 0.0001, "loss": 0.0108, "step": 1104 }, { "epoch": 138.5, "learning_rate": 0.0001, "loss": 0.0106, "step": 1108 }, { "epoch": 138.99, "learning_rate": 0.0001, "loss": 0.01, "step": 1112 }, { "epoch": 139.5, "learning_rate": 0.0001, "loss": 0.0115, "step": 1116 }, { "epoch": 139.99, "learning_rate": 0.0001, "loss": 0.0111, "step": 1120 }, { "epoch": 140.5, "learning_rate": 0.0001, "loss": 0.0105, "step": 1124 }, { "epoch": 140.99, "learning_rate": 0.0001, "loss": 0.0101, "step": 1128 }, { "epoch": 141.5, "learning_rate": 0.0001, "loss": 0.0099, "step": 1132 }, { "epoch": 141.99, "learning_rate": 0.0001, "loss": 0.0099, "step": 1136 }, { "epoch": 142.5, "learning_rate": 0.0001, "loss": 0.0104, "step": 1140 }, { "epoch": 142.99, "learning_rate": 0.0001, "loss": 0.0111, "step": 1144 }, { "epoch": 143.5, "learning_rate": 0.0001, "loss": 0.011, "step": 1148 }, { "epoch": 143.99, "learning_rate": 0.0001, "loss": 0.0095, "step": 1152 }, { "epoch": 143.99, "eval_exact_match": 0.42105263157894735, "eval_exec": 0.47070506454816285, "eval_loss": 0.4453062117099762, "eval_runtime": 205.5286, "eval_samples_per_second": 6.325, "step": 1152 }, { "epoch": 144.5, "learning_rate": 0.0001, "loss": 0.0098, "step": 1156 }, { "epoch": 144.99, "learning_rate": 0.0001, "loss": 0.0098, "step": 1160 }, { "epoch": 145.5, "learning_rate": 0.0001, "loss": 0.0096, "step": 1164 }, { "epoch": 145.99, "learning_rate": 0.0001, "loss": 0.0101, "step": 1168 }, { "epoch": 146.5, "learning_rate": 0.0001, "loss": 0.01, "step": 1172 }, { "epoch": 146.99, "learning_rate": 0.0001, "loss": 0.0095, "step": 1176 }, { "epoch": 147.5, "learning_rate": 0.0001, "loss": 0.0098, "step": 1180 }, { "epoch": 147.99, "learning_rate": 0.0001, "loss": 0.0103, "step": 1184 }, { "epoch": 148.5, "learning_rate": 0.0001, "loss": 0.0098, "step": 1188 }, { "epoch": 148.99, "learning_rate": 0.0001, "loss": 0.0098, "step": 1192 }, { "epoch": 149.5, "learning_rate": 0.0001, "loss": 0.0093, "step": 1196 }, { "epoch": 149.99, "learning_rate": 0.0001, "loss": 0.0092, "step": 1200 }, { "epoch": 150.5, "learning_rate": 0.0001, "loss": 0.0087, "step": 1204 }, { "epoch": 150.99, "learning_rate": 0.0001, "loss": 0.0085, "step": 1208 }, { "epoch": 151.5, "learning_rate": 0.0001, "loss": 0.0089, "step": 1212 }, { "epoch": 151.99, "learning_rate": 0.0001, "loss": 0.0085, "step": 1216 }, { "epoch": 151.99, "eval_exact_match": 0.43892750744786496, "eval_exec": 0.47765640516385305, "eval_loss": 0.45582684874534607, "eval_runtime": 213.7344, "eval_samples_per_second": 6.082, "step": 1216 }, { "epoch": 152.5, "learning_rate": 0.0001, "loss": 0.0092, "step": 1220 }, { "epoch": 152.99, "learning_rate": 0.0001, "loss": 0.009, "step": 1224 }, { "epoch": 153.5, "learning_rate": 0.0001, "loss": 0.0089, "step": 1228 }, { "epoch": 153.99, "learning_rate": 0.0001, "loss": 0.0095, "step": 1232 }, { "epoch": 154.5, "learning_rate": 0.0001, "loss": 0.0089, "step": 1236 }, { "epoch": 154.99, "learning_rate": 0.0001, "loss": 0.009, "step": 1240 }, { "epoch": 155.5, "learning_rate": 0.0001, "loss": 0.0084, "step": 1244 }, { "epoch": 155.99, "learning_rate": 0.0001, "loss": 0.0088, "step": 1248 }, { "epoch": 156.5, "learning_rate": 0.0001, "loss": 0.0084, "step": 1252 }, { "epoch": 156.99, "learning_rate": 0.0001, "loss": 0.0086, "step": 1256 }, { "epoch": 157.5, "learning_rate": 0.0001, "loss": 0.0087, "step": 1260 }, { "epoch": 157.99, "learning_rate": 0.0001, "loss": 0.0084, "step": 1264 }, { "epoch": 158.5, "learning_rate": 0.0001, "loss": 0.0081, "step": 1268 }, { "epoch": 158.99, "learning_rate": 0.0001, "loss": 0.008, "step": 1272 }, { "epoch": 159.5, "learning_rate": 0.0001, "loss": 0.0082, "step": 1276 }, { "epoch": 159.99, "learning_rate": 0.0001, "loss": 0.008, "step": 1280 }, { "epoch": 159.99, "eval_exact_match": 0.4200595829195631, "eval_exec": 0.47070506454816285, "eval_loss": 0.45270583033561707, "eval_runtime": 204.4816, "eval_samples_per_second": 6.358, "step": 1280 }, { "epoch": 160.5, "learning_rate": 0.0001, "loss": 0.0077, "step": 1284 }, { "epoch": 160.99, "learning_rate": 0.0001, "loss": 0.0077, "step": 1288 }, { "epoch": 161.5, "learning_rate": 0.0001, "loss": 0.0086, "step": 1292 }, { "epoch": 161.99, "learning_rate": 0.0001, "loss": 0.0078, "step": 1296 }, { "epoch": 162.5, "learning_rate": 0.0001, "loss": 0.0082, "step": 1300 }, { "epoch": 162.99, "learning_rate": 0.0001, "loss": 0.0079, "step": 1304 }, { "epoch": 163.5, "learning_rate": 0.0001, "loss": 0.0077, "step": 1308 }, { "epoch": 163.99, "learning_rate": 0.0001, "loss": 0.0076, "step": 1312 }, { "epoch": 164.5, "learning_rate": 0.0001, "loss": 0.0078, "step": 1316 }, { "epoch": 164.99, "learning_rate": 0.0001, "loss": 0.0079, "step": 1320 }, { "epoch": 165.5, "learning_rate": 0.0001, "loss": 0.0081, "step": 1324 }, { "epoch": 165.99, "learning_rate": 0.0001, "loss": 0.008, "step": 1328 }, { "epoch": 166.5, "learning_rate": 0.0001, "loss": 0.0086, "step": 1332 }, { "epoch": 166.99, "learning_rate": 0.0001, "loss": 0.0085, "step": 1336 }, { "epoch": 167.5, "learning_rate": 0.0001, "loss": 0.0073, "step": 1340 }, { "epoch": 167.99, "learning_rate": 0.0001, "loss": 0.0069, "step": 1344 }, { "epoch": 167.99, "eval_exact_match": 0.42502482621648463, "eval_exec": 0.4766633565044687, "eval_loss": 0.46810275316238403, "eval_runtime": 209.333, "eval_samples_per_second": 6.21, "step": 1344 }, { "epoch": 168.5, "learning_rate": 0.0001, "loss": 0.0073, "step": 1348 }, { "epoch": 168.99, "learning_rate": 0.0001, "loss": 0.007, "step": 1352 }, { "epoch": 169.5, "learning_rate": 0.0001, "loss": 0.0074, "step": 1356 }, { "epoch": 169.99, "learning_rate": 0.0001, "loss": 0.0073, "step": 1360 }, { "epoch": 170.5, "learning_rate": 0.0001, "loss": 0.007, "step": 1364 }, { "epoch": 170.99, "learning_rate": 0.0001, "loss": 0.0069, "step": 1368 }, { "epoch": 171.5, "learning_rate": 0.0001, "loss": 0.0065, "step": 1372 }, { "epoch": 171.99, "learning_rate": 0.0001, "loss": 0.007, "step": 1376 }, { "epoch": 172.5, "learning_rate": 0.0001, "loss": 0.0068, "step": 1380 }, { "epoch": 172.99, "learning_rate": 0.0001, "loss": 0.007, "step": 1384 }, { "epoch": 173.5, "learning_rate": 0.0001, "loss": 0.0066, "step": 1388 }, { "epoch": 173.99, "learning_rate": 0.0001, "loss": 0.007, "step": 1392 }, { "epoch": 174.5, "learning_rate": 0.0001, "loss": 0.0068, "step": 1396 }, { "epoch": 174.99, "learning_rate": 0.0001, "loss": 0.0066, "step": 1400 }, { "epoch": 175.5, "learning_rate": 0.0001, "loss": 0.0067, "step": 1404 }, { "epoch": 175.99, "learning_rate": 0.0001, "loss": 0.007, "step": 1408 }, { "epoch": 175.99, "eval_exact_match": 0.423038728897716, "eval_exec": 0.47070506454816285, "eval_loss": 0.48861581087112427, "eval_runtime": 209.5612, "eval_samples_per_second": 6.203, "step": 1408 }, { "epoch": 176.5, "learning_rate": 0.0001, "loss": 0.0068, "step": 1412 }, { "epoch": 176.99, "learning_rate": 0.0001, "loss": 0.0066, "step": 1416 }, { "epoch": 177.5, "learning_rate": 0.0001, "loss": 0.0066, "step": 1420 }, { "epoch": 177.99, "learning_rate": 0.0001, "loss": 0.0065, "step": 1424 }, { "epoch": 178.5, "learning_rate": 0.0001, "loss": 0.0066, "step": 1428 }, { "epoch": 178.99, "learning_rate": 0.0001, "loss": 0.0069, "step": 1432 }, { "epoch": 179.5, "learning_rate": 0.0001, "loss": 0.0064, "step": 1436 }, { "epoch": 179.99, "learning_rate": 0.0001, "loss": 0.0062, "step": 1440 }, { "epoch": 180.5, "learning_rate": 0.0001, "loss": 0.0063, "step": 1444 }, { "epoch": 180.99, "learning_rate": 0.0001, "loss": 0.0063, "step": 1448 }, { "epoch": 181.5, "learning_rate": 0.0001, "loss": 0.0063, "step": 1452 }, { "epoch": 181.99, "learning_rate": 0.0001, "loss": 0.0058, "step": 1456 }, { "epoch": 182.5, "learning_rate": 0.0001, "loss": 0.0066, "step": 1460 }, { "epoch": 182.99, "learning_rate": 0.0001, "loss": 0.0074, "step": 1464 }, { "epoch": 183.5, "learning_rate": 0.0001, "loss": 0.0083, "step": 1468 }, { "epoch": 183.99, "learning_rate": 0.0001, "loss": 0.0075, "step": 1472 }, { "epoch": 183.99, "eval_exact_match": 0.4399205561072492, "eval_exec": 0.4856007944389275, "eval_loss": 0.46796470880508423, "eval_runtime": 198.2198, "eval_samples_per_second": 6.558, "step": 1472 }, { "epoch": 184.5, "learning_rate": 0.0001, "loss": 0.0065, "step": 1476 }, { "epoch": 184.99, "learning_rate": 0.0001, "loss": 0.0059, "step": 1480 }, { "epoch": 185.5, "learning_rate": 0.0001, "loss": 0.006, "step": 1484 }, { "epoch": 185.99, "learning_rate": 0.0001, "loss": 0.0061, "step": 1488 }, { "epoch": 186.5, "learning_rate": 0.0001, "loss": 0.006, "step": 1492 }, { "epoch": 186.99, "learning_rate": 0.0001, "loss": 0.0061, "step": 1496 }, { "epoch": 187.5, "learning_rate": 0.0001, "loss": 0.0064, "step": 1500 }, { "epoch": 187.99, "learning_rate": 0.0001, "loss": 0.0062, "step": 1504 }, { "epoch": 188.5, "learning_rate": 0.0001, "loss": 0.006, "step": 1508 }, { "epoch": 188.99, "learning_rate": 0.0001, "loss": 0.0059, "step": 1512 }, { "epoch": 189.5, "learning_rate": 0.0001, "loss": 0.0062, "step": 1516 }, { "epoch": 189.99, "learning_rate": 0.0001, "loss": 0.007, "step": 1520 }, { "epoch": 190.5, "learning_rate": 0.0001, "loss": 0.0077, "step": 1524 }, { "epoch": 190.99, "learning_rate": 0.0001, "loss": 0.0057, "step": 1528 }, { "epoch": 191.5, "learning_rate": 0.0001, "loss": 0.0055, "step": 1532 }, { "epoch": 191.99, "learning_rate": 0.0001, "loss": 0.0059, "step": 1536 }, { "epoch": 191.99, "eval_exact_match": 0.43197616683217477, "eval_exec": 0.4746772591857001, "eval_loss": 0.4912528097629547, "eval_runtime": 222.179, "eval_samples_per_second": 5.851, "step": 1536 }, { "epoch": 192.5, "learning_rate": 0.0001, "loss": 0.006, "step": 1540 }, { "epoch": 192.99, "learning_rate": 0.0001, "loss": 0.0055, "step": 1544 }, { "epoch": 193.5, "learning_rate": 0.0001, "loss": 0.0052, "step": 1548 }, { "epoch": 193.99, "learning_rate": 0.0001, "loss": 0.0055, "step": 1552 }, { "epoch": 194.5, "learning_rate": 0.0001, "loss": 0.0056, "step": 1556 }, { "epoch": 194.99, "learning_rate": 0.0001, "loss": 0.0055, "step": 1560 }, { "epoch": 195.5, "learning_rate": 0.0001, "loss": 0.0052, "step": 1564 }, { "epoch": 195.99, "learning_rate": 0.0001, "loss": 0.0054, "step": 1568 }, { "epoch": 196.5, "learning_rate": 0.0001, "loss": 0.0054, "step": 1572 }, { "epoch": 196.99, "learning_rate": 0.0001, "loss": 0.0052, "step": 1576 }, { "epoch": 197.5, "learning_rate": 0.0001, "loss": 0.005, "step": 1580 }, { "epoch": 197.99, "learning_rate": 0.0001, "loss": 0.0053, "step": 1584 }, { "epoch": 198.5, "learning_rate": 0.0001, "loss": 0.005, "step": 1588 }, { "epoch": 198.99, "learning_rate": 0.0001, "loss": 0.0054, "step": 1592 }, { "epoch": 199.5, "learning_rate": 0.0001, "loss": 0.0051, "step": 1596 }, { "epoch": 199.99, "learning_rate": 0.0001, "loss": 0.005, "step": 1600 }, { "epoch": 199.99, "eval_exact_match": 0.4329692154915591, "eval_exec": 0.48659384309831183, "eval_loss": 0.4948062002658844, "eval_runtime": 223.792, "eval_samples_per_second": 5.809, "step": 1600 }, { "epoch": 200.5, "learning_rate": 0.0001, "loss": 0.0046, "step": 1604 }, { "epoch": 200.99, "learning_rate": 0.0001, "loss": 0.0046, "step": 1608 }, { "epoch": 201.5, "learning_rate": 0.0001, "loss": 0.005, "step": 1612 }, { "epoch": 201.99, "learning_rate": 0.0001, "loss": 0.005, "step": 1616 }, { "epoch": 202.5, "learning_rate": 0.0001, "loss": 0.0049, "step": 1620 }, { "epoch": 202.99, "learning_rate": 0.0001, "loss": 0.0048, "step": 1624 }, { "epoch": 203.5, "learning_rate": 0.0001, "loss": 0.0051, "step": 1628 }, { "epoch": 203.99, "learning_rate": 0.0001, "loss": 0.0046, "step": 1632 }, { "epoch": 204.5, "learning_rate": 0.0001, "loss": 0.0051, "step": 1636 }, { "epoch": 204.99, "learning_rate": 0.0001, "loss": 0.005, "step": 1640 }, { "epoch": 205.5, "learning_rate": 0.0001, "loss": 0.005, "step": 1644 }, { "epoch": 205.99, "learning_rate": 0.0001, "loss": 0.0049, "step": 1648 }, { "epoch": 206.5, "learning_rate": 0.0001, "loss": 0.0045, "step": 1652 }, { "epoch": 206.99, "learning_rate": 0.0001, "loss": 0.0048, "step": 1656 }, { "epoch": 207.5, "learning_rate": 0.0001, "loss": 0.0048, "step": 1660 }, { "epoch": 207.99, "learning_rate": 0.0001, "loss": 0.0047, "step": 1664 }, { "epoch": 207.99, "eval_exact_match": 0.42502482621648463, "eval_exec": 0.48063555114200596, "eval_loss": 0.4956875443458557, "eval_runtime": 203.094, "eval_samples_per_second": 6.401, "step": 1664 }, { "epoch": 208.5, "learning_rate": 0.0001, "loss": 0.0046, "step": 1668 }, { "epoch": 208.99, "learning_rate": 0.0001, "loss": 0.0044, "step": 1672 }, { "epoch": 209.5, "learning_rate": 0.0001, "loss": 0.0047, "step": 1676 }, { "epoch": 209.99, "learning_rate": 0.0001, "loss": 0.0042, "step": 1680 }, { "epoch": 210.5, "learning_rate": 0.0001, "loss": 0.0043, "step": 1684 }, { "epoch": 210.99, "learning_rate": 0.0001, "loss": 0.0047, "step": 1688 }, { "epoch": 211.5, "learning_rate": 0.0001, "loss": 0.0051, "step": 1692 }, { "epoch": 211.99, "learning_rate": 0.0001, "loss": 0.0049, "step": 1696 }, { "epoch": 212.5, "learning_rate": 0.0001, "loss": 0.0049, "step": 1700 }, { "epoch": 212.99, "learning_rate": 0.0001, "loss": 0.0044, "step": 1704 }, { "epoch": 213.5, "learning_rate": 0.0001, "loss": 0.0044, "step": 1708 }, { "epoch": 213.99, "learning_rate": 0.0001, "loss": 0.0045, "step": 1712 }, { "epoch": 214.5, "learning_rate": 0.0001, "loss": 0.0042, "step": 1716 }, { "epoch": 214.99, "learning_rate": 0.0001, "loss": 0.0043, "step": 1720 }, { "epoch": 215.5, "learning_rate": 0.0001, "loss": 0.0047, "step": 1724 }, { "epoch": 215.99, "learning_rate": 0.0001, "loss": 0.0045, "step": 1728 }, { "epoch": 215.99, "eval_exact_match": 0.4339622641509434, "eval_exec": 0.4726911618669315, "eval_loss": 0.4982646703720093, "eval_runtime": 200.9709, "eval_samples_per_second": 6.469, "step": 1728 }, { "epoch": 216.5, "learning_rate": 0.0001, "loss": 0.0045, "step": 1732 }, { "epoch": 216.99, "learning_rate": 0.0001, "loss": 0.0046, "step": 1736 }, { "epoch": 217.5, "learning_rate": 0.0001, "loss": 0.0042, "step": 1740 }, { "epoch": 217.99, "learning_rate": 0.0001, "loss": 0.0046, "step": 1744 }, { "epoch": 218.5, "learning_rate": 0.0001, "loss": 0.0038, "step": 1748 }, { "epoch": 218.99, "learning_rate": 0.0001, "loss": 0.0045, "step": 1752 }, { "epoch": 219.5, "learning_rate": 0.0001, "loss": 0.0042, "step": 1756 }, { "epoch": 219.99, "learning_rate": 0.0001, "loss": 0.0041, "step": 1760 }, { "epoch": 220.5, "learning_rate": 0.0001, "loss": 0.0046, "step": 1764 }, { "epoch": 220.99, "learning_rate": 0.0001, "loss": 0.004, "step": 1768 }, { "epoch": 221.5, "learning_rate": 0.0001, "loss": 0.0042, "step": 1772 }, { "epoch": 221.99, "learning_rate": 0.0001, "loss": 0.0039, "step": 1776 }, { "epoch": 222.5, "learning_rate": 0.0001, "loss": 0.0043, "step": 1780 }, { "epoch": 222.99, "learning_rate": 0.0001, "loss": 0.0041, "step": 1784 }, { "epoch": 223.5, "learning_rate": 0.0001, "loss": 0.0038, "step": 1788 }, { "epoch": 223.99, "learning_rate": 0.0001, "loss": 0.0042, "step": 1792 }, { "epoch": 223.99, "eval_exact_match": 0.43793445878848064, "eval_exec": 0.4756703078450844, "eval_loss": 0.5064935088157654, "eval_runtime": 200.6112, "eval_samples_per_second": 6.48, "step": 1792 }, { "epoch": 224.5, "learning_rate": 0.0001, "loss": 0.005, "step": 1796 }, { "epoch": 224.99, "learning_rate": 0.0001, "loss": 0.0046, "step": 1800 }, { "epoch": 225.5, "learning_rate": 0.0001, "loss": 0.004, "step": 1804 }, { "epoch": 225.99, "learning_rate": 0.0001, "loss": 0.0037, "step": 1808 }, { "epoch": 226.5, "learning_rate": 0.0001, "loss": 0.0039, "step": 1812 }, { "epoch": 226.99, "learning_rate": 0.0001, "loss": 0.0037, "step": 1816 }, { "epoch": 227.5, "learning_rate": 0.0001, "loss": 0.0037, "step": 1820 }, { "epoch": 227.99, "learning_rate": 0.0001, "loss": 0.0039, "step": 1824 }, { "epoch": 228.5, "learning_rate": 0.0001, "loss": 0.0037, "step": 1828 }, { "epoch": 228.99, "learning_rate": 0.0001, "loss": 0.0038, "step": 1832 }, { "epoch": 229.5, "learning_rate": 0.0001, "loss": 0.0036, "step": 1836 }, { "epoch": 229.99, "learning_rate": 0.0001, "loss": 0.0036, "step": 1840 }, { "epoch": 230.5, "learning_rate": 0.0001, "loss": 0.0037, "step": 1844 }, { "epoch": 230.99, "learning_rate": 0.0001, "loss": 0.0036, "step": 1848 }, { "epoch": 231.5, "learning_rate": 0.0001, "loss": 0.0038, "step": 1852 }, { "epoch": 231.99, "learning_rate": 0.0001, "loss": 0.0036, "step": 1856 }, { "epoch": 231.99, "eval_exact_match": 0.43793445878848064, "eval_exec": 0.4856007944389275, "eval_loss": 0.5247978568077087, "eval_runtime": 207.1906, "eval_samples_per_second": 6.274, "step": 1856 }, { "epoch": 232.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 1860 }, { "epoch": 232.99, "learning_rate": 0.0001, "loss": 0.0039, "step": 1864 }, { "epoch": 233.5, "learning_rate": 0.0001, "loss": 0.0039, "step": 1868 }, { "epoch": 233.99, "learning_rate": 0.0001, "loss": 0.0034, "step": 1872 }, { "epoch": 234.5, "learning_rate": 0.0001, "loss": 0.0042, "step": 1876 }, { "epoch": 234.99, "learning_rate": 0.0001, "loss": 0.004, "step": 1880 }, { "epoch": 235.5, "learning_rate": 0.0001, "loss": 0.0047, "step": 1884 }, { "epoch": 235.99, "learning_rate": 0.0001, "loss": 0.0065, "step": 1888 }, { "epoch": 236.5, "learning_rate": 0.0001, "loss": 0.0036, "step": 1892 }, { "epoch": 236.99, "learning_rate": 0.0001, "loss": 0.0036, "step": 1896 }, { "epoch": 237.5, "learning_rate": 0.0001, "loss": 0.0036, "step": 1900 }, { "epoch": 237.99, "learning_rate": 0.0001, "loss": 0.0041, "step": 1904 }, { "epoch": 238.5, "learning_rate": 0.0001, "loss": 0.0036, "step": 1908 }, { "epoch": 238.99, "learning_rate": 0.0001, "loss": 0.0035, "step": 1912 }, { "epoch": 239.5, "learning_rate": 0.0001, "loss": 0.0037, "step": 1916 }, { "epoch": 239.99, "learning_rate": 0.0001, "loss": 0.0035, "step": 1920 }, { "epoch": 239.99, "eval_exact_match": 0.43892750744786496, "eval_exec": 0.48659384309831183, "eval_loss": 0.5224528908729553, "eval_runtime": 194.9847, "eval_samples_per_second": 6.667, "step": 1920 }, { "epoch": 240.5, "learning_rate": 0.0001, "loss": 0.0074, "step": 1924 }, { "epoch": 240.99, "learning_rate": 0.0001, "loss": 0.0054, "step": 1928 }, { "epoch": 241.5, "learning_rate": 0.0001, "loss": 0.0037, "step": 1932 }, { "epoch": 241.99, "learning_rate": 0.0001, "loss": 0.0037, "step": 1936 }, { "epoch": 242.5, "learning_rate": 0.0001, "loss": 0.0035, "step": 1940 }, { "epoch": 242.99, "learning_rate": 0.0001, "loss": 0.0037, "step": 1944 }, { "epoch": 243.5, "learning_rate": 0.0001, "loss": 0.0035, "step": 1948 }, { "epoch": 243.99, "learning_rate": 0.0001, "loss": 0.0031, "step": 1952 }, { "epoch": 244.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 1956 }, { "epoch": 244.99, "learning_rate": 0.0001, "loss": 0.0035, "step": 1960 }, { "epoch": 245.5, "learning_rate": 0.0001, "loss": 0.0031, "step": 1964 }, { "epoch": 245.99, "learning_rate": 0.0001, "loss": 0.0036, "step": 1968 }, { "epoch": 246.5, "learning_rate": 0.0001, "loss": 0.0038, "step": 1972 }, { "epoch": 246.99, "learning_rate": 0.0001, "loss": 0.0035, "step": 1976 }, { "epoch": 247.5, "learning_rate": 0.0001, "loss": 0.0033, "step": 1980 }, { "epoch": 247.99, "learning_rate": 0.0001, "loss": 0.0031, "step": 1984 }, { "epoch": 247.99, "eval_exact_match": 0.4299900695134062, "eval_exec": 0.4856007944389275, "eval_loss": 0.521920382976532, "eval_runtime": 197.9426, "eval_samples_per_second": 6.568, "step": 1984 }, { "epoch": 248.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 1988 }, { "epoch": 248.99, "learning_rate": 0.0001, "loss": 0.0033, "step": 1992 }, { "epoch": 249.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 1996 }, { "epoch": 249.99, "learning_rate": 0.0001, "loss": 0.0031, "step": 2000 }, { "epoch": 250.5, "learning_rate": 0.0001, "loss": 0.0031, "step": 2004 }, { "epoch": 250.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 2008 }, { "epoch": 251.5, "learning_rate": 0.0001, "loss": 0.0033, "step": 2012 }, { "epoch": 251.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 2016 }, { "epoch": 252.5, "learning_rate": 0.0001, "loss": 0.0032, "step": 2020 }, { "epoch": 252.99, "learning_rate": 0.0001, "loss": 0.003, "step": 2024 }, { "epoch": 253.5, "learning_rate": 0.0001, "loss": 0.003, "step": 2028 }, { "epoch": 253.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 2032 }, { "epoch": 254.5, "learning_rate": 0.0001, "loss": 0.0033, "step": 2036 }, { "epoch": 254.99, "learning_rate": 0.0001, "loss": 0.0039, "step": 2040 }, { "epoch": 255.5, "learning_rate": 0.0001, "loss": 0.0064, "step": 2044 }, { "epoch": 255.99, "learning_rate": 0.0001, "loss": 0.0035, "step": 2048 }, { "epoch": 255.99, "eval_exact_match": 0.4399205561072492, "eval_exec": 0.49056603773584906, "eval_loss": 0.516386091709137, "eval_runtime": 193.5596, "eval_samples_per_second": 6.716, "step": 2048 }, { "epoch": 256.5, "learning_rate": 0.0001, "loss": 0.0032, "step": 2052 }, { "epoch": 256.99, "learning_rate": 0.0001, "loss": 0.0033, "step": 2056 }, { "epoch": 257.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 2060 }, { "epoch": 257.99, "learning_rate": 0.0001, "loss": 0.0031, "step": 2064 }, { "epoch": 258.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2068 }, { "epoch": 258.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 2072 }, { "epoch": 259.5, "learning_rate": 0.0001, "loss": 0.0032, "step": 2076 }, { "epoch": 259.99, "learning_rate": 0.0001, "loss": 0.003, "step": 2080 }, { "epoch": 260.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2084 }, { "epoch": 260.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 2088 }, { "epoch": 261.5, "learning_rate": 0.0001, "loss": 0.0031, "step": 2092 }, { "epoch": 261.99, "learning_rate": 0.0001, "loss": 0.0033, "step": 2096 }, { "epoch": 262.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2100 }, { "epoch": 262.99, "learning_rate": 0.0001, "loss": 0.0031, "step": 2104 }, { "epoch": 263.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2108 }, { "epoch": 263.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 2112 }, { "epoch": 263.99, "eval_exact_match": 0.43793445878848064, "eval_exec": 0.48758689175769615, "eval_loss": 0.5402066707611084, "eval_runtime": 203.0032, "eval_samples_per_second": 6.404, "step": 2112 }, { "epoch": 264.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 2116 }, { "epoch": 264.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 2120 }, { "epoch": 265.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 2124 }, { "epoch": 265.99, "learning_rate": 0.0001, "loss": 0.0032, "step": 2128 }, { "epoch": 266.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2132 }, { "epoch": 266.99, "learning_rate": 0.0001, "loss": 0.0031, "step": 2136 }, { "epoch": 267.5, "learning_rate": 0.0001, "loss": 0.0031, "step": 2140 }, { "epoch": 267.99, "learning_rate": 0.0001, "loss": 0.0028, "step": 2144 }, { "epoch": 268.5, "learning_rate": 0.0001, "loss": 0.003, "step": 2148 }, { "epoch": 268.99, "learning_rate": 0.0001, "loss": 0.003, "step": 2152 }, { "epoch": 269.5, "learning_rate": 0.0001, "loss": 0.0032, "step": 2156 }, { "epoch": 269.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 2160 }, { "epoch": 270.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2164 }, { "epoch": 270.99, "learning_rate": 0.0001, "loss": 0.003, "step": 2168 }, { "epoch": 271.5, "learning_rate": 0.0001, "loss": 0.003, "step": 2172 }, { "epoch": 271.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 2176 }, { "epoch": 271.99, "eval_exact_match": 0.43495531281032773, "eval_exec": 0.4816285998013903, "eval_loss": 0.5360086560249329, "eval_runtime": 197.8829, "eval_samples_per_second": 6.57, "step": 2176 }, { "epoch": 272.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2180 }, { "epoch": 272.99, "learning_rate": 0.0001, "loss": 0.003, "step": 2184 }, { "epoch": 273.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2188 }, { "epoch": 273.99, "learning_rate": 0.0001, "loss": 0.0028, "step": 2192 }, { "epoch": 274.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 2196 }, { "epoch": 274.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 2200 }, { "epoch": 275.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2204 }, { "epoch": 275.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2208 }, { "epoch": 276.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 2212 }, { "epoch": 276.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 2216 }, { "epoch": 277.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2220 }, { "epoch": 277.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2224 }, { "epoch": 278.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 2228 }, { "epoch": 278.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 2232 }, { "epoch": 279.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 2236 }, { "epoch": 279.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 2240 }, { "epoch": 279.99, "eval_exact_match": 0.4369414101290963, "eval_exec": 0.48063555114200596, "eval_loss": 0.5520691871643066, "eval_runtime": 204.4424, "eval_samples_per_second": 6.359, "step": 2240 }, { "epoch": 280.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 2244 }, { "epoch": 280.99, "learning_rate": 0.0001, "loss": 0.003, "step": 2248 }, { "epoch": 281.5, "learning_rate": 0.0001, "loss": 0.003, "step": 2252 }, { "epoch": 281.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 2256 }, { "epoch": 282.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 2260 }, { "epoch": 282.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2264 }, { "epoch": 283.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 2268 }, { "epoch": 283.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 2272 }, { "epoch": 284.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 2276 }, { "epoch": 284.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 2280 }, { "epoch": 285.5, "learning_rate": 0.0001, "loss": 0.0029, "step": 2284 }, { "epoch": 285.99, "learning_rate": 0.0001, "loss": 0.0027, "step": 2288 }, { "epoch": 286.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2292 }, { "epoch": 286.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 2296 }, { "epoch": 287.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2300 }, { "epoch": 287.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 2304 }, { "epoch": 287.99, "eval_exact_match": 0.4438927507447865, "eval_exec": 0.48659384309831183, "eval_loss": 0.5534113645553589, "eval_runtime": 202.8208, "eval_samples_per_second": 6.41, "step": 2304 }, { "epoch": 288.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2308 }, { "epoch": 288.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2312 }, { "epoch": 289.5, "learning_rate": 0.0001, "loss": 0.0027, "step": 2316 }, { "epoch": 289.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 2320 }, { "epoch": 290.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 2324 }, { "epoch": 290.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2328 }, { "epoch": 291.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2332 }, { "epoch": 291.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 2336 }, { "epoch": 292.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 2340 }, { "epoch": 292.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 2344 }, { "epoch": 293.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2348 }, { "epoch": 293.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2352 }, { "epoch": 294.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2356 }, { "epoch": 294.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 2360 }, { "epoch": 295.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2364 }, { "epoch": 295.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 2368 }, { "epoch": 295.99, "eval_exact_match": 0.4428997020854022, "eval_exec": 0.4816285998013903, "eval_loss": 0.557854413986206, "eval_runtime": 203.9919, "eval_samples_per_second": 6.373, "step": 2368 }, { "epoch": 296.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2372 }, { "epoch": 296.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 2376 }, { "epoch": 297.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 2380 }, { "epoch": 297.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2384 }, { "epoch": 298.5, "learning_rate": 0.0001, "loss": 0.0023, "step": 2388 }, { "epoch": 298.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 2392 }, { "epoch": 299.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 2396 }, { "epoch": 299.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2400 }, { "epoch": 300.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2404 }, { "epoch": 300.99, "learning_rate": 0.0001, "loss": 0.0025, "step": 2408 }, { "epoch": 301.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2412 }, { "epoch": 301.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2416 }, { "epoch": 302.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2420 }, { "epoch": 302.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2424 }, { "epoch": 303.5, "learning_rate": 0.0001, "loss": 0.0028, "step": 2428 }, { "epoch": 303.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 2432 }, { "epoch": 303.99, "eval_exact_match": 0.44985104270109233, "eval_exec": 0.49056603773584906, "eval_loss": 0.5580935478210449, "eval_runtime": 200.2593, "eval_samples_per_second": 6.492, "step": 2432 }, { "epoch": 304.5, "learning_rate": 0.0001, "loss": 0.0026, "step": 2436 }, { "epoch": 304.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2440 }, { "epoch": 305.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2444 }, { "epoch": 305.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2448 }, { "epoch": 306.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2452 }, { "epoch": 306.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2456 }, { "epoch": 307.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2460 }, { "epoch": 307.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2464 }, { "epoch": 308.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2468 }, { "epoch": 308.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2472 }, { "epoch": 309.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2476 }, { "epoch": 309.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2480 }, { "epoch": 310.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2484 }, { "epoch": 310.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2488 }, { "epoch": 311.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2492 }, { "epoch": 311.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2496 }, { "epoch": 311.99, "eval_exact_match": 0.4200595829195631, "eval_exec": 0.47070506454816285, "eval_loss": 0.560897946357727, "eval_runtime": 202.502, "eval_samples_per_second": 6.42, "step": 2496 }, { "epoch": 312.5, "learning_rate": 0.0001, "loss": 0.0023, "step": 2500 }, { "epoch": 312.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2504 }, { "epoch": 313.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2508 }, { "epoch": 313.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2512 }, { "epoch": 314.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2516 }, { "epoch": 314.99, "learning_rate": 0.0001, "loss": 0.0026, "step": 2520 }, { "epoch": 315.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 2524 }, { "epoch": 315.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2528 }, { "epoch": 316.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2532 }, { "epoch": 316.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2536 }, { "epoch": 317.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 2540 }, { "epoch": 317.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2544 }, { "epoch": 318.5, "learning_rate": 0.0001, "loss": 0.0023, "step": 2548 }, { "epoch": 318.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2552 }, { "epoch": 319.5, "learning_rate": 0.0001, "loss": 0.0023, "step": 2556 }, { "epoch": 319.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2560 }, { "epoch": 319.99, "eval_exact_match": 0.42800397219463754, "eval_exec": 0.46971201588877853, "eval_loss": 0.5524822473526001, "eval_runtime": 204.4122, "eval_samples_per_second": 6.36, "step": 2560 }, { "epoch": 320.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2564 }, { "epoch": 320.99, "learning_rate": 0.0001, "loss": 0.0024, "step": 2568 }, { "epoch": 321.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2572 }, { "epoch": 321.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2576 }, { "epoch": 322.5, "learning_rate": 0.0001, "loss": 0.0023, "step": 2580 }, { "epoch": 322.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2584 }, { "epoch": 323.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2588 }, { "epoch": 323.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2592 }, { "epoch": 324.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 2596 }, { "epoch": 324.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2600 }, { "epoch": 325.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 2604 }, { "epoch": 325.99, "learning_rate": 0.0001, "loss": 0.002, "step": 2608 }, { "epoch": 326.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 2612 }, { "epoch": 326.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2616 }, { "epoch": 327.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2620 }, { "epoch": 327.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2624 }, { "epoch": 327.99, "eval_exact_match": 0.4260178748758689, "eval_exec": 0.48361469712015887, "eval_loss": 0.5746508240699768, "eval_runtime": 194.402, "eval_samples_per_second": 6.687, "step": 2624 }, { "epoch": 328.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2628 }, { "epoch": 328.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2632 }, { "epoch": 329.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2636 }, { "epoch": 329.99, "learning_rate": 0.0001, "loss": 0.002, "step": 2640 }, { "epoch": 330.5, "learning_rate": 0.0001, "loss": 0.0025, "step": 2644 }, { "epoch": 330.99, "learning_rate": 0.0001, "loss": 0.003, "step": 2648 }, { "epoch": 331.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 2652 }, { "epoch": 331.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2656 }, { "epoch": 332.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2660 }, { "epoch": 332.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2664 }, { "epoch": 333.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2668 }, { "epoch": 333.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2672 }, { "epoch": 334.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2676 }, { "epoch": 334.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 2680 }, { "epoch": 335.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2684 }, { "epoch": 335.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2688 }, { "epoch": 335.99, "eval_exact_match": 0.4369414101290963, "eval_exec": 0.4786494538232373, "eval_loss": 0.5734978914260864, "eval_runtime": 199.4394, "eval_samples_per_second": 6.518, "step": 2688 }, { "epoch": 336.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2692 }, { "epoch": 336.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2696 }, { "epoch": 337.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2700 }, { "epoch": 337.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2704 }, { "epoch": 338.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2708 }, { "epoch": 338.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2712 }, { "epoch": 339.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 2716 }, { "epoch": 339.99, "learning_rate": 0.0001, "loss": 0.002, "step": 2720 }, { "epoch": 340.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2724 }, { "epoch": 340.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2728 }, { "epoch": 341.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2732 }, { "epoch": 341.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 2736 }, { "epoch": 342.5, "learning_rate": 0.0001, "loss": 0.0034, "step": 2740 }, { "epoch": 342.99, "learning_rate": 0.0001, "loss": 0.0041, "step": 2744 }, { "epoch": 343.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 2748 }, { "epoch": 343.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2752 }, { "epoch": 343.99, "eval_exact_match": 0.4369414101290963, "eval_exec": 0.48063555114200596, "eval_loss": 0.5549472570419312, "eval_runtime": 209.2028, "eval_samples_per_second": 6.214, "step": 2752 }, { "epoch": 344.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2756 }, { "epoch": 344.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 2760 }, { "epoch": 345.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2764 }, { "epoch": 345.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2768 }, { "epoch": 346.5, "learning_rate": 0.0001, "loss": 0.0022, "step": 2772 }, { "epoch": 346.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2776 }, { "epoch": 347.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2780 }, { "epoch": 347.99, "learning_rate": 0.0001, "loss": 0.002, "step": 2784 }, { "epoch": 348.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 2788 }, { "epoch": 348.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 2792 }, { "epoch": 349.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2796 }, { "epoch": 349.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2800 }, { "epoch": 350.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2804 }, { "epoch": 350.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2808 }, { "epoch": 351.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 2812 }, { "epoch": 351.99, "learning_rate": 0.0001, "loss": 0.002, "step": 2816 }, { "epoch": 351.99, "eval_exact_match": 0.4448857994041708, "eval_exec": 0.48758689175769615, "eval_loss": 0.5706749558448792, "eval_runtime": 208.8886, "eval_samples_per_second": 6.223, "step": 2816 }, { "epoch": 352.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2820 }, { "epoch": 352.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2824 }, { "epoch": 353.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2828 }, { "epoch": 353.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 2832 }, { "epoch": 354.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2836 }, { "epoch": 354.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2840 }, { "epoch": 355.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 2844 }, { "epoch": 355.99, "learning_rate": 0.0001, "loss": 0.002, "step": 2848 }, { "epoch": 356.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2852 }, { "epoch": 356.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2856 }, { "epoch": 357.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 2860 }, { "epoch": 357.99, "learning_rate": 0.0001, "loss": 0.002, "step": 2864 }, { "epoch": 358.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 2868 }, { "epoch": 358.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 2872 }, { "epoch": 359.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 2876 }, { "epoch": 359.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2880 }, { "epoch": 359.99, "eval_exact_match": 0.4468718967229394, "eval_exec": 0.48957298907646474, "eval_loss": 0.5861152410507202, "eval_runtime": 206.5997, "eval_samples_per_second": 6.292, "step": 2880 }, { "epoch": 360.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 2884 }, { "epoch": 360.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 2888 }, { "epoch": 361.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 2892 }, { "epoch": 361.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 2896 }, { "epoch": 362.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2900 }, { "epoch": 362.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2904 }, { "epoch": 363.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 2908 }, { "epoch": 363.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2912 }, { "epoch": 364.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 2916 }, { "epoch": 364.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 2920 }, { "epoch": 365.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 2924 }, { "epoch": 365.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 2928 }, { "epoch": 366.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 2932 }, { "epoch": 366.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2936 }, { "epoch": 367.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 2940 }, { "epoch": 367.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2944 }, { "epoch": 367.99, "eval_exact_match": 0.44786494538232374, "eval_exec": 0.4955312810327706, "eval_loss": 0.5812374949455261, "eval_runtime": 213.3063, "eval_samples_per_second": 6.095, "step": 2944 }, { "epoch": 368.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 2948 }, { "epoch": 368.99, "learning_rate": 0.0001, "loss": 0.0021, "step": 2952 }, { "epoch": 369.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 2956 }, { "epoch": 369.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2960 }, { "epoch": 370.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 2964 }, { "epoch": 370.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 2968 }, { "epoch": 371.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 2972 }, { "epoch": 371.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 2976 }, { "epoch": 372.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 2980 }, { "epoch": 372.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2984 }, { "epoch": 373.5, "learning_rate": 0.0001, "loss": 0.002, "step": 2988 }, { "epoch": 373.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 2992 }, { "epoch": 374.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 2996 }, { "epoch": 374.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 3000 }, { "epoch": 375.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 3004 }, { "epoch": 375.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 3008 }, { "epoch": 375.99, "eval_exact_match": 0.4438927507447865, "eval_exec": 0.48659384309831183, "eval_loss": 0.5652831792831421, "eval_runtime": 210.776, "eval_samples_per_second": 6.168, "step": 3008 }, { "epoch": 376.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 3012 }, { "epoch": 376.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 3016 }, { "epoch": 377.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 3020 }, { "epoch": 377.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3024 }, { "epoch": 378.5, "learning_rate": 0.0001, "loss": 0.0021, "step": 3028 }, { "epoch": 378.99, "learning_rate": 0.0001, "loss": 0.0019, "step": 3032 }, { "epoch": 379.5, "learning_rate": 0.0001, "loss": 0.0019, "step": 3036 }, { "epoch": 379.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3040 }, { "epoch": 380.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3044 }, { "epoch": 380.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 3048 }, { "epoch": 381.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3052 }, { "epoch": 381.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3056 }, { "epoch": 382.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 3060 }, { "epoch": 382.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3064 }, { "epoch": 383.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3068 }, { "epoch": 383.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3072 }, { "epoch": 383.99, "eval_exact_match": 0.44985104270109233, "eval_exec": 0.4915590863952334, "eval_loss": 0.5784198641777039, "eval_runtime": 204.8823, "eval_samples_per_second": 6.345, "step": 3072 }, { "epoch": 384.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3076 }, { "epoch": 384.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3080 }, { "epoch": 385.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 3084 }, { "epoch": 385.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3088 }, { "epoch": 386.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 3092 }, { "epoch": 386.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 3096 }, { "epoch": 387.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3100 }, { "epoch": 387.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 3104 }, { "epoch": 388.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3108 }, { "epoch": 388.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3112 }, { "epoch": 389.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3116 }, { "epoch": 389.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3120 }, { "epoch": 390.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3124 }, { "epoch": 390.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3128 }, { "epoch": 391.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 3132 }, { "epoch": 391.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3136 }, { "epoch": 391.99, "eval_exact_match": 0.4528301886792453, "eval_exec": 0.4925521350546177, "eval_loss": 0.5775428414344788, "eval_runtime": 211.2569, "eval_samples_per_second": 6.154, "step": 3136 }, { "epoch": 392.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3140 }, { "epoch": 392.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3144 }, { "epoch": 393.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3148 }, { "epoch": 393.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3152 }, { "epoch": 394.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3156 }, { "epoch": 394.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3160 }, { "epoch": 395.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3164 }, { "epoch": 395.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 3168 }, { "epoch": 396.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3172 }, { "epoch": 396.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3176 }, { "epoch": 397.5, "learning_rate": 0.0001, "loss": 0.0018, "step": 3180 }, { "epoch": 397.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3184 }, { "epoch": 398.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 3188 }, { "epoch": 398.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3192 }, { "epoch": 399.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3196 }, { "epoch": 399.99, "learning_rate": 0.0001, "loss": 0.0018, "step": 3200 }, { "epoch": 399.99, "eval_exact_match": 0.4538232373386296, "eval_exec": 0.49751737835153925, "eval_loss": 0.5791714191436768, "eval_runtime": 202.6045, "eval_samples_per_second": 6.416, "step": 3200 }, { "epoch": 400.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3204 }, { "epoch": 400.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 3208 }, { "epoch": 401.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 3212 }, { "epoch": 401.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3216 }, { "epoch": 402.5, "learning_rate": 0.0001, "loss": 0.0017, "step": 3220 }, { "epoch": 402.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3224 }, { "epoch": 403.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3228 }, { "epoch": 403.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 3232 }, { "epoch": 404.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3236 }, { "epoch": 404.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3240 }, { "epoch": 405.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3244 }, { "epoch": 405.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3248 }, { "epoch": 406.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3252 }, { "epoch": 406.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3256 }, { "epoch": 407.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3260 }, { "epoch": 407.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3264 }, { "epoch": 407.99, "eval_exact_match": 0.44190665342601787, "eval_exec": 0.48361469712015887, "eval_loss": 0.58585524559021, "eval_runtime": 208.6202, "eval_samples_per_second": 6.231, "step": 3264 }, { "epoch": 408.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3268 }, { "epoch": 408.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3272 }, { "epoch": 409.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3276 }, { "epoch": 409.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3280 }, { "epoch": 410.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3284 }, { "epoch": 410.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3288 }, { "epoch": 411.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3292 }, { "epoch": 411.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3296 }, { "epoch": 412.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3300 }, { "epoch": 412.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3304 }, { "epoch": 413.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3308 }, { "epoch": 413.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3312 }, { "epoch": 414.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3316 }, { "epoch": 414.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3320 }, { "epoch": 415.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3324 }, { "epoch": 415.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3328 }, { "epoch": 415.99, "eval_exact_match": 0.44786494538232374, "eval_exec": 0.48361469712015887, "eval_loss": 0.5857390761375427, "eval_runtime": 199.7986, "eval_samples_per_second": 6.507, "step": 3328 }, { "epoch": 416.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3332 }, { "epoch": 416.99, "learning_rate": 0.0001, "loss": 0.0023, "step": 3336 }, { "epoch": 417.5, "learning_rate": 0.0001, "loss": 0.0024, "step": 3340 }, { "epoch": 417.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3344 }, { "epoch": 418.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3348 }, { "epoch": 418.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3352 }, { "epoch": 419.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3356 }, { "epoch": 419.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3360 }, { "epoch": 420.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3364 }, { "epoch": 420.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3368 }, { "epoch": 421.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3372 }, { "epoch": 421.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3376 }, { "epoch": 422.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3380 }, { "epoch": 422.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3384 }, { "epoch": 423.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3388 }, { "epoch": 423.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3392 }, { "epoch": 423.99, "eval_exact_match": 0.4468718967229394, "eval_exec": 0.48758689175769615, "eval_loss": 0.5896801948547363, "eval_runtime": 211.8567, "eval_samples_per_second": 6.136, "step": 3392 }, { "epoch": 424.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3396 }, { "epoch": 424.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3400 }, { "epoch": 425.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3404 }, { "epoch": 425.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3408 }, { "epoch": 426.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3412 }, { "epoch": 426.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3416 }, { "epoch": 427.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3420 }, { "epoch": 427.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3424 }, { "epoch": 428.5, "learning_rate": 0.0001, "loss": 0.0016, "step": 3428 }, { "epoch": 428.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3432 }, { "epoch": 429.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3436 }, { "epoch": 429.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3440 }, { "epoch": 430.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3444 }, { "epoch": 430.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3448 }, { "epoch": 431.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3452 }, { "epoch": 431.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3456 }, { "epoch": 431.99, "eval_exact_match": 0.44885799404170806, "eval_exec": 0.4846077457795432, "eval_loss": 0.5921575427055359, "eval_runtime": 197.9512, "eval_samples_per_second": 6.567, "step": 3456 }, { "epoch": 432.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3460 }, { "epoch": 432.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3464 }, { "epoch": 433.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3468 }, { "epoch": 433.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3472 }, { "epoch": 434.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3476 }, { "epoch": 434.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3480 }, { "epoch": 435.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3484 }, { "epoch": 435.99, "learning_rate": 0.0001, "loss": 0.0017, "step": 3488 }, { "epoch": 436.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3492 }, { "epoch": 436.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3496 }, { "epoch": 437.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3500 }, { "epoch": 437.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3504 }, { "epoch": 438.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3508 }, { "epoch": 438.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3512 }, { "epoch": 439.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3516 }, { "epoch": 439.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3520 }, { "epoch": 439.99, "eval_exact_match": 0.4428997020854022, "eval_exec": 0.49056603773584906, "eval_loss": 0.5778002142906189, "eval_runtime": 200.4589, "eval_samples_per_second": 6.485, "step": 3520 }, { "epoch": 440.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3524 }, { "epoch": 440.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3528 }, { "epoch": 441.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3532 }, { "epoch": 441.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3536 }, { "epoch": 442.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3540 }, { "epoch": 442.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3544 }, { "epoch": 443.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3548 }, { "epoch": 443.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3552 }, { "epoch": 444.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3556 }, { "epoch": 444.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3560 }, { "epoch": 445.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3564 }, { "epoch": 445.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3568 }, { "epoch": 446.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3572 }, { "epoch": 446.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3576 }, { "epoch": 447.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3580 }, { "epoch": 447.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3584 }, { "epoch": 447.99, "eval_exact_match": 0.4438927507447865, "eval_exec": 0.47765640516385305, "eval_loss": 0.584464967250824, "eval_runtime": 195.0196, "eval_samples_per_second": 6.666, "step": 3584 }, { "epoch": 448.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3588 }, { "epoch": 448.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3592 }, { "epoch": 449.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3596 }, { "epoch": 449.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3600 }, { "epoch": 450.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3604 }, { "epoch": 450.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3608 }, { "epoch": 451.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3612 }, { "epoch": 451.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3616 }, { "epoch": 452.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3620 }, { "epoch": 452.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3624 }, { "epoch": 453.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3628 }, { "epoch": 453.99, "learning_rate": 0.0001, "loss": 0.0016, "step": 3632 }, { "epoch": 454.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3636 }, { "epoch": 454.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3640 }, { "epoch": 455.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3644 }, { "epoch": 455.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3648 }, { "epoch": 455.99, "eval_exact_match": 0.4438927507447865, "eval_exec": 0.4846077457795432, "eval_loss": 0.6006260514259338, "eval_runtime": 207.0142, "eval_samples_per_second": 6.28, "step": 3648 }, { "epoch": 456.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3652 }, { "epoch": 456.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3656 }, { "epoch": 457.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3660 }, { "epoch": 457.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3664 }, { "epoch": 458.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3668 }, { "epoch": 458.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3672 }, { "epoch": 459.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3676 }, { "epoch": 459.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3680 }, { "epoch": 460.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3684 }, { "epoch": 460.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3688 }, { "epoch": 461.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3692 }, { "epoch": 461.99, "learning_rate": 0.0001, "loss": 0.0015, "step": 3696 }, { "epoch": 462.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3700 }, { "epoch": 462.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3704 }, { "epoch": 463.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3708 }, { "epoch": 463.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3712 }, { "epoch": 463.99, "eval_exact_match": 0.4448857994041708, "eval_exec": 0.47765640516385305, "eval_loss": 0.6055679321289062, "eval_runtime": 203.8538, "eval_samples_per_second": 6.377, "step": 3712 }, { "epoch": 464.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3716 }, { "epoch": 464.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3720 }, { "epoch": 465.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3724 }, { "epoch": 465.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3728 }, { "epoch": 466.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3732 }, { "epoch": 466.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3736 }, { "epoch": 467.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3740 }, { "epoch": 467.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3744 }, { "epoch": 468.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3748 }, { "epoch": 468.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3752 }, { "epoch": 469.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3756 }, { "epoch": 469.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3760 }, { "epoch": 470.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3764 }, { "epoch": 470.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3768 }, { "epoch": 471.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3772 }, { "epoch": 471.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3776 }, { "epoch": 471.99, "eval_exact_match": 0.44190665342601787, "eval_exec": 0.47368421052631576, "eval_loss": 0.6094422340393066, "eval_runtime": 200.6337, "eval_samples_per_second": 6.479, "step": 3776 }, { "epoch": 472.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3780 }, { "epoch": 472.99, "learning_rate": 0.0001, "loss": 0.001, "step": 3784 }, { "epoch": 473.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3788 }, { "epoch": 473.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3792 }, { "epoch": 474.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3796 }, { "epoch": 474.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3800 }, { "epoch": 475.5, "learning_rate": 0.0001, "loss": 0.001, "step": 3804 }, { "epoch": 475.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3808 }, { "epoch": 476.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3812 }, { "epoch": 476.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3816 }, { "epoch": 477.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3820 }, { "epoch": 477.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3824 }, { "epoch": 478.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3828 }, { "epoch": 478.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3832 }, { "epoch": 479.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3836 }, { "epoch": 479.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3840 }, { "epoch": 479.99, "eval_exact_match": 0.4458788480635551, "eval_exec": 0.49354518371400197, "eval_loss": 0.6145819425582886, "eval_runtime": 190.4305, "eval_samples_per_second": 6.827, "step": 3840 }, { "epoch": 480.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3844 }, { "epoch": 480.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3848 }, { "epoch": 481.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3852 }, { "epoch": 481.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3856 }, { "epoch": 482.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3860 }, { "epoch": 482.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3864 }, { "epoch": 483.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3868 }, { "epoch": 483.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3872 }, { "epoch": 484.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3876 }, { "epoch": 484.99, "learning_rate": 0.0001, "loss": 0.001, "step": 3880 }, { "epoch": 485.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3884 }, { "epoch": 485.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 3888 }, { "epoch": 486.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3892 }, { "epoch": 486.99, "learning_rate": 0.0001, "loss": 0.001, "step": 3896 }, { "epoch": 487.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3900 }, { "epoch": 487.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3904 }, { "epoch": 487.99, "eval_exact_match": 0.44190665342601787, "eval_exec": 0.4846077457795432, "eval_loss": 0.6196692585945129, "eval_runtime": 197.0844, "eval_samples_per_second": 6.596, "step": 3904 }, { "epoch": 488.5, "learning_rate": 0.0001, "loss": 0.0015, "step": 3908 }, { "epoch": 488.99, "learning_rate": 0.0001, "loss": 0.0014, "step": 3912 }, { "epoch": 489.5, "learning_rate": 0.0001, "loss": 0.0014, "step": 3916 }, { "epoch": 489.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3920 }, { "epoch": 490.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3924 }, { "epoch": 490.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 3928 }, { "epoch": 491.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3932 }, { "epoch": 491.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3936 }, { "epoch": 492.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 3940 }, { "epoch": 492.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3944 }, { "epoch": 493.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3948 }, { "epoch": 493.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 3952 }, { "epoch": 494.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3956 }, { "epoch": 494.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 3960 }, { "epoch": 495.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3964 }, { "epoch": 495.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 3968 }, { "epoch": 495.99, "eval_exact_match": 0.4339622641509434, "eval_exec": 0.4756703078450844, "eval_loss": 0.6161912679672241, "eval_runtime": 201.1763, "eval_samples_per_second": 6.462, "step": 3968 }, { "epoch": 496.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3972 }, { "epoch": 496.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3976 }, { "epoch": 497.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 3980 }, { "epoch": 497.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 3984 }, { "epoch": 498.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 3988 }, { "epoch": 498.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 3992 }, { "epoch": 499.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 3996 }, { "epoch": 499.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4000 }, { "epoch": 500.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4004 }, { "epoch": 500.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4008 }, { "epoch": 501.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4012 }, { "epoch": 501.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4016 }, { "epoch": 502.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 4020 }, { "epoch": 502.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4024 }, { "epoch": 503.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4028 }, { "epoch": 503.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4032 }, { "epoch": 503.99, "eval_exact_match": 0.4399205561072492, "eval_exec": 0.4846077457795432, "eval_loss": 0.6102380156517029, "eval_runtime": 199.2958, "eval_samples_per_second": 6.523, "step": 4032 }, { "epoch": 504.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4036 }, { "epoch": 504.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4040 }, { "epoch": 505.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4044 }, { "epoch": 505.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4048 }, { "epoch": 506.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4052 }, { "epoch": 506.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4056 }, { "epoch": 507.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4060 }, { "epoch": 507.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4064 }, { "epoch": 508.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4068 }, { "epoch": 508.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4072 }, { "epoch": 509.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4076 }, { "epoch": 509.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4080 }, { "epoch": 510.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4084 }, { "epoch": 510.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4088 }, { "epoch": 511.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4092 }, { "epoch": 511.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4096 }, { "epoch": 511.99, "eval_exact_match": 0.4399205561072492, "eval_exec": 0.48659384309831183, "eval_loss": 0.619750440120697, "eval_runtime": 211.0292, "eval_samples_per_second": 6.16, "step": 4096 }, { "epoch": 512.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4100 }, { "epoch": 512.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4104 }, { "epoch": 513.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4108 }, { "epoch": 513.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4112 }, { "epoch": 514.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4116 }, { "epoch": 514.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4120 }, { "epoch": 515.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4124 }, { "epoch": 515.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4128 }, { "epoch": 516.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4132 }, { "epoch": 516.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4136 }, { "epoch": 517.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4140 }, { "epoch": 517.99, "learning_rate": 0.0001, "loss": 0.0013, "step": 4144 }, { "epoch": 518.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4148 }, { "epoch": 518.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4152 }, { "epoch": 519.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4156 }, { "epoch": 519.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4160 }, { "epoch": 519.99, "eval_exact_match": 0.4428997020854022, "eval_exec": 0.4856007944389275, "eval_loss": 0.6127471327781677, "eval_runtime": 203.9094, "eval_samples_per_second": 6.375, "step": 4160 }, { "epoch": 520.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4164 }, { "epoch": 520.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4168 }, { "epoch": 521.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4172 }, { "epoch": 521.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4176 }, { "epoch": 522.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4180 }, { "epoch": 522.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4184 }, { "epoch": 523.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4188 }, { "epoch": 523.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4192 }, { "epoch": 524.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4196 }, { "epoch": 524.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4200 }, { "epoch": 525.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4204 }, { "epoch": 525.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4208 }, { "epoch": 526.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4212 }, { "epoch": 526.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4216 }, { "epoch": 527.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4220 }, { "epoch": 527.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4224 }, { "epoch": 527.99, "eval_exact_match": 0.4458788480635551, "eval_exec": 0.48758689175769615, "eval_loss": 0.6248003244400024, "eval_runtime": 204.1177, "eval_samples_per_second": 6.369, "step": 4224 }, { "epoch": 528.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4228 }, { "epoch": 528.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4232 }, { "epoch": 529.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4236 }, { "epoch": 529.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4240 }, { "epoch": 530.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4244 }, { "epoch": 530.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4248 }, { "epoch": 531.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4252 }, { "epoch": 531.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4256 }, { "epoch": 532.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4260 }, { "epoch": 532.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4264 }, { "epoch": 533.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4268 }, { "epoch": 533.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4272 }, { "epoch": 534.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4276 }, { "epoch": 534.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4280 }, { "epoch": 535.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4284 }, { "epoch": 535.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4288 }, { "epoch": 535.99, "eval_exact_match": 0.4468718967229394, "eval_exec": 0.48063555114200596, "eval_loss": 0.6122114658355713, "eval_runtime": 196.9407, "eval_samples_per_second": 6.601, "step": 4288 }, { "epoch": 536.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4292 }, { "epoch": 536.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4296 }, { "epoch": 537.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4300 }, { "epoch": 537.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4304 }, { "epoch": 538.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 4308 }, { "epoch": 538.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4312 }, { "epoch": 539.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4316 }, { "epoch": 539.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4320 }, { "epoch": 540.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4324 }, { "epoch": 540.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4328 }, { "epoch": 541.5, "learning_rate": 0.0001, "loss": 0.0013, "step": 4332 }, { "epoch": 541.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4336 }, { "epoch": 542.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4340 }, { "epoch": 542.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4344 }, { "epoch": 543.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4348 }, { "epoch": 543.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4352 }, { "epoch": 543.99, "eval_exact_match": 0.43495531281032773, "eval_exec": 0.4816285998013903, "eval_loss": 0.605417013168335, "eval_runtime": 200.1247, "eval_samples_per_second": 6.496, "step": 4352 }, { "epoch": 544.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4356 }, { "epoch": 544.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4360 }, { "epoch": 545.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4364 }, { "epoch": 545.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4368 }, { "epoch": 546.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4372 }, { "epoch": 546.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4376 }, { "epoch": 547.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4380 }, { "epoch": 547.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4384 }, { "epoch": 548.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4388 }, { "epoch": 548.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4392 }, { "epoch": 549.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4396 }, { "epoch": 549.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4400 }, { "epoch": 550.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4404 }, { "epoch": 550.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4408 }, { "epoch": 551.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4412 }, { "epoch": 551.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4416 }, { "epoch": 551.99, "eval_exact_match": 0.4369414101290963, "eval_exec": 0.4746772591857001, "eval_loss": 0.6194772124290466, "eval_runtime": 195.0605, "eval_samples_per_second": 6.665, "step": 4416 }, { "epoch": 552.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4420 }, { "epoch": 552.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4424 }, { "epoch": 553.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4428 }, { "epoch": 553.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4432 }, { "epoch": 554.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4436 }, { "epoch": 554.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4440 }, { "epoch": 555.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4444 }, { "epoch": 555.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4448 }, { "epoch": 556.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4452 }, { "epoch": 556.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4456 }, { "epoch": 557.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4460 }, { "epoch": 557.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4464 }, { "epoch": 558.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4468 }, { "epoch": 558.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4472 }, { "epoch": 559.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4476 }, { "epoch": 559.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4480 }, { "epoch": 559.99, "eval_exact_match": 0.44885799404170806, "eval_exec": 0.4925521350546177, "eval_loss": 0.6179357767105103, "eval_runtime": 194.9028, "eval_samples_per_second": 6.67, "step": 4480 }, { "epoch": 560.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4484 }, { "epoch": 560.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4488 }, { "epoch": 561.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4492 }, { "epoch": 561.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4496 }, { "epoch": 562.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4500 }, { "epoch": 562.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4504 }, { "epoch": 563.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4508 }, { "epoch": 563.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4512 }, { "epoch": 564.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4516 }, { "epoch": 564.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4520 }, { "epoch": 565.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4524 }, { "epoch": 565.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4528 }, { "epoch": 566.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4532 }, { "epoch": 566.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4536 }, { "epoch": 567.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4540 }, { "epoch": 567.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4544 }, { "epoch": 567.99, "eval_exact_match": 0.45084409136047665, "eval_exec": 0.4915590863952334, "eval_loss": 0.6036345958709717, "eval_runtime": 196.9122, "eval_samples_per_second": 6.602, "step": 4544 }, { "epoch": 568.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4548 }, { "epoch": 568.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4552 }, { "epoch": 569.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4556 }, { "epoch": 569.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4560 }, { "epoch": 570.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4564 }, { "epoch": 570.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4568 }, { "epoch": 571.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4572 }, { "epoch": 571.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4576 }, { "epoch": 572.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4580 }, { "epoch": 572.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4584 }, { "epoch": 573.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4588 }, { "epoch": 573.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4592 }, { "epoch": 574.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4596 }, { "epoch": 574.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4600 }, { "epoch": 575.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4604 }, { "epoch": 575.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4608 }, { "epoch": 575.99, "eval_exact_match": 0.45183714001986097, "eval_exec": 0.49354518371400197, "eval_loss": 0.6121585369110107, "eval_runtime": 216.7301, "eval_samples_per_second": 5.998, "step": 4608 }, { "epoch": 576.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4612 }, { "epoch": 576.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4616 }, { "epoch": 577.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4620 }, { "epoch": 577.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4624 }, { "epoch": 578.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4628 }, { "epoch": 578.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4632 }, { "epoch": 579.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4636 }, { "epoch": 579.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4640 }, { "epoch": 580.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4644 }, { "epoch": 580.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4648 }, { "epoch": 581.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4652 }, { "epoch": 581.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4656 }, { "epoch": 582.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4660 }, { "epoch": 582.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4664 }, { "epoch": 583.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4668 }, { "epoch": 583.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4672 }, { "epoch": 583.99, "eval_exact_match": 0.4438927507447865, "eval_exec": 0.48063555114200596, "eval_loss": 0.6172407269477844, "eval_runtime": 196.291, "eval_samples_per_second": 6.623, "step": 4672 }, { "epoch": 584.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4676 }, { "epoch": 584.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4680 }, { "epoch": 585.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4684 }, { "epoch": 585.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4688 }, { "epoch": 586.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4692 }, { "epoch": 586.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4696 }, { "epoch": 587.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4700 }, { "epoch": 587.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 4704 }, { "epoch": 588.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4708 }, { "epoch": 588.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4712 }, { "epoch": 589.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4716 }, { "epoch": 589.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4720 }, { "epoch": 590.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4724 }, { "epoch": 590.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 4728 }, { "epoch": 591.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4732 }, { "epoch": 591.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4736 }, { "epoch": 591.99, "eval_exact_match": 0.43793445878848064, "eval_exec": 0.4766633565044687, "eval_loss": 0.6108298301696777, "eval_runtime": 203.2897, "eval_samples_per_second": 6.395, "step": 4736 }, { "epoch": 592.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4740 }, { "epoch": 592.99, "learning_rate": 0.0001, "loss": 0.0012, "step": 4744 }, { "epoch": 593.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4748 }, { "epoch": 593.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4752 }, { "epoch": 594.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 4756 }, { "epoch": 594.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4760 }, { "epoch": 595.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4764 }, { "epoch": 595.99, "learning_rate": 0.0001, "loss": 0.0022, "step": 4768 }, { "epoch": 596.5, "learning_rate": 0.0001, "loss": 0.0012, "step": 4772 }, { "epoch": 596.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4776 }, { "epoch": 597.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4780 }, { "epoch": 597.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4784 }, { "epoch": 598.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4788 }, { "epoch": 598.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 4792 }, { "epoch": 599.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4796 }, { "epoch": 599.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4800 }, { "epoch": 599.99, "eval_exact_match": 0.4369414101290963, "eval_exec": 0.4746772591857001, "eval_loss": 0.6420004367828369, "eval_runtime": 195.6774, "eval_samples_per_second": 6.644, "step": 4800 }, { "epoch": 600.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4804 }, { "epoch": 600.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4808 }, { "epoch": 601.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4812 }, { "epoch": 601.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4816 }, { "epoch": 602.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4820 }, { "epoch": 602.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4824 }, { "epoch": 603.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4828 }, { "epoch": 603.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4832 }, { "epoch": 604.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4836 }, { "epoch": 604.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4840 }, { "epoch": 605.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4844 }, { "epoch": 605.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 4848 }, { "epoch": 606.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4852 }, { "epoch": 606.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4856 }, { "epoch": 607.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4860 }, { "epoch": 607.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4864 }, { "epoch": 607.99, "eval_exact_match": 0.4438927507447865, "eval_exec": 0.4726911618669315, "eval_loss": 0.6424113512039185, "eval_runtime": 202.6801, "eval_samples_per_second": 6.414, "step": 4864 }, { "epoch": 608.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4868 }, { "epoch": 608.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4872 }, { "epoch": 609.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4876 }, { "epoch": 609.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4880 }, { "epoch": 610.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 4884 }, { "epoch": 610.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4888 }, { "epoch": 611.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4892 }, { "epoch": 611.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4896 }, { "epoch": 612.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4900 }, { "epoch": 612.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4904 }, { "epoch": 613.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4908 }, { "epoch": 613.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4912 }, { "epoch": 614.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4916 }, { "epoch": 614.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4920 }, { "epoch": 615.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4924 }, { "epoch": 615.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4928 }, { "epoch": 615.99, "eval_exact_match": 0.4339622641509434, "eval_exec": 0.46871896722939427, "eval_loss": 0.630379319190979, "eval_runtime": 198.8917, "eval_samples_per_second": 6.536, "step": 4928 }, { "epoch": 616.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4932 }, { "epoch": 616.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4936 }, { "epoch": 617.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4940 }, { "epoch": 617.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4944 }, { "epoch": 618.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4948 }, { "epoch": 618.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 4952 }, { "epoch": 619.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4956 }, { "epoch": 619.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 4960 }, { "epoch": 620.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4964 }, { "epoch": 620.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 4968 }, { "epoch": 621.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 4972 }, { "epoch": 621.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4976 }, { "epoch": 622.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4980 }, { "epoch": 622.99, "learning_rate": 0.0001, "loss": 0.001, "step": 4984 }, { "epoch": 623.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 4988 }, { "epoch": 623.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 4992 }, { "epoch": 623.99, "eval_exact_match": 0.44190665342601787, "eval_exec": 0.48361469712015887, "eval_loss": 0.6309102177619934, "eval_runtime": 191.856, "eval_samples_per_second": 6.776, "step": 4992 }, { "epoch": 624.5, "learning_rate": 0.0001, "loss": 0.001, "step": 4996 }, { "epoch": 624.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5000 }, { "epoch": 625.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 5004 }, { "epoch": 625.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5008 }, { "epoch": 626.5, "learning_rate": 0.0001, "loss": 0.001, "step": 5012 }, { "epoch": 626.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5016 }, { "epoch": 627.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5020 }, { "epoch": 627.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5024 }, { "epoch": 628.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5028 }, { "epoch": 628.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5032 }, { "epoch": 629.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5036 }, { "epoch": 629.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5040 }, { "epoch": 630.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5044 }, { "epoch": 630.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5048 }, { "epoch": 631.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5052 }, { "epoch": 631.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5056 }, { "epoch": 631.99, "eval_exact_match": 0.4369414101290963, "eval_exec": 0.46772591857000995, "eval_loss": 0.6289202570915222, "eval_runtime": 198.0458, "eval_samples_per_second": 6.564, "step": 5056 }, { "epoch": 632.5, "learning_rate": 0.0001, "loss": 0.001, "step": 5060 }, { "epoch": 632.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5064 }, { "epoch": 633.5, "learning_rate": 0.0001, "loss": 0.001, "step": 5068 }, { "epoch": 633.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5072 }, { "epoch": 634.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5076 }, { "epoch": 634.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5080 }, { "epoch": 635.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5084 }, { "epoch": 635.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5088 }, { "epoch": 636.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5092 }, { "epoch": 636.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5096 }, { "epoch": 637.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5100 }, { "epoch": 637.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5104 }, { "epoch": 638.5, "learning_rate": 0.0001, "loss": 0.001, "step": 5108 }, { "epoch": 638.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5112 }, { "epoch": 639.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5116 }, { "epoch": 639.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5120 }, { "epoch": 639.99, "eval_exact_match": 0.44985104270109233, "eval_exec": 0.4885799404170804, "eval_loss": 0.6289829015731812, "eval_runtime": 208.2718, "eval_samples_per_second": 6.242, "step": 5120 }, { "epoch": 640.5, "learning_rate": 0.0001, "loss": 0.0011, "step": 5124 }, { "epoch": 640.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5128 }, { "epoch": 641.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5132 }, { "epoch": 641.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5136 }, { "epoch": 642.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5140 }, { "epoch": 642.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5144 }, { "epoch": 643.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5148 }, { "epoch": 643.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5152 }, { "epoch": 644.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5156 }, { "epoch": 644.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5160 }, { "epoch": 645.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5164 }, { "epoch": 645.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5168 }, { "epoch": 646.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5172 }, { "epoch": 646.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5176 }, { "epoch": 647.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5180 }, { "epoch": 647.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5184 }, { "epoch": 647.99, "eval_exact_match": 0.45084409136047665, "eval_exec": 0.4846077457795432, "eval_loss": 0.6306817531585693, "eval_runtime": 212.6718, "eval_samples_per_second": 6.113, "step": 5184 }, { "epoch": 648.5, "learning_rate": 0.0001, "loss": 0.001, "step": 5188 }, { "epoch": 648.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5192 }, { "epoch": 649.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5196 }, { "epoch": 649.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5200 }, { "epoch": 650.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5204 }, { "epoch": 650.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5208 }, { "epoch": 651.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5212 }, { "epoch": 651.99, "learning_rate": 0.0001, "loss": 0.0011, "step": 5216 }, { "epoch": 652.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5220 }, { "epoch": 652.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5224 }, { "epoch": 653.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5228 }, { "epoch": 653.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5232 }, { "epoch": 654.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5236 }, { "epoch": 654.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5240 }, { "epoch": 655.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5244 }, { "epoch": 655.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5248 }, { "epoch": 655.99, "eval_exact_match": 0.43495531281032773, "eval_exec": 0.4746772591857001, "eval_loss": 0.6379679441452026, "eval_runtime": 203.8275, "eval_samples_per_second": 6.378, "step": 5248 }, { "epoch": 656.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5252 }, { "epoch": 656.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5256 }, { "epoch": 657.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5260 }, { "epoch": 657.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5264 }, { "epoch": 658.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5268 }, { "epoch": 658.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5272 }, { "epoch": 659.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5276 }, { "epoch": 659.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5280 }, { "epoch": 660.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5284 }, { "epoch": 660.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5288 }, { "epoch": 661.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5292 }, { "epoch": 661.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5296 }, { "epoch": 662.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5300 }, { "epoch": 662.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5304 }, { "epoch": 663.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5308 }, { "epoch": 663.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5312 }, { "epoch": 663.99, "eval_exact_match": 0.43793445878848064, "eval_exec": 0.4766633565044687, "eval_loss": 0.6149209141731262, "eval_runtime": 205.2947, "eval_samples_per_second": 6.332, "step": 5312 }, { "epoch": 664.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5316 }, { "epoch": 664.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5320 }, { "epoch": 665.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5324 }, { "epoch": 665.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5328 }, { "epoch": 666.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5332 }, { "epoch": 666.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5336 }, { "epoch": 667.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5340 }, { "epoch": 667.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5344 }, { "epoch": 668.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5348 }, { "epoch": 668.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5352 }, { "epoch": 669.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5356 }, { "epoch": 669.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5360 }, { "epoch": 670.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5364 }, { "epoch": 670.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5368 }, { "epoch": 671.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5372 }, { "epoch": 671.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5376 }, { "epoch": 671.99, "eval_exact_match": 0.4458788480635551, "eval_exec": 0.47765640516385305, "eval_loss": 0.6341748237609863, "eval_runtime": 200.2501, "eval_samples_per_second": 6.492, "step": 5376 }, { "epoch": 672.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5380 }, { "epoch": 672.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5384 }, { "epoch": 673.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5388 }, { "epoch": 673.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5392 }, { "epoch": 674.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5396 }, { "epoch": 674.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5400 }, { "epoch": 675.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5404 }, { "epoch": 675.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5408 }, { "epoch": 676.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5412 }, { "epoch": 676.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5416 }, { "epoch": 677.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5420 }, { "epoch": 677.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5424 }, { "epoch": 678.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5428 }, { "epoch": 678.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5432 }, { "epoch": 679.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5436 }, { "epoch": 679.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5440 }, { "epoch": 679.99, "eval_exact_match": 0.43892750744786496, "eval_exec": 0.4766633565044687, "eval_loss": 0.6424917578697205, "eval_runtime": 214.8147, "eval_samples_per_second": 6.052, "step": 5440 }, { "epoch": 680.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5444 }, { "epoch": 680.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5448 }, { "epoch": 681.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5452 }, { "epoch": 681.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5456 }, { "epoch": 682.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5460 }, { "epoch": 682.99, "learning_rate": 0.0001, "loss": 0.0029, "step": 5464 }, { "epoch": 683.5, "learning_rate": 0.0001, "loss": 0.001, "step": 5468 }, { "epoch": 683.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5472 }, { "epoch": 684.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5476 }, { "epoch": 684.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5480 }, { "epoch": 685.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5484 }, { "epoch": 685.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5488 }, { "epoch": 686.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5492 }, { "epoch": 686.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5496 }, { "epoch": 687.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5500 }, { "epoch": 687.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5504 }, { "epoch": 687.99, "eval_exact_match": 0.44786494538232374, "eval_exec": 0.4826216484607746, "eval_loss": 0.6195693612098694, "eval_runtime": 205.7504, "eval_samples_per_second": 6.318, "step": 5504 }, { "epoch": 688.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5508 }, { "epoch": 688.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5512 }, { "epoch": 689.5, "learning_rate": 0.0001, "loss": 0.001, "step": 5516 }, { "epoch": 689.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5520 }, { "epoch": 690.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5524 }, { "epoch": 690.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5528 }, { "epoch": 691.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5532 }, { "epoch": 691.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5536 }, { "epoch": 692.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5540 }, { "epoch": 692.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5544 }, { "epoch": 693.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5548 }, { "epoch": 693.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5552 }, { "epoch": 694.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5556 }, { "epoch": 694.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5560 }, { "epoch": 695.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5564 }, { "epoch": 695.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5568 }, { "epoch": 695.99, "eval_exact_match": 0.4448857994041708, "eval_exec": 0.48361469712015887, "eval_loss": 0.6261533498764038, "eval_runtime": 213.1489, "eval_samples_per_second": 6.099, "step": 5568 }, { "epoch": 696.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5572 }, { "epoch": 696.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5576 }, { "epoch": 697.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5580 }, { "epoch": 697.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5584 }, { "epoch": 698.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5588 }, { "epoch": 698.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5592 }, { "epoch": 699.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5596 }, { "epoch": 699.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5600 }, { "epoch": 700.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5604 }, { "epoch": 700.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5608 }, { "epoch": 701.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5612 }, { "epoch": 701.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5616 }, { "epoch": 702.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5620 }, { "epoch": 702.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5624 }, { "epoch": 703.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5628 }, { "epoch": 703.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5632 }, { "epoch": 703.99, "eval_exact_match": 0.43892750744786496, "eval_exec": 0.4756703078450844, "eval_loss": 0.6435733437538147, "eval_runtime": 208.5122, "eval_samples_per_second": 6.235, "step": 5632 }, { "epoch": 704.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5636 }, { "epoch": 704.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5640 }, { "epoch": 705.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5644 }, { "epoch": 705.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5648 }, { "epoch": 706.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5652 }, { "epoch": 706.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5656 }, { "epoch": 707.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5660 }, { "epoch": 707.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5664 }, { "epoch": 708.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5668 }, { "epoch": 708.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5672 }, { "epoch": 709.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5676 }, { "epoch": 709.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5680 }, { "epoch": 710.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5684 }, { "epoch": 710.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5688 }, { "epoch": 711.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5692 }, { "epoch": 711.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5696 }, { "epoch": 711.99, "eval_exact_match": 0.44985104270109233, "eval_exec": 0.48659384309831183, "eval_loss": 0.6287506818771362, "eval_runtime": 206.1374, "eval_samples_per_second": 6.306, "step": 5696 }, { "epoch": 712.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5700 }, { "epoch": 712.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5704 }, { "epoch": 713.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5708 }, { "epoch": 713.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5712 }, { "epoch": 714.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5716 }, { "epoch": 714.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5720 }, { "epoch": 715.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5724 }, { "epoch": 715.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5728 }, { "epoch": 716.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5732 }, { "epoch": 716.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5736 }, { "epoch": 717.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5740 }, { "epoch": 717.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5744 }, { "epoch": 718.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5748 }, { "epoch": 718.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5752 }, { "epoch": 719.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5756 }, { "epoch": 719.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5760 }, { "epoch": 719.99, "eval_exact_match": 0.4448857994041708, "eval_exec": 0.4856007944389275, "eval_loss": 0.641159176826477, "eval_runtime": 206.6444, "eval_samples_per_second": 6.291, "step": 5760 }, { "epoch": 720.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5764 }, { "epoch": 720.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5768 }, { "epoch": 721.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5772 }, { "epoch": 721.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5776 }, { "epoch": 722.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5780 }, { "epoch": 722.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5784 }, { "epoch": 723.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5788 }, { "epoch": 723.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 5792 }, { "epoch": 724.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5796 }, { "epoch": 724.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5800 }, { "epoch": 725.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5804 }, { "epoch": 725.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5808 }, { "epoch": 726.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5812 }, { "epoch": 726.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5816 }, { "epoch": 727.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5820 }, { "epoch": 727.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5824 }, { "epoch": 727.99, "eval_exact_match": 0.4458788480635551, "eval_exec": 0.48659384309831183, "eval_loss": 0.647663414478302, "eval_runtime": 211.128, "eval_samples_per_second": 6.157, "step": 5824 }, { "epoch": 728.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5828 }, { "epoch": 728.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5832 }, { "epoch": 729.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5836 }, { "epoch": 729.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5840 }, { "epoch": 730.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5844 }, { "epoch": 730.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5848 }, { "epoch": 731.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 5852 }, { "epoch": 731.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5856 }, { "epoch": 732.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5860 }, { "epoch": 732.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5864 }, { "epoch": 733.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 5868 }, { "epoch": 733.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5872 }, { "epoch": 734.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5876 }, { "epoch": 734.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5880 }, { "epoch": 735.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5884 }, { "epoch": 735.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5888 }, { "epoch": 735.99, "eval_exact_match": 0.4448857994041708, "eval_exec": 0.4846077457795432, "eval_loss": 0.6436724662780762, "eval_runtime": 200.2646, "eval_samples_per_second": 6.491, "step": 5888 }, { "epoch": 736.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5892 }, { "epoch": 736.99, "learning_rate": 0.0001, "loss": 0.0009, "step": 5896 }, { "epoch": 737.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5900 }, { "epoch": 737.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5904 }, { "epoch": 738.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5908 }, { "epoch": 738.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5912 }, { "epoch": 739.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5916 }, { "epoch": 739.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5920 }, { "epoch": 740.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5924 }, { "epoch": 740.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5928 }, { "epoch": 741.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5932 }, { "epoch": 741.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5936 }, { "epoch": 742.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 5940 }, { "epoch": 742.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 5944 }, { "epoch": 743.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 5948 }, { "epoch": 743.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5952 }, { "epoch": 743.99, "eval_exact_match": 0.44786494538232374, "eval_exec": 0.4826216484607746, "eval_loss": 0.648475706577301, "eval_runtime": 203.6491, "eval_samples_per_second": 6.384, "step": 5952 }, { "epoch": 744.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5956 }, { "epoch": 744.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5960 }, { "epoch": 745.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5964 }, { "epoch": 745.99, "learning_rate": 0.0001, "loss": 0.001, "step": 5968 }, { "epoch": 746.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 5972 }, { "epoch": 746.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 5976 }, { "epoch": 747.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5980 }, { "epoch": 747.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5984 }, { "epoch": 748.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 5988 }, { "epoch": 748.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 5992 }, { "epoch": 749.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 5996 }, { "epoch": 749.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6000 }, { "epoch": 750.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6004 }, { "epoch": 750.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6008 }, { "epoch": 751.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6012 }, { "epoch": 751.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6016 }, { "epoch": 751.99, "eval_exact_match": 0.4468718967229394, "eval_exec": 0.48063555114200596, "eval_loss": 0.6662933826446533, "eval_runtime": 196.1389, "eval_samples_per_second": 6.628, "step": 6016 }, { "epoch": 752.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6020 }, { "epoch": 752.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6024 }, { "epoch": 753.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6028 }, { "epoch": 753.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6032 }, { "epoch": 754.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6036 }, { "epoch": 754.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6040 }, { "epoch": 755.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6044 }, { "epoch": 755.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6048 }, { "epoch": 756.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6052 }, { "epoch": 756.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6056 }, { "epoch": 757.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6060 }, { "epoch": 757.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6064 }, { "epoch": 758.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6068 }, { "epoch": 758.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6072 }, { "epoch": 759.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6076 }, { "epoch": 759.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6080 }, { "epoch": 759.99, "eval_exact_match": 0.43793445878848064, "eval_exec": 0.46971201588877853, "eval_loss": 0.6573625206947327, "eval_runtime": 203.6771, "eval_samples_per_second": 6.383, "step": 6080 }, { "epoch": 760.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6084 }, { "epoch": 760.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6088 }, { "epoch": 761.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6092 }, { "epoch": 761.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6096 }, { "epoch": 762.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6100 }, { "epoch": 762.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6104 }, { "epoch": 763.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6108 }, { "epoch": 763.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6112 }, { "epoch": 764.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6116 }, { "epoch": 764.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6120 }, { "epoch": 765.5, "learning_rate": 0.0001, "loss": 0.0009, "step": 6124 }, { "epoch": 765.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6128 }, { "epoch": 766.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6132 }, { "epoch": 766.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6136 }, { "epoch": 767.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6140 }, { "epoch": 767.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6144 }, { "epoch": 767.99, "eval_exact_match": 0.45183714001986097, "eval_exec": 0.48957298907646474, "eval_loss": 0.6423913240432739, "eval_runtime": 196.8271, "eval_samples_per_second": 6.605, "step": 6144 }, { "epoch": 768.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6148 }, { "epoch": 768.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6152 }, { "epoch": 769.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6156 }, { "epoch": 769.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6160 }, { "epoch": 770.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6164 }, { "epoch": 770.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6168 }, { "epoch": 771.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6172 }, { "epoch": 771.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6176 }, { "epoch": 772.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6180 }, { "epoch": 772.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6184 }, { "epoch": 773.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6188 }, { "epoch": 773.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6192 }, { "epoch": 774.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6196 }, { "epoch": 774.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6200 }, { "epoch": 775.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6204 }, { "epoch": 775.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6208 }, { "epoch": 775.99, "eval_exact_match": 0.4558093346573982, "eval_exec": 0.49652432969215493, "eval_loss": 0.6396003365516663, "eval_runtime": 214.4091, "eval_samples_per_second": 6.063, "step": 6208 }, { "epoch": 776.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6212 }, { "epoch": 776.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6216 }, { "epoch": 777.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6220 }, { "epoch": 777.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6224 }, { "epoch": 778.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6228 }, { "epoch": 778.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6232 }, { "epoch": 779.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6236 }, { "epoch": 779.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6240 }, { "epoch": 780.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6244 }, { "epoch": 780.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6248 }, { "epoch": 781.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6252 }, { "epoch": 781.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6256 }, { "epoch": 782.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6260 }, { "epoch": 782.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6264 }, { "epoch": 783.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6268 }, { "epoch": 783.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6272 }, { "epoch": 783.99, "eval_exact_match": 0.43892750744786496, "eval_exec": 0.4856007944389275, "eval_loss": 0.6399450898170471, "eval_runtime": 226.2412, "eval_samples_per_second": 5.746, "step": 6272 }, { "epoch": 784.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6276 }, { "epoch": 784.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6280 }, { "epoch": 785.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6284 }, { "epoch": 785.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6288 }, { "epoch": 786.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6292 }, { "epoch": 786.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6296 }, { "epoch": 787.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6300 }, { "epoch": 787.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6304 }, { "epoch": 788.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6308 }, { "epoch": 788.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6312 }, { "epoch": 789.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6316 }, { "epoch": 789.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6320 }, { "epoch": 790.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6324 }, { "epoch": 790.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6328 }, { "epoch": 791.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6332 }, { "epoch": 791.99, "learning_rate": 0.0001, "loss": 0.001, "step": 6336 }, { "epoch": 791.99, "eval_exact_match": 0.44091360476663355, "eval_exec": 0.47964250248262164, "eval_loss": 0.6275960803031921, "eval_runtime": 209.1576, "eval_samples_per_second": 6.215, "step": 6336 }, { "epoch": 792.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6340 }, { "epoch": 792.99, "learning_rate": 0.0001, "loss": 0.0005, "step": 6344 }, { "epoch": 793.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6348 }, { "epoch": 793.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6352 }, { "epoch": 794.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6356 }, { "epoch": 794.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6360 }, { "epoch": 795.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6364 }, { "epoch": 795.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6368 }, { "epoch": 796.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6372 }, { "epoch": 796.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6376 }, { "epoch": 797.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6380 }, { "epoch": 797.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6384 }, { "epoch": 798.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6388 }, { "epoch": 798.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6392 }, { "epoch": 799.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6396 }, { "epoch": 799.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6400 }, { "epoch": 799.99, "eval_exact_match": 0.4468718967229394, "eval_exec": 0.48659384309831183, "eval_loss": 0.641415536403656, "eval_runtime": 208.4431, "eval_samples_per_second": 6.237, "step": 6400 }, { "epoch": 800.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6404 }, { "epoch": 800.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6408 }, { "epoch": 801.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6412 }, { "epoch": 801.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6416 }, { "epoch": 802.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6420 }, { "epoch": 802.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6424 }, { "epoch": 803.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6428 }, { "epoch": 803.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6432 }, { "epoch": 804.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6436 }, { "epoch": 804.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6440 }, { "epoch": 805.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6444 }, { "epoch": 805.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6448 }, { "epoch": 806.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6452 }, { "epoch": 806.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6456 }, { "epoch": 807.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6460 }, { "epoch": 807.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6464 }, { "epoch": 807.99, "eval_exact_match": 0.44786494538232374, "eval_exec": 0.4846077457795432, "eval_loss": 0.6324633359909058, "eval_runtime": 216.057, "eval_samples_per_second": 6.017, "step": 6464 }, { "epoch": 808.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6468 }, { "epoch": 808.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6472 }, { "epoch": 809.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6476 }, { "epoch": 809.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6480 }, { "epoch": 810.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6484 }, { "epoch": 810.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6488 }, { "epoch": 811.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6492 }, { "epoch": 811.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6496 }, { "epoch": 812.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6500 }, { "epoch": 812.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6504 }, { "epoch": 813.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6508 }, { "epoch": 813.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6512 }, { "epoch": 814.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6516 }, { "epoch": 814.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6520 }, { "epoch": 815.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6524 }, { "epoch": 815.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6528 }, { "epoch": 815.99, "eval_exact_match": 0.44190665342601787, "eval_exec": 0.4885799404170804, "eval_loss": 0.6281804442405701, "eval_runtime": 213.8722, "eval_samples_per_second": 6.078, "step": 6528 }, { "epoch": 816.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6532 }, { "epoch": 816.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6536 }, { "epoch": 817.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6540 }, { "epoch": 817.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6544 }, { "epoch": 818.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6548 }, { "epoch": 818.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6552 }, { "epoch": 819.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6556 }, { "epoch": 819.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6560 }, { "epoch": 820.5, "learning_rate": 0.0001, "loss": 0.0005, "step": 6564 }, { "epoch": 820.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6568 }, { "epoch": 821.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6572 }, { "epoch": 821.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6576 }, { "epoch": 822.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6580 }, { "epoch": 822.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6584 }, { "epoch": 823.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6588 }, { "epoch": 823.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6592 }, { "epoch": 823.99, "eval_exact_match": 0.45779543197616684, "eval_exec": 0.4915590863952334, "eval_loss": 0.6452751159667969, "eval_runtime": 223.4105, "eval_samples_per_second": 5.819, "step": 6592 }, { "epoch": 824.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6596 }, { "epoch": 824.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6600 }, { "epoch": 825.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6604 }, { "epoch": 825.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6608 }, { "epoch": 826.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6612 }, { "epoch": 826.99, "learning_rate": 0.0001, "loss": 0.0005, "step": 6616 }, { "epoch": 827.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6620 }, { "epoch": 827.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6624 }, { "epoch": 828.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6628 }, { "epoch": 828.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6632 }, { "epoch": 829.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6636 }, { "epoch": 829.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6640 }, { "epoch": 830.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6644 }, { "epoch": 830.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6648 }, { "epoch": 831.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6652 }, { "epoch": 831.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6656 }, { "epoch": 831.99, "eval_exact_match": 0.4637537239324727, "eval_exec": 0.49751737835153925, "eval_loss": 0.6585542559623718, "eval_runtime": 209.9114, "eval_samples_per_second": 6.193, "step": 6656 }, { "epoch": 832.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6660 }, { "epoch": 832.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6664 }, { "epoch": 833.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6668 }, { "epoch": 833.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6672 }, { "epoch": 834.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6676 }, { "epoch": 834.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6680 }, { "epoch": 835.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6684 }, { "epoch": 835.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6688 }, { "epoch": 836.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6692 }, { "epoch": 836.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6696 }, { "epoch": 837.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6700 }, { "epoch": 837.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6704 }, { "epoch": 838.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6708 }, { "epoch": 838.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6712 }, { "epoch": 839.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6716 }, { "epoch": 839.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6720 }, { "epoch": 839.99, "eval_exact_match": 0.45978152929493543, "eval_exec": 0.4945382323733863, "eval_loss": 0.660439670085907, "eval_runtime": 216.7543, "eval_samples_per_second": 5.998, "step": 6720 }, { "epoch": 840.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6724 }, { "epoch": 840.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6728 }, { "epoch": 841.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6732 }, { "epoch": 841.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6736 }, { "epoch": 842.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6740 }, { "epoch": 842.99, "learning_rate": 0.0001, "loss": 0.0005, "step": 6744 }, { "epoch": 843.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6748 }, { "epoch": 843.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6752 }, { "epoch": 844.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6756 }, { "epoch": 844.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6760 }, { "epoch": 845.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6764 }, { "epoch": 845.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6768 }, { "epoch": 846.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6772 }, { "epoch": 846.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6776 }, { "epoch": 847.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6780 }, { "epoch": 847.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6784 }, { "epoch": 847.99, "eval_exact_match": 0.4468718967229394, "eval_exec": 0.4885799404170804, "eval_loss": 0.6388683319091797, "eval_runtime": 225.744, "eval_samples_per_second": 5.759, "step": 6784 }, { "epoch": 848.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6788 }, { "epoch": 848.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6792 }, { "epoch": 849.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6796 }, { "epoch": 849.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6800 }, { "epoch": 850.5, "learning_rate": 0.0001, "loss": 0.0008, "step": 6804 }, { "epoch": 850.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6808 }, { "epoch": 851.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6812 }, { "epoch": 851.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6816 }, { "epoch": 852.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6820 }, { "epoch": 852.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6824 }, { "epoch": 853.5, "learning_rate": 0.0001, "loss": 0.0007, "step": 6828 }, { "epoch": 853.99, "learning_rate": 0.0001, "loss": 0.0008, "step": 6832 }, { "epoch": 854.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6836 }, { "epoch": 854.99, "learning_rate": 0.0001, "loss": 0.0007, "step": 6840 }, { "epoch": 855.5, "learning_rate": 0.0001, "loss": 0.0006, "step": 6844 }, { "epoch": 855.99, "learning_rate": 0.0001, "loss": 0.0006, "step": 6848 }, { "epoch": 855.99, "eval_exact_match": 0.4657398212512413, "eval_exec": 0.4925521350546177, "eval_loss": 0.6665723323822021, "eval_runtime": 199.9726, "eval_samples_per_second": 6.501, "step": 6848 } ], "max_steps": 24576, "num_train_epochs": 3072, "total_flos": 7.45169724254251e+18, "trial_name": null, "trial_params": null }