| { | |
| "best_metric": 0.4657398212512413, | |
| "best_model_checkpoint": "/mnt/chenzhi/dialogzoo/finetune/txt2sql_picard_cosql/checkpoint-6848", | |
| "epoch": 855.9933373712902, | |
| "global_step": 6848, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001, | |
| "loss": 8.9198, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001, | |
| "loss": 4.4541, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0276, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1989, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.8306, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6585, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5417, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4681, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4011, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3661, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3404, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3268, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2935, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2853, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2694, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2601, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2487, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "eval_exact_match": 0.24726911618669314, | |
| "eval_exec": 0.30883813306852037, | |
| "eval_loss": 0.32532989978790283, | |
| "eval_runtime": 219.0487, | |
| "eval_samples_per_second": 5.935, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2352, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2257, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.222, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2146, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 10.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2115, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2032, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1954, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.186, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1808, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.18, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 13.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.177, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1714, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1714, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1615, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 15.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1599, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1565, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "eval_exact_match": 0.33068520357497516, | |
| "eval_exec": 0.3843098311817279, | |
| "eval_loss": 0.29061898589134216, | |
| "eval_runtime": 200.5643, | |
| "eval_samples_per_second": 6.482, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 16.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1513, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.143, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1426, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 17.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1403, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 18.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1441, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1378, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 19.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1344, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 19.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1293, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 20.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1314, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1219, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 21.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1196, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 21.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1192, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1203, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1189, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 23.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1154, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 23.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1142, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 23.99, | |
| "eval_exact_match": 0.3426017874875869, | |
| "eval_exec": 0.4011916583912612, | |
| "eval_loss": 0.28066790103912354, | |
| "eval_runtime": 235.2994, | |
| "eval_samples_per_second": 5.525, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 24.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1104, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1092, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 25.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1079, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 25.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1043, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 26.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1068, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1009, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 27.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1033, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 27.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1013, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 28.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0986, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0951, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 29.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0947, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 29.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0917, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 30.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0959, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0922, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 31.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0892, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 31.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0885, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 31.99, | |
| "eval_exact_match": 0.36742800397219466, | |
| "eval_exec": 0.41509433962264153, | |
| "eval_loss": 0.28681814670562744, | |
| "eval_runtime": 236.9193, | |
| "eval_samples_per_second": 5.487, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 32.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.087, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0853, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 33.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0847, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 33.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0821, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 34.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0802, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 34.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.084, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 35.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0844, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 35.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0803, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 36.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0786, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 36.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0735, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 37.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0784, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 37.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0733, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 38.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.075, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 38.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0747, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 39.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0733, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 39.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0712, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 39.99, | |
| "eval_exact_match": 0.407149950347567, | |
| "eval_exec": 0.44885799404170806, | |
| "eval_loss": 0.29831913113594055, | |
| "eval_runtime": 183.5809, | |
| "eval_samples_per_second": 7.081, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 40.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0706, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 40.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.071, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 41.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0675, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 41.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0663, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 42.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0652, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 42.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.068, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 43.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.066, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 43.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0658, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 44.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0628, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 44.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.063, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 45.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0607, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 45.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0605, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 46.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.06, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 46.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0618, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 47.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0605, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 47.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0586, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 47.99, | |
| "eval_exact_match": 0.423038728897716, | |
| "eval_exec": 0.4637537239324727, | |
| "eval_loss": 0.31259259581565857, | |
| "eval_runtime": 192.9699, | |
| "eval_samples_per_second": 6.737, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 48.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.058, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 48.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0573, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 49.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0594, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 49.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0552, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 50.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.056, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 50.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0537, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 51.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.054, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 51.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0555, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 52.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0515, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 52.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0522, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 53.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0507, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 53.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.051, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 54.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0492, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 54.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0503, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 55.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0484, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 55.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0486, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 55.99, | |
| "eval_exact_match": 0.4270109235352532, | |
| "eval_exec": 0.4657398212512413, | |
| "eval_loss": 0.32657375931739807, | |
| "eval_runtime": 181.0806, | |
| "eval_samples_per_second": 7.179, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 56.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0491, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 56.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0505, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 57.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0491, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 57.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0487, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 58.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0456, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 58.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0454, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 59.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0449, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 59.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0429, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 60.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0441, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 60.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0451, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 61.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0447, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 61.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0423, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 62.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0429, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 62.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0412, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 63.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0423, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 63.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0412, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 63.99, | |
| "eval_exact_match": 0.40913604766633566, | |
| "eval_exec": 0.45878848063555117, | |
| "eval_loss": 0.3272022306919098, | |
| "eval_runtime": 205.684, | |
| "eval_samples_per_second": 6.32, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 64.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0397, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 64.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0394, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 65.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0411, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 65.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0418, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 66.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.038, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 66.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0388, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 67.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0383, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 67.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0384, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 68.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.039, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 68.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0366, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 69.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0364, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 69.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0363, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 70.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.036, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 70.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0358, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 71.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0339, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 71.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0374, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 71.99, | |
| "eval_exact_match": 0.435948361469712, | |
| "eval_exec": 0.4766633565044687, | |
| "eval_loss": 0.3480900824069977, | |
| "eval_runtime": 174.2765, | |
| "eval_samples_per_second": 7.459, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 72.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0376, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 72.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0341, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 73.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0329, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 73.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0329, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 74.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0334, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 74.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0334, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 75.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0327, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 75.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0328, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 76.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0321, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 76.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0327, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 77.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0321, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 77.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.03, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 78.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0313, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 78.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0335, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 79.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0297, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 79.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0293, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 79.99, | |
| "eval_exact_match": 0.42899702085402186, | |
| "eval_exec": 0.46871896722939427, | |
| "eval_loss": 0.3477668762207031, | |
| "eval_runtime": 228.958, | |
| "eval_samples_per_second": 5.678, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 80.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0284, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 80.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.028, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 81.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0311, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 81.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0286, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 82.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0299, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 82.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0294, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 83.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0265, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 83.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0269, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 84.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0267, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 84.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0269, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 85.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.027, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 85.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0269, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 86.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.026, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 86.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0259, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 87.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0251, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 87.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0253, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 87.99, | |
| "eval_exact_match": 0.423038728897716, | |
| "eval_exec": 0.46971201588877853, | |
| "eval_loss": 0.36298030614852905, | |
| "eval_runtime": 237.4093, | |
| "eval_samples_per_second": 5.476, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 88.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0252, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 88.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0254, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 89.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0262, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 89.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0261, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 90.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0236, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 90.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.024, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 91.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0231, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 91.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0241, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 92.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0244, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 92.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0242, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 93.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.023, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 93.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.025, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 94.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0236, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 94.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.022, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 95.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0207, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 95.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0223, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 95.99, | |
| "eval_exact_match": 0.4329692154915591, | |
| "eval_exec": 0.46871896722939427, | |
| "eval_loss": 0.3830316960811615, | |
| "eval_runtime": 229.6043, | |
| "eval_samples_per_second": 5.662, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 96.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0226, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 96.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0215, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 97.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0213, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 97.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0209, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 98.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0199, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 98.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0208, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 99.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0205, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 99.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0209, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 100.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0217, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 100.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.02, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 101.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0192, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 101.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0195, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 102.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0194, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 102.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0193, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 103.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0212, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 103.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0195, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 103.99, | |
| "eval_exact_match": 0.41807348560079444, | |
| "eval_exec": 0.4667328699106256, | |
| "eval_loss": 0.388680100440979, | |
| "eval_runtime": 220.6138, | |
| "eval_samples_per_second": 5.893, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 104.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.018, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 104.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0185, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 105.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0195, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 105.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0196, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 106.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0189, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 106.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0182, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 107.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0168, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 107.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.018, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 108.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0181, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 108.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0179, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 109.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.017, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 109.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0187, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 110.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0178, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 110.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0161, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 111.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0164, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 111.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0164, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 111.99, | |
| "eval_exact_match": 0.41012909632572, | |
| "eval_exec": 0.464746772591857, | |
| "eval_loss": 0.3992396891117096, | |
| "eval_runtime": 240.1288, | |
| "eval_samples_per_second": 5.414, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 112.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0172, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 112.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0173, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 113.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0163, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 113.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0153, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 114.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0157, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 114.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0159, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 115.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.016, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 115.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0152, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 116.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0159, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 116.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0161, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 117.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0152, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 117.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0149, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 118.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0145, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 118.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0151, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 119.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0165, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 119.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0179, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 119.99, | |
| "eval_exact_match": 0.4329692154915591, | |
| "eval_exec": 0.4746772591857001, | |
| "eval_loss": 0.42190492153167725, | |
| "eval_runtime": 202.7706, | |
| "eval_samples_per_second": 6.411, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 120.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.017, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 120.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.014, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 121.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0144, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 121.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0141, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 122.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0137, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 122.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0143, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 123.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.015, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 123.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0157, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 124.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0137, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 124.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0131, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 125.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0135, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 125.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0133, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 126.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0128, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 126.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0134, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 127.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0125, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 127.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.012, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 127.99, | |
| "eval_exact_match": 0.43892750744786496, | |
| "eval_exec": 0.4856007944389275, | |
| "eval_loss": 0.4193364083766937, | |
| "eval_runtime": 203.6399, | |
| "eval_samples_per_second": 6.384, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 128.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.012, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 128.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0129, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 129.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0136, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 129.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0123, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 130.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0122, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 130.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0126, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 131.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0111, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 131.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0129, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 132.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0135, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 132.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.012, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 133.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0119, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 133.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0115, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 134.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0113, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 134.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0131, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 135.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0126, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 135.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0114, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 135.99, | |
| "eval_exact_match": 0.4240317775571003, | |
| "eval_exec": 0.4726911618669315, | |
| "eval_loss": 0.4311941862106323, | |
| "eval_runtime": 209.696, | |
| "eval_samples_per_second": 6.199, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 136.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0111, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 136.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0109, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 137.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0105, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 137.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0108, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 138.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0106, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 138.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.01, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 139.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0115, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 139.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0111, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 140.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0105, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 140.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0101, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 141.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0099, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 141.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0099, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 142.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0104, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 142.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0111, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 143.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.011, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 143.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0095, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 143.99, | |
| "eval_exact_match": 0.42105263157894735, | |
| "eval_exec": 0.47070506454816285, | |
| "eval_loss": 0.4453062117099762, | |
| "eval_runtime": 205.5286, | |
| "eval_samples_per_second": 6.325, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 144.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0098, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 144.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0098, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 145.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0096, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 145.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0101, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 146.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.01, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 146.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0095, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 147.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0098, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 147.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0103, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 148.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0098, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 148.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0098, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 149.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0093, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 149.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0092, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 150.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0087, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 150.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0085, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 151.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0089, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 151.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0085, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 151.99, | |
| "eval_exact_match": 0.43892750744786496, | |
| "eval_exec": 0.47765640516385305, | |
| "eval_loss": 0.45582684874534607, | |
| "eval_runtime": 213.7344, | |
| "eval_samples_per_second": 6.082, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 152.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0092, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 152.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.009, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 153.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0089, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 153.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0095, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 154.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0089, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 154.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.009, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 155.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0084, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 155.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0088, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 156.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0084, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 156.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0086, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 157.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0087, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 157.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0084, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 158.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0081, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 158.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.008, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 159.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0082, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 159.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.008, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 159.99, | |
| "eval_exact_match": 0.4200595829195631, | |
| "eval_exec": 0.47070506454816285, | |
| "eval_loss": 0.45270583033561707, | |
| "eval_runtime": 204.4816, | |
| "eval_samples_per_second": 6.358, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 160.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0077, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 160.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0077, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 161.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0086, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 161.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0078, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 162.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0082, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 162.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0079, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 163.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0077, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 163.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0076, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 164.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0078, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 164.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0079, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 165.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0081, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 165.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.008, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 166.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0086, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 166.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0085, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 167.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0073, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 167.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0069, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 167.99, | |
| "eval_exact_match": 0.42502482621648463, | |
| "eval_exec": 0.4766633565044687, | |
| "eval_loss": 0.46810275316238403, | |
| "eval_runtime": 209.333, | |
| "eval_samples_per_second": 6.21, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 168.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0073, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 168.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.007, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 169.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0074, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 169.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0073, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 170.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.007, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 170.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0069, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 171.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0065, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 171.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.007, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 172.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0068, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 172.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.007, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 173.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0066, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 173.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.007, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 174.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0068, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 174.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0066, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 175.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0067, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 175.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.007, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 175.99, | |
| "eval_exact_match": 0.423038728897716, | |
| "eval_exec": 0.47070506454816285, | |
| "eval_loss": 0.48861581087112427, | |
| "eval_runtime": 209.5612, | |
| "eval_samples_per_second": 6.203, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 176.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0068, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 176.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0066, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 177.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0066, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 177.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0065, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 178.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0066, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 178.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0069, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 179.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0064, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 179.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0062, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 180.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0063, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 180.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0063, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 181.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0063, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 181.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0058, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 182.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0066, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 182.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0074, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 183.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0083, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 183.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0075, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 183.99, | |
| "eval_exact_match": 0.4399205561072492, | |
| "eval_exec": 0.4856007944389275, | |
| "eval_loss": 0.46796470880508423, | |
| "eval_runtime": 198.2198, | |
| "eval_samples_per_second": 6.558, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 184.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0065, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 184.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0059, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 185.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.006, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 185.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0061, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 186.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.006, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 186.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0061, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 187.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0064, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 187.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0062, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 188.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.006, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 188.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0059, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 189.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0062, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 189.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.007, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 190.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0077, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 190.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0057, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 191.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0055, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 191.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0059, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 191.99, | |
| "eval_exact_match": 0.43197616683217477, | |
| "eval_exec": 0.4746772591857001, | |
| "eval_loss": 0.4912528097629547, | |
| "eval_runtime": 222.179, | |
| "eval_samples_per_second": 5.851, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 192.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.006, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 192.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0055, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 193.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0052, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 193.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0055, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 194.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0056, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 194.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0055, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 195.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0052, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 195.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0054, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 196.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0054, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 196.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0052, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 197.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.005, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 197.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0053, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 198.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.005, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 198.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0054, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 199.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0051, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 199.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.005, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 199.99, | |
| "eval_exact_match": 0.4329692154915591, | |
| "eval_exec": 0.48659384309831183, | |
| "eval_loss": 0.4948062002658844, | |
| "eval_runtime": 223.792, | |
| "eval_samples_per_second": 5.809, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 200.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0046, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 200.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0046, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 201.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.005, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 201.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.005, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 202.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0049, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 202.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0048, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 203.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0051, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 203.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0046, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 204.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0051, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 204.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.005, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 205.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.005, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 205.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0049, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 206.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0045, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 206.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0048, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 207.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0048, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 207.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0047, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 207.99, | |
| "eval_exact_match": 0.42502482621648463, | |
| "eval_exec": 0.48063555114200596, | |
| "eval_loss": 0.4956875443458557, | |
| "eval_runtime": 203.094, | |
| "eval_samples_per_second": 6.401, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 208.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0046, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 208.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0044, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 209.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0047, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 209.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0042, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 210.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0043, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 210.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0047, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 211.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0051, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 211.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0049, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 212.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0049, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 212.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0044, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 213.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0044, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 213.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0045, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 214.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0042, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 214.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0043, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 215.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0047, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 215.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0045, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 215.99, | |
| "eval_exact_match": 0.4339622641509434, | |
| "eval_exec": 0.4726911618669315, | |
| "eval_loss": 0.4982646703720093, | |
| "eval_runtime": 200.9709, | |
| "eval_samples_per_second": 6.469, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 216.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0045, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 216.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0046, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 217.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0042, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 217.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0046, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 218.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0038, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 218.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0045, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 219.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0042, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 219.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0041, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 220.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0046, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 220.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.004, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 221.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0042, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 221.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0039, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 222.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0043, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 222.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0041, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 223.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0038, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 223.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0042, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 223.99, | |
| "eval_exact_match": 0.43793445878848064, | |
| "eval_exec": 0.4756703078450844, | |
| "eval_loss": 0.5064935088157654, | |
| "eval_runtime": 200.6112, | |
| "eval_samples_per_second": 6.48, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 224.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.005, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 224.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0046, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 225.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.004, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 225.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 226.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0039, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 226.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 227.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 227.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0039, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 228.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 228.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0038, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 229.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 229.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 230.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 230.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 231.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0038, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 231.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 231.99, | |
| "eval_exact_match": 0.43793445878848064, | |
| "eval_exec": 0.4856007944389275, | |
| "eval_loss": 0.5247978568077087, | |
| "eval_runtime": 207.1906, | |
| "eval_samples_per_second": 6.274, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 232.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0034, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 232.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0039, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 233.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0039, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 233.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0034, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 234.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0042, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 234.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.004, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 235.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0047, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 235.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0065, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 236.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 236.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 237.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 237.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0041, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 238.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 238.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0035, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 239.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 239.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0035, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 239.99, | |
| "eval_exact_match": 0.43892750744786496, | |
| "eval_exec": 0.48659384309831183, | |
| "eval_loss": 0.5224528908729553, | |
| "eval_runtime": 194.9847, | |
| "eval_samples_per_second": 6.667, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 240.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0074, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 240.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0054, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 241.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 241.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 242.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0035, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 242.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0037, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 243.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0035, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 243.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 244.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0034, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 244.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0035, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 245.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 245.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0036, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 246.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0038, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 246.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0035, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 247.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0033, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 247.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 247.99, | |
| "eval_exact_match": 0.4299900695134062, | |
| "eval_exec": 0.4856007944389275, | |
| "eval_loss": 0.521920382976532, | |
| "eval_runtime": 197.9426, | |
| "eval_samples_per_second": 6.568, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 248.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0034, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 248.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0033, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 249.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0034, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 249.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 250.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 250.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 251.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0033, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 251.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 252.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 252.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 253.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 253.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 254.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0033, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 254.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0039, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 255.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0064, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 255.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0035, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 255.99, | |
| "eval_exact_match": 0.4399205561072492, | |
| "eval_exec": 0.49056603773584906, | |
| "eval_loss": 0.516386091709137, | |
| "eval_runtime": 193.5596, | |
| "eval_samples_per_second": 6.716, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 256.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 256.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0033, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 257.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 257.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 258.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 258.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 259.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 259.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 260.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 260.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 261.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 261.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0033, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 262.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 262.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 263.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 263.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 263.99, | |
| "eval_exact_match": 0.43793445878848064, | |
| "eval_exec": 0.48758689175769615, | |
| "eval_loss": 0.5402066707611084, | |
| "eval_runtime": 203.0032, | |
| "eval_samples_per_second": 6.404, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 264.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0028, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 264.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 265.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0028, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 265.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 266.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 266.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 267.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0031, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 267.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0028, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 268.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 268.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 269.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0032, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 269.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 270.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 270.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 271.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 271.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 271.99, | |
| "eval_exact_match": 0.43495531281032773, | |
| "eval_exec": 0.4816285998013903, | |
| "eval_loss": 0.5360086560249329, | |
| "eval_runtime": 197.8829, | |
| "eval_samples_per_second": 6.57, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 272.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 272.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 273.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 273.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0028, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 274.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 274.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 275.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 275.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 276.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0028, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 276.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 277.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 277.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 278.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 278.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 279.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 279.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 279.99, | |
| "eval_exact_match": 0.4369414101290963, | |
| "eval_exec": 0.48063555114200596, | |
| "eval_loss": 0.5520691871643066, | |
| "eval_runtime": 204.4424, | |
| "eval_samples_per_second": 6.359, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 280.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 280.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 281.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 281.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 282.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0028, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 282.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 283.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0028, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 283.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 284.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 284.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 285.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 285.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 286.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 286.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 287.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 287.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 287.99, | |
| "eval_exact_match": 0.4438927507447865, | |
| "eval_exec": 0.48659384309831183, | |
| "eval_loss": 0.5534113645553589, | |
| "eval_runtime": 202.8208, | |
| "eval_samples_per_second": 6.41, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 288.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 288.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 289.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0027, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 289.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 290.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 290.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 291.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 291.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 292.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 292.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 293.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 293.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 294.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 294.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 295.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 295.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 295.99, | |
| "eval_exact_match": 0.4428997020854022, | |
| "eval_exec": 0.4816285998013903, | |
| "eval_loss": 0.557854413986206, | |
| "eval_runtime": 203.9919, | |
| "eval_samples_per_second": 6.373, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 296.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 296.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 297.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 297.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 298.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 298.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 299.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 299.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 300.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 300.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 301.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 301.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 302.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 302.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 303.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0028, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 303.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 303.99, | |
| "eval_exact_match": 0.44985104270109233, | |
| "eval_exec": 0.49056603773584906, | |
| "eval_loss": 0.5580935478210449, | |
| "eval_runtime": 200.2593, | |
| "eval_samples_per_second": 6.492, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 304.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 304.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 305.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 305.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 306.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 306.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 307.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 307.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 308.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 308.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 309.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 309.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 310.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 310.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 311.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 311.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 311.99, | |
| "eval_exact_match": 0.4200595829195631, | |
| "eval_exec": 0.47070506454816285, | |
| "eval_loss": 0.560897946357727, | |
| "eval_runtime": 202.502, | |
| "eval_samples_per_second": 6.42, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 312.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 312.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 313.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 313.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 314.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 314.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0026, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 315.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 315.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 316.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 316.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 317.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 317.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 318.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 318.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 319.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 319.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 319.99, | |
| "eval_exact_match": 0.42800397219463754, | |
| "eval_exec": 0.46971201588877853, | |
| "eval_loss": 0.5524822473526001, | |
| "eval_runtime": 204.4122, | |
| "eval_samples_per_second": 6.36, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 320.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 320.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 321.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 321.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 322.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 322.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 323.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 323.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 324.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 324.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 325.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 325.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 326.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 326.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 327.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 327.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 327.99, | |
| "eval_exact_match": 0.4260178748758689, | |
| "eval_exec": 0.48361469712015887, | |
| "eval_loss": 0.5746508240699768, | |
| "eval_runtime": 194.402, | |
| "eval_samples_per_second": 6.687, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 328.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 328.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 329.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 329.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 330.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0025, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 330.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.003, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 331.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 331.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 332.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 332.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 333.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 333.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 334.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 334.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 335.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 335.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 335.99, | |
| "eval_exact_match": 0.4369414101290963, | |
| "eval_exec": 0.4786494538232373, | |
| "eval_loss": 0.5734978914260864, | |
| "eval_runtime": 199.4394, | |
| "eval_samples_per_second": 6.518, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 336.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 336.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 337.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 337.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 338.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 338.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 339.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 339.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 340.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 340.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 341.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 341.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 342.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0034, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 342.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0041, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 343.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 343.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 343.99, | |
| "eval_exact_match": 0.4369414101290963, | |
| "eval_exec": 0.48063555114200596, | |
| "eval_loss": 0.5549472570419312, | |
| "eval_runtime": 209.2028, | |
| "eval_samples_per_second": 6.214, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 344.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 344.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 345.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 345.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 346.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 346.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 347.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 347.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 348.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 348.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 349.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 349.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 350.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 350.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 351.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 351.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 351.99, | |
| "eval_exact_match": 0.4448857994041708, | |
| "eval_exec": 0.48758689175769615, | |
| "eval_loss": 0.5706749558448792, | |
| "eval_runtime": 208.8886, | |
| "eval_samples_per_second": 6.223, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 352.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 352.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 353.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 353.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 354.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 354.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 355.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 355.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 356.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 356.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 357.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 357.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 358.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 358.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 359.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 359.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 359.99, | |
| "eval_exact_match": 0.4468718967229394, | |
| "eval_exec": 0.48957298907646474, | |
| "eval_loss": 0.5861152410507202, | |
| "eval_runtime": 206.5997, | |
| "eval_samples_per_second": 6.292, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 360.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 360.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 361.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 361.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 362.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 362.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 363.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 363.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 364.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 364.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 365.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 365.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 366.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 366.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 367.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 367.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 367.99, | |
| "eval_exact_match": 0.44786494538232374, | |
| "eval_exec": 0.4955312810327706, | |
| "eval_loss": 0.5812374949455261, | |
| "eval_runtime": 213.3063, | |
| "eval_samples_per_second": 6.095, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 368.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 368.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 369.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 369.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 370.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 370.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 371.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 371.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 372.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 372.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 373.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.002, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 373.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 374.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 374.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 375.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 375.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 375.99, | |
| "eval_exact_match": 0.4438927507447865, | |
| "eval_exec": 0.48659384309831183, | |
| "eval_loss": 0.5652831792831421, | |
| "eval_runtime": 210.776, | |
| "eval_samples_per_second": 6.168, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 376.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 376.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 377.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 377.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 378.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0021, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 378.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 379.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0019, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 379.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 380.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 380.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 381.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 381.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 382.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 382.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 383.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 383.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 383.99, | |
| "eval_exact_match": 0.44985104270109233, | |
| "eval_exec": 0.4915590863952334, | |
| "eval_loss": 0.5784198641777039, | |
| "eval_runtime": 204.8823, | |
| "eval_samples_per_second": 6.345, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 384.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 384.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 385.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 385.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 386.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 386.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 387.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 387.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 388.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 388.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 389.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 389.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 390.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 390.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 391.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 391.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 391.99, | |
| "eval_exact_match": 0.4528301886792453, | |
| "eval_exec": 0.4925521350546177, | |
| "eval_loss": 0.5775428414344788, | |
| "eval_runtime": 211.2569, | |
| "eval_samples_per_second": 6.154, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 392.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 392.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 393.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 393.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 394.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 394.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 395.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 395.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 396.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 396.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 397.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 397.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 398.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 398.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 399.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 399.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0018, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 399.99, | |
| "eval_exact_match": 0.4538232373386296, | |
| "eval_exec": 0.49751737835153925, | |
| "eval_loss": 0.5791714191436768, | |
| "eval_runtime": 202.6045, | |
| "eval_samples_per_second": 6.416, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 400.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 400.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 401.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 401.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 402.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 402.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 403.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 403.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 404.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 404.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 405.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 405.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 406.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 406.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 407.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 407.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 407.99, | |
| "eval_exact_match": 0.44190665342601787, | |
| "eval_exec": 0.48361469712015887, | |
| "eval_loss": 0.58585524559021, | |
| "eval_runtime": 208.6202, | |
| "eval_samples_per_second": 6.231, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 408.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 408.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 409.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 409.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 410.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3284 | |
| }, | |
| { | |
| "epoch": 410.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 411.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3292 | |
| }, | |
| { | |
| "epoch": 411.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 412.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 412.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3304 | |
| }, | |
| { | |
| "epoch": 413.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3308 | |
| }, | |
| { | |
| "epoch": 413.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 414.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3316 | |
| }, | |
| { | |
| "epoch": 414.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 415.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 415.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 415.99, | |
| "eval_exact_match": 0.44786494538232374, | |
| "eval_exec": 0.48361469712015887, | |
| "eval_loss": 0.5857390761375427, | |
| "eval_runtime": 199.7986, | |
| "eval_samples_per_second": 6.507, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 416.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3332 | |
| }, | |
| { | |
| "epoch": 416.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0023, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 417.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0024, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 417.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3344 | |
| }, | |
| { | |
| "epoch": 418.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3348 | |
| }, | |
| { | |
| "epoch": 418.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3352 | |
| }, | |
| { | |
| "epoch": 419.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3356 | |
| }, | |
| { | |
| "epoch": 419.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 420.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3364 | |
| }, | |
| { | |
| "epoch": 420.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3368 | |
| }, | |
| { | |
| "epoch": 421.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3372 | |
| }, | |
| { | |
| "epoch": 421.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3376 | |
| }, | |
| { | |
| "epoch": 422.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 422.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 423.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3388 | |
| }, | |
| { | |
| "epoch": 423.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 423.99, | |
| "eval_exact_match": 0.4468718967229394, | |
| "eval_exec": 0.48758689175769615, | |
| "eval_loss": 0.5896801948547363, | |
| "eval_runtime": 211.8567, | |
| "eval_samples_per_second": 6.136, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 424.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3396 | |
| }, | |
| { | |
| "epoch": 424.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 425.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 425.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 426.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3412 | |
| }, | |
| { | |
| "epoch": 426.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3416 | |
| }, | |
| { | |
| "epoch": 427.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 427.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 428.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3428 | |
| }, | |
| { | |
| "epoch": 428.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 429.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3436 | |
| }, | |
| { | |
| "epoch": 429.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 430.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 430.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3448 | |
| }, | |
| { | |
| "epoch": 431.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3452 | |
| }, | |
| { | |
| "epoch": 431.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 431.99, | |
| "eval_exact_match": 0.44885799404170806, | |
| "eval_exec": 0.4846077457795432, | |
| "eval_loss": 0.5921575427055359, | |
| "eval_runtime": 197.9512, | |
| "eval_samples_per_second": 6.567, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 432.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 432.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3464 | |
| }, | |
| { | |
| "epoch": 433.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3468 | |
| }, | |
| { | |
| "epoch": 433.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3472 | |
| }, | |
| { | |
| "epoch": 434.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3476 | |
| }, | |
| { | |
| "epoch": 434.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 435.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3484 | |
| }, | |
| { | |
| "epoch": 435.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0017, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 436.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 436.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3496 | |
| }, | |
| { | |
| "epoch": 437.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 437.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 438.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3508 | |
| }, | |
| { | |
| "epoch": 438.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3512 | |
| }, | |
| { | |
| "epoch": 439.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 439.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 439.99, | |
| "eval_exact_match": 0.4428997020854022, | |
| "eval_exec": 0.49056603773584906, | |
| "eval_loss": 0.5778002142906189, | |
| "eval_runtime": 200.4589, | |
| "eval_samples_per_second": 6.485, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 440.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3524 | |
| }, | |
| { | |
| "epoch": 440.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 441.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3532 | |
| }, | |
| { | |
| "epoch": 441.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3536 | |
| }, | |
| { | |
| "epoch": 442.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 442.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3544 | |
| }, | |
| { | |
| "epoch": 443.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3548 | |
| }, | |
| { | |
| "epoch": 443.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 444.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3556 | |
| }, | |
| { | |
| "epoch": 444.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 445.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 445.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 446.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3572 | |
| }, | |
| { | |
| "epoch": 446.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3576 | |
| }, | |
| { | |
| "epoch": 447.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 447.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 447.99, | |
| "eval_exact_match": 0.4438927507447865, | |
| "eval_exec": 0.47765640516385305, | |
| "eval_loss": 0.584464967250824, | |
| "eval_runtime": 195.0196, | |
| "eval_samples_per_second": 6.666, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 448.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3588 | |
| }, | |
| { | |
| "epoch": 448.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3592 | |
| }, | |
| { | |
| "epoch": 449.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3596 | |
| }, | |
| { | |
| "epoch": 449.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 450.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 450.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3608 | |
| }, | |
| { | |
| "epoch": 451.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3612 | |
| }, | |
| { | |
| "epoch": 451.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3616 | |
| }, | |
| { | |
| "epoch": 452.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 452.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3624 | |
| }, | |
| { | |
| "epoch": 453.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3628 | |
| }, | |
| { | |
| "epoch": 453.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0016, | |
| "step": 3632 | |
| }, | |
| { | |
| "epoch": 454.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3636 | |
| }, | |
| { | |
| "epoch": 454.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 455.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3644 | |
| }, | |
| { | |
| "epoch": 455.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 455.99, | |
| "eval_exact_match": 0.4438927507447865, | |
| "eval_exec": 0.4846077457795432, | |
| "eval_loss": 0.6006260514259338, | |
| "eval_runtime": 207.0142, | |
| "eval_samples_per_second": 6.28, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 456.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3652 | |
| }, | |
| { | |
| "epoch": 456.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3656 | |
| }, | |
| { | |
| "epoch": 457.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 457.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3664 | |
| }, | |
| { | |
| "epoch": 458.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3668 | |
| }, | |
| { | |
| "epoch": 458.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3672 | |
| }, | |
| { | |
| "epoch": 459.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3676 | |
| }, | |
| { | |
| "epoch": 459.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 460.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3684 | |
| }, | |
| { | |
| "epoch": 460.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3688 | |
| }, | |
| { | |
| "epoch": 461.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 461.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 462.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 462.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3704 | |
| }, | |
| { | |
| "epoch": 463.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3708 | |
| }, | |
| { | |
| "epoch": 463.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 463.99, | |
| "eval_exact_match": 0.4448857994041708, | |
| "eval_exec": 0.47765640516385305, | |
| "eval_loss": 0.6055679321289062, | |
| "eval_runtime": 203.8538, | |
| "eval_samples_per_second": 6.377, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 464.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3716 | |
| }, | |
| { | |
| "epoch": 464.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 465.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3724 | |
| }, | |
| { | |
| "epoch": 465.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3728 | |
| }, | |
| { | |
| "epoch": 466.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3732 | |
| }, | |
| { | |
| "epoch": 466.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3736 | |
| }, | |
| { | |
| "epoch": 467.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 467.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 468.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3748 | |
| }, | |
| { | |
| "epoch": 468.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3752 | |
| }, | |
| { | |
| "epoch": 469.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3756 | |
| }, | |
| { | |
| "epoch": 469.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 470.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3764 | |
| }, | |
| { | |
| "epoch": 470.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3768 | |
| }, | |
| { | |
| "epoch": 471.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3772 | |
| }, | |
| { | |
| "epoch": 471.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3776 | |
| }, | |
| { | |
| "epoch": 471.99, | |
| "eval_exact_match": 0.44190665342601787, | |
| "eval_exec": 0.47368421052631576, | |
| "eval_loss": 0.6094422340393066, | |
| "eval_runtime": 200.6337, | |
| "eval_samples_per_second": 6.479, | |
| "step": 3776 | |
| }, | |
| { | |
| "epoch": 472.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 472.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 3784 | |
| }, | |
| { | |
| "epoch": 473.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3788 | |
| }, | |
| { | |
| "epoch": 473.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3792 | |
| }, | |
| { | |
| "epoch": 474.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3796 | |
| }, | |
| { | |
| "epoch": 474.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 475.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 3804 | |
| }, | |
| { | |
| "epoch": 475.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3808 | |
| }, | |
| { | |
| "epoch": 476.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3812 | |
| }, | |
| { | |
| "epoch": 476.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 477.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 477.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3824 | |
| }, | |
| { | |
| "epoch": 478.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3828 | |
| }, | |
| { | |
| "epoch": 478.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3832 | |
| }, | |
| { | |
| "epoch": 479.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3836 | |
| }, | |
| { | |
| "epoch": 479.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 479.99, | |
| "eval_exact_match": 0.4458788480635551, | |
| "eval_exec": 0.49354518371400197, | |
| "eval_loss": 0.6145819425582886, | |
| "eval_runtime": 190.4305, | |
| "eval_samples_per_second": 6.827, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 480.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3844 | |
| }, | |
| { | |
| "epoch": 480.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3848 | |
| }, | |
| { | |
| "epoch": 481.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3852 | |
| }, | |
| { | |
| "epoch": 481.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3856 | |
| }, | |
| { | |
| "epoch": 482.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 482.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 483.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3868 | |
| }, | |
| { | |
| "epoch": 483.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3872 | |
| }, | |
| { | |
| "epoch": 484.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3876 | |
| }, | |
| { | |
| "epoch": 484.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 485.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3884 | |
| }, | |
| { | |
| "epoch": 485.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3888 | |
| }, | |
| { | |
| "epoch": 486.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3892 | |
| }, | |
| { | |
| "epoch": 486.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 3896 | |
| }, | |
| { | |
| "epoch": 487.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 487.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 487.99, | |
| "eval_exact_match": 0.44190665342601787, | |
| "eval_exec": 0.4846077457795432, | |
| "eval_loss": 0.6196692585945129, | |
| "eval_runtime": 197.0844, | |
| "eval_samples_per_second": 6.596, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 488.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0015, | |
| "step": 3908 | |
| }, | |
| { | |
| "epoch": 488.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3912 | |
| }, | |
| { | |
| "epoch": 489.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0014, | |
| "step": 3916 | |
| }, | |
| { | |
| "epoch": 489.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 490.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3924 | |
| }, | |
| { | |
| "epoch": 490.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3928 | |
| }, | |
| { | |
| "epoch": 491.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3932 | |
| }, | |
| { | |
| "epoch": 491.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 492.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 492.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3944 | |
| }, | |
| { | |
| "epoch": 493.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3948 | |
| }, | |
| { | |
| "epoch": 493.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3952 | |
| }, | |
| { | |
| "epoch": 494.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3956 | |
| }, | |
| { | |
| "epoch": 494.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 495.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3964 | |
| }, | |
| { | |
| "epoch": 495.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 495.99, | |
| "eval_exact_match": 0.4339622641509434, | |
| "eval_exec": 0.4756703078450844, | |
| "eval_loss": 0.6161912679672241, | |
| "eval_runtime": 201.1763, | |
| "eval_samples_per_second": 6.462, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 496.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3972 | |
| }, | |
| { | |
| "epoch": 496.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 497.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 497.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 3984 | |
| }, | |
| { | |
| "epoch": 498.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3988 | |
| }, | |
| { | |
| "epoch": 498.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 3992 | |
| }, | |
| { | |
| "epoch": 499.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 3996 | |
| }, | |
| { | |
| "epoch": 499.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 500.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 500.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4008 | |
| }, | |
| { | |
| "epoch": 501.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4012 | |
| }, | |
| { | |
| "epoch": 501.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4016 | |
| }, | |
| { | |
| "epoch": 502.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 502.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4024 | |
| }, | |
| { | |
| "epoch": 503.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4028 | |
| }, | |
| { | |
| "epoch": 503.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 503.99, | |
| "eval_exact_match": 0.4399205561072492, | |
| "eval_exec": 0.4846077457795432, | |
| "eval_loss": 0.6102380156517029, | |
| "eval_runtime": 199.2958, | |
| "eval_samples_per_second": 6.523, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 504.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4036 | |
| }, | |
| { | |
| "epoch": 504.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 505.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4044 | |
| }, | |
| { | |
| "epoch": 505.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4048 | |
| }, | |
| { | |
| "epoch": 506.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4052 | |
| }, | |
| { | |
| "epoch": 506.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 507.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 507.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4064 | |
| }, | |
| { | |
| "epoch": 508.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 508.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4072 | |
| }, | |
| { | |
| "epoch": 509.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4076 | |
| }, | |
| { | |
| "epoch": 509.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 510.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4084 | |
| }, | |
| { | |
| "epoch": 510.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4088 | |
| }, | |
| { | |
| "epoch": 511.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4092 | |
| }, | |
| { | |
| "epoch": 511.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 511.99, | |
| "eval_exact_match": 0.4399205561072492, | |
| "eval_exec": 0.48659384309831183, | |
| "eval_loss": 0.619750440120697, | |
| "eval_runtime": 211.0292, | |
| "eval_samples_per_second": 6.16, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 512.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 512.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4104 | |
| }, | |
| { | |
| "epoch": 513.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4108 | |
| }, | |
| { | |
| "epoch": 513.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4112 | |
| }, | |
| { | |
| "epoch": 514.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4116 | |
| }, | |
| { | |
| "epoch": 514.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 515.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4124 | |
| }, | |
| { | |
| "epoch": 515.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 516.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4132 | |
| }, | |
| { | |
| "epoch": 516.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4136 | |
| }, | |
| { | |
| "epoch": 517.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 517.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 4144 | |
| }, | |
| { | |
| "epoch": 518.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4148 | |
| }, | |
| { | |
| "epoch": 518.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4152 | |
| }, | |
| { | |
| "epoch": 519.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4156 | |
| }, | |
| { | |
| "epoch": 519.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 519.99, | |
| "eval_exact_match": 0.4428997020854022, | |
| "eval_exec": 0.4856007944389275, | |
| "eval_loss": 0.6127471327781677, | |
| "eval_runtime": 203.9094, | |
| "eval_samples_per_second": 6.375, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 520.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4164 | |
| }, | |
| { | |
| "epoch": 520.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4168 | |
| }, | |
| { | |
| "epoch": 521.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4172 | |
| }, | |
| { | |
| "epoch": 521.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4176 | |
| }, | |
| { | |
| "epoch": 522.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 522.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4184 | |
| }, | |
| { | |
| "epoch": 523.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4188 | |
| }, | |
| { | |
| "epoch": 523.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4192 | |
| }, | |
| { | |
| "epoch": 524.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4196 | |
| }, | |
| { | |
| "epoch": 524.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 525.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4204 | |
| }, | |
| { | |
| "epoch": 525.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4208 | |
| }, | |
| { | |
| "epoch": 526.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4212 | |
| }, | |
| { | |
| "epoch": 526.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4216 | |
| }, | |
| { | |
| "epoch": 527.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 527.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 527.99, | |
| "eval_exact_match": 0.4458788480635551, | |
| "eval_exec": 0.48758689175769615, | |
| "eval_loss": 0.6248003244400024, | |
| "eval_runtime": 204.1177, | |
| "eval_samples_per_second": 6.369, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 528.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4228 | |
| }, | |
| { | |
| "epoch": 528.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4232 | |
| }, | |
| { | |
| "epoch": 529.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4236 | |
| }, | |
| { | |
| "epoch": 529.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 530.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4244 | |
| }, | |
| { | |
| "epoch": 530.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4248 | |
| }, | |
| { | |
| "epoch": 531.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4252 | |
| }, | |
| { | |
| "epoch": 531.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4256 | |
| }, | |
| { | |
| "epoch": 532.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 532.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4264 | |
| }, | |
| { | |
| "epoch": 533.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4268 | |
| }, | |
| { | |
| "epoch": 533.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4272 | |
| }, | |
| { | |
| "epoch": 534.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4276 | |
| }, | |
| { | |
| "epoch": 534.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 535.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4284 | |
| }, | |
| { | |
| "epoch": 535.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4288 | |
| }, | |
| { | |
| "epoch": 535.99, | |
| "eval_exact_match": 0.4468718967229394, | |
| "eval_exec": 0.48063555114200596, | |
| "eval_loss": 0.6122114658355713, | |
| "eval_runtime": 196.9407, | |
| "eval_samples_per_second": 6.601, | |
| "step": 4288 | |
| }, | |
| { | |
| "epoch": 536.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4292 | |
| }, | |
| { | |
| "epoch": 536.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4296 | |
| }, | |
| { | |
| "epoch": 537.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 537.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4304 | |
| }, | |
| { | |
| "epoch": 538.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 4308 | |
| }, | |
| { | |
| "epoch": 538.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4312 | |
| }, | |
| { | |
| "epoch": 539.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4316 | |
| }, | |
| { | |
| "epoch": 539.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 540.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4324 | |
| }, | |
| { | |
| "epoch": 540.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4328 | |
| }, | |
| { | |
| "epoch": 541.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0013, | |
| "step": 4332 | |
| }, | |
| { | |
| "epoch": 541.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4336 | |
| }, | |
| { | |
| "epoch": 542.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 542.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4344 | |
| }, | |
| { | |
| "epoch": 543.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4348 | |
| }, | |
| { | |
| "epoch": 543.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 543.99, | |
| "eval_exact_match": 0.43495531281032773, | |
| "eval_exec": 0.4816285998013903, | |
| "eval_loss": 0.605417013168335, | |
| "eval_runtime": 200.1247, | |
| "eval_samples_per_second": 6.496, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 544.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4356 | |
| }, | |
| { | |
| "epoch": 544.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 545.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4364 | |
| }, | |
| { | |
| "epoch": 545.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 546.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4372 | |
| }, | |
| { | |
| "epoch": 546.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4376 | |
| }, | |
| { | |
| "epoch": 547.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 547.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4384 | |
| }, | |
| { | |
| "epoch": 548.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4388 | |
| }, | |
| { | |
| "epoch": 548.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4392 | |
| }, | |
| { | |
| "epoch": 549.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4396 | |
| }, | |
| { | |
| "epoch": 549.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 550.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4404 | |
| }, | |
| { | |
| "epoch": 550.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4408 | |
| }, | |
| { | |
| "epoch": 551.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4412 | |
| }, | |
| { | |
| "epoch": 551.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4416 | |
| }, | |
| { | |
| "epoch": 551.99, | |
| "eval_exact_match": 0.4369414101290963, | |
| "eval_exec": 0.4746772591857001, | |
| "eval_loss": 0.6194772124290466, | |
| "eval_runtime": 195.0605, | |
| "eval_samples_per_second": 6.665, | |
| "step": 4416 | |
| }, | |
| { | |
| "epoch": 552.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 552.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4424 | |
| }, | |
| { | |
| "epoch": 553.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4428 | |
| }, | |
| { | |
| "epoch": 553.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4432 | |
| }, | |
| { | |
| "epoch": 554.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4436 | |
| }, | |
| { | |
| "epoch": 554.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 555.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4444 | |
| }, | |
| { | |
| "epoch": 555.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4448 | |
| }, | |
| { | |
| "epoch": 556.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4452 | |
| }, | |
| { | |
| "epoch": 556.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4456 | |
| }, | |
| { | |
| "epoch": 557.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 557.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4464 | |
| }, | |
| { | |
| "epoch": 558.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4468 | |
| }, | |
| { | |
| "epoch": 558.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4472 | |
| }, | |
| { | |
| "epoch": 559.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4476 | |
| }, | |
| { | |
| "epoch": 559.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 559.99, | |
| "eval_exact_match": 0.44885799404170806, | |
| "eval_exec": 0.4925521350546177, | |
| "eval_loss": 0.6179357767105103, | |
| "eval_runtime": 194.9028, | |
| "eval_samples_per_second": 6.67, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 560.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4484 | |
| }, | |
| { | |
| "epoch": 560.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4488 | |
| }, | |
| { | |
| "epoch": 561.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4492 | |
| }, | |
| { | |
| "epoch": 561.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4496 | |
| }, | |
| { | |
| "epoch": 562.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 562.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4504 | |
| }, | |
| { | |
| "epoch": 563.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4508 | |
| }, | |
| { | |
| "epoch": 563.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4512 | |
| }, | |
| { | |
| "epoch": 564.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4516 | |
| }, | |
| { | |
| "epoch": 564.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 565.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4524 | |
| }, | |
| { | |
| "epoch": 565.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4528 | |
| }, | |
| { | |
| "epoch": 566.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4532 | |
| }, | |
| { | |
| "epoch": 566.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4536 | |
| }, | |
| { | |
| "epoch": 567.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 567.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4544 | |
| }, | |
| { | |
| "epoch": 567.99, | |
| "eval_exact_match": 0.45084409136047665, | |
| "eval_exec": 0.4915590863952334, | |
| "eval_loss": 0.6036345958709717, | |
| "eval_runtime": 196.9122, | |
| "eval_samples_per_second": 6.602, | |
| "step": 4544 | |
| }, | |
| { | |
| "epoch": 568.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4548 | |
| }, | |
| { | |
| "epoch": 568.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4552 | |
| }, | |
| { | |
| "epoch": 569.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4556 | |
| }, | |
| { | |
| "epoch": 569.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 570.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4564 | |
| }, | |
| { | |
| "epoch": 570.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4568 | |
| }, | |
| { | |
| "epoch": 571.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4572 | |
| }, | |
| { | |
| "epoch": 571.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4576 | |
| }, | |
| { | |
| "epoch": 572.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 572.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4584 | |
| }, | |
| { | |
| "epoch": 573.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4588 | |
| }, | |
| { | |
| "epoch": 573.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4592 | |
| }, | |
| { | |
| "epoch": 574.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4596 | |
| }, | |
| { | |
| "epoch": 574.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 575.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4604 | |
| }, | |
| { | |
| "epoch": 575.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 575.99, | |
| "eval_exact_match": 0.45183714001986097, | |
| "eval_exec": 0.49354518371400197, | |
| "eval_loss": 0.6121585369110107, | |
| "eval_runtime": 216.7301, | |
| "eval_samples_per_second": 5.998, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 576.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4612 | |
| }, | |
| { | |
| "epoch": 576.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4616 | |
| }, | |
| { | |
| "epoch": 577.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 577.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4624 | |
| }, | |
| { | |
| "epoch": 578.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4628 | |
| }, | |
| { | |
| "epoch": 578.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4632 | |
| }, | |
| { | |
| "epoch": 579.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4636 | |
| }, | |
| { | |
| "epoch": 579.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 580.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4644 | |
| }, | |
| { | |
| "epoch": 580.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4648 | |
| }, | |
| { | |
| "epoch": 581.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4652 | |
| }, | |
| { | |
| "epoch": 581.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4656 | |
| }, | |
| { | |
| "epoch": 582.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 582.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4664 | |
| }, | |
| { | |
| "epoch": 583.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4668 | |
| }, | |
| { | |
| "epoch": 583.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4672 | |
| }, | |
| { | |
| "epoch": 583.99, | |
| "eval_exact_match": 0.4438927507447865, | |
| "eval_exec": 0.48063555114200596, | |
| "eval_loss": 0.6172407269477844, | |
| "eval_runtime": 196.291, | |
| "eval_samples_per_second": 6.623, | |
| "step": 4672 | |
| }, | |
| { | |
| "epoch": 584.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4676 | |
| }, | |
| { | |
| "epoch": 584.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 585.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4684 | |
| }, | |
| { | |
| "epoch": 585.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4688 | |
| }, | |
| { | |
| "epoch": 586.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4692 | |
| }, | |
| { | |
| "epoch": 586.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4696 | |
| }, | |
| { | |
| "epoch": 587.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 587.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 588.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4708 | |
| }, | |
| { | |
| "epoch": 588.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4712 | |
| }, | |
| { | |
| "epoch": 589.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4716 | |
| }, | |
| { | |
| "epoch": 589.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 590.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4724 | |
| }, | |
| { | |
| "epoch": 590.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4728 | |
| }, | |
| { | |
| "epoch": 591.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4732 | |
| }, | |
| { | |
| "epoch": 591.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 591.99, | |
| "eval_exact_match": 0.43793445878848064, | |
| "eval_exec": 0.4766633565044687, | |
| "eval_loss": 0.6108298301696777, | |
| "eval_runtime": 203.2897, | |
| "eval_samples_per_second": 6.395, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 592.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 592.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4744 | |
| }, | |
| { | |
| "epoch": 593.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4748 | |
| }, | |
| { | |
| "epoch": 593.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4752 | |
| }, | |
| { | |
| "epoch": 594.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 4756 | |
| }, | |
| { | |
| "epoch": 594.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 595.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4764 | |
| }, | |
| { | |
| "epoch": 595.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0022, | |
| "step": 4768 | |
| }, | |
| { | |
| "epoch": 596.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0012, | |
| "step": 4772 | |
| }, | |
| { | |
| "epoch": 596.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4776 | |
| }, | |
| { | |
| "epoch": 597.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 597.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4784 | |
| }, | |
| { | |
| "epoch": 598.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4788 | |
| }, | |
| { | |
| "epoch": 598.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4792 | |
| }, | |
| { | |
| "epoch": 599.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4796 | |
| }, | |
| { | |
| "epoch": 599.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 599.99, | |
| "eval_exact_match": 0.4369414101290963, | |
| "eval_exec": 0.4746772591857001, | |
| "eval_loss": 0.6420004367828369, | |
| "eval_runtime": 195.6774, | |
| "eval_samples_per_second": 6.644, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 600.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4804 | |
| }, | |
| { | |
| "epoch": 600.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4808 | |
| }, | |
| { | |
| "epoch": 601.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4812 | |
| }, | |
| { | |
| "epoch": 601.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4816 | |
| }, | |
| { | |
| "epoch": 602.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 602.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4824 | |
| }, | |
| { | |
| "epoch": 603.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4828 | |
| }, | |
| { | |
| "epoch": 603.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4832 | |
| }, | |
| { | |
| "epoch": 604.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4836 | |
| }, | |
| { | |
| "epoch": 604.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 605.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4844 | |
| }, | |
| { | |
| "epoch": 605.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4848 | |
| }, | |
| { | |
| "epoch": 606.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4852 | |
| }, | |
| { | |
| "epoch": 606.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4856 | |
| }, | |
| { | |
| "epoch": 607.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 607.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 607.99, | |
| "eval_exact_match": 0.4438927507447865, | |
| "eval_exec": 0.4726911618669315, | |
| "eval_loss": 0.6424113512039185, | |
| "eval_runtime": 202.6801, | |
| "eval_samples_per_second": 6.414, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 608.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4868 | |
| }, | |
| { | |
| "epoch": 608.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4872 | |
| }, | |
| { | |
| "epoch": 609.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4876 | |
| }, | |
| { | |
| "epoch": 609.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 610.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 4884 | |
| }, | |
| { | |
| "epoch": 610.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4888 | |
| }, | |
| { | |
| "epoch": 611.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4892 | |
| }, | |
| { | |
| "epoch": 611.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4896 | |
| }, | |
| { | |
| "epoch": 612.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 612.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4904 | |
| }, | |
| { | |
| "epoch": 613.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4908 | |
| }, | |
| { | |
| "epoch": 613.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4912 | |
| }, | |
| { | |
| "epoch": 614.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4916 | |
| }, | |
| { | |
| "epoch": 614.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 615.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4924 | |
| }, | |
| { | |
| "epoch": 615.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4928 | |
| }, | |
| { | |
| "epoch": 615.99, | |
| "eval_exact_match": 0.4339622641509434, | |
| "eval_exec": 0.46871896722939427, | |
| "eval_loss": 0.630379319190979, | |
| "eval_runtime": 198.8917, | |
| "eval_samples_per_second": 6.536, | |
| "step": 4928 | |
| }, | |
| { | |
| "epoch": 616.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4932 | |
| }, | |
| { | |
| "epoch": 616.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4936 | |
| }, | |
| { | |
| "epoch": 617.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 617.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4944 | |
| }, | |
| { | |
| "epoch": 618.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4948 | |
| }, | |
| { | |
| "epoch": 618.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4952 | |
| }, | |
| { | |
| "epoch": 619.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4956 | |
| }, | |
| { | |
| "epoch": 619.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 620.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4964 | |
| }, | |
| { | |
| "epoch": 620.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4968 | |
| }, | |
| { | |
| "epoch": 621.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 4972 | |
| }, | |
| { | |
| "epoch": 621.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4976 | |
| }, | |
| { | |
| "epoch": 622.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 622.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4984 | |
| }, | |
| { | |
| "epoch": 623.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4988 | |
| }, | |
| { | |
| "epoch": 623.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 623.99, | |
| "eval_exact_match": 0.44190665342601787, | |
| "eval_exec": 0.48361469712015887, | |
| "eval_loss": 0.6309102177619934, | |
| "eval_runtime": 191.856, | |
| "eval_samples_per_second": 6.776, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 624.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 4996 | |
| }, | |
| { | |
| "epoch": 624.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 625.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 5004 | |
| }, | |
| { | |
| "epoch": 625.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5008 | |
| }, | |
| { | |
| "epoch": 626.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5012 | |
| }, | |
| { | |
| "epoch": 626.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5016 | |
| }, | |
| { | |
| "epoch": 627.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 627.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5024 | |
| }, | |
| { | |
| "epoch": 628.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5028 | |
| }, | |
| { | |
| "epoch": 628.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5032 | |
| }, | |
| { | |
| "epoch": 629.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5036 | |
| }, | |
| { | |
| "epoch": 629.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 630.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5044 | |
| }, | |
| { | |
| "epoch": 630.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5048 | |
| }, | |
| { | |
| "epoch": 631.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5052 | |
| }, | |
| { | |
| "epoch": 631.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5056 | |
| }, | |
| { | |
| "epoch": 631.99, | |
| "eval_exact_match": 0.4369414101290963, | |
| "eval_exec": 0.46772591857000995, | |
| "eval_loss": 0.6289202570915222, | |
| "eval_runtime": 198.0458, | |
| "eval_samples_per_second": 6.564, | |
| "step": 5056 | |
| }, | |
| { | |
| "epoch": 632.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 632.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5064 | |
| }, | |
| { | |
| "epoch": 633.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5068 | |
| }, | |
| { | |
| "epoch": 633.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5072 | |
| }, | |
| { | |
| "epoch": 634.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5076 | |
| }, | |
| { | |
| "epoch": 634.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 635.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5084 | |
| }, | |
| { | |
| "epoch": 635.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5088 | |
| }, | |
| { | |
| "epoch": 636.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5092 | |
| }, | |
| { | |
| "epoch": 636.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5096 | |
| }, | |
| { | |
| "epoch": 637.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 637.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5104 | |
| }, | |
| { | |
| "epoch": 638.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5108 | |
| }, | |
| { | |
| "epoch": 638.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5112 | |
| }, | |
| { | |
| "epoch": 639.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5116 | |
| }, | |
| { | |
| "epoch": 639.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 639.99, | |
| "eval_exact_match": 0.44985104270109233, | |
| "eval_exec": 0.4885799404170804, | |
| "eval_loss": 0.6289829015731812, | |
| "eval_runtime": 208.2718, | |
| "eval_samples_per_second": 6.242, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 640.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 5124 | |
| }, | |
| { | |
| "epoch": 640.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5128 | |
| }, | |
| { | |
| "epoch": 641.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5132 | |
| }, | |
| { | |
| "epoch": 641.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5136 | |
| }, | |
| { | |
| "epoch": 642.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 642.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5144 | |
| }, | |
| { | |
| "epoch": 643.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5148 | |
| }, | |
| { | |
| "epoch": 643.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5152 | |
| }, | |
| { | |
| "epoch": 644.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5156 | |
| }, | |
| { | |
| "epoch": 644.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 645.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5164 | |
| }, | |
| { | |
| "epoch": 645.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5168 | |
| }, | |
| { | |
| "epoch": 646.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5172 | |
| }, | |
| { | |
| "epoch": 646.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5176 | |
| }, | |
| { | |
| "epoch": 647.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 647.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5184 | |
| }, | |
| { | |
| "epoch": 647.99, | |
| "eval_exact_match": 0.45084409136047665, | |
| "eval_exec": 0.4846077457795432, | |
| "eval_loss": 0.6306817531585693, | |
| "eval_runtime": 212.6718, | |
| "eval_samples_per_second": 6.113, | |
| "step": 5184 | |
| }, | |
| { | |
| "epoch": 648.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5188 | |
| }, | |
| { | |
| "epoch": 648.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5192 | |
| }, | |
| { | |
| "epoch": 649.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5196 | |
| }, | |
| { | |
| "epoch": 649.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 650.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5204 | |
| }, | |
| { | |
| "epoch": 650.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5208 | |
| }, | |
| { | |
| "epoch": 651.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5212 | |
| }, | |
| { | |
| "epoch": 651.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0011, | |
| "step": 5216 | |
| }, | |
| { | |
| "epoch": 652.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 652.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5224 | |
| }, | |
| { | |
| "epoch": 653.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5228 | |
| }, | |
| { | |
| "epoch": 653.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5232 | |
| }, | |
| { | |
| "epoch": 654.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5236 | |
| }, | |
| { | |
| "epoch": 654.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 655.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5244 | |
| }, | |
| { | |
| "epoch": 655.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5248 | |
| }, | |
| { | |
| "epoch": 655.99, | |
| "eval_exact_match": 0.43495531281032773, | |
| "eval_exec": 0.4746772591857001, | |
| "eval_loss": 0.6379679441452026, | |
| "eval_runtime": 203.8275, | |
| "eval_samples_per_second": 6.378, | |
| "step": 5248 | |
| }, | |
| { | |
| "epoch": 656.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5252 | |
| }, | |
| { | |
| "epoch": 656.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5256 | |
| }, | |
| { | |
| "epoch": 657.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 657.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5264 | |
| }, | |
| { | |
| "epoch": 658.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5268 | |
| }, | |
| { | |
| "epoch": 658.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5272 | |
| }, | |
| { | |
| "epoch": 659.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5276 | |
| }, | |
| { | |
| "epoch": 659.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 660.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5284 | |
| }, | |
| { | |
| "epoch": 660.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5288 | |
| }, | |
| { | |
| "epoch": 661.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5292 | |
| }, | |
| { | |
| "epoch": 661.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5296 | |
| }, | |
| { | |
| "epoch": 662.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 662.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5304 | |
| }, | |
| { | |
| "epoch": 663.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5308 | |
| }, | |
| { | |
| "epoch": 663.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5312 | |
| }, | |
| { | |
| "epoch": 663.99, | |
| "eval_exact_match": 0.43793445878848064, | |
| "eval_exec": 0.4766633565044687, | |
| "eval_loss": 0.6149209141731262, | |
| "eval_runtime": 205.2947, | |
| "eval_samples_per_second": 6.332, | |
| "step": 5312 | |
| }, | |
| { | |
| "epoch": 664.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5316 | |
| }, | |
| { | |
| "epoch": 664.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 665.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5324 | |
| }, | |
| { | |
| "epoch": 665.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5328 | |
| }, | |
| { | |
| "epoch": 666.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5332 | |
| }, | |
| { | |
| "epoch": 666.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5336 | |
| }, | |
| { | |
| "epoch": 667.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 667.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5344 | |
| }, | |
| { | |
| "epoch": 668.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5348 | |
| }, | |
| { | |
| "epoch": 668.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5352 | |
| }, | |
| { | |
| "epoch": 669.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5356 | |
| }, | |
| { | |
| "epoch": 669.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 670.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5364 | |
| }, | |
| { | |
| "epoch": 670.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5368 | |
| }, | |
| { | |
| "epoch": 671.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5372 | |
| }, | |
| { | |
| "epoch": 671.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 671.99, | |
| "eval_exact_match": 0.4458788480635551, | |
| "eval_exec": 0.47765640516385305, | |
| "eval_loss": 0.6341748237609863, | |
| "eval_runtime": 200.2501, | |
| "eval_samples_per_second": 6.492, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 672.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 672.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5384 | |
| }, | |
| { | |
| "epoch": 673.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5388 | |
| }, | |
| { | |
| "epoch": 673.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5392 | |
| }, | |
| { | |
| "epoch": 674.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5396 | |
| }, | |
| { | |
| "epoch": 674.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 675.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5404 | |
| }, | |
| { | |
| "epoch": 675.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5408 | |
| }, | |
| { | |
| "epoch": 676.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5412 | |
| }, | |
| { | |
| "epoch": 676.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5416 | |
| }, | |
| { | |
| "epoch": 677.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 677.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5424 | |
| }, | |
| { | |
| "epoch": 678.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5428 | |
| }, | |
| { | |
| "epoch": 678.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5432 | |
| }, | |
| { | |
| "epoch": 679.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5436 | |
| }, | |
| { | |
| "epoch": 679.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 679.99, | |
| "eval_exact_match": 0.43892750744786496, | |
| "eval_exec": 0.4766633565044687, | |
| "eval_loss": 0.6424917578697205, | |
| "eval_runtime": 214.8147, | |
| "eval_samples_per_second": 6.052, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 680.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5444 | |
| }, | |
| { | |
| "epoch": 680.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5448 | |
| }, | |
| { | |
| "epoch": 681.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5452 | |
| }, | |
| { | |
| "epoch": 681.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5456 | |
| }, | |
| { | |
| "epoch": 682.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 682.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0029, | |
| "step": 5464 | |
| }, | |
| { | |
| "epoch": 683.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5468 | |
| }, | |
| { | |
| "epoch": 683.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5472 | |
| }, | |
| { | |
| "epoch": 684.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5476 | |
| }, | |
| { | |
| "epoch": 684.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 685.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5484 | |
| }, | |
| { | |
| "epoch": 685.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5488 | |
| }, | |
| { | |
| "epoch": 686.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5492 | |
| }, | |
| { | |
| "epoch": 686.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5496 | |
| }, | |
| { | |
| "epoch": 687.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 687.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5504 | |
| }, | |
| { | |
| "epoch": 687.99, | |
| "eval_exact_match": 0.44786494538232374, | |
| "eval_exec": 0.4826216484607746, | |
| "eval_loss": 0.6195693612098694, | |
| "eval_runtime": 205.7504, | |
| "eval_samples_per_second": 6.318, | |
| "step": 5504 | |
| }, | |
| { | |
| "epoch": 688.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5508 | |
| }, | |
| { | |
| "epoch": 688.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5512 | |
| }, | |
| { | |
| "epoch": 689.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5516 | |
| }, | |
| { | |
| "epoch": 689.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 690.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5524 | |
| }, | |
| { | |
| "epoch": 690.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5528 | |
| }, | |
| { | |
| "epoch": 691.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5532 | |
| }, | |
| { | |
| "epoch": 691.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5536 | |
| }, | |
| { | |
| "epoch": 692.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 692.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5544 | |
| }, | |
| { | |
| "epoch": 693.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5548 | |
| }, | |
| { | |
| "epoch": 693.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5552 | |
| }, | |
| { | |
| "epoch": 694.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5556 | |
| }, | |
| { | |
| "epoch": 694.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 695.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5564 | |
| }, | |
| { | |
| "epoch": 695.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5568 | |
| }, | |
| { | |
| "epoch": 695.99, | |
| "eval_exact_match": 0.4448857994041708, | |
| "eval_exec": 0.48361469712015887, | |
| "eval_loss": 0.6261533498764038, | |
| "eval_runtime": 213.1489, | |
| "eval_samples_per_second": 6.099, | |
| "step": 5568 | |
| }, | |
| { | |
| "epoch": 696.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5572 | |
| }, | |
| { | |
| "epoch": 696.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5576 | |
| }, | |
| { | |
| "epoch": 697.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 697.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5584 | |
| }, | |
| { | |
| "epoch": 698.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5588 | |
| }, | |
| { | |
| "epoch": 698.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5592 | |
| }, | |
| { | |
| "epoch": 699.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5596 | |
| }, | |
| { | |
| "epoch": 699.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 700.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5604 | |
| }, | |
| { | |
| "epoch": 700.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5608 | |
| }, | |
| { | |
| "epoch": 701.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5612 | |
| }, | |
| { | |
| "epoch": 701.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5616 | |
| }, | |
| { | |
| "epoch": 702.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 702.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5624 | |
| }, | |
| { | |
| "epoch": 703.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5628 | |
| }, | |
| { | |
| "epoch": 703.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 703.99, | |
| "eval_exact_match": 0.43892750744786496, | |
| "eval_exec": 0.4756703078450844, | |
| "eval_loss": 0.6435733437538147, | |
| "eval_runtime": 208.5122, | |
| "eval_samples_per_second": 6.235, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 704.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5636 | |
| }, | |
| { | |
| "epoch": 704.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 705.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5644 | |
| }, | |
| { | |
| "epoch": 705.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5648 | |
| }, | |
| { | |
| "epoch": 706.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5652 | |
| }, | |
| { | |
| "epoch": 706.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5656 | |
| }, | |
| { | |
| "epoch": 707.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 707.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5664 | |
| }, | |
| { | |
| "epoch": 708.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5668 | |
| }, | |
| { | |
| "epoch": 708.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5672 | |
| }, | |
| { | |
| "epoch": 709.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5676 | |
| }, | |
| { | |
| "epoch": 709.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 710.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5684 | |
| }, | |
| { | |
| "epoch": 710.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 711.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5692 | |
| }, | |
| { | |
| "epoch": 711.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5696 | |
| }, | |
| { | |
| "epoch": 711.99, | |
| "eval_exact_match": 0.44985104270109233, | |
| "eval_exec": 0.48659384309831183, | |
| "eval_loss": 0.6287506818771362, | |
| "eval_runtime": 206.1374, | |
| "eval_samples_per_second": 6.306, | |
| "step": 5696 | |
| }, | |
| { | |
| "epoch": 712.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 712.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5704 | |
| }, | |
| { | |
| "epoch": 713.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5708 | |
| }, | |
| { | |
| "epoch": 713.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5712 | |
| }, | |
| { | |
| "epoch": 714.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5716 | |
| }, | |
| { | |
| "epoch": 714.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 715.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5724 | |
| }, | |
| { | |
| "epoch": 715.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5728 | |
| }, | |
| { | |
| "epoch": 716.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5732 | |
| }, | |
| { | |
| "epoch": 716.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5736 | |
| }, | |
| { | |
| "epoch": 717.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 717.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5744 | |
| }, | |
| { | |
| "epoch": 718.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5748 | |
| }, | |
| { | |
| "epoch": 718.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5752 | |
| }, | |
| { | |
| "epoch": 719.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5756 | |
| }, | |
| { | |
| "epoch": 719.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 719.99, | |
| "eval_exact_match": 0.4448857994041708, | |
| "eval_exec": 0.4856007944389275, | |
| "eval_loss": 0.641159176826477, | |
| "eval_runtime": 206.6444, | |
| "eval_samples_per_second": 6.291, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 720.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5764 | |
| }, | |
| { | |
| "epoch": 720.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5768 | |
| }, | |
| { | |
| "epoch": 721.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5772 | |
| }, | |
| { | |
| "epoch": 721.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5776 | |
| }, | |
| { | |
| "epoch": 722.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 722.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5784 | |
| }, | |
| { | |
| "epoch": 723.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5788 | |
| }, | |
| { | |
| "epoch": 723.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 5792 | |
| }, | |
| { | |
| "epoch": 724.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5796 | |
| }, | |
| { | |
| "epoch": 724.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 725.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5804 | |
| }, | |
| { | |
| "epoch": 725.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5808 | |
| }, | |
| { | |
| "epoch": 726.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5812 | |
| }, | |
| { | |
| "epoch": 726.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5816 | |
| }, | |
| { | |
| "epoch": 727.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 727.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5824 | |
| }, | |
| { | |
| "epoch": 727.99, | |
| "eval_exact_match": 0.4458788480635551, | |
| "eval_exec": 0.48659384309831183, | |
| "eval_loss": 0.647663414478302, | |
| "eval_runtime": 211.128, | |
| "eval_samples_per_second": 6.157, | |
| "step": 5824 | |
| }, | |
| { | |
| "epoch": 728.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5828 | |
| }, | |
| { | |
| "epoch": 728.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5832 | |
| }, | |
| { | |
| "epoch": 729.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5836 | |
| }, | |
| { | |
| "epoch": 729.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 730.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5844 | |
| }, | |
| { | |
| "epoch": 730.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5848 | |
| }, | |
| { | |
| "epoch": 731.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 5852 | |
| }, | |
| { | |
| "epoch": 731.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5856 | |
| }, | |
| { | |
| "epoch": 732.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 732.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5864 | |
| }, | |
| { | |
| "epoch": 733.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 5868 | |
| }, | |
| { | |
| "epoch": 733.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5872 | |
| }, | |
| { | |
| "epoch": 734.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5876 | |
| }, | |
| { | |
| "epoch": 734.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 735.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5884 | |
| }, | |
| { | |
| "epoch": 735.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5888 | |
| }, | |
| { | |
| "epoch": 735.99, | |
| "eval_exact_match": 0.4448857994041708, | |
| "eval_exec": 0.4846077457795432, | |
| "eval_loss": 0.6436724662780762, | |
| "eval_runtime": 200.2646, | |
| "eval_samples_per_second": 6.491, | |
| "step": 5888 | |
| }, | |
| { | |
| "epoch": 736.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5892 | |
| }, | |
| { | |
| "epoch": 736.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5896 | |
| }, | |
| { | |
| "epoch": 737.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 737.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5904 | |
| }, | |
| { | |
| "epoch": 738.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5908 | |
| }, | |
| { | |
| "epoch": 738.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5912 | |
| }, | |
| { | |
| "epoch": 739.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5916 | |
| }, | |
| { | |
| "epoch": 739.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 740.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5924 | |
| }, | |
| { | |
| "epoch": 740.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5928 | |
| }, | |
| { | |
| "epoch": 741.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5932 | |
| }, | |
| { | |
| "epoch": 741.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5936 | |
| }, | |
| { | |
| "epoch": 742.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 742.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 5944 | |
| }, | |
| { | |
| "epoch": 743.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 5948 | |
| }, | |
| { | |
| "epoch": 743.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5952 | |
| }, | |
| { | |
| "epoch": 743.99, | |
| "eval_exact_match": 0.44786494538232374, | |
| "eval_exec": 0.4826216484607746, | |
| "eval_loss": 0.648475706577301, | |
| "eval_runtime": 203.6491, | |
| "eval_samples_per_second": 6.384, | |
| "step": 5952 | |
| }, | |
| { | |
| "epoch": 744.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5956 | |
| }, | |
| { | |
| "epoch": 744.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 745.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5964 | |
| }, | |
| { | |
| "epoch": 745.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 5968 | |
| }, | |
| { | |
| "epoch": 746.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 5972 | |
| }, | |
| { | |
| "epoch": 746.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5976 | |
| }, | |
| { | |
| "epoch": 747.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 747.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5984 | |
| }, | |
| { | |
| "epoch": 748.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5988 | |
| }, | |
| { | |
| "epoch": 748.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 5992 | |
| }, | |
| { | |
| "epoch": 749.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 5996 | |
| }, | |
| { | |
| "epoch": 749.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 750.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6004 | |
| }, | |
| { | |
| "epoch": 750.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6008 | |
| }, | |
| { | |
| "epoch": 751.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6012 | |
| }, | |
| { | |
| "epoch": 751.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6016 | |
| }, | |
| { | |
| "epoch": 751.99, | |
| "eval_exact_match": 0.4468718967229394, | |
| "eval_exec": 0.48063555114200596, | |
| "eval_loss": 0.6662933826446533, | |
| "eval_runtime": 196.1389, | |
| "eval_samples_per_second": 6.628, | |
| "step": 6016 | |
| }, | |
| { | |
| "epoch": 752.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 752.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6024 | |
| }, | |
| { | |
| "epoch": 753.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6028 | |
| }, | |
| { | |
| "epoch": 753.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6032 | |
| }, | |
| { | |
| "epoch": 754.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6036 | |
| }, | |
| { | |
| "epoch": 754.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 755.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6044 | |
| }, | |
| { | |
| "epoch": 755.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6048 | |
| }, | |
| { | |
| "epoch": 756.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6052 | |
| }, | |
| { | |
| "epoch": 756.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6056 | |
| }, | |
| { | |
| "epoch": 757.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 757.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6064 | |
| }, | |
| { | |
| "epoch": 758.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6068 | |
| }, | |
| { | |
| "epoch": 758.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6072 | |
| }, | |
| { | |
| "epoch": 759.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6076 | |
| }, | |
| { | |
| "epoch": 759.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 759.99, | |
| "eval_exact_match": 0.43793445878848064, | |
| "eval_exec": 0.46971201588877853, | |
| "eval_loss": 0.6573625206947327, | |
| "eval_runtime": 203.6771, | |
| "eval_samples_per_second": 6.383, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 760.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6084 | |
| }, | |
| { | |
| "epoch": 760.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6088 | |
| }, | |
| { | |
| "epoch": 761.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6092 | |
| }, | |
| { | |
| "epoch": 761.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6096 | |
| }, | |
| { | |
| "epoch": 762.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 762.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6104 | |
| }, | |
| { | |
| "epoch": 763.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6108 | |
| }, | |
| { | |
| "epoch": 763.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6112 | |
| }, | |
| { | |
| "epoch": 764.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6116 | |
| }, | |
| { | |
| "epoch": 764.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 765.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0009, | |
| "step": 6124 | |
| }, | |
| { | |
| "epoch": 765.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6128 | |
| }, | |
| { | |
| "epoch": 766.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6132 | |
| }, | |
| { | |
| "epoch": 766.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6136 | |
| }, | |
| { | |
| "epoch": 767.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 767.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 767.99, | |
| "eval_exact_match": 0.45183714001986097, | |
| "eval_exec": 0.48957298907646474, | |
| "eval_loss": 0.6423913240432739, | |
| "eval_runtime": 196.8271, | |
| "eval_samples_per_second": 6.605, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 768.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6148 | |
| }, | |
| { | |
| "epoch": 768.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6152 | |
| }, | |
| { | |
| "epoch": 769.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6156 | |
| }, | |
| { | |
| "epoch": 769.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 770.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6164 | |
| }, | |
| { | |
| "epoch": 770.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6168 | |
| }, | |
| { | |
| "epoch": 771.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6172 | |
| }, | |
| { | |
| "epoch": 771.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6176 | |
| }, | |
| { | |
| "epoch": 772.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 772.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6184 | |
| }, | |
| { | |
| "epoch": 773.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6188 | |
| }, | |
| { | |
| "epoch": 773.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6192 | |
| }, | |
| { | |
| "epoch": 774.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6196 | |
| }, | |
| { | |
| "epoch": 774.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 775.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6204 | |
| }, | |
| { | |
| "epoch": 775.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6208 | |
| }, | |
| { | |
| "epoch": 775.99, | |
| "eval_exact_match": 0.4558093346573982, | |
| "eval_exec": 0.49652432969215493, | |
| "eval_loss": 0.6396003365516663, | |
| "eval_runtime": 214.4091, | |
| "eval_samples_per_second": 6.063, | |
| "step": 6208 | |
| }, | |
| { | |
| "epoch": 776.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6212 | |
| }, | |
| { | |
| "epoch": 776.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6216 | |
| }, | |
| { | |
| "epoch": 777.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 777.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6224 | |
| }, | |
| { | |
| "epoch": 778.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6228 | |
| }, | |
| { | |
| "epoch": 778.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6232 | |
| }, | |
| { | |
| "epoch": 779.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6236 | |
| }, | |
| { | |
| "epoch": 779.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 780.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6244 | |
| }, | |
| { | |
| "epoch": 780.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6248 | |
| }, | |
| { | |
| "epoch": 781.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6252 | |
| }, | |
| { | |
| "epoch": 781.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6256 | |
| }, | |
| { | |
| "epoch": 782.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 782.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6264 | |
| }, | |
| { | |
| "epoch": 783.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6268 | |
| }, | |
| { | |
| "epoch": 783.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6272 | |
| }, | |
| { | |
| "epoch": 783.99, | |
| "eval_exact_match": 0.43892750744786496, | |
| "eval_exec": 0.4856007944389275, | |
| "eval_loss": 0.6399450898170471, | |
| "eval_runtime": 226.2412, | |
| "eval_samples_per_second": 5.746, | |
| "step": 6272 | |
| }, | |
| { | |
| "epoch": 784.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6276 | |
| }, | |
| { | |
| "epoch": 784.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 785.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6284 | |
| }, | |
| { | |
| "epoch": 785.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6288 | |
| }, | |
| { | |
| "epoch": 786.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6292 | |
| }, | |
| { | |
| "epoch": 786.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6296 | |
| }, | |
| { | |
| "epoch": 787.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 787.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6304 | |
| }, | |
| { | |
| "epoch": 788.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6308 | |
| }, | |
| { | |
| "epoch": 788.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6312 | |
| }, | |
| { | |
| "epoch": 789.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6316 | |
| }, | |
| { | |
| "epoch": 789.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 790.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6324 | |
| }, | |
| { | |
| "epoch": 790.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6328 | |
| }, | |
| { | |
| "epoch": 791.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6332 | |
| }, | |
| { | |
| "epoch": 791.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.001, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 791.99, | |
| "eval_exact_match": 0.44091360476663355, | |
| "eval_exec": 0.47964250248262164, | |
| "eval_loss": 0.6275960803031921, | |
| "eval_runtime": 209.1576, | |
| "eval_samples_per_second": 6.215, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 792.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 792.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0005, | |
| "step": 6344 | |
| }, | |
| { | |
| "epoch": 793.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6348 | |
| }, | |
| { | |
| "epoch": 793.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6352 | |
| }, | |
| { | |
| "epoch": 794.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6356 | |
| }, | |
| { | |
| "epoch": 794.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 795.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6364 | |
| }, | |
| { | |
| "epoch": 795.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6368 | |
| }, | |
| { | |
| "epoch": 796.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6372 | |
| }, | |
| { | |
| "epoch": 796.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6376 | |
| }, | |
| { | |
| "epoch": 797.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 797.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6384 | |
| }, | |
| { | |
| "epoch": 798.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6388 | |
| }, | |
| { | |
| "epoch": 798.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6392 | |
| }, | |
| { | |
| "epoch": 799.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6396 | |
| }, | |
| { | |
| "epoch": 799.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 799.99, | |
| "eval_exact_match": 0.4468718967229394, | |
| "eval_exec": 0.48659384309831183, | |
| "eval_loss": 0.641415536403656, | |
| "eval_runtime": 208.4431, | |
| "eval_samples_per_second": 6.237, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 800.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6404 | |
| }, | |
| { | |
| "epoch": 800.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6408 | |
| }, | |
| { | |
| "epoch": 801.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6412 | |
| }, | |
| { | |
| "epoch": 801.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6416 | |
| }, | |
| { | |
| "epoch": 802.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 802.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6424 | |
| }, | |
| { | |
| "epoch": 803.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6428 | |
| }, | |
| { | |
| "epoch": 803.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6432 | |
| }, | |
| { | |
| "epoch": 804.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6436 | |
| }, | |
| { | |
| "epoch": 804.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 805.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6444 | |
| }, | |
| { | |
| "epoch": 805.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6448 | |
| }, | |
| { | |
| "epoch": 806.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6452 | |
| }, | |
| { | |
| "epoch": 806.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6456 | |
| }, | |
| { | |
| "epoch": 807.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 807.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6464 | |
| }, | |
| { | |
| "epoch": 807.99, | |
| "eval_exact_match": 0.44786494538232374, | |
| "eval_exec": 0.4846077457795432, | |
| "eval_loss": 0.6324633359909058, | |
| "eval_runtime": 216.057, | |
| "eval_samples_per_second": 6.017, | |
| "step": 6464 | |
| }, | |
| { | |
| "epoch": 808.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6468 | |
| }, | |
| { | |
| "epoch": 808.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6472 | |
| }, | |
| { | |
| "epoch": 809.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6476 | |
| }, | |
| { | |
| "epoch": 809.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 810.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6484 | |
| }, | |
| { | |
| "epoch": 810.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6488 | |
| }, | |
| { | |
| "epoch": 811.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6492 | |
| }, | |
| { | |
| "epoch": 811.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6496 | |
| }, | |
| { | |
| "epoch": 812.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 812.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6504 | |
| }, | |
| { | |
| "epoch": 813.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6508 | |
| }, | |
| { | |
| "epoch": 813.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6512 | |
| }, | |
| { | |
| "epoch": 814.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6516 | |
| }, | |
| { | |
| "epoch": 814.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 815.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6524 | |
| }, | |
| { | |
| "epoch": 815.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6528 | |
| }, | |
| { | |
| "epoch": 815.99, | |
| "eval_exact_match": 0.44190665342601787, | |
| "eval_exec": 0.4885799404170804, | |
| "eval_loss": 0.6281804442405701, | |
| "eval_runtime": 213.8722, | |
| "eval_samples_per_second": 6.078, | |
| "step": 6528 | |
| }, | |
| { | |
| "epoch": 816.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6532 | |
| }, | |
| { | |
| "epoch": 816.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6536 | |
| }, | |
| { | |
| "epoch": 817.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 817.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6544 | |
| }, | |
| { | |
| "epoch": 818.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6548 | |
| }, | |
| { | |
| "epoch": 818.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6552 | |
| }, | |
| { | |
| "epoch": 819.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6556 | |
| }, | |
| { | |
| "epoch": 819.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 820.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0005, | |
| "step": 6564 | |
| }, | |
| { | |
| "epoch": 820.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6568 | |
| }, | |
| { | |
| "epoch": 821.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6572 | |
| }, | |
| { | |
| "epoch": 821.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6576 | |
| }, | |
| { | |
| "epoch": 822.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 822.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6584 | |
| }, | |
| { | |
| "epoch": 823.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6588 | |
| }, | |
| { | |
| "epoch": 823.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6592 | |
| }, | |
| { | |
| "epoch": 823.99, | |
| "eval_exact_match": 0.45779543197616684, | |
| "eval_exec": 0.4915590863952334, | |
| "eval_loss": 0.6452751159667969, | |
| "eval_runtime": 223.4105, | |
| "eval_samples_per_second": 5.819, | |
| "step": 6592 | |
| }, | |
| { | |
| "epoch": 824.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6596 | |
| }, | |
| { | |
| "epoch": 824.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 825.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6604 | |
| }, | |
| { | |
| "epoch": 825.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6608 | |
| }, | |
| { | |
| "epoch": 826.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6612 | |
| }, | |
| { | |
| "epoch": 826.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0005, | |
| "step": 6616 | |
| }, | |
| { | |
| "epoch": 827.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 827.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6624 | |
| }, | |
| { | |
| "epoch": 828.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6628 | |
| }, | |
| { | |
| "epoch": 828.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6632 | |
| }, | |
| { | |
| "epoch": 829.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6636 | |
| }, | |
| { | |
| "epoch": 829.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 830.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6644 | |
| }, | |
| { | |
| "epoch": 830.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6648 | |
| }, | |
| { | |
| "epoch": 831.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6652 | |
| }, | |
| { | |
| "epoch": 831.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6656 | |
| }, | |
| { | |
| "epoch": 831.99, | |
| "eval_exact_match": 0.4637537239324727, | |
| "eval_exec": 0.49751737835153925, | |
| "eval_loss": 0.6585542559623718, | |
| "eval_runtime": 209.9114, | |
| "eval_samples_per_second": 6.193, | |
| "step": 6656 | |
| }, | |
| { | |
| "epoch": 832.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 832.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6664 | |
| }, | |
| { | |
| "epoch": 833.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6668 | |
| }, | |
| { | |
| "epoch": 833.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6672 | |
| }, | |
| { | |
| "epoch": 834.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6676 | |
| }, | |
| { | |
| "epoch": 834.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 835.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6684 | |
| }, | |
| { | |
| "epoch": 835.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6688 | |
| }, | |
| { | |
| "epoch": 836.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6692 | |
| }, | |
| { | |
| "epoch": 836.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6696 | |
| }, | |
| { | |
| "epoch": 837.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 837.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6704 | |
| }, | |
| { | |
| "epoch": 838.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6708 | |
| }, | |
| { | |
| "epoch": 838.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6712 | |
| }, | |
| { | |
| "epoch": 839.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6716 | |
| }, | |
| { | |
| "epoch": 839.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 839.99, | |
| "eval_exact_match": 0.45978152929493543, | |
| "eval_exec": 0.4945382323733863, | |
| "eval_loss": 0.660439670085907, | |
| "eval_runtime": 216.7543, | |
| "eval_samples_per_second": 5.998, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 840.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6724 | |
| }, | |
| { | |
| "epoch": 840.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6728 | |
| }, | |
| { | |
| "epoch": 841.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6732 | |
| }, | |
| { | |
| "epoch": 841.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6736 | |
| }, | |
| { | |
| "epoch": 842.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 842.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0005, | |
| "step": 6744 | |
| }, | |
| { | |
| "epoch": 843.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6748 | |
| }, | |
| { | |
| "epoch": 843.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6752 | |
| }, | |
| { | |
| "epoch": 844.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6756 | |
| }, | |
| { | |
| "epoch": 844.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 845.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6764 | |
| }, | |
| { | |
| "epoch": 845.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6768 | |
| }, | |
| { | |
| "epoch": 846.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6772 | |
| }, | |
| { | |
| "epoch": 846.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6776 | |
| }, | |
| { | |
| "epoch": 847.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 847.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6784 | |
| }, | |
| { | |
| "epoch": 847.99, | |
| "eval_exact_match": 0.4468718967229394, | |
| "eval_exec": 0.4885799404170804, | |
| "eval_loss": 0.6388683319091797, | |
| "eval_runtime": 225.744, | |
| "eval_samples_per_second": 5.759, | |
| "step": 6784 | |
| }, | |
| { | |
| "epoch": 848.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6788 | |
| }, | |
| { | |
| "epoch": 848.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6792 | |
| }, | |
| { | |
| "epoch": 849.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6796 | |
| }, | |
| { | |
| "epoch": 849.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 850.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6804 | |
| }, | |
| { | |
| "epoch": 850.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6808 | |
| }, | |
| { | |
| "epoch": 851.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6812 | |
| }, | |
| { | |
| "epoch": 851.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6816 | |
| }, | |
| { | |
| "epoch": 852.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 852.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6824 | |
| }, | |
| { | |
| "epoch": 853.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6828 | |
| }, | |
| { | |
| "epoch": 853.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0008, | |
| "step": 6832 | |
| }, | |
| { | |
| "epoch": 854.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6836 | |
| }, | |
| { | |
| "epoch": 854.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0007, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 855.5, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6844 | |
| }, | |
| { | |
| "epoch": 855.99, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 6848 | |
| }, | |
| { | |
| "epoch": 855.99, | |
| "eval_exact_match": 0.4657398212512413, | |
| "eval_exec": 0.4925521350546177, | |
| "eval_loss": 0.6665723323822021, | |
| "eval_runtime": 199.9726, | |
| "eval_samples_per_second": 6.501, | |
| "step": 6848 | |
| } | |
| ], | |
| "max_steps": 24576, | |
| "num_train_epochs": 3072, | |
| "total_flos": 7.45169724254251e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |