{ "best_metric": 0.16957539319992065, "best_model_checkpoint": "AlexWang99/byt5_add_3k/checkpoint-420", "epoch": 105.0, "eval_steps": 500, "global_step": 420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 1.3957098722457886, "eval_runtime": 10.6898, "eval_samples_per_second": 935.474, "eval_steps_per_second": 1.216, "step": 4 }, { "epoch": 2.0, "eval_loss": 1.4048140048980713, "eval_runtime": 10.8698, "eval_samples_per_second": 919.98, "eval_steps_per_second": 1.196, "step": 8 }, { "epoch": 3.0, "eval_loss": 1.3850985765457153, "eval_runtime": 10.7938, "eval_samples_per_second": 926.459, "eval_steps_per_second": 1.204, "step": 12 }, { "epoch": 4.0, "eval_loss": 1.3645013570785522, "eval_runtime": 10.9791, "eval_samples_per_second": 910.819, "eval_steps_per_second": 1.184, "step": 16 }, { "epoch": 5.0, "eval_loss": 1.3509438037872314, "eval_runtime": 10.86, "eval_samples_per_second": 920.81, "eval_steps_per_second": 1.197, "step": 20 }, { "epoch": 6.0, "eval_loss": 1.335862398147583, "eval_runtime": 11.0208, "eval_samples_per_second": 907.379, "eval_steps_per_second": 1.18, "step": 24 }, { "epoch": 7.0, "eval_loss": 1.304105281829834, "eval_runtime": 10.8828, "eval_samples_per_second": 918.882, "eval_steps_per_second": 1.195, "step": 28 }, { "epoch": 8.0, "eval_loss": 1.2957689762115479, "eval_runtime": 11.1512, "eval_samples_per_second": 896.761, "eval_steps_per_second": 1.166, "step": 32 }, { "epoch": 9.0, "eval_loss": 1.2636315822601318, "eval_runtime": 10.8581, "eval_samples_per_second": 920.974, "eval_steps_per_second": 1.197, "step": 36 }, { "epoch": 10.0, "eval_loss": 1.2440863847732544, "eval_runtime": 10.9972, "eval_samples_per_second": 909.325, "eval_steps_per_second": 1.182, "step": 40 }, { "epoch": 11.0, "eval_loss": 1.2331980466842651, "eval_runtime": 10.8669, "eval_samples_per_second": 920.225, "eval_steps_per_second": 1.196, "step": 44 }, { "epoch": 12.0, "eval_loss": 1.20121431350708, "eval_runtime": 10.9961, "eval_samples_per_second": 909.41, "eval_steps_per_second": 1.182, "step": 48 }, { "epoch": 13.0, "eval_loss": 1.1870373487472534, "eval_runtime": 10.8735, "eval_samples_per_second": 919.667, "eval_steps_per_second": 1.196, "step": 52 }, { "epoch": 14.0, "eval_loss": 1.1519101858139038, "eval_runtime": 10.9927, "eval_samples_per_second": 909.695, "eval_steps_per_second": 1.183, "step": 56 }, { "epoch": 15.0, "eval_loss": 1.1413601636886597, "eval_runtime": 10.8635, "eval_samples_per_second": 920.514, "eval_steps_per_second": 1.197, "step": 60 }, { "epoch": 16.0, "eval_loss": 1.0864189863204956, "eval_runtime": 11.1699, "eval_samples_per_second": 895.261, "eval_steps_per_second": 1.164, "step": 64 }, { "epoch": 17.0, "eval_loss": 1.0611509084701538, "eval_runtime": 10.8685, "eval_samples_per_second": 920.087, "eval_steps_per_second": 1.196, "step": 68 }, { "epoch": 18.0, "eval_loss": 1.0090259313583374, "eval_runtime": 10.9973, "eval_samples_per_second": 909.312, "eval_steps_per_second": 1.182, "step": 72 }, { "epoch": 19.0, "eval_loss": 0.9998962879180908, "eval_runtime": 10.8691, "eval_samples_per_second": 920.04, "eval_steps_per_second": 1.196, "step": 76 }, { "epoch": 20.0, "eval_loss": 0.952064573764801, "eval_runtime": 11.0179, "eval_samples_per_second": 907.614, "eval_steps_per_second": 1.18, "step": 80 }, { "epoch": 21.0, "eval_loss": 0.9199039340019226, "eval_runtime": 10.8623, "eval_samples_per_second": 920.615, "eval_steps_per_second": 1.197, "step": 84 }, { "epoch": 22.0, "eval_loss": 0.8742589950561523, "eval_runtime": 11.0074, "eval_samples_per_second": 908.483, "eval_steps_per_second": 1.181, "step": 88 }, { "epoch": 23.0, "eval_loss": 0.8637756109237671, "eval_runtime": 10.8638, "eval_samples_per_second": 920.485, "eval_steps_per_second": 1.197, "step": 92 }, { "epoch": 24.0, "eval_loss": 0.8123971819877625, "eval_runtime": 11.1682, "eval_samples_per_second": 895.401, "eval_steps_per_second": 1.164, "step": 96 }, { "epoch": 25.0, "eval_loss": 0.7750455737113953, "eval_runtime": 10.8579, "eval_samples_per_second": 920.993, "eval_steps_per_second": 1.197, "step": 100 }, { "epoch": 26.0, "eval_loss": 0.7488656640052795, "eval_runtime": 11.0064, "eval_samples_per_second": 908.566, "eval_steps_per_second": 1.181, "step": 104 }, { "epoch": 27.0, "eval_loss": 0.7134984135627747, "eval_runtime": 10.8755, "eval_samples_per_second": 919.497, "eval_steps_per_second": 1.195, "step": 108 }, { "epoch": 28.0, "eval_loss": 0.6777770519256592, "eval_runtime": 10.9907, "eval_samples_per_second": 909.862, "eval_steps_per_second": 1.183, "step": 112 }, { "epoch": 29.0, "eval_loss": 0.6627815365791321, "eval_runtime": 10.8722, "eval_samples_per_second": 919.775, "eval_steps_per_second": 1.196, "step": 116 }, { "epoch": 30.0, "eval_loss": 0.6153420209884644, "eval_runtime": 10.9986, "eval_samples_per_second": 909.21, "eval_steps_per_second": 1.182, "step": 120 }, { "epoch": 31.0, "eval_loss": 0.6009132862091064, "eval_runtime": 10.8581, "eval_samples_per_second": 920.97, "eval_steps_per_second": 1.197, "step": 124 }, { "epoch": 32.0, "eval_loss": 0.5706290006637573, "eval_runtime": 11.1587, "eval_samples_per_second": 896.164, "eval_steps_per_second": 1.165, "step": 128 }, { "epoch": 33.0, "eval_loss": 0.5482128262519836, "eval_runtime": 10.8592, "eval_samples_per_second": 920.875, "eval_steps_per_second": 1.197, "step": 132 }, { "epoch": 34.0, "eval_loss": 0.5287255644798279, "eval_runtime": 11.0008, "eval_samples_per_second": 909.021, "eval_steps_per_second": 1.182, "step": 136 }, { "epoch": 35.0, "eval_loss": 0.4995749592781067, "eval_runtime": 10.8655, "eval_samples_per_second": 920.343, "eval_steps_per_second": 1.196, "step": 140 }, { "epoch": 36.0, "eval_loss": 0.4935281276702881, "eval_runtime": 10.9993, "eval_samples_per_second": 909.152, "eval_steps_per_second": 1.182, "step": 144 }, { "epoch": 37.0, "eval_loss": 0.4704650938510895, "eval_runtime": 10.8728, "eval_samples_per_second": 919.725, "eval_steps_per_second": 1.196, "step": 148 }, { "epoch": 38.0, "eval_loss": 0.46444249153137207, "eval_runtime": 10.9963, "eval_samples_per_second": 909.398, "eval_steps_per_second": 1.182, "step": 152 }, { "epoch": 39.0, "eval_loss": 0.4404006898403168, "eval_runtime": 10.8495, "eval_samples_per_second": 921.704, "eval_steps_per_second": 1.198, "step": 156 }, { "epoch": 40.0, "eval_loss": 0.41056767106056213, "eval_runtime": 11.1601, "eval_samples_per_second": 896.052, "eval_steps_per_second": 1.165, "step": 160 }, { "epoch": 41.0, "eval_loss": 0.41203612089157104, "eval_runtime": 10.8638, "eval_samples_per_second": 920.488, "eval_steps_per_second": 1.197, "step": 164 }, { "epoch": 42.0, "eval_loss": 0.39367642998695374, "eval_runtime": 10.9996, "eval_samples_per_second": 909.125, "eval_steps_per_second": 1.182, "step": 168 }, { "epoch": 43.0, "eval_loss": 0.38801082968711853, "eval_runtime": 10.8728, "eval_samples_per_second": 919.724, "eval_steps_per_second": 1.196, "step": 172 }, { "epoch": 44.0, "eval_loss": 0.3695450723171234, "eval_runtime": 11.0081, "eval_samples_per_second": 908.423, "eval_steps_per_second": 1.181, "step": 176 }, { "epoch": 45.0, "eval_loss": 0.36823761463165283, "eval_runtime": 10.8803, "eval_samples_per_second": 919.096, "eval_steps_per_second": 1.195, "step": 180 }, { "epoch": 46.0, "eval_loss": 0.36352187395095825, "eval_runtime": 11.0188, "eval_samples_per_second": 907.538, "eval_steps_per_second": 1.18, "step": 184 }, { "epoch": 47.0, "eval_loss": 0.3410819172859192, "eval_runtime": 10.8644, "eval_samples_per_second": 920.439, "eval_steps_per_second": 1.197, "step": 188 }, { "epoch": 48.0, "eval_loss": 0.34205111861228943, "eval_runtime": 11.1579, "eval_samples_per_second": 896.228, "eval_steps_per_second": 1.165, "step": 192 }, { "epoch": 49.0, "eval_loss": 0.32984980940818787, "eval_runtime": 10.8559, "eval_samples_per_second": 921.16, "eval_steps_per_second": 1.198, "step": 196 }, { "epoch": 50.0, "eval_loss": 0.32862576842308044, "eval_runtime": 10.9935, "eval_samples_per_second": 909.631, "eval_steps_per_second": 1.183, "step": 200 }, { "epoch": 51.0, "eval_loss": 0.31809937953948975, "eval_runtime": 10.8616, "eval_samples_per_second": 920.672, "eval_steps_per_second": 1.197, "step": 204 }, { "epoch": 52.0, "eval_loss": 0.3181401193141937, "eval_runtime": 10.9831, "eval_samples_per_second": 910.489, "eval_steps_per_second": 1.184, "step": 208 }, { "epoch": 53.0, "eval_loss": 0.31942903995513916, "eval_runtime": 10.8766, "eval_samples_per_second": 919.404, "eval_steps_per_second": 1.195, "step": 212 }, { "epoch": 54.0, "eval_loss": 0.29720762372016907, "eval_runtime": 11.0007, "eval_samples_per_second": 909.037, "eval_steps_per_second": 1.182, "step": 216 }, { "epoch": 55.0, "eval_loss": 0.28963199257850647, "eval_runtime": 10.8682, "eval_samples_per_second": 920.116, "eval_steps_per_second": 1.196, "step": 220 }, { "epoch": 56.0, "eval_loss": 0.28118211030960083, "eval_runtime": 11.1702, "eval_samples_per_second": 895.242, "eval_steps_per_second": 1.164, "step": 224 }, { "epoch": 57.0, "eval_loss": 0.2807424068450928, "eval_runtime": 10.8728, "eval_samples_per_second": 919.723, "eval_steps_per_second": 1.196, "step": 228 }, { "epoch": 58.0, "eval_loss": 0.2821776568889618, "eval_runtime": 11.013, "eval_samples_per_second": 908.02, "eval_steps_per_second": 1.18, "step": 232 }, { "epoch": 59.0, "eval_loss": 0.2738954722881317, "eval_runtime": 10.882, "eval_samples_per_second": 918.95, "eval_steps_per_second": 1.195, "step": 236 }, { "epoch": 60.0, "eval_loss": 0.259623646736145, "eval_runtime": 10.9964, "eval_samples_per_second": 909.385, "eval_steps_per_second": 1.182, "step": 240 }, { "epoch": 61.0, "eval_loss": 0.26454034447669983, "eval_runtime": 10.8812, "eval_samples_per_second": 919.014, "eval_steps_per_second": 1.195, "step": 244 }, { "epoch": 62.0, "eval_loss": 0.2502776086330414, "eval_runtime": 11.0146, "eval_samples_per_second": 907.884, "eval_steps_per_second": 1.18, "step": 248 }, { "epoch": 63.0, "eval_loss": 0.24190129339694977, "eval_runtime": 10.8776, "eval_samples_per_second": 919.32, "eval_steps_per_second": 1.195, "step": 252 }, { "epoch": 64.0, "eval_loss": 0.2520209848880768, "eval_runtime": 11.1696, "eval_samples_per_second": 895.288, "eval_steps_per_second": 1.164, "step": 256 }, { "epoch": 65.0, "eval_loss": 0.24023157358169556, "eval_runtime": 10.8767, "eval_samples_per_second": 919.398, "eval_steps_per_second": 1.195, "step": 260 }, { "epoch": 66.0, "eval_loss": 0.2362491935491562, "eval_runtime": 11.0127, "eval_samples_per_second": 908.04, "eval_steps_per_second": 1.18, "step": 264 }, { "epoch": 67.0, "eval_loss": 0.23966462910175323, "eval_runtime": 10.8781, "eval_samples_per_second": 919.276, "eval_steps_per_second": 1.195, "step": 268 }, { "epoch": 68.0, "eval_loss": 0.2406124770641327, "eval_runtime": 11.0206, "eval_samples_per_second": 907.394, "eval_steps_per_second": 1.18, "step": 272 }, { "epoch": 69.0, "eval_loss": 0.22616925835609436, "eval_runtime": 10.8781, "eval_samples_per_second": 919.274, "eval_steps_per_second": 1.195, "step": 276 }, { "epoch": 70.0, "eval_loss": 0.2212550789117813, "eval_runtime": 11.0106, "eval_samples_per_second": 908.219, "eval_steps_per_second": 1.181, "step": 280 }, { "epoch": 71.0, "eval_loss": 0.2343885451555252, "eval_runtime": 10.8799, "eval_samples_per_second": 919.125, "eval_steps_per_second": 1.195, "step": 284 }, { "epoch": 72.0, "eval_loss": 0.2180890589952469, "eval_runtime": 11.175, "eval_samples_per_second": 894.855, "eval_steps_per_second": 1.163, "step": 288 }, { "epoch": 73.0, "eval_loss": 0.21395854651927948, "eval_runtime": 10.8684, "eval_samples_per_second": 920.101, "eval_steps_per_second": 1.196, "step": 292 }, { "epoch": 74.0, "eval_loss": 0.21711787581443787, "eval_runtime": 11.005, "eval_samples_per_second": 908.68, "eval_steps_per_second": 1.181, "step": 296 }, { "epoch": 75.0, "eval_loss": 0.21878717839717865, "eval_runtime": 10.8803, "eval_samples_per_second": 919.091, "eval_steps_per_second": 1.195, "step": 300 }, { "epoch": 76.0, "eval_loss": 0.21271127462387085, "eval_runtime": 11.1791, "eval_samples_per_second": 894.529, "eval_steps_per_second": 1.163, "step": 304 }, { "epoch": 77.0, "eval_loss": 0.20574086904525757, "eval_runtime": 10.8704, "eval_samples_per_second": 919.927, "eval_steps_per_second": 1.196, "step": 308 }, { "epoch": 78.0, "eval_loss": 0.20641738176345825, "eval_runtime": 11.0014, "eval_samples_per_second": 908.978, "eval_steps_per_second": 1.182, "step": 312 }, { "epoch": 79.0, "eval_loss": 0.20721706748008728, "eval_runtime": 10.8792, "eval_samples_per_second": 919.189, "eval_steps_per_second": 1.195, "step": 316 }, { "epoch": 80.0, "eval_loss": 0.20023952424526215, "eval_runtime": 11.174, "eval_samples_per_second": 894.931, "eval_steps_per_second": 1.163, "step": 320 }, { "epoch": 81.0, "eval_loss": 0.20403145253658295, "eval_runtime": 10.8566, "eval_samples_per_second": 921.097, "eval_steps_per_second": 1.197, "step": 324 }, { "epoch": 82.0, "eval_loss": 0.19536912441253662, "eval_runtime": 11.0057, "eval_samples_per_second": 908.621, "eval_steps_per_second": 1.181, "step": 328 }, { "epoch": 83.0, "eval_loss": 0.19563594460487366, "eval_runtime": 10.8635, "eval_samples_per_second": 920.516, "eval_steps_per_second": 1.197, "step": 332 }, { "epoch": 84.0, "eval_loss": 0.1962701380252838, "eval_runtime": 11.1829, "eval_samples_per_second": 894.219, "eval_steps_per_second": 1.162, "step": 336 }, { "epoch": 85.0, "eval_loss": 0.19198498129844666, "eval_runtime": 10.8644, "eval_samples_per_second": 920.438, "eval_steps_per_second": 1.197, "step": 340 }, { "epoch": 86.0, "eval_loss": 0.18079973757266998, "eval_runtime": 11.0069, "eval_samples_per_second": 908.518, "eval_steps_per_second": 1.181, "step": 344 }, { "epoch": 87.0, "eval_loss": 0.18332232534885406, "eval_runtime": 10.8634, "eval_samples_per_second": 920.526, "eval_steps_per_second": 1.197, "step": 348 }, { "epoch": 88.0, "eval_loss": 0.19687600433826447, "eval_runtime": 11.1808, "eval_samples_per_second": 894.389, "eval_steps_per_second": 1.163, "step": 352 }, { "epoch": 89.0, "eval_loss": 0.20110972225666046, "eval_runtime": 10.8709, "eval_samples_per_second": 919.884, "eval_steps_per_second": 1.196, "step": 356 }, { "epoch": 90.0, "eval_loss": 0.18666134774684906, "eval_runtime": 11.0027, "eval_samples_per_second": 908.869, "eval_steps_per_second": 1.182, "step": 360 }, { "epoch": 91.0, "eval_loss": 0.1773829162120819, "eval_runtime": 10.8627, "eval_samples_per_second": 920.586, "eval_steps_per_second": 1.197, "step": 364 }, { "epoch": 92.0, "eval_loss": 0.18139097094535828, "eval_runtime": 11.158, "eval_samples_per_second": 896.215, "eval_steps_per_second": 1.165, "step": 368 }, { "epoch": 93.0, "eval_loss": 0.18620698153972626, "eval_runtime": 10.8654, "eval_samples_per_second": 920.355, "eval_steps_per_second": 1.196, "step": 372 }, { "epoch": 94.0, "eval_loss": 0.1856929063796997, "eval_runtime": 10.9909, "eval_samples_per_second": 909.844, "eval_steps_per_second": 1.183, "step": 376 }, { "epoch": 95.0, "eval_loss": 0.17794571816921234, "eval_runtime": 10.8741, "eval_samples_per_second": 919.613, "eval_steps_per_second": 1.195, "step": 380 }, { "epoch": 96.0, "eval_loss": 0.17274315655231476, "eval_runtime": 11.1798, "eval_samples_per_second": 894.47, "eval_steps_per_second": 1.163, "step": 384 }, { "epoch": 97.0, "eval_loss": 0.17167899012565613, "eval_runtime": 10.8508, "eval_samples_per_second": 921.589, "eval_steps_per_second": 1.198, "step": 388 }, { "epoch": 98.0, "eval_loss": 0.17758916318416595, "eval_runtime": 11.019, "eval_samples_per_second": 907.523, "eval_steps_per_second": 1.18, "step": 392 }, { "epoch": 99.0, "eval_loss": 0.18558244407176971, "eval_runtime": 10.8574, "eval_samples_per_second": 921.033, "eval_steps_per_second": 1.197, "step": 396 }, { "epoch": 100.0, "eval_loss": 0.18669435381889343, "eval_runtime": 11.1461, "eval_samples_per_second": 897.177, "eval_steps_per_second": 1.166, "step": 400 }, { "epoch": 101.0, "eval_loss": 0.1823727786540985, "eval_runtime": 10.8583, "eval_samples_per_second": 920.956, "eval_steps_per_second": 1.197, "step": 404 }, { "epoch": 102.0, "eval_loss": 0.17717821896076202, "eval_runtime": 11.0086, "eval_samples_per_second": 908.379, "eval_steps_per_second": 1.181, "step": 408 }, { "epoch": 103.0, "eval_loss": 0.17209963500499725, "eval_runtime": 10.8863, "eval_samples_per_second": 918.589, "eval_steps_per_second": 1.194, "step": 412 }, { "epoch": 104.0, "eval_loss": 0.1696111112833023, "eval_runtime": 11.178, "eval_samples_per_second": 894.611, "eval_steps_per_second": 1.163, "step": 416 }, { "epoch": 105.0, "eval_loss": 0.16957539319992065, "eval_runtime": 10.8672, "eval_samples_per_second": 920.2, "eval_steps_per_second": 1.196, "step": 420 } ], "logging_steps": 500, "max_steps": 440, "num_train_epochs": 110, "save_steps": 500, "total_flos": 9043941150720000.0, "trial_name": null, "trial_params": null }