| { | |
| "best_metric": 0.16957539319992065, | |
| "best_model_checkpoint": "AlexWang99/byt5_add_3k/checkpoint-420", | |
| "epoch": 105.0, | |
| "eval_steps": 500, | |
| "global_step": 420, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.3957098722457886, | |
| "eval_runtime": 10.6898, | |
| "eval_samples_per_second": 935.474, | |
| "eval_steps_per_second": 1.216, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.4048140048980713, | |
| "eval_runtime": 10.8698, | |
| "eval_samples_per_second": 919.98, | |
| "eval_steps_per_second": 1.196, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 1.3850985765457153, | |
| "eval_runtime": 10.7938, | |
| "eval_samples_per_second": 926.459, | |
| "eval_steps_per_second": 1.204, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 1.3645013570785522, | |
| "eval_runtime": 10.9791, | |
| "eval_samples_per_second": 910.819, | |
| "eval_steps_per_second": 1.184, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 1.3509438037872314, | |
| "eval_runtime": 10.86, | |
| "eval_samples_per_second": 920.81, | |
| "eval_steps_per_second": 1.197, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 1.335862398147583, | |
| "eval_runtime": 11.0208, | |
| "eval_samples_per_second": 907.379, | |
| "eval_steps_per_second": 1.18, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 1.304105281829834, | |
| "eval_runtime": 10.8828, | |
| "eval_samples_per_second": 918.882, | |
| "eval_steps_per_second": 1.195, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 1.2957689762115479, | |
| "eval_runtime": 11.1512, | |
| "eval_samples_per_second": 896.761, | |
| "eval_steps_per_second": 1.166, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 1.2636315822601318, | |
| "eval_runtime": 10.8581, | |
| "eval_samples_per_second": 920.974, | |
| "eval_steps_per_second": 1.197, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 1.2440863847732544, | |
| "eval_runtime": 10.9972, | |
| "eval_samples_per_second": 909.325, | |
| "eval_steps_per_second": 1.182, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 1.2331980466842651, | |
| "eval_runtime": 10.8669, | |
| "eval_samples_per_second": 920.225, | |
| "eval_steps_per_second": 1.196, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 1.20121431350708, | |
| "eval_runtime": 10.9961, | |
| "eval_samples_per_second": 909.41, | |
| "eval_steps_per_second": 1.182, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 1.1870373487472534, | |
| "eval_runtime": 10.8735, | |
| "eval_samples_per_second": 919.667, | |
| "eval_steps_per_second": 1.196, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 1.1519101858139038, | |
| "eval_runtime": 10.9927, | |
| "eval_samples_per_second": 909.695, | |
| "eval_steps_per_second": 1.183, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 1.1413601636886597, | |
| "eval_runtime": 10.8635, | |
| "eval_samples_per_second": 920.514, | |
| "eval_steps_per_second": 1.197, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 1.0864189863204956, | |
| "eval_runtime": 11.1699, | |
| "eval_samples_per_second": 895.261, | |
| "eval_steps_per_second": 1.164, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 1.0611509084701538, | |
| "eval_runtime": 10.8685, | |
| "eval_samples_per_second": 920.087, | |
| "eval_steps_per_second": 1.196, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 1.0090259313583374, | |
| "eval_runtime": 10.9973, | |
| "eval_samples_per_second": 909.312, | |
| "eval_steps_per_second": 1.182, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.9998962879180908, | |
| "eval_runtime": 10.8691, | |
| "eval_samples_per_second": 920.04, | |
| "eval_steps_per_second": 1.196, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.952064573764801, | |
| "eval_runtime": 11.0179, | |
| "eval_samples_per_second": 907.614, | |
| "eval_steps_per_second": 1.18, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.9199039340019226, | |
| "eval_runtime": 10.8623, | |
| "eval_samples_per_second": 920.615, | |
| "eval_steps_per_second": 1.197, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.8742589950561523, | |
| "eval_runtime": 11.0074, | |
| "eval_samples_per_second": 908.483, | |
| "eval_steps_per_second": 1.181, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.8637756109237671, | |
| "eval_runtime": 10.8638, | |
| "eval_samples_per_second": 920.485, | |
| "eval_steps_per_second": 1.197, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.8123971819877625, | |
| "eval_runtime": 11.1682, | |
| "eval_samples_per_second": 895.401, | |
| "eval_steps_per_second": 1.164, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.7750455737113953, | |
| "eval_runtime": 10.8579, | |
| "eval_samples_per_second": 920.993, | |
| "eval_steps_per_second": 1.197, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.7488656640052795, | |
| "eval_runtime": 11.0064, | |
| "eval_samples_per_second": 908.566, | |
| "eval_steps_per_second": 1.181, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.7134984135627747, | |
| "eval_runtime": 10.8755, | |
| "eval_samples_per_second": 919.497, | |
| "eval_steps_per_second": 1.195, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.6777770519256592, | |
| "eval_runtime": 10.9907, | |
| "eval_samples_per_second": 909.862, | |
| "eval_steps_per_second": 1.183, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.6627815365791321, | |
| "eval_runtime": 10.8722, | |
| "eval_samples_per_second": 919.775, | |
| "eval_steps_per_second": 1.196, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.6153420209884644, | |
| "eval_runtime": 10.9986, | |
| "eval_samples_per_second": 909.21, | |
| "eval_steps_per_second": 1.182, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 0.6009132862091064, | |
| "eval_runtime": 10.8581, | |
| "eval_samples_per_second": 920.97, | |
| "eval_steps_per_second": 1.197, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.5706290006637573, | |
| "eval_runtime": 11.1587, | |
| "eval_samples_per_second": 896.164, | |
| "eval_steps_per_second": 1.165, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 0.5482128262519836, | |
| "eval_runtime": 10.8592, | |
| "eval_samples_per_second": 920.875, | |
| "eval_steps_per_second": 1.197, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.5287255644798279, | |
| "eval_runtime": 11.0008, | |
| "eval_samples_per_second": 909.021, | |
| "eval_steps_per_second": 1.182, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 0.4995749592781067, | |
| "eval_runtime": 10.8655, | |
| "eval_samples_per_second": 920.343, | |
| "eval_steps_per_second": 1.196, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.4935281276702881, | |
| "eval_runtime": 10.9993, | |
| "eval_samples_per_second": 909.152, | |
| "eval_steps_per_second": 1.182, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 0.4704650938510895, | |
| "eval_runtime": 10.8728, | |
| "eval_samples_per_second": 919.725, | |
| "eval_steps_per_second": 1.196, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.46444249153137207, | |
| "eval_runtime": 10.9963, | |
| "eval_samples_per_second": 909.398, | |
| "eval_steps_per_second": 1.182, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 0.4404006898403168, | |
| "eval_runtime": 10.8495, | |
| "eval_samples_per_second": 921.704, | |
| "eval_steps_per_second": 1.198, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.41056767106056213, | |
| "eval_runtime": 11.1601, | |
| "eval_samples_per_second": 896.052, | |
| "eval_steps_per_second": 1.165, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 0.41203612089157104, | |
| "eval_runtime": 10.8638, | |
| "eval_samples_per_second": 920.488, | |
| "eval_steps_per_second": 1.197, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 0.39367642998695374, | |
| "eval_runtime": 10.9996, | |
| "eval_samples_per_second": 909.125, | |
| "eval_steps_per_second": 1.182, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 0.38801082968711853, | |
| "eval_runtime": 10.8728, | |
| "eval_samples_per_second": 919.724, | |
| "eval_steps_per_second": 1.196, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 0.3695450723171234, | |
| "eval_runtime": 11.0081, | |
| "eval_samples_per_second": 908.423, | |
| "eval_steps_per_second": 1.181, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 0.36823761463165283, | |
| "eval_runtime": 10.8803, | |
| "eval_samples_per_second": 919.096, | |
| "eval_steps_per_second": 1.195, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 0.36352187395095825, | |
| "eval_runtime": 11.0188, | |
| "eval_samples_per_second": 907.538, | |
| "eval_steps_per_second": 1.18, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 0.3410819172859192, | |
| "eval_runtime": 10.8644, | |
| "eval_samples_per_second": 920.439, | |
| "eval_steps_per_second": 1.197, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 0.34205111861228943, | |
| "eval_runtime": 11.1579, | |
| "eval_samples_per_second": 896.228, | |
| "eval_steps_per_second": 1.165, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 0.32984980940818787, | |
| "eval_runtime": 10.8559, | |
| "eval_samples_per_second": 921.16, | |
| "eval_steps_per_second": 1.198, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 0.32862576842308044, | |
| "eval_runtime": 10.9935, | |
| "eval_samples_per_second": 909.631, | |
| "eval_steps_per_second": 1.183, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_loss": 0.31809937953948975, | |
| "eval_runtime": 10.8616, | |
| "eval_samples_per_second": 920.672, | |
| "eval_steps_per_second": 1.197, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_loss": 0.3181401193141937, | |
| "eval_runtime": 10.9831, | |
| "eval_samples_per_second": 910.489, | |
| "eval_steps_per_second": 1.184, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_loss": 0.31942903995513916, | |
| "eval_runtime": 10.8766, | |
| "eval_samples_per_second": 919.404, | |
| "eval_steps_per_second": 1.195, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_loss": 0.29720762372016907, | |
| "eval_runtime": 11.0007, | |
| "eval_samples_per_second": 909.037, | |
| "eval_steps_per_second": 1.182, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_loss": 0.28963199257850647, | |
| "eval_runtime": 10.8682, | |
| "eval_samples_per_second": 920.116, | |
| "eval_steps_per_second": 1.196, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_loss": 0.28118211030960083, | |
| "eval_runtime": 11.1702, | |
| "eval_samples_per_second": 895.242, | |
| "eval_steps_per_second": 1.164, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_loss": 0.2807424068450928, | |
| "eval_runtime": 10.8728, | |
| "eval_samples_per_second": 919.723, | |
| "eval_steps_per_second": 1.196, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_loss": 0.2821776568889618, | |
| "eval_runtime": 11.013, | |
| "eval_samples_per_second": 908.02, | |
| "eval_steps_per_second": 1.18, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_loss": 0.2738954722881317, | |
| "eval_runtime": 10.882, | |
| "eval_samples_per_second": 918.95, | |
| "eval_steps_per_second": 1.195, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_loss": 0.259623646736145, | |
| "eval_runtime": 10.9964, | |
| "eval_samples_per_second": 909.385, | |
| "eval_steps_per_second": 1.182, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_loss": 0.26454034447669983, | |
| "eval_runtime": 10.8812, | |
| "eval_samples_per_second": 919.014, | |
| "eval_steps_per_second": 1.195, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_loss": 0.2502776086330414, | |
| "eval_runtime": 11.0146, | |
| "eval_samples_per_second": 907.884, | |
| "eval_steps_per_second": 1.18, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_loss": 0.24190129339694977, | |
| "eval_runtime": 10.8776, | |
| "eval_samples_per_second": 919.32, | |
| "eval_steps_per_second": 1.195, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_loss": 0.2520209848880768, | |
| "eval_runtime": 11.1696, | |
| "eval_samples_per_second": 895.288, | |
| "eval_steps_per_second": 1.164, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_loss": 0.24023157358169556, | |
| "eval_runtime": 10.8767, | |
| "eval_samples_per_second": 919.398, | |
| "eval_steps_per_second": 1.195, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_loss": 0.2362491935491562, | |
| "eval_runtime": 11.0127, | |
| "eval_samples_per_second": 908.04, | |
| "eval_steps_per_second": 1.18, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_loss": 0.23966462910175323, | |
| "eval_runtime": 10.8781, | |
| "eval_samples_per_second": 919.276, | |
| "eval_steps_per_second": 1.195, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_loss": 0.2406124770641327, | |
| "eval_runtime": 11.0206, | |
| "eval_samples_per_second": 907.394, | |
| "eval_steps_per_second": 1.18, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_loss": 0.22616925835609436, | |
| "eval_runtime": 10.8781, | |
| "eval_samples_per_second": 919.274, | |
| "eval_steps_per_second": 1.195, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_loss": 0.2212550789117813, | |
| "eval_runtime": 11.0106, | |
| "eval_samples_per_second": 908.219, | |
| "eval_steps_per_second": 1.181, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_loss": 0.2343885451555252, | |
| "eval_runtime": 10.8799, | |
| "eval_samples_per_second": 919.125, | |
| "eval_steps_per_second": 1.195, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_loss": 0.2180890589952469, | |
| "eval_runtime": 11.175, | |
| "eval_samples_per_second": 894.855, | |
| "eval_steps_per_second": 1.163, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_loss": 0.21395854651927948, | |
| "eval_runtime": 10.8684, | |
| "eval_samples_per_second": 920.101, | |
| "eval_steps_per_second": 1.196, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_loss": 0.21711787581443787, | |
| "eval_runtime": 11.005, | |
| "eval_samples_per_second": 908.68, | |
| "eval_steps_per_second": 1.181, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_loss": 0.21878717839717865, | |
| "eval_runtime": 10.8803, | |
| "eval_samples_per_second": 919.091, | |
| "eval_steps_per_second": 1.195, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_loss": 0.21271127462387085, | |
| "eval_runtime": 11.1791, | |
| "eval_samples_per_second": 894.529, | |
| "eval_steps_per_second": 1.163, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_loss": 0.20574086904525757, | |
| "eval_runtime": 10.8704, | |
| "eval_samples_per_second": 919.927, | |
| "eval_steps_per_second": 1.196, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_loss": 0.20641738176345825, | |
| "eval_runtime": 11.0014, | |
| "eval_samples_per_second": 908.978, | |
| "eval_steps_per_second": 1.182, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_loss": 0.20721706748008728, | |
| "eval_runtime": 10.8792, | |
| "eval_samples_per_second": 919.189, | |
| "eval_steps_per_second": 1.195, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_loss": 0.20023952424526215, | |
| "eval_runtime": 11.174, | |
| "eval_samples_per_second": 894.931, | |
| "eval_steps_per_second": 1.163, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_loss": 0.20403145253658295, | |
| "eval_runtime": 10.8566, | |
| "eval_samples_per_second": 921.097, | |
| "eval_steps_per_second": 1.197, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_loss": 0.19536912441253662, | |
| "eval_runtime": 11.0057, | |
| "eval_samples_per_second": 908.621, | |
| "eval_steps_per_second": 1.181, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_loss": 0.19563594460487366, | |
| "eval_runtime": 10.8635, | |
| "eval_samples_per_second": 920.516, | |
| "eval_steps_per_second": 1.197, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_loss": 0.1962701380252838, | |
| "eval_runtime": 11.1829, | |
| "eval_samples_per_second": 894.219, | |
| "eval_steps_per_second": 1.162, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_loss": 0.19198498129844666, | |
| "eval_runtime": 10.8644, | |
| "eval_samples_per_second": 920.438, | |
| "eval_steps_per_second": 1.197, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_loss": 0.18079973757266998, | |
| "eval_runtime": 11.0069, | |
| "eval_samples_per_second": 908.518, | |
| "eval_steps_per_second": 1.181, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_loss": 0.18332232534885406, | |
| "eval_runtime": 10.8634, | |
| "eval_samples_per_second": 920.526, | |
| "eval_steps_per_second": 1.197, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_loss": 0.19687600433826447, | |
| "eval_runtime": 11.1808, | |
| "eval_samples_per_second": 894.389, | |
| "eval_steps_per_second": 1.163, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_loss": 0.20110972225666046, | |
| "eval_runtime": 10.8709, | |
| "eval_samples_per_second": 919.884, | |
| "eval_steps_per_second": 1.196, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_loss": 0.18666134774684906, | |
| "eval_runtime": 11.0027, | |
| "eval_samples_per_second": 908.869, | |
| "eval_steps_per_second": 1.182, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_loss": 0.1773829162120819, | |
| "eval_runtime": 10.8627, | |
| "eval_samples_per_second": 920.586, | |
| "eval_steps_per_second": 1.197, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_loss": 0.18139097094535828, | |
| "eval_runtime": 11.158, | |
| "eval_samples_per_second": 896.215, | |
| "eval_steps_per_second": 1.165, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_loss": 0.18620698153972626, | |
| "eval_runtime": 10.8654, | |
| "eval_samples_per_second": 920.355, | |
| "eval_steps_per_second": 1.196, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_loss": 0.1856929063796997, | |
| "eval_runtime": 10.9909, | |
| "eval_samples_per_second": 909.844, | |
| "eval_steps_per_second": 1.183, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_loss": 0.17794571816921234, | |
| "eval_runtime": 10.8741, | |
| "eval_samples_per_second": 919.613, | |
| "eval_steps_per_second": 1.195, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_loss": 0.17274315655231476, | |
| "eval_runtime": 11.1798, | |
| "eval_samples_per_second": 894.47, | |
| "eval_steps_per_second": 1.163, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_loss": 0.17167899012565613, | |
| "eval_runtime": 10.8508, | |
| "eval_samples_per_second": 921.589, | |
| "eval_steps_per_second": 1.198, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_loss": 0.17758916318416595, | |
| "eval_runtime": 11.019, | |
| "eval_samples_per_second": 907.523, | |
| "eval_steps_per_second": 1.18, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_loss": 0.18558244407176971, | |
| "eval_runtime": 10.8574, | |
| "eval_samples_per_second": 921.033, | |
| "eval_steps_per_second": 1.197, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 0.18669435381889343, | |
| "eval_runtime": 11.1461, | |
| "eval_samples_per_second": 897.177, | |
| "eval_steps_per_second": 1.166, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 101.0, | |
| "eval_loss": 0.1823727786540985, | |
| "eval_runtime": 10.8583, | |
| "eval_samples_per_second": 920.956, | |
| "eval_steps_per_second": 1.197, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 102.0, | |
| "eval_loss": 0.17717821896076202, | |
| "eval_runtime": 11.0086, | |
| "eval_samples_per_second": 908.379, | |
| "eval_steps_per_second": 1.181, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 103.0, | |
| "eval_loss": 0.17209963500499725, | |
| "eval_runtime": 10.8863, | |
| "eval_samples_per_second": 918.589, | |
| "eval_steps_per_second": 1.194, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_loss": 0.1696111112833023, | |
| "eval_runtime": 11.178, | |
| "eval_samples_per_second": 894.611, | |
| "eval_steps_per_second": 1.163, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 105.0, | |
| "eval_loss": 0.16957539319992065, | |
| "eval_runtime": 10.8672, | |
| "eval_samples_per_second": 920.2, | |
| "eval_steps_per_second": 1.196, | |
| "step": 420 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 440, | |
| "num_train_epochs": 110, | |
| "save_steps": 500, | |
| "total_flos": 9043941150720000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |