byt5_3k / trainer_state.json
Alexziyu's picture
11
7dcfdef
{
"best_metric": 0.16957539319992065,
"best_model_checkpoint": "AlexWang99/byt5_add_3k/checkpoint-420",
"epoch": 105.0,
"eval_steps": 500,
"global_step": 420,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 1.3957098722457886,
"eval_runtime": 10.6898,
"eval_samples_per_second": 935.474,
"eval_steps_per_second": 1.216,
"step": 4
},
{
"epoch": 2.0,
"eval_loss": 1.4048140048980713,
"eval_runtime": 10.8698,
"eval_samples_per_second": 919.98,
"eval_steps_per_second": 1.196,
"step": 8
},
{
"epoch": 3.0,
"eval_loss": 1.3850985765457153,
"eval_runtime": 10.7938,
"eval_samples_per_second": 926.459,
"eval_steps_per_second": 1.204,
"step": 12
},
{
"epoch": 4.0,
"eval_loss": 1.3645013570785522,
"eval_runtime": 10.9791,
"eval_samples_per_second": 910.819,
"eval_steps_per_second": 1.184,
"step": 16
},
{
"epoch": 5.0,
"eval_loss": 1.3509438037872314,
"eval_runtime": 10.86,
"eval_samples_per_second": 920.81,
"eval_steps_per_second": 1.197,
"step": 20
},
{
"epoch": 6.0,
"eval_loss": 1.335862398147583,
"eval_runtime": 11.0208,
"eval_samples_per_second": 907.379,
"eval_steps_per_second": 1.18,
"step": 24
},
{
"epoch": 7.0,
"eval_loss": 1.304105281829834,
"eval_runtime": 10.8828,
"eval_samples_per_second": 918.882,
"eval_steps_per_second": 1.195,
"step": 28
},
{
"epoch": 8.0,
"eval_loss": 1.2957689762115479,
"eval_runtime": 11.1512,
"eval_samples_per_second": 896.761,
"eval_steps_per_second": 1.166,
"step": 32
},
{
"epoch": 9.0,
"eval_loss": 1.2636315822601318,
"eval_runtime": 10.8581,
"eval_samples_per_second": 920.974,
"eval_steps_per_second": 1.197,
"step": 36
},
{
"epoch": 10.0,
"eval_loss": 1.2440863847732544,
"eval_runtime": 10.9972,
"eval_samples_per_second": 909.325,
"eval_steps_per_second": 1.182,
"step": 40
},
{
"epoch": 11.0,
"eval_loss": 1.2331980466842651,
"eval_runtime": 10.8669,
"eval_samples_per_second": 920.225,
"eval_steps_per_second": 1.196,
"step": 44
},
{
"epoch": 12.0,
"eval_loss": 1.20121431350708,
"eval_runtime": 10.9961,
"eval_samples_per_second": 909.41,
"eval_steps_per_second": 1.182,
"step": 48
},
{
"epoch": 13.0,
"eval_loss": 1.1870373487472534,
"eval_runtime": 10.8735,
"eval_samples_per_second": 919.667,
"eval_steps_per_second": 1.196,
"step": 52
},
{
"epoch": 14.0,
"eval_loss": 1.1519101858139038,
"eval_runtime": 10.9927,
"eval_samples_per_second": 909.695,
"eval_steps_per_second": 1.183,
"step": 56
},
{
"epoch": 15.0,
"eval_loss": 1.1413601636886597,
"eval_runtime": 10.8635,
"eval_samples_per_second": 920.514,
"eval_steps_per_second": 1.197,
"step": 60
},
{
"epoch": 16.0,
"eval_loss": 1.0864189863204956,
"eval_runtime": 11.1699,
"eval_samples_per_second": 895.261,
"eval_steps_per_second": 1.164,
"step": 64
},
{
"epoch": 17.0,
"eval_loss": 1.0611509084701538,
"eval_runtime": 10.8685,
"eval_samples_per_second": 920.087,
"eval_steps_per_second": 1.196,
"step": 68
},
{
"epoch": 18.0,
"eval_loss": 1.0090259313583374,
"eval_runtime": 10.9973,
"eval_samples_per_second": 909.312,
"eval_steps_per_second": 1.182,
"step": 72
},
{
"epoch": 19.0,
"eval_loss": 0.9998962879180908,
"eval_runtime": 10.8691,
"eval_samples_per_second": 920.04,
"eval_steps_per_second": 1.196,
"step": 76
},
{
"epoch": 20.0,
"eval_loss": 0.952064573764801,
"eval_runtime": 11.0179,
"eval_samples_per_second": 907.614,
"eval_steps_per_second": 1.18,
"step": 80
},
{
"epoch": 21.0,
"eval_loss": 0.9199039340019226,
"eval_runtime": 10.8623,
"eval_samples_per_second": 920.615,
"eval_steps_per_second": 1.197,
"step": 84
},
{
"epoch": 22.0,
"eval_loss": 0.8742589950561523,
"eval_runtime": 11.0074,
"eval_samples_per_second": 908.483,
"eval_steps_per_second": 1.181,
"step": 88
},
{
"epoch": 23.0,
"eval_loss": 0.8637756109237671,
"eval_runtime": 10.8638,
"eval_samples_per_second": 920.485,
"eval_steps_per_second": 1.197,
"step": 92
},
{
"epoch": 24.0,
"eval_loss": 0.8123971819877625,
"eval_runtime": 11.1682,
"eval_samples_per_second": 895.401,
"eval_steps_per_second": 1.164,
"step": 96
},
{
"epoch": 25.0,
"eval_loss": 0.7750455737113953,
"eval_runtime": 10.8579,
"eval_samples_per_second": 920.993,
"eval_steps_per_second": 1.197,
"step": 100
},
{
"epoch": 26.0,
"eval_loss": 0.7488656640052795,
"eval_runtime": 11.0064,
"eval_samples_per_second": 908.566,
"eval_steps_per_second": 1.181,
"step": 104
},
{
"epoch": 27.0,
"eval_loss": 0.7134984135627747,
"eval_runtime": 10.8755,
"eval_samples_per_second": 919.497,
"eval_steps_per_second": 1.195,
"step": 108
},
{
"epoch": 28.0,
"eval_loss": 0.6777770519256592,
"eval_runtime": 10.9907,
"eval_samples_per_second": 909.862,
"eval_steps_per_second": 1.183,
"step": 112
},
{
"epoch": 29.0,
"eval_loss": 0.6627815365791321,
"eval_runtime": 10.8722,
"eval_samples_per_second": 919.775,
"eval_steps_per_second": 1.196,
"step": 116
},
{
"epoch": 30.0,
"eval_loss": 0.6153420209884644,
"eval_runtime": 10.9986,
"eval_samples_per_second": 909.21,
"eval_steps_per_second": 1.182,
"step": 120
},
{
"epoch": 31.0,
"eval_loss": 0.6009132862091064,
"eval_runtime": 10.8581,
"eval_samples_per_second": 920.97,
"eval_steps_per_second": 1.197,
"step": 124
},
{
"epoch": 32.0,
"eval_loss": 0.5706290006637573,
"eval_runtime": 11.1587,
"eval_samples_per_second": 896.164,
"eval_steps_per_second": 1.165,
"step": 128
},
{
"epoch": 33.0,
"eval_loss": 0.5482128262519836,
"eval_runtime": 10.8592,
"eval_samples_per_second": 920.875,
"eval_steps_per_second": 1.197,
"step": 132
},
{
"epoch": 34.0,
"eval_loss": 0.5287255644798279,
"eval_runtime": 11.0008,
"eval_samples_per_second": 909.021,
"eval_steps_per_second": 1.182,
"step": 136
},
{
"epoch": 35.0,
"eval_loss": 0.4995749592781067,
"eval_runtime": 10.8655,
"eval_samples_per_second": 920.343,
"eval_steps_per_second": 1.196,
"step": 140
},
{
"epoch": 36.0,
"eval_loss": 0.4935281276702881,
"eval_runtime": 10.9993,
"eval_samples_per_second": 909.152,
"eval_steps_per_second": 1.182,
"step": 144
},
{
"epoch": 37.0,
"eval_loss": 0.4704650938510895,
"eval_runtime": 10.8728,
"eval_samples_per_second": 919.725,
"eval_steps_per_second": 1.196,
"step": 148
},
{
"epoch": 38.0,
"eval_loss": 0.46444249153137207,
"eval_runtime": 10.9963,
"eval_samples_per_second": 909.398,
"eval_steps_per_second": 1.182,
"step": 152
},
{
"epoch": 39.0,
"eval_loss": 0.4404006898403168,
"eval_runtime": 10.8495,
"eval_samples_per_second": 921.704,
"eval_steps_per_second": 1.198,
"step": 156
},
{
"epoch": 40.0,
"eval_loss": 0.41056767106056213,
"eval_runtime": 11.1601,
"eval_samples_per_second": 896.052,
"eval_steps_per_second": 1.165,
"step": 160
},
{
"epoch": 41.0,
"eval_loss": 0.41203612089157104,
"eval_runtime": 10.8638,
"eval_samples_per_second": 920.488,
"eval_steps_per_second": 1.197,
"step": 164
},
{
"epoch": 42.0,
"eval_loss": 0.39367642998695374,
"eval_runtime": 10.9996,
"eval_samples_per_second": 909.125,
"eval_steps_per_second": 1.182,
"step": 168
},
{
"epoch": 43.0,
"eval_loss": 0.38801082968711853,
"eval_runtime": 10.8728,
"eval_samples_per_second": 919.724,
"eval_steps_per_second": 1.196,
"step": 172
},
{
"epoch": 44.0,
"eval_loss": 0.3695450723171234,
"eval_runtime": 11.0081,
"eval_samples_per_second": 908.423,
"eval_steps_per_second": 1.181,
"step": 176
},
{
"epoch": 45.0,
"eval_loss": 0.36823761463165283,
"eval_runtime": 10.8803,
"eval_samples_per_second": 919.096,
"eval_steps_per_second": 1.195,
"step": 180
},
{
"epoch": 46.0,
"eval_loss": 0.36352187395095825,
"eval_runtime": 11.0188,
"eval_samples_per_second": 907.538,
"eval_steps_per_second": 1.18,
"step": 184
},
{
"epoch": 47.0,
"eval_loss": 0.3410819172859192,
"eval_runtime": 10.8644,
"eval_samples_per_second": 920.439,
"eval_steps_per_second": 1.197,
"step": 188
},
{
"epoch": 48.0,
"eval_loss": 0.34205111861228943,
"eval_runtime": 11.1579,
"eval_samples_per_second": 896.228,
"eval_steps_per_second": 1.165,
"step": 192
},
{
"epoch": 49.0,
"eval_loss": 0.32984980940818787,
"eval_runtime": 10.8559,
"eval_samples_per_second": 921.16,
"eval_steps_per_second": 1.198,
"step": 196
},
{
"epoch": 50.0,
"eval_loss": 0.32862576842308044,
"eval_runtime": 10.9935,
"eval_samples_per_second": 909.631,
"eval_steps_per_second": 1.183,
"step": 200
},
{
"epoch": 51.0,
"eval_loss": 0.31809937953948975,
"eval_runtime": 10.8616,
"eval_samples_per_second": 920.672,
"eval_steps_per_second": 1.197,
"step": 204
},
{
"epoch": 52.0,
"eval_loss": 0.3181401193141937,
"eval_runtime": 10.9831,
"eval_samples_per_second": 910.489,
"eval_steps_per_second": 1.184,
"step": 208
},
{
"epoch": 53.0,
"eval_loss": 0.31942903995513916,
"eval_runtime": 10.8766,
"eval_samples_per_second": 919.404,
"eval_steps_per_second": 1.195,
"step": 212
},
{
"epoch": 54.0,
"eval_loss": 0.29720762372016907,
"eval_runtime": 11.0007,
"eval_samples_per_second": 909.037,
"eval_steps_per_second": 1.182,
"step": 216
},
{
"epoch": 55.0,
"eval_loss": 0.28963199257850647,
"eval_runtime": 10.8682,
"eval_samples_per_second": 920.116,
"eval_steps_per_second": 1.196,
"step": 220
},
{
"epoch": 56.0,
"eval_loss": 0.28118211030960083,
"eval_runtime": 11.1702,
"eval_samples_per_second": 895.242,
"eval_steps_per_second": 1.164,
"step": 224
},
{
"epoch": 57.0,
"eval_loss": 0.2807424068450928,
"eval_runtime": 10.8728,
"eval_samples_per_second": 919.723,
"eval_steps_per_second": 1.196,
"step": 228
},
{
"epoch": 58.0,
"eval_loss": 0.2821776568889618,
"eval_runtime": 11.013,
"eval_samples_per_second": 908.02,
"eval_steps_per_second": 1.18,
"step": 232
},
{
"epoch": 59.0,
"eval_loss": 0.2738954722881317,
"eval_runtime": 10.882,
"eval_samples_per_second": 918.95,
"eval_steps_per_second": 1.195,
"step": 236
},
{
"epoch": 60.0,
"eval_loss": 0.259623646736145,
"eval_runtime": 10.9964,
"eval_samples_per_second": 909.385,
"eval_steps_per_second": 1.182,
"step": 240
},
{
"epoch": 61.0,
"eval_loss": 0.26454034447669983,
"eval_runtime": 10.8812,
"eval_samples_per_second": 919.014,
"eval_steps_per_second": 1.195,
"step": 244
},
{
"epoch": 62.0,
"eval_loss": 0.2502776086330414,
"eval_runtime": 11.0146,
"eval_samples_per_second": 907.884,
"eval_steps_per_second": 1.18,
"step": 248
},
{
"epoch": 63.0,
"eval_loss": 0.24190129339694977,
"eval_runtime": 10.8776,
"eval_samples_per_second": 919.32,
"eval_steps_per_second": 1.195,
"step": 252
},
{
"epoch": 64.0,
"eval_loss": 0.2520209848880768,
"eval_runtime": 11.1696,
"eval_samples_per_second": 895.288,
"eval_steps_per_second": 1.164,
"step": 256
},
{
"epoch": 65.0,
"eval_loss": 0.24023157358169556,
"eval_runtime": 10.8767,
"eval_samples_per_second": 919.398,
"eval_steps_per_second": 1.195,
"step": 260
},
{
"epoch": 66.0,
"eval_loss": 0.2362491935491562,
"eval_runtime": 11.0127,
"eval_samples_per_second": 908.04,
"eval_steps_per_second": 1.18,
"step": 264
},
{
"epoch": 67.0,
"eval_loss": 0.23966462910175323,
"eval_runtime": 10.8781,
"eval_samples_per_second": 919.276,
"eval_steps_per_second": 1.195,
"step": 268
},
{
"epoch": 68.0,
"eval_loss": 0.2406124770641327,
"eval_runtime": 11.0206,
"eval_samples_per_second": 907.394,
"eval_steps_per_second": 1.18,
"step": 272
},
{
"epoch": 69.0,
"eval_loss": 0.22616925835609436,
"eval_runtime": 10.8781,
"eval_samples_per_second": 919.274,
"eval_steps_per_second": 1.195,
"step": 276
},
{
"epoch": 70.0,
"eval_loss": 0.2212550789117813,
"eval_runtime": 11.0106,
"eval_samples_per_second": 908.219,
"eval_steps_per_second": 1.181,
"step": 280
},
{
"epoch": 71.0,
"eval_loss": 0.2343885451555252,
"eval_runtime": 10.8799,
"eval_samples_per_second": 919.125,
"eval_steps_per_second": 1.195,
"step": 284
},
{
"epoch": 72.0,
"eval_loss": 0.2180890589952469,
"eval_runtime": 11.175,
"eval_samples_per_second": 894.855,
"eval_steps_per_second": 1.163,
"step": 288
},
{
"epoch": 73.0,
"eval_loss": 0.21395854651927948,
"eval_runtime": 10.8684,
"eval_samples_per_second": 920.101,
"eval_steps_per_second": 1.196,
"step": 292
},
{
"epoch": 74.0,
"eval_loss": 0.21711787581443787,
"eval_runtime": 11.005,
"eval_samples_per_second": 908.68,
"eval_steps_per_second": 1.181,
"step": 296
},
{
"epoch": 75.0,
"eval_loss": 0.21878717839717865,
"eval_runtime": 10.8803,
"eval_samples_per_second": 919.091,
"eval_steps_per_second": 1.195,
"step": 300
},
{
"epoch": 76.0,
"eval_loss": 0.21271127462387085,
"eval_runtime": 11.1791,
"eval_samples_per_second": 894.529,
"eval_steps_per_second": 1.163,
"step": 304
},
{
"epoch": 77.0,
"eval_loss": 0.20574086904525757,
"eval_runtime": 10.8704,
"eval_samples_per_second": 919.927,
"eval_steps_per_second": 1.196,
"step": 308
},
{
"epoch": 78.0,
"eval_loss": 0.20641738176345825,
"eval_runtime": 11.0014,
"eval_samples_per_second": 908.978,
"eval_steps_per_second": 1.182,
"step": 312
},
{
"epoch": 79.0,
"eval_loss": 0.20721706748008728,
"eval_runtime": 10.8792,
"eval_samples_per_second": 919.189,
"eval_steps_per_second": 1.195,
"step": 316
},
{
"epoch": 80.0,
"eval_loss": 0.20023952424526215,
"eval_runtime": 11.174,
"eval_samples_per_second": 894.931,
"eval_steps_per_second": 1.163,
"step": 320
},
{
"epoch": 81.0,
"eval_loss": 0.20403145253658295,
"eval_runtime": 10.8566,
"eval_samples_per_second": 921.097,
"eval_steps_per_second": 1.197,
"step": 324
},
{
"epoch": 82.0,
"eval_loss": 0.19536912441253662,
"eval_runtime": 11.0057,
"eval_samples_per_second": 908.621,
"eval_steps_per_second": 1.181,
"step": 328
},
{
"epoch": 83.0,
"eval_loss": 0.19563594460487366,
"eval_runtime": 10.8635,
"eval_samples_per_second": 920.516,
"eval_steps_per_second": 1.197,
"step": 332
},
{
"epoch": 84.0,
"eval_loss": 0.1962701380252838,
"eval_runtime": 11.1829,
"eval_samples_per_second": 894.219,
"eval_steps_per_second": 1.162,
"step": 336
},
{
"epoch": 85.0,
"eval_loss": 0.19198498129844666,
"eval_runtime": 10.8644,
"eval_samples_per_second": 920.438,
"eval_steps_per_second": 1.197,
"step": 340
},
{
"epoch": 86.0,
"eval_loss": 0.18079973757266998,
"eval_runtime": 11.0069,
"eval_samples_per_second": 908.518,
"eval_steps_per_second": 1.181,
"step": 344
},
{
"epoch": 87.0,
"eval_loss": 0.18332232534885406,
"eval_runtime": 10.8634,
"eval_samples_per_second": 920.526,
"eval_steps_per_second": 1.197,
"step": 348
},
{
"epoch": 88.0,
"eval_loss": 0.19687600433826447,
"eval_runtime": 11.1808,
"eval_samples_per_second": 894.389,
"eval_steps_per_second": 1.163,
"step": 352
},
{
"epoch": 89.0,
"eval_loss": 0.20110972225666046,
"eval_runtime": 10.8709,
"eval_samples_per_second": 919.884,
"eval_steps_per_second": 1.196,
"step": 356
},
{
"epoch": 90.0,
"eval_loss": 0.18666134774684906,
"eval_runtime": 11.0027,
"eval_samples_per_second": 908.869,
"eval_steps_per_second": 1.182,
"step": 360
},
{
"epoch": 91.0,
"eval_loss": 0.1773829162120819,
"eval_runtime": 10.8627,
"eval_samples_per_second": 920.586,
"eval_steps_per_second": 1.197,
"step": 364
},
{
"epoch": 92.0,
"eval_loss": 0.18139097094535828,
"eval_runtime": 11.158,
"eval_samples_per_second": 896.215,
"eval_steps_per_second": 1.165,
"step": 368
},
{
"epoch": 93.0,
"eval_loss": 0.18620698153972626,
"eval_runtime": 10.8654,
"eval_samples_per_second": 920.355,
"eval_steps_per_second": 1.196,
"step": 372
},
{
"epoch": 94.0,
"eval_loss": 0.1856929063796997,
"eval_runtime": 10.9909,
"eval_samples_per_second": 909.844,
"eval_steps_per_second": 1.183,
"step": 376
},
{
"epoch": 95.0,
"eval_loss": 0.17794571816921234,
"eval_runtime": 10.8741,
"eval_samples_per_second": 919.613,
"eval_steps_per_second": 1.195,
"step": 380
},
{
"epoch": 96.0,
"eval_loss": 0.17274315655231476,
"eval_runtime": 11.1798,
"eval_samples_per_second": 894.47,
"eval_steps_per_second": 1.163,
"step": 384
},
{
"epoch": 97.0,
"eval_loss": 0.17167899012565613,
"eval_runtime": 10.8508,
"eval_samples_per_second": 921.589,
"eval_steps_per_second": 1.198,
"step": 388
},
{
"epoch": 98.0,
"eval_loss": 0.17758916318416595,
"eval_runtime": 11.019,
"eval_samples_per_second": 907.523,
"eval_steps_per_second": 1.18,
"step": 392
},
{
"epoch": 99.0,
"eval_loss": 0.18558244407176971,
"eval_runtime": 10.8574,
"eval_samples_per_second": 921.033,
"eval_steps_per_second": 1.197,
"step": 396
},
{
"epoch": 100.0,
"eval_loss": 0.18669435381889343,
"eval_runtime": 11.1461,
"eval_samples_per_second": 897.177,
"eval_steps_per_second": 1.166,
"step": 400
},
{
"epoch": 101.0,
"eval_loss": 0.1823727786540985,
"eval_runtime": 10.8583,
"eval_samples_per_second": 920.956,
"eval_steps_per_second": 1.197,
"step": 404
},
{
"epoch": 102.0,
"eval_loss": 0.17717821896076202,
"eval_runtime": 11.0086,
"eval_samples_per_second": 908.379,
"eval_steps_per_second": 1.181,
"step": 408
},
{
"epoch": 103.0,
"eval_loss": 0.17209963500499725,
"eval_runtime": 10.8863,
"eval_samples_per_second": 918.589,
"eval_steps_per_second": 1.194,
"step": 412
},
{
"epoch": 104.0,
"eval_loss": 0.1696111112833023,
"eval_runtime": 11.178,
"eval_samples_per_second": 894.611,
"eval_steps_per_second": 1.163,
"step": 416
},
{
"epoch": 105.0,
"eval_loss": 0.16957539319992065,
"eval_runtime": 10.8672,
"eval_samples_per_second": 920.2,
"eval_steps_per_second": 1.196,
"step": 420
}
],
"logging_steps": 500,
"max_steps": 440,
"num_train_epochs": 110,
"save_steps": 500,
"total_flos": 9043941150720000.0,
"trial_name": null,
"trial_params": null
}