byt5_5k / trainer_state.json
Alexziyu's picture
11
5ed0662
{
"best_metric": 0.15095455944538116,
"best_model_checkpoint": "AlexWang99/byt5_add_5k/checkpoint-329",
"epoch": 47.0,
"eval_steps": 500,
"global_step": 329,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 1.0333173274993896,
"eval_runtime": 10.7395,
"eval_samples_per_second": 931.141,
"eval_steps_per_second": 1.21,
"step": 7
},
{
"epoch": 2.0,
"eval_loss": 0.9484947323799133,
"eval_runtime": 10.7979,
"eval_samples_per_second": 926.109,
"eval_steps_per_second": 1.204,
"step": 14
},
{
"epoch": 3.0,
"eval_loss": 0.8662577271461487,
"eval_runtime": 10.9139,
"eval_samples_per_second": 916.266,
"eval_steps_per_second": 1.191,
"step": 21
},
{
"epoch": 4.0,
"eval_loss": 0.816236138343811,
"eval_runtime": 10.8703,
"eval_samples_per_second": 919.938,
"eval_steps_per_second": 1.196,
"step": 28
},
{
"epoch": 5.0,
"eval_loss": 0.7725976705551147,
"eval_runtime": 11.0507,
"eval_samples_per_second": 904.923,
"eval_steps_per_second": 1.176,
"step": 35
},
{
"epoch": 6.0,
"eval_loss": 0.7079625725746155,
"eval_runtime": 10.9553,
"eval_samples_per_second": 912.803,
"eval_steps_per_second": 1.187,
"step": 42
},
{
"epoch": 7.0,
"eval_loss": 0.6394063234329224,
"eval_runtime": 10.8958,
"eval_samples_per_second": 917.786,
"eval_steps_per_second": 1.193,
"step": 49
},
{
"epoch": 8.0,
"eval_loss": 0.5353133678436279,
"eval_runtime": 11.1585,
"eval_samples_per_second": 896.181,
"eval_steps_per_second": 1.165,
"step": 56
},
{
"epoch": 9.0,
"eval_loss": 0.501313328742981,
"eval_runtime": 10.9739,
"eval_samples_per_second": 911.249,
"eval_steps_per_second": 1.185,
"step": 63
},
{
"epoch": 10.0,
"eval_loss": 0.45894965529441833,
"eval_runtime": 10.8763,
"eval_samples_per_second": 919.43,
"eval_steps_per_second": 1.195,
"step": 70
},
{
"epoch": 11.0,
"eval_loss": 0.4472431540489197,
"eval_runtime": 10.9138,
"eval_samples_per_second": 916.274,
"eval_steps_per_second": 1.191,
"step": 77
},
{
"epoch": 12.0,
"eval_loss": 0.4227907359600067,
"eval_runtime": 10.9807,
"eval_samples_per_second": 910.685,
"eval_steps_per_second": 1.184,
"step": 84
},
{
"epoch": 13.0,
"eval_loss": 0.39944639801979065,
"eval_runtime": 11.1342,
"eval_samples_per_second": 898.131,
"eval_steps_per_second": 1.168,
"step": 91
},
{
"epoch": 14.0,
"eval_loss": 0.38267987966537476,
"eval_runtime": 10.9641,
"eval_samples_per_second": 912.068,
"eval_steps_per_second": 1.186,
"step": 98
},
{
"epoch": 15.0,
"eval_loss": 0.319256991147995,
"eval_runtime": 10.7912,
"eval_samples_per_second": 926.681,
"eval_steps_per_second": 1.205,
"step": 105
},
{
"epoch": 16.0,
"eval_loss": 0.3410109579563141,
"eval_runtime": 10.7915,
"eval_samples_per_second": 926.652,
"eval_steps_per_second": 1.205,
"step": 112
},
{
"epoch": 17.0,
"eval_loss": 0.32387295365333557,
"eval_runtime": 10.7997,
"eval_samples_per_second": 925.948,
"eval_steps_per_second": 1.204,
"step": 119
},
{
"epoch": 18.0,
"eval_loss": 0.2893741726875305,
"eval_runtime": 10.8261,
"eval_samples_per_second": 923.697,
"eval_steps_per_second": 1.201,
"step": 126
},
{
"epoch": 19.0,
"eval_loss": 0.26235708594322205,
"eval_runtime": 10.9401,
"eval_samples_per_second": 914.071,
"eval_steps_per_second": 1.188,
"step": 133
},
{
"epoch": 20.0,
"eval_loss": 0.26693689823150635,
"eval_runtime": 10.9641,
"eval_samples_per_second": 912.07,
"eval_steps_per_second": 1.186,
"step": 140
},
{
"epoch": 21.0,
"eval_loss": 0.2766812741756439,
"eval_runtime": 11.0072,
"eval_samples_per_second": 908.497,
"eval_steps_per_second": 1.181,
"step": 147
},
{
"epoch": 22.0,
"eval_loss": 0.24048474431037903,
"eval_runtime": 10.8863,
"eval_samples_per_second": 918.586,
"eval_steps_per_second": 1.194,
"step": 154
},
{
"epoch": 23.0,
"eval_loss": 0.23866714537143707,
"eval_runtime": 10.7952,
"eval_samples_per_second": 926.334,
"eval_steps_per_second": 1.204,
"step": 161
},
{
"epoch": 24.0,
"eval_loss": 0.24236781895160675,
"eval_runtime": 11.0325,
"eval_samples_per_second": 906.412,
"eval_steps_per_second": 1.178,
"step": 168
},
{
"epoch": 25.0,
"eval_loss": 0.2102736085653305,
"eval_runtime": 10.9008,
"eval_samples_per_second": 917.366,
"eval_steps_per_second": 1.193,
"step": 175
},
{
"epoch": 26.0,
"eval_loss": 0.22354699671268463,
"eval_runtime": 10.8227,
"eval_samples_per_second": 923.988,
"eval_steps_per_second": 1.201,
"step": 182
},
{
"epoch": 27.0,
"eval_loss": 0.21701256930828094,
"eval_runtime": 10.8903,
"eval_samples_per_second": 918.249,
"eval_steps_per_second": 1.194,
"step": 189
},
{
"epoch": 28.0,
"eval_loss": 0.20713335275650024,
"eval_runtime": 11.0977,
"eval_samples_per_second": 901.088,
"eval_steps_per_second": 1.171,
"step": 196
},
{
"epoch": 29.0,
"eval_loss": 0.19325299561023712,
"eval_runtime": 10.8557,
"eval_samples_per_second": 921.179,
"eval_steps_per_second": 1.198,
"step": 203
},
{
"epoch": 30.0,
"eval_loss": 0.19234418869018555,
"eval_runtime": 10.8944,
"eval_samples_per_second": 917.9,
"eval_steps_per_second": 1.193,
"step": 210
},
{
"epoch": 31.0,
"eval_loss": 0.19294323027133942,
"eval_runtime": 11.1467,
"eval_samples_per_second": 897.123,
"eval_steps_per_second": 1.166,
"step": 217
},
{
"epoch": 32.0,
"eval_loss": 0.18856723606586456,
"eval_runtime": 10.9076,
"eval_samples_per_second": 916.795,
"eval_steps_per_second": 1.192,
"step": 224
},
{
"epoch": 33.0,
"eval_loss": 0.1928592473268509,
"eval_runtime": 10.9684,
"eval_samples_per_second": 911.712,
"eval_steps_per_second": 1.185,
"step": 231
},
{
"epoch": 34.0,
"eval_loss": 0.17712894082069397,
"eval_runtime": 10.9664,
"eval_samples_per_second": 911.873,
"eval_steps_per_second": 1.185,
"step": 238
},
{
"epoch": 35.0,
"eval_loss": 0.1747332066297531,
"eval_runtime": 10.9577,
"eval_samples_per_second": 912.602,
"eval_steps_per_second": 1.186,
"step": 245
},
{
"epoch": 36.0,
"eval_loss": 0.1693752259016037,
"eval_runtime": 10.8762,
"eval_samples_per_second": 919.44,
"eval_steps_per_second": 1.195,
"step": 252
},
{
"epoch": 37.0,
"eval_loss": 0.17445072531700134,
"eval_runtime": 10.872,
"eval_samples_per_second": 919.793,
"eval_steps_per_second": 1.196,
"step": 259
},
{
"epoch": 38.0,
"eval_loss": 0.1689160168170929,
"eval_runtime": 11.1193,
"eval_samples_per_second": 899.339,
"eval_steps_per_second": 1.169,
"step": 266
},
{
"epoch": 39.0,
"eval_loss": 0.16354702413082123,
"eval_runtime": 11.1231,
"eval_samples_per_second": 899.033,
"eval_steps_per_second": 1.169,
"step": 273
},
{
"epoch": 40.0,
"eval_loss": 0.1702088564634323,
"eval_runtime": 11.117,
"eval_samples_per_second": 899.521,
"eval_steps_per_second": 1.169,
"step": 280
},
{
"epoch": 41.0,
"eval_loss": 0.16428792476654053,
"eval_runtime": 10.9632,
"eval_samples_per_second": 912.145,
"eval_steps_per_second": 1.186,
"step": 287
},
{
"epoch": 42.0,
"eval_loss": 0.16399501264095306,
"eval_runtime": 10.8713,
"eval_samples_per_second": 919.853,
"eval_steps_per_second": 1.196,
"step": 294
},
{
"epoch": 43.0,
"eval_loss": 0.15999095141887665,
"eval_runtime": 10.8706,
"eval_samples_per_second": 919.913,
"eval_steps_per_second": 1.196,
"step": 301
},
{
"epoch": 44.0,
"eval_loss": 0.16396267712116241,
"eval_runtime": 11.1122,
"eval_samples_per_second": 899.908,
"eval_steps_per_second": 1.17,
"step": 308
},
{
"epoch": 45.0,
"eval_loss": 0.16081862151622772,
"eval_runtime": 10.8849,
"eval_samples_per_second": 918.705,
"eval_steps_per_second": 1.194,
"step": 315
},
{
"epoch": 46.0,
"eval_loss": 0.1533430963754654,
"eval_runtime": 10.8716,
"eval_samples_per_second": 919.827,
"eval_steps_per_second": 1.196,
"step": 322
},
{
"epoch": 47.0,
"eval_loss": 0.15095455944538116,
"eval_runtime": 10.9494,
"eval_samples_per_second": 913.291,
"eval_steps_per_second": 1.187,
"step": 329
}
],
"logging_steps": 500,
"max_steps": 350,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 6747067207680000.0,
"trial_name": null,
"trial_params": null
}