stsb / trainer_state.json
ShengdingHu's picture
Training in progress, step 100
114a0ce
{
"best_metric": 89.89082639431217,
"best_model_checkpoint": "outputs/bitfit/t5-base/stsb/checkpoint-900",
"epoch": 20.0,
"global_step": 3600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.56,
"eval_average_metrics": 89.13717477824025,
"eval_loss": 0.9123751521110535,
"eval_pearson": 89.06474617714444,
"eval_runtime": 3.9799,
"eval_samples_per_second": 188.447,
"eval_spearmanr": 89.20960337933604,
"step": 100
},
{
"epoch": 1.11,
"eval_average_metrics": 89.23837198704238,
"eval_loss": 0.90521639585495,
"eval_pearson": 89.21696934784924,
"eval_runtime": 2.8408,
"eval_samples_per_second": 264.006,
"eval_spearmanr": 89.25977462623553,
"step": 200
},
{
"epoch": 1.67,
"eval_average_metrics": 88.85409457301812,
"eval_loss": 0.8948209881782532,
"eval_pearson": 88.76416701427588,
"eval_runtime": 3.5011,
"eval_samples_per_second": 214.217,
"eval_spearmanr": 88.94402213176035,
"step": 300
},
{
"epoch": 2.22,
"eval_average_metrics": 88.89962091835693,
"eval_loss": 0.8898776173591614,
"eval_pearson": 88.91820308047936,
"eval_runtime": 3.1447,
"eval_samples_per_second": 238.499,
"eval_spearmanr": 88.8810387562345,
"step": 400
},
{
"epoch": 2.78,
"learning_rate": 0.00025833333333333334,
"loss": 0.8458,
"step": 500
},
{
"epoch": 2.78,
"eval_average_metrics": 89.72250747135166,
"eval_loss": 0.8902953863143921,
"eval_pearson": 89.89215489261863,
"eval_runtime": 4.5165,
"eval_samples_per_second": 166.059,
"eval_spearmanr": 89.55286005008469,
"step": 500
},
{
"epoch": 3.33,
"eval_average_metrics": 89.46605644881728,
"eval_loss": 0.927269697189331,
"eval_pearson": 89.51595346590146,
"eval_runtime": 4.621,
"eval_samples_per_second": 162.302,
"eval_spearmanr": 89.41615943173312,
"step": 600
},
{
"epoch": 3.89,
"eval_average_metrics": 89.25697052652335,
"eval_loss": 0.8973984122276306,
"eval_pearson": 89.47540644836778,
"eval_runtime": 3.2452,
"eval_samples_per_second": 231.108,
"eval_spearmanr": 89.03853460467892,
"step": 700
},
{
"epoch": 4.44,
"eval_average_metrics": 89.77994978800227,
"eval_loss": 0.9015573859214783,
"eval_pearson": 89.98660087713056,
"eval_runtime": 5.9734,
"eval_samples_per_second": 125.557,
"eval_spearmanr": 89.57329869887398,
"step": 800
},
{
"epoch": 5.0,
"eval_average_metrics": 89.89082639431217,
"eval_loss": 0.8983659148216248,
"eval_pearson": 90.00270057438823,
"eval_runtime": 4.0652,
"eval_samples_per_second": 184.494,
"eval_spearmanr": 89.77895221423611,
"step": 900
},
{
"epoch": 5.56,
"learning_rate": 0.00021666666666666666,
"loss": 0.7859,
"step": 1000
},
{
"epoch": 5.56,
"eval_average_metrics": 89.15652744561595,
"eval_loss": 0.9311385154724121,
"eval_pearson": 89.25999907777292,
"eval_runtime": 3.1115,
"eval_samples_per_second": 241.042,
"eval_spearmanr": 89.053055813459,
"step": 1000
},
{
"epoch": 6.11,
"eval_average_metrics": 89.36900055711244,
"eval_loss": 0.914900541305542,
"eval_pearson": 89.48560950002296,
"eval_runtime": 4.2641,
"eval_samples_per_second": 175.886,
"eval_spearmanr": 89.25239161420193,
"step": 1100
},
{
"epoch": 6.67,
"eval_average_metrics": 89.41410457111328,
"eval_loss": 0.9506754279136658,
"eval_pearson": 89.45609872168974,
"eval_runtime": 3.0012,
"eval_samples_per_second": 249.9,
"eval_spearmanr": 89.37211042053684,
"step": 1200
},
{
"epoch": 7.22,
"eval_average_metrics": 89.44094336465075,
"eval_loss": 0.953001081943512,
"eval_pearson": 89.51193389114643,
"eval_runtime": 2.7971,
"eval_samples_per_second": 268.139,
"eval_spearmanr": 89.36995283815507,
"step": 1300
},
{
"epoch": 7.78,
"eval_average_metrics": 89.17605484273516,
"eval_loss": 0.9723613262176514,
"eval_pearson": 89.21895062324246,
"eval_runtime": 3.7026,
"eval_samples_per_second": 202.559,
"eval_spearmanr": 89.13315906222785,
"step": 1400
},
{
"epoch": 8.33,
"learning_rate": 0.000175,
"loss": 0.724,
"step": 1500
},
{
"epoch": 8.33,
"eval_average_metrics": 89.48308644886173,
"eval_loss": 0.9874196648597717,
"eval_pearson": 89.5217033530652,
"eval_runtime": 3.886,
"eval_samples_per_second": 193.003,
"eval_spearmanr": 89.44446954465825,
"step": 1500
},
{
"epoch": 8.89,
"eval_average_metrics": 89.79760216722264,
"eval_loss": 0.9869381785392761,
"eval_pearson": 89.95375320955753,
"eval_runtime": 4.168,
"eval_samples_per_second": 179.942,
"eval_spearmanr": 89.64145112488774,
"step": 1600
},
{
"epoch": 9.44,
"eval_average_metrics": 89.29279708906819,
"eval_loss": 0.9963127970695496,
"eval_pearson": 89.46662281864026,
"eval_runtime": 3.4947,
"eval_samples_per_second": 214.608,
"eval_spearmanr": 89.11897135949613,
"step": 1700
},
{
"epoch": 10.0,
"eval_average_metrics": 89.79299996621938,
"eval_loss": 0.9937890768051147,
"eval_pearson": 89.88666855723739,
"eval_runtime": 4.5917,
"eval_samples_per_second": 163.339,
"eval_spearmanr": 89.69933137520137,
"step": 1800
},
{
"epoch": 10.56,
"eval_average_metrics": 89.27048678268574,
"eval_loss": 1.006996750831604,
"eval_pearson": 89.39560825429157,
"eval_runtime": 3.591,
"eval_samples_per_second": 208.856,
"eval_spearmanr": 89.1453653110799,
"step": 1900
},
{
"epoch": 11.11,
"learning_rate": 0.0001333333333333333,
"loss": 0.6693,
"step": 2000
},
{
"epoch": 11.11,
"eval_average_metrics": 89.62053083130016,
"eval_loss": 1.0332725048065186,
"eval_pearson": 89.69477341302792,
"eval_runtime": 3.768,
"eval_samples_per_second": 199.043,
"eval_spearmanr": 89.54628824957238,
"step": 2000
},
{
"epoch": 11.67,
"eval_average_metrics": 89.13711909449259,
"eval_loss": 1.0669463872909546,
"eval_pearson": 89.29635083082506,
"eval_runtime": 3.5887,
"eval_samples_per_second": 208.99,
"eval_spearmanr": 88.97788735816013,
"step": 2100
},
{
"epoch": 12.22,
"eval_average_metrics": 89.42971483116142,
"eval_loss": 1.0573464632034302,
"eval_pearson": 89.58674143842262,
"eval_runtime": 3.0808,
"eval_samples_per_second": 243.441,
"eval_spearmanr": 89.2726882239002,
"step": 2200
},
{
"epoch": 12.78,
"eval_average_metrics": 89.42097809467734,
"eval_loss": 1.0824470520019531,
"eval_pearson": 89.54344639693154,
"eval_runtime": 3.8679,
"eval_samples_per_second": 193.903,
"eval_spearmanr": 89.29850979242312,
"step": 2300
},
{
"epoch": 13.33,
"eval_average_metrics": 89.38136709242659,
"eval_loss": 1.0812472105026245,
"eval_pearson": 89.52312670086297,
"eval_runtime": 3.7051,
"eval_samples_per_second": 202.426,
"eval_spearmanr": 89.23960748399018,
"step": 2400
},
{
"epoch": 13.89,
"learning_rate": 9.166666666666667e-05,
"loss": 0.6142,
"step": 2500
},
{
"epoch": 13.89,
"eval_average_metrics": 89.82337151387273,
"eval_loss": 1.0920954942703247,
"eval_pearson": 89.99504557050292,
"eval_runtime": 3.8424,
"eval_samples_per_second": 195.19,
"eval_spearmanr": 89.65169745724253,
"step": 2500
},
{
"epoch": 14.44,
"eval_average_metrics": 89.56749485985972,
"eval_loss": 1.1302155256271362,
"eval_pearson": 89.70885507009902,
"eval_runtime": 3.9276,
"eval_samples_per_second": 190.954,
"eval_spearmanr": 89.42613464962042,
"step": 2600
},
{
"epoch": 15.0,
"eval_average_metrics": 89.61009900620351,
"eval_loss": 1.14012610912323,
"eval_pearson": 89.68171447646405,
"eval_runtime": 3.7232,
"eval_samples_per_second": 201.438,
"eval_spearmanr": 89.53848353594297,
"step": 2700
},
{
"epoch": 15.56,
"eval_average_metrics": 89.27459685358238,
"eval_loss": 1.1592134237289429,
"eval_pearson": 89.32968690818579,
"eval_runtime": 4.52,
"eval_samples_per_second": 165.929,
"eval_spearmanr": 89.21950679897895,
"step": 2800
},
{
"epoch": 16.11,
"eval_average_metrics": 89.65586988882487,
"eval_loss": 1.1740944385528564,
"eval_pearson": 89.72543322190315,
"eval_runtime": 3.2065,
"eval_samples_per_second": 233.903,
"eval_spearmanr": 89.5863065557466,
"step": 2900
},
{
"epoch": 16.67,
"learning_rate": 4.9999999999999996e-05,
"loss": 0.5671,
"step": 3000
},
{
"epoch": 16.67,
"eval_average_metrics": 89.74259369919744,
"eval_loss": 1.1778316497802734,
"eval_pearson": 89.82066967172904,
"eval_runtime": 3.705,
"eval_samples_per_second": 202.432,
"eval_spearmanr": 89.66451772666586,
"step": 3000
},
{
"epoch": 17.22,
"eval_average_metrics": 89.67436613731013,
"eval_loss": 1.1869001388549805,
"eval_pearson": 89.822535941685,
"eval_runtime": 4.6778,
"eval_samples_per_second": 160.331,
"eval_spearmanr": 89.52619633293524,
"step": 3100
},
{
"epoch": 17.78,
"eval_average_metrics": 89.64929997632476,
"eval_loss": 1.1901123523712158,
"eval_pearson": 89.78058123437394,
"eval_runtime": 3.3067,
"eval_samples_per_second": 226.812,
"eval_spearmanr": 89.51801871827558,
"step": 3200
},
{
"epoch": 18.33,
"eval_average_metrics": 89.78282294446872,
"eval_loss": 1.1919935941696167,
"eval_pearson": 89.9585327889659,
"eval_runtime": 4.0203,
"eval_samples_per_second": 186.553,
"eval_spearmanr": 89.60711309997154,
"step": 3300
},
{
"epoch": 18.89,
"eval_average_metrics": 89.64203906594571,
"eval_loss": 1.1938636302947998,
"eval_pearson": 89.76878899591361,
"eval_runtime": 2.5879,
"eval_samples_per_second": 289.807,
"eval_spearmanr": 89.51528913597781,
"step": 3400
},
{
"epoch": 19.44,
"learning_rate": 8.333333333333332e-06,
"loss": 0.5407,
"step": 3500
},
{
"epoch": 19.44,
"eval_average_metrics": 89.85580070489866,
"eval_loss": 1.2061032056808472,
"eval_pearson": 89.99507172793925,
"eval_runtime": 3.3291,
"eval_samples_per_second": 225.288,
"eval_spearmanr": 89.71652968185808,
"step": 3500
},
{
"epoch": 20.0,
"eval_average_metrics": 89.66212005188657,
"eval_loss": 1.2073893547058105,
"eval_pearson": 89.79939157390831,
"eval_runtime": 3.5775,
"eval_samples_per_second": 209.645,
"eval_spearmanr": 89.52484852986483,
"step": 3600
},
{
"epoch": 20.0,
"step": 3600,
"total_flos": 1.2081375923975424e+16,
"train_loss": 0.6740480825636121,
"train_runtime": 1306.7363,
"train_samples_per_second": 87.99,
"train_steps_per_second": 2.755
}
],
"max_steps": 3600,
"num_train_epochs": 20,
"total_flos": 1.2081375923975424e+16,
"trial_name": null,
"trial_params": null
}