out-glue-sst2 / trainer_state.json
Tural's picture
End of training
3a7384e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 1755,
"global_step": 3510,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 1.985754985754986e-05,
"loss": 0.5449,
"step": 25
},
{
"epoch": 0.14,
"learning_rate": 1.9715099715099716e-05,
"loss": 0.3107,
"step": 50
},
{
"epoch": 0.21,
"learning_rate": 1.9572649572649574e-05,
"loss": 0.2798,
"step": 75
},
{
"epoch": 0.28,
"learning_rate": 1.943019943019943e-05,
"loss": 0.2448,
"step": 100
},
{
"epoch": 0.36,
"learning_rate": 1.928774928774929e-05,
"loss": 0.2447,
"step": 125
},
{
"epoch": 0.43,
"learning_rate": 1.914529914529915e-05,
"loss": 0.2314,
"step": 150
},
{
"epoch": 0.5,
"learning_rate": 1.9002849002849003e-05,
"loss": 0.1924,
"step": 175
},
{
"epoch": 0.57,
"learning_rate": 1.8860398860398864e-05,
"loss": 0.2063,
"step": 200
},
{
"epoch": 0.64,
"learning_rate": 1.8717948717948718e-05,
"loss": 0.1999,
"step": 225
},
{
"epoch": 0.71,
"learning_rate": 1.8575498575498575e-05,
"loss": 0.1794,
"step": 250
},
{
"epoch": 0.78,
"learning_rate": 1.8433048433048436e-05,
"loss": 0.1884,
"step": 275
},
{
"epoch": 0.85,
"learning_rate": 1.829059829059829e-05,
"loss": 0.1819,
"step": 300
},
{
"epoch": 0.93,
"learning_rate": 1.814814814814815e-05,
"loss": 0.1682,
"step": 325
},
{
"epoch": 1.0,
"learning_rate": 1.8005698005698008e-05,
"loss": 0.1808,
"step": 350
},
{
"epoch": 1.07,
"learning_rate": 1.7863247863247866e-05,
"loss": 0.1332,
"step": 375
},
{
"epoch": 1.14,
"learning_rate": 1.7720797720797723e-05,
"loss": 0.1152,
"step": 400
},
{
"epoch": 1.21,
"learning_rate": 1.757834757834758e-05,
"loss": 0.1308,
"step": 425
},
{
"epoch": 1.28,
"learning_rate": 1.7435897435897438e-05,
"loss": 0.1344,
"step": 450
},
{
"epoch": 1.35,
"learning_rate": 1.7293447293447295e-05,
"loss": 0.1267,
"step": 475
},
{
"epoch": 1.42,
"learning_rate": 1.7150997150997152e-05,
"loss": 0.1333,
"step": 500
},
{
"epoch": 1.5,
"learning_rate": 1.700854700854701e-05,
"loss": 0.1278,
"step": 525
},
{
"epoch": 1.57,
"learning_rate": 1.6866096866096867e-05,
"loss": 0.1326,
"step": 550
},
{
"epoch": 1.64,
"learning_rate": 1.6723646723646725e-05,
"loss": 0.1148,
"step": 575
},
{
"epoch": 1.71,
"learning_rate": 1.6581196581196585e-05,
"loss": 0.1207,
"step": 600
},
{
"epoch": 1.78,
"learning_rate": 1.643874643874644e-05,
"loss": 0.12,
"step": 625
},
{
"epoch": 1.85,
"learning_rate": 1.6296296296296297e-05,
"loss": 0.1129,
"step": 650
},
{
"epoch": 1.92,
"learning_rate": 1.6153846153846154e-05,
"loss": 0.1295,
"step": 675
},
{
"epoch": 1.99,
"learning_rate": 1.601139601139601e-05,
"loss": 0.1244,
"step": 700
},
{
"epoch": 2.07,
"learning_rate": 1.5868945868945872e-05,
"loss": 0.0947,
"step": 725
},
{
"epoch": 2.14,
"learning_rate": 1.5726495726495726e-05,
"loss": 0.0791,
"step": 750
},
{
"epoch": 2.21,
"learning_rate": 1.5584045584045587e-05,
"loss": 0.0846,
"step": 775
},
{
"epoch": 2.28,
"learning_rate": 1.5441595441595444e-05,
"loss": 0.083,
"step": 800
},
{
"epoch": 2.35,
"learning_rate": 1.5299145299145298e-05,
"loss": 0.0859,
"step": 825
},
{
"epoch": 2.42,
"learning_rate": 1.5156695156695157e-05,
"loss": 0.0851,
"step": 850
},
{
"epoch": 2.49,
"learning_rate": 1.5014245014245015e-05,
"loss": 0.0916,
"step": 875
},
{
"epoch": 2.56,
"learning_rate": 1.4871794871794874e-05,
"loss": 0.0822,
"step": 900
},
{
"epoch": 2.64,
"learning_rate": 1.472934472934473e-05,
"loss": 0.0933,
"step": 925
},
{
"epoch": 2.71,
"learning_rate": 1.4586894586894588e-05,
"loss": 0.0829,
"step": 950
},
{
"epoch": 2.78,
"learning_rate": 1.4444444444444446e-05,
"loss": 0.0908,
"step": 975
},
{
"epoch": 2.85,
"learning_rate": 1.4301994301994305e-05,
"loss": 0.0911,
"step": 1000
},
{
"epoch": 2.92,
"learning_rate": 1.415954415954416e-05,
"loss": 0.0851,
"step": 1025
},
{
"epoch": 2.99,
"learning_rate": 1.4017094017094018e-05,
"loss": 0.0958,
"step": 1050
},
{
"epoch": 3.06,
"learning_rate": 1.3874643874643875e-05,
"loss": 0.0588,
"step": 1075
},
{
"epoch": 3.13,
"learning_rate": 1.3732193732193733e-05,
"loss": 0.0566,
"step": 1100
},
{
"epoch": 3.21,
"learning_rate": 1.3589743589743592e-05,
"loss": 0.0573,
"step": 1125
},
{
"epoch": 3.28,
"learning_rate": 1.3447293447293447e-05,
"loss": 0.0603,
"step": 1150
},
{
"epoch": 3.35,
"learning_rate": 1.3304843304843306e-05,
"loss": 0.0753,
"step": 1175
},
{
"epoch": 3.42,
"learning_rate": 1.3162393162393164e-05,
"loss": 0.0577,
"step": 1200
},
{
"epoch": 3.49,
"learning_rate": 1.301994301994302e-05,
"loss": 0.0678,
"step": 1225
},
{
"epoch": 3.56,
"learning_rate": 1.2877492877492879e-05,
"loss": 0.0696,
"step": 1250
},
{
"epoch": 3.63,
"learning_rate": 1.2735042735042736e-05,
"loss": 0.067,
"step": 1275
},
{
"epoch": 3.7,
"learning_rate": 1.2592592592592593e-05,
"loss": 0.0658,
"step": 1300
},
{
"epoch": 3.77,
"learning_rate": 1.245014245014245e-05,
"loss": 0.0701,
"step": 1325
},
{
"epoch": 3.85,
"learning_rate": 1.230769230769231e-05,
"loss": 0.0634,
"step": 1350
},
{
"epoch": 3.92,
"learning_rate": 1.2165242165242165e-05,
"loss": 0.0711,
"step": 1375
},
{
"epoch": 3.99,
"learning_rate": 1.2022792022792024e-05,
"loss": 0.0667,
"step": 1400
},
{
"epoch": 4.06,
"learning_rate": 1.1880341880341882e-05,
"loss": 0.048,
"step": 1425
},
{
"epoch": 4.13,
"learning_rate": 1.1737891737891738e-05,
"loss": 0.0461,
"step": 1450
},
{
"epoch": 4.2,
"learning_rate": 1.1595441595441597e-05,
"loss": 0.0418,
"step": 1475
},
{
"epoch": 4.27,
"learning_rate": 1.1452991452991454e-05,
"loss": 0.0578,
"step": 1500
},
{
"epoch": 4.34,
"learning_rate": 1.1310541310541311e-05,
"loss": 0.0463,
"step": 1525
},
{
"epoch": 4.42,
"learning_rate": 1.1168091168091169e-05,
"loss": 0.0458,
"step": 1550
},
{
"epoch": 4.49,
"learning_rate": 1.1025641025641028e-05,
"loss": 0.0509,
"step": 1575
},
{
"epoch": 4.56,
"learning_rate": 1.0883190883190883e-05,
"loss": 0.0509,
"step": 1600
},
{
"epoch": 4.63,
"learning_rate": 1.0740740740740742e-05,
"loss": 0.0453,
"step": 1625
},
{
"epoch": 4.7,
"learning_rate": 1.05982905982906e-05,
"loss": 0.0425,
"step": 1650
},
{
"epoch": 4.77,
"learning_rate": 1.0455840455840456e-05,
"loss": 0.0596,
"step": 1675
},
{
"epoch": 4.84,
"learning_rate": 1.0313390313390315e-05,
"loss": 0.0461,
"step": 1700
},
{
"epoch": 4.91,
"learning_rate": 1.0170940170940172e-05,
"loss": 0.0561,
"step": 1725
},
{
"epoch": 4.99,
"learning_rate": 1.002849002849003e-05,
"loss": 0.0523,
"step": 1750
},
{
"epoch": 5.06,
"learning_rate": 9.886039886039887e-06,
"loss": 0.0352,
"step": 1775
},
{
"epoch": 5.13,
"learning_rate": 9.743589743589744e-06,
"loss": 0.036,
"step": 1800
},
{
"epoch": 5.2,
"learning_rate": 9.601139601139601e-06,
"loss": 0.036,
"step": 1825
},
{
"epoch": 5.27,
"learning_rate": 9.458689458689459e-06,
"loss": 0.0343,
"step": 1850
},
{
"epoch": 5.34,
"learning_rate": 9.316239316239318e-06,
"loss": 0.0332,
"step": 1875
},
{
"epoch": 5.41,
"learning_rate": 9.173789173789175e-06,
"loss": 0.0398,
"step": 1900
},
{
"epoch": 5.48,
"learning_rate": 9.031339031339033e-06,
"loss": 0.0412,
"step": 1925
},
{
"epoch": 5.56,
"learning_rate": 8.888888888888888e-06,
"loss": 0.0397,
"step": 1950
},
{
"epoch": 5.63,
"learning_rate": 8.746438746438747e-06,
"loss": 0.0421,
"step": 1975
},
{
"epoch": 5.7,
"learning_rate": 8.603988603988605e-06,
"loss": 0.0386,
"step": 2000
},
{
"epoch": 5.77,
"learning_rate": 8.461538461538462e-06,
"loss": 0.0405,
"step": 2025
},
{
"epoch": 5.84,
"learning_rate": 8.31908831908832e-06,
"loss": 0.0431,
"step": 2050
},
{
"epoch": 5.91,
"learning_rate": 8.176638176638177e-06,
"loss": 0.0464,
"step": 2075
},
{
"epoch": 5.98,
"learning_rate": 8.034188034188036e-06,
"loss": 0.0462,
"step": 2100
},
{
"epoch": 6.05,
"learning_rate": 7.891737891737893e-06,
"loss": 0.0261,
"step": 2125
},
{
"epoch": 6.13,
"learning_rate": 7.749287749287749e-06,
"loss": 0.0304,
"step": 2150
},
{
"epoch": 6.2,
"learning_rate": 7.606837606837607e-06,
"loss": 0.029,
"step": 2175
},
{
"epoch": 6.27,
"learning_rate": 7.4643874643874645e-06,
"loss": 0.0316,
"step": 2200
},
{
"epoch": 6.34,
"learning_rate": 7.321937321937323e-06,
"loss": 0.0306,
"step": 2225
},
{
"epoch": 6.41,
"learning_rate": 7.17948717948718e-06,
"loss": 0.035,
"step": 2250
},
{
"epoch": 6.48,
"learning_rate": 7.0370370370370375e-06,
"loss": 0.0245,
"step": 2275
},
{
"epoch": 6.55,
"learning_rate": 6.894586894586896e-06,
"loss": 0.0207,
"step": 2300
},
{
"epoch": 6.62,
"learning_rate": 6.752136752136753e-06,
"loss": 0.0314,
"step": 2325
},
{
"epoch": 6.7,
"learning_rate": 6.60968660968661e-06,
"loss": 0.0291,
"step": 2350
},
{
"epoch": 6.77,
"learning_rate": 6.467236467236467e-06,
"loss": 0.0331,
"step": 2375
},
{
"epoch": 6.84,
"learning_rate": 6.324786324786325e-06,
"loss": 0.0297,
"step": 2400
},
{
"epoch": 6.91,
"learning_rate": 6.1823361823361825e-06,
"loss": 0.0321,
"step": 2425
},
{
"epoch": 6.98,
"learning_rate": 6.039886039886041e-06,
"loss": 0.0313,
"step": 2450
},
{
"epoch": 7.05,
"learning_rate": 5.897435897435898e-06,
"loss": 0.0228,
"step": 2475
},
{
"epoch": 7.12,
"learning_rate": 5.7549857549857555e-06,
"loss": 0.0167,
"step": 2500
},
{
"epoch": 7.19,
"learning_rate": 5.612535612535614e-06,
"loss": 0.0193,
"step": 2525
},
{
"epoch": 7.26,
"learning_rate": 5.470085470085471e-06,
"loss": 0.0244,
"step": 2550
},
{
"epoch": 7.34,
"learning_rate": 5.327635327635328e-06,
"loss": 0.0234,
"step": 2575
},
{
"epoch": 7.41,
"learning_rate": 5.185185185185185e-06,
"loss": 0.0232,
"step": 2600
},
{
"epoch": 7.48,
"learning_rate": 5.042735042735043e-06,
"loss": 0.025,
"step": 2625
},
{
"epoch": 7.55,
"learning_rate": 4.9002849002849006e-06,
"loss": 0.0265,
"step": 2650
},
{
"epoch": 7.62,
"learning_rate": 4.757834757834758e-06,
"loss": 0.0251,
"step": 2675
},
{
"epoch": 7.69,
"learning_rate": 4.615384615384616e-06,
"loss": 0.0269,
"step": 2700
},
{
"epoch": 7.76,
"learning_rate": 4.4729344729344735e-06,
"loss": 0.0264,
"step": 2725
},
{
"epoch": 7.83,
"learning_rate": 4.330484330484331e-06,
"loss": 0.0211,
"step": 2750
},
{
"epoch": 7.91,
"learning_rate": 4.188034188034188e-06,
"loss": 0.03,
"step": 2775
},
{
"epoch": 7.98,
"learning_rate": 4.0455840455840465e-06,
"loss": 0.0224,
"step": 2800
},
{
"epoch": 8.05,
"learning_rate": 3.903133903133903e-06,
"loss": 0.0233,
"step": 2825
},
{
"epoch": 8.12,
"learning_rate": 3.760683760683761e-06,
"loss": 0.0213,
"step": 2850
},
{
"epoch": 8.19,
"learning_rate": 3.6182336182336186e-06,
"loss": 0.0157,
"step": 2875
},
{
"epoch": 8.26,
"learning_rate": 3.4757834757834764e-06,
"loss": 0.0171,
"step": 2900
},
{
"epoch": 8.33,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0189,
"step": 2925
},
{
"epoch": 8.4,
"learning_rate": 3.190883190883191e-06,
"loss": 0.0154,
"step": 2950
},
{
"epoch": 8.48,
"learning_rate": 3.048433048433049e-06,
"loss": 0.0178,
"step": 2975
},
{
"epoch": 8.55,
"learning_rate": 2.9059829059829063e-06,
"loss": 0.0257,
"step": 3000
},
{
"epoch": 8.62,
"learning_rate": 2.7635327635327636e-06,
"loss": 0.0166,
"step": 3025
},
{
"epoch": 8.69,
"learning_rate": 2.6210826210826214e-06,
"loss": 0.0202,
"step": 3050
},
{
"epoch": 8.76,
"learning_rate": 2.478632478632479e-06,
"loss": 0.0176,
"step": 3075
},
{
"epoch": 8.83,
"learning_rate": 2.336182336182336e-06,
"loss": 0.0128,
"step": 3100
},
{
"epoch": 8.9,
"learning_rate": 2.193732193732194e-06,
"loss": 0.0263,
"step": 3125
},
{
"epoch": 8.97,
"learning_rate": 2.0512820512820513e-06,
"loss": 0.025,
"step": 3150
},
{
"epoch": 9.05,
"learning_rate": 1.908831908831909e-06,
"loss": 0.0151,
"step": 3175
},
{
"epoch": 9.12,
"learning_rate": 1.7663817663817665e-06,
"loss": 0.015,
"step": 3200
},
{
"epoch": 9.19,
"learning_rate": 1.623931623931624e-06,
"loss": 0.0125,
"step": 3225
},
{
"epoch": 9.26,
"learning_rate": 1.4814814814814815e-06,
"loss": 0.0116,
"step": 3250
},
{
"epoch": 9.33,
"learning_rate": 1.3390313390313392e-06,
"loss": 0.0197,
"step": 3275
},
{
"epoch": 9.4,
"learning_rate": 1.1965811965811968e-06,
"loss": 0.0196,
"step": 3300
},
{
"epoch": 9.47,
"learning_rate": 1.0541310541310542e-06,
"loss": 0.0183,
"step": 3325
},
{
"epoch": 9.54,
"learning_rate": 9.116809116809118e-07,
"loss": 0.0171,
"step": 3350
},
{
"epoch": 9.62,
"learning_rate": 7.692307692307694e-07,
"loss": 0.0139,
"step": 3375
},
{
"epoch": 9.69,
"learning_rate": 6.267806267806268e-07,
"loss": 0.0158,
"step": 3400
},
{
"epoch": 9.76,
"learning_rate": 4.843304843304843e-07,
"loss": 0.0178,
"step": 3425
},
{
"epoch": 9.83,
"learning_rate": 3.4188034188034194e-07,
"loss": 0.0112,
"step": 3450
},
{
"epoch": 9.9,
"learning_rate": 1.9943019943019944e-07,
"loss": 0.0146,
"step": 3475
},
{
"epoch": 9.97,
"learning_rate": 5.6980056980056986e-08,
"loss": 0.0108,
"step": 3500
},
{
"epoch": 10.0,
"step": 3510,
"total_flos": 4.43006661686016e+16,
"train_loss": 0.0692601463139227,
"train_runtime": 1373.3603,
"train_samples_per_second": 490.396,
"train_steps_per_second": 2.556
}
],
"logging_steps": 25,
"max_steps": 3510,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 4.43006661686016e+16,
"trial_name": null,
"trial_params": null
}