emotion_classification / trainer_state.json
jtas's picture
End of training
b9a95d3 verified
{
"best_metric": 1.1135584115982056,
"best_model_checkpoint": "./model/emotion_classification/checkpoint-301",
"epoch": 30.0,
"eval_steps": 500,
"global_step": 1290,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.972609476841367e-05,
"loss": 1.9172,
"step": 43
},
{
"epoch": 1.0,
"eval_accuracy": 0.43333333333333335,
"eval_f1": 0.32631946371225834,
"eval_loss": 1.5750515460968018,
"eval_runtime": 4.0633,
"eval_samples_per_second": 29.533,
"eval_steps_per_second": 1.969,
"step": 43
},
{
"epoch": 2.0,
"learning_rate": 9.890738003669029e-05,
"loss": 1.4505,
"step": 86
},
{
"epoch": 2.0,
"eval_accuracy": 0.5333333333333333,
"eval_f1": 0.4650626007978949,
"eval_loss": 1.3041330575942993,
"eval_runtime": 4.1206,
"eval_samples_per_second": 29.122,
"eval_steps_per_second": 1.941,
"step": 86
},
{
"epoch": 3.0,
"learning_rate": 9.755282581475769e-05,
"loss": 1.1121,
"step": 129
},
{
"epoch": 3.0,
"eval_accuracy": 0.48333333333333334,
"eval_f1": 0.46841408327694595,
"eval_loss": 1.2902252674102783,
"eval_runtime": 4.2082,
"eval_samples_per_second": 28.516,
"eval_steps_per_second": 1.901,
"step": 129
},
{
"epoch": 4.0,
"learning_rate": 9.567727288213005e-05,
"loss": 0.8491,
"step": 172
},
{
"epoch": 4.0,
"eval_accuracy": 0.5166666666666667,
"eval_f1": 0.49163216814145605,
"eval_loss": 1.2308896780014038,
"eval_runtime": 4.2459,
"eval_samples_per_second": 28.263,
"eval_steps_per_second": 1.884,
"step": 172
},
{
"epoch": 5.0,
"learning_rate": 9.330127018922194e-05,
"loss": 0.6168,
"step": 215
},
{
"epoch": 5.0,
"eval_accuracy": 0.5583333333333333,
"eval_f1": 0.5309547133900583,
"eval_loss": 1.2573224306106567,
"eval_runtime": 4.1217,
"eval_samples_per_second": 29.114,
"eval_steps_per_second": 1.941,
"step": 215
},
{
"epoch": 6.0,
"learning_rate": 9.045084971874738e-05,
"loss": 0.3953,
"step": 258
},
{
"epoch": 6.0,
"eval_accuracy": 0.575,
"eval_f1": 0.5400530867590878,
"eval_loss": 1.1502172946929932,
"eval_runtime": 4.3017,
"eval_samples_per_second": 27.896,
"eval_steps_per_second": 1.86,
"step": 258
},
{
"epoch": 7.0,
"learning_rate": 8.715724127386972e-05,
"loss": 0.3048,
"step": 301
},
{
"epoch": 7.0,
"eval_accuracy": 0.65,
"eval_f1": 0.6231481481481482,
"eval_loss": 1.1135584115982056,
"eval_runtime": 4.1683,
"eval_samples_per_second": 28.789,
"eval_steps_per_second": 1.919,
"step": 301
},
{
"epoch": 8.0,
"learning_rate": 8.345653031794292e-05,
"loss": 0.1875,
"step": 344
},
{
"epoch": 8.0,
"eval_accuracy": 0.5666666666666667,
"eval_f1": 0.5597741659473293,
"eval_loss": 1.4224319458007812,
"eval_runtime": 4.2212,
"eval_samples_per_second": 28.428,
"eval_steps_per_second": 1.895,
"step": 344
},
{
"epoch": 9.0,
"learning_rate": 7.938926261462366e-05,
"loss": 0.1277,
"step": 387
},
{
"epoch": 9.0,
"eval_accuracy": 0.6166666666666667,
"eval_f1": 0.6011135939243728,
"eval_loss": 1.346667766571045,
"eval_runtime": 3.955,
"eval_samples_per_second": 30.341,
"eval_steps_per_second": 2.023,
"step": 387
},
{
"epoch": 10.0,
"learning_rate": 7.500000000000001e-05,
"loss": 0.1123,
"step": 430
},
{
"epoch": 10.0,
"eval_accuracy": 0.5833333333333334,
"eval_f1": 0.5656809749645115,
"eval_loss": 1.583767294883728,
"eval_runtime": 4.0704,
"eval_samples_per_second": 29.481,
"eval_steps_per_second": 1.965,
"step": 430
},
{
"epoch": 11.0,
"learning_rate": 7.033683215379002e-05,
"loss": 0.1123,
"step": 473
},
{
"epoch": 11.0,
"eval_accuracy": 0.5833333333333334,
"eval_f1": 0.5549914858886633,
"eval_loss": 1.50627601146698,
"eval_runtime": 4.0673,
"eval_samples_per_second": 29.504,
"eval_steps_per_second": 1.967,
"step": 473
},
{
"epoch": 12.0,
"learning_rate": 6.545084971874738e-05,
"loss": 0.0694,
"step": 516
},
{
"epoch": 12.0,
"eval_accuracy": 0.55,
"eval_f1": 0.5320146001860588,
"eval_loss": 1.7733001708984375,
"eval_runtime": 4.0123,
"eval_samples_per_second": 29.908,
"eval_steps_per_second": 1.994,
"step": 516
},
{
"epoch": 13.0,
"learning_rate": 6.0395584540887963e-05,
"loss": 0.0499,
"step": 559
},
{
"epoch": 13.0,
"eval_accuracy": 0.5833333333333334,
"eval_f1": 0.5536173850790786,
"eval_loss": 1.6328585147857666,
"eval_runtime": 4.0403,
"eval_samples_per_second": 29.701,
"eval_steps_per_second": 1.98,
"step": 559
},
{
"epoch": 14.0,
"learning_rate": 5.522642316338268e-05,
"loss": 0.0367,
"step": 602
},
{
"epoch": 14.0,
"eval_accuracy": 0.5833333333333334,
"eval_f1": 0.5684853336495889,
"eval_loss": 1.6878242492675781,
"eval_runtime": 3.9845,
"eval_samples_per_second": 30.116,
"eval_steps_per_second": 2.008,
"step": 602
},
{
"epoch": 15.0,
"learning_rate": 5e-05,
"loss": 0.0291,
"step": 645
},
{
"epoch": 15.0,
"eval_accuracy": 0.575,
"eval_f1": 0.5392005606664051,
"eval_loss": 1.685531735420227,
"eval_runtime": 3.9802,
"eval_samples_per_second": 30.149,
"eval_steps_per_second": 2.01,
"step": 645
},
{
"epoch": 16.0,
"learning_rate": 4.477357683661734e-05,
"loss": 0.0284,
"step": 688
},
{
"epoch": 16.0,
"eval_accuracy": 0.6083333333333333,
"eval_f1": 0.5880117663277057,
"eval_loss": 1.7869312763214111,
"eval_runtime": 4.0749,
"eval_samples_per_second": 29.448,
"eval_steps_per_second": 1.963,
"step": 688
},
{
"epoch": 17.0,
"learning_rate": 3.960441545911204e-05,
"loss": 0.0316,
"step": 731
},
{
"epoch": 17.0,
"eval_accuracy": 0.5916666666666667,
"eval_f1": 0.566969594710963,
"eval_loss": 1.5830930471420288,
"eval_runtime": 4.0457,
"eval_samples_per_second": 29.661,
"eval_steps_per_second": 1.977,
"step": 731
},
{
"epoch": 18.0,
"learning_rate": 3.4549150281252636e-05,
"loss": 0.0273,
"step": 774
},
{
"epoch": 18.0,
"eval_accuracy": 0.625,
"eval_f1": 0.598442406793843,
"eval_loss": 1.5932706594467163,
"eval_runtime": 4.1291,
"eval_samples_per_second": 29.062,
"eval_steps_per_second": 1.937,
"step": 774
},
{
"epoch": 19.0,
"learning_rate": 2.9663167846209998e-05,
"loss": 0.0234,
"step": 817
},
{
"epoch": 19.0,
"eval_accuracy": 0.5833333333333334,
"eval_f1": 0.5652118457947398,
"eval_loss": 1.7830352783203125,
"eval_runtime": 4.2401,
"eval_samples_per_second": 28.301,
"eval_steps_per_second": 1.887,
"step": 817
},
{
"epoch": 20.0,
"learning_rate": 2.500000000000001e-05,
"loss": 0.0194,
"step": 860
},
{
"epoch": 20.0,
"eval_accuracy": 0.6083333333333333,
"eval_f1": 0.5878385871868214,
"eval_loss": 1.6804471015930176,
"eval_runtime": 3.9914,
"eval_samples_per_second": 30.065,
"eval_steps_per_second": 2.004,
"step": 860
},
{
"epoch": 21.0,
"learning_rate": 2.061073738537635e-05,
"loss": 0.0214,
"step": 903
},
{
"epoch": 21.0,
"eval_accuracy": 0.6,
"eval_f1": 0.5700534489379734,
"eval_loss": 1.596238374710083,
"eval_runtime": 4.0432,
"eval_samples_per_second": 29.68,
"eval_steps_per_second": 1.979,
"step": 903
},
{
"epoch": 22.0,
"learning_rate": 1.6543469682057106e-05,
"loss": 0.0204,
"step": 946
},
{
"epoch": 22.0,
"eval_accuracy": 0.625,
"eval_f1": 0.5992033196773662,
"eval_loss": 1.5684361457824707,
"eval_runtime": 4.0843,
"eval_samples_per_second": 29.381,
"eval_steps_per_second": 1.959,
"step": 946
},
{
"epoch": 23.0,
"learning_rate": 1.2842758726130283e-05,
"loss": 0.0178,
"step": 989
},
{
"epoch": 23.0,
"eval_accuracy": 0.625,
"eval_f1": 0.5992033196773662,
"eval_loss": 1.592431664466858,
"eval_runtime": 4.0813,
"eval_samples_per_second": 29.402,
"eval_steps_per_second": 1.96,
"step": 989
},
{
"epoch": 24.0,
"learning_rate": 9.549150281252633e-06,
"loss": 0.0173,
"step": 1032
},
{
"epoch": 24.0,
"eval_accuracy": 0.6166666666666667,
"eval_f1": 0.5932522595359776,
"eval_loss": 1.6227874755859375,
"eval_runtime": 4.2004,
"eval_samples_per_second": 28.569,
"eval_steps_per_second": 1.905,
"step": 1032
},
{
"epoch": 25.0,
"learning_rate": 6.698729810778065e-06,
"loss": 0.016,
"step": 1075
},
{
"epoch": 25.0,
"eval_accuracy": 0.6333333333333333,
"eval_f1": 0.6072678358063984,
"eval_loss": 1.6176973581314087,
"eval_runtime": 4.0151,
"eval_samples_per_second": 29.887,
"eval_steps_per_second": 1.992,
"step": 1075
},
{
"epoch": 26.0,
"learning_rate": 4.322727117869951e-06,
"loss": 0.016,
"step": 1118
},
{
"epoch": 26.0,
"eval_accuracy": 0.625,
"eval_f1": 0.6009141292059813,
"eval_loss": 1.6267857551574707,
"eval_runtime": 4.055,
"eval_samples_per_second": 29.593,
"eval_steps_per_second": 1.973,
"step": 1118
},
{
"epoch": 27.0,
"learning_rate": 2.4471741852423237e-06,
"loss": 0.016,
"step": 1161
},
{
"epoch": 27.0,
"eval_accuracy": 0.625,
"eval_f1": 0.6009141292059813,
"eval_loss": 1.6387226581573486,
"eval_runtime": 4.2902,
"eval_samples_per_second": 27.971,
"eval_steps_per_second": 1.865,
"step": 1161
},
{
"epoch": 28.0,
"learning_rate": 1.0926199633097157e-06,
"loss": 0.0159,
"step": 1204
},
{
"epoch": 28.0,
"eval_accuracy": 0.625,
"eval_f1": 0.6009141292059813,
"eval_loss": 1.6403223276138306,
"eval_runtime": 4.1003,
"eval_samples_per_second": 29.266,
"eval_steps_per_second": 1.951,
"step": 1204
},
{
"epoch": 29.0,
"learning_rate": 2.7390523158633554e-07,
"loss": 0.0162,
"step": 1247
},
{
"epoch": 29.0,
"eval_accuracy": 0.625,
"eval_f1": 0.6009141292059813,
"eval_loss": 1.6409173011779785,
"eval_runtime": 4.0023,
"eval_samples_per_second": 29.983,
"eval_steps_per_second": 1.999,
"step": 1247
},
{
"epoch": 30.0,
"learning_rate": 0.0,
"loss": 0.018,
"step": 1290
},
{
"epoch": 30.0,
"eval_accuracy": 0.625,
"eval_f1": 0.6009141292059813,
"eval_loss": 1.6411793231964111,
"eval_runtime": 4.269,
"eval_samples_per_second": 28.11,
"eval_steps_per_second": 1.874,
"step": 1290
},
{
"epoch": 30.0,
"step": 1290,
"total_flos": 1.5809215993675776e+18,
"train_loss": 0.25586533430934877,
"train_runtime": 2391.6751,
"train_samples_per_second": 8.53,
"train_steps_per_second": 0.539
}
],
"logging_steps": 500,
"max_steps": 1290,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 1.5809215993675776e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}