reranking_model / trainer_state.json
Cathy's picture
Add model weight and config
29fc92c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.996539792387544,
"global_step": 3600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.8460490703582764,
"eval_loss": 0.5365713834762573,
"eval_runtime": 8.0333,
"eval_samples_per_second": 91.369,
"eval_steps_per_second": 45.685,
"step": 180
},
{
"epoch": 2.0,
"eval_accuracy": 0.8092643022537231,
"eval_loss": 0.5189609527587891,
"eval_runtime": 8.0124,
"eval_samples_per_second": 91.608,
"eval_steps_per_second": 45.804,
"step": 360
},
{
"epoch": 2.78,
"learning_rate": 4.309722222222222e-05,
"loss": 0.4021,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.8283378481864929,
"eval_loss": 0.6708077788352966,
"eval_runtime": 8.0551,
"eval_samples_per_second": 91.122,
"eval_steps_per_second": 45.561,
"step": 540
},
{
"epoch": 4.0,
"eval_accuracy": 0.8542234301567078,
"eval_loss": 0.516476571559906,
"eval_runtime": 8.0611,
"eval_samples_per_second": 91.054,
"eval_steps_per_second": 45.527,
"step": 720
},
{
"epoch": 5.0,
"eval_accuracy": 0.8188011050224304,
"eval_loss": 0.6029361486434937,
"eval_runtime": 8.0681,
"eval_samples_per_second": 90.975,
"eval_steps_per_second": 45.488,
"step": 900
},
{
"epoch": 5.55,
"learning_rate": 3.6166666666666674e-05,
"loss": 0.2576,
"step": 1000
},
{
"epoch": 6.0,
"eval_accuracy": 0.8487738370895386,
"eval_loss": 0.6060934066772461,
"eval_runtime": 8.0522,
"eval_samples_per_second": 91.155,
"eval_steps_per_second": 45.578,
"step": 1080
},
{
"epoch": 7.0,
"eval_accuracy": 0.8514986634254456,
"eval_loss": 0.748849630355835,
"eval_runtime": 8.0692,
"eval_samples_per_second": 90.963,
"eval_steps_per_second": 45.481,
"step": 1260
},
{
"epoch": 8.0,
"eval_accuracy": 0.8651226162910461,
"eval_loss": 0.6119422912597656,
"eval_runtime": 8.0635,
"eval_samples_per_second": 91.028,
"eval_steps_per_second": 45.514,
"step": 1440
},
{
"epoch": 8.33,
"learning_rate": 2.9236111111111115e-05,
"loss": 0.1738,
"step": 1500
},
{
"epoch": 9.0,
"eval_accuracy": 0.8542234301567078,
"eval_loss": 0.6864181160926819,
"eval_runtime": 8.212,
"eval_samples_per_second": 89.382,
"eval_steps_per_second": 44.691,
"step": 1620
},
{
"epoch": 10.0,
"eval_accuracy": 0.8446866273880005,
"eval_loss": 0.7817405462265015,
"eval_runtime": 8.0215,
"eval_samples_per_second": 91.505,
"eval_steps_per_second": 45.752,
"step": 1800
},
{
"epoch": 11.0,
"eval_accuracy": 0.8514986634254456,
"eval_loss": 0.6188392043113708,
"eval_runtime": 8.0857,
"eval_samples_per_second": 90.777,
"eval_steps_per_second": 45.389,
"step": 1980
},
{
"epoch": 11.11,
"learning_rate": 2.2305555555555556e-05,
"loss": 0.1303,
"step": 2000
},
{
"epoch": 12.0,
"eval_accuracy": 0.8569482564926147,
"eval_loss": 0.5936163663864136,
"eval_runtime": 8.0618,
"eval_samples_per_second": 91.047,
"eval_steps_per_second": 45.523,
"step": 2160
},
{
"epoch": 13.0,
"eval_accuracy": 0.859673023223877,
"eval_loss": 0.6109394431114197,
"eval_runtime": 8.0512,
"eval_samples_per_second": 91.167,
"eval_steps_per_second": 45.583,
"step": 2340
},
{
"epoch": 13.89,
"learning_rate": 1.5375e-05,
"loss": 0.1226,
"step": 2500
},
{
"epoch": 14.0,
"eval_accuracy": 0.8501362204551697,
"eval_loss": 0.7600889205932617,
"eval_runtime": 8.0176,
"eval_samples_per_second": 91.549,
"eval_steps_per_second": 45.774,
"step": 2520
},
{
"epoch": 15.0,
"eval_accuracy": 0.8501362204551697,
"eval_loss": 0.6596993803977966,
"eval_runtime": 8.0565,
"eval_samples_per_second": 91.107,
"eval_steps_per_second": 45.553,
"step": 2700
},
{
"epoch": 16.0,
"eval_accuracy": 0.8460490703582764,
"eval_loss": 0.712175190448761,
"eval_runtime": 8.0456,
"eval_samples_per_second": 91.23,
"eval_steps_per_second": 45.615,
"step": 2880
},
{
"epoch": 16.66,
"learning_rate": 8.430555555555556e-06,
"loss": 0.1261,
"step": 3000
},
{
"epoch": 17.0,
"eval_accuracy": 0.8514986634254456,
"eval_loss": 0.7294248938560486,
"eval_runtime": 8.0369,
"eval_samples_per_second": 91.329,
"eval_steps_per_second": 45.664,
"step": 3060
},
{
"epoch": 18.0,
"eval_accuracy": 0.863760232925415,
"eval_loss": 0.6875426173210144,
"eval_runtime": 8.0764,
"eval_samples_per_second": 90.882,
"eval_steps_per_second": 45.441,
"step": 3240
},
{
"epoch": 19.0,
"eval_accuracy": 0.8664849996566772,
"eval_loss": 0.6823601126670837,
"eval_runtime": 8.0997,
"eval_samples_per_second": 90.62,
"eval_steps_per_second": 45.31,
"step": 3420
},
{
"epoch": 19.44,
"learning_rate": 1.4861111111111113e-06,
"loss": 0.1044,
"step": 3500
},
{
"epoch": 20.0,
"eval_accuracy": 0.8623978495597839,
"eval_loss": 0.6752045750617981,
"eval_runtime": 8.0488,
"eval_samples_per_second": 91.193,
"eval_steps_per_second": 45.597,
"step": 3600
},
{
"epoch": 20.0,
"step": 3600,
"total_flos": 1.3459697175036672e+16,
"train_loss": 0.18581116994222005,
"train_runtime": 2514.2374,
"train_samples_per_second": 22.981,
"train_steps_per_second": 1.432
}
],
"max_steps": 3600,
"num_train_epochs": 20,
"total_flos": 1.3459697175036672e+16,
"trial_name": null,
"trial_params": null
}