EAMT2023-Baseline-RO-EN / trainer_state.json
Javad Pourmostafa Roshan Sharami
Baseline-RO-EN
67c7d07
{
"best_metric": 0.016893472522497177,
"best_model_checkpoint": "RO-EN_1/checkpoint-6000",
"epoch": 27.397260273972602,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.28,
"learning_rate": 4.943150684931507e-06,
"loss": 0.0535,
"step": 500
},
{
"epoch": 2.28,
"eval_loss": 0.031102217733860016,
"eval_mse": 0.031102217733860016,
"eval_pearson_correlation": 0.8579101465486042,
"eval_rmse": 0.1763582080602646,
"eval_runtime": 2.5761,
"eval_samples_per_second": 388.185,
"eval_spearman_corr": 0.7634071672215601,
"eval_steps_per_second": 12.422,
"learning_rate": 4.943150684931507e-06,
"step": 500
},
{
"epoch": 4.57,
"learning_rate": 4.886187214611872e-06,
"loss": 0.0397,
"step": 1000
},
{
"epoch": 4.57,
"eval_loss": 0.05550598353147507,
"eval_mse": 0.05550597980618477,
"eval_pearson_correlation": 0.8498936520403519,
"eval_rmse": 0.23559707403182983,
"eval_runtime": 2.579,
"eval_samples_per_second": 387.749,
"eval_spearman_corr": 0.758833553959186,
"eval_steps_per_second": 12.408,
"learning_rate": 4.886187214611872e-06,
"step": 1000
},
{
"epoch": 6.85,
"learning_rate": 4.829109589041096e-06,
"loss": 0.0337,
"step": 1500
},
{
"epoch": 6.85,
"eval_loss": 0.021814454346895218,
"eval_mse": 0.02181445248425007,
"eval_pearson_correlation": 0.8574044008888592,
"eval_rmse": 0.14769716560840607,
"eval_runtime": 2.6081,
"eval_samples_per_second": 383.423,
"eval_spearman_corr": 0.7602827223453543,
"eval_steps_per_second": 12.27,
"learning_rate": 4.829109589041096e-06,
"step": 1500
},
{
"epoch": 9.13,
"learning_rate": 4.77203196347032e-06,
"loss": 0.0287,
"step": 2000
},
{
"epoch": 9.13,
"eval_loss": 0.019617466256022453,
"eval_mse": 0.019617464393377304,
"eval_pearson_correlation": 0.8628991091021981,
"eval_rmse": 0.1400623619556427,
"eval_runtime": 2.6039,
"eval_samples_per_second": 384.039,
"eval_spearman_corr": 0.7663673153661713,
"eval_steps_per_second": 12.289,
"learning_rate": 4.77203196347032e-06,
"step": 2000
},
{
"epoch": 11.42,
"learning_rate": 4.715068493150685e-06,
"loss": 0.0245,
"step": 2500
},
{
"epoch": 11.42,
"eval_loss": 0.02356332167983055,
"eval_mse": 0.02356332167983055,
"eval_pearson_correlation": 0.8441016733928287,
"eval_rmse": 0.15350349247455597,
"eval_runtime": 2.598,
"eval_samples_per_second": 384.906,
"eval_spearman_corr": 0.7481853356383714,
"eval_steps_per_second": 12.317,
"learning_rate": 4.715068493150685e-06,
"step": 2500
},
{
"epoch": 13.7,
"learning_rate": 4.657990867579909e-06,
"loss": 0.0222,
"step": 3000
},
{
"epoch": 13.7,
"eval_loss": 0.027557892724871635,
"eval_mse": 0.027557892724871635,
"eval_pearson_correlation": 0.8438974893300436,
"eval_rmse": 0.1660057008266449,
"eval_runtime": 2.5972,
"eval_samples_per_second": 385.026,
"eval_spearman_corr": 0.7389656607088352,
"eval_steps_per_second": 12.321,
"learning_rate": 4.657990867579909e-06,
"step": 3000
},
{
"epoch": 15.98,
"learning_rate": 4.6009132420091325e-06,
"loss": 0.0192,
"step": 3500
},
{
"epoch": 15.98,
"eval_loss": 0.01782875880599022,
"eval_mse": 0.01782875880599022,
"eval_pearson_correlation": 0.849721481092871,
"eval_rmse": 0.13352437317371368,
"eval_runtime": 2.5686,
"eval_samples_per_second": 389.319,
"eval_spearman_corr": 0.7444835858187753,
"eval_steps_per_second": 12.458,
"learning_rate": 4.6009132420091325e-06,
"step": 3500
},
{
"epoch": 18.26,
"learning_rate": 4.5438356164383565e-06,
"loss": 0.017,
"step": 4000
},
{
"epoch": 18.26,
"eval_loss": 0.036885496228933334,
"eval_mse": 0.036885492503643036,
"eval_pearson_correlation": 0.8272909677260112,
"eval_rmse": 0.19205595552921295,
"eval_runtime": 2.5678,
"eval_samples_per_second": 389.436,
"eval_spearman_corr": 0.7383094416231242,
"eval_steps_per_second": 12.462,
"learning_rate": 4.5438356164383565e-06,
"step": 4000
},
{
"epoch": 20.55,
"learning_rate": 4.4867579908675805e-06,
"loss": 0.0156,
"step": 4500
},
{
"epoch": 20.55,
"eval_loss": 0.02785499580204487,
"eval_mse": 0.02785499580204487,
"eval_pearson_correlation": 0.842883000565405,
"eval_rmse": 0.16689816117286682,
"eval_runtime": 2.5716,
"eval_samples_per_second": 388.862,
"eval_spearman_corr": 0.7491195713472709,
"eval_steps_per_second": 12.444,
"learning_rate": 4.4867579908675805e-06,
"step": 4500
},
{
"epoch": 22.83,
"learning_rate": 4.429680365296804e-06,
"loss": 0.0137,
"step": 5000
},
{
"epoch": 22.83,
"eval_loss": 0.030997304245829582,
"eval_mse": 0.03099730797111988,
"eval_pearson_correlation": 0.8406359358077359,
"eval_rmse": 0.1760605275630951,
"eval_runtime": 2.6837,
"eval_samples_per_second": 372.623,
"eval_spearman_corr": 0.7367537850258313,
"eval_steps_per_second": 11.924,
"learning_rate": 4.429680365296804e-06,
"step": 5000
},
{
"epoch": 25.11,
"learning_rate": 4.372602739726028e-06,
"loss": 0.0121,
"step": 5500
},
{
"epoch": 25.11,
"eval_loss": 0.01819545030593872,
"eval_mse": 0.01819545030593872,
"eval_pearson_correlation": 0.8390532054666895,
"eval_rmse": 0.13489051163196564,
"eval_runtime": 2.5637,
"eval_samples_per_second": 390.062,
"eval_spearman_corr": 0.7348519956483917,
"eval_steps_per_second": 12.482,
"learning_rate": 4.372602739726028e-06,
"step": 5500
},
{
"epoch": 27.4,
"learning_rate": 4.315525114155252e-06,
"loss": 0.0111,
"step": 6000
},
{
"epoch": 27.4,
"eval_loss": 0.016893472522497177,
"eval_mse": 0.016893472522497177,
"eval_pearson_correlation": 0.8536975289078368,
"eval_rmse": 0.12997488677501678,
"eval_runtime": 2.5817,
"eval_samples_per_second": 387.337,
"eval_spearman_corr": 0.7512867640197094,
"eval_steps_per_second": 12.395,
"learning_rate": 4.315525114155252e-06,
"step": 6000
}
],
"max_steps": 43800,
"num_train_epochs": 200,
"total_flos": 5.23619884848744e+16,
"trial_name": null,
"trial_params": null
}