Far50BrBERT-base / trainer_state.json
giggio's picture
initial model
ae7c2ec
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 11.450381679389313,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.9964473845399976,
"eval_f1": 0.7887323943661972,
"eval_loss": 0.02695615217089653,
"eval_precision": 0.8484848484848485,
"eval_recall": 0.7368421052631579,
"eval_runtime": 1.6552,
"eval_samples_per_second": 59.207,
"eval_steps_per_second": 19.937,
"step": 131
},
{
"epoch": 2.0,
"eval_accuracy": 0.9969374004655152,
"eval_f1": 0.8533333333333334,
"eval_loss": 0.022969316691160202,
"eval_precision": 0.8648648648648649,
"eval_recall": 0.8421052631578947,
"eval_runtime": 1.6508,
"eval_samples_per_second": 59.365,
"eval_steps_per_second": 19.99,
"step": 262
},
{
"epoch": 3.0,
"eval_accuracy": 0.9968761484748254,
"eval_f1": 0.8211920529801323,
"eval_loss": 0.023406116291880608,
"eval_precision": 0.8266666666666667,
"eval_recall": 0.8157894736842105,
"eval_runtime": 1.6791,
"eval_samples_per_second": 58.364,
"eval_steps_per_second": 19.653,
"step": 393
},
{
"epoch": 3.82,
"learning_rate": 3.727735368956743e-05,
"loss": 0.0151,
"step": 500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9977949283351709,
"eval_f1": 0.832214765100671,
"eval_loss": 0.01629047282040119,
"eval_precision": 0.8493150684931506,
"eval_recall": 0.8157894736842105,
"eval_runtime": 1.6475,
"eval_samples_per_second": 59.485,
"eval_steps_per_second": 20.031,
"step": 524
},
{
"epoch": 5.0,
"eval_accuracy": 0.9976724243537914,
"eval_f1": 0.8609271523178809,
"eval_loss": 0.016509264707565308,
"eval_precision": 0.8666666666666667,
"eval_recall": 0.8552631578947368,
"eval_runtime": 1.6672,
"eval_samples_per_second": 58.78,
"eval_steps_per_second": 19.793,
"step": 655
},
{
"epoch": 6.0,
"eval_accuracy": 0.9976111723631018,
"eval_f1": 0.8535031847133758,
"eval_loss": 0.01784520410001278,
"eval_precision": 0.8271604938271605,
"eval_recall": 0.881578947368421,
"eval_runtime": 1.6462,
"eval_samples_per_second": 59.532,
"eval_steps_per_second": 20.046,
"step": 786
},
{
"epoch": 7.0,
"eval_accuracy": 0.99797868430724,
"eval_f1": 0.9103448275862069,
"eval_loss": 0.021767688915133476,
"eval_precision": 0.9565217391304348,
"eval_recall": 0.868421052631579,
"eval_runtime": 1.6645,
"eval_samples_per_second": 58.876,
"eval_steps_per_second": 19.825,
"step": 917
},
{
"epoch": 7.63,
"learning_rate": 2.455470737913486e-05,
"loss": 0.0004,
"step": 1000
},
{
"epoch": 8.0,
"eval_accuracy": 0.9981624402793091,
"eval_f1": 0.9115646258503401,
"eval_loss": 0.01889556646347046,
"eval_precision": 0.9436619718309859,
"eval_recall": 0.881578947368421,
"eval_runtime": 1.6651,
"eval_samples_per_second": 58.855,
"eval_steps_per_second": 19.819,
"step": 1048
},
{
"epoch": 9.0,
"eval_accuracy": 0.9981011882886194,
"eval_f1": 0.8918918918918918,
"eval_loss": 0.020160716027021408,
"eval_precision": 0.9166666666666666,
"eval_recall": 0.868421052631579,
"eval_runtime": 1.6829,
"eval_samples_per_second": 58.232,
"eval_steps_per_second": 19.609,
"step": 1179
},
{
"epoch": 10.0,
"eval_accuracy": 0.99797868430724,
"eval_f1": 0.8918918918918918,
"eval_loss": 0.021420367062091827,
"eval_precision": 0.9166666666666666,
"eval_recall": 0.868421052631579,
"eval_runtime": 1.656,
"eval_samples_per_second": 59.18,
"eval_steps_per_second": 19.928,
"step": 1310
},
{
"epoch": 11.0,
"eval_accuracy": 0.9982236922699987,
"eval_f1": 0.9251700680272109,
"eval_loss": 0.01935696415603161,
"eval_precision": 0.9577464788732394,
"eval_recall": 0.8947368421052632,
"eval_runtime": 1.6602,
"eval_samples_per_second": 59.028,
"eval_steps_per_second": 19.877,
"step": 1441
},
{
"epoch": 11.45,
"learning_rate": 1.1832061068702292e-05,
"loss": 0.0001,
"step": 1500
}
],
"max_steps": 1965,
"num_train_epochs": 15,
"total_flos": 693341757784872.0,
"trial_name": null,
"trial_params": null
}