fatima_fellowship_roberta_small / trainer_state.json
mdrame's picture
Upload trainer_state.json
cdcd0a3
{
"best_metric": 0.001740535837598145,
"best_model_checkpoint": "./results/checkpoint-4400",
"epoch": 0.5499340079190497,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 4e-05,
"loss": 0.1162,
"step": 400
},
{
"epoch": 0.02,
"eval_accuracy": 0.5149715416975996,
"eval_loss": 0.03933835029602051,
"eval_runtime": 296.392,
"eval_samples_per_second": 13.634,
"eval_steps_per_second": 6.819,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 4.9722489454599276e-05,
"loss": 0.0276,
"step": 800
},
{
"epoch": 0.04,
"eval_accuracy": 0.9866369710467706,
"eval_loss": 0.01901180110871792,
"eval_runtime": 296.5449,
"eval_samples_per_second": 13.627,
"eval_steps_per_second": 6.815,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 4.935247539406498e-05,
"loss": 0.0101,
"step": 1200
},
{
"epoch": 0.07,
"eval_accuracy": 0.998020292006929,
"eval_loss": 0.005310355219990015,
"eval_runtime": 296.1395,
"eval_samples_per_second": 13.646,
"eval_steps_per_second": 6.824,
"step": 1200
},
{
"epoch": 0.09,
"learning_rate": 4.8982461333530676e-05,
"loss": 0.1565,
"step": 1600
},
{
"epoch": 0.09,
"eval_accuracy": 0.9965355110121257,
"eval_loss": 0.006798223592340946,
"eval_runtime": 295.8389,
"eval_samples_per_second": 13.659,
"eval_steps_per_second": 6.831,
"step": 1600
},
{
"epoch": 0.11,
"learning_rate": 4.861244727299638e-05,
"loss": 0.0087,
"step": 2000
},
{
"epoch": 0.11,
"eval_accuracy": 0.9965355110121257,
"eval_loss": 0.005225275177508593,
"eval_runtime": 295.8222,
"eval_samples_per_second": 13.66,
"eval_steps_per_second": 6.832,
"step": 2000
},
{
"epoch": 0.13,
"learning_rate": 4.8242433212462075e-05,
"loss": 0.0075,
"step": 2400
},
{
"epoch": 0.13,
"eval_accuracy": 0.9965355110121257,
"eval_loss": 0.0062185670249164104,
"eval_runtime": 295.9205,
"eval_samples_per_second": 13.656,
"eval_steps_per_second": 6.83,
"step": 2400
},
{
"epoch": 0.15,
"learning_rate": 4.787241915192778e-05,
"loss": 0.009,
"step": 2800
},
{
"epoch": 0.15,
"eval_accuracy": 0.9965355110121257,
"eval_loss": 0.006374023854732513,
"eval_runtime": 295.0119,
"eval_samples_per_second": 13.698,
"eval_steps_per_second": 6.851,
"step": 2800
},
{
"epoch": 0.18,
"learning_rate": 4.7502405091393474e-05,
"loss": 0.0097,
"step": 3200
},
{
"epoch": 0.18,
"eval_accuracy": 0.996040584013858,
"eval_loss": 0.004588003735989332,
"eval_runtime": 295.221,
"eval_samples_per_second": 13.688,
"eval_steps_per_second": 6.846,
"step": 3200
},
{
"epoch": 0.2,
"learning_rate": 4.713239103085918e-05,
"loss": 0.0112,
"step": 3600
},
{
"epoch": 0.2,
"eval_accuracy": 0.9928235585251175,
"eval_loss": 0.008035719394683838,
"eval_runtime": 295.0591,
"eval_samples_per_second": 13.696,
"eval_steps_per_second": 6.849,
"step": 3600
},
{
"epoch": 0.22,
"learning_rate": 4.676237697032487e-05,
"loss": 0.012,
"step": 4000
},
{
"epoch": 0.22,
"eval_accuracy": 0.46745854986389507,
"eval_loss": 0.004350500181317329,
"eval_runtime": 294.9657,
"eval_samples_per_second": 13.7,
"eval_steps_per_second": 6.852,
"step": 4000
},
{
"epoch": 0.24,
"learning_rate": 4.6392362909790576e-05,
"loss": 0.0112,
"step": 4400
},
{
"epoch": 0.24,
"eval_accuracy": 0.46745854986389507,
"eval_loss": 0.001740535837598145,
"eval_runtime": 295.095,
"eval_samples_per_second": 13.694,
"eval_steps_per_second": 6.849,
"step": 4400
},
{
"epoch": 0.26,
"learning_rate": 4.602234884925627e-05,
"loss": 0.0054,
"step": 4800
},
{
"epoch": 0.26,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.006510263774544001,
"eval_runtime": 295.0687,
"eval_samples_per_second": 13.695,
"eval_steps_per_second": 6.849,
"step": 4800
},
{
"epoch": 0.29,
"learning_rate": 4.5652334788721976e-05,
"loss": 0.0057,
"step": 5200
},
{
"epoch": 0.29,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0030648894608020782,
"eval_runtime": 295.3544,
"eval_samples_per_second": 13.682,
"eval_steps_per_second": 6.843,
"step": 5200
},
{
"epoch": 0.31,
"learning_rate": 4.528232072818767e-05,
"loss": 0.01,
"step": 5600
},
{
"epoch": 0.31,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.004091416019946337,
"eval_runtime": 295.5003,
"eval_samples_per_second": 13.675,
"eval_steps_per_second": 6.839,
"step": 5600
},
{
"epoch": 0.33,
"learning_rate": 4.4912306667653375e-05,
"loss": 0.0058,
"step": 6000
},
{
"epoch": 0.33,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.005346548743546009,
"eval_runtime": 295.3803,
"eval_samples_per_second": 13.681,
"eval_steps_per_second": 6.842,
"step": 6000
},
{
"epoch": 0.35,
"learning_rate": 4.454229260711907e-05,
"loss": 0.0041,
"step": 6400
},
{
"epoch": 0.35,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.002612765645608306,
"eval_runtime": 295.3189,
"eval_samples_per_second": 13.684,
"eval_steps_per_second": 6.843,
"step": 6400
},
{
"epoch": 0.37,
"learning_rate": 4.4172278546584774e-05,
"loss": 0.007,
"step": 6800
},
{
"epoch": 0.37,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0033588616643100977,
"eval_runtime": 295.3743,
"eval_samples_per_second": 13.681,
"eval_steps_per_second": 6.842,
"step": 6800
},
{
"epoch": 0.4,
"learning_rate": 4.380226448605047e-05,
"loss": 0.005,
"step": 7200
},
{
"epoch": 0.4,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0025339156854897738,
"eval_runtime": 295.0446,
"eval_samples_per_second": 13.696,
"eval_steps_per_second": 6.85,
"step": 7200
},
{
"epoch": 0.42,
"learning_rate": 4.3432250425516174e-05,
"loss": 0.0072,
"step": 7600
},
{
"epoch": 0.42,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0034889692906290293,
"eval_runtime": 295.1897,
"eval_samples_per_second": 13.69,
"eval_steps_per_second": 6.846,
"step": 7600
},
{
"epoch": 0.44,
"learning_rate": 4.306223636498187e-05,
"loss": 0.0095,
"step": 8000
},
{
"epoch": 0.44,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.00457270722836256,
"eval_runtime": 295.4281,
"eval_samples_per_second": 13.678,
"eval_steps_per_second": 6.841,
"step": 8000
},
{
"epoch": 0.46,
"learning_rate": 4.269222230444757e-05,
"loss": 0.0045,
"step": 8400
},
{
"epoch": 0.46,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0037365807220339775,
"eval_runtime": 295.4056,
"eval_samples_per_second": 13.679,
"eval_steps_per_second": 6.841,
"step": 8400
},
{
"epoch": 0.48,
"learning_rate": 4.232220824391327e-05,
"loss": 0.0035,
"step": 8800
},
{
"epoch": 0.48,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0037374396342784166,
"eval_runtime": 295.8559,
"eval_samples_per_second": 13.659,
"eval_steps_per_second": 6.831,
"step": 8800
},
{
"epoch": 0.51,
"learning_rate": 4.1952194183378965e-05,
"loss": 0.0058,
"step": 9200
},
{
"epoch": 0.51,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0036408177111297846,
"eval_runtime": 296.2986,
"eval_samples_per_second": 13.638,
"eval_steps_per_second": 6.821,
"step": 9200
},
{
"epoch": 0.53,
"learning_rate": 4.158218012284467e-05,
"loss": 0.0043,
"step": 9600
},
{
"epoch": 0.53,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0026330926921218634,
"eval_runtime": 295.7809,
"eval_samples_per_second": 13.662,
"eval_steps_per_second": 6.833,
"step": 9600
},
{
"epoch": 0.55,
"learning_rate": 4.121216606231037e-05,
"loss": 0.0026,
"step": 10000
},
{
"epoch": 0.55,
"eval_accuracy": 0.9982677555060628,
"eval_loss": 0.0019218171946704388,
"eval_runtime": 296.1402,
"eval_samples_per_second": 13.646,
"eval_steps_per_second": 6.824,
"step": 10000
}
],
"max_steps": 54552,
"num_train_epochs": 3,
"total_flos": 5262173859840000.0,
"trial_name": null,
"trial_params": null
}