baby_chillanto / trainer_state.json
Rafeq's picture
Upload 2 files
279370a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9691629955947136,
"global_step": 220,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 9.559471365638767e-05,
"loss": 1.5082,
"step": 10
},
{
"epoch": 0.04,
"eval_accuracy": 0.3876652121543884,
"eval_loss": 1.4889150857925415,
"eval_runtime": 331.6444,
"eval_samples_per_second": 1.369,
"eval_steps_per_second": 0.344,
"step": 10
},
{
"epoch": 0.09,
"learning_rate": 9.118942731277533e-05,
"loss": 1.4193,
"step": 20
},
{
"epoch": 0.09,
"eval_accuracy": 0.47356829047203064,
"eval_loss": 1.3213800191879272,
"eval_runtime": 340.6201,
"eval_samples_per_second": 1.333,
"eval_steps_per_second": 0.335,
"step": 20
},
{
"epoch": 0.13,
"learning_rate": 8.6784140969163e-05,
"loss": 1.2624,
"step": 30
},
{
"epoch": 0.13,
"eval_accuracy": 0.5044052600860596,
"eval_loss": 1.2342218160629272,
"eval_runtime": 350.6765,
"eval_samples_per_second": 1.295,
"eval_steps_per_second": 0.325,
"step": 30
},
{
"epoch": 0.18,
"learning_rate": 8.237885462555066e-05,
"loss": 1.3081,
"step": 40
},
{
"epoch": 0.18,
"eval_accuracy": 0.592510998249054,
"eval_loss": 1.0834957361221313,
"eval_runtime": 348.0747,
"eval_samples_per_second": 1.304,
"eval_steps_per_second": 0.328,
"step": 40
},
{
"epoch": 0.22,
"learning_rate": 7.797356828193832e-05,
"loss": 1.1592,
"step": 50
},
{
"epoch": 0.22,
"eval_accuracy": 0.5110132098197937,
"eval_loss": 1.0756179094314575,
"eval_runtime": 352.6671,
"eval_samples_per_second": 1.287,
"eval_steps_per_second": 0.323,
"step": 50
},
{
"epoch": 0.26,
"learning_rate": 7.3568281938326e-05,
"loss": 0.8424,
"step": 60
},
{
"epoch": 0.26,
"eval_accuracy": 0.634361207485199,
"eval_loss": 0.965646505355835,
"eval_runtime": 341.7366,
"eval_samples_per_second": 1.329,
"eval_steps_per_second": 0.334,
"step": 60
},
{
"epoch": 0.31,
"learning_rate": 6.916299559471366e-05,
"loss": 0.7428,
"step": 70
},
{
"epoch": 0.31,
"eval_accuracy": 0.6189427375793457,
"eval_loss": 0.8488078713417053,
"eval_runtime": 340.5032,
"eval_samples_per_second": 1.333,
"eval_steps_per_second": 0.335,
"step": 70
},
{
"epoch": 0.35,
"learning_rate": 6.475770925110133e-05,
"loss": 0.857,
"step": 80
},
{
"epoch": 0.35,
"eval_accuracy": 0.6806167364120483,
"eval_loss": 0.7956423163414001,
"eval_runtime": 340.5035,
"eval_samples_per_second": 1.333,
"eval_steps_per_second": 0.335,
"step": 80
},
{
"epoch": 0.4,
"learning_rate": 6.035242290748899e-05,
"loss": 1.0037,
"step": 90
},
{
"epoch": 0.4,
"eval_accuracy": 0.7731277346611023,
"eval_loss": 0.6489915251731873,
"eval_runtime": 342.3782,
"eval_samples_per_second": 1.326,
"eval_steps_per_second": 0.333,
"step": 90
},
{
"epoch": 0.44,
"learning_rate": 5.5947136563876653e-05,
"loss": 0.8157,
"step": 100
},
{
"epoch": 0.44,
"eval_accuracy": 0.6762114763259888,
"eval_loss": 0.6552606225013733,
"eval_runtime": 334.1663,
"eval_samples_per_second": 1.359,
"eval_steps_per_second": 0.341,
"step": 100
},
{
"epoch": 0.48,
"learning_rate": 5.154185022026432e-05,
"loss": 0.5338,
"step": 110
},
{
"epoch": 0.48,
"eval_accuracy": 0.8039647340774536,
"eval_loss": 0.517014741897583,
"eval_runtime": 340.6693,
"eval_samples_per_second": 1.333,
"eval_steps_per_second": 0.335,
"step": 110
},
{
"epoch": 0.53,
"learning_rate": 4.7136563876651986e-05,
"loss": 0.509,
"step": 120
},
{
"epoch": 0.53,
"eval_accuracy": 0.8259912133216858,
"eval_loss": 0.48134031891822815,
"eval_runtime": 335.5911,
"eval_samples_per_second": 1.353,
"eval_steps_per_second": 0.34,
"step": 120
},
{
"epoch": 0.57,
"learning_rate": 4.273127753303965e-05,
"loss": 0.5301,
"step": 130
},
{
"epoch": 0.57,
"eval_accuracy": 0.8017621040344238,
"eval_loss": 0.4793573021888733,
"eval_runtime": 330.9864,
"eval_samples_per_second": 1.372,
"eval_steps_per_second": 0.344,
"step": 130
},
{
"epoch": 0.62,
"learning_rate": 3.832599118942731e-05,
"loss": 0.6642,
"step": 140
},
{
"epoch": 0.62,
"eval_accuracy": 0.8215858936309814,
"eval_loss": 0.42870593070983887,
"eval_runtime": 340.6662,
"eval_samples_per_second": 1.333,
"eval_steps_per_second": 0.335,
"step": 140
},
{
"epoch": 0.66,
"learning_rate": 3.392070484581498e-05,
"loss": 0.3661,
"step": 150
},
{
"epoch": 0.66,
"eval_accuracy": 0.8590308427810669,
"eval_loss": 0.38554972410202026,
"eval_runtime": 338.1541,
"eval_samples_per_second": 1.343,
"eval_steps_per_second": 0.337,
"step": 150
},
{
"epoch": 0.7,
"learning_rate": 2.9515418502202647e-05,
"loss": 0.2914,
"step": 160
},
{
"epoch": 0.7,
"eval_accuracy": 0.8237885236740112,
"eval_loss": 0.4844018816947937,
"eval_runtime": 337.4013,
"eval_samples_per_second": 1.346,
"eval_steps_per_second": 0.338,
"step": 160
},
{
"epoch": 0.75,
"learning_rate": 2.511013215859031e-05,
"loss": 0.3867,
"step": 170
},
{
"epoch": 0.75,
"eval_accuracy": 0.7775330543518066,
"eval_loss": 0.6406939029693604,
"eval_runtime": 341.5663,
"eval_samples_per_second": 1.329,
"eval_steps_per_second": 0.334,
"step": 170
},
{
"epoch": 0.79,
"learning_rate": 2.0704845814977973e-05,
"loss": 0.5115,
"step": 180
},
{
"epoch": 0.79,
"eval_accuracy": 0.8766520023345947,
"eval_loss": 0.33483564853668213,
"eval_runtime": 340.3465,
"eval_samples_per_second": 1.334,
"eval_steps_per_second": 0.335,
"step": 180
},
{
"epoch": 0.84,
"learning_rate": 1.629955947136564e-05,
"loss": 0.3104,
"step": 190
},
{
"epoch": 0.84,
"eval_accuracy": 0.8634361028671265,
"eval_loss": 0.34648624062538147,
"eval_runtime": 337.7392,
"eval_samples_per_second": 1.344,
"eval_steps_per_second": 0.338,
"step": 190
},
{
"epoch": 0.88,
"learning_rate": 1.1894273127753304e-05,
"loss": 0.3787,
"step": 200
},
{
"epoch": 0.88,
"eval_accuracy": 0.865638792514801,
"eval_loss": 0.34313932061195374,
"eval_runtime": 337.3724,
"eval_samples_per_second": 1.346,
"eval_steps_per_second": 0.338,
"step": 200
},
{
"epoch": 0.93,
"learning_rate": 7.488986784140969e-06,
"loss": 0.2639,
"step": 210
},
{
"epoch": 0.93,
"eval_accuracy": 0.8568282127380371,
"eval_loss": 0.3529247045516968,
"eval_runtime": 342.9636,
"eval_samples_per_second": 1.324,
"eval_steps_per_second": 0.332,
"step": 210
},
{
"epoch": 0.97,
"learning_rate": 3.0837004405286347e-06,
"loss": 0.2893,
"step": 220
},
{
"epoch": 0.97,
"eval_accuracy": 0.865638792514801,
"eval_loss": 0.34023645520210266,
"eval_runtime": 336.1969,
"eval_samples_per_second": 1.35,
"eval_steps_per_second": 0.339,
"step": 220
}
],
"max_steps": 227,
"num_train_epochs": 1,
"total_flos": 5.371779943296e+16,
"trial_name": null,
"trial_params": null
}