fine-tune-3e-4 / trainer_state.json
neverloses87's picture
End of training
29077fe
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.2340425531914896,
"global_step": 188,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 0.00015789473684210524,
"loss": 0.7328,
"step": 10
},
{
"epoch": 0.11,
"learning_rate": 0.00029822485207100586,
"loss": 0.6679,
"step": 20
},
{
"epoch": 0.16,
"learning_rate": 0.0002804733727810651,
"loss": 0.5821,
"step": 30
},
{
"epoch": 0.21,
"learning_rate": 0.00026272189349112423,
"loss": 0.5167,
"step": 40
},
{
"epoch": 0.25,
"eval_accuracy": 0.8396584440227703,
"eval_loss": 0.4641830027103424,
"eval_runtime": 815.2421,
"eval_samples_per_second": 1.293,
"eval_steps_per_second": 0.021,
"step": 47
},
{
"epoch": 1.01,
"learning_rate": 0.0002449704142011834,
"loss": 0.4418,
"step": 50
},
{
"epoch": 1.06,
"learning_rate": 0.00022721893491124259,
"loss": 0.4147,
"step": 60
},
{
"epoch": 1.12,
"learning_rate": 0.00020946745562130177,
"loss": 0.3811,
"step": 70
},
{
"epoch": 1.17,
"learning_rate": 0.00019171597633136094,
"loss": 0.3601,
"step": 80
},
{
"epoch": 1.22,
"learning_rate": 0.0001739644970414201,
"loss": 0.3341,
"step": 90
},
{
"epoch": 1.24,
"eval_accuracy": 0.8823529411764706,
"eval_loss": 0.363214373588562,
"eval_runtime": 789.1022,
"eval_samples_per_second": 1.336,
"eval_steps_per_second": 0.022,
"step": 94
},
{
"epoch": 2.02,
"learning_rate": 0.00015621301775147929,
"loss": 0.3092,
"step": 100
},
{
"epoch": 2.07,
"learning_rate": 0.00013846153846153845,
"loss": 0.3099,
"step": 110
},
{
"epoch": 2.13,
"learning_rate": 0.00012071005917159762,
"loss": 0.3077,
"step": 120
},
{
"epoch": 2.18,
"learning_rate": 0.00010295857988165678,
"loss": 0.3326,
"step": 130
},
{
"epoch": 2.23,
"learning_rate": 8.520710059171597e-05,
"loss": 0.2945,
"step": 140
},
{
"epoch": 2.24,
"eval_accuracy": 0.8946869070208728,
"eval_loss": 0.3224978744983673,
"eval_runtime": 802.0516,
"eval_samples_per_second": 1.314,
"eval_steps_per_second": 0.021,
"step": 141
},
{
"epoch": 3.03,
"learning_rate": 6.745562130177515e-05,
"loss": 0.2907,
"step": 150
},
{
"epoch": 3.09,
"learning_rate": 4.9704142011834316e-05,
"loss": 0.2774,
"step": 160
},
{
"epoch": 3.14,
"learning_rate": 3.195266272189349e-05,
"loss": 0.289,
"step": 170
},
{
"epoch": 3.19,
"learning_rate": 1.4201183431952662e-05,
"loss": 0.2909,
"step": 180
},
{
"epoch": 3.23,
"eval_accuracy": 0.8975332068311196,
"eval_loss": 0.31426551938056946,
"eval_runtime": 814.9805,
"eval_samples_per_second": 1.293,
"eval_steps_per_second": 0.021,
"step": 188
},
{
"epoch": 3.23,
"step": 188,
"total_flos": 1.4951529009071456e+19,
"train_loss": 0.3928315411222742,
"train_runtime": 11312.6208,
"train_samples_per_second": 1.064,
"train_steps_per_second": 0.017
}
],
"max_steps": 188,
"num_train_epochs": 9223372036854775807,
"total_flos": 1.4951529009071456e+19,
"trial_name": null,
"trial_params": null
}