x-x / trainer_state.json
khoon485's picture
1122
07a9b76
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 11.965708791795748,
"global_step": 21000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 0.5735206007957458,
"eval_runtime": 226.9724,
"eval_samples_per_second": 109.67,
"eval_steps_per_second": 27.417,
"step": 1755
},
{
"epoch": 2.0,
"eval_loss": 0.5326883792877197,
"eval_runtime": 225.773,
"eval_samples_per_second": 110.252,
"eval_steps_per_second": 27.563,
"step": 3510
},
{
"epoch": 3.0,
"eval_loss": 0.5004657506942749,
"eval_runtime": 223.9431,
"eval_samples_per_second": 111.153,
"eval_steps_per_second": 27.788,
"step": 5265
},
{
"epoch": 4.0,
"eval_loss": 0.48436784744262695,
"eval_runtime": 223.6051,
"eval_samples_per_second": 111.321,
"eval_steps_per_second": 27.83,
"step": 7020
},
{
"epoch": 5.0,
"eval_loss": 0.4659739136695862,
"eval_runtime": 221.9535,
"eval_samples_per_second": 112.15,
"eval_steps_per_second": 28.037,
"step": 8775
},
{
"epoch": 5.7,
"learning_rate": 5.251661918328585e-05,
"loss": 0.4274,
"step": 10000
},
{
"epoch": 6.0,
"eval_loss": 0.4595305621623993,
"eval_runtime": 232.1017,
"eval_samples_per_second": 107.246,
"eval_steps_per_second": 26.812,
"step": 10530
},
{
"epoch": 7.0,
"eval_loss": 0.44809117913246155,
"eval_runtime": 230.3824,
"eval_samples_per_second": 108.046,
"eval_steps_per_second": 27.012,
"step": 12285
},
{
"epoch": 8.0,
"eval_loss": 0.44372686743736267,
"eval_runtime": 220.8242,
"eval_samples_per_second": 112.723,
"eval_steps_per_second": 28.181,
"step": 14040
},
{
"epoch": 9.0,
"eval_loss": 0.4426543712615967,
"eval_runtime": 226.8705,
"eval_samples_per_second": 109.719,
"eval_steps_per_second": 27.43,
"step": 15795
},
{
"epoch": 10.0,
"eval_loss": 0.4342004656791687,
"eval_runtime": 221.9982,
"eval_samples_per_second": 112.127,
"eval_steps_per_second": 28.032,
"step": 17550
},
{
"epoch": 11.0,
"eval_loss": 0.43299490213394165,
"eval_runtime": 221.9453,
"eval_samples_per_second": 112.154,
"eval_steps_per_second": 28.038,
"step": 19305
},
{
"epoch": 11.4,
"learning_rate": 5.0332383665717e-06,
"loss": 0.2777,
"step": 20000
}
],
"max_steps": 21060,
"num_train_epochs": 12,
"total_flos": 4.092582190428979e+17,
"trial_name": null,
"trial_params": null
}