BertjeWDialDataALLQonly / trainer_state.json
Jeska's picture
End of training
7473103
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 13065,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.57,
"learning_rate": 1.9234596249521625e-05,
"loss": 2.2122,
"step": 500
},
{
"epoch": 1.0,
"eval_loss": 2.0468506813049316,
"eval_runtime": 45.9615,
"eval_samples_per_second": 63.814,
"eval_steps_per_second": 7.985,
"step": 871
},
{
"epoch": 1.15,
"learning_rate": 1.8469192499043248e-05,
"loss": 2.1507,
"step": 1000
},
{
"epoch": 1.72,
"learning_rate": 1.770378874856487e-05,
"loss": 2.0961,
"step": 1500
},
{
"epoch": 2.0,
"eval_loss": 2.0117030143737793,
"eval_runtime": 45.9973,
"eval_samples_per_second": 63.765,
"eval_steps_per_second": 7.979,
"step": 1742
},
{
"epoch": 2.3,
"learning_rate": 1.693838499808649e-05,
"loss": 2.0572,
"step": 2000
},
{
"epoch": 2.87,
"learning_rate": 1.6172981247608114e-05,
"loss": 2.0628,
"step": 2500
},
{
"epoch": 3.0,
"eval_loss": 2.004037380218506,
"eval_runtime": 45.987,
"eval_samples_per_second": 63.779,
"eval_steps_per_second": 7.981,
"step": 2613
},
{
"epoch": 3.44,
"learning_rate": 1.5407577497129737e-05,
"loss": 2.0173,
"step": 3000
},
{
"epoch": 4.0,
"eval_loss": 1.990085244178772,
"eval_runtime": 45.9573,
"eval_samples_per_second": 63.82,
"eval_steps_per_second": 7.986,
"step": 3484
},
{
"epoch": 4.02,
"learning_rate": 1.464217374665136e-05,
"loss": 2.0011,
"step": 3500
},
{
"epoch": 4.59,
"learning_rate": 1.3876769996172983e-05,
"loss": 1.9772,
"step": 4000
},
{
"epoch": 5.0,
"eval_loss": 1.9711157083511353,
"eval_runtime": 45.9361,
"eval_samples_per_second": 63.85,
"eval_steps_per_second": 7.989,
"step": 4355
},
{
"epoch": 5.17,
"learning_rate": 1.3111366245694605e-05,
"loss": 1.9615,
"step": 4500
},
{
"epoch": 5.74,
"learning_rate": 1.2345962495216228e-05,
"loss": 1.9455,
"step": 5000
},
{
"epoch": 6.0,
"eval_loss": 1.9784963130950928,
"eval_runtime": 45.9339,
"eval_samples_per_second": 63.853,
"eval_steps_per_second": 7.99,
"step": 5226
},
{
"epoch": 6.31,
"learning_rate": 1.1580558744737851e-05,
"loss": 1.9343,
"step": 5500
},
{
"epoch": 6.89,
"learning_rate": 1.0815154994259474e-05,
"loss": 1.917,
"step": 6000
},
{
"epoch": 7.0,
"eval_loss": 1.9379661083221436,
"eval_runtime": 45.9382,
"eval_samples_per_second": 63.847,
"eval_steps_per_second": 7.989,
"step": 6097
},
{
"epoch": 7.46,
"learning_rate": 1.0049751243781096e-05,
"loss": 1.8933,
"step": 6500
},
{
"epoch": 8.0,
"eval_loss": 1.9650969505310059,
"eval_runtime": 45.9336,
"eval_samples_per_second": 63.853,
"eval_steps_per_second": 7.99,
"step": 6968
},
{
"epoch": 8.04,
"learning_rate": 9.284347493302717e-06,
"loss": 1.8992,
"step": 7000
},
{
"epoch": 8.61,
"learning_rate": 8.51894374282434e-06,
"loss": 1.8708,
"step": 7500
},
{
"epoch": 9.0,
"eval_loss": 1.9915274381637573,
"eval_runtime": 45.9645,
"eval_samples_per_second": 63.81,
"eval_steps_per_second": 7.984,
"step": 7839
},
{
"epoch": 9.18,
"learning_rate": 7.753539992345964e-06,
"loss": 1.8698,
"step": 8000
},
{
"epoch": 9.76,
"learning_rate": 6.988136241867586e-06,
"loss": 1.862,
"step": 8500
},
{
"epoch": 10.0,
"eval_loss": 1.9309507608413696,
"eval_runtime": 45.9471,
"eval_samples_per_second": 63.834,
"eval_steps_per_second": 7.987,
"step": 8710
},
{
"epoch": 10.33,
"learning_rate": 6.222732491389208e-06,
"loss": 1.862,
"step": 9000
},
{
"epoch": 10.91,
"learning_rate": 5.457328740910831e-06,
"loss": 1.8545,
"step": 9500
},
{
"epoch": 11.0,
"eval_loss": 1.9422342777252197,
"eval_runtime": 45.9437,
"eval_samples_per_second": 63.839,
"eval_steps_per_second": 7.988,
"step": 9581
},
{
"epoch": 11.48,
"learning_rate": 4.691924990432454e-06,
"loss": 1.8231,
"step": 10000
},
{
"epoch": 12.0,
"eval_loss": 1.931045651435852,
"eval_runtime": 45.9553,
"eval_samples_per_second": 63.823,
"eval_steps_per_second": 7.986,
"step": 10452
},
{
"epoch": 12.06,
"learning_rate": 3.926521239954076e-06,
"loss": 1.8175,
"step": 10500
},
{
"epoch": 12.63,
"learning_rate": 3.1611174894756987e-06,
"loss": 1.8141,
"step": 11000
},
{
"epoch": 13.0,
"eval_loss": 1.936198115348816,
"eval_runtime": 45.947,
"eval_samples_per_second": 63.834,
"eval_steps_per_second": 7.987,
"step": 11323
},
{
"epoch": 13.2,
"learning_rate": 2.395713738997321e-06,
"loss": 1.84,
"step": 11500
},
{
"epoch": 13.78,
"learning_rate": 1.630309988518944e-06,
"loss": 1.7939,
"step": 12000
},
{
"epoch": 14.0,
"eval_loss": 1.9334322214126587,
"eval_runtime": 45.9955,
"eval_samples_per_second": 63.767,
"eval_steps_per_second": 7.979,
"step": 12194
},
{
"epoch": 14.35,
"learning_rate": 8.649062380405665e-07,
"loss": 1.8165,
"step": 12500
},
{
"epoch": 14.93,
"learning_rate": 9.950248756218906e-08,
"loss": 1.8035,
"step": 13000
},
{
"epoch": 15.0,
"eval_loss": 1.9196784496307373,
"eval_runtime": 45.9399,
"eval_samples_per_second": 63.844,
"eval_steps_per_second": 7.989,
"step": 13065
},
{
"epoch": 15.0,
"step": 13065,
"total_flos": 2.2004859851993088e+17,
"train_loss": 1.9284902631757455,
"train_runtime": 49525.3053,
"train_samples_per_second": 16.881,
"train_steps_per_second": 0.264
}
],
"max_steps": 13065,
"num_train_epochs": 15,
"total_flos": 2.2004859851993088e+17,
"trial_name": null,
"trial_params": null
}