taskmaster2-1k-traces / trainer_state.json
penfever's picture
End of training
ad2e619 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 80,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.31746031746031744,
"grad_norm": 3.1160410477431384,
"learning_rate": 2e-05,
"loss": 0.743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18026351928710938,
"step": 5,
"valid_targets_mean": 4387.2,
"valid_targets_min": 740
},
{
"epoch": 0.6349206349206349,
"grad_norm": 0.9623025342413126,
"learning_rate": 3.998096443163716e-05,
"loss": 0.6426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15755076706409454,
"step": 10,
"valid_targets_mean": 3791.7,
"valid_targets_min": 695
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.5400417708688194,
"learning_rate": 3.931851652578137e-05,
"loss": 0.5922,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1631372570991516,
"step": 15,
"valid_targets_mean": 4048.3,
"valid_targets_min": 1151
},
{
"epoch": 1.253968253968254,
"grad_norm": 0.4346375139896992,
"learning_rate": 3.774021666356444e-05,
"loss": 0.5513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13641172647476196,
"step": 20,
"valid_targets_mean": 3890.5,
"valid_targets_min": 1004
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.3583194659422831,
"learning_rate": 3.532088886237956e-05,
"loss": 0.5268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.157705619931221,
"step": 25,
"valid_targets_mean": 4477.9,
"valid_targets_min": 656
},
{
"epoch": 1.8888888888888888,
"grad_norm": 0.2998700734251974,
"learning_rate": 3.217522858017442e-05,
"loss": 0.5194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12757813930511475,
"step": 30,
"valid_targets_mean": 4071.6,
"valid_targets_min": 773
},
{
"epoch": 2.1904761904761907,
"grad_norm": 0.354322163099566,
"learning_rate": 2.8452365234813992e-05,
"loss": 0.4999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1116921454668045,
"step": 35,
"valid_targets_mean": 3261.8,
"valid_targets_min": 695
},
{
"epoch": 2.507936507936508,
"grad_norm": 0.2736861113965854,
"learning_rate": 2.4328792278762058e-05,
"loss": 0.476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11490871757268906,
"step": 40,
"valid_targets_mean": 3865.1,
"valid_targets_min": 746
},
{
"epoch": 2.825396825396825,
"grad_norm": 0.26810971560154995,
"learning_rate": 2e-05,
"loss": 0.4743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09590233117341995,
"step": 45,
"valid_targets_mean": 3056.7,
"valid_targets_min": 726
},
{
"epoch": 3.126984126984127,
"grad_norm": 0.26045065338871903,
"learning_rate": 1.5671207721237945e-05,
"loss": 0.4803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1086278185248375,
"step": 50,
"valid_targets_mean": 3327.7,
"valid_targets_min": 746
},
{
"epoch": 3.4444444444444446,
"grad_norm": 0.2646056254599043,
"learning_rate": 1.1547634765186016e-05,
"loss": 0.4741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10994982719421387,
"step": 55,
"valid_targets_mean": 3503.4,
"valid_targets_min": 569
},
{
"epoch": 3.761904761904762,
"grad_norm": 0.23864309413170337,
"learning_rate": 7.824771419825588e-06,
"loss": 0.4612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09653226286172867,
"step": 60,
"valid_targets_mean": 3644.4,
"valid_targets_min": 1042
},
{
"epoch": 4.063492063492063,
"grad_norm": 0.2652889879375569,
"learning_rate": 4.679111137620442e-06,
"loss": 0.4606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1475735604763031,
"step": 65,
"valid_targets_mean": 4244.4,
"valid_targets_min": 2068
},
{
"epoch": 4.380952380952381,
"grad_norm": 0.28736207202393793,
"learning_rate": 2.259783336435566e-06,
"loss": 0.467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10715197026729584,
"step": 70,
"valid_targets_mean": 3558.8,
"valid_targets_min": 733
},
{
"epoch": 4.698412698412699,
"grad_norm": 0.26387185108781563,
"learning_rate": 6.814834742186361e-07,
"loss": 0.459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10606744885444641,
"step": 75,
"valid_targets_mean": 3355.8,
"valid_targets_min": 761
},
{
"epoch": 5.0,
"grad_norm": 0.2624385775505968,
"learning_rate": 1.9035568362844037e-08,
"loss": 0.4442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11343716084957123,
"step": 80,
"valid_targets_mean": 3113.4,
"valid_targets_min": 587
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11343716084957123,
"step": 80,
"total_flos": 1.3290499486829773e+17,
"train_loss": 0.5170090794563293,
"train_runtime": 1718.7633,
"train_samples_per_second": 2.909,
"train_steps_per_second": 0.047,
"valid_targets_mean": 3113.4,
"valid_targets_min": 587
}
],
"logging_steps": 5,
"max_steps": 80,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3290499486829773e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}