{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.38888888888889,
"eval_steps": 500,
"global_step": 140,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17,
"learning_rate": 2.4999999999999998e-05,
"loss": 1.7264,
"step": 3
},
{
"epoch": 0.33,
"learning_rate": 4.9999999999999996e-05,
"loss": 1.9502,
"step": 6
},
{
"epoch": 1.11,
"learning_rate": 7.5e-05,
"loss": 1.8229,
"step": 9
},
{
"epoch": 1.28,
"learning_rate": 9.999999999999999e-05,
"loss": 1.4868,
"step": 12
},
{
"epoch": 2.06,
"learning_rate": 0.000125,
"loss": 1.6675,
"step": 15
},
{
"epoch": 2.22,
"learning_rate": 0.00015,
"loss": 1.2277,
"step": 18
},
{
"epoch": 2.39,
"learning_rate": 0.000175,
"loss": 1.4422,
"step": 21
},
{
"epoch": 3.17,
"learning_rate": 0.00019999999999999998,
"loss": 1.3051,
"step": 24
},
{
"epoch": 3.33,
"learning_rate": 0.000225,
"loss": 1.0676,
"step": 27
},
{
"epoch": 4.11,
"learning_rate": 0.00025,
"loss": 1.1766,
"step": 30
},
{
"epoch": 4.28,
"learning_rate": 0.00027499999999999996,
"loss": 0.9316,
"step": 33
},
{
"epoch": 5.06,
"learning_rate": 0.0003,
"loss": 0.6064,
"step": 36
},
{
"epoch": 5.22,
"learning_rate": 0.00029722222222222216,
"loss": 0.5883,
"step": 39
},
{
"epoch": 5.39,
"learning_rate": 0.00029444444444444445,
"loss": 0.6243,
"step": 42
},
{
"epoch": 6.17,
"learning_rate": 0.00029166666666666664,
"loss": 0.5,
"step": 45
},
{
"epoch": 6.33,
"learning_rate": 0.0002888888888888888,
"loss": 0.4061,
"step": 48
},
{
"epoch": 7.11,
"learning_rate": 0.0002861111111111111,
"loss": 0.3036,
"step": 51
},
{
"epoch": 7.28,
"learning_rate": 0.0002833333333333333,
"loss": 0.3437,
"step": 54
},
{
"epoch": 8.06,
"learning_rate": 0.00028055555555555554,
"loss": 0.2798,
"step": 57
},
{
"epoch": 8.22,
"learning_rate": 0.0002777777777777778,
"loss": 0.1906,
"step": 60
},
{
"epoch": 8.39,
"learning_rate": 0.00027499999999999996,
"loss": 0.2051,
"step": 63
},
{
"epoch": 9.17,
"learning_rate": 0.0002722222222222222,
"loss": 0.1486,
"step": 66
},
{
"epoch": 9.33,
"learning_rate": 0.00026944444444444444,
"loss": 0.1745,
"step": 69
},
{
"epoch": 10.11,
"learning_rate": 0.0002666666666666666,
"loss": 0.1235,
"step": 72
},
{
"epoch": 10.28,
"learning_rate": 0.00026388888888888886,
"loss": 0.1066,
"step": 75
},
{
"epoch": 11.06,
"learning_rate": 0.0002611111111111111,
"loss": 0.0659,
"step": 78
},
{
"epoch": 11.22,
"learning_rate": 0.00025833333333333334,
"loss": 0.0865,
"step": 81
},
{
"epoch": 11.39,
"learning_rate": 0.00025555555555555553,
"loss": 0.0582,
"step": 84
},
{
"epoch": 12.17,
"learning_rate": 0.00025277777777777777,
"loss": 0.0557,
"step": 87
},
{
"epoch": 12.33,
"learning_rate": 0.00025,
"loss": 0.0528,
"step": 90
},
{
"epoch": 13.11,
"learning_rate": 0.0002472222222222222,
"loss": 0.0384,
"step": 93
},
{
"epoch": 13.28,
"learning_rate": 0.00024444444444444443,
"loss": 0.0341,
"step": 96
},
{
"epoch": 14.06,
"learning_rate": 0.00024166666666666664,
"loss": 0.0294,
"step": 99
},
{
"epoch": 14.22,
"learning_rate": 0.00023888888888888885,
"loss": 0.0193,
"step": 102
},
{
"epoch": 14.39,
"learning_rate": 0.00023611111111111112,
"loss": 0.0254,
"step": 105
},
{
"epoch": 15.17,
"learning_rate": 0.0002333333333333333,
"loss": 0.0192,
"step": 108
},
{
"epoch": 15.33,
"learning_rate": 0.00023055555555555552,
"loss": 0.0149,
"step": 111
},
{
"epoch": 16.11,
"learning_rate": 0.00022777777777777778,
"loss": 0.0194,
"step": 114
},
{
"epoch": 16.28,
"learning_rate": 0.000225,
"loss": 0.0177,
"step": 117
},
{
"epoch": 17.06,
"learning_rate": 0.00022222222222222218,
"loss": 0.0144,
"step": 120
},
{
"epoch": 17.22,
"learning_rate": 0.00021944444444444444,
"loss": 0.013,
"step": 123
},
{
"epoch": 17.39,
"learning_rate": 0.00021666666666666666,
"loss": 0.0165,
"step": 126
},
{
"epoch": 18.17,
"learning_rate": 0.00021388888888888884,
"loss": 0.012,
"step": 129
},
{
"epoch": 18.33,
"learning_rate": 0.0002111111111111111,
"loss": 0.0137,
"step": 132
},
{
"epoch": 19.11,
"learning_rate": 0.00020833333333333332,
"loss": 0.0083,
"step": 135
},
{
"epoch": 19.28,
"learning_rate": 0.00020555555555555556,
"loss": 0.0144,
"step": 138
}
],
"logging_steps": 3,
"max_steps": 360,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.13811835060224e+16,
"trial_name": null,
"trial_params": null
}