BhBHT5New / checkpoint-3170 /trainer_state.json
Sabbir772's picture
Upload checkpoint folder via API
e605e4d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 3170,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15772870662460567,
"grad_norm": 8.993573188781738,
"learning_rate": 3.390694006309148e-05,
"loss": 2.6552,
"step": 100
},
{
"epoch": 0.31545741324921134,
"grad_norm": 6.61809778213501,
"learning_rate": 3.280283911671924e-05,
"loss": 2.2263,
"step": 200
},
{
"epoch": 0.47318611987381703,
"grad_norm": 10.127768516540527,
"learning_rate": 3.1698738170347005e-05,
"loss": 1.9796,
"step": 300
},
{
"epoch": 0.6309148264984227,
"grad_norm": 4.960940361022949,
"learning_rate": 3.0594637223974764e-05,
"loss": 2.0323,
"step": 400
},
{
"epoch": 0.7886435331230284,
"grad_norm": 5.231279373168945,
"learning_rate": 2.9490536277602523e-05,
"loss": 2.0473,
"step": 500
},
{
"epoch": 0.7886435331230284,
"eval_bleu": 57.32056474546698,
"eval_chrf": 81.52338542769736,
"eval_loss": 0.7680727243423462,
"eval_runtime": 43.2314,
"eval_samples_per_second": 5.181,
"eval_steps_per_second": 0.648,
"step": 500
},
{
"epoch": 0.9463722397476341,
"grad_norm": 4.041931629180908,
"learning_rate": 2.838643533123028e-05,
"loss": 1.9593,
"step": 600
},
{
"epoch": 1.1041009463722398,
"grad_norm": 5.660284519195557,
"learning_rate": 2.728233438485804e-05,
"loss": 1.9291,
"step": 700
},
{
"epoch": 1.2618296529968454,
"grad_norm": 4.831106662750244,
"learning_rate": 2.6178233438485802e-05,
"loss": 1.9062,
"step": 800
},
{
"epoch": 1.4195583596214512,
"grad_norm": 5.246473789215088,
"learning_rate": 2.5074132492113564e-05,
"loss": 1.8709,
"step": 900
},
{
"epoch": 1.5772870662460567,
"grad_norm": 6.091129779815674,
"learning_rate": 2.3970031545741323e-05,
"loss": 1.8274,
"step": 1000
},
{
"epoch": 1.5772870662460567,
"eval_bleu": 56.853343569870646,
"eval_chrf": 81.1312982973412,
"eval_loss": 0.8501662015914917,
"eval_runtime": 25.1617,
"eval_samples_per_second": 8.902,
"eval_steps_per_second": 1.113,
"step": 1000
},
{
"epoch": 1.7350157728706623,
"grad_norm": 4.976201057434082,
"learning_rate": 2.286593059936908e-05,
"loss": 1.8565,
"step": 1100
},
{
"epoch": 1.8927444794952681,
"grad_norm": 6.352173805236816,
"learning_rate": 2.1761829652996844e-05,
"loss": 1.7704,
"step": 1200
},
{
"epoch": 2.050473186119874,
"grad_norm": 5.1022233963012695,
"learning_rate": 2.0657728706624606e-05,
"loss": 1.7229,
"step": 1300
},
{
"epoch": 2.2082018927444795,
"grad_norm": 3.9384002685546875,
"learning_rate": 1.9553627760252364e-05,
"loss": 1.6988,
"step": 1400
},
{
"epoch": 2.365930599369085,
"grad_norm": 9.005499839782715,
"learning_rate": 1.8449526813880123e-05,
"loss": 1.7559,
"step": 1500
},
{
"epoch": 2.365930599369085,
"eval_bleu": 55.558969113821774,
"eval_chrf": 79.91366807649794,
"eval_loss": 0.8783891797065735,
"eval_runtime": 25.3515,
"eval_samples_per_second": 8.836,
"eval_steps_per_second": 1.104,
"step": 1500
},
{
"epoch": 2.5236593059936907,
"grad_norm": 5.816145420074463,
"learning_rate": 1.7345425867507885e-05,
"loss": 1.7198,
"step": 1600
},
{
"epoch": 2.6813880126182967,
"grad_norm": 4.037476062774658,
"learning_rate": 1.6241324921135647e-05,
"loss": 1.7311,
"step": 1700
},
{
"epoch": 2.8391167192429023,
"grad_norm": 4.826643943786621,
"learning_rate": 1.5137223974763406e-05,
"loss": 1.7583,
"step": 1800
},
{
"epoch": 2.996845425867508,
"grad_norm": 5.530210018157959,
"learning_rate": 1.4033123028391166e-05,
"loss": 1.6305,
"step": 1900
},
{
"epoch": 3.1545741324921135,
"grad_norm": 5.22488260269165,
"learning_rate": 1.2929022082018927e-05,
"loss": 1.7053,
"step": 2000
},
{
"epoch": 3.1545741324921135,
"eval_bleu": 55.323355114843025,
"eval_chrf": 79.52359278149143,
"eval_loss": 0.9229835271835327,
"eval_runtime": 24.6216,
"eval_samples_per_second": 9.098,
"eval_steps_per_second": 1.137,
"step": 2000
},
{
"epoch": 3.312302839116719,
"grad_norm": 6.181191444396973,
"learning_rate": 1.1824921135646685e-05,
"loss": 1.6653,
"step": 2100
},
{
"epoch": 3.470031545741325,
"grad_norm": 3.005113363265991,
"learning_rate": 1.0720820189274447e-05,
"loss": 1.6738,
"step": 2200
},
{
"epoch": 3.6277602523659307,
"grad_norm": 4.87603759765625,
"learning_rate": 9.616719242902206e-06,
"loss": 1.6694,
"step": 2300
},
{
"epoch": 3.7854889589905363,
"grad_norm": 3.4485676288604736,
"learning_rate": 8.512618296529968e-06,
"loss": 1.6154,
"step": 2400
},
{
"epoch": 3.943217665615142,
"grad_norm": 4.3030219078063965,
"learning_rate": 7.4085173501577285e-06,
"loss": 1.5943,
"step": 2500
},
{
"epoch": 3.943217665615142,
"eval_bleu": 55.5956316646253,
"eval_chrf": 79.72369308541496,
"eval_loss": 0.9278033375740051,
"eval_runtime": 25.7316,
"eval_samples_per_second": 8.705,
"eval_steps_per_second": 1.088,
"step": 2500
},
{
"epoch": 4.100946372239748,
"grad_norm": 4.7534284591674805,
"learning_rate": 6.304416403785489e-06,
"loss": 1.6937,
"step": 2600
},
{
"epoch": 4.2586750788643535,
"grad_norm": 5.381317138671875,
"learning_rate": 5.200315457413248e-06,
"loss": 1.5835,
"step": 2700
},
{
"epoch": 4.416403785488959,
"grad_norm": 7.387664794921875,
"learning_rate": 4.09621451104101e-06,
"loss": 1.6145,
"step": 2800
},
{
"epoch": 4.574132492113565,
"grad_norm": 4.751382827758789,
"learning_rate": 2.9921135646687696e-06,
"loss": 1.6218,
"step": 2900
},
{
"epoch": 4.73186119873817,
"grad_norm": 4.115559101104736,
"learning_rate": 1.8880126182965297e-06,
"loss": 1.6476,
"step": 3000
},
{
"epoch": 4.73186119873817,
"eval_bleu": 55.67318568538766,
"eval_chrf": 79.82548413247255,
"eval_loss": 0.9428688287734985,
"eval_runtime": 24.3618,
"eval_samples_per_second": 9.195,
"eval_steps_per_second": 1.149,
"step": 3000
},
{
"epoch": 4.889589905362776,
"grad_norm": 8.612651824951172,
"learning_rate": 7.839116719242902e-07,
"loss": 1.6411,
"step": 3100
}
],
"logging_steps": 100,
"max_steps": 3170,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8671674976174080.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}