bangla-med-sum / trainer_state.json
sazzadul's picture
Upload folder using huggingface_hub
af091de verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.0,
"eval_steps": 500,
"global_step": 5022,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8960573476702509,
"grad_norm": 14.137969970703125,
"learning_rate": 1.3440860215053763e-05,
"loss": 2.3274,
"step": 500
},
{
"epoch": 1.0,
"eval_loss": 1.3252462148666382,
"eval_rouge1": 0.5630271149940076,
"eval_rouge2": 0.37605870482604453,
"eval_rougeL": 0.4984598485550909,
"eval_rougeLsum": 0.571321002640571,
"eval_runtime": 49.4969,
"eval_samples_per_second": 2.384,
"eval_steps_per_second": 0.606,
"step": 558
},
{
"epoch": 1.7921146953405018,
"grad_norm": 7.504210472106934,
"learning_rate": 1.3679808841099164e-05,
"loss": 1.17,
"step": 1000
},
{
"epoch": 2.0,
"eval_loss": 1.0907840728759766,
"eval_rouge1": 0.5562967978764126,
"eval_rouge2": 0.3744307754882274,
"eval_rougeL": 0.5008576418920765,
"eval_rougeLsum": 0.5639195444626521,
"eval_runtime": 49.3033,
"eval_samples_per_second": 2.393,
"eval_steps_per_second": 0.608,
"step": 1116
},
{
"epoch": 2.688172043010753,
"grad_norm": 4.531522750854492,
"learning_rate": 1.2186379928315411e-05,
"loss": 0.8956,
"step": 1500
},
{
"epoch": 3.0,
"eval_loss": 1.0720860958099365,
"eval_rouge1": 0.5636947893309734,
"eval_rouge2": 0.3744948097288884,
"eval_rougeL": 0.5089233474314114,
"eval_rougeLsum": 0.5687286653850421,
"eval_runtime": 50.0187,
"eval_samples_per_second": 2.359,
"eval_steps_per_second": 0.6,
"step": 1674
},
{
"epoch": 3.5842293906810037,
"grad_norm": 5.381931304931641,
"learning_rate": 1.0692951015531662e-05,
"loss": 0.7209,
"step": 2000
},
{
"epoch": 4.0,
"eval_loss": 1.0957852602005005,
"eval_rouge1": 0.5540030400056567,
"eval_rouge2": 0.3692226432826772,
"eval_rougeL": 0.49758037286998147,
"eval_rougeLsum": 0.5561693313473722,
"eval_runtime": 47.7686,
"eval_samples_per_second": 2.47,
"eval_steps_per_second": 0.628,
"step": 2232
},
{
"epoch": 4.480286738351254,
"grad_norm": 4.125611305236816,
"learning_rate": 9.19952210274791e-06,
"loss": 0.5647,
"step": 2500
},
{
"epoch": 5.0,
"eval_loss": 1.1148895025253296,
"eval_rouge1": 0.5806893682446067,
"eval_rouge2": 0.3943863308109349,
"eval_rougeL": 0.5236896549899279,
"eval_rougeLsum": 0.5875367327889313,
"eval_runtime": 48.2153,
"eval_samples_per_second": 2.447,
"eval_steps_per_second": 0.622,
"step": 2790
},
{
"epoch": 5.376344086021505,
"grad_norm": 5.281921863555908,
"learning_rate": 7.706093189964159e-06,
"loss": 0.4652,
"step": 3000
},
{
"epoch": 6.0,
"eval_loss": 1.1711769104003906,
"eval_rouge1": 0.567309904415172,
"eval_rouge2": 0.37849761310553864,
"eval_rougeL": 0.5092459232917496,
"eval_rougeLsum": 0.5729114339966073,
"eval_runtime": 47.1162,
"eval_samples_per_second": 2.504,
"eval_steps_per_second": 0.637,
"step": 3348
},
{
"epoch": 6.272401433691757,
"grad_norm": 5.2352166175842285,
"learning_rate": 6.212664277180406e-06,
"loss": 0.3682,
"step": 3500
},
{
"epoch": 7.0,
"eval_loss": 1.2149335145950317,
"eval_rouge1": 0.5779989309920852,
"eval_rouge2": 0.3932839170198733,
"eval_rougeL": 0.519612547289248,
"eval_rougeLsum": 0.5844791754998846,
"eval_runtime": 49.3993,
"eval_samples_per_second": 2.389,
"eval_steps_per_second": 0.607,
"step": 3906
},
{
"epoch": 7.168458781362007,
"grad_norm": 5.129788875579834,
"learning_rate": 4.719235364396655e-06,
"loss": 0.2964,
"step": 4000
},
{
"epoch": 8.0,
"eval_loss": 1.2541477680206299,
"eval_rouge1": 0.5741091721906807,
"eval_rouge2": 0.3913854513407571,
"eval_rougeL": 0.5175964085703821,
"eval_rougeLsum": 0.5780595369378698,
"eval_runtime": 51.0845,
"eval_samples_per_second": 2.31,
"eval_steps_per_second": 0.587,
"step": 4464
},
{
"epoch": 8.064516129032258,
"grad_norm": 5.963613033294678,
"learning_rate": 3.225806451612903e-06,
"loss": 0.2434,
"step": 4500
},
{
"epoch": 8.960573476702509,
"grad_norm": 5.311295032501221,
"learning_rate": 1.7323775388291518e-06,
"loss": 0.2005,
"step": 5000
},
{
"epoch": 9.0,
"eval_loss": 1.283581256866455,
"eval_rouge1": 0.5951372356242162,
"eval_rouge2": 0.40327134125204,
"eval_rougeL": 0.5352477504226778,
"eval_rougeLsum": 0.6005008499350015,
"eval_runtime": 90.0692,
"eval_samples_per_second": 1.31,
"eval_steps_per_second": 0.333,
"step": 5022
}
],
"logging_steps": 500,
"max_steps": 5580,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4941750029647872.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}