wav2vec2-shobdotori / trainer_state.json
Somik033's picture
Upload folder using huggingface_hub
51ce3db verified
{
"best_global_step": 2600,
"best_metric": 0.5833333333333334,
"best_model_checkpoint": "/kaggle/working/wav2vec2-bengali-finetuned/checkpoint-2400",
"epoch": 30.0,
"eval_steps": 200,
"global_step": 2940,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01020408163265306,
"grad_norm": 1263584.0,
"learning_rate": 0.0,
"loss": 25.6736,
"step": 1
},
{
"epoch": 1.0204081632653061,
"grad_norm": 3843419.25,
"learning_rate": 9.900000000000002e-06,
"loss": 23.811,
"step": 100
},
{
"epoch": 2.0408163265306123,
"grad_norm": 1233849.75,
"learning_rate": 1.9900000000000003e-05,
"loss": 13.9598,
"step": 200
},
{
"epoch": 2.0408163265306123,
"eval_loss": 5.328553199768066,
"eval_runtime": 11.0357,
"eval_samples_per_second": 14.951,
"eval_steps_per_second": 1.903,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 3.061224489795918,
"grad_norm": 101215.4375,
"learning_rate": 2.9900000000000002e-05,
"loss": 4.6042,
"step": 300
},
{
"epoch": 4.081632653061225,
"grad_norm": 204473.296875,
"learning_rate": 3.99e-05,
"loss": 3.5131,
"step": 400
},
{
"epoch": 4.081632653061225,
"eval_loss": 3.380544900894165,
"eval_runtime": 10.5068,
"eval_samples_per_second": 15.704,
"eval_steps_per_second": 1.999,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 5.1020408163265305,
"grad_norm": 106750.3828125,
"learning_rate": 4.99e-05,
"loss": 3.2448,
"step": 500
},
{
"epoch": 6.122448979591836,
"grad_norm": 56899.28125,
"learning_rate": 4.7971311475409835e-05,
"loss": 2.9613,
"step": 600
},
{
"epoch": 6.122448979591836,
"eval_loss": 2.839696168899536,
"eval_runtime": 10.5616,
"eval_samples_per_second": 15.623,
"eval_steps_per_second": 1.988,
"eval_wer": 1.0,
"step": 600
},
{
"epoch": 7.142857142857143,
"grad_norm": 155864.65625,
"learning_rate": 4.5922131147540986e-05,
"loss": 2.8199,
"step": 700
},
{
"epoch": 8.16326530612245,
"grad_norm": 75451.0546875,
"learning_rate": 4.387295081967213e-05,
"loss": 2.7197,
"step": 800
},
{
"epoch": 8.16326530612245,
"eval_loss": 2.606015920639038,
"eval_runtime": 10.535,
"eval_samples_per_second": 15.662,
"eval_steps_per_second": 1.993,
"eval_wer": 0.8457207207207207,
"step": 800
},
{
"epoch": 9.183673469387756,
"grad_norm": 76782.7421875,
"learning_rate": 4.182377049180328e-05,
"loss": 2.6389,
"step": 900
},
{
"epoch": 10.204081632653061,
"grad_norm": 64796.6171875,
"learning_rate": 3.977459016393443e-05,
"loss": 2.5596,
"step": 1000
},
{
"epoch": 10.204081632653061,
"eval_loss": 2.4175667762756348,
"eval_runtime": 10.5336,
"eval_samples_per_second": 15.664,
"eval_steps_per_second": 1.994,
"eval_wer": 0.7815315315315315,
"step": 1000
},
{
"epoch": 11.224489795918368,
"grad_norm": 147111.484375,
"learning_rate": 3.772540983606558e-05,
"loss": 2.4538,
"step": 1100
},
{
"epoch": 12.244897959183673,
"grad_norm": 91618.7890625,
"learning_rate": 3.5676229508196724e-05,
"loss": 2.3313,
"step": 1200
},
{
"epoch": 12.244897959183673,
"eval_loss": 2.050769805908203,
"eval_runtime": 10.5394,
"eval_samples_per_second": 15.656,
"eval_steps_per_second": 1.993,
"eval_wer": 0.7792792792792793,
"step": 1200
},
{
"epoch": 13.26530612244898,
"grad_norm": 135995.46875,
"learning_rate": 3.362704918032787e-05,
"loss": 2.1482,
"step": 1300
},
{
"epoch": 14.285714285714286,
"grad_norm": 91693.046875,
"learning_rate": 3.157786885245902e-05,
"loss": 1.9715,
"step": 1400
},
{
"epoch": 14.285714285714286,
"eval_loss": 1.6378555297851562,
"eval_runtime": 10.5414,
"eval_samples_per_second": 15.653,
"eval_steps_per_second": 1.992,
"eval_wer": 0.7894144144144144,
"step": 1400
},
{
"epoch": 15.306122448979592,
"grad_norm": 106778.4609375,
"learning_rate": 2.9528688524590165e-05,
"loss": 1.8192,
"step": 1500
},
{
"epoch": 16.3265306122449,
"grad_norm": 93321.4921875,
"learning_rate": 2.7479508196721317e-05,
"loss": 1.706,
"step": 1600
},
{
"epoch": 16.3265306122449,
"eval_loss": 1.3998359441757202,
"eval_runtime": 10.5682,
"eval_samples_per_second": 15.613,
"eval_steps_per_second": 1.987,
"eval_wer": 0.7263513513513513,
"step": 1600
},
{
"epoch": 17.346938775510203,
"grad_norm": 111310.1328125,
"learning_rate": 2.5430327868852462e-05,
"loss": 1.5935,
"step": 1700
},
{
"epoch": 18.367346938775512,
"grad_norm": 131049.40625,
"learning_rate": 2.338114754098361e-05,
"loss": 1.5412,
"step": 1800
},
{
"epoch": 18.367346938775512,
"eval_loss": 1.2491974830627441,
"eval_runtime": 10.5527,
"eval_samples_per_second": 15.636,
"eval_steps_per_second": 1.99,
"eval_wer": 0.6722972972972973,
"step": 1800
},
{
"epoch": 19.387755102040817,
"grad_norm": 150843.8125,
"learning_rate": 2.1331967213114755e-05,
"loss": 1.4727,
"step": 1900
},
{
"epoch": 20.408163265306122,
"grad_norm": 124916.6015625,
"learning_rate": 1.9282786885245903e-05,
"loss": 1.4023,
"step": 2000
},
{
"epoch": 20.408163265306122,
"eval_loss": 1.138938546180725,
"eval_runtime": 10.5847,
"eval_samples_per_second": 15.589,
"eval_steps_per_second": 1.984,
"eval_wer": 0.6317567567567568,
"step": 2000
},
{
"epoch": 21.428571428571427,
"grad_norm": 260327.703125,
"learning_rate": 1.7233606557377048e-05,
"loss": 1.3585,
"step": 2100
},
{
"epoch": 22.448979591836736,
"grad_norm": 156561.15625,
"learning_rate": 1.5184426229508198e-05,
"loss": 1.3188,
"step": 2200
},
{
"epoch": 22.448979591836736,
"eval_loss": 1.0857497453689575,
"eval_runtime": 10.5312,
"eval_samples_per_second": 15.668,
"eval_steps_per_second": 1.994,
"eval_wer": 0.6148648648648649,
"step": 2200
},
{
"epoch": 23.46938775510204,
"grad_norm": 137688.609375,
"learning_rate": 1.3135245901639346e-05,
"loss": 1.2872,
"step": 2300
},
{
"epoch": 24.489795918367346,
"grad_norm": 149320.140625,
"learning_rate": 1.1086065573770492e-05,
"loss": 1.2707,
"step": 2400
},
{
"epoch": 24.489795918367346,
"eval_loss": 1.035390019416809,
"eval_runtime": 10.5453,
"eval_samples_per_second": 15.647,
"eval_steps_per_second": 1.991,
"eval_wer": 0.6047297297297297,
"step": 2400
},
{
"epoch": 25.510204081632654,
"grad_norm": 160687.875,
"learning_rate": 9.036885245901639e-06,
"loss": 1.2261,
"step": 2500
},
{
"epoch": 26.53061224489796,
"grad_norm": 175958.0625,
"learning_rate": 6.987704918032788e-06,
"loss": 1.222,
"step": 2600
},
{
"epoch": 26.53061224489796,
"eval_loss": 1.011266827583313,
"eval_runtime": 10.5487,
"eval_samples_per_second": 15.642,
"eval_steps_per_second": 1.991,
"eval_wer": 0.5833333333333334,
"step": 2600
},
{
"epoch": 27.551020408163264,
"grad_norm": 129829.171875,
"learning_rate": 4.938524590163935e-06,
"loss": 1.2061,
"step": 2700
},
{
"epoch": 28.571428571428573,
"grad_norm": 180224.375,
"learning_rate": 2.8893442622950824e-06,
"loss": 1.2084,
"step": 2800
},
{
"epoch": 28.571428571428573,
"eval_loss": 0.9903889298439026,
"eval_runtime": 10.5794,
"eval_samples_per_second": 15.596,
"eval_steps_per_second": 1.985,
"eval_wer": 0.5833333333333334,
"step": 2800
},
{
"epoch": 29.591836734693878,
"grad_norm": 153736.046875,
"learning_rate": 8.401639344262295e-07,
"loss": 1.1881,
"step": 2900
},
{
"epoch": 30.0,
"step": 2940,
"total_flos": 1.8456230162149265e+19,
"train_loss": 3.165084928395797,
"train_runtime": 15598.205,
"train_samples_per_second": 6.024,
"train_steps_per_second": 0.188
}
],
"logging_steps": 100,
"max_steps": 2940,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 400,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8456230162149265e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}