whisper-base-me / trainer_state.json
razhan's picture
End of training
ba470b0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 656,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12195121951219512,
"grad_norm": 42.956241607666016,
"learning_rate": 1.5e-06,
"loss": 3.946,
"step": 20
},
{
"epoch": 0.24390243902439024,
"grad_norm": 18.780467987060547,
"learning_rate": 3.5e-06,
"loss": 2.801,
"step": 40
},
{
"epoch": 0.36585365853658536,
"grad_norm": 10.132372856140137,
"learning_rate": 5.500000000000001e-06,
"loss": 1.829,
"step": 60
},
{
"epoch": 0.4878048780487805,
"grad_norm": 6.34807825088501,
"learning_rate": 7.500000000000001e-06,
"loss": 1.1664,
"step": 80
},
{
"epoch": 0.6097560975609756,
"grad_norm": 3.893784523010254,
"learning_rate": 9.5e-06,
"loss": 0.8395,
"step": 100
},
{
"epoch": 0.7317073170731707,
"grad_norm": 3.6245577335357666,
"learning_rate": 9.73021582733813e-06,
"loss": 0.6985,
"step": 120
},
{
"epoch": 0.8536585365853658,
"grad_norm": 3.2853732109069824,
"learning_rate": 9.370503597122303e-06,
"loss": 0.6198,
"step": 140
},
{
"epoch": 0.975609756097561,
"grad_norm": 3.1876819133758545,
"learning_rate": 9.010791366906476e-06,
"loss": 0.5933,
"step": 160
},
{
"epoch": 1.0,
"eval_avg_cer": 0.35838623267570763,
"eval_avg_wer": 0.8217081943985028,
"eval_gilaki_cer": 0.38532785111447737,
"eval_gilaki_wer": 0.9969254419677172,
"eval_hawrami_cer": 0.11622886617633012,
"eval_hawrami_wer": 0.5257112184648417,
"eval_laki_kurdish_cer": 0.24613632666341304,
"eval_laki_kurdish_wer": 0.7856850715746422,
"eval_loss": 0.7336705923080444,
"eval_mazanderani_cer": 0.3138505136379738,
"eval_mazanderani_wer": 0.8884360672440142,
"eval_runtime": 457.0137,
"eval_samples_per_second": 7.129,
"eval_southern_kurdish_cer": 0.21365623482062313,
"eval_southern_kurdish_wer": 0.6900034831069314,
"eval_steps_per_second": 0.057,
"eval_talysh_cer": 0.5,
"eval_talysh_wer": 0.9166666666666666,
"eval_zazaki_cer": 0.7335038363171356,
"eval_zazaki_wer": 0.9485294117647058,
"step": 164
},
{
"epoch": 1.0975609756097562,
"grad_norm": 3.3254923820495605,
"learning_rate": 8.651079136690648e-06,
"loss": 0.5337,
"step": 180
},
{
"epoch": 1.2195121951219512,
"grad_norm": 3.416600227355957,
"learning_rate": 8.29136690647482e-06,
"loss": 0.5054,
"step": 200
},
{
"epoch": 1.3414634146341464,
"grad_norm": 3.544034481048584,
"learning_rate": 7.931654676258992e-06,
"loss": 0.4915,
"step": 220
},
{
"epoch": 1.4634146341463414,
"grad_norm": 2.7927117347717285,
"learning_rate": 7.571942446043166e-06,
"loss": 0.47,
"step": 240
},
{
"epoch": 1.5853658536585367,
"grad_norm": 3.406677007675171,
"learning_rate": 7.212230215827338e-06,
"loss": 0.4681,
"step": 260
},
{
"epoch": 1.7073170731707317,
"grad_norm": 2.7277846336364746,
"learning_rate": 6.852517985611511e-06,
"loss": 0.4653,
"step": 280
},
{
"epoch": 1.8292682926829267,
"grad_norm": 2.8852195739746094,
"learning_rate": 6.4928057553956835e-06,
"loss": 0.4281,
"step": 300
},
{
"epoch": 1.951219512195122,
"grad_norm": 3.2276437282562256,
"learning_rate": 6.133093525179856e-06,
"loss": 0.4436,
"step": 320
},
{
"epoch": 2.0,
"eval_avg_cer": 0.2926722864574391,
"eval_avg_wer": 0.7377513408917141,
"eval_gilaki_cer": 0.38182211642501623,
"eval_gilaki_wer": 0.9723289777094543,
"eval_hawrami_cer": 0.09332314452192186,
"eval_hawrami_wer": 0.44186795491143316,
"eval_laki_kurdish_cer": 0.1845615747519115,
"eval_laki_kurdish_wer": 0.6486707566462168,
"eval_loss": 0.5930325984954834,
"eval_mazanderani_cer": 0.29389538316212066,
"eval_mazanderani_wer": 0.7514009169638308,
"eval_runtime": 456.2009,
"eval_samples_per_second": 7.142,
"eval_southern_kurdish_cer": 0.17975851779890362,
"eval_southern_kurdish_wer": 0.5950888192267503,
"eval_steps_per_second": 0.057,
"eval_talysh_cer": 0.5,
"eval_talysh_wer": 0.9166666666666666,
"eval_zazaki_cer": 0.4153452685421995,
"eval_zazaki_wer": 0.8382352941176471,
"step": 328
},
{
"epoch": 2.073170731707317,
"grad_norm": 3.5604612827301025,
"learning_rate": 5.773381294964029e-06,
"loss": 0.4201,
"step": 340
},
{
"epoch": 2.1951219512195124,
"grad_norm": 2.8034543991088867,
"learning_rate": 5.413669064748202e-06,
"loss": 0.407,
"step": 360
},
{
"epoch": 2.317073170731707,
"grad_norm": 3.1773579120635986,
"learning_rate": 5.053956834532374e-06,
"loss": 0.3865,
"step": 380
},
{
"epoch": 2.4390243902439024,
"grad_norm": 2.6149208545684814,
"learning_rate": 4.6942446043165475e-06,
"loss": 0.4109,
"step": 400
},
{
"epoch": 2.5609756097560976,
"grad_norm": 2.5704352855682373,
"learning_rate": 4.33453237410072e-06,
"loss": 0.3879,
"step": 420
},
{
"epoch": 2.682926829268293,
"grad_norm": 2.887918472290039,
"learning_rate": 3.974820143884892e-06,
"loss": 0.3831,
"step": 440
},
{
"epoch": 2.8048780487804876,
"grad_norm": 2.9402122497558594,
"learning_rate": 3.6151079136690647e-06,
"loss": 0.3946,
"step": 460
},
{
"epoch": 2.926829268292683,
"grad_norm": 2.4520623683929443,
"learning_rate": 3.2553956834532375e-06,
"loss": 0.387,
"step": 480
},
{
"epoch": 3.0,
"eval_avg_cer": 0.2707400949444132,
"eval_avg_wer": 0.7061799250572226,
"eval_gilaki_cer": 0.3735122267907379,
"eval_gilaki_wer": 0.9523443504996156,
"eval_hawrami_cer": 0.08913936383608749,
"eval_hawrami_wer": 0.42297369833601717,
"eval_laki_kurdish_cer": 0.1678054335448186,
"eval_laki_kurdish_wer": 0.6094069529652352,
"eval_loss": 0.5509017109870911,
"eval_mazanderani_cer": 0.24359428503955602,
"eval_mazanderani_wer": 0.6821192052980133,
"eval_runtime": 459.5728,
"eval_samples_per_second": 7.089,
"eval_southern_kurdish_cer": 0.1871140101311498,
"eval_southern_kurdish_wer": 0.5705329153605015,
"eval_steps_per_second": 0.057,
"eval_talysh_cer": 0.5,
"eval_talysh_wer": 0.9166666666666666,
"eval_zazaki_cer": 0.3340153452685422,
"eval_zazaki_wer": 0.7892156862745098,
"step": 492
},
{
"epoch": 3.048780487804878,
"grad_norm": 2.502338409423828,
"learning_rate": 2.89568345323741e-06,
"loss": 0.3788,
"step": 500
},
{
"epoch": 3.1707317073170733,
"grad_norm": 2.474639654159546,
"learning_rate": 2.5359712230215827e-06,
"loss": 0.3829,
"step": 520
},
{
"epoch": 3.292682926829268,
"grad_norm": 2.5425171852111816,
"learning_rate": 2.1762589928057555e-06,
"loss": 0.3557,
"step": 540
},
{
"epoch": 3.4146341463414633,
"grad_norm": 2.498109817504883,
"learning_rate": 1.8165467625899283e-06,
"loss": 0.3607,
"step": 560
},
{
"epoch": 3.5365853658536586,
"grad_norm": 2.560028076171875,
"learning_rate": 1.4568345323741009e-06,
"loss": 0.3544,
"step": 580
},
{
"epoch": 3.658536585365854,
"grad_norm": 2.3217689990997314,
"learning_rate": 1.0971223021582735e-06,
"loss": 0.3614,
"step": 600
},
{
"epoch": 3.7804878048780486,
"grad_norm": 2.47906231880188,
"learning_rate": 7.37410071942446e-07,
"loss": 0.3608,
"step": 620
},
{
"epoch": 3.902439024390244,
"grad_norm": 2.4336979389190674,
"learning_rate": 3.7769784172661875e-07,
"loss": 0.3575,
"step": 640
},
{
"epoch": 4.0,
"eval_avg_cer": 0.2678389158869433,
"eval_avg_wer": 0.7003291969579674,
"eval_gilaki_cer": 0.38182211642501623,
"eval_gilaki_wer": 0.9606072252113759,
"eval_hawrami_cer": 0.08648390486197345,
"eval_hawrami_wer": 0.4144927536231884,
"eval_laki_kurdish_cer": 0.16528387831462502,
"eval_laki_kurdish_wer": 0.603680981595092,
"eval_loss": 0.5420735478401184,
"eval_mazanderani_cer": 0.2510331798323297,
"eval_mazanderani_wer": 0.6836474783494652,
"eval_runtime": 457.2923,
"eval_samples_per_second": 7.125,
"eval_southern_kurdish_cer": 0.17209076399972242,
"eval_southern_kurdish_wer": 0.5609543712991989,
"eval_steps_per_second": 0.057,
"eval_talysh_cer": 0.5,
"eval_talysh_wer": 0.9166666666666666,
"eval_zazaki_cer": 0.31815856777493606,
"eval_zazaki_wer": 0.7622549019607843,
"step": 656
},
{
"epoch": 4.0,
"step": 656,
"total_flos": 5.41529079939072e+18,
"train_loss": 0.6912094768954486,
"train_runtime": 12274.8821,
"train_samples_per_second": 6.802,
"train_steps_per_second": 0.053
}
],
"logging_steps": 20,
"max_steps": 656,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.41529079939072e+18,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}