{
"best_global_step": 46500,
"best_metric": 0.15599121044112013,
"best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v4/checkpoint-46500",
"epoch": 5.403631058518398,
"eval_steps": 500,
"global_step": 50000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01080672178094775,
"grad_norm": 25.346445083618164,
"learning_rate": 1.94e-06,
"loss": 3.5433,
"step": 100
},
{
"epoch": 0.0216134435618955,
"grad_norm": 21.656307220458984,
"learning_rate": 3.94e-06,
"loss": 2.0264,
"step": 200
},
{
"epoch": 0.03242016534284325,
"grad_norm": 18.657211303710938,
"learning_rate": 5.94e-06,
"loss": 1.5688,
"step": 300
},
{
"epoch": 0.043226887123791,
"grad_norm": 16.42237663269043,
"learning_rate": 7.94e-06,
"loss": 1.3214,
"step": 400
},
{
"epoch": 0.054033608904738746,
"grad_norm": 18.631206512451172,
"learning_rate": 9.940000000000001e-06,
"loss": 1.1788,
"step": 500
},
{
"epoch": 0.054033608904738746,
"eval_loss": 0.967095136642456,
"eval_runtime": 5770.5819,
"eval_samples_per_second": 3.207,
"eval_steps_per_second": 0.802,
"eval_wer": 0.6590292385770924,
"step": 500
},
{
"epoch": 0.0648403306856865,
"grad_norm": 15.337555885314941,
"learning_rate": 1.1940000000000001e-05,
"loss": 1.0627,
"step": 600
},
{
"epoch": 0.07564705246663425,
"grad_norm": 14.623177528381348,
"learning_rate": 1.394e-05,
"loss": 0.9632,
"step": 700
},
{
"epoch": 0.086453774247582,
"grad_norm": 17.126712799072266,
"learning_rate": 1.5940000000000003e-05,
"loss": 0.906,
"step": 800
},
{
"epoch": 0.09726049602852975,
"grad_norm": 16.75067710876465,
"learning_rate": 1.794e-05,
"loss": 0.8503,
"step": 900
},
{
"epoch": 0.10806721780947749,
"grad_norm": 14.265076637268066,
"learning_rate": 1.9940000000000002e-05,
"loss": 0.8015,
"step": 1000
},
{
"epoch": 0.10806721780947749,
"eval_loss": 0.6976613402366638,
"eval_runtime": 5463.0331,
"eval_samples_per_second": 3.387,
"eval_steps_per_second": 0.847,
"eval_wer": 0.5304858499049883,
"step": 1000
},
{
"epoch": 0.11887393959042525,
"grad_norm": 13.737130165100098,
"learning_rate": 1.9784444444444446e-05,
"loss": 0.7589,
"step": 1100
},
{
"epoch": 0.129680661371373,
"grad_norm": 18.01378631591797,
"learning_rate": 1.9562222222222225e-05,
"loss": 0.7589,
"step": 1200
},
{
"epoch": 0.14048738315232073,
"grad_norm": 11.696120262145996,
"learning_rate": 1.934e-05,
"loss": 0.7087,
"step": 1300
},
{
"epoch": 0.1512941049332685,
"grad_norm": 13.419560432434082,
"learning_rate": 1.911777777777778e-05,
"loss": 0.683,
"step": 1400
},
{
"epoch": 0.16210082671421625,
"grad_norm": 12.753211975097656,
"learning_rate": 1.8895555555555557e-05,
"loss": 0.6498,
"step": 1500
},
{
"epoch": 0.16210082671421625,
"eval_loss": 0.5724753737449646,
"eval_runtime": 4564.4621,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 1.013,
"eval_wer": 0.6670133485560569,
"step": 1500
},
{
"epoch": 0.172907548495164,
"grad_norm": 11.64907455444336,
"learning_rate": 1.8673333333333333e-05,
"loss": 0.6216,
"step": 1600
},
{
"epoch": 0.18371427027611173,
"grad_norm": 13.781865119934082,
"learning_rate": 1.8451111111111113e-05,
"loss": 0.6138,
"step": 1700
},
{
"epoch": 0.1945209920570595,
"grad_norm": 12.58388900756836,
"learning_rate": 1.822888888888889e-05,
"loss": 0.595,
"step": 1800
},
{
"epoch": 0.20532771383800724,
"grad_norm": 14.661055564880371,
"learning_rate": 1.8006666666666668e-05,
"loss": 0.5938,
"step": 1900
},
{
"epoch": 0.21613443561895498,
"grad_norm": 11.948161125183105,
"learning_rate": 1.7784444444444448e-05,
"loss": 0.5828,
"step": 2000
},
{
"epoch": 0.21613443561895498,
"eval_loss": 0.5093731880187988,
"eval_runtime": 5328.6402,
"eval_samples_per_second": 3.473,
"eval_steps_per_second": 0.868,
"eval_wer": 0.4828939857208768,
"step": 2000
},
{
"epoch": 0.22694115739990273,
"grad_norm": 12.322188377380371,
"learning_rate": 1.7562222222222224e-05,
"loss": 0.5752,
"step": 2100
},
{
"epoch": 0.2377478791808505,
"grad_norm": 17.046159744262695,
"learning_rate": 1.734e-05,
"loss": 0.5663,
"step": 2200
},
{
"epoch": 0.24855460096179824,
"grad_norm": 10.154263496398926,
"learning_rate": 1.711777777777778e-05,
"loss": 0.537,
"step": 2300
},
{
"epoch": 0.259361322742746,
"grad_norm": 11.958285331726074,
"learning_rate": 1.6895555555555556e-05,
"loss": 0.5246,
"step": 2400
},
{
"epoch": 0.27016804452369375,
"grad_norm": 10.264266014099121,
"learning_rate": 1.6673333333333335e-05,
"loss": 0.5226,
"step": 2500
},
{
"epoch": 0.27016804452369375,
"eval_loss": 0.46415480971336365,
"eval_runtime": 4645.745,
"eval_samples_per_second": 3.983,
"eval_steps_per_second": 0.996,
"eval_wer": 0.38602898052064843,
"step": 2500
},
{
"epoch": 0.28097476630464147,
"grad_norm": 12.049257278442383,
"learning_rate": 1.6451111111111115e-05,
"loss": 0.493,
"step": 2600
},
{
"epoch": 0.29178148808558924,
"grad_norm": 9.821508407592773,
"learning_rate": 1.622888888888889e-05,
"loss": 0.5153,
"step": 2700
},
{
"epoch": 0.302588209866537,
"grad_norm": 10.481095314025879,
"learning_rate": 1.6006666666666667e-05,
"loss": 0.5,
"step": 2800
},
{
"epoch": 0.3133949316474847,
"grad_norm": 10.193309783935547,
"learning_rate": 1.5784444444444447e-05,
"loss": 0.5248,
"step": 2900
},
{
"epoch": 0.3242016534284325,
"grad_norm": 12.328668594360352,
"learning_rate": 1.5562222222222223e-05,
"loss": 0.4955,
"step": 3000
},
{
"epoch": 0.3242016534284325,
"eval_loss": 0.4340818226337433,
"eval_runtime": 4456.9484,
"eval_samples_per_second": 4.152,
"eval_steps_per_second": 1.038,
"eval_wer": 0.39154200455117727,
"step": 3000
},
{
"epoch": 0.33500837520938026,
"grad_norm": 12.583343505859375,
"learning_rate": 1.5340000000000002e-05,
"loss": 0.5082,
"step": 3100
},
{
"epoch": 0.345815096990328,
"grad_norm": 8.40932846069336,
"learning_rate": 1.511777777777778e-05,
"loss": 0.4905,
"step": 3200
},
{
"epoch": 0.35662181877127574,
"grad_norm": 14.150980949401855,
"learning_rate": 1.4895555555555556e-05,
"loss": 0.466,
"step": 3300
},
{
"epoch": 0.36742854055222346,
"grad_norm": 13.014771461486816,
"learning_rate": 1.4673333333333336e-05,
"loss": 0.4788,
"step": 3400
},
{
"epoch": 0.37823526233317123,
"grad_norm": 11.843710899353027,
"learning_rate": 1.4451111111111112e-05,
"loss": 0.4616,
"step": 3500
},
{
"epoch": 0.37823526233317123,
"eval_loss": 0.4127795398235321,
"eval_runtime": 4528.1925,
"eval_samples_per_second": 4.086,
"eval_steps_per_second": 1.022,
"eval_wer": 0.35399088200564593,
"step": 3500
},
{
"epoch": 0.389041984114119,
"grad_norm": 11.520469665527344,
"learning_rate": 1.422888888888889e-05,
"loss": 0.4695,
"step": 3600
},
{
"epoch": 0.3998487058950667,
"grad_norm": 10.21032428741455,
"learning_rate": 1.400666666666667e-05,
"loss": 0.47,
"step": 3700
},
{
"epoch": 0.4106554276760145,
"grad_norm": 9.393896102905273,
"learning_rate": 1.3784444444444445e-05,
"loss": 0.4656,
"step": 3800
},
{
"epoch": 0.42146214945696225,
"grad_norm": 10.503016471862793,
"learning_rate": 1.3562222222222223e-05,
"loss": 0.4446,
"step": 3900
},
{
"epoch": 0.43226887123790997,
"grad_norm": 10.747596740722656,
"learning_rate": 1.3340000000000001e-05,
"loss": 0.4474,
"step": 4000
},
{
"epoch": 0.43226887123790997,
"eval_loss": 0.3900074064731598,
"eval_runtime": 4858.8536,
"eval_samples_per_second": 3.808,
"eval_steps_per_second": 0.952,
"eval_wer": 0.36136504038974343,
"step": 4000
},
{
"epoch": 0.44307559301885774,
"grad_norm": 13.275285720825195,
"learning_rate": 1.3117777777777779e-05,
"loss": 0.4488,
"step": 4100
},
{
"epoch": 0.45388231479980545,
"grad_norm": 11.318832397460938,
"learning_rate": 1.2897777777777778e-05,
"loss": 0.4292,
"step": 4200
},
{
"epoch": 0.4646890365807532,
"grad_norm": 10.3064546585083,
"learning_rate": 1.2675555555555557e-05,
"loss": 0.4302,
"step": 4300
},
{
"epoch": 0.475495758361701,
"grad_norm": 11.634562492370605,
"learning_rate": 1.2453333333333335e-05,
"loss": 0.426,
"step": 4400
},
{
"epoch": 0.4863024801426487,
"grad_norm": 10.647918701171875,
"learning_rate": 1.2231111111111111e-05,
"loss": 0.4387,
"step": 4500
},
{
"epoch": 0.4863024801426487,
"eval_loss": 0.37359631061553955,
"eval_runtime": 4990.4878,
"eval_samples_per_second": 3.708,
"eval_steps_per_second": 0.927,
"eval_wer": 0.35633684967821144,
"step": 4500
},
{
"epoch": 0.4971092019235965,
"grad_norm": 9.396610260009766,
"learning_rate": 1.200888888888889e-05,
"loss": 0.4195,
"step": 4600
},
{
"epoch": 0.5079159237045442,
"grad_norm": 10.845105171203613,
"learning_rate": 1.1786666666666668e-05,
"loss": 0.4056,
"step": 4700
},
{
"epoch": 0.518722645485492,
"grad_norm": 9.404190063476562,
"learning_rate": 1.1564444444444445e-05,
"loss": 0.4306,
"step": 4800
},
{
"epoch": 0.5295293672664397,
"grad_norm": 9.176289558410645,
"learning_rate": 1.1342222222222224e-05,
"loss": 0.4239,
"step": 4900
},
{
"epoch": 0.5403360890473875,
"grad_norm": 10.088706016540527,
"learning_rate": 1.1120000000000002e-05,
"loss": 0.4154,
"step": 5000
},
{
"epoch": 0.5403360890473875,
"eval_loss": 0.36057594418525696,
"eval_runtime": 5945.658,
"eval_samples_per_second": 3.112,
"eval_steps_per_second": 0.778,
"eval_wer": 0.32743452795220485,
"step": 5000
},
{
"epoch": 0.5511428108283353,
"grad_norm": 9.688194274902344,
"learning_rate": 1.0897777777777778e-05,
"loss": 0.4115,
"step": 5100
},
{
"epoch": 0.5619495326092829,
"grad_norm": 9.752260208129883,
"learning_rate": 1.0675555555555558e-05,
"loss": 0.3854,
"step": 5200
},
{
"epoch": 0.5727562543902307,
"grad_norm": 10.447392463684082,
"learning_rate": 1.0453333333333334e-05,
"loss": 0.4141,
"step": 5300
},
{
"epoch": 0.5835629761711785,
"grad_norm": 11.185776710510254,
"learning_rate": 1.0231111111111112e-05,
"loss": 0.3924,
"step": 5400
},
{
"epoch": 0.5943696979521262,
"grad_norm": 10.3914794921875,
"learning_rate": 1.000888888888889e-05,
"loss": 0.419,
"step": 5500
},
{
"epoch": 0.5943696979521262,
"eval_loss": 0.3494803309440613,
"eval_runtime": 6902.9208,
"eval_samples_per_second": 2.681,
"eval_steps_per_second": 0.67,
"eval_wer": 0.314375307908257,
"step": 5500
},
{
"epoch": 0.605176419733074,
"grad_norm": 11.420536041259766,
"learning_rate": 9.786666666666667e-06,
"loss": 0.4096,
"step": 5600
},
{
"epoch": 0.6159831415140217,
"grad_norm": 9.05328369140625,
"learning_rate": 9.564444444444445e-06,
"loss": 0.3917,
"step": 5700
},
{
"epoch": 0.6267898632949694,
"grad_norm": 10.281911849975586,
"learning_rate": 9.342222222222223e-06,
"loss": 0.3965,
"step": 5800
},
{
"epoch": 0.6375965850759172,
"grad_norm": 10.587265014648438,
"learning_rate": 9.12e-06,
"loss": 0.374,
"step": 5900
},
{
"epoch": 0.648403306856865,
"grad_norm": 7.721372127532959,
"learning_rate": 8.897777777777779e-06,
"loss": 0.3799,
"step": 6000
},
{
"epoch": 0.648403306856865,
"eval_loss": 0.3397567868232727,
"eval_runtime": 7002.8513,
"eval_samples_per_second": 2.642,
"eval_steps_per_second": 0.661,
"eval_wer": 0.2921668139413039,
"step": 6000
},
{
"epoch": 0.6592100286378128,
"grad_norm": 6.785597324371338,
"learning_rate": 8.675555555555556e-06,
"loss": 0.3953,
"step": 6100
},
{
"epoch": 0.6700167504187605,
"grad_norm": 9.53781509399414,
"learning_rate": 8.453333333333334e-06,
"loss": 0.3786,
"step": 6200
},
{
"epoch": 0.6808234721997082,
"grad_norm": 8.857239723205566,
"learning_rate": 8.231111111111112e-06,
"loss": 0.3744,
"step": 6300
},
{
"epoch": 0.691630193980656,
"grad_norm": 9.638261795043945,
"learning_rate": 8.00888888888889e-06,
"loss": 0.3809,
"step": 6400
},
{
"epoch": 0.7024369157616037,
"grad_norm": 8.304004669189453,
"learning_rate": 7.786666666666666e-06,
"loss": 0.3802,
"step": 6500
},
{
"epoch": 0.7024369157616037,
"eval_loss": 0.3289755880832672,
"eval_runtime": 5885.8991,
"eval_samples_per_second": 3.144,
"eval_steps_per_second": 0.786,
"eval_wer": 0.3044049452998538,
"step": 6500
},
{
"epoch": 0.7132436375425515,
"grad_norm": 9.978581428527832,
"learning_rate": 7.564444444444446e-06,
"loss": 0.3537,
"step": 6600
},
{
"epoch": 0.7240503593234993,
"grad_norm": 10.849929809570312,
"learning_rate": 7.342222222222223e-06,
"loss": 0.3762,
"step": 6700
},
{
"epoch": 0.7348570811044469,
"grad_norm": 11.856138229370117,
"learning_rate": 7.1200000000000004e-06,
"loss": 0.3477,
"step": 6800
},
{
"epoch": 0.7456638028853947,
"grad_norm": 10.761491775512695,
"learning_rate": 6.897777777777779e-06,
"loss": 0.361,
"step": 6900
},
{
"epoch": 0.7564705246663425,
"grad_norm": 9.24421501159668,
"learning_rate": 6.675555555555556e-06,
"loss": 0.3611,
"step": 7000
},
{
"epoch": 0.7564705246663425,
"eval_loss": 0.3224972188472748,
"eval_runtime": 5632.6127,
"eval_samples_per_second": 3.285,
"eval_steps_per_second": 0.821,
"eval_wer": 0.2823372093932546,
"step": 7000
},
{
"epoch": 0.7672772464472902,
"grad_norm": 10.52470874786377,
"learning_rate": 6.453333333333334e-06,
"loss": 0.3638,
"step": 7100
},
{
"epoch": 0.778083968228238,
"grad_norm": 9.080463409423828,
"learning_rate": 6.231111111111111e-06,
"loss": 0.3532,
"step": 7200
},
{
"epoch": 0.7888906900091858,
"grad_norm": 8.789374351501465,
"learning_rate": 6.00888888888889e-06,
"loss": 0.3592,
"step": 7300
},
{
"epoch": 0.7996974117901334,
"grad_norm": 8.97732162475586,
"learning_rate": 5.7866666666666674e-06,
"loss": 0.3611,
"step": 7400
},
{
"epoch": 0.8105041335710812,
"grad_norm": 10.455592155456543,
"learning_rate": 5.5644444444444444e-06,
"loss": 0.3548,
"step": 7500
},
{
"epoch": 0.8105041335710812,
"eval_loss": 0.31678903102874756,
"eval_runtime": 3060.9871,
"eval_samples_per_second": 6.045,
"eval_steps_per_second": 1.511,
"eval_wer": 0.27332869353060313,
"step": 7500
},
{
"epoch": 0.821310855352029,
"grad_norm": 8.56920051574707,
"learning_rate": 5.342222222222223e-06,
"loss": 0.3628,
"step": 7600
},
{
"epoch": 0.8321175771329767,
"grad_norm": 11.37761402130127,
"learning_rate": 5.12e-06,
"loss": 0.3353,
"step": 7700
},
{
"epoch": 0.8429242989139245,
"grad_norm": 9.396086692810059,
"learning_rate": 4.897777777777778e-06,
"loss": 0.3704,
"step": 7800
},
{
"epoch": 0.8537310206948722,
"grad_norm": 10.0977144241333,
"learning_rate": 4.677777777777778e-06,
"loss": 0.364,
"step": 7900
},
{
"epoch": 0.8645377424758199,
"grad_norm": 8.653088569641113,
"learning_rate": 4.455555555555555e-06,
"loss": 0.346,
"step": 8000
},
{
"epoch": 0.8645377424758199,
"eval_loss": 0.3104597330093384,
"eval_runtime": 3053.8514,
"eval_samples_per_second": 6.059,
"eval_steps_per_second": 1.515,
"eval_wer": 0.26601709428444076,
"step": 8000
},
{
"epoch": 0.8753444642567677,
"grad_norm": 9.058122634887695,
"learning_rate": 4.233333333333334e-06,
"loss": 0.3382,
"step": 8100
},
{
"epoch": 0.8861511860377155,
"grad_norm": 12.135452270507812,
"learning_rate": 4.011111111111111e-06,
"loss": 0.3456,
"step": 8200
},
{
"epoch": 0.8969579078186632,
"grad_norm": 6.601293563842773,
"learning_rate": 3.7888888888888893e-06,
"loss": 0.3404,
"step": 8300
},
{
"epoch": 0.9077646295996109,
"grad_norm": 9.51930046081543,
"learning_rate": 3.566666666666667e-06,
"loss": 0.3479,
"step": 8400
},
{
"epoch": 0.9185713513805587,
"grad_norm": 7.031350135803223,
"learning_rate": 3.3444444444444445e-06,
"loss": 0.3547,
"step": 8500
},
{
"epoch": 0.9185713513805587,
"eval_loss": 0.3063461184501648,
"eval_runtime": 3070.7291,
"eval_samples_per_second": 6.026,
"eval_steps_per_second": 1.506,
"eval_wer": 0.27081068822871623,
"step": 8500
},
{
"epoch": 0.9293780731615064,
"grad_norm": 11.10822868347168,
"learning_rate": 3.1222222222222228e-06,
"loss": 0.3454,
"step": 8600
},
{
"epoch": 0.9401847949424542,
"grad_norm": 9.607211112976074,
"learning_rate": 2.9e-06,
"loss": 0.3319,
"step": 8700
},
{
"epoch": 0.950991516723402,
"grad_norm": 10.614663124084473,
"learning_rate": 2.677777777777778e-06,
"loss": 0.3441,
"step": 8800
},
{
"epoch": 0.9617982385043498,
"grad_norm": 8.344138145446777,
"learning_rate": 2.455555555555556e-06,
"loss": 0.3466,
"step": 8900
},
{
"epoch": 0.9726049602852974,
"grad_norm": 11.955930709838867,
"learning_rate": 2.2333333333333333e-06,
"loss": 0.3211,
"step": 9000
},
{
"epoch": 0.9726049602852974,
"eval_loss": 0.30189329385757446,
"eval_runtime": 3095.8164,
"eval_samples_per_second": 5.977,
"eval_steps_per_second": 1.494,
"eval_wer": 0.28268910454413937,
"step": 9000
},
{
"epoch": 0.9834116820662452,
"grad_norm": 9.438616752624512,
"learning_rate": 2.011111111111111e-06,
"loss": 0.343,
"step": 9100
},
{
"epoch": 0.994218403847193,
"grad_norm": 10.029309272766113,
"learning_rate": 1.788888888888889e-06,
"loss": 0.3582,
"step": 9200
},
{
"epoch": 1.0050791592370454,
"grad_norm": 9.47360610961914,
"learning_rate": 1.566666666666667e-06,
"loss": 0.3024,
"step": 9300
},
{
"epoch": 1.0158858810179932,
"grad_norm": 9.3403959274292,
"learning_rate": 1.3444444444444446e-06,
"loss": 0.2811,
"step": 9400
},
{
"epoch": 1.026692602798941,
"grad_norm": 9.723664283752441,
"learning_rate": 1.1222222222222222e-06,
"loss": 0.2718,
"step": 9500
},
{
"epoch": 1.026692602798941,
"eval_loss": 0.2989746034145355,
"eval_runtime": 3189.5179,
"eval_samples_per_second": 5.802,
"eval_steps_per_second": 1.45,
"eval_wer": 0.2659936346077151,
"step": 9500
},
{
"epoch": 1.0374993245798887,
"grad_norm": 7.739469051361084,
"learning_rate": 9.000000000000001e-07,
"loss": 0.2765,
"step": 9600
},
{
"epoch": 1.0483060463608365,
"grad_norm": 8.379693984985352,
"learning_rate": 6.777777777777779e-07,
"loss": 0.2872,
"step": 9700
},
{
"epoch": 1.0591127681417842,
"grad_norm": 8.849838256835938,
"learning_rate": 4.5555555555555563e-07,
"loss": 0.2782,
"step": 9800
},
{
"epoch": 1.069919489922732,
"grad_norm": 8.006597518920898,
"learning_rate": 2.3333333333333336e-07,
"loss": 0.2673,
"step": 9900
},
{
"epoch": 1.0807262117036798,
"grad_norm": 10.859480857849121,
"learning_rate": 1.1111111111111112e-08,
"loss": 0.2859,
"step": 10000
},
{
"epoch": 1.0807262117036798,
"eval_loss": 0.2979792058467865,
"eval_runtime": 3174.7879,
"eval_samples_per_second": 5.828,
"eval_steps_per_second": 1.457,
"eval_wer": 0.2586507557925852,
"step": 10000
},
{
"epoch": 1.0915329334846273,
"grad_norm": 8.480154037475586,
"learning_rate": 1.7735555555555558e-05,
"loss": 0.3044,
"step": 10100
},
{
"epoch": 1.102339655265575,
"grad_norm": 8.65846061706543,
"learning_rate": 1.7691111111111113e-05,
"loss": 0.3126,
"step": 10200
},
{
"epoch": 1.1131463770465229,
"grad_norm": 8.054668426513672,
"learning_rate": 1.764711111111111e-05,
"loss": 0.315,
"step": 10300
},
{
"epoch": 1.1239530988274706,
"grad_norm": 9.317754745483398,
"learning_rate": 1.7602666666666667e-05,
"loss": 0.3139,
"step": 10400
},
{
"epoch": 1.1347598206084184,
"grad_norm": 6.9345879554748535,
"learning_rate": 1.7558222222222222e-05,
"loss": 0.2917,
"step": 10500
},
{
"epoch": 1.1347598206084184,
"eval_loss": 0.3268890976905823,
"eval_runtime": 5010.973,
"eval_samples_per_second": 3.693,
"eval_steps_per_second": 0.923,
"eval_wer": 0.2518552694343872,
"step": 10500
},
{
"epoch": 1.1455665423893662,
"grad_norm": 10.198206901550293,
"learning_rate": 1.7513777777777777e-05,
"loss": 0.3099,
"step": 10600
},
{
"epoch": 1.156373264170314,
"grad_norm": 10.446975708007812,
"learning_rate": 1.7469333333333332e-05,
"loss": 0.3109,
"step": 10700
},
{
"epoch": 1.1671799859512617,
"grad_norm": 8.202065467834473,
"learning_rate": 1.742488888888889e-05,
"loss": 0.3033,
"step": 10800
},
{
"epoch": 1.1779867077322095,
"grad_norm": 9.471212387084961,
"learning_rate": 1.7380444444444446e-05,
"loss": 0.3121,
"step": 10900
},
{
"epoch": 1.1887934295131573,
"grad_norm": 9.272053718566895,
"learning_rate": 1.7336e-05,
"loss": 0.3117,
"step": 11000
},
{
"epoch": 1.1887934295131573,
"eval_loss": 0.32140180468559265,
"eval_runtime": 4975.5976,
"eval_samples_per_second": 3.719,
"eval_steps_per_second": 0.93,
"eval_wer": 0.2575403310942375,
"step": 11000
},
{
"epoch": 1.199600151294105,
"grad_norm": 8.912075996398926,
"learning_rate": 1.7291555555555557e-05,
"loss": 0.3163,
"step": 11100
},
{
"epoch": 1.2104068730750526,
"grad_norm": 12.307350158691406,
"learning_rate": 1.7247111111111112e-05,
"loss": 0.3087,
"step": 11200
},
{
"epoch": 1.2212135948560003,
"grad_norm": 8.338894844055176,
"learning_rate": 1.7202666666666667e-05,
"loss": 0.3264,
"step": 11300
},
{
"epoch": 1.232020316636948,
"grad_norm": 10.600968360900879,
"learning_rate": 1.7158222222222222e-05,
"loss": 0.3144,
"step": 11400
},
{
"epoch": 1.2428270384178959,
"grad_norm": 9.626172065734863,
"learning_rate": 1.711377777777778e-05,
"loss": 0.3204,
"step": 11500
},
{
"epoch": 1.2428270384178959,
"eval_loss": 0.3168378174304962,
"eval_runtime": 5060.1288,
"eval_samples_per_second": 3.657,
"eval_steps_per_second": 0.914,
"eval_wer": 0.2646173335731434,
"step": 11500
},
{
"epoch": 1.2536337601988436,
"grad_norm": 8.312841415405273,
"learning_rate": 1.7069333333333336e-05,
"loss": 0.2968,
"step": 11600
},
{
"epoch": 1.2644404819797914,
"grad_norm": 8.717096328735352,
"learning_rate": 1.702488888888889e-05,
"loss": 0.3145,
"step": 11700
},
{
"epoch": 1.2752472037607392,
"grad_norm": 7.836411952972412,
"learning_rate": 1.6980444444444447e-05,
"loss": 0.3144,
"step": 11800
},
{
"epoch": 1.286053925541687,
"grad_norm": 7.561498165130615,
"learning_rate": 1.6936000000000002e-05,
"loss": 0.315,
"step": 11900
},
{
"epoch": 1.2968606473226347,
"grad_norm": 9.085077285766602,
"learning_rate": 1.6891555555555557e-05,
"loss": 0.2962,
"step": 12000
},
{
"epoch": 1.2968606473226347,
"eval_loss": 0.3087281286716461,
"eval_runtime": 5226.3325,
"eval_samples_per_second": 3.541,
"eval_steps_per_second": 0.885,
"eval_wer": 0.24104817835610226,
"step": 12000
},
{
"epoch": 1.3076673691035825,
"grad_norm": 10.332176208496094,
"learning_rate": 1.6847111111111112e-05,
"loss": 0.3092,
"step": 12100
},
{
"epoch": 1.3184740908845303,
"grad_norm": 10.159818649291992,
"learning_rate": 1.6802666666666668e-05,
"loss": 0.301,
"step": 12200
},
{
"epoch": 1.3292808126654778,
"grad_norm": 8.238092422485352,
"learning_rate": 1.6758222222222226e-05,
"loss": 0.307,
"step": 12300
},
{
"epoch": 1.3400875344464258,
"grad_norm": 6.4258317947387695,
"learning_rate": 1.671377777777778e-05,
"loss": 0.3022,
"step": 12400
},
{
"epoch": 1.3508942562273734,
"grad_norm": 10.840997695922852,
"learning_rate": 1.6669333333333337e-05,
"loss": 0.2961,
"step": 12500
},
{
"epoch": 1.3508942562273734,
"eval_loss": 0.3057025372982025,
"eval_runtime": 5051.0013,
"eval_samples_per_second": 3.663,
"eval_steps_per_second": 0.916,
"eval_wer": 0.23847543380852212,
"step": 12500
},
{
"epoch": 1.3617009780083211,
"grad_norm": 12.025620460510254,
"learning_rate": 1.6624888888888892e-05,
"loss": 0.2959,
"step": 12600
},
{
"epoch": 1.372507699789269,
"grad_norm": 7.729722023010254,
"learning_rate": 1.6580444444444447e-05,
"loss": 0.2917,
"step": 12700
},
{
"epoch": 1.3833144215702167,
"grad_norm": 10.813538551330566,
"learning_rate": 1.6536000000000002e-05,
"loss": 0.3176,
"step": 12800
},
{
"epoch": 1.3941211433511644,
"grad_norm": 9.306085586547852,
"learning_rate": 1.6491555555555558e-05,
"loss": 0.3092,
"step": 12900
},
{
"epoch": 1.4049278651321122,
"grad_norm": 8.277687072753906,
"learning_rate": 1.6447111111111113e-05,
"loss": 0.2887,
"step": 13000
},
{
"epoch": 1.4049278651321122,
"eval_loss": 0.298722505569458,
"eval_runtime": 5198.4556,
"eval_samples_per_second": 3.56,
"eval_steps_per_second": 0.89,
"eval_wer": 0.22810625669578274,
"step": 13000
},
{
"epoch": 1.41573458691306,
"grad_norm": 6.493645191192627,
"learning_rate": 1.640311111111111e-05,
"loss": 0.2895,
"step": 13100
},
{
"epoch": 1.4265413086940077,
"grad_norm": 12.606965065002441,
"learning_rate": 1.6358666666666666e-05,
"loss": 0.3072,
"step": 13200
},
{
"epoch": 1.4373480304749555,
"grad_norm": 11.579826354980469,
"learning_rate": 1.6314222222222225e-05,
"loss": 0.3046,
"step": 13300
},
{
"epoch": 1.448154752255903,
"grad_norm": 8.03853702545166,
"learning_rate": 1.626977777777778e-05,
"loss": 0.2983,
"step": 13400
},
{
"epoch": 1.458961474036851,
"grad_norm": 5.478261470794678,
"learning_rate": 1.6225333333333335e-05,
"loss": 0.2981,
"step": 13500
},
{
"epoch": 1.458961474036851,
"eval_loss": 0.29534754157066345,
"eval_runtime": 5398.8982,
"eval_samples_per_second": 3.427,
"eval_steps_per_second": 0.857,
"eval_wer": 0.23218042055380478,
"step": 13500
},
{
"epoch": 1.4697681958177986,
"grad_norm": 11.128421783447266,
"learning_rate": 1.618088888888889e-05,
"loss": 0.2863,
"step": 13600
},
{
"epoch": 1.4805749175987464,
"grad_norm": 9.209829330444336,
"learning_rate": 1.6136444444444446e-05,
"loss": 0.2945,
"step": 13700
},
{
"epoch": 1.4913816393796941,
"grad_norm": 7.881196022033691,
"learning_rate": 1.6092e-05,
"loss": 0.2881,
"step": 13800
},
{
"epoch": 1.502188361160642,
"grad_norm": 7.5840630531311035,
"learning_rate": 1.6047555555555556e-05,
"loss": 0.2881,
"step": 13900
},
{
"epoch": 1.5129950829415897,
"grad_norm": 9.444194793701172,
"learning_rate": 1.600311111111111e-05,
"loss": 0.2994,
"step": 14000
},
{
"epoch": 1.5129950829415897,
"eval_loss": 0.29087820649147034,
"eval_runtime": 5347.4253,
"eval_samples_per_second": 3.46,
"eval_steps_per_second": 0.865,
"eval_wer": 0.23219606033828855,
"step": 14000
},
{
"epoch": 1.5238018047225375,
"grad_norm": 8.870946884155273,
"learning_rate": 1.595866666666667e-05,
"loss": 0.2817,
"step": 14100
},
{
"epoch": 1.5346085265034852,
"grad_norm": 8.750350952148438,
"learning_rate": 1.5914222222222225e-05,
"loss": 0.2748,
"step": 14200
},
{
"epoch": 1.5454152482844328,
"grad_norm": 8.175148010253906,
"learning_rate": 1.586977777777778e-05,
"loss": 0.2941,
"step": 14300
},
{
"epoch": 1.5562219700653808,
"grad_norm": 8.30854320526123,
"learning_rate": 1.5825333333333336e-05,
"loss": 0.2861,
"step": 14400
},
{
"epoch": 1.5670286918463283,
"grad_norm": 6.031162261962891,
"learning_rate": 1.578088888888889e-05,
"loss": 0.2818,
"step": 14500
},
{
"epoch": 1.5670286918463283,
"eval_loss": 0.2847795784473419,
"eval_runtime": 4891.0559,
"eval_samples_per_second": 3.783,
"eval_steps_per_second": 0.946,
"eval_wer": 0.21997356876422242,
"step": 14500
},
{
"epoch": 1.5778354136272763,
"grad_norm": 9.509627342224121,
"learning_rate": 1.5736444444444446e-05,
"loss": 0.276,
"step": 14600
},
{
"epoch": 1.5886421354082239,
"grad_norm": 9.79079532623291,
"learning_rate": 1.5692e-05,
"loss": 0.2918,
"step": 14700
},
{
"epoch": 1.5994488571891716,
"grad_norm": 5.595622539520264,
"learning_rate": 1.5647555555555557e-05,
"loss": 0.282,
"step": 14800
},
{
"epoch": 1.6102555789701194,
"grad_norm": 8.00671100616455,
"learning_rate": 1.5603111111111112e-05,
"loss": 0.2884,
"step": 14900
},
{
"epoch": 1.6210623007510672,
"grad_norm": 9.086261749267578,
"learning_rate": 1.5558666666666667e-05,
"loss": 0.2851,
"step": 15000
},
{
"epoch": 1.6210623007510672,
"eval_loss": 0.2829968333244324,
"eval_runtime": 4762.5655,
"eval_samples_per_second": 3.885,
"eval_steps_per_second": 0.971,
"eval_wer": 0.21664229466917945,
"step": 15000
},
{
"epoch": 1.631869022532015,
"grad_norm": 7.383193016052246,
"learning_rate": 1.5514222222222222e-05,
"loss": 0.2811,
"step": 15100
},
{
"epoch": 1.6426757443129627,
"grad_norm": 8.950287818908691,
"learning_rate": 1.5469777777777778e-05,
"loss": 0.2897,
"step": 15200
},
{
"epoch": 1.6534824660939105,
"grad_norm": 9.632174491882324,
"learning_rate": 1.5425333333333333e-05,
"loss": 0.2622,
"step": 15300
},
{
"epoch": 1.664289187874858,
"grad_norm": 9.395116806030273,
"learning_rate": 1.5380888888888888e-05,
"loss": 0.2839,
"step": 15400
},
{
"epoch": 1.675095909655806,
"grad_norm": 7.435296535491943,
"learning_rate": 1.5336444444444443e-05,
"loss": 0.275,
"step": 15500
},
{
"epoch": 1.675095909655806,
"eval_loss": 0.2770063579082489,
"eval_runtime": 3549.6262,
"eval_samples_per_second": 5.213,
"eval_steps_per_second": 1.303,
"eval_wer": 0.2128887463930747,
"step": 15500
},
{
"epoch": 1.6859026314367536,
"grad_norm": 6.951478004455566,
"learning_rate": 1.5292e-05,
"loss": 0.2573,
"step": 15600
},
{
"epoch": 1.6967093532177016,
"grad_norm": 9.954898834228516,
"learning_rate": 1.5247555555555557e-05,
"loss": 0.2722,
"step": 15700
},
{
"epoch": 1.707516074998649,
"grad_norm": 7.309504985809326,
"learning_rate": 1.5203111111111112e-05,
"loss": 0.2578,
"step": 15800
},
{
"epoch": 1.7183227967795969,
"grad_norm": 7.3711042404174805,
"learning_rate": 1.5158666666666668e-05,
"loss": 0.2718,
"step": 15900
},
{
"epoch": 1.7291295185605446,
"grad_norm": 10.445212364196777,
"learning_rate": 1.5114222222222223e-05,
"loss": 0.2689,
"step": 16000
},
{
"epoch": 1.7291295185605446,
"eval_loss": 0.27603384852409363,
"eval_runtime": 3570.6477,
"eval_samples_per_second": 5.182,
"eval_steps_per_second": 1.296,
"eval_wer": 0.21191907975508098,
"step": 16000
},
{
"epoch": 1.7399362403414924,
"grad_norm": 5.5510573387146,
"learning_rate": 1.5069777777777778e-05,
"loss": 0.2831,
"step": 16100
},
{
"epoch": 1.7507429621224402,
"grad_norm": 7.619734287261963,
"learning_rate": 1.5025333333333333e-05,
"loss": 0.2596,
"step": 16200
},
{
"epoch": 1.761549683903388,
"grad_norm": 8.503314018249512,
"learning_rate": 1.4980888888888889e-05,
"loss": 0.2741,
"step": 16300
},
{
"epoch": 1.7723564056843357,
"grad_norm": 7.427919387817383,
"learning_rate": 1.4936444444444447e-05,
"loss": 0.2826,
"step": 16400
},
{
"epoch": 1.7831631274652833,
"grad_norm": 6.663356781005859,
"learning_rate": 1.4892000000000002e-05,
"loss": 0.2796,
"step": 16500
},
{
"epoch": 1.7831631274652833,
"eval_loss": 0.26777052879333496,
"eval_runtime": 3498.6608,
"eval_samples_per_second": 5.289,
"eval_steps_per_second": 1.322,
"eval_wer": 0.20020488117673738,
"step": 16500
},
{
"epoch": 1.7939698492462313,
"grad_norm": 7.476739883422852,
"learning_rate": 1.4847555555555558e-05,
"loss": 0.2507,
"step": 16600
},
{
"epoch": 1.8047765710271788,
"grad_norm": 7.695949077606201,
"learning_rate": 1.4803111111111113e-05,
"loss": 0.2578,
"step": 16700
},
{
"epoch": 1.8155832928081268,
"grad_norm": 9.240167617797852,
"learning_rate": 1.4758666666666668e-05,
"loss": 0.2917,
"step": 16800
},
{
"epoch": 1.8263900145890744,
"grad_norm": 8.233548164367676,
"learning_rate": 1.4714222222222223e-05,
"loss": 0.2587,
"step": 16900
},
{
"epoch": 1.8371967363700221,
"grad_norm": 9.109882354736328,
"learning_rate": 1.4669777777777779e-05,
"loss": 0.2717,
"step": 17000
},
{
"epoch": 1.8371967363700221,
"eval_loss": 0.2652583122253418,
"eval_runtime": 3449.0658,
"eval_samples_per_second": 5.365,
"eval_steps_per_second": 1.341,
"eval_wer": 0.20007976290086724,
"step": 17000
},
{
"epoch": 1.84800345815097,
"grad_norm": 6.163556098937988,
"learning_rate": 1.4625333333333334e-05,
"loss": 0.2607,
"step": 17100
},
{
"epoch": 1.8588101799319177,
"grad_norm": 9.27730941772461,
"learning_rate": 1.4581333333333334e-05,
"loss": 0.2581,
"step": 17200
},
{
"epoch": 1.8696169017128654,
"grad_norm": 8.40946102142334,
"learning_rate": 1.4536888888888889e-05,
"loss": 0.2577,
"step": 17300
},
{
"epoch": 1.8804236234938132,
"grad_norm": 8.552946090698242,
"learning_rate": 1.4492444444444444e-05,
"loss": 0.2741,
"step": 17400
},
{
"epoch": 1.891230345274761,
"grad_norm": 6.056818962097168,
"learning_rate": 1.4448000000000001e-05,
"loss": 0.2661,
"step": 17500
},
{
"epoch": 1.891230345274761,
"eval_loss": 0.2625672221183777,
"eval_runtime": 3434.9257,
"eval_samples_per_second": 5.387,
"eval_steps_per_second": 1.347,
"eval_wer": 0.20144824404319708,
"step": 17500
},
{
"epoch": 1.9020370670557085,
"grad_norm": 7.4529571533203125,
"learning_rate": 1.4403555555555556e-05,
"loss": 0.2638,
"step": 17600
},
{
"epoch": 1.9128437888366565,
"grad_norm": 6.99146032333374,
"learning_rate": 1.4359111111111112e-05,
"loss": 0.268,
"step": 17700
},
{
"epoch": 1.923650510617604,
"grad_norm": 6.872374534606934,
"learning_rate": 1.4314666666666669e-05,
"loss": 0.2469,
"step": 17800
},
{
"epoch": 1.934457232398552,
"grad_norm": 8.856480598449707,
"learning_rate": 1.4270222222222224e-05,
"loss": 0.2685,
"step": 17900
},
{
"epoch": 1.9452639541794996,
"grad_norm": 9.830224990844727,
"learning_rate": 1.4225777777777779e-05,
"loss": 0.2612,
"step": 18000
},
{
"epoch": 1.9452639541794996,
"eval_loss": 0.2572856843471527,
"eval_runtime": 3453.1421,
"eval_samples_per_second": 5.359,
"eval_steps_per_second": 1.34,
"eval_wer": 0.19530180874107556,
"step": 18000
},
{
"epoch": 1.9560706759604474,
"grad_norm": 7.073034286499023,
"learning_rate": 1.4181333333333334e-05,
"loss": 0.2631,
"step": 18100
},
{
"epoch": 1.9668773977413951,
"grad_norm": 8.844318389892578,
"learning_rate": 1.4136888888888891e-05,
"loss": 0.263,
"step": 18200
},
{
"epoch": 1.977684119522343,
"grad_norm": 7.525826454162598,
"learning_rate": 1.4092444444444446e-05,
"loss": 0.2643,
"step": 18300
},
{
"epoch": 1.9884908413032907,
"grad_norm": 7.551785945892334,
"learning_rate": 1.4048000000000002e-05,
"loss": 0.2491,
"step": 18400
},
{
"epoch": 1.9992975630842384,
"grad_norm": 8.573739051818848,
"learning_rate": 1.4003555555555557e-05,
"loss": 0.2532,
"step": 18500
},
{
"epoch": 1.9992975630842384,
"eval_loss": 0.2554282248020172,
"eval_runtime": 3437.8823,
"eval_samples_per_second": 5.382,
"eval_steps_per_second": 1.346,
"eval_wer": 0.19536436787901063,
"step": 18500
},
{
"epoch": 2.0101583184740908,
"grad_norm": 250204.390625,
"learning_rate": 1.3959111111111112e-05,
"loss": 0.2066,
"step": 18600
},
{
"epoch": 2.0209650402550388,
"grad_norm": 299758.4375,
"learning_rate": 1.3914666666666667e-05,
"loss": 0.1958,
"step": 18700
},
{
"epoch": 2.0317717620359863,
"grad_norm": 245057.34375,
"learning_rate": 1.3870222222222223e-05,
"loss": 0.2,
"step": 18800
},
{
"epoch": 2.0425784838169343,
"grad_norm": 334399.46875,
"learning_rate": 1.3825777777777778e-05,
"loss": 0.192,
"step": 18900
},
{
"epoch": 2.053385205597882,
"grad_norm": 236247.03125,
"learning_rate": 1.3781333333333335e-05,
"loss": 0.1993,
"step": 19000
},
{
"epoch": 2.053385205597882,
"eval_loss": 0.2527328431606293,
"eval_runtime": 2819.5348,
"eval_samples_per_second": 6.563,
"eval_steps_per_second": 0.82,
"eval_wer": 0.19490299423673943,
"step": 19000
},
{
"epoch": 2.06419192737883,
"grad_norm": 260942.1875,
"learning_rate": 1.373688888888889e-05,
"loss": 0.1947,
"step": 19100
},
{
"epoch": 2.0749986491597774,
"grad_norm": 247902.484375,
"learning_rate": 1.3692444444444445e-05,
"loss": 0.1969,
"step": 19200
},
{
"epoch": 2.085805370940725,
"grad_norm": 229633.375,
"learning_rate": 1.3648e-05,
"loss": 0.1934,
"step": 19300
},
{
"epoch": 2.096612092721673,
"grad_norm": 214819.4375,
"learning_rate": 1.3603555555555556e-05,
"loss": 0.1936,
"step": 19400
},
{
"epoch": 2.1074188145026205,
"grad_norm": 225145.53125,
"learning_rate": 1.3559111111111113e-05,
"loss": 0.2009,
"step": 19500
},
{
"epoch": 2.1074188145026205,
"eval_loss": 0.25053051114082336,
"eval_runtime": 2775.1844,
"eval_samples_per_second": 6.668,
"eval_steps_per_second": 0.833,
"eval_wer": 0.1897340454648535,
"step": 19500
},
{
"epoch": 2.1182255362835685,
"grad_norm": 335387.5,
"learning_rate": 1.3514666666666668e-05,
"loss": 0.1993,
"step": 19600
},
{
"epoch": 2.129032258064516,
"grad_norm": 276303.59375,
"learning_rate": 1.3470222222222223e-05,
"loss": 0.1927,
"step": 19700
},
{
"epoch": 2.139838979845464,
"grad_norm": 314395.15625,
"learning_rate": 1.342577777777778e-05,
"loss": 0.1828,
"step": 19800
},
{
"epoch": 2.1506457016264116,
"grad_norm": 224658.71875,
"learning_rate": 1.3381333333333335e-05,
"loss": 0.1998,
"step": 19900
},
{
"epoch": 2.1614524234073595,
"grad_norm": 269057.15625,
"learning_rate": 1.333688888888889e-05,
"loss": 0.1929,
"step": 20000
},
{
"epoch": 2.1614524234073595,
"eval_loss": 0.24842554330825806,
"eval_runtime": 2752.0603,
"eval_samples_per_second": 6.724,
"eval_steps_per_second": 0.84,
"eval_wer": 0.19266650505556032,
"step": 20000
},
{
"epoch": 2.172259145188307,
"grad_norm": 295629.75,
"learning_rate": 1.3292444444444446e-05,
"loss": 0.1881,
"step": 20100
},
{
"epoch": 2.1830658669692546,
"grad_norm": 253882.09375,
"learning_rate": 1.3248000000000001e-05,
"loss": 0.1918,
"step": 20200
},
{
"epoch": 2.1938725887502026,
"grad_norm": 220421.125,
"learning_rate": 1.3203555555555556e-05,
"loss": 0.1944,
"step": 20300
},
{
"epoch": 2.20467931053115,
"grad_norm": 167626.5625,
"learning_rate": 1.3159111111111111e-05,
"loss": 0.194,
"step": 20400
},
{
"epoch": 2.215486032312098,
"grad_norm": 303924.09375,
"learning_rate": 1.3114666666666667e-05,
"loss": 0.2011,
"step": 20500
},
{
"epoch": 2.215486032312098,
"eval_loss": 0.24535807967185974,
"eval_runtime": 2748.7126,
"eval_samples_per_second": 6.732,
"eval_steps_per_second": 0.841,
"eval_wer": 0.1894759890208713,
"step": 20500
},
{
"epoch": 2.2262927540930457,
"grad_norm": 202871.59375,
"learning_rate": 1.3070222222222223e-05,
"loss": 0.2008,
"step": 20600
},
{
"epoch": 2.2370994758739937,
"grad_norm": 263771.4375,
"learning_rate": 1.3025777777777779e-05,
"loss": 0.1995,
"step": 20700
},
{
"epoch": 2.2479061976549413,
"grad_norm": 214771.859375,
"learning_rate": 1.2981333333333334e-05,
"loss": 0.1911,
"step": 20800
},
{
"epoch": 2.2587129194358893,
"grad_norm": 286280.8125,
"learning_rate": 1.293688888888889e-05,
"loss": 0.1939,
"step": 20900
},
{
"epoch": 2.269519641216837,
"grad_norm": 310779.4375,
"learning_rate": 1.2892444444444444e-05,
"loss": 0.1828,
"step": 21000
},
{
"epoch": 2.269519641216837,
"eval_loss": 0.24438022077083588,
"eval_runtime": 2733.7874,
"eval_samples_per_second": 6.769,
"eval_steps_per_second": 0.846,
"eval_wer": 0.1892179325768891,
"step": 21000
},
{
"epoch": 2.280326362997785,
"grad_norm": 254828.265625,
"learning_rate": 1.2848e-05,
"loss": 0.1933,
"step": 21100
},
{
"epoch": 2.2911330847787323,
"grad_norm": 239186.609375,
"learning_rate": 1.2803555555555557e-05,
"loss": 0.2002,
"step": 21200
},
{
"epoch": 2.3019398065596803,
"grad_norm": 239042.359375,
"learning_rate": 1.2759111111111113e-05,
"loss": 0.1924,
"step": 21300
},
{
"epoch": 2.312746528340628,
"grad_norm": 260667.75,
"learning_rate": 1.2714666666666669e-05,
"loss": 0.2005,
"step": 21400
},
{
"epoch": 2.3235532501215754,
"grad_norm": 245939.078125,
"learning_rate": 1.2670222222222224e-05,
"loss": 0.1823,
"step": 21500
},
{
"epoch": 2.3235532501215754,
"eval_loss": 0.24371393024921417,
"eval_runtime": 2738.6211,
"eval_samples_per_second": 6.757,
"eval_steps_per_second": 0.845,
"eval_wer": 0.18452599723175814,
"step": 21500
},
{
"epoch": 2.3343599719025234,
"grad_norm": 315165.90625,
"learning_rate": 1.262577777777778e-05,
"loss": 0.1958,
"step": 21600
},
{
"epoch": 2.345166693683471,
"grad_norm": 236173.65625,
"learning_rate": 1.2581333333333334e-05,
"loss": 0.1884,
"step": 21700
},
{
"epoch": 2.355973415464419,
"grad_norm": 176315.5,
"learning_rate": 1.253688888888889e-05,
"loss": 0.1953,
"step": 21800
},
{
"epoch": 2.3667801372453665,
"grad_norm": 257501.171875,
"learning_rate": 1.2492444444444445e-05,
"loss": 0.1968,
"step": 21900
},
{
"epoch": 2.3775868590263145,
"grad_norm": 253721.75,
"learning_rate": 1.2448e-05,
"loss": 0.186,
"step": 22000
},
{
"epoch": 2.3775868590263145,
"eval_loss": 0.24074091017246246,
"eval_runtime": 2749.0556,
"eval_samples_per_second": 6.731,
"eval_steps_per_second": 0.841,
"eval_wer": 0.18391604563689112,
"step": 22000
},
{
"epoch": 2.388393580807262,
"grad_norm": 173236.4375,
"learning_rate": 1.2403555555555557e-05,
"loss": 0.2004,
"step": 22100
},
{
"epoch": 2.39920030258821,
"grad_norm": 219634.15625,
"learning_rate": 1.2359111111111112e-05,
"loss": 0.1986,
"step": 22200
},
{
"epoch": 2.4100070243691576,
"grad_norm": 276572.0,
"learning_rate": 1.2314666666666667e-05,
"loss": 0.1821,
"step": 22300
},
{
"epoch": 2.420813746150105,
"grad_norm": 166507.25,
"learning_rate": 1.2270222222222223e-05,
"loss": 0.1785,
"step": 22400
},
{
"epoch": 2.431620467931053,
"grad_norm": 264705.6875,
"learning_rate": 1.2225777777777778e-05,
"loss": 0.1898,
"step": 22500
},
{
"epoch": 2.431620467931053,
"eval_loss": 0.23902596533298492,
"eval_runtime": 2754.9189,
"eval_samples_per_second": 6.717,
"eval_steps_per_second": 0.84,
"eval_wer": 0.18278216126181782,
"step": 22500
},
{
"epoch": 2.4424271897120007,
"grad_norm": 222752.09375,
"learning_rate": 1.2181333333333333e-05,
"loss": 0.1884,
"step": 22600
},
{
"epoch": 2.4532339114929487,
"grad_norm": 206168.15625,
"learning_rate": 1.2136888888888888e-05,
"loss": 0.1911,
"step": 22700
},
{
"epoch": 2.464040633273896,
"grad_norm": 255710.078125,
"learning_rate": 1.2092444444444444e-05,
"loss": 0.1876,
"step": 22800
},
{
"epoch": 2.474847355054844,
"grad_norm": 158791.09375,
"learning_rate": 1.2048000000000002e-05,
"loss": 0.1962,
"step": 22900
},
{
"epoch": 2.4856540768357918,
"grad_norm": 322706.125,
"learning_rate": 1.2003555555555557e-05,
"loss": 0.1789,
"step": 23000
},
{
"epoch": 2.4856540768357918,
"eval_loss": 0.2363387644290924,
"eval_runtime": 2786.1335,
"eval_samples_per_second": 6.641,
"eval_steps_per_second": 0.83,
"eval_wer": 0.17904425277019684,
"step": 23000
},
{
"epoch": 2.4964607986167398,
"grad_norm": 262923.15625,
"learning_rate": 1.1959111111111113e-05,
"loss": 0.1861,
"step": 23100
},
{
"epoch": 2.5072675203976873,
"grad_norm": 216068.8125,
"learning_rate": 1.1914666666666668e-05,
"loss": 0.1827,
"step": 23200
},
{
"epoch": 2.518074242178635,
"grad_norm": 188912.265625,
"learning_rate": 1.1870222222222223e-05,
"loss": 0.1827,
"step": 23300
},
{
"epoch": 2.528880963959583,
"grad_norm": 222542.171875,
"learning_rate": 1.1825777777777778e-05,
"loss": 0.19,
"step": 23400
},
{
"epoch": 2.539687685740531,
"grad_norm": 202856.296875,
"learning_rate": 1.1781333333333334e-05,
"loss": 0.1765,
"step": 23500
},
{
"epoch": 2.539687685740531,
"eval_loss": 0.2353278398513794,
"eval_runtime": 2813.1319,
"eval_samples_per_second": 6.578,
"eval_steps_per_second": 0.822,
"eval_wer": 0.17970112371851515,
"step": 23500
},
{
"epoch": 2.5504944075214784,
"grad_norm": 238512.4375,
"learning_rate": 1.1736888888888889e-05,
"loss": 0.1869,
"step": 23600
},
{
"epoch": 2.561301129302426,
"grad_norm": 276903.75,
"learning_rate": 1.1692444444444446e-05,
"loss": 0.1963,
"step": 23700
},
{
"epoch": 2.572107851083374,
"grad_norm": 329353.65625,
"learning_rate": 1.1648000000000001e-05,
"loss": 0.1839,
"step": 23800
},
{
"epoch": 2.5829145728643215,
"grad_norm": 330138.1875,
"learning_rate": 1.1603555555555556e-05,
"loss": 0.1808,
"step": 23900
},
{
"epoch": 2.5937212946452695,
"grad_norm": 279948.03125,
"learning_rate": 1.1559111111111111e-05,
"loss": 0.1808,
"step": 24000
},
{
"epoch": 2.5937212946452695,
"eval_loss": 0.23201151192188263,
"eval_runtime": 3188.0735,
"eval_samples_per_second": 5.804,
"eval_steps_per_second": 0.726,
"eval_wer": 0.17965420436506385,
"step": 24000
},
{
"epoch": 2.604528016426217,
"grad_norm": 216210.78125,
"learning_rate": 1.1514666666666667e-05,
"loss": 0.1831,
"step": 24100
},
{
"epoch": 2.615334738207165,
"grad_norm": 282255.96875,
"learning_rate": 1.1470222222222222e-05,
"loss": 0.1826,
"step": 24200
},
{
"epoch": 2.6261414599881125,
"grad_norm": 260694.609375,
"learning_rate": 1.1425777777777777e-05,
"loss": 0.189,
"step": 24300
},
{
"epoch": 2.6369481817690605,
"grad_norm": 212081.46875,
"learning_rate": 1.1381333333333336e-05,
"loss": 0.1859,
"step": 24400
},
{
"epoch": 2.647754903550008,
"grad_norm": 217712.515625,
"learning_rate": 1.1336888888888891e-05,
"loss": 0.1771,
"step": 24500
},
{
"epoch": 2.647754903550008,
"eval_loss": 0.22906863689422607,
"eval_runtime": 3261.465,
"eval_samples_per_second": 5.674,
"eval_steps_per_second": 0.709,
"eval_wer": 0.17766795173562508,
"step": 24500
},
{
"epoch": 2.6585616253309556,
"grad_norm": 298979.4375,
"learning_rate": 1.1292444444444446e-05,
"loss": 0.1884,
"step": 24600
},
{
"epoch": 2.6693683471119036,
"grad_norm": 220226.265625,
"learning_rate": 1.1248000000000001e-05,
"loss": 0.1938,
"step": 24700
},
{
"epoch": 2.6801750688928516,
"grad_norm": 208763.828125,
"learning_rate": 1.1203555555555557e-05,
"loss": 0.1764,
"step": 24800
},
{
"epoch": 2.690981790673799,
"grad_norm": 141644.71875,
"learning_rate": 1.1159111111111112e-05,
"loss": 0.1797,
"step": 24900
},
{
"epoch": 2.7017885124547467,
"grad_norm": 232710.078125,
"learning_rate": 1.1114666666666667e-05,
"loss": 0.183,
"step": 25000
},
{
"epoch": 2.7017885124547467,
"eval_loss": 0.22756607830524445,
"eval_runtime": 2974.7193,
"eval_samples_per_second": 6.22,
"eval_steps_per_second": 0.778,
"eval_wer": 0.17883311567966592,
"step": 25000
},
{
"epoch": 2.7125952342356947,
"grad_norm": 348240.0,
"learning_rate": 1.1070222222222222e-05,
"loss": 0.1888,
"step": 25100
},
{
"epoch": 2.7234019560166423,
"grad_norm": 219766.265625,
"learning_rate": 1.102577777777778e-05,
"loss": 0.1839,
"step": 25200
},
{
"epoch": 2.7342086777975902,
"grad_norm": 240334.796875,
"learning_rate": 1.0981333333333334e-05,
"loss": 0.1802,
"step": 25300
},
{
"epoch": 2.745015399578538,
"grad_norm": 226478.640625,
"learning_rate": 1.093688888888889e-05,
"loss": 0.18,
"step": 25400
},
{
"epoch": 2.7558221213594853,
"grad_norm": 248438.5,
"learning_rate": 1.0892444444444445e-05,
"loss": 0.178,
"step": 25500
},
{
"epoch": 2.7558221213594853,
"eval_loss": 0.22500741481781006,
"eval_runtime": 2728.4865,
"eval_samples_per_second": 6.782,
"eval_steps_per_second": 0.848,
"eval_wer": 0.17539236309323658,
"step": 25500
},
{
"epoch": 2.7666288431404333,
"grad_norm": 229927.078125,
"learning_rate": 1.0848e-05,
"loss": 0.191,
"step": 25600
},
{
"epoch": 2.7774355649213813,
"grad_norm": 300608.5,
"learning_rate": 1.0803555555555555e-05,
"loss": 0.1982,
"step": 25700
},
{
"epoch": 2.788242286702329,
"grad_norm": 293853.34375,
"learning_rate": 1.075911111111111e-05,
"loss": 0.1747,
"step": 25800
},
{
"epoch": 2.7990490084832764,
"grad_norm": 210441.359375,
"learning_rate": 1.0714666666666666e-05,
"loss": 0.1808,
"step": 25900
},
{
"epoch": 2.8098557302642244,
"grad_norm": 175316.484375,
"learning_rate": 1.0670222222222224e-05,
"loss": 0.1829,
"step": 26000
},
{
"epoch": 2.8098557302642244,
"eval_loss": 0.2231319695711136,
"eval_runtime": 2895.6838,
"eval_samples_per_second": 6.39,
"eval_steps_per_second": 0.799,
"eval_wer": 0.17550966147686484,
"step": 26000
},
{
"epoch": 2.820662452045172,
"grad_norm": 162894.484375,
"learning_rate": 1.062577777777778e-05,
"loss": 0.1772,
"step": 26100
},
{
"epoch": 2.83146917382612,
"grad_norm": 399308.75,
"learning_rate": 1.0581333333333335e-05,
"loss": 0.183,
"step": 26200
},
{
"epoch": 2.8422758956070675,
"grad_norm": 237068.109375,
"learning_rate": 1.053688888888889e-05,
"loss": 0.1867,
"step": 26300
},
{
"epoch": 2.8530826173880155,
"grad_norm": 274371.4375,
"learning_rate": 1.0492444444444445e-05,
"loss": 0.189,
"step": 26400
},
{
"epoch": 2.863889339168963,
"grad_norm": 218764.46875,
"learning_rate": 1.0448e-05,
"loss": 0.183,
"step": 26500
},
{
"epoch": 2.863889339168963,
"eval_loss": 0.2216072529554367,
"eval_runtime": 2986.971,
"eval_samples_per_second": 6.195,
"eval_steps_per_second": 0.774,
"eval_wer": 0.17901297320122928,
"step": 26500
},
{
"epoch": 2.874696060949911,
"grad_norm": 200908.984375,
"learning_rate": 1.0403555555555556e-05,
"loss": 0.1714,
"step": 26600
},
{
"epoch": 2.8855027827308586,
"grad_norm": 247789.078125,
"learning_rate": 1.0359111111111111e-05,
"loss": 0.1775,
"step": 26700
},
{
"epoch": 2.896309504511806,
"grad_norm": 237418.84375,
"learning_rate": 1.0314666666666668e-05,
"loss": 0.1695,
"step": 26800
},
{
"epoch": 2.907116226292754,
"grad_norm": 253792.0,
"learning_rate": 1.0270222222222223e-05,
"loss": 0.1806,
"step": 26900
},
{
"epoch": 2.917922948073702,
"grad_norm": 205986.421875,
"learning_rate": 1.0225777777777778e-05,
"loss": 0.1812,
"step": 27000
},
{
"epoch": 2.917922948073702,
"eval_loss": 0.21981683373451233,
"eval_runtime": 2808.8113,
"eval_samples_per_second": 6.588,
"eval_steps_per_second": 0.823,
"eval_wer": 0.1729369169292847,
"step": 27000
},
{
"epoch": 2.9287296698546497,
"grad_norm": 194869.09375,
"learning_rate": 1.0181333333333334e-05,
"loss": 0.1805,
"step": 27100
},
{
"epoch": 2.939536391635597,
"grad_norm": 223082.046875,
"learning_rate": 1.0136888888888889e-05,
"loss": 0.1797,
"step": 27200
},
{
"epoch": 2.950343113416545,
"grad_norm": 186045.15625,
"learning_rate": 1.0092444444444444e-05,
"loss": 0.1809,
"step": 27300
},
{
"epoch": 2.9611498351974928,
"grad_norm": 307597.75,
"learning_rate": 1.0048e-05,
"loss": 0.1752,
"step": 27400
},
{
"epoch": 2.9719565569784407,
"grad_norm": 256261.78125,
"learning_rate": 1.0003555555555558e-05,
"loss": 0.1697,
"step": 27500
},
{
"epoch": 2.9719565569784407,
"eval_loss": 0.21857349574565887,
"eval_runtime": 2775.0986,
"eval_samples_per_second": 6.668,
"eval_steps_per_second": 0.833,
"eval_wer": 0.17267104059306063,
"step": 27500
},
{
"epoch": 2.9827632787593883,
"grad_norm": 140632.125,
"learning_rate": 9.959111111111111e-06,
"loss": 0.1773,
"step": 27600
},
{
"epoch": 2.993570000540336,
"grad_norm": 238205.53125,
"learning_rate": 9.914666666666668e-06,
"loss": 0.1799,
"step": 27700
},
{
"epoch": 3.0044307559301884,
"grad_norm": 153436.546875,
"learning_rate": 9.870222222222224e-06,
"loss": 0.1618,
"step": 27800
},
{
"epoch": 3.0152374777111364,
"grad_norm": 179979.90625,
"learning_rate": 9.825777777777779e-06,
"loss": 0.1261,
"step": 27900
},
{
"epoch": 3.026044199492084,
"grad_norm": 237624.703125,
"learning_rate": 9.781333333333334e-06,
"loss": 0.1317,
"step": 28000
},
{
"epoch": 3.026044199492084,
"eval_loss": 0.21732862293720245,
"eval_runtime": 2726.3235,
"eval_samples_per_second": 6.787,
"eval_steps_per_second": 0.848,
"eval_wer": 0.17278051908444703,
"step": 28000
},
{
"epoch": 3.036850921273032,
"grad_norm": 245701.53125,
"learning_rate": 9.73688888888889e-06,
"loss": 0.1383,
"step": 28100
},
{
"epoch": 3.0476576430539795,
"grad_norm": 94987.546875,
"learning_rate": 9.692444444444446e-06,
"loss": 0.1317,
"step": 28200
},
{
"epoch": 3.0584643648349275,
"grad_norm": 204097.234375,
"learning_rate": 9.648000000000001e-06,
"loss": 0.1349,
"step": 28300
},
{
"epoch": 3.069271086615875,
"grad_norm": 142045.625,
"learning_rate": 9.603555555555557e-06,
"loss": 0.1308,
"step": 28400
},
{
"epoch": 3.080077808396823,
"grad_norm": 192114.71875,
"learning_rate": 9.559111111111112e-06,
"loss": 0.1298,
"step": 28500
},
{
"epoch": 3.080077808396823,
"eval_loss": 0.21591147780418396,
"eval_runtime": 2949.2136,
"eval_samples_per_second": 6.274,
"eval_steps_per_second": 0.784,
"eval_wer": 0.16897223156264907,
"step": 28500
},
{
"epoch": 3.0908845301777705,
"grad_norm": 84732.625,
"learning_rate": 9.514666666666667e-06,
"loss": 0.1182,
"step": 28600
},
{
"epoch": 3.1016912519587185,
"grad_norm": 176436.625,
"learning_rate": 9.470222222222222e-06,
"loss": 0.1275,
"step": 28700
},
{
"epoch": 3.112497973739666,
"grad_norm": 216536.46875,
"learning_rate": 9.425777777777778e-06,
"loss": 0.1346,
"step": 28800
},
{
"epoch": 3.1233046955206136,
"grad_norm": 227679.296875,
"learning_rate": 9.381333333333335e-06,
"loss": 0.1255,
"step": 28900
},
{
"epoch": 3.1341114173015616,
"grad_norm": 172016.46875,
"learning_rate": 9.33688888888889e-06,
"loss": 0.1272,
"step": 29000
},
{
"epoch": 3.1341114173015616,
"eval_loss": 0.21611380577087402,
"eval_runtime": 2875.8761,
"eval_samples_per_second": 6.434,
"eval_steps_per_second": 0.804,
"eval_wer": 0.16858123695055482,
"step": 29000
},
{
"epoch": 3.144918139082509,
"grad_norm": 135140.53125,
"learning_rate": 9.292444444444445e-06,
"loss": 0.1327,
"step": 29100
},
{
"epoch": 3.155724860863457,
"grad_norm": 245684.453125,
"learning_rate": 9.248e-06,
"loss": 0.1359,
"step": 29200
},
{
"epoch": 3.1665315826444047,
"grad_norm": 184601.390625,
"learning_rate": 9.203555555555557e-06,
"loss": 0.131,
"step": 29300
},
{
"epoch": 3.1773383044253527,
"grad_norm": 157958.8125,
"learning_rate": 9.159111111111112e-06,
"loss": 0.129,
"step": 29400
},
{
"epoch": 3.1881450262063002,
"grad_norm": 174601.015625,
"learning_rate": 9.114666666666668e-06,
"loss": 0.1389,
"step": 29500
},
{
"epoch": 3.1881450262063002,
"eval_loss": 0.21482256054878235,
"eval_runtime": 2995.308,
"eval_samples_per_second": 6.178,
"eval_steps_per_second": 0.772,
"eval_wer": 0.17062222882568678,
"step": 29500
},
{
"epoch": 3.1989517479872482,
"grad_norm": 366836.96875,
"learning_rate": 9.070222222222223e-06,
"loss": 0.1375,
"step": 29600
},
{
"epoch": 3.209758469768196,
"grad_norm": 217750.703125,
"learning_rate": 9.025777777777778e-06,
"loss": 0.1393,
"step": 29700
},
{
"epoch": 3.2205651915491433,
"grad_norm": 427549.6875,
"learning_rate": 8.981333333333333e-06,
"loss": 0.1211,
"step": 29800
},
{
"epoch": 3.2313719133300913,
"grad_norm": 139396.234375,
"learning_rate": 8.93688888888889e-06,
"loss": 0.1278,
"step": 29900
},
{
"epoch": 3.242178635111039,
"grad_norm": 165933.109375,
"learning_rate": 8.892444444444445e-06,
"loss": 0.1379,
"step": 30000
},
{
"epoch": 3.242178635111039,
"eval_loss": 0.2138548046350479,
"eval_runtime": 3184.9662,
"eval_samples_per_second": 5.81,
"eval_steps_per_second": 0.726,
"eval_wer": 0.16925374768335694,
"step": 30000
},
{
"epoch": 3.252985356891987,
"grad_norm": 289672.5,
"learning_rate": 8.848e-06,
"loss": 0.1407,
"step": 30100
},
{
"epoch": 3.2637920786729344,
"grad_norm": 145135.828125,
"learning_rate": 8.803555555555556e-06,
"loss": 0.1319,
"step": 30200
},
{
"epoch": 3.2745988004538824,
"grad_norm": 184688.515625,
"learning_rate": 8.759111111111111e-06,
"loss": 0.1308,
"step": 30300
},
{
"epoch": 3.28540552223483,
"grad_norm": 201466.8125,
"learning_rate": 8.714666666666666e-06,
"loss": 0.1322,
"step": 30400
},
{
"epoch": 3.296212244015778,
"grad_norm": 194344.265625,
"learning_rate": 8.670222222222223e-06,
"loss": 0.1312,
"step": 30500
},
{
"epoch": 3.296212244015778,
"eval_loss": 0.21327927708625793,
"eval_runtime": 3054.7075,
"eval_samples_per_second": 6.058,
"eval_steps_per_second": 0.757,
"eval_wer": 0.1713885782653915,
"step": 30500
},
{
"epoch": 3.3070189657967255,
"grad_norm": 204070.3125,
"learning_rate": 8.625777777777779e-06,
"loss": 0.1222,
"step": 30600
},
{
"epoch": 3.3178256875776735,
"grad_norm": 258432.828125,
"learning_rate": 8.581333333333334e-06,
"loss": 0.1277,
"step": 30700
},
{
"epoch": 3.328632409358621,
"grad_norm": 251962.796875,
"learning_rate": 8.53688888888889e-06,
"loss": 0.1265,
"step": 30800
},
{
"epoch": 3.339439131139569,
"grad_norm": 226804.796875,
"learning_rate": 8.492444444444446e-06,
"loss": 0.1292,
"step": 30900
},
{
"epoch": 3.3502458529205166,
"grad_norm": 191085.5625,
"learning_rate": 8.448000000000001e-06,
"loss": 0.1212,
"step": 31000
},
{
"epoch": 3.3502458529205166,
"eval_loss": 0.21162918210029602,
"eval_runtime": 3559.2497,
"eval_samples_per_second": 5.199,
"eval_steps_per_second": 0.65,
"eval_wer": 0.17063786861017055,
"step": 31000
},
{
"epoch": 3.361052574701464,
"grad_norm": 186760.765625,
"learning_rate": 8.403555555555556e-06,
"loss": 0.1281,
"step": 31100
},
{
"epoch": 3.371859296482412,
"grad_norm": 234347.296875,
"learning_rate": 8.359111111111112e-06,
"loss": 0.1338,
"step": 31200
},
{
"epoch": 3.3826660182633597,
"grad_norm": 223205.296875,
"learning_rate": 8.314666666666667e-06,
"loss": 0.1365,
"step": 31300
},
{
"epoch": 3.3934727400443077,
"grad_norm": 227340.140625,
"learning_rate": 8.270222222222222e-06,
"loss": 0.1263,
"step": 31400
},
{
"epoch": 3.404279461825255,
"grad_norm": 330275.71875,
"learning_rate": 8.225777777777777e-06,
"loss": 0.1265,
"step": 31500
},
{
"epoch": 3.404279461825255,
"eval_loss": 0.21032755076885223,
"eval_runtime": 3585.0413,
"eval_samples_per_second": 5.161,
"eval_steps_per_second": 0.645,
"eval_wer": 0.16658716442887417,
"step": 31500
},
{
"epoch": 3.415086183606203,
"grad_norm": 202428.109375,
"learning_rate": 8.181333333333334e-06,
"loss": 0.1302,
"step": 31600
},
{
"epoch": 3.4258929053871507,
"grad_norm": 139416.578125,
"learning_rate": 8.13688888888889e-06,
"loss": 0.126,
"step": 31700
},
{
"epoch": 3.4366996271680987,
"grad_norm": 151699.484375,
"learning_rate": 8.092444444444445e-06,
"loss": 0.1273,
"step": 31800
},
{
"epoch": 3.4475063489490463,
"grad_norm": 127831.4609375,
"learning_rate": 8.048e-06,
"loss": 0.1347,
"step": 31900
},
{
"epoch": 3.458313070729994,
"grad_norm": 225660.5625,
"learning_rate": 8.003555555555557e-06,
"loss": 0.1261,
"step": 32000
},
{
"epoch": 3.458313070729994,
"eval_loss": 0.20947901904582977,
"eval_runtime": 3432.2646,
"eval_samples_per_second": 5.391,
"eval_steps_per_second": 0.674,
"eval_wer": 0.17062222882568678,
"step": 32000
},
{
"epoch": 3.469119792510942,
"grad_norm": 5.6989545822143555,
"learning_rate": 7.959111111111112e-06,
"loss": 0.1326,
"step": 32100
},
{
"epoch": 3.4799265142918894,
"grad_norm": 5.444442272186279,
"learning_rate": 7.914666666666667e-06,
"loss": 0.1391,
"step": 32200
},
{
"epoch": 3.4907332360728374,
"grad_norm": 5.629488945007324,
"learning_rate": 7.870222222222222e-06,
"loss": 0.134,
"step": 32300
},
{
"epoch": 3.501539957853785,
"grad_norm": 9.071991920471191,
"learning_rate": 7.82577777777778e-06,
"loss": 0.1345,
"step": 32400
},
{
"epoch": 3.512346679634733,
"grad_norm": 8.57175064086914,
"learning_rate": 7.781333333333335e-06,
"loss": 0.127,
"step": 32500
},
{
"epoch": 3.512346679634733,
"eval_loss": 0.20792409777641296,
"eval_runtime": 3540.4171,
"eval_samples_per_second": 5.227,
"eval_steps_per_second": 1.307,
"eval_wer": 0.16730659451512758,
"step": 32500
},
{
"epoch": 3.5231534014156805,
"grad_norm": 7.08974552154541,
"learning_rate": 7.73688888888889e-06,
"loss": 0.1325,
"step": 32600
},
{
"epoch": 3.5339601231966284,
"grad_norm": 5.81699275970459,
"learning_rate": 7.692444444444445e-06,
"loss": 0.1337,
"step": 32700
},
{
"epoch": 3.544766844977576,
"grad_norm": 8.329341888427734,
"learning_rate": 7.648444444444445e-06,
"loss": 0.1295,
"step": 32800
},
{
"epoch": 3.5555735667585235,
"grad_norm": 9.390524864196777,
"learning_rate": 7.604e-06,
"loss": 0.1308,
"step": 32900
},
{
"epoch": 3.5663802885394715,
"grad_norm": 7.076089859008789,
"learning_rate": 7.5595555555555565e-06,
"loss": 0.1346,
"step": 33000
},
{
"epoch": 3.5663802885394715,
"eval_loss": 0.20613741874694824,
"eval_runtime": 3517.8125,
"eval_samples_per_second": 5.26,
"eval_steps_per_second": 1.315,
"eval_wer": 0.1682840810453632,
"step": 33000
},
{
"epoch": 3.5771870103204195,
"grad_norm": 6.325503826141357,
"learning_rate": 7.515111111111112e-06,
"loss": 0.14,
"step": 33100
},
{
"epoch": 3.587993732101367,
"grad_norm": 7.135802745819092,
"learning_rate": 7.470666666666667e-06,
"loss": 0.1292,
"step": 33200
},
{
"epoch": 3.5988004538823146,
"grad_norm": 5.185844898223877,
"learning_rate": 7.426222222222222e-06,
"loss": 0.1375,
"step": 33300
},
{
"epoch": 3.6096071756632626,
"grad_norm": 7.516198635101318,
"learning_rate": 7.381777777777779e-06,
"loss": 0.1287,
"step": 33400
},
{
"epoch": 3.62041389744421,
"grad_norm": 6.644392490386963,
"learning_rate": 7.337333333333334e-06,
"loss": 0.1283,
"step": 33500
},
{
"epoch": 3.62041389744421,
"eval_loss": 0.20456381142139435,
"eval_runtime": 3492.6992,
"eval_samples_per_second": 5.298,
"eval_steps_per_second": 1.324,
"eval_wer": 0.16519522360981867,
"step": 33500
},
{
"epoch": 3.631220619225158,
"grad_norm": 7.233791351318359,
"learning_rate": 7.2928888888888895e-06,
"loss": 0.1373,
"step": 33600
},
{
"epoch": 3.6420273410061057,
"grad_norm": 5.153164386749268,
"learning_rate": 7.248444444444445e-06,
"loss": 0.1368,
"step": 33700
},
{
"epoch": 3.6528340627870537,
"grad_norm": 6.022379398345947,
"learning_rate": 7.204000000000001e-06,
"loss": 0.1377,
"step": 33800
},
{
"epoch": 3.6636407845680012,
"grad_norm": 7.33857536315918,
"learning_rate": 7.159555555555556e-06,
"loss": 0.1343,
"step": 33900
},
{
"epoch": 3.6744475063489492,
"grad_norm": 6.584815502166748,
"learning_rate": 7.115111111111111e-06,
"loss": 0.1244,
"step": 34000
},
{
"epoch": 3.6744475063489492,
"eval_loss": 0.20398086309432983,
"eval_runtime": 3469.4987,
"eval_samples_per_second": 5.333,
"eval_steps_per_second": 1.333,
"eval_wer": 0.168432658997959,
"step": 34000
},
{
"epoch": 3.685254228129897,
"grad_norm": 6.824450492858887,
"learning_rate": 7.0706666666666665e-06,
"loss": 0.1255,
"step": 34100
},
{
"epoch": 3.6960609499108443,
"grad_norm": 5.974719047546387,
"learning_rate": 7.0262222222222234e-06,
"loss": 0.1302,
"step": 34200
},
{
"epoch": 3.7068676716917923,
"grad_norm": 6.354248523712158,
"learning_rate": 6.981777777777779e-06,
"loss": 0.1245,
"step": 34300
},
{
"epoch": 3.7176743934727403,
"grad_norm": 5.096312999725342,
"learning_rate": 6.937333333333334e-06,
"loss": 0.1369,
"step": 34400
},
{
"epoch": 3.728481115253688,
"grad_norm": 5.251643180847168,
"learning_rate": 6.892888888888889e-06,
"loss": 0.1207,
"step": 34500
},
{
"epoch": 3.728481115253688,
"eval_loss": 0.20263046026229858,
"eval_runtime": 3278.7844,
"eval_samples_per_second": 5.644,
"eval_steps_per_second": 1.411,
"eval_wer": 0.16479640910548252,
"step": 34500
},
{
"epoch": 3.7392878370346354,
"grad_norm": 7.432106971740723,
"learning_rate": 6.848444444444445e-06,
"loss": 0.1337,
"step": 34600
},
{
"epoch": 3.7500945588155834,
"grad_norm": 4.93491268157959,
"learning_rate": 6.804e-06,
"loss": 0.1257,
"step": 34700
},
{
"epoch": 3.760901280596531,
"grad_norm": 6.047059059143066,
"learning_rate": 6.760000000000001e-06,
"loss": 0.1206,
"step": 34800
},
{
"epoch": 3.771708002377479,
"grad_norm": 6.542396545410156,
"learning_rate": 6.7155555555555566e-06,
"loss": 0.1271,
"step": 34900
},
{
"epoch": 3.7825147241584265,
"grad_norm": 5.706289768218994,
"learning_rate": 6.671111111111112e-06,
"loss": 0.1239,
"step": 35000
},
{
"epoch": 3.7825147241584265,
"eval_loss": 0.20222991704940796,
"eval_runtime": 3317.3157,
"eval_samples_per_second": 5.578,
"eval_steps_per_second": 1.395,
"eval_wer": 0.16217674520445108,
"step": 35000
},
{
"epoch": 3.793321445939374,
"grad_norm": 7.686710834503174,
"learning_rate": 6.626666666666667e-06,
"loss": 0.1298,
"step": 35100
},
{
"epoch": 3.804128167720322,
"grad_norm": 7.791649341583252,
"learning_rate": 6.582222222222223e-06,
"loss": 0.1276,
"step": 35200
},
{
"epoch": 3.81493488950127,
"grad_norm": 5.835906505584717,
"learning_rate": 6.537777777777778e-06,
"loss": 0.1244,
"step": 35300
},
{
"epoch": 3.8257416112822176,
"grad_norm": 8.771524429321289,
"learning_rate": 6.4933333333333336e-06,
"loss": 0.1316,
"step": 35400
},
{
"epoch": 3.836548333063165,
"grad_norm": 7.212921619415283,
"learning_rate": 6.448888888888889e-06,
"loss": 0.1308,
"step": 35500
},
{
"epoch": 3.836548333063165,
"eval_loss": 0.19980210065841675,
"eval_runtime": 3260.0277,
"eval_samples_per_second": 5.676,
"eval_steps_per_second": 1.419,
"eval_wer": 0.16239570218722385,
"step": 35500
},
{
"epoch": 3.847355054844113,
"grad_norm": 5.587503910064697,
"learning_rate": 6.404444444444446e-06,
"loss": 0.1317,
"step": 35600
},
{
"epoch": 3.8581617766250607,
"grad_norm": 8.271342277526855,
"learning_rate": 6.360444444444445e-06,
"loss": 0.1316,
"step": 35700
},
{
"epoch": 3.8689684984060086,
"grad_norm": 6.529531955718994,
"learning_rate": 6.316000000000001e-06,
"loss": 0.1257,
"step": 35800
},
{
"epoch": 3.879775220186956,
"grad_norm": 5.135924816131592,
"learning_rate": 6.271555555555556e-06,
"loss": 0.1309,
"step": 35900
},
{
"epoch": 3.890581941967904,
"grad_norm": 4.710616588592529,
"learning_rate": 6.2271111111111115e-06,
"loss": 0.1272,
"step": 36000
},
{
"epoch": 3.890581941967904,
"eval_loss": 0.19968418776988983,
"eval_runtime": 3207.2086,
"eval_samples_per_second": 5.77,
"eval_steps_per_second": 1.442,
"eval_wer": 0.16488242792014327,
"step": 36000
},
{
"epoch": 3.9013886637488517,
"grad_norm": 9.243986129760742,
"learning_rate": 6.182666666666667e-06,
"loss": 0.1301,
"step": 36100
},
{
"epoch": 3.9121953855297997,
"grad_norm": 6.014384746551514,
"learning_rate": 6.138222222222223e-06,
"loss": 0.1261,
"step": 36200
},
{
"epoch": 3.9230021073107473,
"grad_norm": 5.140791893005371,
"learning_rate": 6.093777777777779e-06,
"loss": 0.1219,
"step": 36300
},
{
"epoch": 3.933808829091695,
"grad_norm": 4.738403797149658,
"learning_rate": 6.049333333333334e-06,
"loss": 0.1244,
"step": 36400
},
{
"epoch": 3.944615550872643,
"grad_norm": 4.881937026977539,
"learning_rate": 6.004888888888889e-06,
"loss": 0.1328,
"step": 36500
},
{
"epoch": 3.944615550872643,
"eval_loss": 0.19876359403133392,
"eval_runtime": 3246.3553,
"eval_samples_per_second": 5.7,
"eval_steps_per_second": 1.425,
"eval_wer": 0.1647103902908218,
"step": 36500
},
{
"epoch": 3.955422272653591,
"grad_norm": 9.000225067138672,
"learning_rate": 5.960444444444445e-06,
"loss": 0.1238,
"step": 36600
},
{
"epoch": 3.9662289944345384,
"grad_norm": 4.82861328125,
"learning_rate": 5.916000000000001e-06,
"loss": 0.1268,
"step": 36700
},
{
"epoch": 3.977035716215486,
"grad_norm": 4.868381023406982,
"learning_rate": 5.871555555555556e-06,
"loss": 0.1262,
"step": 36800
},
{
"epoch": 3.987842437996434,
"grad_norm": 10.557507514953613,
"learning_rate": 5.827111111111111e-06,
"loss": 0.134,
"step": 36900
},
{
"epoch": 3.9986491597773814,
"grad_norm": 6.664336204528809,
"learning_rate": 5.782666666666667e-06,
"loss": 0.1256,
"step": 37000
},
{
"epoch": 3.9986491597773814,
"eval_loss": 0.19714923202991486,
"eval_runtime": 3358.3537,
"eval_samples_per_second": 5.51,
"eval_steps_per_second": 1.377,
"eval_wer": 0.1653203418856888,
"step": 37000
},
{
"epoch": 4.009509915167234,
"grad_norm": 6.824110507965088,
"learning_rate": 5.738222222222223e-06,
"loss": 0.095,
"step": 37100
},
{
"epoch": 4.0203166369481815,
"grad_norm": 6.033724308013916,
"learning_rate": 5.6937777777777785e-06,
"loss": 0.0924,
"step": 37200
},
{
"epoch": 4.031123358729129,
"grad_norm": 8.13729476928711,
"learning_rate": 5.649333333333334e-06,
"loss": 0.1009,
"step": 37300
},
{
"epoch": 4.0419300805100775,
"grad_norm": 7.620489597320557,
"learning_rate": 5.60488888888889e-06,
"loss": 0.0994,
"step": 37400
},
{
"epoch": 4.052736802291025,
"grad_norm": 5.248648166656494,
"learning_rate": 5.560444444444445e-06,
"loss": 0.0953,
"step": 37500
},
{
"epoch": 4.052736802291025,
"eval_loss": 0.19735735654830933,
"eval_runtime": 3623.9668,
"eval_samples_per_second": 5.106,
"eval_steps_per_second": 1.277,
"eval_wer": 0.1603938097733013,
"step": 37500
},
{
"epoch": 4.063543524071973,
"grad_norm": 5.766596794128418,
"learning_rate": 5.516e-06,
"loss": 0.0947,
"step": 37600
},
{
"epoch": 4.07435024585292,
"grad_norm": 4.6069231033325195,
"learning_rate": 5.4715555555555554e-06,
"loss": 0.0957,
"step": 37700
},
{
"epoch": 4.085156967633869,
"grad_norm": 5.434189319610596,
"learning_rate": 5.4271111111111115e-06,
"loss": 0.0992,
"step": 37800
},
{
"epoch": 4.095963689414816,
"grad_norm": 2.9330973625183105,
"learning_rate": 5.382666666666667e-06,
"loss": 0.0943,
"step": 37900
},
{
"epoch": 4.106770411195764,
"grad_norm": 4.690386772155762,
"learning_rate": 5.338222222222223e-06,
"loss": 0.0946,
"step": 38000
},
{
"epoch": 4.106770411195764,
"eval_loss": 0.19785380363464355,
"eval_runtime": 3496.2995,
"eval_samples_per_second": 5.292,
"eval_steps_per_second": 1.323,
"eval_wer": 0.16248954089412648,
"step": 38000
},
{
"epoch": 4.117577132976711,
"grad_norm": 5.448973655700684,
"learning_rate": 5.293777777777778e-06,
"loss": 0.0928,
"step": 38100
},
{
"epoch": 4.12838385475766,
"grad_norm": 6.168562889099121,
"learning_rate": 5.249333333333334e-06,
"loss": 0.0977,
"step": 38200
},
{
"epoch": 4.139190576538607,
"grad_norm": 6.410705089569092,
"learning_rate": 5.204888888888889e-06,
"loss": 0.0954,
"step": 38300
},
{
"epoch": 4.149997298319555,
"grad_norm": 6.880079746246338,
"learning_rate": 5.160444444444445e-06,
"loss": 0.0982,
"step": 38400
},
{
"epoch": 4.160804020100502,
"grad_norm": 4.514254570007324,
"learning_rate": 5.116000000000001e-06,
"loss": 0.0933,
"step": 38500
},
{
"epoch": 4.160804020100502,
"eval_loss": 0.19639329612255096,
"eval_runtime": 3441.5727,
"eval_samples_per_second": 5.377,
"eval_steps_per_second": 1.344,
"eval_wer": 0.16088646298454007,
"step": 38500
},
{
"epoch": 4.17161074188145,
"grad_norm": 5.864041328430176,
"learning_rate": 5.071555555555556e-06,
"loss": 0.0946,
"step": 38600
},
{
"epoch": 4.182417463662398,
"grad_norm": 5.394285678863525,
"learning_rate": 5.027111111111111e-06,
"loss": 0.098,
"step": 38700
},
{
"epoch": 4.193224185443346,
"grad_norm": 5.213718891143799,
"learning_rate": 4.982666666666667e-06,
"loss": 0.0948,
"step": 38800
},
{
"epoch": 4.204030907224293,
"grad_norm": 3.7767562866210938,
"learning_rate": 4.938222222222222e-06,
"loss": 0.099,
"step": 38900
},
{
"epoch": 4.214837629005241,
"grad_norm": 8.426477432250977,
"learning_rate": 4.8937777777777785e-06,
"loss": 0.1025,
"step": 39000
},
{
"epoch": 4.214837629005241,
"eval_loss": 0.19617383182048798,
"eval_runtime": 3407.3151,
"eval_samples_per_second": 5.431,
"eval_steps_per_second": 1.358,
"eval_wer": 0.16335754893297572,
"step": 39000
},
{
"epoch": 4.225644350786189,
"grad_norm": 6.989054203033447,
"learning_rate": 4.849333333333334e-06,
"loss": 0.0922,
"step": 39100
},
{
"epoch": 4.236451072567137,
"grad_norm": 7.133777618408203,
"learning_rate": 4.80488888888889e-06,
"loss": 0.0971,
"step": 39200
},
{
"epoch": 4.2472577943480845,
"grad_norm": 5.765046119689941,
"learning_rate": 4.760444444444445e-06,
"loss": 0.0919,
"step": 39300
},
{
"epoch": 4.258064516129032,
"grad_norm": 5.539346694946289,
"learning_rate": 4.716e-06,
"loss": 0.096,
"step": 39400
},
{
"epoch": 4.2688712379099805,
"grad_norm": 6.360944747924805,
"learning_rate": 4.6715555555555555e-06,
"loss": 0.1002,
"step": 39500
},
{
"epoch": 4.2688712379099805,
"eval_loss": 0.1956612765789032,
"eval_runtime": 3418.2414,
"eval_samples_per_second": 5.413,
"eval_steps_per_second": 1.353,
"eval_wer": 0.16324025054934743,
"step": 39500
},
{
"epoch": 4.279677959690928,
"grad_norm": 6.404228210449219,
"learning_rate": 4.6271111111111116e-06,
"loss": 0.1,
"step": 39600
},
{
"epoch": 4.290484681471876,
"grad_norm": 5.106740474700928,
"learning_rate": 4.582666666666667e-06,
"loss": 0.1012,
"step": 39700
},
{
"epoch": 4.301291403252823,
"grad_norm": 7.007205009460449,
"learning_rate": 4.538222222222223e-06,
"loss": 0.103,
"step": 39800
},
{
"epoch": 4.312098125033771,
"grad_norm": 7.048201084136963,
"learning_rate": 4.493777777777778e-06,
"loss": 0.0937,
"step": 39900
},
{
"epoch": 4.322904846814719,
"grad_norm": 5.77664852142334,
"learning_rate": 4.449333333333334e-06,
"loss": 0.0976,
"step": 40000
},
{
"epoch": 4.322904846814719,
"eval_loss": 0.1948525756597519,
"eval_runtime": 3400.5876,
"eval_samples_per_second": 5.441,
"eval_steps_per_second": 1.36,
"eval_wer": 0.16208290649754847,
"step": 40000
},
{
"epoch": 4.333711568595667,
"grad_norm": 7.6147918701171875,
"learning_rate": 4.404888888888889e-06,
"loss": 0.094,
"step": 40100
},
{
"epoch": 4.344518290376614,
"grad_norm": 2.64292049407959,
"learning_rate": 4.360444444444445e-06,
"loss": 0.0987,
"step": 40200
},
{
"epoch": 4.355325012157562,
"grad_norm": 4.686502456665039,
"learning_rate": 4.316e-06,
"loss": 0.0985,
"step": 40300
},
{
"epoch": 4.366131733938509,
"grad_norm": 6.833780288696289,
"learning_rate": 4.271555555555556e-06,
"loss": 0.0866,
"step": 40400
},
{
"epoch": 4.376938455719458,
"grad_norm": 3.5335001945495605,
"learning_rate": 4.227111111111111e-06,
"loss": 0.0983,
"step": 40500
},
{
"epoch": 4.376938455719458,
"eval_loss": 0.19357165694236755,
"eval_runtime": 3407.3735,
"eval_samples_per_second": 5.431,
"eval_steps_per_second": 1.358,
"eval_wer": 0.1605111081569296,
"step": 40500
},
{
"epoch": 4.387745177500405,
"grad_norm": 6.259922981262207,
"learning_rate": 4.183111111111112e-06,
"loss": 0.0941,
"step": 40600
},
{
"epoch": 4.398551899281353,
"grad_norm": 3.454116106033325,
"learning_rate": 4.138666666666667e-06,
"loss": 0.0931,
"step": 40700
},
{
"epoch": 4.4093586210623,
"grad_norm": 5.945463180541992,
"learning_rate": 4.0942222222222225e-06,
"loss": 0.0973,
"step": 40800
},
{
"epoch": 4.420165342843249,
"grad_norm": 6.534635066986084,
"learning_rate": 4.049777777777778e-06,
"loss": 0.0941,
"step": 40900
},
{
"epoch": 4.430972064624196,
"grad_norm": 6.562328815460205,
"learning_rate": 4.005333333333334e-06,
"loss": 0.0995,
"step": 41000
},
{
"epoch": 4.430972064624196,
"eval_loss": 0.1934925764799118,
"eval_runtime": 3377.5938,
"eval_samples_per_second": 5.478,
"eval_steps_per_second": 1.37,
"eval_wer": 0.1607769844931537,
"step": 41000
},
{
"epoch": 4.441778786405144,
"grad_norm": 4.691957473754883,
"learning_rate": 3.960888888888889e-06,
"loss": 0.0907,
"step": 41100
},
{
"epoch": 4.4525855081860914,
"grad_norm": 7.0055646896362305,
"learning_rate": 3.916444444444445e-06,
"loss": 0.0984,
"step": 41200
},
{
"epoch": 4.46339222996704,
"grad_norm": 4.472280979156494,
"learning_rate": 3.872e-06,
"loss": 0.0954,
"step": 41300
},
{
"epoch": 4.474198951747987,
"grad_norm": 7.1928277015686035,
"learning_rate": 3.8275555555555564e-06,
"loss": 0.0989,
"step": 41400
},
{
"epoch": 4.485005673528935,
"grad_norm": 6.270200729370117,
"learning_rate": 3.7831111111111112e-06,
"loss": 0.0877,
"step": 41500
},
{
"epoch": 4.485005673528935,
"eval_loss": 0.19299255311489105,
"eval_runtime": 3437.135,
"eval_samples_per_second": 5.384,
"eval_steps_per_second": 1.346,
"eval_wer": 0.16083954363108877,
"step": 41500
},
{
"epoch": 4.4958123953098825,
"grad_norm": 4.897816181182861,
"learning_rate": 3.7386666666666673e-06,
"loss": 0.0878,
"step": 41600
},
{
"epoch": 4.50661911709083,
"grad_norm": 8.050458908081055,
"learning_rate": 3.6942222222222226e-06,
"loss": 0.0973,
"step": 41700
},
{
"epoch": 4.5174258388717785,
"grad_norm": 6.027979373931885,
"learning_rate": 3.649777777777778e-06,
"loss": 0.0967,
"step": 41800
},
{
"epoch": 4.528232560652726,
"grad_norm": 7.365042686462402,
"learning_rate": 3.6053333333333334e-06,
"loss": 0.0965,
"step": 41900
},
{
"epoch": 4.539039282433674,
"grad_norm": 6.175548076629639,
"learning_rate": 3.560888888888889e-06,
"loss": 0.0985,
"step": 42000
},
{
"epoch": 4.539039282433674,
"eval_loss": 0.19284753501415253,
"eval_runtime": 3493.7006,
"eval_samples_per_second": 5.296,
"eval_steps_per_second": 1.324,
"eval_wer": 0.1633028096872825,
"step": 42000
},
{
"epoch": 4.549846004214621,
"grad_norm": 3.834568738937378,
"learning_rate": 3.5164444444444447e-06,
"loss": 0.0946,
"step": 42100
},
{
"epoch": 4.56065272599557,
"grad_norm": 5.286928176879883,
"learning_rate": 3.4720000000000004e-06,
"loss": 0.0984,
"step": 42200
},
{
"epoch": 4.571459447776517,
"grad_norm": 4.384335994720459,
"learning_rate": 3.4275555555555556e-06,
"loss": 0.0859,
"step": 42300
},
{
"epoch": 4.582266169557465,
"grad_norm": 6.872238636016846,
"learning_rate": 3.3831111111111113e-06,
"loss": 0.1006,
"step": 42400
},
{
"epoch": 4.593072891338412,
"grad_norm": 5.914927005767822,
"learning_rate": 3.338666666666667e-06,
"loss": 0.0887,
"step": 42500
},
{
"epoch": 4.593072891338412,
"eval_loss": 0.19172823429107666,
"eval_runtime": 3473.6538,
"eval_samples_per_second": 5.327,
"eval_steps_per_second": 1.332,
"eval_wer": 0.1616449925320029,
"step": 42500
},
{
"epoch": 4.603879613119361,
"grad_norm": 2.9186370372772217,
"learning_rate": 3.2942222222222226e-06,
"loss": 0.0929,
"step": 42600
},
{
"epoch": 4.614686334900308,
"grad_norm": 7.138390064239502,
"learning_rate": 3.249777777777778e-06,
"loss": 0.1014,
"step": 42700
},
{
"epoch": 4.625493056681256,
"grad_norm": 5.447595596313477,
"learning_rate": 3.2053333333333334e-06,
"loss": 0.0942,
"step": 42800
},
{
"epoch": 4.636299778462203,
"grad_norm": 5.631972789764404,
"learning_rate": 3.160888888888889e-06,
"loss": 0.0928,
"step": 42900
},
{
"epoch": 4.647106500243151,
"grad_norm": 6.55267333984375,
"learning_rate": 3.1164444444444448e-06,
"loss": 0.0909,
"step": 43000
},
{
"epoch": 4.647106500243151,
"eval_loss": 0.1917807012796402,
"eval_runtime": 3531.9979,
"eval_samples_per_second": 5.239,
"eval_steps_per_second": 1.31,
"eval_wer": 0.1603547103120919,
"step": 43000
},
{
"epoch": 4.657913222024099,
"grad_norm": 5.544260501861572,
"learning_rate": 3.0728888888888893e-06,
"loss": 0.0952,
"step": 43100
},
{
"epoch": 4.668719943805047,
"grad_norm": 6.711052417755127,
"learning_rate": 3.028444444444445e-06,
"loss": 0.0893,
"step": 43200
},
{
"epoch": 4.679526665585994,
"grad_norm": 5.341217994689941,
"learning_rate": 2.984e-06,
"loss": 0.0894,
"step": 43300
},
{
"epoch": 4.690333387366942,
"grad_norm": 6.262836933135986,
"learning_rate": 2.9395555555555562e-06,
"loss": 0.0933,
"step": 43400
},
{
"epoch": 4.7011401091478895,
"grad_norm": 5.641539096832275,
"learning_rate": 2.8951111111111114e-06,
"loss": 0.0908,
"step": 43500
},
{
"epoch": 4.7011401091478895,
"eval_loss": 0.19096316397190094,
"eval_runtime": 3434.1139,
"eval_samples_per_second": 5.388,
"eval_steps_per_second": 1.347,
"eval_wer": 0.1592677452904699,
"step": 43500
},
{
"epoch": 4.711946830928838,
"grad_norm": 5.90605354309082,
"learning_rate": 2.850666666666667e-06,
"loss": 0.0955,
"step": 43600
},
{
"epoch": 4.7227535527097855,
"grad_norm": 7.320056438446045,
"learning_rate": 2.8062222222222223e-06,
"loss": 0.0862,
"step": 43700
},
{
"epoch": 4.733560274490733,
"grad_norm": 7.9307026863098145,
"learning_rate": 2.7617777777777784e-06,
"loss": 0.0968,
"step": 43800
},
{
"epoch": 4.7443669962716815,
"grad_norm": 3.4138481616973877,
"learning_rate": 2.7173333333333336e-06,
"loss": 0.095,
"step": 43900
},
{
"epoch": 4.755173718052629,
"grad_norm": 5.649805068969727,
"learning_rate": 2.6728888888888893e-06,
"loss": 0.0931,
"step": 44000
},
{
"epoch": 4.755173718052629,
"eval_loss": 0.19024226069450378,
"eval_runtime": 3527.1779,
"eval_samples_per_second": 5.246,
"eval_steps_per_second": 1.312,
"eval_wer": 0.1579227238248657,
"step": 44000
},
{
"epoch": 4.7659804398335766,
"grad_norm": 5.45325231552124,
"learning_rate": 2.6284444444444445e-06,
"loss": 0.0897,
"step": 44100
},
{
"epoch": 4.776787161614524,
"grad_norm": 4.2618408203125,
"learning_rate": 2.5840000000000006e-06,
"loss": 0.0921,
"step": 44200
},
{
"epoch": 4.787593883395472,
"grad_norm": 6.174403190612793,
"learning_rate": 2.539555555555556e-06,
"loss": 0.0954,
"step": 44300
},
{
"epoch": 4.79840060517642,
"grad_norm": 4.927825927734375,
"learning_rate": 2.495111111111111e-06,
"loss": 0.0891,
"step": 44400
},
{
"epoch": 4.809207326957368,
"grad_norm": 4.512660503387451,
"learning_rate": 2.4506666666666667e-06,
"loss": 0.0938,
"step": 44500
},
{
"epoch": 4.809207326957368,
"eval_loss": 0.18895868957042694,
"eval_runtime": 4201.807,
"eval_samples_per_second": 4.404,
"eval_steps_per_second": 1.101,
"eval_wer": 0.158172960376606,
"step": 44500
},
{
"epoch": 4.820014048738315,
"grad_norm": 5.174787998199463,
"learning_rate": 2.4062222222222223e-06,
"loss": 0.0925,
"step": 44600
},
{
"epoch": 4.830820770519263,
"grad_norm": 6.067021369934082,
"learning_rate": 2.361777777777778e-06,
"loss": 0.0917,
"step": 44700
},
{
"epoch": 4.84162749230021,
"grad_norm": 7.221127033233643,
"learning_rate": 2.3173333333333336e-06,
"loss": 0.1004,
"step": 44800
},
{
"epoch": 4.852434214081159,
"grad_norm": 6.763819217681885,
"learning_rate": 2.2728888888888893e-06,
"loss": 0.0934,
"step": 44900
},
{
"epoch": 4.863240935862106,
"grad_norm": 6.756659030914307,
"learning_rate": 2.228444444444445e-06,
"loss": 0.0925,
"step": 45000
},
{
"epoch": 4.863240935862106,
"eval_loss": 0.18888415396213531,
"eval_runtime": 4200.6639,
"eval_samples_per_second": 4.405,
"eval_steps_per_second": 1.101,
"eval_wer": 0.15939286356634005,
"step": 45000
},
{
"epoch": 4.874047657643054,
"grad_norm": 5.333858966827393,
"learning_rate": 2.184e-06,
"loss": 0.0937,
"step": 45100
},
{
"epoch": 4.884854379424001,
"grad_norm": 5.267432689666748,
"learning_rate": 2.139555555555556e-06,
"loss": 0.1001,
"step": 45200
},
{
"epoch": 4.89566110120495,
"grad_norm": 6.311913967132568,
"learning_rate": 2.0951111111111115e-06,
"loss": 0.09,
"step": 45300
},
{
"epoch": 4.906467822985897,
"grad_norm": 8.137591361999512,
"learning_rate": 2.0506666666666667e-06,
"loss": 0.0918,
"step": 45400
},
{
"epoch": 4.917274544766845,
"grad_norm": 8.778470993041992,
"learning_rate": 2.0062222222222224e-06,
"loss": 0.0943,
"step": 45500
},
{
"epoch": 4.917274544766845,
"eval_loss": 0.18823765218257904,
"eval_runtime": 4132.8456,
"eval_samples_per_second": 4.477,
"eval_steps_per_second": 1.119,
"eval_wer": 0.15777414587226987,
"step": 45500
},
{
"epoch": 4.928081266547792,
"grad_norm": 6.091644287109375,
"learning_rate": 1.961777777777778e-06,
"loss": 0.0917,
"step": 45600
},
{
"epoch": 4.938887988328741,
"grad_norm": 4.328017234802246,
"learning_rate": 1.9173333333333337e-06,
"loss": 0.0931,
"step": 45700
},
{
"epoch": 4.949694710109688,
"grad_norm": 6.655763626098633,
"learning_rate": 1.8728888888888891e-06,
"loss": 0.0909,
"step": 45800
},
{
"epoch": 4.960501431890636,
"grad_norm": 4.749749183654785,
"learning_rate": 1.8284444444444445e-06,
"loss": 0.0899,
"step": 45900
},
{
"epoch": 4.9713081536715835,
"grad_norm": 5.6424455642700195,
"learning_rate": 1.7840000000000002e-06,
"loss": 0.0918,
"step": 46000
},
{
"epoch": 4.9713081536715835,
"eval_loss": 0.18786819279193878,
"eval_runtime": 4143.7949,
"eval_samples_per_second": 4.465,
"eval_steps_per_second": 1.116,
"eval_wer": 0.1583919173593788,
"step": 46000
},
{
"epoch": 4.982114875452531,
"grad_norm": 5.470109462738037,
"learning_rate": 1.74e-06,
"loss": 0.0994,
"step": 46100
},
{
"epoch": 4.9929215972334795,
"grad_norm": 5.171388149261475,
"learning_rate": 1.6955555555555555e-06,
"loss": 0.0943,
"step": 46200
},
{
"epoch": 5.0036742854055225,
"grad_norm": 6.812433242797852,
"learning_rate": 1.6511111111111112e-06,
"loss": 0.0892,
"step": 46300
},
{
"epoch": 5.01448100718647,
"grad_norm": 3.1371068954467773,
"learning_rate": 1.606666666666667e-06,
"loss": 0.0758,
"step": 46400
},
{
"epoch": 5.025287728967418,
"grad_norm": 3.814182758331299,
"learning_rate": 1.5622222222222225e-06,
"loss": 0.0791,
"step": 46500
},
{
"epoch": 5.025287728967418,
"eval_loss": 0.18768277764320374,
"eval_runtime": 4051.6008,
"eval_samples_per_second": 4.567,
"eval_steps_per_second": 1.142,
"eval_wer": 0.15599121044112013,
"step": 46500
},
{
"epoch": 5.036094450748365,
"grad_norm": 5.319475173950195,
"learning_rate": 1.5177777777777781e-06,
"loss": 0.0759,
"step": 46600
},
{
"epoch": 5.046901172529314,
"grad_norm": 7.009033679962158,
"learning_rate": 1.4733333333333336e-06,
"loss": 0.0757,
"step": 46700
},
{
"epoch": 5.057707894310261,
"grad_norm": 4.960785865783691,
"learning_rate": 1.4288888888888892e-06,
"loss": 0.0835,
"step": 46800
},
{
"epoch": 5.068514616091209,
"grad_norm": 3.7821145057678223,
"learning_rate": 1.3844444444444446e-06,
"loss": 0.0775,
"step": 46900
},
{
"epoch": 5.079321337872156,
"grad_norm": 5.3354668617248535,
"learning_rate": 1.34e-06,
"loss": 0.077,
"step": 47000
},
{
"epoch": 5.079321337872156,
"eval_loss": 0.18772615492343903,
"eval_runtime": 4041.9986,
"eval_samples_per_second": 4.578,
"eval_steps_per_second": 1.144,
"eval_wer": 0.15872035283353794,
"step": 47000
},
{
"epoch": 5.090128059653105,
"grad_norm": 4.3733320236206055,
"learning_rate": 1.2955555555555557e-06,
"loss": 0.0809,
"step": 47100
},
{
"epoch": 5.100934781434052,
"grad_norm": 2.8010852336883545,
"learning_rate": 1.2511111111111112e-06,
"loss": 0.0733,
"step": 47200
},
{
"epoch": 5.111741503215,
"grad_norm": 10.156082153320312,
"learning_rate": 1.2066666666666668e-06,
"loss": 0.0736,
"step": 47300
},
{
"epoch": 5.122548224995947,
"grad_norm": 8.13224983215332,
"learning_rate": 1.1622222222222223e-06,
"loss": 0.0797,
"step": 47400
},
{
"epoch": 5.133354946776895,
"grad_norm": 3.623875856399536,
"learning_rate": 1.117777777777778e-06,
"loss": 0.0769,
"step": 47500
},
{
"epoch": 5.133354946776895,
"eval_loss": 0.18783515691757202,
"eval_runtime": 4090.0533,
"eval_samples_per_second": 4.524,
"eval_steps_per_second": 1.131,
"eval_wer": 0.15967437968704792,
"step": 47500
},
{
"epoch": 5.144161668557843,
"grad_norm": 4.278363227844238,
"learning_rate": 1.0733333333333334e-06,
"loss": 0.0765,
"step": 47600
},
{
"epoch": 5.154968390338791,
"grad_norm": 5.6777191162109375,
"learning_rate": 1.028888888888889e-06,
"loss": 0.074,
"step": 47700
},
{
"epoch": 5.165775112119738,
"grad_norm": 5.946367263793945,
"learning_rate": 9.844444444444445e-07,
"loss": 0.0777,
"step": 47800
},
{
"epoch": 5.176581833900686,
"grad_norm": 6.455644607543945,
"learning_rate": 9.400000000000001e-07,
"loss": 0.0804,
"step": 47900
},
{
"epoch": 5.187388555681634,
"grad_norm": 4.086187839508057,
"learning_rate": 8.955555555555557e-07,
"loss": 0.0744,
"step": 48000
},
{
"epoch": 5.187388555681634,
"eval_loss": 0.18759387731552124,
"eval_runtime": 4109.5273,
"eval_samples_per_second": 4.503,
"eval_steps_per_second": 1.126,
"eval_wer": 0.15845447649731387,
"step": 48000
},
{
"epoch": 5.198195277462582,
"grad_norm": 5.9285736083984375,
"learning_rate": 8.511111111111112e-07,
"loss": 0.0775,
"step": 48100
},
{
"epoch": 5.2090019992435295,
"grad_norm": 4.34613037109375,
"learning_rate": 8.066666666666667e-07,
"loss": 0.0772,
"step": 48200
},
{
"epoch": 5.219808721024477,
"grad_norm": 5.6380109786987305,
"learning_rate": 7.622222222222223e-07,
"loss": 0.0736,
"step": 48300
},
{
"epoch": 5.230615442805425,
"grad_norm": 6.854168891906738,
"learning_rate": 7.177777777777778e-07,
"loss": 0.0748,
"step": 48400
},
{
"epoch": 5.241422164586373,
"grad_norm": 5.549808502197266,
"learning_rate": 6.733333333333334e-07,
"loss": 0.0775,
"step": 48500
},
{
"epoch": 5.241422164586373,
"eval_loss": 0.18737368285655975,
"eval_runtime": 4061.569,
"eval_samples_per_second": 4.556,
"eval_steps_per_second": 1.139,
"eval_wer": 0.1595258017344521,
"step": 48500
},
{
"epoch": 5.2522288863673205,
"grad_norm": 4.706333160400391,
"learning_rate": 6.288888888888889e-07,
"loss": 0.0804,
"step": 48600
},
{
"epoch": 5.263035608148268,
"grad_norm": 6.192058563232422,
"learning_rate": 5.844444444444445e-07,
"loss": 0.0727,
"step": 48700
},
{
"epoch": 5.273842329929216,
"grad_norm": 6.242217540740967,
"learning_rate": 5.4e-07,
"loss": 0.0779,
"step": 48800
},
{
"epoch": 5.284649051710164,
"grad_norm": 4.3639326095581055,
"learning_rate": 4.955555555555556e-07,
"loss": 0.0763,
"step": 48900
},
{
"epoch": 5.295455773491112,
"grad_norm": 4.59783411026001,
"learning_rate": 4.511111111111111e-07,
"loss": 0.069,
"step": 49000
},
{
"epoch": 5.295455773491112,
"eval_loss": 0.1873014122247696,
"eval_runtime": 4056.6992,
"eval_samples_per_second": 4.561,
"eval_steps_per_second": 1.14,
"eval_wer": 0.15788362436365627,
"step": 49000
},
{
"epoch": 5.306262495272059,
"grad_norm": 2.637141227722168,
"learning_rate": 4.0666666666666666e-07,
"loss": 0.0755,
"step": 49100
},
{
"epoch": 5.317069217053007,
"grad_norm": 4.836416244506836,
"learning_rate": 3.622222222222223e-07,
"loss": 0.0792,
"step": 49200
},
{
"epoch": 5.327875938833955,
"grad_norm": 3.9841196537017822,
"learning_rate": 3.177777777777778e-07,
"loss": 0.0779,
"step": 49300
},
{
"epoch": 5.338682660614903,
"grad_norm": 5.457947731018066,
"learning_rate": 2.7333333333333335e-07,
"loss": 0.0745,
"step": 49400
},
{
"epoch": 5.34948938239585,
"grad_norm": 2.749351978302002,
"learning_rate": 2.2888888888888892e-07,
"loss": 0.0761,
"step": 49500
},
{
"epoch": 5.34948938239585,
"eval_loss": 0.18704187870025635,
"eval_runtime": 4067.5033,
"eval_samples_per_second": 4.549,
"eval_steps_per_second": 1.137,
"eval_wer": 0.15749262975156203,
"step": 49500
},
{
"epoch": 5.360404171394608,
"grad_norm": 4.275832653045654,
"learning_rate": 1.8444444444444446e-07,
"loss": 0.0745,
"step": 49600
},
{
"epoch": 5.371210893175555,
"grad_norm": 4.123929023742676,
"learning_rate": 1.4e-07,
"loss": 0.0791,
"step": 49700
},
{
"epoch": 5.382017614956503,
"grad_norm": 4.660633563995361,
"learning_rate": 9.555555555555556e-08,
"loss": 0.0788,
"step": 49800
},
{
"epoch": 5.39282433673745,
"grad_norm": 7.454755783081055,
"learning_rate": 5.111111111111112e-08,
"loss": 0.0782,
"step": 49900
},
{
"epoch": 5.403631058518398,
"grad_norm": 5.680870056152344,
"learning_rate": 6.666666666666667e-09,
"loss": 0.0711,
"step": 50000
},
{
"epoch": 5.403631058518398,
"eval_loss": 0.1868782788515091,
"eval_runtime": 3402.9501,
"eval_samples_per_second": 5.438,
"eval_steps_per_second": 1.359,
"eval_wer": 0.15829025876023428,
"step": 50000
},
{
"epoch": 5.403631058518398,
"step": 50000,
"total_flos": 1.969520657154048e+19,
"train_loss": 0.0007634645557403565,
"train_runtime": 4719.3215,
"train_samples_per_second": 169.516,
"train_steps_per_second": 10.595
}
],
"logging_steps": 100,
"max_steps": 50000,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.969520657154048e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}