Wav2vec2-wolof / trainer_state.json
Leonel-Maia's picture
End of training
dbc38aa verified
Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN, "... is not valid JSON
{
"best_global_step": 5000,
"best_metric": 0.28820186853408813,
"best_model_checkpoint": "./Wav2vec2-wolof/checkpoint-5000",
"epoch": 3.485254691689008,
"eval_steps": 500,
"global_step": 6500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013404825737265416,
"grad_norm": 26.66357421875,
"learning_rate": 6.899999999999999e-06,
"loss": 24.1179,
"step": 25
},
{
"epoch": 0.02680965147453083,
"grad_norm": 30.594707489013672,
"learning_rate": 1.4099999999999999e-05,
"loss": 23.0079,
"step": 50
},
{
"epoch": 0.040214477211796246,
"grad_norm": 24.911148071289062,
"learning_rate": 2.1599999999999996e-05,
"loss": 14.5492,
"step": 75
},
{
"epoch": 0.05361930294906166,
"grad_norm": 6.641660690307617,
"learning_rate": 2.91e-05,
"loss": 5.6879,
"step": 100
},
{
"epoch": 0.06702412868632708,
"grad_norm": 2.7079834938049316,
"learning_rate": 3.6599999999999995e-05,
"loss": 4.0839,
"step": 125
},
{
"epoch": 0.08042895442359249,
"grad_norm": 1.957312822341919,
"learning_rate": 4.4099999999999995e-05,
"loss": 3.6578,
"step": 150
},
{
"epoch": 0.0938337801608579,
"grad_norm": 0.7005499005317688,
"learning_rate": 5.1599999999999994e-05,
"loss": 3.2777,
"step": 175
},
{
"epoch": 0.10723860589812333,
"grad_norm": 0.8753517270088196,
"learning_rate": 5.91e-05,
"loss": 3.1782,
"step": 200
},
{
"epoch": 0.12064343163538874,
"grad_norm": 1.0705907344818115,
"learning_rate": 6.659999999999999e-05,
"loss": 3.066,
"step": 225
},
{
"epoch": 0.13404825737265416,
"grad_norm": 1.0790441036224365,
"learning_rate": 7.41e-05,
"loss": 3.0007,
"step": 250
},
{
"epoch": 0.14745308310991956,
"grad_norm": 2.6309916973114014,
"learning_rate": 8.16e-05,
"loss": 2.9045,
"step": 275
},
{
"epoch": 0.16085790884718498,
"grad_norm": 1.937912106513977,
"learning_rate": 8.909999999999998e-05,
"loss": 2.4134,
"step": 300
},
{
"epoch": 0.1742627345844504,
"grad_norm": 0.9296526908874512,
"learning_rate": 9.659999999999999e-05,
"loss": 1.6152,
"step": 325
},
{
"epoch": 0.1876675603217158,
"grad_norm": 2.7531306743621826,
"learning_rate": 0.00010409999999999998,
"loss": 1.3614,
"step": 350
},
{
"epoch": 0.20107238605898123,
"grad_norm": 0.8136234879493713,
"learning_rate": 0.00011159999999999999,
"loss": 1.1165,
"step": 375
},
{
"epoch": 0.21447721179624665,
"grad_norm": 1.2151700258255005,
"learning_rate": 0.0001191,
"loss": 1.0421,
"step": 400
},
{
"epoch": 0.22788203753351208,
"grad_norm": 0.6655362248420715,
"learning_rate": 0.0001266,
"loss": 0.7823,
"step": 425
},
{
"epoch": 0.24128686327077747,
"grad_norm": 1.2359944581985474,
"learning_rate": 0.00013409999999999998,
"loss": 0.8167,
"step": 450
},
{
"epoch": 0.2546916890080429,
"grad_norm": 0.479769766330719,
"learning_rate": 0.00014159999999999997,
"loss": 0.619,
"step": 475
},
{
"epoch": 0.2680965147453083,
"grad_norm": 1.5369658470153809,
"learning_rate": 0.0001491,
"loss": 0.6813,
"step": 500
},
{
"epoch": 0.2680965147453083,
"eval_cer": 0.1590862737099735,
"eval_loss": 0.46627911925315857,
"eval_runtime": 187.8681,
"eval_samples_per_second": 8.357,
"eval_steps_per_second": 1.049,
"eval_wer": 0.5099869024230518,
"step": 500
},
{
"epoch": 0.28150134048257375,
"grad_norm": 0.5673221945762634,
"learning_rate": 0.00015659999999999998,
"loss": 0.5007,
"step": 525
},
{
"epoch": 0.2949061662198391,
"grad_norm": 1.1437309980392456,
"learning_rate": 0.0001641,
"loss": 0.5798,
"step": 550
},
{
"epoch": 0.30831099195710454,
"grad_norm": 0.846733808517456,
"learning_rate": 0.00017159999999999997,
"loss": 0.4721,
"step": 575
},
{
"epoch": 0.32171581769436997,
"grad_norm": 1.40889310836792,
"learning_rate": 0.0001791,
"loss": 0.5589,
"step": 600
},
{
"epoch": 0.3351206434316354,
"grad_norm": 0.6701321601867676,
"learning_rate": 0.00018659999999999998,
"loss": 0.4675,
"step": 625
},
{
"epoch": 0.3485254691689008,
"grad_norm": 1.3316681385040283,
"learning_rate": 0.0001941,
"loss": 0.5296,
"step": 650
},
{
"epoch": 0.36193029490616624,
"grad_norm": 0.660426676273346,
"learning_rate": 0.0002016,
"loss": 0.4336,
"step": 675
},
{
"epoch": 0.3753351206434316,
"grad_norm": 2.398401975631714,
"learning_rate": 0.00020909999999999996,
"loss": 0.5475,
"step": 700
},
{
"epoch": 0.38873994638069703,
"grad_norm": 0.4912319481372833,
"learning_rate": 0.00021659999999999998,
"loss": 0.4247,
"step": 725
},
{
"epoch": 0.40214477211796246,
"grad_norm": 1.7861497402191162,
"learning_rate": 0.00022409999999999997,
"loss": 0.5189,
"step": 750
},
{
"epoch": 0.4155495978552279,
"grad_norm": 0.6414456367492676,
"learning_rate": 0.0002316,
"loss": 0.4367,
"step": 775
},
{
"epoch": 0.4289544235924933,
"grad_norm": 2.092426061630249,
"learning_rate": 0.00023909999999999998,
"loss": 0.4774,
"step": 800
},
{
"epoch": 0.44235924932975873,
"grad_norm": 1.7996277809143066,
"learning_rate": 0.0002466,
"loss": 0.4069,
"step": 825
},
{
"epoch": 0.45576407506702415,
"grad_norm": 1.8674991130828857,
"learning_rate": 0.0002541,
"loss": 0.5321,
"step": 850
},
{
"epoch": 0.4691689008042895,
"grad_norm": 0.6605350375175476,
"learning_rate": 0.00026159999999999996,
"loss": 0.5289,
"step": 875
},
{
"epoch": 0.48257372654155495,
"grad_norm": 1.1707311868667603,
"learning_rate": 0.0002691,
"loss": 0.5099,
"step": 900
},
{
"epoch": 0.4959785522788204,
"grad_norm": 0.7791293263435364,
"learning_rate": 0.0002766,
"loss": 0.427,
"step": 925
},
{
"epoch": 0.5093833780160858,
"grad_norm": 2.19549822807312,
"learning_rate": 0.00028409999999999997,
"loss": 0.5421,
"step": 950
},
{
"epoch": 0.5227882037533512,
"grad_norm": 0.9283122420310974,
"learning_rate": 0.0002916,
"loss": 0.4518,
"step": 975
},
{
"epoch": 0.5361930294906166,
"grad_norm": 1.525386929512024,
"learning_rate": 0.00029909999999999995,
"loss": 0.5015,
"step": 1000
},
{
"epoch": 0.5361930294906166,
"eval_cer": 0.14878645727105855,
"eval_loss": 0.3607601523399353,
"eval_runtime": 191.1162,
"eval_samples_per_second": 8.215,
"eval_steps_per_second": 1.031,
"eval_wer": 0.4816088190351452,
"step": 1000
},
{
"epoch": 0.5495978552278821,
"grad_norm": 1.58902907371521,
"learning_rate": 0.00029994048692515775,
"loss": 0.4443,
"step": 1025
},
{
"epoch": 0.5630026809651475,
"grad_norm": 1.3990167379379272,
"learning_rate": 0.0002998728584310189,
"loss": 0.5085,
"step": 1050
},
{
"epoch": 0.5764075067024129,
"grad_norm": 1.941627025604248,
"learning_rate": 0.00029980522993688,
"loss": 0.4453,
"step": 1075
},
{
"epoch": 0.5898123324396782,
"grad_norm": 1.8028769493103027,
"learning_rate": 0.0002997376014427412,
"loss": 0.5275,
"step": 1100
},
{
"epoch": 0.6032171581769437,
"grad_norm": 0.8877336978912354,
"learning_rate": 0.00029966997294860235,
"loss": 0.4264,
"step": 1125
},
{
"epoch": 0.6166219839142091,
"grad_norm": 1.3580883741378784,
"learning_rate": 0.00029960234445446343,
"loss": 0.5471,
"step": 1150
},
{
"epoch": 0.6300268096514745,
"grad_norm": 0.9151347875595093,
"learning_rate": 0.00029953471596032456,
"loss": 0.4462,
"step": 1175
},
{
"epoch": 0.6434316353887399,
"grad_norm": 1.6365715265274048,
"learning_rate": 0.00029946708746618575,
"loss": 0.4745,
"step": 1200
},
{
"epoch": 0.6568364611260054,
"grad_norm": 0.9017271995544434,
"learning_rate": 0.0002993994589720469,
"loss": 0.4098,
"step": 1225
},
{
"epoch": 0.6702412868632708,
"grad_norm": 1.4687843322753906,
"learning_rate": 0.00029933183047790797,
"loss": 0.4814,
"step": 1250
},
{
"epoch": 0.6836461126005362,
"grad_norm": 0.9523298144340515,
"learning_rate": 0.0002992642019837691,
"loss": 0.4071,
"step": 1275
},
{
"epoch": 0.6970509383378016,
"grad_norm": 1.300267219543457,
"learning_rate": 0.0002991965734896303,
"loss": 0.489,
"step": 1300
},
{
"epoch": 0.710455764075067,
"grad_norm": 0.8380106091499329,
"learning_rate": 0.00029912894499549143,
"loss": 0.4088,
"step": 1325
},
{
"epoch": 0.7238605898123325,
"grad_norm": 1.501035213470459,
"learning_rate": 0.00029906131650135257,
"loss": 0.4985,
"step": 1350
},
{
"epoch": 0.7372654155495979,
"grad_norm": 0.4843454360961914,
"learning_rate": 0.00029899368800721365,
"loss": 0.4098,
"step": 1375
},
{
"epoch": 0.7506702412868632,
"grad_norm": 1.5197869539260864,
"learning_rate": 0.0002989260595130748,
"loss": 0.481,
"step": 1400
},
{
"epoch": 0.7640750670241286,
"grad_norm": 0.7740542888641357,
"learning_rate": 0.000298858431018936,
"loss": 0.4852,
"step": 1425
},
{
"epoch": 0.7774798927613941,
"grad_norm": 1.8375496864318848,
"learning_rate": 0.0002987908025247971,
"loss": 0.4678,
"step": 1450
},
{
"epoch": 0.7908847184986595,
"grad_norm": 0.48813843727111816,
"learning_rate": 0.00029872317403065824,
"loss": 0.4322,
"step": 1475
},
{
"epoch": 0.8042895442359249,
"grad_norm": 1.3342444896697998,
"learning_rate": 0.0002986555455365193,
"loss": 0.4424,
"step": 1500
},
{
"epoch": 0.8042895442359249,
"eval_cer": 0.14556847281709612,
"eval_loss": 0.3339381515979767,
"eval_runtime": 191.3899,
"eval_samples_per_second": 8.203,
"eval_steps_per_second": 1.029,
"eval_wer": 0.4802444881030343,
"step": 1500
},
{
"epoch": 0.8176943699731903,
"grad_norm": 0.9165742993354797,
"learning_rate": 0.0002985879170423805,
"loss": 0.4495,
"step": 1525
},
{
"epoch": 0.8310991957104558,
"grad_norm": 1.5298058986663818,
"learning_rate": 0.00029852028854824165,
"loss": 0.4839,
"step": 1550
},
{
"epoch": 0.8445040214477212,
"grad_norm": 0.5936838984489441,
"learning_rate": 0.0002984526600541028,
"loss": 0.446,
"step": 1575
},
{
"epoch": 0.8579088471849866,
"grad_norm": 2.4456019401550293,
"learning_rate": 0.00029838503155996387,
"loss": 0.4568,
"step": 1600
},
{
"epoch": 0.871313672922252,
"grad_norm": 0.5276655554771423,
"learning_rate": 0.00029831740306582506,
"loss": 0.4752,
"step": 1625
},
{
"epoch": 0.8847184986595175,
"grad_norm": 8.446830749511719,
"learning_rate": 0.0002982497745716862,
"loss": 0.4943,
"step": 1650
},
{
"epoch": 0.8981233243967829,
"grad_norm": 0.7062143683433533,
"learning_rate": 0.0002981848512173129,
"loss": 0.8819,
"step": 1675
},
{
"epoch": 0.9115281501340483,
"grad_norm": 2.223888397216797,
"learning_rate": 0.000298117222723174,
"loss": 0.4901,
"step": 1700
},
{
"epoch": 0.9249329758713136,
"grad_norm": 0.7132174372673035,
"learning_rate": 0.0002980495942290351,
"loss": 0.4252,
"step": 1725
},
{
"epoch": 0.938337801608579,
"grad_norm": 1.3910300731658936,
"learning_rate": 0.0002979819657348963,
"loss": 0.6194,
"step": 1750
},
{
"epoch": 0.9517426273458445,
"grad_norm": 1.0311365127563477,
"learning_rate": 0.00029791433724075744,
"loss": 0.4023,
"step": 1775
},
{
"epoch": 0.9651474530831099,
"grad_norm": 1.4001951217651367,
"learning_rate": 0.00029784670874661857,
"loss": 0.4346,
"step": 1800
},
{
"epoch": 0.9785522788203753,
"grad_norm": 0.7526248097419739,
"learning_rate": 0.00029777908025247965,
"loss": 0.401,
"step": 1825
},
{
"epoch": 0.9919571045576407,
"grad_norm": null,
"learning_rate": 0.00029771145175834084,
"loss": 0.4831,
"step": 1850
},
{
"epoch": 1.0053619302949062,
"grad_norm": 1.6143497228622437,
"learning_rate": 0.00029764652840396754,
"loss": 0.4327,
"step": 1875
},
{
"epoch": 1.0187667560321716,
"grad_norm": 0.9131807088851929,
"learning_rate": 0.0002975788999098286,
"loss": 0.3143,
"step": 1900
},
{
"epoch": 1.032171581769437,
"grad_norm": 0.5591608285903931,
"learning_rate": 0.00029751127141568976,
"loss": 0.4522,
"step": 1925
},
{
"epoch": 1.0455764075067024,
"grad_norm": 0.6661513447761536,
"learning_rate": 0.0002974436429215509,
"loss": 0.3374,
"step": 1950
},
{
"epoch": 1.0589812332439679,
"grad_norm": 0.5489794015884399,
"learning_rate": 0.0002973760144274121,
"loss": 0.4572,
"step": 1975
},
{
"epoch": 1.0723860589812333,
"grad_norm": 0.7652894258499146,
"learning_rate": 0.0002973083859332732,
"loss": 0.3254,
"step": 2000
},
{
"epoch": 1.0723860589812333,
"eval_cer": 0.14708681759466993,
"eval_loss": 0.3146507740020752,
"eval_runtime": 190.0206,
"eval_samples_per_second": 8.262,
"eval_steps_per_second": 1.037,
"eval_wer": 0.47920759659462997,
"step": 2000
},
{
"epoch": 1.0857908847184987,
"grad_norm": 0.4912964105606079,
"learning_rate": 0.0002972407574391343,
"loss": 0.4793,
"step": 2025
},
{
"epoch": 1.0991957104557641,
"grad_norm": 0.5184613466262817,
"learning_rate": 0.00029717312894499544,
"loss": 0.3278,
"step": 2050
},
{
"epoch": 1.1126005361930296,
"grad_norm": 0.3988397717475891,
"learning_rate": 0.0002971055004508566,
"loss": 0.4684,
"step": 2075
},
{
"epoch": 1.126005361930295,
"grad_norm": 1.1345094442367554,
"learning_rate": 0.00029703787195671776,
"loss": 0.3179,
"step": 2100
},
{
"epoch": 1.1394101876675604,
"grad_norm": 0.4202677309513092,
"learning_rate": 0.0002969702434625789,
"loss": 0.4557,
"step": 2125
},
{
"epoch": 1.1528150134048256,
"grad_norm": 0.757359504699707,
"learning_rate": 0.00029690261496844,
"loss": 0.3207,
"step": 2150
},
{
"epoch": 1.1662198391420913,
"grad_norm": 1.6614487171173096,
"learning_rate": 0.0002968376916140667,
"loss": 0.494,
"step": 2175
},
{
"epoch": 1.1796246648793565,
"grad_norm": 0.7348321080207825,
"learning_rate": 0.00029677006311992787,
"loss": 0.3561,
"step": 2200
},
{
"epoch": 1.193029490616622,
"grad_norm": 0.577582061290741,
"learning_rate": 0.00029670243462578895,
"loss": 0.454,
"step": 2225
},
{
"epoch": 1.2064343163538873,
"grad_norm": 0.6139086484909058,
"learning_rate": 0.0002966348061316501,
"loss": 0.319,
"step": 2250
},
{
"epoch": 1.2198391420911527,
"grad_norm": 0.7023029923439026,
"learning_rate": 0.0002965671776375112,
"loss": 0.4586,
"step": 2275
},
{
"epoch": 1.2332439678284182,
"grad_norm": 0.9260369539260864,
"learning_rate": 0.0002964995491433724,
"loss": 0.3373,
"step": 2300
},
{
"epoch": 1.2466487935656836,
"grad_norm": 0.42650407552719116,
"learning_rate": 0.00029643192064923355,
"loss": 0.4738,
"step": 2325
},
{
"epoch": 1.260053619302949,
"grad_norm": 0.5607989430427551,
"learning_rate": 0.00029636429215509463,
"loss": 0.328,
"step": 2350
},
{
"epoch": 1.2734584450402144,
"grad_norm": 0.6092125773429871,
"learning_rate": 0.00029629666366095576,
"loss": 0.4625,
"step": 2375
},
{
"epoch": 1.2868632707774799,
"grad_norm": 0.9334998726844788,
"learning_rate": 0.00029622903516681695,
"loss": 0.3387,
"step": 2400
},
{
"epoch": 1.3002680965147453,
"grad_norm": 0.49639058113098145,
"learning_rate": 0.0002961614066726781,
"loss": 0.4863,
"step": 2425
},
{
"epoch": 1.3136729222520107,
"grad_norm": 0.6151789426803589,
"learning_rate": 0.0002960937781785392,
"loss": 0.323,
"step": 2450
},
{
"epoch": 1.3270777479892761,
"grad_norm": 0.5085999965667725,
"learning_rate": 0.0002960261496844003,
"loss": 0.4538,
"step": 2475
},
{
"epoch": 1.3404825737265416,
"grad_norm": 1.1368881464004517,
"learning_rate": 0.00029595852119026144,
"loss": 0.5671,
"step": 2500
},
{
"epoch": 1.3404825737265416,
"eval_cer": 0.1469735082829107,
"eval_loss": 0.3150199055671692,
"eval_runtime": 189.299,
"eval_samples_per_second": 8.294,
"eval_steps_per_second": 1.041,
"eval_wer": 0.4757149094084261,
"step": 2500
},
{
"epoch": 1.353887399463807,
"grad_norm": 0.5132752060890198,
"learning_rate": 0.00029589089269612263,
"loss": 0.452,
"step": 2525
},
{
"epoch": 1.3672922252010724,
"grad_norm": 0.8539701700210571,
"learning_rate": 0.00029582326420198377,
"loss": 0.3149,
"step": 2550
},
{
"epoch": 1.3806970509383378,
"grad_norm": 1.3219923973083496,
"learning_rate": 0.00029575563570784485,
"loss": 0.4735,
"step": 2575
},
{
"epoch": 1.3941018766756033,
"grad_norm": 1.1187196969985962,
"learning_rate": 0.000295688007213706,
"loss": 0.3357,
"step": 2600
},
{
"epoch": 1.4075067024128687,
"grad_norm": 0.5335268378257751,
"learning_rate": 0.0002956203787195672,
"loss": 0.4755,
"step": 2625
},
{
"epoch": 1.420911528150134,
"grad_norm": 1.1260613203048706,
"learning_rate": 0.0002955527502254283,
"loss": 0.3312,
"step": 2650
},
{
"epoch": 1.4343163538873995,
"grad_norm": 0.4925306737422943,
"learning_rate": 0.00029548512173128944,
"loss": 0.4918,
"step": 2675
},
{
"epoch": 1.447721179624665,
"grad_norm": 0.8351105451583862,
"learning_rate": 0.0002954174932371505,
"loss": 0.3408,
"step": 2700
},
{
"epoch": 1.4611260053619302,
"grad_norm": 0.45573753118515015,
"learning_rate": 0.0002953498647430117,
"loss": 0.4778,
"step": 2725
},
{
"epoch": 1.4745308310991958,
"grad_norm": 0.9699208736419678,
"learning_rate": 0.00029528223624887285,
"loss": 0.3117,
"step": 2750
},
{
"epoch": 1.487935656836461,
"grad_norm": 2.6216440200805664,
"learning_rate": 0.000295214607754734,
"loss": 0.5113,
"step": 2775
},
{
"epoch": 1.5013404825737267,
"grad_norm": 0.6581635475158691,
"learning_rate": 0.0002951469792605951,
"loss": 0.3661,
"step": 2800
},
{
"epoch": 1.5147453083109919,
"grad_norm": 0.4799867272377014,
"learning_rate": 0.00029507935076645626,
"loss": 0.492,
"step": 2825
},
{
"epoch": 1.5281501340482575,
"grad_norm": 0.5858296751976013,
"learning_rate": 0.0002950117222723174,
"loss": 0.3361,
"step": 2850
},
{
"epoch": 1.5415549597855227,
"grad_norm": 0.5094213485717773,
"learning_rate": 0.00029494409377817853,
"loss": 0.4414,
"step": 2875
},
{
"epoch": 1.5549597855227884,
"grad_norm": 0.852539598941803,
"learning_rate": 0.00029487646528403966,
"loss": 0.3532,
"step": 2900
},
{
"epoch": 1.5683646112600536,
"grad_norm": 2.0929360389709473,
"learning_rate": 0.00029480883678990074,
"loss": 0.4614,
"step": 2925
},
{
"epoch": 1.5817694369973192,
"grad_norm": 1.2485980987548828,
"learning_rate": 0.00029474120829576193,
"loss": 0.3141,
"step": 2950
},
{
"epoch": 1.5951742627345844,
"grad_norm": 0.5923244953155518,
"learning_rate": 0.00029467357980162307,
"loss": 0.4607,
"step": 2975
},
{
"epoch": 1.6085790884718498,
"grad_norm": 0.7467523813247681,
"learning_rate": 0.0002946059513074842,
"loss": 0.3204,
"step": 3000
},
{
"epoch": 1.6085790884718498,
"eval_cer": 0.14647494731117003,
"eval_loss": 0.30716007947921753,
"eval_runtime": 192.6644,
"eval_samples_per_second": 8.149,
"eval_steps_per_second": 1.023,
"eval_wer": 0.46922069417157825,
"step": 3000
},
{
"epoch": 1.6219839142091153,
"grad_norm": 0.6438741683959961,
"learning_rate": 0.00029453832281334534,
"loss": 0.4795,
"step": 3025
},
{
"epoch": 1.6353887399463807,
"grad_norm": 0.6687526702880859,
"learning_rate": 0.0002944706943192065,
"loss": 0.3193,
"step": 3050
},
{
"epoch": 1.648793565683646,
"grad_norm": 0.4715401232242584,
"learning_rate": 0.0002944030658250676,
"loss": 0.4776,
"step": 3075
},
{
"epoch": 1.6621983914209115,
"grad_norm": 0.8782249093055725,
"learning_rate": 0.00029433543733092875,
"loss": 0.33,
"step": 3100
},
{
"epoch": 1.675603217158177,
"grad_norm": 0.551409900188446,
"learning_rate": 0.0002942678088367899,
"loss": 0.4803,
"step": 3125
},
{
"epoch": 1.6890080428954424,
"grad_norm": 0.5042707324028015,
"learning_rate": 0.000294200180342651,
"loss": 0.2894,
"step": 3150
},
{
"epoch": 1.7024128686327078,
"grad_norm": 0.47696781158447266,
"learning_rate": 0.00029413255184851215,
"loss": 0.4747,
"step": 3175
},
{
"epoch": 1.7158176943699732,
"grad_norm": 0.8431724905967712,
"learning_rate": 0.0002940649233543733,
"loss": 0.3053,
"step": 3200
},
{
"epoch": 1.7292225201072386,
"grad_norm": 0.71580570936203,
"learning_rate": 0.0002939972948602344,
"loss": 0.4638,
"step": 3225
},
{
"epoch": 1.742627345844504,
"grad_norm": 0.7655317187309265,
"learning_rate": 0.00029392966636609556,
"loss": 0.3156,
"step": 3250
},
{
"epoch": 1.7560321715817695,
"grad_norm": 0.5282989144325256,
"learning_rate": 0.0002938620378719567,
"loss": 0.5191,
"step": 3275
},
{
"epoch": 1.7694369973190347,
"grad_norm": 0.6365646123886108,
"learning_rate": 0.00029379440937781783,
"loss": 0.3131,
"step": 3300
},
{
"epoch": 1.7828418230563003,
"grad_norm": 0.427397221326828,
"learning_rate": 0.00029372678088367897,
"loss": 0.4547,
"step": 3325
},
{
"epoch": 1.7962466487935655,
"grad_norm": 1.0049763917922974,
"learning_rate": 0.0002936591523895401,
"loss": 0.3297,
"step": 3350
},
{
"epoch": 1.8096514745308312,
"grad_norm": 0.541320264339447,
"learning_rate": 0.00029359152389540124,
"loss": 0.4603,
"step": 3375
},
{
"epoch": 1.8230563002680964,
"grad_norm": 0.7566213607788086,
"learning_rate": 0.0002935238954012624,
"loss": 0.3123,
"step": 3400
},
{
"epoch": 1.836461126005362,
"grad_norm": 0.46456801891326904,
"learning_rate": 0.0002934562669071235,
"loss": 0.4906,
"step": 3425
},
{
"epoch": 1.8498659517426272,
"grad_norm": 0.9275864362716675,
"learning_rate": 0.00029338863841298464,
"loss": 0.3128,
"step": 3450
},
{
"epoch": 1.863270777479893,
"grad_norm": 0.7383331060409546,
"learning_rate": 0.0002933210099188458,
"loss": 0.4679,
"step": 3475
},
{
"epoch": 1.876675603217158,
"grad_norm": 0.607969343662262,
"learning_rate": 0.0002932533814247069,
"loss": 0.32,
"step": 3500
},
{
"epoch": 1.876675603217158,
"eval_cer": 0.1412287261767172,
"eval_loss": 0.29745474457740784,
"eval_runtime": 194.0942,
"eval_samples_per_second": 8.089,
"eval_steps_per_second": 1.015,
"eval_wer": 0.4685112420868806,
"step": 3500
},
{
"epoch": 1.8900804289544237,
"grad_norm": 0.6645176410675049,
"learning_rate": 0.00029318575293056805,
"loss": 0.4393,
"step": 3525
},
{
"epoch": 1.903485254691689,
"grad_norm": 0.4631984531879425,
"learning_rate": 0.0002931181244364292,
"loss": 0.3076,
"step": 3550
},
{
"epoch": 1.9168900804289544,
"grad_norm": 0.5980284810066223,
"learning_rate": 0.0002930504959422903,
"loss": 0.4694,
"step": 3575
},
{
"epoch": 1.9302949061662198,
"grad_norm": 0.504612922668457,
"learning_rate": 0.00029298286744815146,
"loss": 0.322,
"step": 3600
},
{
"epoch": 1.9436997319034852,
"grad_norm": 0.43368223309516907,
"learning_rate": 0.0002929152389540126,
"loss": 0.4843,
"step": 3625
},
{
"epoch": 1.9571045576407506,
"grad_norm": 0.9730172753334045,
"learning_rate": 0.00029284761045987373,
"loss": 0.3064,
"step": 3650
},
{
"epoch": 1.970509383378016,
"grad_norm": 0.42133307456970215,
"learning_rate": 0.00029277998196573486,
"loss": 0.4305,
"step": 3675
},
{
"epoch": 1.9839142091152815,
"grad_norm": 0.8820632100105286,
"learning_rate": 0.000292712353471596,
"loss": 0.3189,
"step": 3700
},
{
"epoch": 1.997319034852547,
"grad_norm": 0.5759428143501282,
"learning_rate": 0.00029264472497745713,
"loss": 0.4366,
"step": 3725
},
{
"epoch": 2.0107238605898123,
"grad_norm": 1.1622861623764038,
"learning_rate": 0.00029257709648331827,
"loss": 0.385,
"step": 3750
},
{
"epoch": 2.0241286863270775,
"grad_norm": 0.8884519934654236,
"learning_rate": 0.0002925094679891794,
"loss": 0.3082,
"step": 3775
},
{
"epoch": 2.037533512064343,
"grad_norm": 0.42693793773651123,
"learning_rate": 0.00029244183949504054,
"loss": 0.3979,
"step": 3800
},
{
"epoch": 2.0509383378016084,
"grad_norm": 0.8100738525390625,
"learning_rate": 0.0002923742110009017,
"loss": 0.3165,
"step": 3825
},
{
"epoch": 2.064343163538874,
"grad_norm": 0.5522972941398621,
"learning_rate": 0.0002923065825067628,
"loss": 0.3963,
"step": 3850
},
{
"epoch": 2.0777479892761392,
"grad_norm": 0.6663551330566406,
"learning_rate": 0.00029223895401262395,
"loss": 0.2666,
"step": 3875
},
{
"epoch": 2.091152815013405,
"grad_norm": 0.5006572604179382,
"learning_rate": 0.00029217132551848514,
"loss": 0.3829,
"step": 3900
},
{
"epoch": 2.10455764075067,
"grad_norm": 1.3403582572937012,
"learning_rate": 0.0002921036970243462,
"loss": 0.3436,
"step": 3925
},
{
"epoch": 2.1179624664879357,
"grad_norm": 0.798907995223999,
"learning_rate": 0.00029203606853020735,
"loss": 0.3897,
"step": 3950
},
{
"epoch": 2.131367292225201,
"grad_norm": 0.6176706552505493,
"learning_rate": 0.0002919684400360685,
"loss": 0.2982,
"step": 3975
},
{
"epoch": 2.1447721179624666,
"grad_norm": 1.0198493003845215,
"learning_rate": 0.0002919035166816952,
"loss": 0.3854,
"step": 4000
},
{
"epoch": 2.1447721179624666,
"eval_cer": 0.14156865411199493,
"eval_loss": 0.2999935746192932,
"eval_runtime": 193.6993,
"eval_samples_per_second": 8.105,
"eval_steps_per_second": 1.017,
"eval_wer": 0.4663828858327876,
"step": 4000
},
{
"epoch": 2.158176943699732,
"grad_norm": 0.8894768357276917,
"learning_rate": 0.0002918358881875563,
"loss": 0.3069,
"step": 4025
},
{
"epoch": 2.1715817694369974,
"grad_norm": 0.5018978118896484,
"learning_rate": 0.00029176825969341746,
"loss": 0.3727,
"step": 4050
},
{
"epoch": 2.1849865951742626,
"grad_norm": 1.342336654663086,
"learning_rate": 0.0002917006311992786,
"loss": 0.2903,
"step": 4075
},
{
"epoch": 2.1983914209115283,
"grad_norm": 1.140587329864502,
"learning_rate": 0.00029163300270513973,
"loss": 0.5605,
"step": 4100
},
{
"epoch": 2.2117962466487935,
"grad_norm": 0.8588898181915283,
"learning_rate": 0.00029156537421100087,
"loss": 0.31,
"step": 4125
},
{
"epoch": 2.225201072386059,
"grad_norm": 0.5349767804145813,
"learning_rate": 0.000291497745716862,
"loss": 0.413,
"step": 4150
},
{
"epoch": 2.2386058981233243,
"grad_norm": 1.0299067497253418,
"learning_rate": 0.00029143011722272314,
"loss": 0.3054,
"step": 4175
},
{
"epoch": 2.25201072386059,
"grad_norm": 1.3161804676055908,
"learning_rate": 0.0002913624887285843,
"loss": 0.4185,
"step": 4200
},
{
"epoch": 2.265415549597855,
"grad_norm": 1.2179690599441528,
"learning_rate": 0.00029129486023444546,
"loss": 0.3208,
"step": 4225
},
{
"epoch": 2.278820375335121,
"grad_norm": 0.7258560061454773,
"learning_rate": 0.00029122723174030655,
"loss": 0.4248,
"step": 4250
},
{
"epoch": 2.292225201072386,
"grad_norm": 0.7003055810928345,
"learning_rate": 0.0002911596032461677,
"loss": 0.3122,
"step": 4275
},
{
"epoch": 2.3056300268096512,
"grad_norm": 0.7047171592712402,
"learning_rate": 0.0002910919747520288,
"loss": 0.4407,
"step": 4300
},
{
"epoch": 2.319034852546917,
"grad_norm": 0.8658078908920288,
"learning_rate": 0.00029102434625788995,
"loss": 0.3302,
"step": 4325
},
{
"epoch": 2.3324396782841825,
"grad_norm": 0.7273276448249817,
"learning_rate": 0.0002909567177637511,
"loss": 0.4077,
"step": 4350
},
{
"epoch": 2.3458445040214477,
"grad_norm": 0.8991754651069641,
"learning_rate": 0.0002908890892696122,
"loss": 0.3087,
"step": 4375
},
{
"epoch": 2.359249329758713,
"grad_norm": 2.1196842193603516,
"learning_rate": 0.00029082146077547336,
"loss": 0.3878,
"step": 4400
},
{
"epoch": 2.3726541554959786,
"grad_norm": 1.3642330169677734,
"learning_rate": 0.0002907538322813345,
"loss": 0.3097,
"step": 4425
},
{
"epoch": 2.386058981233244,
"grad_norm": 0.4507950246334076,
"learning_rate": 0.0002906862037871957,
"loss": 0.3878,
"step": 4450
},
{
"epoch": 2.3994638069705094,
"grad_norm": 0.6684398055076599,
"learning_rate": 0.00029061857529305677,
"loss": 0.2896,
"step": 4475
},
{
"epoch": 2.4128686327077746,
"grad_norm": 2.0050833225250244,
"learning_rate": 0.0002905509467989179,
"loss": 0.3809,
"step": 4500
},
{
"epoch": 2.4128686327077746,
"eval_cer": 0.13980102884855078,
"eval_loss": 0.2892570197582245,
"eval_runtime": 193.0289,
"eval_samples_per_second": 8.133,
"eval_steps_per_second": 1.021,
"eval_wer": 0.46065269591792185,
"step": 4500
},
{
"epoch": 2.4262734584450403,
"grad_norm": 0.7828475832939148,
"learning_rate": 0.00029048331830477904,
"loss": 0.3337,
"step": 4525
},
{
"epoch": 2.4396782841823055,
"grad_norm": 0.5278825759887695,
"learning_rate": 0.0002904156898106402,
"loss": 0.3781,
"step": 4550
},
{
"epoch": 2.453083109919571,
"grad_norm": 1.2074604034423828,
"learning_rate": 0.00029034806131650136,
"loss": 0.332,
"step": 4575
},
{
"epoch": 2.4664879356568363,
"grad_norm": 0.5711201429367065,
"learning_rate": 0.00029028043282236244,
"loss": 0.43,
"step": 4600
},
{
"epoch": 2.479892761394102,
"grad_norm": 1.5318876504898071,
"learning_rate": 0.0002902128043282236,
"loss": 0.3717,
"step": 4625
},
{
"epoch": 2.493297587131367,
"grad_norm": 0.6812917590141296,
"learning_rate": 0.0002901451758340847,
"loss": 0.3999,
"step": 4650
},
{
"epoch": 2.506702412868633,
"grad_norm": 1.1768240928649902,
"learning_rate": 0.0002900775473399459,
"loss": 0.3182,
"step": 4675
},
{
"epoch": 2.520107238605898,
"grad_norm": 2.612589120864868,
"learning_rate": 0.000290009918845807,
"loss": 0.3892,
"step": 4700
},
{
"epoch": 2.5335120643431637,
"grad_norm": 0.8447991609573364,
"learning_rate": 0.0002899422903516681,
"loss": 0.3011,
"step": 4725
},
{
"epoch": 2.546916890080429,
"grad_norm": 0.5008765459060669,
"learning_rate": 0.00028987466185752926,
"loss": 0.4069,
"step": 4750
},
{
"epoch": 2.5603217158176945,
"grad_norm": 0.8445732593536377,
"learning_rate": 0.00028980703336339045,
"loss": 0.3358,
"step": 4775
},
{
"epoch": 2.5737265415549597,
"grad_norm": 0.5978402495384216,
"learning_rate": 0.0002897394048692516,
"loss": 0.4049,
"step": 4800
},
{
"epoch": 2.5871313672922254,
"grad_norm": 0.932671844959259,
"learning_rate": 0.00028967177637511266,
"loss": 0.2997,
"step": 4825
},
{
"epoch": 2.6005361930294906,
"grad_norm": 0.38006141781806946,
"learning_rate": 0.0002896041478809738,
"loss": 0.3849,
"step": 4850
},
{
"epoch": 2.6139410187667558,
"grad_norm": 0.8640321493148804,
"learning_rate": 0.000289536519386835,
"loss": 0.2714,
"step": 4875
},
{
"epoch": 2.6273458445040214,
"grad_norm": 0.5667803883552551,
"learning_rate": 0.0002894688908926961,
"loss": 0.3881,
"step": 4900
},
{
"epoch": 2.640750670241287,
"grad_norm": 0.8906360864639282,
"learning_rate": 0.00028940126239855726,
"loss": 0.3157,
"step": 4925
},
{
"epoch": 2.6541554959785523,
"grad_norm": 0.6210130453109741,
"learning_rate": 0.00028933363390441834,
"loss": 0.4235,
"step": 4950
},
{
"epoch": 2.6675603217158175,
"grad_norm": 0.8941358327865601,
"learning_rate": 0.00028926600541027953,
"loss": 0.3045,
"step": 4975
},
{
"epoch": 2.680965147453083,
"grad_norm": 0.7417230606079102,
"learning_rate": 0.00028919837691614066,
"loss": 0.3977,
"step": 5000
},
{
"epoch": 2.680965147453083,
"eval_cer": 0.1418859201849208,
"eval_loss": 0.28820186853408813,
"eval_runtime": 192.4195,
"eval_samples_per_second": 8.159,
"eval_steps_per_second": 1.024,
"eval_wer": 0.46436367605326345,
"step": 5000
},
{
"epoch": 2.6943699731903488,
"grad_norm": 0.9162290692329407,
"learning_rate": 0.0002891307484220018,
"loss": 0.3241,
"step": 5025
},
{
"epoch": 2.707774798927614,
"grad_norm": 0.48595941066741943,
"learning_rate": 0.0002890631199278629,
"loss": 0.4285,
"step": 5050
},
{
"epoch": 2.721179624664879,
"grad_norm": 0.8688729405403137,
"learning_rate": 0.000288995491433724,
"loss": 0.3234,
"step": 5075
},
{
"epoch": 2.734584450402145,
"grad_norm": 1.4491957426071167,
"learning_rate": 0.0002889278629395852,
"loss": 0.4005,
"step": 5100
},
{
"epoch": 2.7479892761394105,
"grad_norm": 0.6688870191574097,
"learning_rate": 0.00028886023444544634,
"loss": 0.3033,
"step": 5125
},
{
"epoch": 2.7613941018766757,
"grad_norm": 0.6579515933990479,
"learning_rate": 0.0002887926059513075,
"loss": 0.4209,
"step": 5150
},
{
"epoch": 2.774798927613941,
"grad_norm": 0.7731506824493408,
"learning_rate": 0.00028872497745716856,
"loss": 0.3119,
"step": 5175
},
{
"epoch": 2.7882037533512065,
"grad_norm": 0.4523961842060089,
"learning_rate": 0.00028865734896302975,
"loss": 0.4168,
"step": 5200
},
{
"epoch": 2.8016085790884717,
"grad_norm": 0.9980618953704834,
"learning_rate": 0.0002885897204688909,
"loss": 0.2943,
"step": 5225
},
{
"epoch": 2.8150134048257374,
"grad_norm": 1.1214523315429688,
"learning_rate": 0.000288522091974752,
"loss": 0.4058,
"step": 5250
},
{
"epoch": 2.8284182305630026,
"grad_norm": 3.955530881881714,
"learning_rate": 0.00028845446348061316,
"loss": 0.2864,
"step": 5275
},
{
"epoch": 2.841823056300268,
"grad_norm": 0.5203446745872498,
"learning_rate": 0.0002883868349864743,
"loss": 0.4013,
"step": 5300
},
{
"epoch": 2.8552278820375334,
"grad_norm": 1.0400564670562744,
"learning_rate": 0.0002883192064923354,
"loss": 0.2924,
"step": 5325
},
{
"epoch": 2.868632707774799,
"grad_norm": 0.5297791361808777,
"learning_rate": 0.00028825157799819656,
"loss": 0.4127,
"step": 5350
},
{
"epoch": 2.8820375335120643,
"grad_norm": 2.9058032035827637,
"learning_rate": 0.0002881839495040577,
"loss": 0.3282,
"step": 5375
},
{
"epoch": 2.89544235924933,
"grad_norm": 0.5886743664741516,
"learning_rate": 0.0002881163210099188,
"loss": 0.4285,
"step": 5400
},
{
"epoch": 2.908847184986595,
"grad_norm": 1.0201635360717773,
"learning_rate": 0.00028804869251577997,
"loss": 0.3718,
"step": 5425
},
{
"epoch": 2.9222520107238603,
"grad_norm": 0.44661447405815125,
"learning_rate": 0.0002879810640216411,
"loss": 0.4447,
"step": 5450
},
{
"epoch": 2.935656836461126,
"grad_norm": 0.9066615104675293,
"learning_rate": 0.00028791343552750224,
"loss": 0.2977,
"step": 5475
},
{
"epoch": 2.9490616621983916,
"grad_norm": 0.4934927523136139,
"learning_rate": 0.0002878458070333634,
"loss": 0.3966,
"step": 5500
},
{
"epoch": 2.9490616621983916,
"eval_cer": 0.14172728714845786,
"eval_loss": 0.30226650834083557,
"eval_runtime": 191.4811,
"eval_samples_per_second": 8.199,
"eval_steps_per_second": 1.029,
"eval_wer": 0.46567343374808995,
"step": 5500
},
{
"epoch": 2.962466487935657,
"grad_norm": 2.8394415378570557,
"learning_rate": 0.0002877781785392245,
"loss": 0.2746,
"step": 5525
},
{
"epoch": 2.975871313672922,
"grad_norm": 0.5205143094062805,
"learning_rate": 0.00028771055004508565,
"loss": 0.3791,
"step": 5550
},
{
"epoch": 2.9892761394101877,
"grad_norm": 1.6631228923797607,
"learning_rate": 0.0002876429215509468,
"loss": 0.3137,
"step": 5575
},
{
"epoch": 3.002680965147453,
"grad_norm": 0.5890147089958191,
"learning_rate": 0.0002875752930568079,
"loss": 0.3853,
"step": 5600
},
{
"epoch": 3.0160857908847185,
"grad_norm": 0.6414802670478821,
"learning_rate": 0.00028750766456266905,
"loss": 0.2758,
"step": 5625
},
{
"epoch": 3.0294906166219837,
"grad_norm": 0.44740453362464905,
"learning_rate": 0.0002874400360685302,
"loss": 0.3238,
"step": 5650
},
{
"epoch": 3.0428954423592494,
"grad_norm": 0.456421822309494,
"learning_rate": 0.0002873724075743913,
"loss": 0.309,
"step": 5675
},
{
"epoch": 3.0563002680965146,
"grad_norm": 0.36823776364326477,
"learning_rate": 0.00028730477908025246,
"loss": 0.3289,
"step": 5700
},
{
"epoch": 3.06970509383378,
"grad_norm": 0.5027665495872498,
"learning_rate": 0.0002872371505861136,
"loss": 0.2792,
"step": 5725
},
{
"epoch": 3.0831099195710454,
"grad_norm": 0.5492646098136902,
"learning_rate": 0.00028716952209197473,
"loss": 0.3331,
"step": 5750
},
{
"epoch": 3.096514745308311,
"grad_norm": 0.42318084836006165,
"learning_rate": 0.00028710189359783586,
"loss": 0.2944,
"step": 5775
},
{
"epoch": 3.1099195710455763,
"grad_norm": 0.42389240860939026,
"learning_rate": 0.000287034265103697,
"loss": 0.3291,
"step": 5800
},
{
"epoch": 3.123324396782842,
"grad_norm": 0.8874741196632385,
"learning_rate": 0.00028696663660955814,
"loss": 0.2894,
"step": 5825
},
{
"epoch": 3.136729222520107,
"grad_norm": 0.5217534303665161,
"learning_rate": 0.00028689900811541927,
"loss": 0.3404,
"step": 5850
},
{
"epoch": 3.1501340482573728,
"grad_norm": 0.5872902274131775,
"learning_rate": 0.0002868313796212804,
"loss": 0.2703,
"step": 5875
},
{
"epoch": 3.163538873994638,
"grad_norm": 0.6532200574874878,
"learning_rate": 0.00028676375112714154,
"loss": 0.366,
"step": 5900
},
{
"epoch": 3.1769436997319036,
"grad_norm": 0.6171639561653137,
"learning_rate": 0.0002866961226330027,
"loss": 0.3054,
"step": 5925
},
{
"epoch": 3.190348525469169,
"grad_norm": 0.5158005356788635,
"learning_rate": 0.0002866284941388638,
"loss": 0.355,
"step": 5950
},
{
"epoch": 3.2037533512064345,
"grad_norm": 0.47394490242004395,
"learning_rate": 0.00028656086564472495,
"loss": 0.3056,
"step": 5975
},
{
"epoch": 3.2171581769436997,
"grad_norm": 0.49569785594940186,
"learning_rate": 0.0002864932371505861,
"loss": 0.3585,
"step": 6000
},
{
"epoch": 3.2171581769436997,
"eval_cer": 0.14582908423414237,
"eval_loss": 0.3506743311882019,
"eval_runtime": 192.8218,
"eval_samples_per_second": 8.142,
"eval_steps_per_second": 1.022,
"eval_wer": 0.4764243614931238,
"step": 6000
},
{
"epoch": 3.2305630026809653,
"grad_norm": 0.5296260714530945,
"learning_rate": 0.0002864256086564472,
"loss": 0.3262,
"step": 6025
},
{
"epoch": 3.2439678284182305,
"grad_norm": 0.38092923164367676,
"learning_rate": 0.0002863606853020739,
"loss": 0.3516,
"step": 6050
},
{
"epoch": 3.257372654155496,
"grad_norm": 0.7664552927017212,
"learning_rate": 0.00028629305680793506,
"loss": 0.3316,
"step": 6075
},
{
"epoch": 3.2707774798927614,
"grad_norm": 0.5759456753730774,
"learning_rate": 0.0002862254283137962,
"loss": 0.3403,
"step": 6100
},
{
"epoch": 3.284182305630027,
"grad_norm": 1.9567292928695679,
"learning_rate": 0.00028615779981965733,
"loss": 0.3166,
"step": 6125
},
{
"epoch": 3.297587131367292,
"grad_norm": 0.49598929286003113,
"learning_rate": 0.00028609017132551846,
"loss": 0.3824,
"step": 6150
},
{
"epoch": 3.310991957104558,
"grad_norm": 0.5681086778640747,
"learning_rate": 0.0002860225428313796,
"loss": 0.3187,
"step": 6175
},
{
"epoch": 3.324396782841823,
"grad_norm": 0.43048450350761414,
"learning_rate": 0.00028595491433724073,
"loss": 0.3564,
"step": 6200
},
{
"epoch": 3.3378016085790883,
"grad_norm": 0.45195090770721436,
"learning_rate": 0.00028588728584310187,
"loss": 0.3022,
"step": 6225
},
{
"epoch": 3.351206434316354,
"grad_norm": 0.6295568346977234,
"learning_rate": 0.000285819657348963,
"loss": 0.3714,
"step": 6250
},
{
"epoch": 3.3646112600536195,
"grad_norm": 0.5551475882530212,
"learning_rate": 0.00028575202885482414,
"loss": 0.444,
"step": 6275
},
{
"epoch": 3.3780160857908847,
"grad_norm": 0.5752814412117004,
"learning_rate": 0.0002856844003606853,
"loss": 0.3906,
"step": 6300
},
{
"epoch": 3.39142091152815,
"grad_norm": 0.44677379727363586,
"learning_rate": 0.0002856167718665464,
"loss": 0.2972,
"step": 6325
},
{
"epoch": 3.4048257372654156,
"grad_norm": 0.592958390712738,
"learning_rate": 0.00028554914337240755,
"loss": 0.3616,
"step": 6350
},
{
"epoch": 3.418230563002681,
"grad_norm": 0.29190266132354736,
"learning_rate": 0.0002854815148782687,
"loss": 0.2773,
"step": 6375
},
{
"epoch": 3.4316353887399464,
"grad_norm": 0.318469762802124,
"learning_rate": 0.0002854138863841298,
"loss": 0.3451,
"step": 6400
},
{
"epoch": 3.4450402144772116,
"grad_norm": 0.4345405399799347,
"learning_rate": 0.00028534625788999095,
"loss": 0.286,
"step": 6425
},
{
"epoch": 3.4584450402144773,
"grad_norm": 0.6703570485115051,
"learning_rate": 0.0002852786293958521,
"loss": 0.3612,
"step": 6450
},
{
"epoch": 3.4718498659517425,
"grad_norm": 0.7129529118537903,
"learning_rate": 0.0002852110009017132,
"loss": 0.3035,
"step": 6475
},
{
"epoch": 3.485254691689008,
"grad_norm": 1.0257657766342163,
"learning_rate": 0.00028514337240757436,
"loss": 0.338,
"step": 6500
},
{
"epoch": 3.485254691689008,
"eval_cer": 0.14121739524554128,
"eval_loss": 0.29056990146636963,
"eval_runtime": 191.9797,
"eval_samples_per_second": 8.178,
"eval_steps_per_second": 1.026,
"eval_wer": 0.46463654223968565,
"step": 6500
},
{
"epoch": 3.485254691689008,
"step": 6500,
"total_flos": 3.3381487836753715e+19,
"train_loss": 0.7533885692449717,
"train_runtime": 52302.0768,
"train_samples_per_second": 68.456,
"train_steps_per_second": 2.139
}
],
"logging_steps": 25,
"max_steps": 111900,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.3381487836753715e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}