{
"best_global_step": 3000,
"best_metric": 0.22896352410316467,
"best_model_checkpoint": "./Wav2vec2-Hausa/checkpoint-3000",
"epoch": 14.947804473902236,
"eval_steps": 500,
"global_step": 4500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08285004142502071,
"grad_norm": 9.556963920593262,
"learning_rate": 7.499999999999999e-06,
"loss": 18.2394,
"step": 25
},
{
"epoch": 0.16570008285004142,
"grad_norm": 18.581457138061523,
"learning_rate": 1.47e-05,
"loss": 16.9665,
"step": 50
},
{
"epoch": 0.24855012427506215,
"grad_norm": 15.846641540527344,
"learning_rate": 2.2199999999999998e-05,
"loss": 10.9287,
"step": 75
},
{
"epoch": 0.33140016570008285,
"grad_norm": 16.06224822998047,
"learning_rate": 2.97e-05,
"loss": 6.6297,
"step": 100
},
{
"epoch": 0.4142502071251036,
"grad_norm": 9.293783187866211,
"learning_rate": 3.7199999999999996e-05,
"loss": 5.2272,
"step": 125
},
{
"epoch": 0.4971002485501243,
"grad_norm": 8.752418518066406,
"learning_rate": 4.4699999999999996e-05,
"loss": 4.4165,
"step": 150
},
{
"epoch": 0.579950289975145,
"grad_norm": 4.067853927612305,
"learning_rate": 5.2199999999999995e-05,
"loss": 3.8388,
"step": 175
},
{
"epoch": 0.6628003314001657,
"grad_norm": null,
"learning_rate": 5.94e-05,
"loss": 3.4621,
"step": 200
},
{
"epoch": 0.7456503728251864,
"grad_norm": 0.7896580100059509,
"learning_rate": 6.69e-05,
"loss": 3.0669,
"step": 225
},
{
"epoch": 0.8285004142502072,
"grad_norm": 0.7531073689460754,
"learning_rate": 7.439999999999999e-05,
"loss": 2.8899,
"step": 250
},
{
"epoch": 0.9113504556752279,
"grad_norm": 0.3927903473377228,
"learning_rate": 8.19e-05,
"loss": 2.8231,
"step": 275
},
{
"epoch": 0.9942004971002486,
"grad_norm": 0.9705857038497925,
"learning_rate": 8.939999999999999e-05,
"loss": 2.7892,
"step": 300
},
{
"epoch": 1.0795360397680198,
"grad_norm": 0.5089633464813232,
"learning_rate": 9.69e-05,
"loss": 2.8119,
"step": 325
},
{
"epoch": 1.1623860811930407,
"grad_norm": 0.313754141330719,
"learning_rate": 0.00010439999999999999,
"loss": 2.7591,
"step": 350
},
{
"epoch": 1.2452361226180613,
"grad_norm": 0.7091944217681885,
"learning_rate": 0.0001119,
"loss": 2.7743,
"step": 375
},
{
"epoch": 1.328086164043082,
"grad_norm": 0.2798990309238434,
"learning_rate": 0.0001194,
"loss": 2.6965,
"step": 400
},
{
"epoch": 1.4109362054681027,
"grad_norm": 0.22546878457069397,
"learning_rate": 0.0001269,
"loss": 2.6989,
"step": 425
},
{
"epoch": 1.4937862468931233,
"grad_norm": 0.3145970404148102,
"learning_rate": 0.0001344,
"loss": 2.6602,
"step": 450
},
{
"epoch": 1.5766362883181442,
"grad_norm": 0.46619847416877747,
"learning_rate": 0.00014189999999999998,
"loss": 2.6611,
"step": 475
},
{
"epoch": 1.659486329743165,
"grad_norm": 1.2694575786590576,
"learning_rate": 0.0001494,
"loss": 2.2402,
"step": 500
},
{
"epoch": 1.659486329743165,
"eval_cer": 0.6183230906678939,
"eval_loss": 1.7612072229385376,
"eval_runtime": 180.0066,
"eval_samples_per_second": 6.722,
"eval_steps_per_second": 0.844,
"eval_wer": 0.9446567586694975,
"step": 500
},
{
"epoch": 1.7423363711681856,
"grad_norm": 0.9558189511299133,
"learning_rate": 0.0001569,
"loss": 1.5439,
"step": 525
},
{
"epoch": 1.8251864125932062,
"grad_norm": 1.1704384088516235,
"learning_rate": 0.0001644,
"loss": 1.0171,
"step": 550
},
{
"epoch": 1.908036454018227,
"grad_norm": 0.6667094826698303,
"learning_rate": 0.00017189999999999998,
"loss": 0.9064,
"step": 575
},
{
"epoch": 1.9908864954432477,
"grad_norm": 1.4252177476882935,
"learning_rate": 0.00017939999999999997,
"loss": 0.7337,
"step": 600
},
{
"epoch": 2.0762220381110192,
"grad_norm": 0.727504312992096,
"learning_rate": 0.00018659999999999998,
"loss": 0.7596,
"step": 625
},
{
"epoch": 2.1590720795360396,
"grad_norm": 1.1159340143203735,
"learning_rate": 0.0001941,
"loss": 0.5878,
"step": 650
},
{
"epoch": 2.2419221209610605,
"grad_norm": 0.969331681728363,
"learning_rate": 0.0002016,
"loss": 0.7385,
"step": 675
},
{
"epoch": 2.3247721623860813,
"grad_norm": 1.1958396434783936,
"learning_rate": 0.00020909999999999996,
"loss": 0.5456,
"step": 700
},
{
"epoch": 2.407622203811102,
"grad_norm": 0.7370328307151794,
"learning_rate": 0.00021659999999999998,
"loss": 0.6401,
"step": 725
},
{
"epoch": 2.4904722452361225,
"grad_norm": 0.9496448040008545,
"learning_rate": 0.00022409999999999997,
"loss": 0.5123,
"step": 750
},
{
"epoch": 2.5733222866611434,
"grad_norm": 0.8332341909408569,
"learning_rate": 0.0002316,
"loss": 0.6013,
"step": 775
},
{
"epoch": 2.656172328086164,
"grad_norm": 1.3926903009414673,
"learning_rate": 0.00023909999999999998,
"loss": 0.4888,
"step": 800
},
{
"epoch": 2.7390223695111846,
"grad_norm": 0.8815742135047913,
"learning_rate": 0.0002466,
"loss": 0.5939,
"step": 825
},
{
"epoch": 2.8218724109362054,
"grad_norm": 1.0045580863952637,
"learning_rate": 0.0002541,
"loss": 0.4609,
"step": 850
},
{
"epoch": 2.9047224523612263,
"grad_norm": 0.6735367178916931,
"learning_rate": 0.00026129999999999995,
"loss": 0.5541,
"step": 875
},
{
"epoch": 2.9875724937862467,
"grad_norm": 0.8095821738243103,
"learning_rate": 0.0002688,
"loss": 0.4487,
"step": 900
},
{
"epoch": 3.0729080364540184,
"grad_norm": 1.2476941347122192,
"learning_rate": 0.0002763,
"loss": 0.5603,
"step": 925
},
{
"epoch": 3.155758077879039,
"grad_norm": 1.500581979751587,
"learning_rate": 0.00028379999999999996,
"loss": 0.3883,
"step": 950
},
{
"epoch": 3.2386081193040597,
"grad_norm": 0.7201348543167114,
"learning_rate": 0.0002913,
"loss": 0.5197,
"step": 975
},
{
"epoch": 3.3214581607290805,
"grad_norm": 1.012593388557434,
"learning_rate": 0.0002988,
"loss": 0.3626,
"step": 1000
},
{
"epoch": 3.3214581607290805,
"eval_cer": 0.12113869561686114,
"eval_loss": 0.36199814081192017,
"eval_runtime": 178.0685,
"eval_samples_per_second": 6.795,
"eval_steps_per_second": 0.854,
"eval_wer": 0.44961075725406935,
"step": 1000
},
{
"epoch": 3.404308202154101,
"grad_norm": 0.9544251561164856,
"learning_rate": 0.00029964830011723327,
"loss": 0.515,
"step": 1025
},
{
"epoch": 3.4871582435791217,
"grad_norm": 0.855119526386261,
"learning_rate": 0.0002992086752637749,
"loss": 0.3888,
"step": 1050
},
{
"epoch": 3.5700082850041426,
"grad_norm": 0.9061466455459595,
"learning_rate": 0.0002987690504103165,
"loss": 0.4937,
"step": 1075
},
{
"epoch": 3.652858326429163,
"grad_norm": 0.8522987365722656,
"learning_rate": 0.00029832942555685815,
"loss": 0.3774,
"step": 1100
},
{
"epoch": 3.735708367854184,
"grad_norm": 0.7319638729095459,
"learning_rate": 0.00029788980070339976,
"loss": 0.4526,
"step": 1125
},
{
"epoch": 3.8185584092792046,
"grad_norm": 0.9154307842254639,
"learning_rate": 0.00029745017584994137,
"loss": 0.3795,
"step": 1150
},
{
"epoch": 3.9014084507042255,
"grad_norm": 0.8338823318481445,
"learning_rate": 0.000297010550996483,
"loss": 0.4426,
"step": 1175
},
{
"epoch": 3.9842584921292463,
"grad_norm": 0.9627026915550232,
"learning_rate": 0.0002965709261430246,
"loss": 0.3554,
"step": 1200
},
{
"epoch": 4.069594034797017,
"grad_norm": 0.5187656879425049,
"learning_rate": 0.0002961313012895662,
"loss": 0.4345,
"step": 1225
},
{
"epoch": 4.1524440762220385,
"grad_norm": 0.9797393679618835,
"learning_rate": 0.0002956916764361078,
"loss": 0.3001,
"step": 1250
},
{
"epoch": 4.235294117647059,
"grad_norm": 0.9796245098114014,
"learning_rate": 0.00029525205158264947,
"loss": 0.4158,
"step": 1275
},
{
"epoch": 4.318144159072079,
"grad_norm": 1.0057493448257446,
"learning_rate": 0.0002948124267291911,
"loss": 0.2856,
"step": 1300
},
{
"epoch": 4.4009942004971006,
"grad_norm": 0.6741095781326294,
"learning_rate": 0.0002943728018757327,
"loss": 0.4099,
"step": 1325
},
{
"epoch": 4.483844241922121,
"grad_norm": 0.7850795984268188,
"learning_rate": 0.0002939331770222743,
"loss": 0.3011,
"step": 1350
},
{
"epoch": 4.566694283347141,
"grad_norm": 0.5698910355567932,
"learning_rate": 0.0002934935521688159,
"loss": 0.4201,
"step": 1375
},
{
"epoch": 4.649544324772163,
"grad_norm": 0.5989360809326172,
"learning_rate": 0.0002930539273153575,
"loss": 0.2986,
"step": 1400
},
{
"epoch": 4.732394366197183,
"grad_norm": 0.6864707469940186,
"learning_rate": 0.0002926318874560375,
"loss": 0.4006,
"step": 1425
},
{
"epoch": 4.815244407622204,
"grad_norm": 0.8572924137115479,
"learning_rate": 0.0002921922626025791,
"loss": 0.2953,
"step": 1450
},
{
"epoch": 4.898094449047225,
"grad_norm": 0.4575251638889313,
"learning_rate": 0.0002917526377491207,
"loss": 0.4145,
"step": 1475
},
{
"epoch": 4.980944490472245,
"grad_norm": 0.7143483757972717,
"learning_rate": 0.00029131301289566237,
"loss": 0.2916,
"step": 1500
},
{
"epoch": 4.980944490472245,
"eval_cer": 0.0923193545767117,
"eval_loss": 0.2641300559043884,
"eval_runtime": 179.3147,
"eval_samples_per_second": 6.748,
"eval_steps_per_second": 0.848,
"eval_wer": 0.36036801132342533,
"step": 1500
},
{
"epoch": 5.066280033140017,
"grad_norm": 1.980385184288025,
"learning_rate": 0.000290873388042204,
"loss": 0.3535,
"step": 1525
},
{
"epoch": 5.149130074565037,
"grad_norm": 0.8608137965202332,
"learning_rate": 0.0002904337631887456,
"loss": 0.2469,
"step": 1550
},
{
"epoch": 5.231980115990058,
"grad_norm": 0.5075603723526001,
"learning_rate": 0.0002899941383352872,
"loss": 0.3486,
"step": 1575
},
{
"epoch": 5.314830157415079,
"grad_norm": 0.7245315909385681,
"learning_rate": 0.0002895545134818288,
"loss": 0.2515,
"step": 1600
},
{
"epoch": 5.397680198840099,
"grad_norm": 0.4333842694759369,
"learning_rate": 0.0002891148886283704,
"loss": 0.3337,
"step": 1625
},
{
"epoch": 5.48053024026512,
"grad_norm": 0.7363935112953186,
"learning_rate": 0.000288675263774912,
"loss": 0.2576,
"step": 1650
},
{
"epoch": 5.563380281690141,
"grad_norm": 0.5363740921020508,
"learning_rate": 0.000288253223915592,
"loss": 0.3484,
"step": 1675
},
{
"epoch": 5.646230323115161,
"grad_norm": 0.5194985866546631,
"learning_rate": 0.0002878135990621336,
"loss": 0.2545,
"step": 1700
},
{
"epoch": 5.729080364540183,
"grad_norm": 0.5578182935714722,
"learning_rate": 0.00028737397420867527,
"loss": 0.3502,
"step": 1725
},
{
"epoch": 5.811930405965203,
"grad_norm": 0.6931495070457458,
"learning_rate": 0.0002869343493552169,
"loss": 0.2312,
"step": 1750
},
{
"epoch": 5.894780447390223,
"grad_norm": 0.59634929895401,
"learning_rate": 0.0002864947245017585,
"loss": 0.3525,
"step": 1775
},
{
"epoch": 5.977630488815245,
"grad_norm": 0.544572651386261,
"learning_rate": 0.0002860550996483001,
"loss": 0.241,
"step": 1800
},
{
"epoch": 6.062966031483016,
"grad_norm": 0.5554734468460083,
"learning_rate": 0.0002856154747948417,
"loss": 0.3355,
"step": 1825
},
{
"epoch": 6.145816072908037,
"grad_norm": 0.9242589473724365,
"learning_rate": 0.0002851758499413833,
"loss": 0.2105,
"step": 1850
},
{
"epoch": 6.228666114333057,
"grad_norm": 0.45407700538635254,
"learning_rate": 0.0002847362250879249,
"loss": 0.3085,
"step": 1875
},
{
"epoch": 6.311516155758078,
"grad_norm": 1.0744433403015137,
"learning_rate": 0.0002842966002344666,
"loss": 0.2101,
"step": 1900
},
{
"epoch": 6.394366197183099,
"grad_norm": 0.4946906864643097,
"learning_rate": 0.0002838569753810082,
"loss": 0.3161,
"step": 1925
},
{
"epoch": 6.477216238608119,
"grad_norm": 0.6537393927574158,
"learning_rate": 0.0002834173505275498,
"loss": 0.2341,
"step": 1950
},
{
"epoch": 6.56006628003314,
"grad_norm": 0.3927314281463623,
"learning_rate": 0.0002829777256740914,
"loss": 0.3191,
"step": 1975
},
{
"epoch": 6.642916321458161,
"grad_norm": 1.1492557525634766,
"learning_rate": 0.000282538100820633,
"loss": 0.2105,
"step": 2000
},
{
"epoch": 6.642916321458161,
"eval_cer": 0.0847930259631264,
"eval_loss": 0.2543812096118927,
"eval_runtime": 179.4122,
"eval_samples_per_second": 6.744,
"eval_steps_per_second": 0.847,
"eval_wer": 0.3295116772823779,
"step": 2000
},
{
"epoch": 6.725766362883181,
"grad_norm": 0.4488711953163147,
"learning_rate": 0.000282116060961313,
"loss": 0.307,
"step": 2025
},
{
"epoch": 6.808616404308202,
"grad_norm": 0.7720121145248413,
"learning_rate": 0.0002816764361078546,
"loss": 0.2181,
"step": 2050
},
{
"epoch": 6.891466445733223,
"grad_norm": 0.47668084502220154,
"learning_rate": 0.0002812368112543962,
"loss": 0.3084,
"step": 2075
},
{
"epoch": 6.9743164871582435,
"grad_norm": 0.8669754266738892,
"learning_rate": 0.00028079718640093787,
"loss": 0.2189,
"step": 2100
},
{
"epoch": 7.059652029826015,
"grad_norm": 0.40827029943466187,
"learning_rate": 0.0002803575615474795,
"loss": 0.2932,
"step": 2125
},
{
"epoch": 7.142502071251036,
"grad_norm": 0.5760928988456726,
"learning_rate": 0.0002799179366940211,
"loss": 0.2059,
"step": 2150
},
{
"epoch": 7.225352112676056,
"grad_norm": 0.46470338106155396,
"learning_rate": 0.0002794783118405627,
"loss": 0.2942,
"step": 2175
},
{
"epoch": 7.308202154101077,
"grad_norm": 0.6504044532775879,
"learning_rate": 0.0002790386869871043,
"loss": 0.2016,
"step": 2200
},
{
"epoch": 7.391052195526098,
"grad_norm": 0.41604796051979065,
"learning_rate": 0.0002785990621336459,
"loss": 0.2735,
"step": 2225
},
{
"epoch": 7.473902236951118,
"grad_norm": 0.9678609371185303,
"learning_rate": 0.00027815943728018753,
"loss": 0.1919,
"step": 2250
},
{
"epoch": 7.556752278376139,
"grad_norm": 0.39137783646583557,
"learning_rate": 0.0002777373974208675,
"loss": 0.285,
"step": 2275
},
{
"epoch": 7.63960231980116,
"grad_norm": 0.500848114490509,
"learning_rate": 0.0002772977725674091,
"loss": 0.1994,
"step": 2300
},
{
"epoch": 7.72245236122618,
"grad_norm": 0.4283003807067871,
"learning_rate": 0.00027685814771395077,
"loss": 0.2917,
"step": 2325
},
{
"epoch": 7.8053024026512015,
"grad_norm": 0.607266366481781,
"learning_rate": 0.0002764185228604924,
"loss": 0.1982,
"step": 2350
},
{
"epoch": 7.888152444076222,
"grad_norm": 0.45504409074783325,
"learning_rate": 0.000275978898007034,
"loss": 0.2724,
"step": 2375
},
{
"epoch": 7.971002485501243,
"grad_norm": 0.8149850368499756,
"learning_rate": 0.0002755392731535756,
"loss": 0.1888,
"step": 2400
},
{
"epoch": 8.056338028169014,
"grad_norm": 1.0355818271636963,
"learning_rate": 0.0002750996483001172,
"loss": 0.2714,
"step": 2425
},
{
"epoch": 8.139188069594034,
"grad_norm": 0.4683228135108948,
"learning_rate": 0.0002746600234466588,
"loss": 0.1818,
"step": 2450
},
{
"epoch": 8.222038111019055,
"grad_norm": 0.4191352128982544,
"learning_rate": 0.0002742203985932004,
"loss": 0.2556,
"step": 2475
},
{
"epoch": 8.304888152444077,
"grad_norm": 0.7036840915679932,
"learning_rate": 0.0002737807737397421,
"loss": 0.1804,
"step": 2500
},
{
"epoch": 8.304888152444077,
"eval_cer": 0.07958144949508623,
"eval_loss": 0.23702508211135864,
"eval_runtime": 180.5015,
"eval_samples_per_second": 6.704,
"eval_steps_per_second": 0.842,
"eval_wer": 0.3151450813871196,
"step": 2500
},
{
"epoch": 8.387738193869097,
"grad_norm": 0.34809935092926025,
"learning_rate": 0.0002733411488862837,
"loss": 0.262,
"step": 2525
},
{
"epoch": 8.470588235294118,
"grad_norm": 1.302828311920166,
"learning_rate": 0.0002729015240328253,
"loss": 0.1991,
"step": 2550
},
{
"epoch": 8.553438276719138,
"grad_norm": 0.5225396156311035,
"learning_rate": 0.0002724618991793669,
"loss": 0.2477,
"step": 2575
},
{
"epoch": 8.636288318144159,
"grad_norm": 0.6288148760795593,
"learning_rate": 0.0002720222743259085,
"loss": 0.1829,
"step": 2600
},
{
"epoch": 8.719138359569179,
"grad_norm": 0.370772123336792,
"learning_rate": 0.00027158264947245013,
"loss": 0.2546,
"step": 2625
},
{
"epoch": 8.801988400994201,
"grad_norm": 0.5763248801231384,
"learning_rate": 0.00027114302461899174,
"loss": 0.1728,
"step": 2650
},
{
"epoch": 8.884838442419221,
"grad_norm": 0.4807268977165222,
"learning_rate": 0.0002707033997655334,
"loss": 0.2604,
"step": 2675
},
{
"epoch": 8.967688483844242,
"grad_norm": 1.5762399435043335,
"learning_rate": 0.000270263774912075,
"loss": 0.1808,
"step": 2700
},
{
"epoch": 9.053024026512013,
"grad_norm": 0.36433079838752747,
"learning_rate": 0.000269841735052755,
"loss": 0.2444,
"step": 2725
},
{
"epoch": 9.135874067937033,
"grad_norm": 1.2679426670074463,
"learning_rate": 0.0002694021101992966,
"loss": 0.1699,
"step": 2750
},
{
"epoch": 9.218724109362055,
"grad_norm": 0.38808515667915344,
"learning_rate": 0.0002689624853458382,
"loss": 0.2331,
"step": 2775
},
{
"epoch": 9.301574150787076,
"grad_norm": 1.0244839191436768,
"learning_rate": 0.0002685228604923798,
"loss": 0.1615,
"step": 2800
},
{
"epoch": 9.384424192212096,
"grad_norm": 0.4677698314189911,
"learning_rate": 0.0002681008206330598,
"loss": 0.2436,
"step": 2825
},
{
"epoch": 9.467274233637117,
"grad_norm": 0.9670608639717102,
"learning_rate": 0.0002676611957796014,
"loss": 0.1672,
"step": 2850
},
{
"epoch": 9.550124275062137,
"grad_norm": 0.6787462830543518,
"learning_rate": 0.000267221570926143,
"loss": 0.2447,
"step": 2875
},
{
"epoch": 9.632974316487159,
"grad_norm": 0.5140098333358765,
"learning_rate": 0.0002667819460726846,
"loss": 0.1588,
"step": 2900
},
{
"epoch": 9.71582435791218,
"grad_norm": 0.678119957447052,
"learning_rate": 0.00026634232121922627,
"loss": 0.2347,
"step": 2925
},
{
"epoch": 9.7986743993372,
"grad_norm": 0.668165385723114,
"learning_rate": 0.0002659026963657679,
"loss": 0.1838,
"step": 2950
},
{
"epoch": 9.88152444076222,
"grad_norm": 0.4628326892852783,
"learning_rate": 0.0002654630715123095,
"loss": 0.2462,
"step": 2975
},
{
"epoch": 9.96437448218724,
"grad_norm": 0.3794308602809906,
"learning_rate": 0.0002650234466588511,
"loss": 0.182,
"step": 3000
},
{
"epoch": 9.96437448218724,
"eval_cer": 0.07455938489861115,
"eval_loss": 0.22896352410316467,
"eval_runtime": 180.0793,
"eval_samples_per_second": 6.719,
"eval_steps_per_second": 0.844,
"eval_wer": 0.2959660297239915,
"step": 3000
},
{
"epoch": 10.049710024855013,
"grad_norm": 0.7407336831092834,
"learning_rate": 0.0002645838218053927,
"loss": 0.2413,
"step": 3025
},
{
"epoch": 10.132560066280034,
"grad_norm": 0.71273273229599,
"learning_rate": 0.0002641441969519343,
"loss": 0.1593,
"step": 3050
},
{
"epoch": 10.215410107705054,
"grad_norm": 0.4058437943458557,
"learning_rate": 0.00026370457209847593,
"loss": 0.2376,
"step": 3075
},
{
"epoch": 10.298260149130074,
"grad_norm": 1.03704833984375,
"learning_rate": 0.0002632649472450176,
"loss": 0.1545,
"step": 3100
},
{
"epoch": 10.381110190555095,
"grad_norm": 0.3222190737724304,
"learning_rate": 0.0002628253223915592,
"loss": 0.2186,
"step": 3125
},
{
"epoch": 10.463960231980115,
"grad_norm": 0.4006061851978302,
"learning_rate": 0.0002623856975381008,
"loss": 0.1506,
"step": 3150
},
{
"epoch": 10.546810273405137,
"grad_norm": 1.0020666122436523,
"learning_rate": 0.0002619460726846424,
"loss": 0.2305,
"step": 3175
},
{
"epoch": 10.629660314830158,
"grad_norm": 0.6109996438026428,
"learning_rate": 0.00026150644783118403,
"loss": 0.163,
"step": 3200
},
{
"epoch": 10.712510356255178,
"grad_norm": 0.5075812935829163,
"learning_rate": 0.00026106682297772564,
"loss": 0.2304,
"step": 3225
},
{
"epoch": 10.795360397680199,
"grad_norm": 0.7409548759460449,
"learning_rate": 0.00026062719812426725,
"loss": 0.182,
"step": 3250
},
{
"epoch": 10.878210439105219,
"grad_norm": 0.6909148097038269,
"learning_rate": 0.0002601875732708089,
"loss": 0.2306,
"step": 3275
},
{
"epoch": 10.96106048053024,
"grad_norm": 0.5709498524665833,
"learning_rate": 0.0002597479484173505,
"loss": 0.1667,
"step": 3300
},
{
"epoch": 11.046396023198012,
"grad_norm": 0.5173778533935547,
"learning_rate": 0.0002593259085580305,
"loss": 0.2397,
"step": 3325
},
{
"epoch": 11.129246064623032,
"grad_norm": 0.628476083278656,
"learning_rate": 0.0002588862837045721,
"loss": 0.1709,
"step": 3350
},
{
"epoch": 11.212096106048053,
"grad_norm": 0.782648503780365,
"learning_rate": 0.0002584466588511137,
"loss": 0.2319,
"step": 3375
},
{
"epoch": 11.294946147473073,
"grad_norm": 0.6060341000556946,
"learning_rate": 0.0002580070339976553,
"loss": 0.1792,
"step": 3400
},
{
"epoch": 11.377796188898094,
"grad_norm": 0.9537221789360046,
"learning_rate": 0.0002575674091441969,
"loss": 0.2451,
"step": 3425
},
{
"epoch": 11.460646230323116,
"grad_norm": 0.6490224599838257,
"learning_rate": 0.00025712778429073853,
"loss": 0.1818,
"step": 3450
},
{
"epoch": 11.543496271748136,
"grad_norm": 0.9516769647598267,
"learning_rate": 0.0002567057444314185,
"loss": 0.2567,
"step": 3475
},
{
"epoch": 11.626346313173157,
"grad_norm": 1.2261236906051636,
"learning_rate": 0.0002562661195779601,
"loss": 0.2024,
"step": 3500
},
{
"epoch": 11.626346313173157,
"eval_cer": 0.07910766981617348,
"eval_loss": 0.25268253684043884,
"eval_runtime": 179.0467,
"eval_samples_per_second": 6.758,
"eval_steps_per_second": 0.849,
"eval_wer": 0.3087048832271762,
"step": 3500
},
{
"epoch": 11.709196354598177,
"grad_norm": 1.0352481603622437,
"learning_rate": 0.0002558264947245018,
"loss": 0.2509,
"step": 3525
},
{
"epoch": 11.792046396023197,
"grad_norm": 1.3052682876586914,
"learning_rate": 0.0002553868698710434,
"loss": 0.1896,
"step": 3550
},
{
"epoch": 11.87489643744822,
"grad_norm": 0.7363812923431396,
"learning_rate": 0.000254947245017585,
"loss": 0.2877,
"step": 3575
},
{
"epoch": 11.95774647887324,
"grad_norm": 1.4226346015930176,
"learning_rate": 0.0002545076201641266,
"loss": 0.2163,
"step": 3600
},
{
"epoch": 12.04308202154101,
"grad_norm": 1.1405287981033325,
"learning_rate": 0.0002540679953106682,
"loss": 0.2954,
"step": 3625
},
{
"epoch": 12.125932062966031,
"grad_norm": 3.9781391620635986,
"learning_rate": 0.0002536283704572098,
"loss": 0.2087,
"step": 3650
},
{
"epoch": 12.208782104391052,
"grad_norm": 0.7986987829208374,
"learning_rate": 0.00025318874560375143,
"loss": 0.2763,
"step": 3675
},
{
"epoch": 12.291632145816074,
"grad_norm": 2.4786369800567627,
"learning_rate": 0.0002527491207502931,
"loss": 0.2251,
"step": 3700
},
{
"epoch": 12.374482187241094,
"grad_norm": 0.7642366290092468,
"learning_rate": 0.0002523094958968347,
"loss": 0.2828,
"step": 3725
},
{
"epoch": 12.457332228666115,
"grad_norm": 2.443129777908325,
"learning_rate": 0.0002518698710433763,
"loss": 0.2458,
"step": 3750
},
{
"epoch": 12.540182270091135,
"grad_norm": 1.845415711402893,
"learning_rate": 0.0002514302461899179,
"loss": 0.2796,
"step": 3775
},
{
"epoch": 12.623032311516155,
"grad_norm": 1.1014904975891113,
"learning_rate": 0.00025099062133645953,
"loss": 0.2366,
"step": 3800
},
{
"epoch": 12.705882352941176,
"grad_norm": 1.1710816621780396,
"learning_rate": 0.0002505685814771395,
"loss": 0.2931,
"step": 3825
},
{
"epoch": 12.788732394366198,
"grad_norm": 0.7777267098426819,
"learning_rate": 0.0002501289566236811,
"loss": 0.2452,
"step": 3850
},
{
"epoch": 12.871582435791218,
"grad_norm": 0.5831831693649292,
"learning_rate": 0.0002496893317702227,
"loss": 0.2844,
"step": 3875
},
{
"epoch": 12.954432477216239,
"grad_norm": 0.8225266337394714,
"learning_rate": 0.00024924970691676433,
"loss": 0.2112,
"step": 3900
},
{
"epoch": 13.03976801988401,
"grad_norm": 0.8047693967819214,
"learning_rate": 0.000248810082063306,
"loss": 0.2547,
"step": 3925
},
{
"epoch": 13.12261806130903,
"grad_norm": 0.9043530225753784,
"learning_rate": 0.0002483704572098476,
"loss": 0.2062,
"step": 3950
},
{
"epoch": 13.205468102734052,
"grad_norm": 0.4219953715801239,
"learning_rate": 0.0002479308323563892,
"loss": 0.2632,
"step": 3975
},
{
"epoch": 13.288318144159073,
"grad_norm": 1.659414529800415,
"learning_rate": 0.0002474912075029308,
"loss": 0.2358,
"step": 4000
},
{
"epoch": 13.288318144159073,
"eval_cer": 0.0747353602079216,
"eval_loss": 0.24355952441692352,
"eval_runtime": 180.0102,
"eval_samples_per_second": 6.722,
"eval_steps_per_second": 0.844,
"eval_wer": 0.2952583156404813,
"step": 4000
},
{
"epoch": 13.371168185584093,
"grad_norm": 0.5549167990684509,
"learning_rate": 0.00024705158264947243,
"loss": 0.2611,
"step": 4025
},
{
"epoch": 13.454018227009113,
"grad_norm": 0.9828294515609741,
"learning_rate": 0.00024661195779601404,
"loss": 0.2381,
"step": 4050
},
{
"epoch": 13.536868268434134,
"grad_norm": 0.5421575903892517,
"learning_rate": 0.00024617233294255565,
"loss": 0.2858,
"step": 4075
},
{
"epoch": 13.619718309859154,
"grad_norm": 0.5966264009475708,
"learning_rate": 0.0002457327080890973,
"loss": 0.261,
"step": 4100
},
{
"epoch": 13.702568351284176,
"grad_norm": 0.5706139206886292,
"learning_rate": 0.0002453106682297773,
"loss": 0.2843,
"step": 4125
},
{
"epoch": 13.785418392709197,
"grad_norm": 0.43442562222480774,
"learning_rate": 0.0002448710433763189,
"loss": 0.303,
"step": 4150
},
{
"epoch": 13.868268434134217,
"grad_norm": 0.6309686899185181,
"learning_rate": 0.0002444314185228605,
"loss": 0.3506,
"step": 4175
},
{
"epoch": 13.951118475559237,
"grad_norm": 0.6217506527900696,
"learning_rate": 0.00024399179366940208,
"loss": 0.3241,
"step": 4200
},
{
"epoch": 14.036454018227008,
"grad_norm": 0.3866030275821686,
"learning_rate": 0.00024355216881594372,
"loss": 0.3698,
"step": 4225
},
{
"epoch": 14.11930405965203,
"grad_norm": 0.30347779393196106,
"learning_rate": 0.00024311254396248532,
"loss": 0.3304,
"step": 4250
},
{
"epoch": 14.202154101077051,
"grad_norm": 0.5292103290557861,
"learning_rate": 0.00024267291910902693,
"loss": 0.3129,
"step": 4275
},
{
"epoch": 14.285004142502071,
"grad_norm": 0.7136854529380798,
"learning_rate": 0.00024223329425556854,
"loss": 0.2908,
"step": 4300
},
{
"epoch": 14.367854183927092,
"grad_norm": 0.3742729127407074,
"learning_rate": 0.00024179366940211018,
"loss": 0.3209,
"step": 4325
},
{
"epoch": 14.450704225352112,
"grad_norm": 0.33382654190063477,
"learning_rate": 0.00024135404454865182,
"loss": 0.2917,
"step": 4350
},
{
"epoch": 14.533554266777134,
"grad_norm": 0.3485744893550873,
"learning_rate": 0.00024091441969519343,
"loss": 0.3577,
"step": 4375
},
{
"epoch": 14.616404308202155,
"grad_norm": 0.6169712543487549,
"learning_rate": 0.00024047479484173503,
"loss": 0.3632,
"step": 4400
},
{
"epoch": 14.699254349627175,
"grad_norm": 0.3555282652378082,
"learning_rate": 0.00024003516998827664,
"loss": 0.3632,
"step": 4425
},
{
"epoch": 14.782104391052195,
"grad_norm": 0.3529140055179596,
"learning_rate": 0.00023959554513481825,
"loss": 0.3397,
"step": 4450
},
{
"epoch": 14.864954432477216,
"grad_norm": 0.6318807005882263,
"learning_rate": 0.00023915592028135986,
"loss": 0.3358,
"step": 4475
},
{
"epoch": 14.947804473902236,
"grad_norm": 0.6898398399353027,
"learning_rate": 0.00023871629542790153,
"loss": 0.3622,
"step": 4500
},
{
"epoch": 14.947804473902236,
"eval_cer": 0.07164902401386145,
"eval_loss": 0.2824631929397583,
"eval_runtime": 184.989,
"eval_samples_per_second": 6.541,
"eval_steps_per_second": 0.822,
"eval_wer": 0.28365180467091294,
"step": 4500
},
{
"epoch": 14.947804473902236,
"step": 4500,
"total_flos": 3.1185273254659265e+19,
"train_loss": 0.8522552142143249,
"train_runtime": 37475.2897,
"train_samples_per_second": 15.455,
"train_steps_per_second": 0.482
}
],
"logging_steps": 25,
"max_steps": 18060,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.1185273254659265e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}