whisper-tiny-ru / trainer_state.json
rndteam41's picture
Added model
e067553 verified
{
"best_global_step": 3300,
"best_metric": 26.2158686937448,
"best_model_checkpoint": "./whisper-tiny-ru/checkpoint-3300",
"epoch": 5.28,
"eval_steps": 100,
"global_step": 3300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 30.96117401123047,
"learning_rate": 4.800000000000001e-07,
"loss": 1.661016845703125,
"step": 25
},
{
"epoch": 0.08,
"grad_norm": 22.207429885864258,
"learning_rate": 9.800000000000001e-07,
"loss": 1.5516070556640624,
"step": 50
},
{
"epoch": 0.12,
"grad_norm": 21.638269424438477,
"learning_rate": 1.48e-06,
"loss": 1.33806884765625,
"step": 75
},
{
"epoch": 0.16,
"grad_norm": 21.087303161621094,
"learning_rate": 1.98e-06,
"loss": 1.0702142333984375,
"step": 100
},
{
"epoch": 0.16,
"eval_loss": 0.9838109016418457,
"eval_runtime": 1688.971,
"eval_samples_per_second": 4.732,
"eval_steps_per_second": 0.592,
"eval_wer": 58.649118826109984,
"step": 100
},
{
"epoch": 0.2,
"grad_norm": 18.6301212310791,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.9264418029785156,
"step": 125
},
{
"epoch": 0.24,
"grad_norm": 17.07522964477539,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.7360333251953125,
"step": 150
},
{
"epoch": 0.28,
"grad_norm": 19.41968536376953,
"learning_rate": 3.48e-06,
"loss": 0.7099385833740235,
"step": 175
},
{
"epoch": 0.32,
"grad_norm": 16.938819885253906,
"learning_rate": 3.980000000000001e-06,
"loss": 0.700680923461914,
"step": 200
},
{
"epoch": 0.32,
"eval_loss": 0.6107771992683411,
"eval_runtime": 1633.6115,
"eval_samples_per_second": 4.893,
"eval_steps_per_second": 0.612,
"eval_wer": 45.457983511080855,
"step": 200
},
{
"epoch": 0.36,
"grad_norm": 15.689166069030762,
"learning_rate": 4.48e-06,
"loss": 0.6384918212890625,
"step": 225
},
{
"epoch": 0.4,
"grad_norm": 20.370248794555664,
"learning_rate": 4.980000000000001e-06,
"loss": 0.6048641204833984,
"step": 250
},
{
"epoch": 0.44,
"grad_norm": 11.012890815734863,
"learning_rate": 5.480000000000001e-06,
"loss": 0.5702555847167968,
"step": 275
},
{
"epoch": 0.48,
"grad_norm": 19.070072174072266,
"learning_rate": 5.98e-06,
"loss": 0.5977656555175781,
"step": 300
},
{
"epoch": 0.48,
"eval_loss": 0.532837450504303,
"eval_runtime": 1594.1788,
"eval_samples_per_second": 5.014,
"eval_steps_per_second": 0.627,
"eval_wer": 41.2702014471926,
"step": 300
},
{
"epoch": 0.52,
"grad_norm": 13.001229286193848,
"learning_rate": 6.480000000000001e-06,
"loss": 0.5543878555297852,
"step": 325
},
{
"epoch": 0.56,
"grad_norm": 19.709369659423828,
"learning_rate": 6.98e-06,
"loss": 0.545843734741211,
"step": 350
},
{
"epoch": 0.6,
"grad_norm": 15.140043258666992,
"learning_rate": 7.48e-06,
"loss": 0.5528886413574219,
"step": 375
},
{
"epoch": 0.64,
"grad_norm": 12.472454071044922,
"learning_rate": 7.980000000000002e-06,
"loss": 0.49836795806884765,
"step": 400
},
{
"epoch": 0.64,
"eval_loss": 0.4811266362667084,
"eval_runtime": 1597.9191,
"eval_samples_per_second": 5.002,
"eval_steps_per_second": 0.626,
"eval_wer": 37.518594155762294,
"step": 400
},
{
"epoch": 0.68,
"grad_norm": 15.188615798950195,
"learning_rate": 8.48e-06,
"loss": 0.48630184173583985,
"step": 425
},
{
"epoch": 0.72,
"grad_norm": 17.552886962890625,
"learning_rate": 8.98e-06,
"loss": 0.4862052917480469,
"step": 450
},
{
"epoch": 0.76,
"grad_norm": 12.936896324157715,
"learning_rate": 9.48e-06,
"loss": 0.47983272552490236,
"step": 475
},
{
"epoch": 0.8,
"grad_norm": 14.850130081176758,
"learning_rate": 9.980000000000001e-06,
"loss": 0.47797527313232424,
"step": 500
},
{
"epoch": 0.8,
"eval_loss": 0.44456836581230164,
"eval_runtime": 1586.2776,
"eval_samples_per_second": 5.039,
"eval_steps_per_second": 0.63,
"eval_wer": 35.09568111338023,
"step": 500
},
{
"epoch": 0.84,
"grad_norm": 16.829805374145508,
"learning_rate": 9.946666666666667e-06,
"loss": 0.5019921493530274,
"step": 525
},
{
"epoch": 0.88,
"grad_norm": 13.922273635864258,
"learning_rate": 9.891111111111113e-06,
"loss": 0.46826896667480467,
"step": 550
},
{
"epoch": 0.92,
"grad_norm": 15.372945785522461,
"learning_rate": 9.835555555555556e-06,
"loss": 0.42755321502685545,
"step": 575
},
{
"epoch": 0.96,
"grad_norm": 16.532119750976562,
"learning_rate": 9.780000000000001e-06,
"loss": 0.49279567718505857,
"step": 600
},
{
"epoch": 0.96,
"eval_loss": 0.4159170389175415,
"eval_runtime": 1613.2907,
"eval_samples_per_second": 4.954,
"eval_steps_per_second": 0.62,
"eval_wer": 33.749338174116936,
"step": 600
},
{
"epoch": 1.0,
"grad_norm": 17.019622802734375,
"learning_rate": 9.724444444444445e-06,
"loss": 0.3978841781616211,
"step": 625
},
{
"epoch": 1.04,
"grad_norm": 9.66409683227539,
"learning_rate": 9.66888888888889e-06,
"loss": 0.3251683807373047,
"step": 650
},
{
"epoch": 1.08,
"grad_norm": 11.652173042297363,
"learning_rate": 9.613333333333335e-06,
"loss": 0.34501224517822265,
"step": 675
},
{
"epoch": 1.12,
"grad_norm": 10.360984802246094,
"learning_rate": 9.557777777777777e-06,
"loss": 0.34316062927246094,
"step": 700
},
{
"epoch": 1.12,
"eval_loss": 0.3949244022369385,
"eval_runtime": 1589.2681,
"eval_samples_per_second": 5.029,
"eval_steps_per_second": 0.629,
"eval_wer": 32.24919950583667,
"step": 700
},
{
"epoch": 1.16,
"grad_norm": 12.48491382598877,
"learning_rate": 9.502222222222223e-06,
"loss": 0.34458335876464846,
"step": 725
},
{
"epoch": 1.2,
"grad_norm": 11.152288436889648,
"learning_rate": 9.446666666666667e-06,
"loss": 0.32888599395751955,
"step": 750
},
{
"epoch": 1.24,
"grad_norm": 10.6038818359375,
"learning_rate": 9.391111111111111e-06,
"loss": 0.31568107604980467,
"step": 775
},
{
"epoch": 1.28,
"grad_norm": 18.213455200195312,
"learning_rate": 9.335555555555557e-06,
"loss": 0.34547271728515627,
"step": 800
},
{
"epoch": 1.28,
"eval_loss": 0.38666781783103943,
"eval_runtime": 1606.6532,
"eval_samples_per_second": 4.975,
"eval_steps_per_second": 0.622,
"eval_wer": 31.583591760582912,
"step": 800
},
{
"epoch": 1.32,
"grad_norm": 13.49360466003418,
"learning_rate": 9.280000000000001e-06,
"loss": 0.3703644943237305,
"step": 825
},
{
"epoch": 1.3599999999999999,
"grad_norm": 9.994672775268555,
"learning_rate": 9.224444444444445e-06,
"loss": 0.31469236373901366,
"step": 850
},
{
"epoch": 1.4,
"grad_norm": 12.134446144104004,
"learning_rate": 9.168888888888889e-06,
"loss": 0.3096772575378418,
"step": 875
},
{
"epoch": 1.44,
"grad_norm": 9.686901092529297,
"learning_rate": 9.113333333333335e-06,
"loss": 0.30561195373535155,
"step": 900
},
{
"epoch": 1.44,
"eval_loss": 0.37363526225090027,
"eval_runtime": 1588.6091,
"eval_samples_per_second": 5.031,
"eval_steps_per_second": 0.629,
"eval_wer": 30.577616418324382,
"step": 900
},
{
"epoch": 1.48,
"grad_norm": 11.473066329956055,
"learning_rate": 9.057777777777779e-06,
"loss": 0.33498191833496094,
"step": 925
},
{
"epoch": 1.52,
"grad_norm": 10.787580490112305,
"learning_rate": 9.002222222222223e-06,
"loss": 0.33107086181640627,
"step": 950
},
{
"epoch": 1.56,
"grad_norm": 9.3997220993042,
"learning_rate": 8.946666666666669e-06,
"loss": 0.31393367767333985,
"step": 975
},
{
"epoch": 1.6,
"grad_norm": 14.870068550109863,
"learning_rate": 8.891111111111111e-06,
"loss": 0.3480434036254883,
"step": 1000
},
{
"epoch": 1.6,
"eval_loss": 0.36670026183128357,
"eval_runtime": 1580.5347,
"eval_samples_per_second": 5.057,
"eval_steps_per_second": 0.633,
"eval_wer": 30.19438771651161,
"step": 1000
},
{
"epoch": 1.6400000000000001,
"grad_norm": 15.575242042541504,
"learning_rate": 8.835555555555557e-06,
"loss": 0.3521144485473633,
"step": 1025
},
{
"epoch": 1.6800000000000002,
"grad_norm": 13.404891014099121,
"learning_rate": 8.78e-06,
"loss": 0.3038243865966797,
"step": 1050
},
{
"epoch": 1.72,
"grad_norm": 11.040489196777344,
"learning_rate": 8.724444444444445e-06,
"loss": 0.33296077728271484,
"step": 1075
},
{
"epoch": 1.76,
"grad_norm": 11.390976905822754,
"learning_rate": 8.66888888888889e-06,
"loss": 0.3266580581665039,
"step": 1100
},
{
"epoch": 1.76,
"eval_loss": 0.3538387417793274,
"eval_runtime": 1245.1395,
"eval_samples_per_second": 6.419,
"eval_steps_per_second": 0.803,
"eval_wer": 29.1329450621486,
"step": 1100
},
{
"epoch": 1.8,
"grad_norm": 9.734987258911133,
"learning_rate": 8.613333333333333e-06,
"loss": 0.30483461380004884,
"step": 1125
},
{
"epoch": 1.8399999999999999,
"grad_norm": 13.66518497467041,
"learning_rate": 8.557777777777778e-06,
"loss": 0.337967643737793,
"step": 1150
},
{
"epoch": 1.88,
"grad_norm": 14.166866302490234,
"learning_rate": 8.502222222222223e-06,
"loss": 0.33347091674804685,
"step": 1175
},
{
"epoch": 1.92,
"grad_norm": 10.097210884094238,
"learning_rate": 8.446666666666668e-06,
"loss": 0.30127151489257814,
"step": 1200
},
{
"epoch": 1.92,
"eval_loss": 0.3475528061389923,
"eval_runtime": 1114.263,
"eval_samples_per_second": 7.173,
"eval_steps_per_second": 0.897,
"eval_wer": 28.73206767012077,
"step": 1200
},
{
"epoch": 1.96,
"grad_norm": 8.526341438293457,
"learning_rate": 8.391111111111112e-06,
"loss": 0.30967933654785157,
"step": 1225
},
{
"epoch": 2.0,
"grad_norm": 13.744101524353027,
"learning_rate": 8.335555555555556e-06,
"loss": 0.2926918983459473,
"step": 1250
},
{
"epoch": 2.04,
"grad_norm": 12.457859992980957,
"learning_rate": 8.28e-06,
"loss": 0.19928192138671874,
"step": 1275
},
{
"epoch": 2.08,
"grad_norm": 10.412860870361328,
"learning_rate": 8.224444444444444e-06,
"loss": 0.2255691719055176,
"step": 1300
},
{
"epoch": 2.08,
"eval_loss": 0.3463591933250427,
"eval_runtime": 1136.9055,
"eval_samples_per_second": 7.03,
"eval_steps_per_second": 0.88,
"eval_wer": 28.709376496987115,
"step": 1300
},
{
"epoch": 2.12,
"grad_norm": 12.349778175354004,
"learning_rate": 8.16888888888889e-06,
"loss": 0.2224934768676758,
"step": 1325
},
{
"epoch": 2.16,
"grad_norm": 7.292425155639648,
"learning_rate": 8.113333333333334e-06,
"loss": 0.20890113830566406,
"step": 1350
},
{
"epoch": 2.2,
"grad_norm": 7.30359411239624,
"learning_rate": 8.057777777777778e-06,
"loss": 0.22248428344726562,
"step": 1375
},
{
"epoch": 2.24,
"grad_norm": 6.8495683670043945,
"learning_rate": 8.002222222222222e-06,
"loss": 0.1987138557434082,
"step": 1400
},
{
"epoch": 2.24,
"eval_loss": 0.34195244312286377,
"eval_runtime": 1142.688,
"eval_samples_per_second": 6.995,
"eval_steps_per_second": 0.875,
"eval_wer": 28.3009353805814,
"step": 1400
},
{
"epoch": 2.2800000000000002,
"grad_norm": 9.956233024597168,
"learning_rate": 7.946666666666666e-06,
"loss": 0.18319826126098632,
"step": 1425
},
{
"epoch": 2.32,
"grad_norm": 9.506035804748535,
"learning_rate": 7.891111111111112e-06,
"loss": 0.2086960792541504,
"step": 1450
},
{
"epoch": 2.36,
"grad_norm": 9.610784530639648,
"learning_rate": 7.835555555555556e-06,
"loss": 0.20998584747314453,
"step": 1475
},
{
"epoch": 2.4,
"grad_norm": 10.06142807006836,
"learning_rate": 7.78e-06,
"loss": 0.19923351287841798,
"step": 1500
},
{
"epoch": 2.4,
"eval_loss": 0.337080180644989,
"eval_runtime": 1129.0462,
"eval_samples_per_second": 7.079,
"eval_steps_per_second": 0.886,
"eval_wer": 28.12444847843078,
"step": 1500
},
{
"epoch": 2.44,
"grad_norm": 10.249608993530273,
"learning_rate": 7.724444444444446e-06,
"loss": 0.20815914154052734,
"step": 1525
},
{
"epoch": 2.48,
"grad_norm": 8.729615211486816,
"learning_rate": 7.66888888888889e-06,
"loss": 0.19476179122924805,
"step": 1550
},
{
"epoch": 2.52,
"grad_norm": 7.07798957824707,
"learning_rate": 7.613333333333334e-06,
"loss": 0.20427942276000977,
"step": 1575
},
{
"epoch": 2.56,
"grad_norm": 12.55591106414795,
"learning_rate": 7.557777777777779e-06,
"loss": 0.19880136489868164,
"step": 1600
},
{
"epoch": 2.56,
"eval_loss": 0.3345155715942383,
"eval_runtime": 1157.6671,
"eval_samples_per_second": 6.904,
"eval_steps_per_second": 0.864,
"eval_wer": 27.466404457554898,
"step": 1600
},
{
"epoch": 2.6,
"grad_norm": 9.194686889648438,
"learning_rate": 7.502222222222223e-06,
"loss": 0.20003116607666016,
"step": 1625
},
{
"epoch": 2.64,
"grad_norm": 8.028614044189453,
"learning_rate": 7.446666666666668e-06,
"loss": 0.20664962768554687,
"step": 1650
},
{
"epoch": 2.68,
"grad_norm": 9.309157371520996,
"learning_rate": 7.3911111111111125e-06,
"loss": 0.2059481430053711,
"step": 1675
},
{
"epoch": 2.7199999999999998,
"grad_norm": 7.072760105133057,
"learning_rate": 7.335555555555556e-06,
"loss": 0.18960922241210937,
"step": 1700
},
{
"epoch": 2.7199999999999998,
"eval_loss": 0.3304011821746826,
"eval_runtime": 1136.784,
"eval_samples_per_second": 7.031,
"eval_steps_per_second": 0.88,
"eval_wer": 27.48405314776996,
"step": 1700
},
{
"epoch": 2.76,
"grad_norm": 10.164315223693848,
"learning_rate": 7.280000000000001e-06,
"loss": 0.20121437072753906,
"step": 1725
},
{
"epoch": 2.8,
"grad_norm": 11.21286392211914,
"learning_rate": 7.224444444444445e-06,
"loss": 0.2160506057739258,
"step": 1750
},
{
"epoch": 2.84,
"grad_norm": 7.09088659286499,
"learning_rate": 7.1688888888888895e-06,
"loss": 0.1943138313293457,
"step": 1775
},
{
"epoch": 2.88,
"grad_norm": 7.879263401031494,
"learning_rate": 7.113333333333334e-06,
"loss": 0.193405818939209,
"step": 1800
},
{
"epoch": 2.88,
"eval_loss": 0.3283212184906006,
"eval_runtime": 1146.053,
"eval_samples_per_second": 6.974,
"eval_steps_per_second": 0.873,
"eval_wer": 27.456319491717725,
"step": 1800
},
{
"epoch": 2.92,
"grad_norm": 10.002179145812988,
"learning_rate": 7.057777777777778e-06,
"loss": 0.18917253494262695,
"step": 1825
},
{
"epoch": 2.96,
"grad_norm": 9.466012954711914,
"learning_rate": 7.0022222222222225e-06,
"loss": 0.19346149444580077,
"step": 1850
},
{
"epoch": 3.0,
"grad_norm": 14.86670207977295,
"learning_rate": 6.946666666666667e-06,
"loss": 0.20311836242675782,
"step": 1875
},
{
"epoch": 3.04,
"grad_norm": 7.088613986968994,
"learning_rate": 6.891111111111111e-06,
"loss": 0.12550613403320313,
"step": 1900
},
{
"epoch": 3.04,
"eval_loss": 0.326405793428421,
"eval_runtime": 1140.5284,
"eval_samples_per_second": 7.008,
"eval_steps_per_second": 0.877,
"eval_wer": 27.247056450596276,
"step": 1900
},
{
"epoch": 3.08,
"grad_norm": 5.738883972167969,
"learning_rate": 6.835555555555556e-06,
"loss": 0.1307435894012451,
"step": 1925
},
{
"epoch": 3.12,
"grad_norm": 5.431838035583496,
"learning_rate": 6.780000000000001e-06,
"loss": 0.11987467765808106,
"step": 1950
},
{
"epoch": 3.16,
"grad_norm": 8.73540210723877,
"learning_rate": 6.724444444444444e-06,
"loss": 0.1516973114013672,
"step": 1975
},
{
"epoch": 3.2,
"grad_norm": 6.3792724609375,
"learning_rate": 6.668888888888889e-06,
"loss": 0.13660179138183592,
"step": 2000
},
{
"epoch": 3.2,
"eval_loss": 0.32666918635368347,
"eval_runtime": 1124.088,
"eval_samples_per_second": 7.111,
"eval_steps_per_second": 0.89,
"eval_wer": 27.363033557723824,
"step": 2000
},
{
"epoch": 3.24,
"grad_norm": 5.221762657165527,
"learning_rate": 6.613333333333334e-06,
"loss": 0.12183536529541016,
"step": 2025
},
{
"epoch": 3.2800000000000002,
"grad_norm": 7.180768013000488,
"learning_rate": 6.557777777777778e-06,
"loss": 0.1264752769470215,
"step": 2050
},
{
"epoch": 3.32,
"grad_norm": 8.103682518005371,
"learning_rate": 6.502222222222223e-06,
"loss": 0.14041830062866212,
"step": 2075
},
{
"epoch": 3.36,
"grad_norm": 6.988570690155029,
"learning_rate": 6.446666666666668e-06,
"loss": 0.14171558380126953,
"step": 2100
},
{
"epoch": 3.36,
"eval_loss": 0.3258770704269409,
"eval_runtime": 1142.5612,
"eval_samples_per_second": 6.996,
"eval_steps_per_second": 0.875,
"eval_wer": 27.148728033683785,
"step": 2100
},
{
"epoch": 3.4,
"grad_norm": 7.272939205169678,
"learning_rate": 6.391111111111111e-06,
"loss": 0.12976963996887206,
"step": 2125
},
{
"epoch": 3.44,
"grad_norm": 9.169845581054688,
"learning_rate": 6.335555555555556e-06,
"loss": 0.13874659538269044,
"step": 2150
},
{
"epoch": 3.48,
"grad_norm": 9.13535213470459,
"learning_rate": 6.280000000000001e-06,
"loss": 0.1423179054260254,
"step": 2175
},
{
"epoch": 3.52,
"grad_norm": 5.841824531555176,
"learning_rate": 6.224444444444445e-06,
"loss": 0.12778244972229003,
"step": 2200
},
{
"epoch": 3.52,
"eval_loss": 0.32502686977386475,
"eval_runtime": 1129.5273,
"eval_samples_per_second": 7.076,
"eval_steps_per_second": 0.885,
"eval_wer": 27.098303204497892,
"step": 2200
},
{
"epoch": 3.56,
"grad_norm": 6.991465091705322,
"learning_rate": 6.16888888888889e-06,
"loss": 0.1317989444732666,
"step": 2225
},
{
"epoch": 3.6,
"grad_norm": 8.489235877990723,
"learning_rate": 6.113333333333333e-06,
"loss": 0.12462780952453613,
"step": 2250
},
{
"epoch": 3.64,
"grad_norm": 8.89243221282959,
"learning_rate": 6.057777777777778e-06,
"loss": 0.11276106834411621,
"step": 2275
},
{
"epoch": 3.68,
"grad_norm": 7.854825019836426,
"learning_rate": 6.002222222222223e-06,
"loss": 0.128636474609375,
"step": 2300
},
{
"epoch": 3.68,
"eval_loss": 0.32361486554145813,
"eval_runtime": 1145.2768,
"eval_samples_per_second": 6.979,
"eval_steps_per_second": 0.873,
"eval_wer": 27.141164309305903,
"step": 2300
},
{
"epoch": 3.7199999999999998,
"grad_norm": 10.046810150146484,
"learning_rate": 5.946666666666668e-06,
"loss": 0.13479949951171874,
"step": 2325
},
{
"epoch": 3.76,
"grad_norm": 6.566898345947266,
"learning_rate": 5.891111111111112e-06,
"loss": 0.13264819145202636,
"step": 2350
},
{
"epoch": 3.8,
"grad_norm": 6.007510662078857,
"learning_rate": 5.8355555555555565e-06,
"loss": 0.11804925918579101,
"step": 2375
},
{
"epoch": 3.84,
"grad_norm": 6.695367336273193,
"learning_rate": 5.78e-06,
"loss": 0.12892417907714843,
"step": 2400
},
{
"epoch": 3.84,
"eval_loss": 0.32250407338142395,
"eval_runtime": 1145.1862,
"eval_samples_per_second": 6.98,
"eval_steps_per_second": 0.873,
"eval_wer": 26.57640622242392,
"step": 2400
},
{
"epoch": 3.88,
"grad_norm": 8.012511253356934,
"learning_rate": 5.724444444444445e-06,
"loss": 0.13116491317749024,
"step": 2425
},
{
"epoch": 3.92,
"grad_norm": 7.509751319885254,
"learning_rate": 5.6688888888888895e-06,
"loss": 0.1309671401977539,
"step": 2450
},
{
"epoch": 3.96,
"grad_norm": 9.579854011535645,
"learning_rate": 5.613333333333334e-06,
"loss": 0.12149713516235351,
"step": 2475
},
{
"epoch": 4.0,
"grad_norm": 16.018325805664062,
"learning_rate": 5.557777777777778e-06,
"loss": 0.1331118392944336,
"step": 2500
},
{
"epoch": 4.0,
"eval_loss": 0.3214564025402069,
"eval_runtime": 1135.6798,
"eval_samples_per_second": 7.038,
"eval_steps_per_second": 0.881,
"eval_wer": 27.128558102009432,
"step": 2500
},
{
"epoch": 4.04,
"grad_norm": 3.181704521179199,
"learning_rate": 5.5022222222222224e-06,
"loss": 0.08591601371765137,
"step": 2525
},
{
"epoch": 4.08,
"grad_norm": 6.204383373260498,
"learning_rate": 5.4466666666666665e-06,
"loss": 0.08950037002563477,
"step": 2550
},
{
"epoch": 4.12,
"grad_norm": 6.119636535644531,
"learning_rate": 5.391111111111111e-06,
"loss": 0.0826924991607666,
"step": 2575
},
{
"epoch": 4.16,
"grad_norm": 6.250202178955078,
"learning_rate": 5.335555555555556e-06,
"loss": 0.07985872268676758,
"step": 2600
},
{
"epoch": 4.16,
"eval_loss": 0.324444979429245,
"eval_runtime": 1147.6894,
"eval_samples_per_second": 6.964,
"eval_steps_per_second": 0.871,
"eval_wer": 26.546151324912387,
"step": 2600
},
{
"epoch": 4.2,
"grad_norm": 4.202062606811523,
"learning_rate": 5.28e-06,
"loss": 0.07818631649017334,
"step": 2625
},
{
"epoch": 4.24,
"grad_norm": 8.979434967041016,
"learning_rate": 5.224444444444445e-06,
"loss": 0.08032341957092286,
"step": 2650
},
{
"epoch": 4.28,
"grad_norm": 5.299781799316406,
"learning_rate": 5.168888888888889e-06,
"loss": 0.08594310760498047,
"step": 2675
},
{
"epoch": 4.32,
"grad_norm": 4.9248762130737305,
"learning_rate": 5.113333333333333e-06,
"loss": 0.08457598686218262,
"step": 2700
},
{
"epoch": 4.32,
"eval_loss": 0.32594889402389526,
"eval_runtime": 1130.0663,
"eval_samples_per_second": 7.073,
"eval_steps_per_second": 0.885,
"eval_wer": 26.546151324912387,
"step": 2700
},
{
"epoch": 4.36,
"grad_norm": 10.462182998657227,
"learning_rate": 5.057777777777778e-06,
"loss": 0.08635611534118652,
"step": 2725
},
{
"epoch": 4.4,
"grad_norm": 6.411299705505371,
"learning_rate": 5.002222222222223e-06,
"loss": 0.08314334869384765,
"step": 2750
},
{
"epoch": 4.44,
"grad_norm": 6.515404224395752,
"learning_rate": 4.946666666666667e-06,
"loss": 0.09247981071472168,
"step": 2775
},
{
"epoch": 4.48,
"grad_norm": 9.802311897277832,
"learning_rate": 4.891111111111111e-06,
"loss": 0.09131069183349609,
"step": 2800
},
{
"epoch": 4.48,
"eval_loss": 0.32451489567756653,
"eval_runtime": 1144.7419,
"eval_samples_per_second": 6.982,
"eval_steps_per_second": 0.874,
"eval_wer": 26.392355595895417,
"step": 2800
},
{
"epoch": 4.52,
"grad_norm": 7.073087215423584,
"learning_rate": 4.835555555555556e-06,
"loss": 0.08045567512512207,
"step": 2825
},
{
"epoch": 4.5600000000000005,
"grad_norm": 6.24620246887207,
"learning_rate": 4.78e-06,
"loss": 0.09462824821472168,
"step": 2850
},
{
"epoch": 4.6,
"grad_norm": 4.449136734008789,
"learning_rate": 4.724444444444445e-06,
"loss": 0.08349855422973633,
"step": 2875
},
{
"epoch": 4.64,
"grad_norm": 3.942056894302368,
"learning_rate": 4.66888888888889e-06,
"loss": 0.07720262527465821,
"step": 2900
},
{
"epoch": 4.64,
"eval_loss": 0.32689812779426575,
"eval_runtime": 1153.1464,
"eval_samples_per_second": 6.931,
"eval_steps_per_second": 0.867,
"eval_wer": 27.04031465093412,
"step": 2900
},
{
"epoch": 4.68,
"grad_norm": 5.481267929077148,
"learning_rate": 4.613333333333334e-06,
"loss": 0.09236433029174805,
"step": 2925
},
{
"epoch": 4.72,
"grad_norm": 7.2641215324401855,
"learning_rate": 4.557777777777778e-06,
"loss": 0.09008319854736328,
"step": 2950
},
{
"epoch": 4.76,
"grad_norm": 8.626544952392578,
"learning_rate": 4.502222222222223e-06,
"loss": 0.09662159919738769,
"step": 2975
},
{
"epoch": 4.8,
"grad_norm": 7.221775531768799,
"learning_rate": 4.446666666666667e-06,
"loss": 0.08148813247680664,
"step": 3000
},
{
"epoch": 4.8,
"eval_loss": 0.32437387108802795,
"eval_runtime": 1136.3082,
"eval_samples_per_second": 7.034,
"eval_steps_per_second": 0.88,
"eval_wer": 26.768020573330308,
"step": 3000
},
{
"epoch": 4.84,
"grad_norm": 3.961613655090332,
"learning_rate": 4.391111111111112e-06,
"loss": 0.07602582931518555,
"step": 3025
},
{
"epoch": 4.88,
"grad_norm": 11.219801902770996,
"learning_rate": 4.3355555555555565e-06,
"loss": 0.0879791259765625,
"step": 3050
},
{
"epoch": 4.92,
"grad_norm": 5.104950904846191,
"learning_rate": 4.2800000000000005e-06,
"loss": 0.08852799415588379,
"step": 3075
},
{
"epoch": 4.96,
"grad_norm": 5.801946640014648,
"learning_rate": 4.2244444444444446e-06,
"loss": 0.07789647579193115,
"step": 3100
},
{
"epoch": 4.96,
"eval_loss": 0.32314595580101013,
"eval_runtime": 1146.6672,
"eval_samples_per_second": 6.971,
"eval_steps_per_second": 0.872,
"eval_wer": 26.296548420442228,
"step": 3100
},
{
"epoch": 5.0,
"grad_norm": 7.96620512008667,
"learning_rate": 4.168888888888889e-06,
"loss": 0.08891249656677246,
"step": 3125
},
{
"epoch": 5.04,
"grad_norm": 2.248185396194458,
"learning_rate": 4.1133333333333335e-06,
"loss": 0.06223374366760254,
"step": 3150
},
{
"epoch": 5.08,
"grad_norm": 3.364957571029663,
"learning_rate": 4.057777777777778e-06,
"loss": 0.058481874465942385,
"step": 3175
},
{
"epoch": 5.12,
"grad_norm": 3.7165310382843018,
"learning_rate": 4.002222222222222e-06,
"loss": 0.06104232311248779,
"step": 3200
},
{
"epoch": 5.12,
"eval_loss": 0.32432663440704346,
"eval_runtime": 1128.5261,
"eval_samples_per_second": 7.083,
"eval_steps_per_second": 0.886,
"eval_wer": 26.44278042508131,
"step": 3200
},
{
"epoch": 5.16,
"grad_norm": 5.533367156982422,
"learning_rate": 3.946666666666667e-06,
"loss": 0.06542285442352296,
"step": 3225
},
{
"epoch": 5.2,
"grad_norm": 3.9828567504882812,
"learning_rate": 3.891111111111111e-06,
"loss": 0.05872833728790283,
"step": 3250
},
{
"epoch": 5.24,
"grad_norm": 4.2224249839782715,
"learning_rate": 3.835555555555555e-06,
"loss": 0.05860544204711914,
"step": 3275
},
{
"epoch": 5.28,
"grad_norm": 4.558178901672363,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.055550127029418944,
"step": 3300
},
{
"epoch": 5.28,
"eval_loss": 0.3261101543903351,
"eval_runtime": 1144.1278,
"eval_samples_per_second": 6.986,
"eval_steps_per_second": 0.874,
"eval_wer": 26.2158686937448,
"step": 3300
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2991385677824e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}