{ "best_global_step": 3300, "best_metric": 26.2158686937448, "best_model_checkpoint": "./whisper-tiny-ru/checkpoint-3300", "epoch": 5.28, "eval_steps": 100, "global_step": 3300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 30.96117401123047, "learning_rate": 4.800000000000001e-07, "loss": 1.661016845703125, "step": 25 }, { "epoch": 0.08, "grad_norm": 22.207429885864258, "learning_rate": 9.800000000000001e-07, "loss": 1.5516070556640624, "step": 50 }, { "epoch": 0.12, "grad_norm": 21.638269424438477, "learning_rate": 1.48e-06, "loss": 1.33806884765625, "step": 75 }, { "epoch": 0.16, "grad_norm": 21.087303161621094, "learning_rate": 1.98e-06, "loss": 1.0702142333984375, "step": 100 }, { "epoch": 0.16, "eval_loss": 0.9838109016418457, "eval_runtime": 1688.971, "eval_samples_per_second": 4.732, "eval_steps_per_second": 0.592, "eval_wer": 58.649118826109984, "step": 100 }, { "epoch": 0.2, "grad_norm": 18.6301212310791, "learning_rate": 2.4800000000000004e-06, "loss": 0.9264418029785156, "step": 125 }, { "epoch": 0.24, "grad_norm": 17.07522964477539, "learning_rate": 2.9800000000000003e-06, "loss": 0.7360333251953125, "step": 150 }, { "epoch": 0.28, "grad_norm": 19.41968536376953, "learning_rate": 3.48e-06, "loss": 0.7099385833740235, "step": 175 }, { "epoch": 0.32, "grad_norm": 16.938819885253906, "learning_rate": 3.980000000000001e-06, "loss": 0.700680923461914, "step": 200 }, { "epoch": 0.32, "eval_loss": 0.6107771992683411, "eval_runtime": 1633.6115, "eval_samples_per_second": 4.893, "eval_steps_per_second": 0.612, "eval_wer": 45.457983511080855, "step": 200 }, { "epoch": 0.36, "grad_norm": 15.689166069030762, "learning_rate": 4.48e-06, "loss": 0.6384918212890625, "step": 225 }, { "epoch": 0.4, "grad_norm": 20.370248794555664, "learning_rate": 4.980000000000001e-06, "loss": 0.6048641204833984, "step": 250 }, { "epoch": 0.44, "grad_norm": 11.012890815734863, "learning_rate": 5.480000000000001e-06, "loss": 0.5702555847167968, "step": 275 }, { "epoch": 0.48, "grad_norm": 19.070072174072266, "learning_rate": 5.98e-06, "loss": 0.5977656555175781, "step": 300 }, { "epoch": 0.48, "eval_loss": 0.532837450504303, "eval_runtime": 1594.1788, "eval_samples_per_second": 5.014, "eval_steps_per_second": 0.627, "eval_wer": 41.2702014471926, "step": 300 }, { "epoch": 0.52, "grad_norm": 13.001229286193848, "learning_rate": 6.480000000000001e-06, "loss": 0.5543878555297852, "step": 325 }, { "epoch": 0.56, "grad_norm": 19.709369659423828, "learning_rate": 6.98e-06, "loss": 0.545843734741211, "step": 350 }, { "epoch": 0.6, "grad_norm": 15.140043258666992, "learning_rate": 7.48e-06, "loss": 0.5528886413574219, "step": 375 }, { "epoch": 0.64, "grad_norm": 12.472454071044922, "learning_rate": 7.980000000000002e-06, "loss": 0.49836795806884765, "step": 400 }, { "epoch": 0.64, "eval_loss": 0.4811266362667084, "eval_runtime": 1597.9191, "eval_samples_per_second": 5.002, "eval_steps_per_second": 0.626, "eval_wer": 37.518594155762294, "step": 400 }, { "epoch": 0.68, "grad_norm": 15.188615798950195, "learning_rate": 8.48e-06, "loss": 0.48630184173583985, "step": 425 }, { "epoch": 0.72, "grad_norm": 17.552886962890625, "learning_rate": 8.98e-06, "loss": 0.4862052917480469, "step": 450 }, { "epoch": 0.76, "grad_norm": 12.936896324157715, "learning_rate": 9.48e-06, "loss": 0.47983272552490236, "step": 475 }, { "epoch": 0.8, "grad_norm": 14.850130081176758, "learning_rate": 9.980000000000001e-06, "loss": 0.47797527313232424, "step": 500 }, { "epoch": 0.8, "eval_loss": 0.44456836581230164, "eval_runtime": 1586.2776, "eval_samples_per_second": 5.039, "eval_steps_per_second": 0.63, "eval_wer": 35.09568111338023, "step": 500 }, { "epoch": 0.84, "grad_norm": 16.829805374145508, "learning_rate": 9.946666666666667e-06, "loss": 0.5019921493530274, "step": 525 }, { "epoch": 0.88, "grad_norm": 13.922273635864258, "learning_rate": 9.891111111111113e-06, "loss": 0.46826896667480467, "step": 550 }, { "epoch": 0.92, "grad_norm": 15.372945785522461, "learning_rate": 9.835555555555556e-06, "loss": 0.42755321502685545, "step": 575 }, { "epoch": 0.96, "grad_norm": 16.532119750976562, "learning_rate": 9.780000000000001e-06, "loss": 0.49279567718505857, "step": 600 }, { "epoch": 0.96, "eval_loss": 0.4159170389175415, "eval_runtime": 1613.2907, "eval_samples_per_second": 4.954, "eval_steps_per_second": 0.62, "eval_wer": 33.749338174116936, "step": 600 }, { "epoch": 1.0, "grad_norm": 17.019622802734375, "learning_rate": 9.724444444444445e-06, "loss": 0.3978841781616211, "step": 625 }, { "epoch": 1.04, "grad_norm": 9.66409683227539, "learning_rate": 9.66888888888889e-06, "loss": 0.3251683807373047, "step": 650 }, { "epoch": 1.08, "grad_norm": 11.652173042297363, "learning_rate": 9.613333333333335e-06, "loss": 0.34501224517822265, "step": 675 }, { "epoch": 1.12, "grad_norm": 10.360984802246094, "learning_rate": 9.557777777777777e-06, "loss": 0.34316062927246094, "step": 700 }, { "epoch": 1.12, "eval_loss": 0.3949244022369385, "eval_runtime": 1589.2681, "eval_samples_per_second": 5.029, "eval_steps_per_second": 0.629, "eval_wer": 32.24919950583667, "step": 700 }, { "epoch": 1.16, "grad_norm": 12.48491382598877, "learning_rate": 9.502222222222223e-06, "loss": 0.34458335876464846, "step": 725 }, { "epoch": 1.2, "grad_norm": 11.152288436889648, "learning_rate": 9.446666666666667e-06, "loss": 0.32888599395751955, "step": 750 }, { "epoch": 1.24, "grad_norm": 10.6038818359375, "learning_rate": 9.391111111111111e-06, "loss": 0.31568107604980467, "step": 775 }, { "epoch": 1.28, "grad_norm": 18.213455200195312, "learning_rate": 9.335555555555557e-06, "loss": 0.34547271728515627, "step": 800 }, { "epoch": 1.28, "eval_loss": 0.38666781783103943, "eval_runtime": 1606.6532, "eval_samples_per_second": 4.975, "eval_steps_per_second": 0.622, "eval_wer": 31.583591760582912, "step": 800 }, { "epoch": 1.32, "grad_norm": 13.49360466003418, "learning_rate": 9.280000000000001e-06, "loss": 0.3703644943237305, "step": 825 }, { "epoch": 1.3599999999999999, "grad_norm": 9.994672775268555, "learning_rate": 9.224444444444445e-06, "loss": 0.31469236373901366, "step": 850 }, { "epoch": 1.4, "grad_norm": 12.134446144104004, "learning_rate": 9.168888888888889e-06, "loss": 0.3096772575378418, "step": 875 }, { "epoch": 1.44, "grad_norm": 9.686901092529297, "learning_rate": 9.113333333333335e-06, "loss": 0.30561195373535155, "step": 900 }, { "epoch": 1.44, "eval_loss": 0.37363526225090027, "eval_runtime": 1588.6091, "eval_samples_per_second": 5.031, "eval_steps_per_second": 0.629, "eval_wer": 30.577616418324382, "step": 900 }, { "epoch": 1.48, "grad_norm": 11.473066329956055, "learning_rate": 9.057777777777779e-06, "loss": 0.33498191833496094, "step": 925 }, { "epoch": 1.52, "grad_norm": 10.787580490112305, "learning_rate": 9.002222222222223e-06, "loss": 0.33107086181640627, "step": 950 }, { "epoch": 1.56, "grad_norm": 9.3997220993042, "learning_rate": 8.946666666666669e-06, "loss": 0.31393367767333985, "step": 975 }, { "epoch": 1.6, "grad_norm": 14.870068550109863, "learning_rate": 8.891111111111111e-06, "loss": 0.3480434036254883, "step": 1000 }, { "epoch": 1.6, "eval_loss": 0.36670026183128357, "eval_runtime": 1580.5347, "eval_samples_per_second": 5.057, "eval_steps_per_second": 0.633, "eval_wer": 30.19438771651161, "step": 1000 }, { "epoch": 1.6400000000000001, "grad_norm": 15.575242042541504, "learning_rate": 8.835555555555557e-06, "loss": 0.3521144485473633, "step": 1025 }, { "epoch": 1.6800000000000002, "grad_norm": 13.404891014099121, "learning_rate": 8.78e-06, "loss": 0.3038243865966797, "step": 1050 }, { "epoch": 1.72, "grad_norm": 11.040489196777344, "learning_rate": 8.724444444444445e-06, "loss": 0.33296077728271484, "step": 1075 }, { "epoch": 1.76, "grad_norm": 11.390976905822754, "learning_rate": 8.66888888888889e-06, "loss": 0.3266580581665039, "step": 1100 }, { "epoch": 1.76, "eval_loss": 0.3538387417793274, "eval_runtime": 1245.1395, "eval_samples_per_second": 6.419, "eval_steps_per_second": 0.803, "eval_wer": 29.1329450621486, "step": 1100 }, { "epoch": 1.8, "grad_norm": 9.734987258911133, "learning_rate": 8.613333333333333e-06, "loss": 0.30483461380004884, "step": 1125 }, { "epoch": 1.8399999999999999, "grad_norm": 13.66518497467041, "learning_rate": 8.557777777777778e-06, "loss": 0.337967643737793, "step": 1150 }, { "epoch": 1.88, "grad_norm": 14.166866302490234, "learning_rate": 8.502222222222223e-06, "loss": 0.33347091674804685, "step": 1175 }, { "epoch": 1.92, "grad_norm": 10.097210884094238, "learning_rate": 8.446666666666668e-06, "loss": 0.30127151489257814, "step": 1200 }, { "epoch": 1.92, "eval_loss": 0.3475528061389923, "eval_runtime": 1114.263, "eval_samples_per_second": 7.173, "eval_steps_per_second": 0.897, "eval_wer": 28.73206767012077, "step": 1200 }, { "epoch": 1.96, "grad_norm": 8.526341438293457, "learning_rate": 8.391111111111112e-06, "loss": 0.30967933654785157, "step": 1225 }, { "epoch": 2.0, "grad_norm": 13.744101524353027, "learning_rate": 8.335555555555556e-06, "loss": 0.2926918983459473, "step": 1250 }, { "epoch": 2.04, "grad_norm": 12.457859992980957, "learning_rate": 8.28e-06, "loss": 0.19928192138671874, "step": 1275 }, { "epoch": 2.08, "grad_norm": 10.412860870361328, "learning_rate": 8.224444444444444e-06, "loss": 0.2255691719055176, "step": 1300 }, { "epoch": 2.08, "eval_loss": 0.3463591933250427, "eval_runtime": 1136.9055, "eval_samples_per_second": 7.03, "eval_steps_per_second": 0.88, "eval_wer": 28.709376496987115, "step": 1300 }, { "epoch": 2.12, "grad_norm": 12.349778175354004, "learning_rate": 8.16888888888889e-06, "loss": 0.2224934768676758, "step": 1325 }, { "epoch": 2.16, "grad_norm": 7.292425155639648, "learning_rate": 8.113333333333334e-06, "loss": 0.20890113830566406, "step": 1350 }, { "epoch": 2.2, "grad_norm": 7.30359411239624, "learning_rate": 8.057777777777778e-06, "loss": 0.22248428344726562, "step": 1375 }, { "epoch": 2.24, "grad_norm": 6.8495683670043945, "learning_rate": 8.002222222222222e-06, "loss": 0.1987138557434082, "step": 1400 }, { "epoch": 2.24, "eval_loss": 0.34195244312286377, "eval_runtime": 1142.688, "eval_samples_per_second": 6.995, "eval_steps_per_second": 0.875, "eval_wer": 28.3009353805814, "step": 1400 }, { "epoch": 2.2800000000000002, "grad_norm": 9.956233024597168, "learning_rate": 7.946666666666666e-06, "loss": 0.18319826126098632, "step": 1425 }, { "epoch": 2.32, "grad_norm": 9.506035804748535, "learning_rate": 7.891111111111112e-06, "loss": 0.2086960792541504, "step": 1450 }, { "epoch": 2.36, "grad_norm": 9.610784530639648, "learning_rate": 7.835555555555556e-06, "loss": 0.20998584747314453, "step": 1475 }, { "epoch": 2.4, "grad_norm": 10.06142807006836, "learning_rate": 7.78e-06, "loss": 0.19923351287841798, "step": 1500 }, { "epoch": 2.4, "eval_loss": 0.337080180644989, "eval_runtime": 1129.0462, "eval_samples_per_second": 7.079, "eval_steps_per_second": 0.886, "eval_wer": 28.12444847843078, "step": 1500 }, { "epoch": 2.44, "grad_norm": 10.249608993530273, "learning_rate": 7.724444444444446e-06, "loss": 0.20815914154052734, "step": 1525 }, { "epoch": 2.48, "grad_norm": 8.729615211486816, "learning_rate": 7.66888888888889e-06, "loss": 0.19476179122924805, "step": 1550 }, { "epoch": 2.52, "grad_norm": 7.07798957824707, "learning_rate": 7.613333333333334e-06, "loss": 0.20427942276000977, "step": 1575 }, { "epoch": 2.56, "grad_norm": 12.55591106414795, "learning_rate": 7.557777777777779e-06, "loss": 0.19880136489868164, "step": 1600 }, { "epoch": 2.56, "eval_loss": 0.3345155715942383, "eval_runtime": 1157.6671, "eval_samples_per_second": 6.904, "eval_steps_per_second": 0.864, "eval_wer": 27.466404457554898, "step": 1600 }, { "epoch": 2.6, "grad_norm": 9.194686889648438, "learning_rate": 7.502222222222223e-06, "loss": 0.20003116607666016, "step": 1625 }, { "epoch": 2.64, "grad_norm": 8.028614044189453, "learning_rate": 7.446666666666668e-06, "loss": 0.20664962768554687, "step": 1650 }, { "epoch": 2.68, "grad_norm": 9.309157371520996, "learning_rate": 7.3911111111111125e-06, "loss": 0.2059481430053711, "step": 1675 }, { "epoch": 2.7199999999999998, "grad_norm": 7.072760105133057, "learning_rate": 7.335555555555556e-06, "loss": 0.18960922241210937, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_loss": 0.3304011821746826, "eval_runtime": 1136.784, "eval_samples_per_second": 7.031, "eval_steps_per_second": 0.88, "eval_wer": 27.48405314776996, "step": 1700 }, { "epoch": 2.76, "grad_norm": 10.164315223693848, "learning_rate": 7.280000000000001e-06, "loss": 0.20121437072753906, "step": 1725 }, { "epoch": 2.8, "grad_norm": 11.21286392211914, "learning_rate": 7.224444444444445e-06, "loss": 0.2160506057739258, "step": 1750 }, { "epoch": 2.84, "grad_norm": 7.09088659286499, "learning_rate": 7.1688888888888895e-06, "loss": 0.1943138313293457, "step": 1775 }, { "epoch": 2.88, "grad_norm": 7.879263401031494, "learning_rate": 7.113333333333334e-06, "loss": 0.193405818939209, "step": 1800 }, { "epoch": 2.88, "eval_loss": 0.3283212184906006, "eval_runtime": 1146.053, "eval_samples_per_second": 6.974, "eval_steps_per_second": 0.873, "eval_wer": 27.456319491717725, "step": 1800 }, { "epoch": 2.92, "grad_norm": 10.002179145812988, "learning_rate": 7.057777777777778e-06, "loss": 0.18917253494262695, "step": 1825 }, { "epoch": 2.96, "grad_norm": 9.466012954711914, "learning_rate": 7.0022222222222225e-06, "loss": 0.19346149444580077, "step": 1850 }, { "epoch": 3.0, "grad_norm": 14.86670207977295, "learning_rate": 6.946666666666667e-06, "loss": 0.20311836242675782, "step": 1875 }, { "epoch": 3.04, "grad_norm": 7.088613986968994, "learning_rate": 6.891111111111111e-06, "loss": 0.12550613403320313, "step": 1900 }, { "epoch": 3.04, "eval_loss": 0.326405793428421, "eval_runtime": 1140.5284, "eval_samples_per_second": 7.008, "eval_steps_per_second": 0.877, "eval_wer": 27.247056450596276, "step": 1900 }, { "epoch": 3.08, "grad_norm": 5.738883972167969, "learning_rate": 6.835555555555556e-06, "loss": 0.1307435894012451, "step": 1925 }, { "epoch": 3.12, "grad_norm": 5.431838035583496, "learning_rate": 6.780000000000001e-06, "loss": 0.11987467765808106, "step": 1950 }, { "epoch": 3.16, "grad_norm": 8.73540210723877, "learning_rate": 6.724444444444444e-06, "loss": 0.1516973114013672, "step": 1975 }, { "epoch": 3.2, "grad_norm": 6.3792724609375, "learning_rate": 6.668888888888889e-06, "loss": 0.13660179138183592, "step": 2000 }, { "epoch": 3.2, "eval_loss": 0.32666918635368347, "eval_runtime": 1124.088, "eval_samples_per_second": 7.111, "eval_steps_per_second": 0.89, "eval_wer": 27.363033557723824, "step": 2000 }, { "epoch": 3.24, "grad_norm": 5.221762657165527, "learning_rate": 6.613333333333334e-06, "loss": 0.12183536529541016, "step": 2025 }, { "epoch": 3.2800000000000002, "grad_norm": 7.180768013000488, "learning_rate": 6.557777777777778e-06, "loss": 0.1264752769470215, "step": 2050 }, { "epoch": 3.32, "grad_norm": 8.103682518005371, "learning_rate": 6.502222222222223e-06, "loss": 0.14041830062866212, "step": 2075 }, { "epoch": 3.36, "grad_norm": 6.988570690155029, "learning_rate": 6.446666666666668e-06, "loss": 0.14171558380126953, "step": 2100 }, { "epoch": 3.36, "eval_loss": 0.3258770704269409, "eval_runtime": 1142.5612, "eval_samples_per_second": 6.996, "eval_steps_per_second": 0.875, "eval_wer": 27.148728033683785, "step": 2100 }, { "epoch": 3.4, "grad_norm": 7.272939205169678, "learning_rate": 6.391111111111111e-06, "loss": 0.12976963996887206, "step": 2125 }, { "epoch": 3.44, "grad_norm": 9.169845581054688, "learning_rate": 6.335555555555556e-06, "loss": 0.13874659538269044, "step": 2150 }, { "epoch": 3.48, "grad_norm": 9.13535213470459, "learning_rate": 6.280000000000001e-06, "loss": 0.1423179054260254, "step": 2175 }, { "epoch": 3.52, "grad_norm": 5.841824531555176, "learning_rate": 6.224444444444445e-06, "loss": 0.12778244972229003, "step": 2200 }, { "epoch": 3.52, "eval_loss": 0.32502686977386475, "eval_runtime": 1129.5273, "eval_samples_per_second": 7.076, "eval_steps_per_second": 0.885, "eval_wer": 27.098303204497892, "step": 2200 }, { "epoch": 3.56, "grad_norm": 6.991465091705322, "learning_rate": 6.16888888888889e-06, "loss": 0.1317989444732666, "step": 2225 }, { "epoch": 3.6, "grad_norm": 8.489235877990723, "learning_rate": 6.113333333333333e-06, "loss": 0.12462780952453613, "step": 2250 }, { "epoch": 3.64, "grad_norm": 8.89243221282959, "learning_rate": 6.057777777777778e-06, "loss": 0.11276106834411621, "step": 2275 }, { "epoch": 3.68, "grad_norm": 7.854825019836426, "learning_rate": 6.002222222222223e-06, "loss": 0.128636474609375, "step": 2300 }, { "epoch": 3.68, "eval_loss": 0.32361486554145813, "eval_runtime": 1145.2768, "eval_samples_per_second": 6.979, "eval_steps_per_second": 0.873, "eval_wer": 27.141164309305903, "step": 2300 }, { "epoch": 3.7199999999999998, "grad_norm": 10.046810150146484, "learning_rate": 5.946666666666668e-06, "loss": 0.13479949951171874, "step": 2325 }, { "epoch": 3.76, "grad_norm": 6.566898345947266, "learning_rate": 5.891111111111112e-06, "loss": 0.13264819145202636, "step": 2350 }, { "epoch": 3.8, "grad_norm": 6.007510662078857, "learning_rate": 5.8355555555555565e-06, "loss": 0.11804925918579101, "step": 2375 }, { "epoch": 3.84, "grad_norm": 6.695367336273193, "learning_rate": 5.78e-06, "loss": 0.12892417907714843, "step": 2400 }, { "epoch": 3.84, "eval_loss": 0.32250407338142395, "eval_runtime": 1145.1862, "eval_samples_per_second": 6.98, "eval_steps_per_second": 0.873, "eval_wer": 26.57640622242392, "step": 2400 }, { "epoch": 3.88, "grad_norm": 8.012511253356934, "learning_rate": 5.724444444444445e-06, "loss": 0.13116491317749024, "step": 2425 }, { "epoch": 3.92, "grad_norm": 7.509751319885254, "learning_rate": 5.6688888888888895e-06, "loss": 0.1309671401977539, "step": 2450 }, { "epoch": 3.96, "grad_norm": 9.579854011535645, "learning_rate": 5.613333333333334e-06, "loss": 0.12149713516235351, "step": 2475 }, { "epoch": 4.0, "grad_norm": 16.018325805664062, "learning_rate": 5.557777777777778e-06, "loss": 0.1331118392944336, "step": 2500 }, { "epoch": 4.0, "eval_loss": 0.3214564025402069, "eval_runtime": 1135.6798, "eval_samples_per_second": 7.038, "eval_steps_per_second": 0.881, "eval_wer": 27.128558102009432, "step": 2500 }, { "epoch": 4.04, "grad_norm": 3.181704521179199, "learning_rate": 5.5022222222222224e-06, "loss": 0.08591601371765137, "step": 2525 }, { "epoch": 4.08, "grad_norm": 6.204383373260498, "learning_rate": 5.4466666666666665e-06, "loss": 0.08950037002563477, "step": 2550 }, { "epoch": 4.12, "grad_norm": 6.119636535644531, "learning_rate": 5.391111111111111e-06, "loss": 0.0826924991607666, "step": 2575 }, { "epoch": 4.16, "grad_norm": 6.250202178955078, "learning_rate": 5.335555555555556e-06, "loss": 0.07985872268676758, "step": 2600 }, { "epoch": 4.16, "eval_loss": 0.324444979429245, "eval_runtime": 1147.6894, "eval_samples_per_second": 6.964, "eval_steps_per_second": 0.871, "eval_wer": 26.546151324912387, "step": 2600 }, { "epoch": 4.2, "grad_norm": 4.202062606811523, "learning_rate": 5.28e-06, "loss": 0.07818631649017334, "step": 2625 }, { "epoch": 4.24, "grad_norm": 8.979434967041016, "learning_rate": 5.224444444444445e-06, "loss": 0.08032341957092286, "step": 2650 }, { "epoch": 4.28, "grad_norm": 5.299781799316406, "learning_rate": 5.168888888888889e-06, "loss": 0.08594310760498047, "step": 2675 }, { "epoch": 4.32, "grad_norm": 4.9248762130737305, "learning_rate": 5.113333333333333e-06, "loss": 0.08457598686218262, "step": 2700 }, { "epoch": 4.32, "eval_loss": 0.32594889402389526, "eval_runtime": 1130.0663, "eval_samples_per_second": 7.073, "eval_steps_per_second": 0.885, "eval_wer": 26.546151324912387, "step": 2700 }, { "epoch": 4.36, "grad_norm": 10.462182998657227, "learning_rate": 5.057777777777778e-06, "loss": 0.08635611534118652, "step": 2725 }, { "epoch": 4.4, "grad_norm": 6.411299705505371, "learning_rate": 5.002222222222223e-06, "loss": 0.08314334869384765, "step": 2750 }, { "epoch": 4.44, "grad_norm": 6.515404224395752, "learning_rate": 4.946666666666667e-06, "loss": 0.09247981071472168, "step": 2775 }, { "epoch": 4.48, "grad_norm": 9.802311897277832, "learning_rate": 4.891111111111111e-06, "loss": 0.09131069183349609, "step": 2800 }, { "epoch": 4.48, "eval_loss": 0.32451489567756653, "eval_runtime": 1144.7419, "eval_samples_per_second": 6.982, "eval_steps_per_second": 0.874, "eval_wer": 26.392355595895417, "step": 2800 }, { "epoch": 4.52, "grad_norm": 7.073087215423584, "learning_rate": 4.835555555555556e-06, "loss": 0.08045567512512207, "step": 2825 }, { "epoch": 4.5600000000000005, "grad_norm": 6.24620246887207, "learning_rate": 4.78e-06, "loss": 0.09462824821472168, "step": 2850 }, { "epoch": 4.6, "grad_norm": 4.449136734008789, "learning_rate": 4.724444444444445e-06, "loss": 0.08349855422973633, "step": 2875 }, { "epoch": 4.64, "grad_norm": 3.942056894302368, "learning_rate": 4.66888888888889e-06, "loss": 0.07720262527465821, "step": 2900 }, { "epoch": 4.64, "eval_loss": 0.32689812779426575, "eval_runtime": 1153.1464, "eval_samples_per_second": 6.931, "eval_steps_per_second": 0.867, "eval_wer": 27.04031465093412, "step": 2900 }, { "epoch": 4.68, "grad_norm": 5.481267929077148, "learning_rate": 4.613333333333334e-06, "loss": 0.09236433029174805, "step": 2925 }, { "epoch": 4.72, "grad_norm": 7.2641215324401855, "learning_rate": 4.557777777777778e-06, "loss": 0.09008319854736328, "step": 2950 }, { "epoch": 4.76, "grad_norm": 8.626544952392578, "learning_rate": 4.502222222222223e-06, "loss": 0.09662159919738769, "step": 2975 }, { "epoch": 4.8, "grad_norm": 7.221775531768799, "learning_rate": 4.446666666666667e-06, "loss": 0.08148813247680664, "step": 3000 }, { "epoch": 4.8, "eval_loss": 0.32437387108802795, "eval_runtime": 1136.3082, "eval_samples_per_second": 7.034, "eval_steps_per_second": 0.88, "eval_wer": 26.768020573330308, "step": 3000 }, { "epoch": 4.84, "grad_norm": 3.961613655090332, "learning_rate": 4.391111111111112e-06, "loss": 0.07602582931518555, "step": 3025 }, { "epoch": 4.88, "grad_norm": 11.219801902770996, "learning_rate": 4.3355555555555565e-06, "loss": 0.0879791259765625, "step": 3050 }, { "epoch": 4.92, "grad_norm": 5.104950904846191, "learning_rate": 4.2800000000000005e-06, "loss": 0.08852799415588379, "step": 3075 }, { "epoch": 4.96, "grad_norm": 5.801946640014648, "learning_rate": 4.2244444444444446e-06, "loss": 0.07789647579193115, "step": 3100 }, { "epoch": 4.96, "eval_loss": 0.32314595580101013, "eval_runtime": 1146.6672, "eval_samples_per_second": 6.971, "eval_steps_per_second": 0.872, "eval_wer": 26.296548420442228, "step": 3100 }, { "epoch": 5.0, "grad_norm": 7.96620512008667, "learning_rate": 4.168888888888889e-06, "loss": 0.08891249656677246, "step": 3125 }, { "epoch": 5.04, "grad_norm": 2.248185396194458, "learning_rate": 4.1133333333333335e-06, "loss": 0.06223374366760254, "step": 3150 }, { "epoch": 5.08, "grad_norm": 3.364957571029663, "learning_rate": 4.057777777777778e-06, "loss": 0.058481874465942385, "step": 3175 }, { "epoch": 5.12, "grad_norm": 3.7165310382843018, "learning_rate": 4.002222222222222e-06, "loss": 0.06104232311248779, "step": 3200 }, { "epoch": 5.12, "eval_loss": 0.32432663440704346, "eval_runtime": 1128.5261, "eval_samples_per_second": 7.083, "eval_steps_per_second": 0.886, "eval_wer": 26.44278042508131, "step": 3200 }, { "epoch": 5.16, "grad_norm": 5.533367156982422, "learning_rate": 3.946666666666667e-06, "loss": 0.06542285442352296, "step": 3225 }, { "epoch": 5.2, "grad_norm": 3.9828567504882812, "learning_rate": 3.891111111111111e-06, "loss": 0.05872833728790283, "step": 3250 }, { "epoch": 5.24, "grad_norm": 4.2224249839782715, "learning_rate": 3.835555555555555e-06, "loss": 0.05860544204711914, "step": 3275 }, { "epoch": 5.28, "grad_norm": 4.558178901672363, "learning_rate": 3.7800000000000002e-06, "loss": 0.055550127029418944, "step": 3300 }, { "epoch": 5.28, "eval_loss": 0.3261101543903351, "eval_runtime": 1144.1278, "eval_samples_per_second": 6.986, "eval_steps_per_second": 0.874, "eval_wer": 26.2158686937448, "step": 3300 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2991385677824e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }