| { |
| "best_global_step": 3300, |
| "best_metric": 26.2158686937448, |
| "best_model_checkpoint": "./whisper-tiny-ru/checkpoint-3300", |
| "epoch": 5.28, |
| "eval_steps": 100, |
| "global_step": 3300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "grad_norm": 30.96117401123047, |
| "learning_rate": 4.800000000000001e-07, |
| "loss": 1.661016845703125, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 22.207429885864258, |
| "learning_rate": 9.800000000000001e-07, |
| "loss": 1.5516070556640624, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 21.638269424438477, |
| "learning_rate": 1.48e-06, |
| "loss": 1.33806884765625, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 21.087303161621094, |
| "learning_rate": 1.98e-06, |
| "loss": 1.0702142333984375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_loss": 0.9838109016418457, |
| "eval_runtime": 1688.971, |
| "eval_samples_per_second": 4.732, |
| "eval_steps_per_second": 0.592, |
| "eval_wer": 58.649118826109984, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 18.6301212310791, |
| "learning_rate": 2.4800000000000004e-06, |
| "loss": 0.9264418029785156, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 17.07522964477539, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 0.7360333251953125, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 19.41968536376953, |
| "learning_rate": 3.48e-06, |
| "loss": 0.7099385833740235, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 16.938819885253906, |
| "learning_rate": 3.980000000000001e-06, |
| "loss": 0.700680923461914, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_loss": 0.6107771992683411, |
| "eval_runtime": 1633.6115, |
| "eval_samples_per_second": 4.893, |
| "eval_steps_per_second": 0.612, |
| "eval_wer": 45.457983511080855, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 15.689166069030762, |
| "learning_rate": 4.48e-06, |
| "loss": 0.6384918212890625, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 20.370248794555664, |
| "learning_rate": 4.980000000000001e-06, |
| "loss": 0.6048641204833984, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 11.012890815734863, |
| "learning_rate": 5.480000000000001e-06, |
| "loss": 0.5702555847167968, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 19.070072174072266, |
| "learning_rate": 5.98e-06, |
| "loss": 0.5977656555175781, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_loss": 0.532837450504303, |
| "eval_runtime": 1594.1788, |
| "eval_samples_per_second": 5.014, |
| "eval_steps_per_second": 0.627, |
| "eval_wer": 41.2702014471926, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 13.001229286193848, |
| "learning_rate": 6.480000000000001e-06, |
| "loss": 0.5543878555297852, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 19.709369659423828, |
| "learning_rate": 6.98e-06, |
| "loss": 0.545843734741211, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 15.140043258666992, |
| "learning_rate": 7.48e-06, |
| "loss": 0.5528886413574219, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 12.472454071044922, |
| "learning_rate": 7.980000000000002e-06, |
| "loss": 0.49836795806884765, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 0.4811266362667084, |
| "eval_runtime": 1597.9191, |
| "eval_samples_per_second": 5.002, |
| "eval_steps_per_second": 0.626, |
| "eval_wer": 37.518594155762294, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 15.188615798950195, |
| "learning_rate": 8.48e-06, |
| "loss": 0.48630184173583985, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 17.552886962890625, |
| "learning_rate": 8.98e-06, |
| "loss": 0.4862052917480469, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 12.936896324157715, |
| "learning_rate": 9.48e-06, |
| "loss": 0.47983272552490236, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 14.850130081176758, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.47797527313232424, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_loss": 0.44456836581230164, |
| "eval_runtime": 1586.2776, |
| "eval_samples_per_second": 5.039, |
| "eval_steps_per_second": 0.63, |
| "eval_wer": 35.09568111338023, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 16.829805374145508, |
| "learning_rate": 9.946666666666667e-06, |
| "loss": 0.5019921493530274, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 13.922273635864258, |
| "learning_rate": 9.891111111111113e-06, |
| "loss": 0.46826896667480467, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 15.372945785522461, |
| "learning_rate": 9.835555555555556e-06, |
| "loss": 0.42755321502685545, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 16.532119750976562, |
| "learning_rate": 9.780000000000001e-06, |
| "loss": 0.49279567718505857, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_loss": 0.4159170389175415, |
| "eval_runtime": 1613.2907, |
| "eval_samples_per_second": 4.954, |
| "eval_steps_per_second": 0.62, |
| "eval_wer": 33.749338174116936, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 17.019622802734375, |
| "learning_rate": 9.724444444444445e-06, |
| "loss": 0.3978841781616211, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 9.66409683227539, |
| "learning_rate": 9.66888888888889e-06, |
| "loss": 0.3251683807373047, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 11.652173042297363, |
| "learning_rate": 9.613333333333335e-06, |
| "loss": 0.34501224517822265, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 10.360984802246094, |
| "learning_rate": 9.557777777777777e-06, |
| "loss": 0.34316062927246094, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_loss": 0.3949244022369385, |
| "eval_runtime": 1589.2681, |
| "eval_samples_per_second": 5.029, |
| "eval_steps_per_second": 0.629, |
| "eval_wer": 32.24919950583667, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 12.48491382598877, |
| "learning_rate": 9.502222222222223e-06, |
| "loss": 0.34458335876464846, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 11.152288436889648, |
| "learning_rate": 9.446666666666667e-06, |
| "loss": 0.32888599395751955, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 10.6038818359375, |
| "learning_rate": 9.391111111111111e-06, |
| "loss": 0.31568107604980467, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 18.213455200195312, |
| "learning_rate": 9.335555555555557e-06, |
| "loss": 0.34547271728515627, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_loss": 0.38666781783103943, |
| "eval_runtime": 1606.6532, |
| "eval_samples_per_second": 4.975, |
| "eval_steps_per_second": 0.622, |
| "eval_wer": 31.583591760582912, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 13.49360466003418, |
| "learning_rate": 9.280000000000001e-06, |
| "loss": 0.3703644943237305, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 9.994672775268555, |
| "learning_rate": 9.224444444444445e-06, |
| "loss": 0.31469236373901366, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 12.134446144104004, |
| "learning_rate": 9.168888888888889e-06, |
| "loss": 0.3096772575378418, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 9.686901092529297, |
| "learning_rate": 9.113333333333335e-06, |
| "loss": 0.30561195373535155, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.44, |
| "eval_loss": 0.37363526225090027, |
| "eval_runtime": 1588.6091, |
| "eval_samples_per_second": 5.031, |
| "eval_steps_per_second": 0.629, |
| "eval_wer": 30.577616418324382, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 11.473066329956055, |
| "learning_rate": 9.057777777777779e-06, |
| "loss": 0.33498191833496094, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 10.787580490112305, |
| "learning_rate": 9.002222222222223e-06, |
| "loss": 0.33107086181640627, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 9.3997220993042, |
| "learning_rate": 8.946666666666669e-06, |
| "loss": 0.31393367767333985, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 14.870068550109863, |
| "learning_rate": 8.891111111111111e-06, |
| "loss": 0.3480434036254883, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_loss": 0.36670026183128357, |
| "eval_runtime": 1580.5347, |
| "eval_samples_per_second": 5.057, |
| "eval_steps_per_second": 0.633, |
| "eval_wer": 30.19438771651161, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 15.575242042541504, |
| "learning_rate": 8.835555555555557e-06, |
| "loss": 0.3521144485473633, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 13.404891014099121, |
| "learning_rate": 8.78e-06, |
| "loss": 0.3038243865966797, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 11.040489196777344, |
| "learning_rate": 8.724444444444445e-06, |
| "loss": 0.33296077728271484, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 11.390976905822754, |
| "learning_rate": 8.66888888888889e-06, |
| "loss": 0.3266580581665039, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_loss": 0.3538387417793274, |
| "eval_runtime": 1245.1395, |
| "eval_samples_per_second": 6.419, |
| "eval_steps_per_second": 0.803, |
| "eval_wer": 29.1329450621486, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 9.734987258911133, |
| "learning_rate": 8.613333333333333e-06, |
| "loss": 0.30483461380004884, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 13.66518497467041, |
| "learning_rate": 8.557777777777778e-06, |
| "loss": 0.337967643737793, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 14.166866302490234, |
| "learning_rate": 8.502222222222223e-06, |
| "loss": 0.33347091674804685, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 10.097210884094238, |
| "learning_rate": 8.446666666666668e-06, |
| "loss": 0.30127151489257814, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_loss": 0.3475528061389923, |
| "eval_runtime": 1114.263, |
| "eval_samples_per_second": 7.173, |
| "eval_steps_per_second": 0.897, |
| "eval_wer": 28.73206767012077, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 8.526341438293457, |
| "learning_rate": 8.391111111111112e-06, |
| "loss": 0.30967933654785157, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 13.744101524353027, |
| "learning_rate": 8.335555555555556e-06, |
| "loss": 0.2926918983459473, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 12.457859992980957, |
| "learning_rate": 8.28e-06, |
| "loss": 0.19928192138671874, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 10.412860870361328, |
| "learning_rate": 8.224444444444444e-06, |
| "loss": 0.2255691719055176, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.08, |
| "eval_loss": 0.3463591933250427, |
| "eval_runtime": 1136.9055, |
| "eval_samples_per_second": 7.03, |
| "eval_steps_per_second": 0.88, |
| "eval_wer": 28.709376496987115, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 12.349778175354004, |
| "learning_rate": 8.16888888888889e-06, |
| "loss": 0.2224934768676758, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 7.292425155639648, |
| "learning_rate": 8.113333333333334e-06, |
| "loss": 0.20890113830566406, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 7.30359411239624, |
| "learning_rate": 8.057777777777778e-06, |
| "loss": 0.22248428344726562, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 6.8495683670043945, |
| "learning_rate": 8.002222222222222e-06, |
| "loss": 0.1987138557434082, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.24, |
| "eval_loss": 0.34195244312286377, |
| "eval_runtime": 1142.688, |
| "eval_samples_per_second": 6.995, |
| "eval_steps_per_second": 0.875, |
| "eval_wer": 28.3009353805814, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 9.956233024597168, |
| "learning_rate": 7.946666666666666e-06, |
| "loss": 0.18319826126098632, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 9.506035804748535, |
| "learning_rate": 7.891111111111112e-06, |
| "loss": 0.2086960792541504, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 9.610784530639648, |
| "learning_rate": 7.835555555555556e-06, |
| "loss": 0.20998584747314453, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 10.06142807006836, |
| "learning_rate": 7.78e-06, |
| "loss": 0.19923351287841798, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_loss": 0.337080180644989, |
| "eval_runtime": 1129.0462, |
| "eval_samples_per_second": 7.079, |
| "eval_steps_per_second": 0.886, |
| "eval_wer": 28.12444847843078, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 10.249608993530273, |
| "learning_rate": 7.724444444444446e-06, |
| "loss": 0.20815914154052734, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 8.729615211486816, |
| "learning_rate": 7.66888888888889e-06, |
| "loss": 0.19476179122924805, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 7.07798957824707, |
| "learning_rate": 7.613333333333334e-06, |
| "loss": 0.20427942276000977, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 12.55591106414795, |
| "learning_rate": 7.557777777777779e-06, |
| "loss": 0.19880136489868164, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_loss": 0.3345155715942383, |
| "eval_runtime": 1157.6671, |
| "eval_samples_per_second": 6.904, |
| "eval_steps_per_second": 0.864, |
| "eval_wer": 27.466404457554898, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 9.194686889648438, |
| "learning_rate": 7.502222222222223e-06, |
| "loss": 0.20003116607666016, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 8.028614044189453, |
| "learning_rate": 7.446666666666668e-06, |
| "loss": 0.20664962768554687, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 9.309157371520996, |
| "learning_rate": 7.3911111111111125e-06, |
| "loss": 0.2059481430053711, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 7.072760105133057, |
| "learning_rate": 7.335555555555556e-06, |
| "loss": 0.18960922241210937, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "eval_loss": 0.3304011821746826, |
| "eval_runtime": 1136.784, |
| "eval_samples_per_second": 7.031, |
| "eval_steps_per_second": 0.88, |
| "eval_wer": 27.48405314776996, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 10.164315223693848, |
| "learning_rate": 7.280000000000001e-06, |
| "loss": 0.20121437072753906, |
| "step": 1725 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 11.21286392211914, |
| "learning_rate": 7.224444444444445e-06, |
| "loss": 0.2160506057739258, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 7.09088659286499, |
| "learning_rate": 7.1688888888888895e-06, |
| "loss": 0.1943138313293457, |
| "step": 1775 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 7.879263401031494, |
| "learning_rate": 7.113333333333334e-06, |
| "loss": 0.193405818939209, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.88, |
| "eval_loss": 0.3283212184906006, |
| "eval_runtime": 1146.053, |
| "eval_samples_per_second": 6.974, |
| "eval_steps_per_second": 0.873, |
| "eval_wer": 27.456319491717725, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 10.002179145812988, |
| "learning_rate": 7.057777777777778e-06, |
| "loss": 0.18917253494262695, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 9.466012954711914, |
| "learning_rate": 7.0022222222222225e-06, |
| "loss": 0.19346149444580077, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 14.86670207977295, |
| "learning_rate": 6.946666666666667e-06, |
| "loss": 0.20311836242675782, |
| "step": 1875 |
| }, |
| { |
| "epoch": 3.04, |
| "grad_norm": 7.088613986968994, |
| "learning_rate": 6.891111111111111e-06, |
| "loss": 0.12550613403320313, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.04, |
| "eval_loss": 0.326405793428421, |
| "eval_runtime": 1140.5284, |
| "eval_samples_per_second": 7.008, |
| "eval_steps_per_second": 0.877, |
| "eval_wer": 27.247056450596276, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.08, |
| "grad_norm": 5.738883972167969, |
| "learning_rate": 6.835555555555556e-06, |
| "loss": 0.1307435894012451, |
| "step": 1925 |
| }, |
| { |
| "epoch": 3.12, |
| "grad_norm": 5.431838035583496, |
| "learning_rate": 6.780000000000001e-06, |
| "loss": 0.11987467765808106, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.16, |
| "grad_norm": 8.73540210723877, |
| "learning_rate": 6.724444444444444e-06, |
| "loss": 0.1516973114013672, |
| "step": 1975 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 6.3792724609375, |
| "learning_rate": 6.668888888888889e-06, |
| "loss": 0.13660179138183592, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.2, |
| "eval_loss": 0.32666918635368347, |
| "eval_runtime": 1124.088, |
| "eval_samples_per_second": 7.111, |
| "eval_steps_per_second": 0.89, |
| "eval_wer": 27.363033557723824, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.24, |
| "grad_norm": 5.221762657165527, |
| "learning_rate": 6.613333333333334e-06, |
| "loss": 0.12183536529541016, |
| "step": 2025 |
| }, |
| { |
| "epoch": 3.2800000000000002, |
| "grad_norm": 7.180768013000488, |
| "learning_rate": 6.557777777777778e-06, |
| "loss": 0.1264752769470215, |
| "step": 2050 |
| }, |
| { |
| "epoch": 3.32, |
| "grad_norm": 8.103682518005371, |
| "learning_rate": 6.502222222222223e-06, |
| "loss": 0.14041830062866212, |
| "step": 2075 |
| }, |
| { |
| "epoch": 3.36, |
| "grad_norm": 6.988570690155029, |
| "learning_rate": 6.446666666666668e-06, |
| "loss": 0.14171558380126953, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.36, |
| "eval_loss": 0.3258770704269409, |
| "eval_runtime": 1142.5612, |
| "eval_samples_per_second": 6.996, |
| "eval_steps_per_second": 0.875, |
| "eval_wer": 27.148728033683785, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 7.272939205169678, |
| "learning_rate": 6.391111111111111e-06, |
| "loss": 0.12976963996887206, |
| "step": 2125 |
| }, |
| { |
| "epoch": 3.44, |
| "grad_norm": 9.169845581054688, |
| "learning_rate": 6.335555555555556e-06, |
| "loss": 0.13874659538269044, |
| "step": 2150 |
| }, |
| { |
| "epoch": 3.48, |
| "grad_norm": 9.13535213470459, |
| "learning_rate": 6.280000000000001e-06, |
| "loss": 0.1423179054260254, |
| "step": 2175 |
| }, |
| { |
| "epoch": 3.52, |
| "grad_norm": 5.841824531555176, |
| "learning_rate": 6.224444444444445e-06, |
| "loss": 0.12778244972229003, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.52, |
| "eval_loss": 0.32502686977386475, |
| "eval_runtime": 1129.5273, |
| "eval_samples_per_second": 7.076, |
| "eval_steps_per_second": 0.885, |
| "eval_wer": 27.098303204497892, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.56, |
| "grad_norm": 6.991465091705322, |
| "learning_rate": 6.16888888888889e-06, |
| "loss": 0.1317989444732666, |
| "step": 2225 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 8.489235877990723, |
| "learning_rate": 6.113333333333333e-06, |
| "loss": 0.12462780952453613, |
| "step": 2250 |
| }, |
| { |
| "epoch": 3.64, |
| "grad_norm": 8.89243221282959, |
| "learning_rate": 6.057777777777778e-06, |
| "loss": 0.11276106834411621, |
| "step": 2275 |
| }, |
| { |
| "epoch": 3.68, |
| "grad_norm": 7.854825019836426, |
| "learning_rate": 6.002222222222223e-06, |
| "loss": 0.128636474609375, |
| "step": 2300 |
| }, |
| { |
| "epoch": 3.68, |
| "eval_loss": 0.32361486554145813, |
| "eval_runtime": 1145.2768, |
| "eval_samples_per_second": 6.979, |
| "eval_steps_per_second": 0.873, |
| "eval_wer": 27.141164309305903, |
| "step": 2300 |
| }, |
| { |
| "epoch": 3.7199999999999998, |
| "grad_norm": 10.046810150146484, |
| "learning_rate": 5.946666666666668e-06, |
| "loss": 0.13479949951171874, |
| "step": 2325 |
| }, |
| { |
| "epoch": 3.76, |
| "grad_norm": 6.566898345947266, |
| "learning_rate": 5.891111111111112e-06, |
| "loss": 0.13264819145202636, |
| "step": 2350 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 6.007510662078857, |
| "learning_rate": 5.8355555555555565e-06, |
| "loss": 0.11804925918579101, |
| "step": 2375 |
| }, |
| { |
| "epoch": 3.84, |
| "grad_norm": 6.695367336273193, |
| "learning_rate": 5.78e-06, |
| "loss": 0.12892417907714843, |
| "step": 2400 |
| }, |
| { |
| "epoch": 3.84, |
| "eval_loss": 0.32250407338142395, |
| "eval_runtime": 1145.1862, |
| "eval_samples_per_second": 6.98, |
| "eval_steps_per_second": 0.873, |
| "eval_wer": 26.57640622242392, |
| "step": 2400 |
| }, |
| { |
| "epoch": 3.88, |
| "grad_norm": 8.012511253356934, |
| "learning_rate": 5.724444444444445e-06, |
| "loss": 0.13116491317749024, |
| "step": 2425 |
| }, |
| { |
| "epoch": 3.92, |
| "grad_norm": 7.509751319885254, |
| "learning_rate": 5.6688888888888895e-06, |
| "loss": 0.1309671401977539, |
| "step": 2450 |
| }, |
| { |
| "epoch": 3.96, |
| "grad_norm": 9.579854011535645, |
| "learning_rate": 5.613333333333334e-06, |
| "loss": 0.12149713516235351, |
| "step": 2475 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 16.018325805664062, |
| "learning_rate": 5.557777777777778e-06, |
| "loss": 0.1331118392944336, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.3214564025402069, |
| "eval_runtime": 1135.6798, |
| "eval_samples_per_second": 7.038, |
| "eval_steps_per_second": 0.881, |
| "eval_wer": 27.128558102009432, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.04, |
| "grad_norm": 3.181704521179199, |
| "learning_rate": 5.5022222222222224e-06, |
| "loss": 0.08591601371765137, |
| "step": 2525 |
| }, |
| { |
| "epoch": 4.08, |
| "grad_norm": 6.204383373260498, |
| "learning_rate": 5.4466666666666665e-06, |
| "loss": 0.08950037002563477, |
| "step": 2550 |
| }, |
| { |
| "epoch": 4.12, |
| "grad_norm": 6.119636535644531, |
| "learning_rate": 5.391111111111111e-06, |
| "loss": 0.0826924991607666, |
| "step": 2575 |
| }, |
| { |
| "epoch": 4.16, |
| "grad_norm": 6.250202178955078, |
| "learning_rate": 5.335555555555556e-06, |
| "loss": 0.07985872268676758, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.16, |
| "eval_loss": 0.324444979429245, |
| "eval_runtime": 1147.6894, |
| "eval_samples_per_second": 6.964, |
| "eval_steps_per_second": 0.871, |
| "eval_wer": 26.546151324912387, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 4.202062606811523, |
| "learning_rate": 5.28e-06, |
| "loss": 0.07818631649017334, |
| "step": 2625 |
| }, |
| { |
| "epoch": 4.24, |
| "grad_norm": 8.979434967041016, |
| "learning_rate": 5.224444444444445e-06, |
| "loss": 0.08032341957092286, |
| "step": 2650 |
| }, |
| { |
| "epoch": 4.28, |
| "grad_norm": 5.299781799316406, |
| "learning_rate": 5.168888888888889e-06, |
| "loss": 0.08594310760498047, |
| "step": 2675 |
| }, |
| { |
| "epoch": 4.32, |
| "grad_norm": 4.9248762130737305, |
| "learning_rate": 5.113333333333333e-06, |
| "loss": 0.08457598686218262, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.32, |
| "eval_loss": 0.32594889402389526, |
| "eval_runtime": 1130.0663, |
| "eval_samples_per_second": 7.073, |
| "eval_steps_per_second": 0.885, |
| "eval_wer": 26.546151324912387, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.36, |
| "grad_norm": 10.462182998657227, |
| "learning_rate": 5.057777777777778e-06, |
| "loss": 0.08635611534118652, |
| "step": 2725 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 6.411299705505371, |
| "learning_rate": 5.002222222222223e-06, |
| "loss": 0.08314334869384765, |
| "step": 2750 |
| }, |
| { |
| "epoch": 4.44, |
| "grad_norm": 6.515404224395752, |
| "learning_rate": 4.946666666666667e-06, |
| "loss": 0.09247981071472168, |
| "step": 2775 |
| }, |
| { |
| "epoch": 4.48, |
| "grad_norm": 9.802311897277832, |
| "learning_rate": 4.891111111111111e-06, |
| "loss": 0.09131069183349609, |
| "step": 2800 |
| }, |
| { |
| "epoch": 4.48, |
| "eval_loss": 0.32451489567756653, |
| "eval_runtime": 1144.7419, |
| "eval_samples_per_second": 6.982, |
| "eval_steps_per_second": 0.874, |
| "eval_wer": 26.392355595895417, |
| "step": 2800 |
| }, |
| { |
| "epoch": 4.52, |
| "grad_norm": 7.073087215423584, |
| "learning_rate": 4.835555555555556e-06, |
| "loss": 0.08045567512512207, |
| "step": 2825 |
| }, |
| { |
| "epoch": 4.5600000000000005, |
| "grad_norm": 6.24620246887207, |
| "learning_rate": 4.78e-06, |
| "loss": 0.09462824821472168, |
| "step": 2850 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 4.449136734008789, |
| "learning_rate": 4.724444444444445e-06, |
| "loss": 0.08349855422973633, |
| "step": 2875 |
| }, |
| { |
| "epoch": 4.64, |
| "grad_norm": 3.942056894302368, |
| "learning_rate": 4.66888888888889e-06, |
| "loss": 0.07720262527465821, |
| "step": 2900 |
| }, |
| { |
| "epoch": 4.64, |
| "eval_loss": 0.32689812779426575, |
| "eval_runtime": 1153.1464, |
| "eval_samples_per_second": 6.931, |
| "eval_steps_per_second": 0.867, |
| "eval_wer": 27.04031465093412, |
| "step": 2900 |
| }, |
| { |
| "epoch": 4.68, |
| "grad_norm": 5.481267929077148, |
| "learning_rate": 4.613333333333334e-06, |
| "loss": 0.09236433029174805, |
| "step": 2925 |
| }, |
| { |
| "epoch": 4.72, |
| "grad_norm": 7.2641215324401855, |
| "learning_rate": 4.557777777777778e-06, |
| "loss": 0.09008319854736328, |
| "step": 2950 |
| }, |
| { |
| "epoch": 4.76, |
| "grad_norm": 8.626544952392578, |
| "learning_rate": 4.502222222222223e-06, |
| "loss": 0.09662159919738769, |
| "step": 2975 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 7.221775531768799, |
| "learning_rate": 4.446666666666667e-06, |
| "loss": 0.08148813247680664, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.8, |
| "eval_loss": 0.32437387108802795, |
| "eval_runtime": 1136.3082, |
| "eval_samples_per_second": 7.034, |
| "eval_steps_per_second": 0.88, |
| "eval_wer": 26.768020573330308, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.84, |
| "grad_norm": 3.961613655090332, |
| "learning_rate": 4.391111111111112e-06, |
| "loss": 0.07602582931518555, |
| "step": 3025 |
| }, |
| { |
| "epoch": 4.88, |
| "grad_norm": 11.219801902770996, |
| "learning_rate": 4.3355555555555565e-06, |
| "loss": 0.0879791259765625, |
| "step": 3050 |
| }, |
| { |
| "epoch": 4.92, |
| "grad_norm": 5.104950904846191, |
| "learning_rate": 4.2800000000000005e-06, |
| "loss": 0.08852799415588379, |
| "step": 3075 |
| }, |
| { |
| "epoch": 4.96, |
| "grad_norm": 5.801946640014648, |
| "learning_rate": 4.2244444444444446e-06, |
| "loss": 0.07789647579193115, |
| "step": 3100 |
| }, |
| { |
| "epoch": 4.96, |
| "eval_loss": 0.32314595580101013, |
| "eval_runtime": 1146.6672, |
| "eval_samples_per_second": 6.971, |
| "eval_steps_per_second": 0.872, |
| "eval_wer": 26.296548420442228, |
| "step": 3100 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 7.96620512008667, |
| "learning_rate": 4.168888888888889e-06, |
| "loss": 0.08891249656677246, |
| "step": 3125 |
| }, |
| { |
| "epoch": 5.04, |
| "grad_norm": 2.248185396194458, |
| "learning_rate": 4.1133333333333335e-06, |
| "loss": 0.06223374366760254, |
| "step": 3150 |
| }, |
| { |
| "epoch": 5.08, |
| "grad_norm": 3.364957571029663, |
| "learning_rate": 4.057777777777778e-06, |
| "loss": 0.058481874465942385, |
| "step": 3175 |
| }, |
| { |
| "epoch": 5.12, |
| "grad_norm": 3.7165310382843018, |
| "learning_rate": 4.002222222222222e-06, |
| "loss": 0.06104232311248779, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.12, |
| "eval_loss": 0.32432663440704346, |
| "eval_runtime": 1128.5261, |
| "eval_samples_per_second": 7.083, |
| "eval_steps_per_second": 0.886, |
| "eval_wer": 26.44278042508131, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.16, |
| "grad_norm": 5.533367156982422, |
| "learning_rate": 3.946666666666667e-06, |
| "loss": 0.06542285442352296, |
| "step": 3225 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 3.9828567504882812, |
| "learning_rate": 3.891111111111111e-06, |
| "loss": 0.05872833728790283, |
| "step": 3250 |
| }, |
| { |
| "epoch": 5.24, |
| "grad_norm": 4.2224249839782715, |
| "learning_rate": 3.835555555555555e-06, |
| "loss": 0.05860544204711914, |
| "step": 3275 |
| }, |
| { |
| "epoch": 5.28, |
| "grad_norm": 4.558178901672363, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 0.055550127029418944, |
| "step": 3300 |
| }, |
| { |
| "epoch": 5.28, |
| "eval_loss": 0.3261101543903351, |
| "eval_runtime": 1144.1278, |
| "eval_samples_per_second": 6.986, |
| "eval_steps_per_second": 0.874, |
| "eval_wer": 26.2158686937448, |
| "step": 3300 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2991385677824e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|