| { |
| "best_global_step": 46500, |
| "best_metric": 0.15599121044112013, |
| "best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v4/checkpoint-46500", |
| "epoch": 5.403631058518398, |
| "eval_steps": 500, |
| "global_step": 50000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01080672178094775, |
| "grad_norm": 25.346445083618164, |
| "learning_rate": 1.94e-06, |
| "loss": 3.5433, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0216134435618955, |
| "grad_norm": 21.656307220458984, |
| "learning_rate": 3.94e-06, |
| "loss": 2.0264, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03242016534284325, |
| "grad_norm": 18.657211303710938, |
| "learning_rate": 5.94e-06, |
| "loss": 1.5688, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.043226887123791, |
| "grad_norm": 16.42237663269043, |
| "learning_rate": 7.94e-06, |
| "loss": 1.3214, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.054033608904738746, |
| "grad_norm": 18.631206512451172, |
| "learning_rate": 9.940000000000001e-06, |
| "loss": 1.1788, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.054033608904738746, |
| "eval_loss": 0.967095136642456, |
| "eval_runtime": 5770.5819, |
| "eval_samples_per_second": 3.207, |
| "eval_steps_per_second": 0.802, |
| "eval_wer": 0.6590292385770924, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0648403306856865, |
| "grad_norm": 15.337555885314941, |
| "learning_rate": 1.1940000000000001e-05, |
| "loss": 1.0627, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07564705246663425, |
| "grad_norm": 14.623177528381348, |
| "learning_rate": 1.394e-05, |
| "loss": 0.9632, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.086453774247582, |
| "grad_norm": 17.126712799072266, |
| "learning_rate": 1.5940000000000003e-05, |
| "loss": 0.906, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.09726049602852975, |
| "grad_norm": 16.75067710876465, |
| "learning_rate": 1.794e-05, |
| "loss": 0.8503, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.10806721780947749, |
| "grad_norm": 14.265076637268066, |
| "learning_rate": 1.9940000000000002e-05, |
| "loss": 0.8015, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.10806721780947749, |
| "eval_loss": 0.6976613402366638, |
| "eval_runtime": 5463.0331, |
| "eval_samples_per_second": 3.387, |
| "eval_steps_per_second": 0.847, |
| "eval_wer": 0.5304858499049883, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.11887393959042525, |
| "grad_norm": 13.737130165100098, |
| "learning_rate": 1.9784444444444446e-05, |
| "loss": 0.7589, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.129680661371373, |
| "grad_norm": 18.01378631591797, |
| "learning_rate": 1.9562222222222225e-05, |
| "loss": 0.7589, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.14048738315232073, |
| "grad_norm": 11.696120262145996, |
| "learning_rate": 1.934e-05, |
| "loss": 0.7087, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.1512941049332685, |
| "grad_norm": 13.419560432434082, |
| "learning_rate": 1.911777777777778e-05, |
| "loss": 0.683, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.16210082671421625, |
| "grad_norm": 12.753211975097656, |
| "learning_rate": 1.8895555555555557e-05, |
| "loss": 0.6498, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.16210082671421625, |
| "eval_loss": 0.5724753737449646, |
| "eval_runtime": 4564.4621, |
| "eval_samples_per_second": 4.054, |
| "eval_steps_per_second": 1.013, |
| "eval_wer": 0.6670133485560569, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.172907548495164, |
| "grad_norm": 11.64907455444336, |
| "learning_rate": 1.8673333333333333e-05, |
| "loss": 0.6216, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.18371427027611173, |
| "grad_norm": 13.781865119934082, |
| "learning_rate": 1.8451111111111113e-05, |
| "loss": 0.6138, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.1945209920570595, |
| "grad_norm": 12.58388900756836, |
| "learning_rate": 1.822888888888889e-05, |
| "loss": 0.595, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.20532771383800724, |
| "grad_norm": 14.661055564880371, |
| "learning_rate": 1.8006666666666668e-05, |
| "loss": 0.5938, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.21613443561895498, |
| "grad_norm": 11.948161125183105, |
| "learning_rate": 1.7784444444444448e-05, |
| "loss": 0.5828, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.21613443561895498, |
| "eval_loss": 0.5093731880187988, |
| "eval_runtime": 5328.6402, |
| "eval_samples_per_second": 3.473, |
| "eval_steps_per_second": 0.868, |
| "eval_wer": 0.4828939857208768, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.22694115739990273, |
| "grad_norm": 12.322188377380371, |
| "learning_rate": 1.7562222222222224e-05, |
| "loss": 0.5752, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.2377478791808505, |
| "grad_norm": 17.046159744262695, |
| "learning_rate": 1.734e-05, |
| "loss": 0.5663, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.24855460096179824, |
| "grad_norm": 10.154263496398926, |
| "learning_rate": 1.711777777777778e-05, |
| "loss": 0.537, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.259361322742746, |
| "grad_norm": 11.958285331726074, |
| "learning_rate": 1.6895555555555556e-05, |
| "loss": 0.5246, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.27016804452369375, |
| "grad_norm": 10.264266014099121, |
| "learning_rate": 1.6673333333333335e-05, |
| "loss": 0.5226, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.27016804452369375, |
| "eval_loss": 0.46415480971336365, |
| "eval_runtime": 4645.745, |
| "eval_samples_per_second": 3.983, |
| "eval_steps_per_second": 0.996, |
| "eval_wer": 0.38602898052064843, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.28097476630464147, |
| "grad_norm": 12.049257278442383, |
| "learning_rate": 1.6451111111111115e-05, |
| "loss": 0.493, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.29178148808558924, |
| "grad_norm": 9.821508407592773, |
| "learning_rate": 1.622888888888889e-05, |
| "loss": 0.5153, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.302588209866537, |
| "grad_norm": 10.481095314025879, |
| "learning_rate": 1.6006666666666667e-05, |
| "loss": 0.5, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3133949316474847, |
| "grad_norm": 10.193309783935547, |
| "learning_rate": 1.5784444444444447e-05, |
| "loss": 0.5248, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.3242016534284325, |
| "grad_norm": 12.328668594360352, |
| "learning_rate": 1.5562222222222223e-05, |
| "loss": 0.4955, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.3242016534284325, |
| "eval_loss": 0.4340818226337433, |
| "eval_runtime": 4456.9484, |
| "eval_samples_per_second": 4.152, |
| "eval_steps_per_second": 1.038, |
| "eval_wer": 0.39154200455117727, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.33500837520938026, |
| "grad_norm": 12.583343505859375, |
| "learning_rate": 1.5340000000000002e-05, |
| "loss": 0.5082, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.345815096990328, |
| "grad_norm": 8.40932846069336, |
| "learning_rate": 1.511777777777778e-05, |
| "loss": 0.4905, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.35662181877127574, |
| "grad_norm": 14.150980949401855, |
| "learning_rate": 1.4895555555555556e-05, |
| "loss": 0.466, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.36742854055222346, |
| "grad_norm": 13.014771461486816, |
| "learning_rate": 1.4673333333333336e-05, |
| "loss": 0.4788, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.37823526233317123, |
| "grad_norm": 11.843710899353027, |
| "learning_rate": 1.4451111111111112e-05, |
| "loss": 0.4616, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.37823526233317123, |
| "eval_loss": 0.4127795398235321, |
| "eval_runtime": 4528.1925, |
| "eval_samples_per_second": 4.086, |
| "eval_steps_per_second": 1.022, |
| "eval_wer": 0.35399088200564593, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.389041984114119, |
| "grad_norm": 11.520469665527344, |
| "learning_rate": 1.422888888888889e-05, |
| "loss": 0.4695, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.3998487058950667, |
| "grad_norm": 10.21032428741455, |
| "learning_rate": 1.400666666666667e-05, |
| "loss": 0.47, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.4106554276760145, |
| "grad_norm": 9.393896102905273, |
| "learning_rate": 1.3784444444444445e-05, |
| "loss": 0.4656, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.42146214945696225, |
| "grad_norm": 10.503016471862793, |
| "learning_rate": 1.3562222222222223e-05, |
| "loss": 0.4446, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.43226887123790997, |
| "grad_norm": 10.747596740722656, |
| "learning_rate": 1.3340000000000001e-05, |
| "loss": 0.4474, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.43226887123790997, |
| "eval_loss": 0.3900074064731598, |
| "eval_runtime": 4858.8536, |
| "eval_samples_per_second": 3.808, |
| "eval_steps_per_second": 0.952, |
| "eval_wer": 0.36136504038974343, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.44307559301885774, |
| "grad_norm": 13.275285720825195, |
| "learning_rate": 1.3117777777777779e-05, |
| "loss": 0.4488, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.45388231479980545, |
| "grad_norm": 11.318832397460938, |
| "learning_rate": 1.2897777777777778e-05, |
| "loss": 0.4292, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.4646890365807532, |
| "grad_norm": 10.3064546585083, |
| "learning_rate": 1.2675555555555557e-05, |
| "loss": 0.4302, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.475495758361701, |
| "grad_norm": 11.634562492370605, |
| "learning_rate": 1.2453333333333335e-05, |
| "loss": 0.426, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.4863024801426487, |
| "grad_norm": 10.647918701171875, |
| "learning_rate": 1.2231111111111111e-05, |
| "loss": 0.4387, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.4863024801426487, |
| "eval_loss": 0.37359631061553955, |
| "eval_runtime": 4990.4878, |
| "eval_samples_per_second": 3.708, |
| "eval_steps_per_second": 0.927, |
| "eval_wer": 0.35633684967821144, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.4971092019235965, |
| "grad_norm": 9.396610260009766, |
| "learning_rate": 1.200888888888889e-05, |
| "loss": 0.4195, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.5079159237045442, |
| "grad_norm": 10.845105171203613, |
| "learning_rate": 1.1786666666666668e-05, |
| "loss": 0.4056, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.518722645485492, |
| "grad_norm": 9.404190063476562, |
| "learning_rate": 1.1564444444444445e-05, |
| "loss": 0.4306, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.5295293672664397, |
| "grad_norm": 9.176289558410645, |
| "learning_rate": 1.1342222222222224e-05, |
| "loss": 0.4239, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.5403360890473875, |
| "grad_norm": 10.088706016540527, |
| "learning_rate": 1.1120000000000002e-05, |
| "loss": 0.4154, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.5403360890473875, |
| "eval_loss": 0.36057594418525696, |
| "eval_runtime": 5945.658, |
| "eval_samples_per_second": 3.112, |
| "eval_steps_per_second": 0.778, |
| "eval_wer": 0.32743452795220485, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.5511428108283353, |
| "grad_norm": 9.688194274902344, |
| "learning_rate": 1.0897777777777778e-05, |
| "loss": 0.4115, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.5619495326092829, |
| "grad_norm": 9.752260208129883, |
| "learning_rate": 1.0675555555555558e-05, |
| "loss": 0.3854, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.5727562543902307, |
| "grad_norm": 10.447392463684082, |
| "learning_rate": 1.0453333333333334e-05, |
| "loss": 0.4141, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.5835629761711785, |
| "grad_norm": 11.185776710510254, |
| "learning_rate": 1.0231111111111112e-05, |
| "loss": 0.3924, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.5943696979521262, |
| "grad_norm": 10.3914794921875, |
| "learning_rate": 1.000888888888889e-05, |
| "loss": 0.419, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.5943696979521262, |
| "eval_loss": 0.3494803309440613, |
| "eval_runtime": 6902.9208, |
| "eval_samples_per_second": 2.681, |
| "eval_steps_per_second": 0.67, |
| "eval_wer": 0.314375307908257, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.605176419733074, |
| "grad_norm": 11.420536041259766, |
| "learning_rate": 9.786666666666667e-06, |
| "loss": 0.4096, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.6159831415140217, |
| "grad_norm": 9.05328369140625, |
| "learning_rate": 9.564444444444445e-06, |
| "loss": 0.3917, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.6267898632949694, |
| "grad_norm": 10.281911849975586, |
| "learning_rate": 9.342222222222223e-06, |
| "loss": 0.3965, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.6375965850759172, |
| "grad_norm": 10.587265014648438, |
| "learning_rate": 9.12e-06, |
| "loss": 0.374, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.648403306856865, |
| "grad_norm": 7.721372127532959, |
| "learning_rate": 8.897777777777779e-06, |
| "loss": 0.3799, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.648403306856865, |
| "eval_loss": 0.3397567868232727, |
| "eval_runtime": 7002.8513, |
| "eval_samples_per_second": 2.642, |
| "eval_steps_per_second": 0.661, |
| "eval_wer": 0.2921668139413039, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.6592100286378128, |
| "grad_norm": 6.785597324371338, |
| "learning_rate": 8.675555555555556e-06, |
| "loss": 0.3953, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.6700167504187605, |
| "grad_norm": 9.53781509399414, |
| "learning_rate": 8.453333333333334e-06, |
| "loss": 0.3786, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.6808234721997082, |
| "grad_norm": 8.857239723205566, |
| "learning_rate": 8.231111111111112e-06, |
| "loss": 0.3744, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.691630193980656, |
| "grad_norm": 9.638261795043945, |
| "learning_rate": 8.00888888888889e-06, |
| "loss": 0.3809, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.7024369157616037, |
| "grad_norm": 8.304004669189453, |
| "learning_rate": 7.786666666666666e-06, |
| "loss": 0.3802, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.7024369157616037, |
| "eval_loss": 0.3289755880832672, |
| "eval_runtime": 5885.8991, |
| "eval_samples_per_second": 3.144, |
| "eval_steps_per_second": 0.786, |
| "eval_wer": 0.3044049452998538, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.7132436375425515, |
| "grad_norm": 9.978581428527832, |
| "learning_rate": 7.564444444444446e-06, |
| "loss": 0.3537, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.7240503593234993, |
| "grad_norm": 10.849929809570312, |
| "learning_rate": 7.342222222222223e-06, |
| "loss": 0.3762, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.7348570811044469, |
| "grad_norm": 11.856138229370117, |
| "learning_rate": 7.1200000000000004e-06, |
| "loss": 0.3477, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.7456638028853947, |
| "grad_norm": 10.761491775512695, |
| "learning_rate": 6.897777777777779e-06, |
| "loss": 0.361, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.7564705246663425, |
| "grad_norm": 9.24421501159668, |
| "learning_rate": 6.675555555555556e-06, |
| "loss": 0.3611, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.7564705246663425, |
| "eval_loss": 0.3224972188472748, |
| "eval_runtime": 5632.6127, |
| "eval_samples_per_second": 3.285, |
| "eval_steps_per_second": 0.821, |
| "eval_wer": 0.2823372093932546, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.7672772464472902, |
| "grad_norm": 10.52470874786377, |
| "learning_rate": 6.453333333333334e-06, |
| "loss": 0.3638, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.778083968228238, |
| "grad_norm": 9.080463409423828, |
| "learning_rate": 6.231111111111111e-06, |
| "loss": 0.3532, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.7888906900091858, |
| "grad_norm": 8.789374351501465, |
| "learning_rate": 6.00888888888889e-06, |
| "loss": 0.3592, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.7996974117901334, |
| "grad_norm": 8.97732162475586, |
| "learning_rate": 5.7866666666666674e-06, |
| "loss": 0.3611, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.8105041335710812, |
| "grad_norm": 10.455592155456543, |
| "learning_rate": 5.5644444444444444e-06, |
| "loss": 0.3548, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.8105041335710812, |
| "eval_loss": 0.31678903102874756, |
| "eval_runtime": 3060.9871, |
| "eval_samples_per_second": 6.045, |
| "eval_steps_per_second": 1.511, |
| "eval_wer": 0.27332869353060313, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.821310855352029, |
| "grad_norm": 8.56920051574707, |
| "learning_rate": 5.342222222222223e-06, |
| "loss": 0.3628, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.8321175771329767, |
| "grad_norm": 11.37761402130127, |
| "learning_rate": 5.12e-06, |
| "loss": 0.3353, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.8429242989139245, |
| "grad_norm": 9.396086692810059, |
| "learning_rate": 4.897777777777778e-06, |
| "loss": 0.3704, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.8537310206948722, |
| "grad_norm": 10.0977144241333, |
| "learning_rate": 4.677777777777778e-06, |
| "loss": 0.364, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.8645377424758199, |
| "grad_norm": 8.653088569641113, |
| "learning_rate": 4.455555555555555e-06, |
| "loss": 0.346, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.8645377424758199, |
| "eval_loss": 0.3104597330093384, |
| "eval_runtime": 3053.8514, |
| "eval_samples_per_second": 6.059, |
| "eval_steps_per_second": 1.515, |
| "eval_wer": 0.26601709428444076, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.8753444642567677, |
| "grad_norm": 9.058122634887695, |
| "learning_rate": 4.233333333333334e-06, |
| "loss": 0.3382, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.8861511860377155, |
| "grad_norm": 12.135452270507812, |
| "learning_rate": 4.011111111111111e-06, |
| "loss": 0.3456, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.8969579078186632, |
| "grad_norm": 6.601293563842773, |
| "learning_rate": 3.7888888888888893e-06, |
| "loss": 0.3404, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.9077646295996109, |
| "grad_norm": 9.51930046081543, |
| "learning_rate": 3.566666666666667e-06, |
| "loss": 0.3479, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.9185713513805587, |
| "grad_norm": 7.031350135803223, |
| "learning_rate": 3.3444444444444445e-06, |
| "loss": 0.3547, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.9185713513805587, |
| "eval_loss": 0.3063461184501648, |
| "eval_runtime": 3070.7291, |
| "eval_samples_per_second": 6.026, |
| "eval_steps_per_second": 1.506, |
| "eval_wer": 0.27081068822871623, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.9293780731615064, |
| "grad_norm": 11.10822868347168, |
| "learning_rate": 3.1222222222222228e-06, |
| "loss": 0.3454, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.9401847949424542, |
| "grad_norm": 9.607211112976074, |
| "learning_rate": 2.9e-06, |
| "loss": 0.3319, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.950991516723402, |
| "grad_norm": 10.614663124084473, |
| "learning_rate": 2.677777777777778e-06, |
| "loss": 0.3441, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.9617982385043498, |
| "grad_norm": 8.344138145446777, |
| "learning_rate": 2.455555555555556e-06, |
| "loss": 0.3466, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.9726049602852974, |
| "grad_norm": 11.955930709838867, |
| "learning_rate": 2.2333333333333333e-06, |
| "loss": 0.3211, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.9726049602852974, |
| "eval_loss": 0.30189329385757446, |
| "eval_runtime": 3095.8164, |
| "eval_samples_per_second": 5.977, |
| "eval_steps_per_second": 1.494, |
| "eval_wer": 0.28268910454413937, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.9834116820662452, |
| "grad_norm": 9.438616752624512, |
| "learning_rate": 2.011111111111111e-06, |
| "loss": 0.343, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.994218403847193, |
| "grad_norm": 10.029309272766113, |
| "learning_rate": 1.788888888888889e-06, |
| "loss": 0.3582, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.0050791592370454, |
| "grad_norm": 9.47360610961914, |
| "learning_rate": 1.566666666666667e-06, |
| "loss": 0.3024, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.0158858810179932, |
| "grad_norm": 9.3403959274292, |
| "learning_rate": 1.3444444444444446e-06, |
| "loss": 0.2811, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.026692602798941, |
| "grad_norm": 9.723664283752441, |
| "learning_rate": 1.1222222222222222e-06, |
| "loss": 0.2718, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.026692602798941, |
| "eval_loss": 0.2989746034145355, |
| "eval_runtime": 3189.5179, |
| "eval_samples_per_second": 5.802, |
| "eval_steps_per_second": 1.45, |
| "eval_wer": 0.2659936346077151, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.0374993245798887, |
| "grad_norm": 7.739469051361084, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 0.2765, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.0483060463608365, |
| "grad_norm": 8.379693984985352, |
| "learning_rate": 6.777777777777779e-07, |
| "loss": 0.2872, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.0591127681417842, |
| "grad_norm": 8.849838256835938, |
| "learning_rate": 4.5555555555555563e-07, |
| "loss": 0.2782, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.069919489922732, |
| "grad_norm": 8.006597518920898, |
| "learning_rate": 2.3333333333333336e-07, |
| "loss": 0.2673, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.0807262117036798, |
| "grad_norm": 10.859480857849121, |
| "learning_rate": 1.1111111111111112e-08, |
| "loss": 0.2859, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.0807262117036798, |
| "eval_loss": 0.2979792058467865, |
| "eval_runtime": 3174.7879, |
| "eval_samples_per_second": 5.828, |
| "eval_steps_per_second": 1.457, |
| "eval_wer": 0.2586507557925852, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.0915329334846273, |
| "grad_norm": 8.480154037475586, |
| "learning_rate": 1.7735555555555558e-05, |
| "loss": 0.3044, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.102339655265575, |
| "grad_norm": 8.65846061706543, |
| "learning_rate": 1.7691111111111113e-05, |
| "loss": 0.3126, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.1131463770465229, |
| "grad_norm": 8.054668426513672, |
| "learning_rate": 1.764711111111111e-05, |
| "loss": 0.315, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.1239530988274706, |
| "grad_norm": 9.317754745483398, |
| "learning_rate": 1.7602666666666667e-05, |
| "loss": 0.3139, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.1347598206084184, |
| "grad_norm": 6.9345879554748535, |
| "learning_rate": 1.7558222222222222e-05, |
| "loss": 0.2917, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.1347598206084184, |
| "eval_loss": 0.3268890976905823, |
| "eval_runtime": 5010.973, |
| "eval_samples_per_second": 3.693, |
| "eval_steps_per_second": 0.923, |
| "eval_wer": 0.2518552694343872, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.1455665423893662, |
| "grad_norm": 10.198206901550293, |
| "learning_rate": 1.7513777777777777e-05, |
| "loss": 0.3099, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.156373264170314, |
| "grad_norm": 10.446975708007812, |
| "learning_rate": 1.7469333333333332e-05, |
| "loss": 0.3109, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.1671799859512617, |
| "grad_norm": 8.202065467834473, |
| "learning_rate": 1.742488888888889e-05, |
| "loss": 0.3033, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.1779867077322095, |
| "grad_norm": 9.471212387084961, |
| "learning_rate": 1.7380444444444446e-05, |
| "loss": 0.3121, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.1887934295131573, |
| "grad_norm": 9.272053718566895, |
| "learning_rate": 1.7336e-05, |
| "loss": 0.3117, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.1887934295131573, |
| "eval_loss": 0.32140180468559265, |
| "eval_runtime": 4975.5976, |
| "eval_samples_per_second": 3.719, |
| "eval_steps_per_second": 0.93, |
| "eval_wer": 0.2575403310942375, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.199600151294105, |
| "grad_norm": 8.912075996398926, |
| "learning_rate": 1.7291555555555557e-05, |
| "loss": 0.3163, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.2104068730750526, |
| "grad_norm": 12.307350158691406, |
| "learning_rate": 1.7247111111111112e-05, |
| "loss": 0.3087, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.2212135948560003, |
| "grad_norm": 8.338894844055176, |
| "learning_rate": 1.7202666666666667e-05, |
| "loss": 0.3264, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.232020316636948, |
| "grad_norm": 10.600968360900879, |
| "learning_rate": 1.7158222222222222e-05, |
| "loss": 0.3144, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.2428270384178959, |
| "grad_norm": 9.626172065734863, |
| "learning_rate": 1.711377777777778e-05, |
| "loss": 0.3204, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.2428270384178959, |
| "eval_loss": 0.3168378174304962, |
| "eval_runtime": 5060.1288, |
| "eval_samples_per_second": 3.657, |
| "eval_steps_per_second": 0.914, |
| "eval_wer": 0.2646173335731434, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.2536337601988436, |
| "grad_norm": 8.312841415405273, |
| "learning_rate": 1.7069333333333336e-05, |
| "loss": 0.2968, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.2644404819797914, |
| "grad_norm": 8.717096328735352, |
| "learning_rate": 1.702488888888889e-05, |
| "loss": 0.3145, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.2752472037607392, |
| "grad_norm": 7.836411952972412, |
| "learning_rate": 1.6980444444444447e-05, |
| "loss": 0.3144, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.286053925541687, |
| "grad_norm": 7.561498165130615, |
| "learning_rate": 1.6936000000000002e-05, |
| "loss": 0.315, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.2968606473226347, |
| "grad_norm": 9.085077285766602, |
| "learning_rate": 1.6891555555555557e-05, |
| "loss": 0.2962, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.2968606473226347, |
| "eval_loss": 0.3087281286716461, |
| "eval_runtime": 5226.3325, |
| "eval_samples_per_second": 3.541, |
| "eval_steps_per_second": 0.885, |
| "eval_wer": 0.24104817835610226, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.3076673691035825, |
| "grad_norm": 10.332176208496094, |
| "learning_rate": 1.6847111111111112e-05, |
| "loss": 0.3092, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.3184740908845303, |
| "grad_norm": 10.159818649291992, |
| "learning_rate": 1.6802666666666668e-05, |
| "loss": 0.301, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.3292808126654778, |
| "grad_norm": 8.238092422485352, |
| "learning_rate": 1.6758222222222226e-05, |
| "loss": 0.307, |
| "step": 12300 |
| }, |
| { |
| "epoch": 1.3400875344464258, |
| "grad_norm": 6.4258317947387695, |
| "learning_rate": 1.671377777777778e-05, |
| "loss": 0.3022, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.3508942562273734, |
| "grad_norm": 10.840997695922852, |
| "learning_rate": 1.6669333333333337e-05, |
| "loss": 0.2961, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.3508942562273734, |
| "eval_loss": 0.3057025372982025, |
| "eval_runtime": 5051.0013, |
| "eval_samples_per_second": 3.663, |
| "eval_steps_per_second": 0.916, |
| "eval_wer": 0.23847543380852212, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.3617009780083211, |
| "grad_norm": 12.025620460510254, |
| "learning_rate": 1.6624888888888892e-05, |
| "loss": 0.2959, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.372507699789269, |
| "grad_norm": 7.729722023010254, |
| "learning_rate": 1.6580444444444447e-05, |
| "loss": 0.2917, |
| "step": 12700 |
| }, |
| { |
| "epoch": 1.3833144215702167, |
| "grad_norm": 10.813538551330566, |
| "learning_rate": 1.6536000000000002e-05, |
| "loss": 0.3176, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.3941211433511644, |
| "grad_norm": 9.306085586547852, |
| "learning_rate": 1.6491555555555558e-05, |
| "loss": 0.3092, |
| "step": 12900 |
| }, |
| { |
| "epoch": 1.4049278651321122, |
| "grad_norm": 8.277687072753906, |
| "learning_rate": 1.6447111111111113e-05, |
| "loss": 0.2887, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.4049278651321122, |
| "eval_loss": 0.298722505569458, |
| "eval_runtime": 5198.4556, |
| "eval_samples_per_second": 3.56, |
| "eval_steps_per_second": 0.89, |
| "eval_wer": 0.22810625669578274, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.41573458691306, |
| "grad_norm": 6.493645191192627, |
| "learning_rate": 1.640311111111111e-05, |
| "loss": 0.2895, |
| "step": 13100 |
| }, |
| { |
| "epoch": 1.4265413086940077, |
| "grad_norm": 12.606965065002441, |
| "learning_rate": 1.6358666666666666e-05, |
| "loss": 0.3072, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.4373480304749555, |
| "grad_norm": 11.579826354980469, |
| "learning_rate": 1.6314222222222225e-05, |
| "loss": 0.3046, |
| "step": 13300 |
| }, |
| { |
| "epoch": 1.448154752255903, |
| "grad_norm": 8.03853702545166, |
| "learning_rate": 1.626977777777778e-05, |
| "loss": 0.2983, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.458961474036851, |
| "grad_norm": 5.478261470794678, |
| "learning_rate": 1.6225333333333335e-05, |
| "loss": 0.2981, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.458961474036851, |
| "eval_loss": 0.29534754157066345, |
| "eval_runtime": 5398.8982, |
| "eval_samples_per_second": 3.427, |
| "eval_steps_per_second": 0.857, |
| "eval_wer": 0.23218042055380478, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.4697681958177986, |
| "grad_norm": 11.128421783447266, |
| "learning_rate": 1.618088888888889e-05, |
| "loss": 0.2863, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.4805749175987464, |
| "grad_norm": 9.209829330444336, |
| "learning_rate": 1.6136444444444446e-05, |
| "loss": 0.2945, |
| "step": 13700 |
| }, |
| { |
| "epoch": 1.4913816393796941, |
| "grad_norm": 7.881196022033691, |
| "learning_rate": 1.6092e-05, |
| "loss": 0.2881, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.502188361160642, |
| "grad_norm": 7.5840630531311035, |
| "learning_rate": 1.6047555555555556e-05, |
| "loss": 0.2881, |
| "step": 13900 |
| }, |
| { |
| "epoch": 1.5129950829415897, |
| "grad_norm": 9.444194793701172, |
| "learning_rate": 1.600311111111111e-05, |
| "loss": 0.2994, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.5129950829415897, |
| "eval_loss": 0.29087820649147034, |
| "eval_runtime": 5347.4253, |
| "eval_samples_per_second": 3.46, |
| "eval_steps_per_second": 0.865, |
| "eval_wer": 0.23219606033828855, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.5238018047225375, |
| "grad_norm": 8.870946884155273, |
| "learning_rate": 1.595866666666667e-05, |
| "loss": 0.2817, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.5346085265034852, |
| "grad_norm": 8.750350952148438, |
| "learning_rate": 1.5914222222222225e-05, |
| "loss": 0.2748, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.5454152482844328, |
| "grad_norm": 8.175148010253906, |
| "learning_rate": 1.586977777777778e-05, |
| "loss": 0.2941, |
| "step": 14300 |
| }, |
| { |
| "epoch": 1.5562219700653808, |
| "grad_norm": 8.30854320526123, |
| "learning_rate": 1.5825333333333336e-05, |
| "loss": 0.2861, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.5670286918463283, |
| "grad_norm": 6.031162261962891, |
| "learning_rate": 1.578088888888889e-05, |
| "loss": 0.2818, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.5670286918463283, |
| "eval_loss": 0.2847795784473419, |
| "eval_runtime": 4891.0559, |
| "eval_samples_per_second": 3.783, |
| "eval_steps_per_second": 0.946, |
| "eval_wer": 0.21997356876422242, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.5778354136272763, |
| "grad_norm": 9.509627342224121, |
| "learning_rate": 1.5736444444444446e-05, |
| "loss": 0.276, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.5886421354082239, |
| "grad_norm": 9.79079532623291, |
| "learning_rate": 1.5692e-05, |
| "loss": 0.2918, |
| "step": 14700 |
| }, |
| { |
| "epoch": 1.5994488571891716, |
| "grad_norm": 5.595622539520264, |
| "learning_rate": 1.5647555555555557e-05, |
| "loss": 0.282, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.6102555789701194, |
| "grad_norm": 8.00671100616455, |
| "learning_rate": 1.5603111111111112e-05, |
| "loss": 0.2884, |
| "step": 14900 |
| }, |
| { |
| "epoch": 1.6210623007510672, |
| "grad_norm": 9.086261749267578, |
| "learning_rate": 1.5558666666666667e-05, |
| "loss": 0.2851, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.6210623007510672, |
| "eval_loss": 0.2829968333244324, |
| "eval_runtime": 4762.5655, |
| "eval_samples_per_second": 3.885, |
| "eval_steps_per_second": 0.971, |
| "eval_wer": 0.21664229466917945, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.631869022532015, |
| "grad_norm": 7.383193016052246, |
| "learning_rate": 1.5514222222222222e-05, |
| "loss": 0.2811, |
| "step": 15100 |
| }, |
| { |
| "epoch": 1.6426757443129627, |
| "grad_norm": 8.950287818908691, |
| "learning_rate": 1.5469777777777778e-05, |
| "loss": 0.2897, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.6534824660939105, |
| "grad_norm": 9.632174491882324, |
| "learning_rate": 1.5425333333333333e-05, |
| "loss": 0.2622, |
| "step": 15300 |
| }, |
| { |
| "epoch": 1.664289187874858, |
| "grad_norm": 9.395116806030273, |
| "learning_rate": 1.5380888888888888e-05, |
| "loss": 0.2839, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.675095909655806, |
| "grad_norm": 7.435296535491943, |
| "learning_rate": 1.5336444444444443e-05, |
| "loss": 0.275, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.675095909655806, |
| "eval_loss": 0.2770063579082489, |
| "eval_runtime": 3549.6262, |
| "eval_samples_per_second": 5.213, |
| "eval_steps_per_second": 1.303, |
| "eval_wer": 0.2128887463930747, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.6859026314367536, |
| "grad_norm": 6.951478004455566, |
| "learning_rate": 1.5292e-05, |
| "loss": 0.2573, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.6967093532177016, |
| "grad_norm": 9.954898834228516, |
| "learning_rate": 1.5247555555555557e-05, |
| "loss": 0.2722, |
| "step": 15700 |
| }, |
| { |
| "epoch": 1.707516074998649, |
| "grad_norm": 7.309504985809326, |
| "learning_rate": 1.5203111111111112e-05, |
| "loss": 0.2578, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.7183227967795969, |
| "grad_norm": 7.3711042404174805, |
| "learning_rate": 1.5158666666666668e-05, |
| "loss": 0.2718, |
| "step": 15900 |
| }, |
| { |
| "epoch": 1.7291295185605446, |
| "grad_norm": 10.445212364196777, |
| "learning_rate": 1.5114222222222223e-05, |
| "loss": 0.2689, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.7291295185605446, |
| "eval_loss": 0.27603384852409363, |
| "eval_runtime": 3570.6477, |
| "eval_samples_per_second": 5.182, |
| "eval_steps_per_second": 1.296, |
| "eval_wer": 0.21191907975508098, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.7399362403414924, |
| "grad_norm": 5.5510573387146, |
| "learning_rate": 1.5069777777777778e-05, |
| "loss": 0.2831, |
| "step": 16100 |
| }, |
| { |
| "epoch": 1.7507429621224402, |
| "grad_norm": 7.619734287261963, |
| "learning_rate": 1.5025333333333333e-05, |
| "loss": 0.2596, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.761549683903388, |
| "grad_norm": 8.503314018249512, |
| "learning_rate": 1.4980888888888889e-05, |
| "loss": 0.2741, |
| "step": 16300 |
| }, |
| { |
| "epoch": 1.7723564056843357, |
| "grad_norm": 7.427919387817383, |
| "learning_rate": 1.4936444444444447e-05, |
| "loss": 0.2826, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.7831631274652833, |
| "grad_norm": 6.663356781005859, |
| "learning_rate": 1.4892000000000002e-05, |
| "loss": 0.2796, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.7831631274652833, |
| "eval_loss": 0.26777052879333496, |
| "eval_runtime": 3498.6608, |
| "eval_samples_per_second": 5.289, |
| "eval_steps_per_second": 1.322, |
| "eval_wer": 0.20020488117673738, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.7939698492462313, |
| "grad_norm": 7.476739883422852, |
| "learning_rate": 1.4847555555555558e-05, |
| "loss": 0.2507, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.8047765710271788, |
| "grad_norm": 7.695949077606201, |
| "learning_rate": 1.4803111111111113e-05, |
| "loss": 0.2578, |
| "step": 16700 |
| }, |
| { |
| "epoch": 1.8155832928081268, |
| "grad_norm": 9.240167617797852, |
| "learning_rate": 1.4758666666666668e-05, |
| "loss": 0.2917, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.8263900145890744, |
| "grad_norm": 8.233548164367676, |
| "learning_rate": 1.4714222222222223e-05, |
| "loss": 0.2587, |
| "step": 16900 |
| }, |
| { |
| "epoch": 1.8371967363700221, |
| "grad_norm": 9.109882354736328, |
| "learning_rate": 1.4669777777777779e-05, |
| "loss": 0.2717, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.8371967363700221, |
| "eval_loss": 0.2652583122253418, |
| "eval_runtime": 3449.0658, |
| "eval_samples_per_second": 5.365, |
| "eval_steps_per_second": 1.341, |
| "eval_wer": 0.20007976290086724, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.84800345815097, |
| "grad_norm": 6.163556098937988, |
| "learning_rate": 1.4625333333333334e-05, |
| "loss": 0.2607, |
| "step": 17100 |
| }, |
| { |
| "epoch": 1.8588101799319177, |
| "grad_norm": 9.27730941772461, |
| "learning_rate": 1.4581333333333334e-05, |
| "loss": 0.2581, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.8696169017128654, |
| "grad_norm": 8.40946102142334, |
| "learning_rate": 1.4536888888888889e-05, |
| "loss": 0.2577, |
| "step": 17300 |
| }, |
| { |
| "epoch": 1.8804236234938132, |
| "grad_norm": 8.552946090698242, |
| "learning_rate": 1.4492444444444444e-05, |
| "loss": 0.2741, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.891230345274761, |
| "grad_norm": 6.056818962097168, |
| "learning_rate": 1.4448000000000001e-05, |
| "loss": 0.2661, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.891230345274761, |
| "eval_loss": 0.2625672221183777, |
| "eval_runtime": 3434.9257, |
| "eval_samples_per_second": 5.387, |
| "eval_steps_per_second": 1.347, |
| "eval_wer": 0.20144824404319708, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.9020370670557085, |
| "grad_norm": 7.4529571533203125, |
| "learning_rate": 1.4403555555555556e-05, |
| "loss": 0.2638, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.9128437888366565, |
| "grad_norm": 6.99146032333374, |
| "learning_rate": 1.4359111111111112e-05, |
| "loss": 0.268, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.923650510617604, |
| "grad_norm": 6.872374534606934, |
| "learning_rate": 1.4314666666666669e-05, |
| "loss": 0.2469, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.934457232398552, |
| "grad_norm": 8.856480598449707, |
| "learning_rate": 1.4270222222222224e-05, |
| "loss": 0.2685, |
| "step": 17900 |
| }, |
| { |
| "epoch": 1.9452639541794996, |
| "grad_norm": 9.830224990844727, |
| "learning_rate": 1.4225777777777779e-05, |
| "loss": 0.2612, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.9452639541794996, |
| "eval_loss": 0.2572856843471527, |
| "eval_runtime": 3453.1421, |
| "eval_samples_per_second": 5.359, |
| "eval_steps_per_second": 1.34, |
| "eval_wer": 0.19530180874107556, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.9560706759604474, |
| "grad_norm": 7.073034286499023, |
| "learning_rate": 1.4181333333333334e-05, |
| "loss": 0.2631, |
| "step": 18100 |
| }, |
| { |
| "epoch": 1.9668773977413951, |
| "grad_norm": 8.844318389892578, |
| "learning_rate": 1.4136888888888891e-05, |
| "loss": 0.263, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.977684119522343, |
| "grad_norm": 7.525826454162598, |
| "learning_rate": 1.4092444444444446e-05, |
| "loss": 0.2643, |
| "step": 18300 |
| }, |
| { |
| "epoch": 1.9884908413032907, |
| "grad_norm": 7.551785945892334, |
| "learning_rate": 1.4048000000000002e-05, |
| "loss": 0.2491, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.9992975630842384, |
| "grad_norm": 8.573739051818848, |
| "learning_rate": 1.4003555555555557e-05, |
| "loss": 0.2532, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.9992975630842384, |
| "eval_loss": 0.2554282248020172, |
| "eval_runtime": 3437.8823, |
| "eval_samples_per_second": 5.382, |
| "eval_steps_per_second": 1.346, |
| "eval_wer": 0.19536436787901063, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.0101583184740908, |
| "grad_norm": 250204.390625, |
| "learning_rate": 1.3959111111111112e-05, |
| "loss": 0.2066, |
| "step": 18600 |
| }, |
| { |
| "epoch": 2.0209650402550388, |
| "grad_norm": 299758.4375, |
| "learning_rate": 1.3914666666666667e-05, |
| "loss": 0.1958, |
| "step": 18700 |
| }, |
| { |
| "epoch": 2.0317717620359863, |
| "grad_norm": 245057.34375, |
| "learning_rate": 1.3870222222222223e-05, |
| "loss": 0.2, |
| "step": 18800 |
| }, |
| { |
| "epoch": 2.0425784838169343, |
| "grad_norm": 334399.46875, |
| "learning_rate": 1.3825777777777778e-05, |
| "loss": 0.192, |
| "step": 18900 |
| }, |
| { |
| "epoch": 2.053385205597882, |
| "grad_norm": 236247.03125, |
| "learning_rate": 1.3781333333333335e-05, |
| "loss": 0.1993, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.053385205597882, |
| "eval_loss": 0.2527328431606293, |
| "eval_runtime": 2819.5348, |
| "eval_samples_per_second": 6.563, |
| "eval_steps_per_second": 0.82, |
| "eval_wer": 0.19490299423673943, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.06419192737883, |
| "grad_norm": 260942.1875, |
| "learning_rate": 1.373688888888889e-05, |
| "loss": 0.1947, |
| "step": 19100 |
| }, |
| { |
| "epoch": 2.0749986491597774, |
| "grad_norm": 247902.484375, |
| "learning_rate": 1.3692444444444445e-05, |
| "loss": 0.1969, |
| "step": 19200 |
| }, |
| { |
| "epoch": 2.085805370940725, |
| "grad_norm": 229633.375, |
| "learning_rate": 1.3648e-05, |
| "loss": 0.1934, |
| "step": 19300 |
| }, |
| { |
| "epoch": 2.096612092721673, |
| "grad_norm": 214819.4375, |
| "learning_rate": 1.3603555555555556e-05, |
| "loss": 0.1936, |
| "step": 19400 |
| }, |
| { |
| "epoch": 2.1074188145026205, |
| "grad_norm": 225145.53125, |
| "learning_rate": 1.3559111111111113e-05, |
| "loss": 0.2009, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.1074188145026205, |
| "eval_loss": 0.25053051114082336, |
| "eval_runtime": 2775.1844, |
| "eval_samples_per_second": 6.668, |
| "eval_steps_per_second": 0.833, |
| "eval_wer": 0.1897340454648535, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.1182255362835685, |
| "grad_norm": 335387.5, |
| "learning_rate": 1.3514666666666668e-05, |
| "loss": 0.1993, |
| "step": 19600 |
| }, |
| { |
| "epoch": 2.129032258064516, |
| "grad_norm": 276303.59375, |
| "learning_rate": 1.3470222222222223e-05, |
| "loss": 0.1927, |
| "step": 19700 |
| }, |
| { |
| "epoch": 2.139838979845464, |
| "grad_norm": 314395.15625, |
| "learning_rate": 1.342577777777778e-05, |
| "loss": 0.1828, |
| "step": 19800 |
| }, |
| { |
| "epoch": 2.1506457016264116, |
| "grad_norm": 224658.71875, |
| "learning_rate": 1.3381333333333335e-05, |
| "loss": 0.1998, |
| "step": 19900 |
| }, |
| { |
| "epoch": 2.1614524234073595, |
| "grad_norm": 269057.15625, |
| "learning_rate": 1.333688888888889e-05, |
| "loss": 0.1929, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.1614524234073595, |
| "eval_loss": 0.24842554330825806, |
| "eval_runtime": 2752.0603, |
| "eval_samples_per_second": 6.724, |
| "eval_steps_per_second": 0.84, |
| "eval_wer": 0.19266650505556032, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.172259145188307, |
| "grad_norm": 295629.75, |
| "learning_rate": 1.3292444444444446e-05, |
| "loss": 0.1881, |
| "step": 20100 |
| }, |
| { |
| "epoch": 2.1830658669692546, |
| "grad_norm": 253882.09375, |
| "learning_rate": 1.3248000000000001e-05, |
| "loss": 0.1918, |
| "step": 20200 |
| }, |
| { |
| "epoch": 2.1938725887502026, |
| "grad_norm": 220421.125, |
| "learning_rate": 1.3203555555555556e-05, |
| "loss": 0.1944, |
| "step": 20300 |
| }, |
| { |
| "epoch": 2.20467931053115, |
| "grad_norm": 167626.5625, |
| "learning_rate": 1.3159111111111111e-05, |
| "loss": 0.194, |
| "step": 20400 |
| }, |
| { |
| "epoch": 2.215486032312098, |
| "grad_norm": 303924.09375, |
| "learning_rate": 1.3114666666666667e-05, |
| "loss": 0.2011, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.215486032312098, |
| "eval_loss": 0.24535807967185974, |
| "eval_runtime": 2748.7126, |
| "eval_samples_per_second": 6.732, |
| "eval_steps_per_second": 0.841, |
| "eval_wer": 0.1894759890208713, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.2262927540930457, |
| "grad_norm": 202871.59375, |
| "learning_rate": 1.3070222222222223e-05, |
| "loss": 0.2008, |
| "step": 20600 |
| }, |
| { |
| "epoch": 2.2370994758739937, |
| "grad_norm": 263771.4375, |
| "learning_rate": 1.3025777777777779e-05, |
| "loss": 0.1995, |
| "step": 20700 |
| }, |
| { |
| "epoch": 2.2479061976549413, |
| "grad_norm": 214771.859375, |
| "learning_rate": 1.2981333333333334e-05, |
| "loss": 0.1911, |
| "step": 20800 |
| }, |
| { |
| "epoch": 2.2587129194358893, |
| "grad_norm": 286280.8125, |
| "learning_rate": 1.293688888888889e-05, |
| "loss": 0.1939, |
| "step": 20900 |
| }, |
| { |
| "epoch": 2.269519641216837, |
| "grad_norm": 310779.4375, |
| "learning_rate": 1.2892444444444444e-05, |
| "loss": 0.1828, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.269519641216837, |
| "eval_loss": 0.24438022077083588, |
| "eval_runtime": 2733.7874, |
| "eval_samples_per_second": 6.769, |
| "eval_steps_per_second": 0.846, |
| "eval_wer": 0.1892179325768891, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.280326362997785, |
| "grad_norm": 254828.265625, |
| "learning_rate": 1.2848e-05, |
| "loss": 0.1933, |
| "step": 21100 |
| }, |
| { |
| "epoch": 2.2911330847787323, |
| "grad_norm": 239186.609375, |
| "learning_rate": 1.2803555555555557e-05, |
| "loss": 0.2002, |
| "step": 21200 |
| }, |
| { |
| "epoch": 2.3019398065596803, |
| "grad_norm": 239042.359375, |
| "learning_rate": 1.2759111111111113e-05, |
| "loss": 0.1924, |
| "step": 21300 |
| }, |
| { |
| "epoch": 2.312746528340628, |
| "grad_norm": 260667.75, |
| "learning_rate": 1.2714666666666669e-05, |
| "loss": 0.2005, |
| "step": 21400 |
| }, |
| { |
| "epoch": 2.3235532501215754, |
| "grad_norm": 245939.078125, |
| "learning_rate": 1.2670222222222224e-05, |
| "loss": 0.1823, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.3235532501215754, |
| "eval_loss": 0.24371393024921417, |
| "eval_runtime": 2738.6211, |
| "eval_samples_per_second": 6.757, |
| "eval_steps_per_second": 0.845, |
| "eval_wer": 0.18452599723175814, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.3343599719025234, |
| "grad_norm": 315165.90625, |
| "learning_rate": 1.262577777777778e-05, |
| "loss": 0.1958, |
| "step": 21600 |
| }, |
| { |
| "epoch": 2.345166693683471, |
| "grad_norm": 236173.65625, |
| "learning_rate": 1.2581333333333334e-05, |
| "loss": 0.1884, |
| "step": 21700 |
| }, |
| { |
| "epoch": 2.355973415464419, |
| "grad_norm": 176315.5, |
| "learning_rate": 1.253688888888889e-05, |
| "loss": 0.1953, |
| "step": 21800 |
| }, |
| { |
| "epoch": 2.3667801372453665, |
| "grad_norm": 257501.171875, |
| "learning_rate": 1.2492444444444445e-05, |
| "loss": 0.1968, |
| "step": 21900 |
| }, |
| { |
| "epoch": 2.3775868590263145, |
| "grad_norm": 253721.75, |
| "learning_rate": 1.2448e-05, |
| "loss": 0.186, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.3775868590263145, |
| "eval_loss": 0.24074091017246246, |
| "eval_runtime": 2749.0556, |
| "eval_samples_per_second": 6.731, |
| "eval_steps_per_second": 0.841, |
| "eval_wer": 0.18391604563689112, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.388393580807262, |
| "grad_norm": 173236.4375, |
| "learning_rate": 1.2403555555555557e-05, |
| "loss": 0.2004, |
| "step": 22100 |
| }, |
| { |
| "epoch": 2.39920030258821, |
| "grad_norm": 219634.15625, |
| "learning_rate": 1.2359111111111112e-05, |
| "loss": 0.1986, |
| "step": 22200 |
| }, |
| { |
| "epoch": 2.4100070243691576, |
| "grad_norm": 276572.0, |
| "learning_rate": 1.2314666666666667e-05, |
| "loss": 0.1821, |
| "step": 22300 |
| }, |
| { |
| "epoch": 2.420813746150105, |
| "grad_norm": 166507.25, |
| "learning_rate": 1.2270222222222223e-05, |
| "loss": 0.1785, |
| "step": 22400 |
| }, |
| { |
| "epoch": 2.431620467931053, |
| "grad_norm": 264705.6875, |
| "learning_rate": 1.2225777777777778e-05, |
| "loss": 0.1898, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.431620467931053, |
| "eval_loss": 0.23902596533298492, |
| "eval_runtime": 2754.9189, |
| "eval_samples_per_second": 6.717, |
| "eval_steps_per_second": 0.84, |
| "eval_wer": 0.18278216126181782, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.4424271897120007, |
| "grad_norm": 222752.09375, |
| "learning_rate": 1.2181333333333333e-05, |
| "loss": 0.1884, |
| "step": 22600 |
| }, |
| { |
| "epoch": 2.4532339114929487, |
| "grad_norm": 206168.15625, |
| "learning_rate": 1.2136888888888888e-05, |
| "loss": 0.1911, |
| "step": 22700 |
| }, |
| { |
| "epoch": 2.464040633273896, |
| "grad_norm": 255710.078125, |
| "learning_rate": 1.2092444444444444e-05, |
| "loss": 0.1876, |
| "step": 22800 |
| }, |
| { |
| "epoch": 2.474847355054844, |
| "grad_norm": 158791.09375, |
| "learning_rate": 1.2048000000000002e-05, |
| "loss": 0.1962, |
| "step": 22900 |
| }, |
| { |
| "epoch": 2.4856540768357918, |
| "grad_norm": 322706.125, |
| "learning_rate": 1.2003555555555557e-05, |
| "loss": 0.1789, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.4856540768357918, |
| "eval_loss": 0.2363387644290924, |
| "eval_runtime": 2786.1335, |
| "eval_samples_per_second": 6.641, |
| "eval_steps_per_second": 0.83, |
| "eval_wer": 0.17904425277019684, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.4964607986167398, |
| "grad_norm": 262923.15625, |
| "learning_rate": 1.1959111111111113e-05, |
| "loss": 0.1861, |
| "step": 23100 |
| }, |
| { |
| "epoch": 2.5072675203976873, |
| "grad_norm": 216068.8125, |
| "learning_rate": 1.1914666666666668e-05, |
| "loss": 0.1827, |
| "step": 23200 |
| }, |
| { |
| "epoch": 2.518074242178635, |
| "grad_norm": 188912.265625, |
| "learning_rate": 1.1870222222222223e-05, |
| "loss": 0.1827, |
| "step": 23300 |
| }, |
| { |
| "epoch": 2.528880963959583, |
| "grad_norm": 222542.171875, |
| "learning_rate": 1.1825777777777778e-05, |
| "loss": 0.19, |
| "step": 23400 |
| }, |
| { |
| "epoch": 2.539687685740531, |
| "grad_norm": 202856.296875, |
| "learning_rate": 1.1781333333333334e-05, |
| "loss": 0.1765, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.539687685740531, |
| "eval_loss": 0.2353278398513794, |
| "eval_runtime": 2813.1319, |
| "eval_samples_per_second": 6.578, |
| "eval_steps_per_second": 0.822, |
| "eval_wer": 0.17970112371851515, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.5504944075214784, |
| "grad_norm": 238512.4375, |
| "learning_rate": 1.1736888888888889e-05, |
| "loss": 0.1869, |
| "step": 23600 |
| }, |
| { |
| "epoch": 2.561301129302426, |
| "grad_norm": 276903.75, |
| "learning_rate": 1.1692444444444446e-05, |
| "loss": 0.1963, |
| "step": 23700 |
| }, |
| { |
| "epoch": 2.572107851083374, |
| "grad_norm": 329353.65625, |
| "learning_rate": 1.1648000000000001e-05, |
| "loss": 0.1839, |
| "step": 23800 |
| }, |
| { |
| "epoch": 2.5829145728643215, |
| "grad_norm": 330138.1875, |
| "learning_rate": 1.1603555555555556e-05, |
| "loss": 0.1808, |
| "step": 23900 |
| }, |
| { |
| "epoch": 2.5937212946452695, |
| "grad_norm": 279948.03125, |
| "learning_rate": 1.1559111111111111e-05, |
| "loss": 0.1808, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.5937212946452695, |
| "eval_loss": 0.23201151192188263, |
| "eval_runtime": 3188.0735, |
| "eval_samples_per_second": 5.804, |
| "eval_steps_per_second": 0.726, |
| "eval_wer": 0.17965420436506385, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.604528016426217, |
| "grad_norm": 216210.78125, |
| "learning_rate": 1.1514666666666667e-05, |
| "loss": 0.1831, |
| "step": 24100 |
| }, |
| { |
| "epoch": 2.615334738207165, |
| "grad_norm": 282255.96875, |
| "learning_rate": 1.1470222222222222e-05, |
| "loss": 0.1826, |
| "step": 24200 |
| }, |
| { |
| "epoch": 2.6261414599881125, |
| "grad_norm": 260694.609375, |
| "learning_rate": 1.1425777777777777e-05, |
| "loss": 0.189, |
| "step": 24300 |
| }, |
| { |
| "epoch": 2.6369481817690605, |
| "grad_norm": 212081.46875, |
| "learning_rate": 1.1381333333333336e-05, |
| "loss": 0.1859, |
| "step": 24400 |
| }, |
| { |
| "epoch": 2.647754903550008, |
| "grad_norm": 217712.515625, |
| "learning_rate": 1.1336888888888891e-05, |
| "loss": 0.1771, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.647754903550008, |
| "eval_loss": 0.22906863689422607, |
| "eval_runtime": 3261.465, |
| "eval_samples_per_second": 5.674, |
| "eval_steps_per_second": 0.709, |
| "eval_wer": 0.17766795173562508, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.6585616253309556, |
| "grad_norm": 298979.4375, |
| "learning_rate": 1.1292444444444446e-05, |
| "loss": 0.1884, |
| "step": 24600 |
| }, |
| { |
| "epoch": 2.6693683471119036, |
| "grad_norm": 220226.265625, |
| "learning_rate": 1.1248000000000001e-05, |
| "loss": 0.1938, |
| "step": 24700 |
| }, |
| { |
| "epoch": 2.6801750688928516, |
| "grad_norm": 208763.828125, |
| "learning_rate": 1.1203555555555557e-05, |
| "loss": 0.1764, |
| "step": 24800 |
| }, |
| { |
| "epoch": 2.690981790673799, |
| "grad_norm": 141644.71875, |
| "learning_rate": 1.1159111111111112e-05, |
| "loss": 0.1797, |
| "step": 24900 |
| }, |
| { |
| "epoch": 2.7017885124547467, |
| "grad_norm": 232710.078125, |
| "learning_rate": 1.1114666666666667e-05, |
| "loss": 0.183, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.7017885124547467, |
| "eval_loss": 0.22756607830524445, |
| "eval_runtime": 2974.7193, |
| "eval_samples_per_second": 6.22, |
| "eval_steps_per_second": 0.778, |
| "eval_wer": 0.17883311567966592, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.7125952342356947, |
| "grad_norm": 348240.0, |
| "learning_rate": 1.1070222222222222e-05, |
| "loss": 0.1888, |
| "step": 25100 |
| }, |
| { |
| "epoch": 2.7234019560166423, |
| "grad_norm": 219766.265625, |
| "learning_rate": 1.102577777777778e-05, |
| "loss": 0.1839, |
| "step": 25200 |
| }, |
| { |
| "epoch": 2.7342086777975902, |
| "grad_norm": 240334.796875, |
| "learning_rate": 1.0981333333333334e-05, |
| "loss": 0.1802, |
| "step": 25300 |
| }, |
| { |
| "epoch": 2.745015399578538, |
| "grad_norm": 226478.640625, |
| "learning_rate": 1.093688888888889e-05, |
| "loss": 0.18, |
| "step": 25400 |
| }, |
| { |
| "epoch": 2.7558221213594853, |
| "grad_norm": 248438.5, |
| "learning_rate": 1.0892444444444445e-05, |
| "loss": 0.178, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.7558221213594853, |
| "eval_loss": 0.22500741481781006, |
| "eval_runtime": 2728.4865, |
| "eval_samples_per_second": 6.782, |
| "eval_steps_per_second": 0.848, |
| "eval_wer": 0.17539236309323658, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.7666288431404333, |
| "grad_norm": 229927.078125, |
| "learning_rate": 1.0848e-05, |
| "loss": 0.191, |
| "step": 25600 |
| }, |
| { |
| "epoch": 2.7774355649213813, |
| "grad_norm": 300608.5, |
| "learning_rate": 1.0803555555555555e-05, |
| "loss": 0.1982, |
| "step": 25700 |
| }, |
| { |
| "epoch": 2.788242286702329, |
| "grad_norm": 293853.34375, |
| "learning_rate": 1.075911111111111e-05, |
| "loss": 0.1747, |
| "step": 25800 |
| }, |
| { |
| "epoch": 2.7990490084832764, |
| "grad_norm": 210441.359375, |
| "learning_rate": 1.0714666666666666e-05, |
| "loss": 0.1808, |
| "step": 25900 |
| }, |
| { |
| "epoch": 2.8098557302642244, |
| "grad_norm": 175316.484375, |
| "learning_rate": 1.0670222222222224e-05, |
| "loss": 0.1829, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.8098557302642244, |
| "eval_loss": 0.2231319695711136, |
| "eval_runtime": 2895.6838, |
| "eval_samples_per_second": 6.39, |
| "eval_steps_per_second": 0.799, |
| "eval_wer": 0.17550966147686484, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.820662452045172, |
| "grad_norm": 162894.484375, |
| "learning_rate": 1.062577777777778e-05, |
| "loss": 0.1772, |
| "step": 26100 |
| }, |
| { |
| "epoch": 2.83146917382612, |
| "grad_norm": 399308.75, |
| "learning_rate": 1.0581333333333335e-05, |
| "loss": 0.183, |
| "step": 26200 |
| }, |
| { |
| "epoch": 2.8422758956070675, |
| "grad_norm": 237068.109375, |
| "learning_rate": 1.053688888888889e-05, |
| "loss": 0.1867, |
| "step": 26300 |
| }, |
| { |
| "epoch": 2.8530826173880155, |
| "grad_norm": 274371.4375, |
| "learning_rate": 1.0492444444444445e-05, |
| "loss": 0.189, |
| "step": 26400 |
| }, |
| { |
| "epoch": 2.863889339168963, |
| "grad_norm": 218764.46875, |
| "learning_rate": 1.0448e-05, |
| "loss": 0.183, |
| "step": 26500 |
| }, |
| { |
| "epoch": 2.863889339168963, |
| "eval_loss": 0.2216072529554367, |
| "eval_runtime": 2986.971, |
| "eval_samples_per_second": 6.195, |
| "eval_steps_per_second": 0.774, |
| "eval_wer": 0.17901297320122928, |
| "step": 26500 |
| }, |
| { |
| "epoch": 2.874696060949911, |
| "grad_norm": 200908.984375, |
| "learning_rate": 1.0403555555555556e-05, |
| "loss": 0.1714, |
| "step": 26600 |
| }, |
| { |
| "epoch": 2.8855027827308586, |
| "grad_norm": 247789.078125, |
| "learning_rate": 1.0359111111111111e-05, |
| "loss": 0.1775, |
| "step": 26700 |
| }, |
| { |
| "epoch": 2.896309504511806, |
| "grad_norm": 237418.84375, |
| "learning_rate": 1.0314666666666668e-05, |
| "loss": 0.1695, |
| "step": 26800 |
| }, |
| { |
| "epoch": 2.907116226292754, |
| "grad_norm": 253792.0, |
| "learning_rate": 1.0270222222222223e-05, |
| "loss": 0.1806, |
| "step": 26900 |
| }, |
| { |
| "epoch": 2.917922948073702, |
| "grad_norm": 205986.421875, |
| "learning_rate": 1.0225777777777778e-05, |
| "loss": 0.1812, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.917922948073702, |
| "eval_loss": 0.21981683373451233, |
| "eval_runtime": 2808.8113, |
| "eval_samples_per_second": 6.588, |
| "eval_steps_per_second": 0.823, |
| "eval_wer": 0.1729369169292847, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.9287296698546497, |
| "grad_norm": 194869.09375, |
| "learning_rate": 1.0181333333333334e-05, |
| "loss": 0.1805, |
| "step": 27100 |
| }, |
| { |
| "epoch": 2.939536391635597, |
| "grad_norm": 223082.046875, |
| "learning_rate": 1.0136888888888889e-05, |
| "loss": 0.1797, |
| "step": 27200 |
| }, |
| { |
| "epoch": 2.950343113416545, |
| "grad_norm": 186045.15625, |
| "learning_rate": 1.0092444444444444e-05, |
| "loss": 0.1809, |
| "step": 27300 |
| }, |
| { |
| "epoch": 2.9611498351974928, |
| "grad_norm": 307597.75, |
| "learning_rate": 1.0048e-05, |
| "loss": 0.1752, |
| "step": 27400 |
| }, |
| { |
| "epoch": 2.9719565569784407, |
| "grad_norm": 256261.78125, |
| "learning_rate": 1.0003555555555558e-05, |
| "loss": 0.1697, |
| "step": 27500 |
| }, |
| { |
| "epoch": 2.9719565569784407, |
| "eval_loss": 0.21857349574565887, |
| "eval_runtime": 2775.0986, |
| "eval_samples_per_second": 6.668, |
| "eval_steps_per_second": 0.833, |
| "eval_wer": 0.17267104059306063, |
| "step": 27500 |
| }, |
| { |
| "epoch": 2.9827632787593883, |
| "grad_norm": 140632.125, |
| "learning_rate": 9.959111111111111e-06, |
| "loss": 0.1773, |
| "step": 27600 |
| }, |
| { |
| "epoch": 2.993570000540336, |
| "grad_norm": 238205.53125, |
| "learning_rate": 9.914666666666668e-06, |
| "loss": 0.1799, |
| "step": 27700 |
| }, |
| { |
| "epoch": 3.0044307559301884, |
| "grad_norm": 153436.546875, |
| "learning_rate": 9.870222222222224e-06, |
| "loss": 0.1618, |
| "step": 27800 |
| }, |
| { |
| "epoch": 3.0152374777111364, |
| "grad_norm": 179979.90625, |
| "learning_rate": 9.825777777777779e-06, |
| "loss": 0.1261, |
| "step": 27900 |
| }, |
| { |
| "epoch": 3.026044199492084, |
| "grad_norm": 237624.703125, |
| "learning_rate": 9.781333333333334e-06, |
| "loss": 0.1317, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.026044199492084, |
| "eval_loss": 0.21732862293720245, |
| "eval_runtime": 2726.3235, |
| "eval_samples_per_second": 6.787, |
| "eval_steps_per_second": 0.848, |
| "eval_wer": 0.17278051908444703, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.036850921273032, |
| "grad_norm": 245701.53125, |
| "learning_rate": 9.73688888888889e-06, |
| "loss": 0.1383, |
| "step": 28100 |
| }, |
| { |
| "epoch": 3.0476576430539795, |
| "grad_norm": 94987.546875, |
| "learning_rate": 9.692444444444446e-06, |
| "loss": 0.1317, |
| "step": 28200 |
| }, |
| { |
| "epoch": 3.0584643648349275, |
| "grad_norm": 204097.234375, |
| "learning_rate": 9.648000000000001e-06, |
| "loss": 0.1349, |
| "step": 28300 |
| }, |
| { |
| "epoch": 3.069271086615875, |
| "grad_norm": 142045.625, |
| "learning_rate": 9.603555555555557e-06, |
| "loss": 0.1308, |
| "step": 28400 |
| }, |
| { |
| "epoch": 3.080077808396823, |
| "grad_norm": 192114.71875, |
| "learning_rate": 9.559111111111112e-06, |
| "loss": 0.1298, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.080077808396823, |
| "eval_loss": 0.21591147780418396, |
| "eval_runtime": 2949.2136, |
| "eval_samples_per_second": 6.274, |
| "eval_steps_per_second": 0.784, |
| "eval_wer": 0.16897223156264907, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.0908845301777705, |
| "grad_norm": 84732.625, |
| "learning_rate": 9.514666666666667e-06, |
| "loss": 0.1182, |
| "step": 28600 |
| }, |
| { |
| "epoch": 3.1016912519587185, |
| "grad_norm": 176436.625, |
| "learning_rate": 9.470222222222222e-06, |
| "loss": 0.1275, |
| "step": 28700 |
| }, |
| { |
| "epoch": 3.112497973739666, |
| "grad_norm": 216536.46875, |
| "learning_rate": 9.425777777777778e-06, |
| "loss": 0.1346, |
| "step": 28800 |
| }, |
| { |
| "epoch": 3.1233046955206136, |
| "grad_norm": 227679.296875, |
| "learning_rate": 9.381333333333335e-06, |
| "loss": 0.1255, |
| "step": 28900 |
| }, |
| { |
| "epoch": 3.1341114173015616, |
| "grad_norm": 172016.46875, |
| "learning_rate": 9.33688888888889e-06, |
| "loss": 0.1272, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.1341114173015616, |
| "eval_loss": 0.21611380577087402, |
| "eval_runtime": 2875.8761, |
| "eval_samples_per_second": 6.434, |
| "eval_steps_per_second": 0.804, |
| "eval_wer": 0.16858123695055482, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.144918139082509, |
| "grad_norm": 135140.53125, |
| "learning_rate": 9.292444444444445e-06, |
| "loss": 0.1327, |
| "step": 29100 |
| }, |
| { |
| "epoch": 3.155724860863457, |
| "grad_norm": 245684.453125, |
| "learning_rate": 9.248e-06, |
| "loss": 0.1359, |
| "step": 29200 |
| }, |
| { |
| "epoch": 3.1665315826444047, |
| "grad_norm": 184601.390625, |
| "learning_rate": 9.203555555555557e-06, |
| "loss": 0.131, |
| "step": 29300 |
| }, |
| { |
| "epoch": 3.1773383044253527, |
| "grad_norm": 157958.8125, |
| "learning_rate": 9.159111111111112e-06, |
| "loss": 0.129, |
| "step": 29400 |
| }, |
| { |
| "epoch": 3.1881450262063002, |
| "grad_norm": 174601.015625, |
| "learning_rate": 9.114666666666668e-06, |
| "loss": 0.1389, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.1881450262063002, |
| "eval_loss": 0.21482256054878235, |
| "eval_runtime": 2995.308, |
| "eval_samples_per_second": 6.178, |
| "eval_steps_per_second": 0.772, |
| "eval_wer": 0.17062222882568678, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.1989517479872482, |
| "grad_norm": 366836.96875, |
| "learning_rate": 9.070222222222223e-06, |
| "loss": 0.1375, |
| "step": 29600 |
| }, |
| { |
| "epoch": 3.209758469768196, |
| "grad_norm": 217750.703125, |
| "learning_rate": 9.025777777777778e-06, |
| "loss": 0.1393, |
| "step": 29700 |
| }, |
| { |
| "epoch": 3.2205651915491433, |
| "grad_norm": 427549.6875, |
| "learning_rate": 8.981333333333333e-06, |
| "loss": 0.1211, |
| "step": 29800 |
| }, |
| { |
| "epoch": 3.2313719133300913, |
| "grad_norm": 139396.234375, |
| "learning_rate": 8.93688888888889e-06, |
| "loss": 0.1278, |
| "step": 29900 |
| }, |
| { |
| "epoch": 3.242178635111039, |
| "grad_norm": 165933.109375, |
| "learning_rate": 8.892444444444445e-06, |
| "loss": 0.1379, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.242178635111039, |
| "eval_loss": 0.2138548046350479, |
| "eval_runtime": 3184.9662, |
| "eval_samples_per_second": 5.81, |
| "eval_steps_per_second": 0.726, |
| "eval_wer": 0.16925374768335694, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.252985356891987, |
| "grad_norm": 289672.5, |
| "learning_rate": 8.848e-06, |
| "loss": 0.1407, |
| "step": 30100 |
| }, |
| { |
| "epoch": 3.2637920786729344, |
| "grad_norm": 145135.828125, |
| "learning_rate": 8.803555555555556e-06, |
| "loss": 0.1319, |
| "step": 30200 |
| }, |
| { |
| "epoch": 3.2745988004538824, |
| "grad_norm": 184688.515625, |
| "learning_rate": 8.759111111111111e-06, |
| "loss": 0.1308, |
| "step": 30300 |
| }, |
| { |
| "epoch": 3.28540552223483, |
| "grad_norm": 201466.8125, |
| "learning_rate": 8.714666666666666e-06, |
| "loss": 0.1322, |
| "step": 30400 |
| }, |
| { |
| "epoch": 3.296212244015778, |
| "grad_norm": 194344.265625, |
| "learning_rate": 8.670222222222223e-06, |
| "loss": 0.1312, |
| "step": 30500 |
| }, |
| { |
| "epoch": 3.296212244015778, |
| "eval_loss": 0.21327927708625793, |
| "eval_runtime": 3054.7075, |
| "eval_samples_per_second": 6.058, |
| "eval_steps_per_second": 0.757, |
| "eval_wer": 0.1713885782653915, |
| "step": 30500 |
| }, |
| { |
| "epoch": 3.3070189657967255, |
| "grad_norm": 204070.3125, |
| "learning_rate": 8.625777777777779e-06, |
| "loss": 0.1222, |
| "step": 30600 |
| }, |
| { |
| "epoch": 3.3178256875776735, |
| "grad_norm": 258432.828125, |
| "learning_rate": 8.581333333333334e-06, |
| "loss": 0.1277, |
| "step": 30700 |
| }, |
| { |
| "epoch": 3.328632409358621, |
| "grad_norm": 251962.796875, |
| "learning_rate": 8.53688888888889e-06, |
| "loss": 0.1265, |
| "step": 30800 |
| }, |
| { |
| "epoch": 3.339439131139569, |
| "grad_norm": 226804.796875, |
| "learning_rate": 8.492444444444446e-06, |
| "loss": 0.1292, |
| "step": 30900 |
| }, |
| { |
| "epoch": 3.3502458529205166, |
| "grad_norm": 191085.5625, |
| "learning_rate": 8.448000000000001e-06, |
| "loss": 0.1212, |
| "step": 31000 |
| }, |
| { |
| "epoch": 3.3502458529205166, |
| "eval_loss": 0.21162918210029602, |
| "eval_runtime": 3559.2497, |
| "eval_samples_per_second": 5.199, |
| "eval_steps_per_second": 0.65, |
| "eval_wer": 0.17063786861017055, |
| "step": 31000 |
| }, |
| { |
| "epoch": 3.361052574701464, |
| "grad_norm": 186760.765625, |
| "learning_rate": 8.403555555555556e-06, |
| "loss": 0.1281, |
| "step": 31100 |
| }, |
| { |
| "epoch": 3.371859296482412, |
| "grad_norm": 234347.296875, |
| "learning_rate": 8.359111111111112e-06, |
| "loss": 0.1338, |
| "step": 31200 |
| }, |
| { |
| "epoch": 3.3826660182633597, |
| "grad_norm": 223205.296875, |
| "learning_rate": 8.314666666666667e-06, |
| "loss": 0.1365, |
| "step": 31300 |
| }, |
| { |
| "epoch": 3.3934727400443077, |
| "grad_norm": 227340.140625, |
| "learning_rate": 8.270222222222222e-06, |
| "loss": 0.1263, |
| "step": 31400 |
| }, |
| { |
| "epoch": 3.404279461825255, |
| "grad_norm": 330275.71875, |
| "learning_rate": 8.225777777777777e-06, |
| "loss": 0.1265, |
| "step": 31500 |
| }, |
| { |
| "epoch": 3.404279461825255, |
| "eval_loss": 0.21032755076885223, |
| "eval_runtime": 3585.0413, |
| "eval_samples_per_second": 5.161, |
| "eval_steps_per_second": 0.645, |
| "eval_wer": 0.16658716442887417, |
| "step": 31500 |
| }, |
| { |
| "epoch": 3.415086183606203, |
| "grad_norm": 202428.109375, |
| "learning_rate": 8.181333333333334e-06, |
| "loss": 0.1302, |
| "step": 31600 |
| }, |
| { |
| "epoch": 3.4258929053871507, |
| "grad_norm": 139416.578125, |
| "learning_rate": 8.13688888888889e-06, |
| "loss": 0.126, |
| "step": 31700 |
| }, |
| { |
| "epoch": 3.4366996271680987, |
| "grad_norm": 151699.484375, |
| "learning_rate": 8.092444444444445e-06, |
| "loss": 0.1273, |
| "step": 31800 |
| }, |
| { |
| "epoch": 3.4475063489490463, |
| "grad_norm": 127831.4609375, |
| "learning_rate": 8.048e-06, |
| "loss": 0.1347, |
| "step": 31900 |
| }, |
| { |
| "epoch": 3.458313070729994, |
| "grad_norm": 225660.5625, |
| "learning_rate": 8.003555555555557e-06, |
| "loss": 0.1261, |
| "step": 32000 |
| }, |
| { |
| "epoch": 3.458313070729994, |
| "eval_loss": 0.20947901904582977, |
| "eval_runtime": 3432.2646, |
| "eval_samples_per_second": 5.391, |
| "eval_steps_per_second": 0.674, |
| "eval_wer": 0.17062222882568678, |
| "step": 32000 |
| }, |
| { |
| "epoch": 3.469119792510942, |
| "grad_norm": 5.6989545822143555, |
| "learning_rate": 7.959111111111112e-06, |
| "loss": 0.1326, |
| "step": 32100 |
| }, |
| { |
| "epoch": 3.4799265142918894, |
| "grad_norm": 5.444442272186279, |
| "learning_rate": 7.914666666666667e-06, |
| "loss": 0.1391, |
| "step": 32200 |
| }, |
| { |
| "epoch": 3.4907332360728374, |
| "grad_norm": 5.629488945007324, |
| "learning_rate": 7.870222222222222e-06, |
| "loss": 0.134, |
| "step": 32300 |
| }, |
| { |
| "epoch": 3.501539957853785, |
| "grad_norm": 9.071991920471191, |
| "learning_rate": 7.82577777777778e-06, |
| "loss": 0.1345, |
| "step": 32400 |
| }, |
| { |
| "epoch": 3.512346679634733, |
| "grad_norm": 8.57175064086914, |
| "learning_rate": 7.781333333333335e-06, |
| "loss": 0.127, |
| "step": 32500 |
| }, |
| { |
| "epoch": 3.512346679634733, |
| "eval_loss": 0.20792409777641296, |
| "eval_runtime": 3540.4171, |
| "eval_samples_per_second": 5.227, |
| "eval_steps_per_second": 1.307, |
| "eval_wer": 0.16730659451512758, |
| "step": 32500 |
| }, |
| { |
| "epoch": 3.5231534014156805, |
| "grad_norm": 7.08974552154541, |
| "learning_rate": 7.73688888888889e-06, |
| "loss": 0.1325, |
| "step": 32600 |
| }, |
| { |
| "epoch": 3.5339601231966284, |
| "grad_norm": 5.81699275970459, |
| "learning_rate": 7.692444444444445e-06, |
| "loss": 0.1337, |
| "step": 32700 |
| }, |
| { |
| "epoch": 3.544766844977576, |
| "grad_norm": 8.329341888427734, |
| "learning_rate": 7.648444444444445e-06, |
| "loss": 0.1295, |
| "step": 32800 |
| }, |
| { |
| "epoch": 3.5555735667585235, |
| "grad_norm": 9.390524864196777, |
| "learning_rate": 7.604e-06, |
| "loss": 0.1308, |
| "step": 32900 |
| }, |
| { |
| "epoch": 3.5663802885394715, |
| "grad_norm": 7.076089859008789, |
| "learning_rate": 7.5595555555555565e-06, |
| "loss": 0.1346, |
| "step": 33000 |
| }, |
| { |
| "epoch": 3.5663802885394715, |
| "eval_loss": 0.20613741874694824, |
| "eval_runtime": 3517.8125, |
| "eval_samples_per_second": 5.26, |
| "eval_steps_per_second": 1.315, |
| "eval_wer": 0.1682840810453632, |
| "step": 33000 |
| }, |
| { |
| "epoch": 3.5771870103204195, |
| "grad_norm": 6.325503826141357, |
| "learning_rate": 7.515111111111112e-06, |
| "loss": 0.14, |
| "step": 33100 |
| }, |
| { |
| "epoch": 3.587993732101367, |
| "grad_norm": 7.135802745819092, |
| "learning_rate": 7.470666666666667e-06, |
| "loss": 0.1292, |
| "step": 33200 |
| }, |
| { |
| "epoch": 3.5988004538823146, |
| "grad_norm": 5.185844898223877, |
| "learning_rate": 7.426222222222222e-06, |
| "loss": 0.1375, |
| "step": 33300 |
| }, |
| { |
| "epoch": 3.6096071756632626, |
| "grad_norm": 7.516198635101318, |
| "learning_rate": 7.381777777777779e-06, |
| "loss": 0.1287, |
| "step": 33400 |
| }, |
| { |
| "epoch": 3.62041389744421, |
| "grad_norm": 6.644392490386963, |
| "learning_rate": 7.337333333333334e-06, |
| "loss": 0.1283, |
| "step": 33500 |
| }, |
| { |
| "epoch": 3.62041389744421, |
| "eval_loss": 0.20456381142139435, |
| "eval_runtime": 3492.6992, |
| "eval_samples_per_second": 5.298, |
| "eval_steps_per_second": 1.324, |
| "eval_wer": 0.16519522360981867, |
| "step": 33500 |
| }, |
| { |
| "epoch": 3.631220619225158, |
| "grad_norm": 7.233791351318359, |
| "learning_rate": 7.2928888888888895e-06, |
| "loss": 0.1373, |
| "step": 33600 |
| }, |
| { |
| "epoch": 3.6420273410061057, |
| "grad_norm": 5.153164386749268, |
| "learning_rate": 7.248444444444445e-06, |
| "loss": 0.1368, |
| "step": 33700 |
| }, |
| { |
| "epoch": 3.6528340627870537, |
| "grad_norm": 6.022379398345947, |
| "learning_rate": 7.204000000000001e-06, |
| "loss": 0.1377, |
| "step": 33800 |
| }, |
| { |
| "epoch": 3.6636407845680012, |
| "grad_norm": 7.33857536315918, |
| "learning_rate": 7.159555555555556e-06, |
| "loss": 0.1343, |
| "step": 33900 |
| }, |
| { |
| "epoch": 3.6744475063489492, |
| "grad_norm": 6.584815502166748, |
| "learning_rate": 7.115111111111111e-06, |
| "loss": 0.1244, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.6744475063489492, |
| "eval_loss": 0.20398086309432983, |
| "eval_runtime": 3469.4987, |
| "eval_samples_per_second": 5.333, |
| "eval_steps_per_second": 1.333, |
| "eval_wer": 0.168432658997959, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.685254228129897, |
| "grad_norm": 6.824450492858887, |
| "learning_rate": 7.0706666666666665e-06, |
| "loss": 0.1255, |
| "step": 34100 |
| }, |
| { |
| "epoch": 3.6960609499108443, |
| "grad_norm": 5.974719047546387, |
| "learning_rate": 7.0262222222222234e-06, |
| "loss": 0.1302, |
| "step": 34200 |
| }, |
| { |
| "epoch": 3.7068676716917923, |
| "grad_norm": 6.354248523712158, |
| "learning_rate": 6.981777777777779e-06, |
| "loss": 0.1245, |
| "step": 34300 |
| }, |
| { |
| "epoch": 3.7176743934727403, |
| "grad_norm": 5.096312999725342, |
| "learning_rate": 6.937333333333334e-06, |
| "loss": 0.1369, |
| "step": 34400 |
| }, |
| { |
| "epoch": 3.728481115253688, |
| "grad_norm": 5.251643180847168, |
| "learning_rate": 6.892888888888889e-06, |
| "loss": 0.1207, |
| "step": 34500 |
| }, |
| { |
| "epoch": 3.728481115253688, |
| "eval_loss": 0.20263046026229858, |
| "eval_runtime": 3278.7844, |
| "eval_samples_per_second": 5.644, |
| "eval_steps_per_second": 1.411, |
| "eval_wer": 0.16479640910548252, |
| "step": 34500 |
| }, |
| { |
| "epoch": 3.7392878370346354, |
| "grad_norm": 7.432106971740723, |
| "learning_rate": 6.848444444444445e-06, |
| "loss": 0.1337, |
| "step": 34600 |
| }, |
| { |
| "epoch": 3.7500945588155834, |
| "grad_norm": 4.93491268157959, |
| "learning_rate": 6.804e-06, |
| "loss": 0.1257, |
| "step": 34700 |
| }, |
| { |
| "epoch": 3.760901280596531, |
| "grad_norm": 6.047059059143066, |
| "learning_rate": 6.760000000000001e-06, |
| "loss": 0.1206, |
| "step": 34800 |
| }, |
| { |
| "epoch": 3.771708002377479, |
| "grad_norm": 6.542396545410156, |
| "learning_rate": 6.7155555555555566e-06, |
| "loss": 0.1271, |
| "step": 34900 |
| }, |
| { |
| "epoch": 3.7825147241584265, |
| "grad_norm": 5.706289768218994, |
| "learning_rate": 6.671111111111112e-06, |
| "loss": 0.1239, |
| "step": 35000 |
| }, |
| { |
| "epoch": 3.7825147241584265, |
| "eval_loss": 0.20222991704940796, |
| "eval_runtime": 3317.3157, |
| "eval_samples_per_second": 5.578, |
| "eval_steps_per_second": 1.395, |
| "eval_wer": 0.16217674520445108, |
| "step": 35000 |
| }, |
| { |
| "epoch": 3.793321445939374, |
| "grad_norm": 7.686710834503174, |
| "learning_rate": 6.626666666666667e-06, |
| "loss": 0.1298, |
| "step": 35100 |
| }, |
| { |
| "epoch": 3.804128167720322, |
| "grad_norm": 7.791649341583252, |
| "learning_rate": 6.582222222222223e-06, |
| "loss": 0.1276, |
| "step": 35200 |
| }, |
| { |
| "epoch": 3.81493488950127, |
| "grad_norm": 5.835906505584717, |
| "learning_rate": 6.537777777777778e-06, |
| "loss": 0.1244, |
| "step": 35300 |
| }, |
| { |
| "epoch": 3.8257416112822176, |
| "grad_norm": 8.771524429321289, |
| "learning_rate": 6.4933333333333336e-06, |
| "loss": 0.1316, |
| "step": 35400 |
| }, |
| { |
| "epoch": 3.836548333063165, |
| "grad_norm": 7.212921619415283, |
| "learning_rate": 6.448888888888889e-06, |
| "loss": 0.1308, |
| "step": 35500 |
| }, |
| { |
| "epoch": 3.836548333063165, |
| "eval_loss": 0.19980210065841675, |
| "eval_runtime": 3260.0277, |
| "eval_samples_per_second": 5.676, |
| "eval_steps_per_second": 1.419, |
| "eval_wer": 0.16239570218722385, |
| "step": 35500 |
| }, |
| { |
| "epoch": 3.847355054844113, |
| "grad_norm": 5.587503910064697, |
| "learning_rate": 6.404444444444446e-06, |
| "loss": 0.1317, |
| "step": 35600 |
| }, |
| { |
| "epoch": 3.8581617766250607, |
| "grad_norm": 8.271342277526855, |
| "learning_rate": 6.360444444444445e-06, |
| "loss": 0.1316, |
| "step": 35700 |
| }, |
| { |
| "epoch": 3.8689684984060086, |
| "grad_norm": 6.529531955718994, |
| "learning_rate": 6.316000000000001e-06, |
| "loss": 0.1257, |
| "step": 35800 |
| }, |
| { |
| "epoch": 3.879775220186956, |
| "grad_norm": 5.135924816131592, |
| "learning_rate": 6.271555555555556e-06, |
| "loss": 0.1309, |
| "step": 35900 |
| }, |
| { |
| "epoch": 3.890581941967904, |
| "grad_norm": 4.710616588592529, |
| "learning_rate": 6.2271111111111115e-06, |
| "loss": 0.1272, |
| "step": 36000 |
| }, |
| { |
| "epoch": 3.890581941967904, |
| "eval_loss": 0.19968418776988983, |
| "eval_runtime": 3207.2086, |
| "eval_samples_per_second": 5.77, |
| "eval_steps_per_second": 1.442, |
| "eval_wer": 0.16488242792014327, |
| "step": 36000 |
| }, |
| { |
| "epoch": 3.9013886637488517, |
| "grad_norm": 9.243986129760742, |
| "learning_rate": 6.182666666666667e-06, |
| "loss": 0.1301, |
| "step": 36100 |
| }, |
| { |
| "epoch": 3.9121953855297997, |
| "grad_norm": 6.014384746551514, |
| "learning_rate": 6.138222222222223e-06, |
| "loss": 0.1261, |
| "step": 36200 |
| }, |
| { |
| "epoch": 3.9230021073107473, |
| "grad_norm": 5.140791893005371, |
| "learning_rate": 6.093777777777779e-06, |
| "loss": 0.1219, |
| "step": 36300 |
| }, |
| { |
| "epoch": 3.933808829091695, |
| "grad_norm": 4.738403797149658, |
| "learning_rate": 6.049333333333334e-06, |
| "loss": 0.1244, |
| "step": 36400 |
| }, |
| { |
| "epoch": 3.944615550872643, |
| "grad_norm": 4.881937026977539, |
| "learning_rate": 6.004888888888889e-06, |
| "loss": 0.1328, |
| "step": 36500 |
| }, |
| { |
| "epoch": 3.944615550872643, |
| "eval_loss": 0.19876359403133392, |
| "eval_runtime": 3246.3553, |
| "eval_samples_per_second": 5.7, |
| "eval_steps_per_second": 1.425, |
| "eval_wer": 0.1647103902908218, |
| "step": 36500 |
| }, |
| { |
| "epoch": 3.955422272653591, |
| "grad_norm": 9.000225067138672, |
| "learning_rate": 5.960444444444445e-06, |
| "loss": 0.1238, |
| "step": 36600 |
| }, |
| { |
| "epoch": 3.9662289944345384, |
| "grad_norm": 4.82861328125, |
| "learning_rate": 5.916000000000001e-06, |
| "loss": 0.1268, |
| "step": 36700 |
| }, |
| { |
| "epoch": 3.977035716215486, |
| "grad_norm": 4.868381023406982, |
| "learning_rate": 5.871555555555556e-06, |
| "loss": 0.1262, |
| "step": 36800 |
| }, |
| { |
| "epoch": 3.987842437996434, |
| "grad_norm": 10.557507514953613, |
| "learning_rate": 5.827111111111111e-06, |
| "loss": 0.134, |
| "step": 36900 |
| }, |
| { |
| "epoch": 3.9986491597773814, |
| "grad_norm": 6.664336204528809, |
| "learning_rate": 5.782666666666667e-06, |
| "loss": 0.1256, |
| "step": 37000 |
| }, |
| { |
| "epoch": 3.9986491597773814, |
| "eval_loss": 0.19714923202991486, |
| "eval_runtime": 3358.3537, |
| "eval_samples_per_second": 5.51, |
| "eval_steps_per_second": 1.377, |
| "eval_wer": 0.1653203418856888, |
| "step": 37000 |
| }, |
| { |
| "epoch": 4.009509915167234, |
| "grad_norm": 6.824110507965088, |
| "learning_rate": 5.738222222222223e-06, |
| "loss": 0.095, |
| "step": 37100 |
| }, |
| { |
| "epoch": 4.0203166369481815, |
| "grad_norm": 6.033724308013916, |
| "learning_rate": 5.6937777777777785e-06, |
| "loss": 0.0924, |
| "step": 37200 |
| }, |
| { |
| "epoch": 4.031123358729129, |
| "grad_norm": 8.13729476928711, |
| "learning_rate": 5.649333333333334e-06, |
| "loss": 0.1009, |
| "step": 37300 |
| }, |
| { |
| "epoch": 4.0419300805100775, |
| "grad_norm": 7.620489597320557, |
| "learning_rate": 5.60488888888889e-06, |
| "loss": 0.0994, |
| "step": 37400 |
| }, |
| { |
| "epoch": 4.052736802291025, |
| "grad_norm": 5.248648166656494, |
| "learning_rate": 5.560444444444445e-06, |
| "loss": 0.0953, |
| "step": 37500 |
| }, |
| { |
| "epoch": 4.052736802291025, |
| "eval_loss": 0.19735735654830933, |
| "eval_runtime": 3623.9668, |
| "eval_samples_per_second": 5.106, |
| "eval_steps_per_second": 1.277, |
| "eval_wer": 0.1603938097733013, |
| "step": 37500 |
| }, |
| { |
| "epoch": 4.063543524071973, |
| "grad_norm": 5.766596794128418, |
| "learning_rate": 5.516e-06, |
| "loss": 0.0947, |
| "step": 37600 |
| }, |
| { |
| "epoch": 4.07435024585292, |
| "grad_norm": 4.6069231033325195, |
| "learning_rate": 5.4715555555555554e-06, |
| "loss": 0.0957, |
| "step": 37700 |
| }, |
| { |
| "epoch": 4.085156967633869, |
| "grad_norm": 5.434189319610596, |
| "learning_rate": 5.4271111111111115e-06, |
| "loss": 0.0992, |
| "step": 37800 |
| }, |
| { |
| "epoch": 4.095963689414816, |
| "grad_norm": 2.9330973625183105, |
| "learning_rate": 5.382666666666667e-06, |
| "loss": 0.0943, |
| "step": 37900 |
| }, |
| { |
| "epoch": 4.106770411195764, |
| "grad_norm": 4.690386772155762, |
| "learning_rate": 5.338222222222223e-06, |
| "loss": 0.0946, |
| "step": 38000 |
| }, |
| { |
| "epoch": 4.106770411195764, |
| "eval_loss": 0.19785380363464355, |
| "eval_runtime": 3496.2995, |
| "eval_samples_per_second": 5.292, |
| "eval_steps_per_second": 1.323, |
| "eval_wer": 0.16248954089412648, |
| "step": 38000 |
| }, |
| { |
| "epoch": 4.117577132976711, |
| "grad_norm": 5.448973655700684, |
| "learning_rate": 5.293777777777778e-06, |
| "loss": 0.0928, |
| "step": 38100 |
| }, |
| { |
| "epoch": 4.12838385475766, |
| "grad_norm": 6.168562889099121, |
| "learning_rate": 5.249333333333334e-06, |
| "loss": 0.0977, |
| "step": 38200 |
| }, |
| { |
| "epoch": 4.139190576538607, |
| "grad_norm": 6.410705089569092, |
| "learning_rate": 5.204888888888889e-06, |
| "loss": 0.0954, |
| "step": 38300 |
| }, |
| { |
| "epoch": 4.149997298319555, |
| "grad_norm": 6.880079746246338, |
| "learning_rate": 5.160444444444445e-06, |
| "loss": 0.0982, |
| "step": 38400 |
| }, |
| { |
| "epoch": 4.160804020100502, |
| "grad_norm": 4.514254570007324, |
| "learning_rate": 5.116000000000001e-06, |
| "loss": 0.0933, |
| "step": 38500 |
| }, |
| { |
| "epoch": 4.160804020100502, |
| "eval_loss": 0.19639329612255096, |
| "eval_runtime": 3441.5727, |
| "eval_samples_per_second": 5.377, |
| "eval_steps_per_second": 1.344, |
| "eval_wer": 0.16088646298454007, |
| "step": 38500 |
| }, |
| { |
| "epoch": 4.17161074188145, |
| "grad_norm": 5.864041328430176, |
| "learning_rate": 5.071555555555556e-06, |
| "loss": 0.0946, |
| "step": 38600 |
| }, |
| { |
| "epoch": 4.182417463662398, |
| "grad_norm": 5.394285678863525, |
| "learning_rate": 5.027111111111111e-06, |
| "loss": 0.098, |
| "step": 38700 |
| }, |
| { |
| "epoch": 4.193224185443346, |
| "grad_norm": 5.213718891143799, |
| "learning_rate": 4.982666666666667e-06, |
| "loss": 0.0948, |
| "step": 38800 |
| }, |
| { |
| "epoch": 4.204030907224293, |
| "grad_norm": 3.7767562866210938, |
| "learning_rate": 4.938222222222222e-06, |
| "loss": 0.099, |
| "step": 38900 |
| }, |
| { |
| "epoch": 4.214837629005241, |
| "grad_norm": 8.426477432250977, |
| "learning_rate": 4.8937777777777785e-06, |
| "loss": 0.1025, |
| "step": 39000 |
| }, |
| { |
| "epoch": 4.214837629005241, |
| "eval_loss": 0.19617383182048798, |
| "eval_runtime": 3407.3151, |
| "eval_samples_per_second": 5.431, |
| "eval_steps_per_second": 1.358, |
| "eval_wer": 0.16335754893297572, |
| "step": 39000 |
| }, |
| { |
| "epoch": 4.225644350786189, |
| "grad_norm": 6.989054203033447, |
| "learning_rate": 4.849333333333334e-06, |
| "loss": 0.0922, |
| "step": 39100 |
| }, |
| { |
| "epoch": 4.236451072567137, |
| "grad_norm": 7.133777618408203, |
| "learning_rate": 4.80488888888889e-06, |
| "loss": 0.0971, |
| "step": 39200 |
| }, |
| { |
| "epoch": 4.2472577943480845, |
| "grad_norm": 5.765046119689941, |
| "learning_rate": 4.760444444444445e-06, |
| "loss": 0.0919, |
| "step": 39300 |
| }, |
| { |
| "epoch": 4.258064516129032, |
| "grad_norm": 5.539346694946289, |
| "learning_rate": 4.716e-06, |
| "loss": 0.096, |
| "step": 39400 |
| }, |
| { |
| "epoch": 4.2688712379099805, |
| "grad_norm": 6.360944747924805, |
| "learning_rate": 4.6715555555555555e-06, |
| "loss": 0.1002, |
| "step": 39500 |
| }, |
| { |
| "epoch": 4.2688712379099805, |
| "eval_loss": 0.1956612765789032, |
| "eval_runtime": 3418.2414, |
| "eval_samples_per_second": 5.413, |
| "eval_steps_per_second": 1.353, |
| "eval_wer": 0.16324025054934743, |
| "step": 39500 |
| }, |
| { |
| "epoch": 4.279677959690928, |
| "grad_norm": 6.404228210449219, |
| "learning_rate": 4.6271111111111116e-06, |
| "loss": 0.1, |
| "step": 39600 |
| }, |
| { |
| "epoch": 4.290484681471876, |
| "grad_norm": 5.106740474700928, |
| "learning_rate": 4.582666666666667e-06, |
| "loss": 0.1012, |
| "step": 39700 |
| }, |
| { |
| "epoch": 4.301291403252823, |
| "grad_norm": 7.007205009460449, |
| "learning_rate": 4.538222222222223e-06, |
| "loss": 0.103, |
| "step": 39800 |
| }, |
| { |
| "epoch": 4.312098125033771, |
| "grad_norm": 7.048201084136963, |
| "learning_rate": 4.493777777777778e-06, |
| "loss": 0.0937, |
| "step": 39900 |
| }, |
| { |
| "epoch": 4.322904846814719, |
| "grad_norm": 5.77664852142334, |
| "learning_rate": 4.449333333333334e-06, |
| "loss": 0.0976, |
| "step": 40000 |
| }, |
| { |
| "epoch": 4.322904846814719, |
| "eval_loss": 0.1948525756597519, |
| "eval_runtime": 3400.5876, |
| "eval_samples_per_second": 5.441, |
| "eval_steps_per_second": 1.36, |
| "eval_wer": 0.16208290649754847, |
| "step": 40000 |
| }, |
| { |
| "epoch": 4.333711568595667, |
| "grad_norm": 7.6147918701171875, |
| "learning_rate": 4.404888888888889e-06, |
| "loss": 0.094, |
| "step": 40100 |
| }, |
| { |
| "epoch": 4.344518290376614, |
| "grad_norm": 2.64292049407959, |
| "learning_rate": 4.360444444444445e-06, |
| "loss": 0.0987, |
| "step": 40200 |
| }, |
| { |
| "epoch": 4.355325012157562, |
| "grad_norm": 4.686502456665039, |
| "learning_rate": 4.316e-06, |
| "loss": 0.0985, |
| "step": 40300 |
| }, |
| { |
| "epoch": 4.366131733938509, |
| "grad_norm": 6.833780288696289, |
| "learning_rate": 4.271555555555556e-06, |
| "loss": 0.0866, |
| "step": 40400 |
| }, |
| { |
| "epoch": 4.376938455719458, |
| "grad_norm": 3.5335001945495605, |
| "learning_rate": 4.227111111111111e-06, |
| "loss": 0.0983, |
| "step": 40500 |
| }, |
| { |
| "epoch": 4.376938455719458, |
| "eval_loss": 0.19357165694236755, |
| "eval_runtime": 3407.3735, |
| "eval_samples_per_second": 5.431, |
| "eval_steps_per_second": 1.358, |
| "eval_wer": 0.1605111081569296, |
| "step": 40500 |
| }, |
| { |
| "epoch": 4.387745177500405, |
| "grad_norm": 6.259922981262207, |
| "learning_rate": 4.183111111111112e-06, |
| "loss": 0.0941, |
| "step": 40600 |
| }, |
| { |
| "epoch": 4.398551899281353, |
| "grad_norm": 3.454116106033325, |
| "learning_rate": 4.138666666666667e-06, |
| "loss": 0.0931, |
| "step": 40700 |
| }, |
| { |
| "epoch": 4.4093586210623, |
| "grad_norm": 5.945463180541992, |
| "learning_rate": 4.0942222222222225e-06, |
| "loss": 0.0973, |
| "step": 40800 |
| }, |
| { |
| "epoch": 4.420165342843249, |
| "grad_norm": 6.534635066986084, |
| "learning_rate": 4.049777777777778e-06, |
| "loss": 0.0941, |
| "step": 40900 |
| }, |
| { |
| "epoch": 4.430972064624196, |
| "grad_norm": 6.562328815460205, |
| "learning_rate": 4.005333333333334e-06, |
| "loss": 0.0995, |
| "step": 41000 |
| }, |
| { |
| "epoch": 4.430972064624196, |
| "eval_loss": 0.1934925764799118, |
| "eval_runtime": 3377.5938, |
| "eval_samples_per_second": 5.478, |
| "eval_steps_per_second": 1.37, |
| "eval_wer": 0.1607769844931537, |
| "step": 41000 |
| }, |
| { |
| "epoch": 4.441778786405144, |
| "grad_norm": 4.691957473754883, |
| "learning_rate": 3.960888888888889e-06, |
| "loss": 0.0907, |
| "step": 41100 |
| }, |
| { |
| "epoch": 4.4525855081860914, |
| "grad_norm": 7.0055646896362305, |
| "learning_rate": 3.916444444444445e-06, |
| "loss": 0.0984, |
| "step": 41200 |
| }, |
| { |
| "epoch": 4.46339222996704, |
| "grad_norm": 4.472280979156494, |
| "learning_rate": 3.872e-06, |
| "loss": 0.0954, |
| "step": 41300 |
| }, |
| { |
| "epoch": 4.474198951747987, |
| "grad_norm": 7.1928277015686035, |
| "learning_rate": 3.8275555555555564e-06, |
| "loss": 0.0989, |
| "step": 41400 |
| }, |
| { |
| "epoch": 4.485005673528935, |
| "grad_norm": 6.270200729370117, |
| "learning_rate": 3.7831111111111112e-06, |
| "loss": 0.0877, |
| "step": 41500 |
| }, |
| { |
| "epoch": 4.485005673528935, |
| "eval_loss": 0.19299255311489105, |
| "eval_runtime": 3437.135, |
| "eval_samples_per_second": 5.384, |
| "eval_steps_per_second": 1.346, |
| "eval_wer": 0.16083954363108877, |
| "step": 41500 |
| }, |
| { |
| "epoch": 4.4958123953098825, |
| "grad_norm": 4.897816181182861, |
| "learning_rate": 3.7386666666666673e-06, |
| "loss": 0.0878, |
| "step": 41600 |
| }, |
| { |
| "epoch": 4.50661911709083, |
| "grad_norm": 8.050458908081055, |
| "learning_rate": 3.6942222222222226e-06, |
| "loss": 0.0973, |
| "step": 41700 |
| }, |
| { |
| "epoch": 4.5174258388717785, |
| "grad_norm": 6.027979373931885, |
| "learning_rate": 3.649777777777778e-06, |
| "loss": 0.0967, |
| "step": 41800 |
| }, |
| { |
| "epoch": 4.528232560652726, |
| "grad_norm": 7.365042686462402, |
| "learning_rate": 3.6053333333333334e-06, |
| "loss": 0.0965, |
| "step": 41900 |
| }, |
| { |
| "epoch": 4.539039282433674, |
| "grad_norm": 6.175548076629639, |
| "learning_rate": 3.560888888888889e-06, |
| "loss": 0.0985, |
| "step": 42000 |
| }, |
| { |
| "epoch": 4.539039282433674, |
| "eval_loss": 0.19284753501415253, |
| "eval_runtime": 3493.7006, |
| "eval_samples_per_second": 5.296, |
| "eval_steps_per_second": 1.324, |
| "eval_wer": 0.1633028096872825, |
| "step": 42000 |
| }, |
| { |
| "epoch": 4.549846004214621, |
| "grad_norm": 3.834568738937378, |
| "learning_rate": 3.5164444444444447e-06, |
| "loss": 0.0946, |
| "step": 42100 |
| }, |
| { |
| "epoch": 4.56065272599557, |
| "grad_norm": 5.286928176879883, |
| "learning_rate": 3.4720000000000004e-06, |
| "loss": 0.0984, |
| "step": 42200 |
| }, |
| { |
| "epoch": 4.571459447776517, |
| "grad_norm": 4.384335994720459, |
| "learning_rate": 3.4275555555555556e-06, |
| "loss": 0.0859, |
| "step": 42300 |
| }, |
| { |
| "epoch": 4.582266169557465, |
| "grad_norm": 6.872238636016846, |
| "learning_rate": 3.3831111111111113e-06, |
| "loss": 0.1006, |
| "step": 42400 |
| }, |
| { |
| "epoch": 4.593072891338412, |
| "grad_norm": 5.914927005767822, |
| "learning_rate": 3.338666666666667e-06, |
| "loss": 0.0887, |
| "step": 42500 |
| }, |
| { |
| "epoch": 4.593072891338412, |
| "eval_loss": 0.19172823429107666, |
| "eval_runtime": 3473.6538, |
| "eval_samples_per_second": 5.327, |
| "eval_steps_per_second": 1.332, |
| "eval_wer": 0.1616449925320029, |
| "step": 42500 |
| }, |
| { |
| "epoch": 4.603879613119361, |
| "grad_norm": 2.9186370372772217, |
| "learning_rate": 3.2942222222222226e-06, |
| "loss": 0.0929, |
| "step": 42600 |
| }, |
| { |
| "epoch": 4.614686334900308, |
| "grad_norm": 7.138390064239502, |
| "learning_rate": 3.249777777777778e-06, |
| "loss": 0.1014, |
| "step": 42700 |
| }, |
| { |
| "epoch": 4.625493056681256, |
| "grad_norm": 5.447595596313477, |
| "learning_rate": 3.2053333333333334e-06, |
| "loss": 0.0942, |
| "step": 42800 |
| }, |
| { |
| "epoch": 4.636299778462203, |
| "grad_norm": 5.631972789764404, |
| "learning_rate": 3.160888888888889e-06, |
| "loss": 0.0928, |
| "step": 42900 |
| }, |
| { |
| "epoch": 4.647106500243151, |
| "grad_norm": 6.55267333984375, |
| "learning_rate": 3.1164444444444448e-06, |
| "loss": 0.0909, |
| "step": 43000 |
| }, |
| { |
| "epoch": 4.647106500243151, |
| "eval_loss": 0.1917807012796402, |
| "eval_runtime": 3531.9979, |
| "eval_samples_per_second": 5.239, |
| "eval_steps_per_second": 1.31, |
| "eval_wer": 0.1603547103120919, |
| "step": 43000 |
| }, |
| { |
| "epoch": 4.657913222024099, |
| "grad_norm": 5.544260501861572, |
| "learning_rate": 3.0728888888888893e-06, |
| "loss": 0.0952, |
| "step": 43100 |
| }, |
| { |
| "epoch": 4.668719943805047, |
| "grad_norm": 6.711052417755127, |
| "learning_rate": 3.028444444444445e-06, |
| "loss": 0.0893, |
| "step": 43200 |
| }, |
| { |
| "epoch": 4.679526665585994, |
| "grad_norm": 5.341217994689941, |
| "learning_rate": 2.984e-06, |
| "loss": 0.0894, |
| "step": 43300 |
| }, |
| { |
| "epoch": 4.690333387366942, |
| "grad_norm": 6.262836933135986, |
| "learning_rate": 2.9395555555555562e-06, |
| "loss": 0.0933, |
| "step": 43400 |
| }, |
| { |
| "epoch": 4.7011401091478895, |
| "grad_norm": 5.641539096832275, |
| "learning_rate": 2.8951111111111114e-06, |
| "loss": 0.0908, |
| "step": 43500 |
| }, |
| { |
| "epoch": 4.7011401091478895, |
| "eval_loss": 0.19096316397190094, |
| "eval_runtime": 3434.1139, |
| "eval_samples_per_second": 5.388, |
| "eval_steps_per_second": 1.347, |
| "eval_wer": 0.1592677452904699, |
| "step": 43500 |
| }, |
| { |
| "epoch": 4.711946830928838, |
| "grad_norm": 5.90605354309082, |
| "learning_rate": 2.850666666666667e-06, |
| "loss": 0.0955, |
| "step": 43600 |
| }, |
| { |
| "epoch": 4.7227535527097855, |
| "grad_norm": 7.320056438446045, |
| "learning_rate": 2.8062222222222223e-06, |
| "loss": 0.0862, |
| "step": 43700 |
| }, |
| { |
| "epoch": 4.733560274490733, |
| "grad_norm": 7.9307026863098145, |
| "learning_rate": 2.7617777777777784e-06, |
| "loss": 0.0968, |
| "step": 43800 |
| }, |
| { |
| "epoch": 4.7443669962716815, |
| "grad_norm": 3.4138481616973877, |
| "learning_rate": 2.7173333333333336e-06, |
| "loss": 0.095, |
| "step": 43900 |
| }, |
| { |
| "epoch": 4.755173718052629, |
| "grad_norm": 5.649805068969727, |
| "learning_rate": 2.6728888888888893e-06, |
| "loss": 0.0931, |
| "step": 44000 |
| }, |
| { |
| "epoch": 4.755173718052629, |
| "eval_loss": 0.19024226069450378, |
| "eval_runtime": 3527.1779, |
| "eval_samples_per_second": 5.246, |
| "eval_steps_per_second": 1.312, |
| "eval_wer": 0.1579227238248657, |
| "step": 44000 |
| }, |
| { |
| "epoch": 4.7659804398335766, |
| "grad_norm": 5.45325231552124, |
| "learning_rate": 2.6284444444444445e-06, |
| "loss": 0.0897, |
| "step": 44100 |
| }, |
| { |
| "epoch": 4.776787161614524, |
| "grad_norm": 4.2618408203125, |
| "learning_rate": 2.5840000000000006e-06, |
| "loss": 0.0921, |
| "step": 44200 |
| }, |
| { |
| "epoch": 4.787593883395472, |
| "grad_norm": 6.174403190612793, |
| "learning_rate": 2.539555555555556e-06, |
| "loss": 0.0954, |
| "step": 44300 |
| }, |
| { |
| "epoch": 4.79840060517642, |
| "grad_norm": 4.927825927734375, |
| "learning_rate": 2.495111111111111e-06, |
| "loss": 0.0891, |
| "step": 44400 |
| }, |
| { |
| "epoch": 4.809207326957368, |
| "grad_norm": 4.512660503387451, |
| "learning_rate": 2.4506666666666667e-06, |
| "loss": 0.0938, |
| "step": 44500 |
| }, |
| { |
| "epoch": 4.809207326957368, |
| "eval_loss": 0.18895868957042694, |
| "eval_runtime": 4201.807, |
| "eval_samples_per_second": 4.404, |
| "eval_steps_per_second": 1.101, |
| "eval_wer": 0.158172960376606, |
| "step": 44500 |
| }, |
| { |
| "epoch": 4.820014048738315, |
| "grad_norm": 5.174787998199463, |
| "learning_rate": 2.4062222222222223e-06, |
| "loss": 0.0925, |
| "step": 44600 |
| }, |
| { |
| "epoch": 4.830820770519263, |
| "grad_norm": 6.067021369934082, |
| "learning_rate": 2.361777777777778e-06, |
| "loss": 0.0917, |
| "step": 44700 |
| }, |
| { |
| "epoch": 4.84162749230021, |
| "grad_norm": 7.221127033233643, |
| "learning_rate": 2.3173333333333336e-06, |
| "loss": 0.1004, |
| "step": 44800 |
| }, |
| { |
| "epoch": 4.852434214081159, |
| "grad_norm": 6.763819217681885, |
| "learning_rate": 2.2728888888888893e-06, |
| "loss": 0.0934, |
| "step": 44900 |
| }, |
| { |
| "epoch": 4.863240935862106, |
| "grad_norm": 6.756659030914307, |
| "learning_rate": 2.228444444444445e-06, |
| "loss": 0.0925, |
| "step": 45000 |
| }, |
| { |
| "epoch": 4.863240935862106, |
| "eval_loss": 0.18888415396213531, |
| "eval_runtime": 4200.6639, |
| "eval_samples_per_second": 4.405, |
| "eval_steps_per_second": 1.101, |
| "eval_wer": 0.15939286356634005, |
| "step": 45000 |
| }, |
| { |
| "epoch": 4.874047657643054, |
| "grad_norm": 5.333858966827393, |
| "learning_rate": 2.184e-06, |
| "loss": 0.0937, |
| "step": 45100 |
| }, |
| { |
| "epoch": 4.884854379424001, |
| "grad_norm": 5.267432689666748, |
| "learning_rate": 2.139555555555556e-06, |
| "loss": 0.1001, |
| "step": 45200 |
| }, |
| { |
| "epoch": 4.89566110120495, |
| "grad_norm": 6.311913967132568, |
| "learning_rate": 2.0951111111111115e-06, |
| "loss": 0.09, |
| "step": 45300 |
| }, |
| { |
| "epoch": 4.906467822985897, |
| "grad_norm": 8.137591361999512, |
| "learning_rate": 2.0506666666666667e-06, |
| "loss": 0.0918, |
| "step": 45400 |
| }, |
| { |
| "epoch": 4.917274544766845, |
| "grad_norm": 8.778470993041992, |
| "learning_rate": 2.0062222222222224e-06, |
| "loss": 0.0943, |
| "step": 45500 |
| }, |
| { |
| "epoch": 4.917274544766845, |
| "eval_loss": 0.18823765218257904, |
| "eval_runtime": 4132.8456, |
| "eval_samples_per_second": 4.477, |
| "eval_steps_per_second": 1.119, |
| "eval_wer": 0.15777414587226987, |
| "step": 45500 |
| }, |
| { |
| "epoch": 4.928081266547792, |
| "grad_norm": 6.091644287109375, |
| "learning_rate": 1.961777777777778e-06, |
| "loss": 0.0917, |
| "step": 45600 |
| }, |
| { |
| "epoch": 4.938887988328741, |
| "grad_norm": 4.328017234802246, |
| "learning_rate": 1.9173333333333337e-06, |
| "loss": 0.0931, |
| "step": 45700 |
| }, |
| { |
| "epoch": 4.949694710109688, |
| "grad_norm": 6.655763626098633, |
| "learning_rate": 1.8728888888888891e-06, |
| "loss": 0.0909, |
| "step": 45800 |
| }, |
| { |
| "epoch": 4.960501431890636, |
| "grad_norm": 4.749749183654785, |
| "learning_rate": 1.8284444444444445e-06, |
| "loss": 0.0899, |
| "step": 45900 |
| }, |
| { |
| "epoch": 4.9713081536715835, |
| "grad_norm": 5.6424455642700195, |
| "learning_rate": 1.7840000000000002e-06, |
| "loss": 0.0918, |
| "step": 46000 |
| }, |
| { |
| "epoch": 4.9713081536715835, |
| "eval_loss": 0.18786819279193878, |
| "eval_runtime": 4143.7949, |
| "eval_samples_per_second": 4.465, |
| "eval_steps_per_second": 1.116, |
| "eval_wer": 0.1583919173593788, |
| "step": 46000 |
| }, |
| { |
| "epoch": 4.982114875452531, |
| "grad_norm": 5.470109462738037, |
| "learning_rate": 1.74e-06, |
| "loss": 0.0994, |
| "step": 46100 |
| }, |
| { |
| "epoch": 4.9929215972334795, |
| "grad_norm": 5.171388149261475, |
| "learning_rate": 1.6955555555555555e-06, |
| "loss": 0.0943, |
| "step": 46200 |
| }, |
| { |
| "epoch": 5.0036742854055225, |
| "grad_norm": 6.812433242797852, |
| "learning_rate": 1.6511111111111112e-06, |
| "loss": 0.0892, |
| "step": 46300 |
| }, |
| { |
| "epoch": 5.01448100718647, |
| "grad_norm": 3.1371068954467773, |
| "learning_rate": 1.606666666666667e-06, |
| "loss": 0.0758, |
| "step": 46400 |
| }, |
| { |
| "epoch": 5.025287728967418, |
| "grad_norm": 3.814182758331299, |
| "learning_rate": 1.5622222222222225e-06, |
| "loss": 0.0791, |
| "step": 46500 |
| }, |
| { |
| "epoch": 5.025287728967418, |
| "eval_loss": 0.18768277764320374, |
| "eval_runtime": 4051.6008, |
| "eval_samples_per_second": 4.567, |
| "eval_steps_per_second": 1.142, |
| "eval_wer": 0.15599121044112013, |
| "step": 46500 |
| }, |
| { |
| "epoch": 5.036094450748365, |
| "grad_norm": 5.319475173950195, |
| "learning_rate": 1.5177777777777781e-06, |
| "loss": 0.0759, |
| "step": 46600 |
| }, |
| { |
| "epoch": 5.046901172529314, |
| "grad_norm": 7.009033679962158, |
| "learning_rate": 1.4733333333333336e-06, |
| "loss": 0.0757, |
| "step": 46700 |
| }, |
| { |
| "epoch": 5.057707894310261, |
| "grad_norm": 4.960785865783691, |
| "learning_rate": 1.4288888888888892e-06, |
| "loss": 0.0835, |
| "step": 46800 |
| }, |
| { |
| "epoch": 5.068514616091209, |
| "grad_norm": 3.7821145057678223, |
| "learning_rate": 1.3844444444444446e-06, |
| "loss": 0.0775, |
| "step": 46900 |
| }, |
| { |
| "epoch": 5.079321337872156, |
| "grad_norm": 5.3354668617248535, |
| "learning_rate": 1.34e-06, |
| "loss": 0.077, |
| "step": 47000 |
| }, |
| { |
| "epoch": 5.079321337872156, |
| "eval_loss": 0.18772615492343903, |
| "eval_runtime": 4041.9986, |
| "eval_samples_per_second": 4.578, |
| "eval_steps_per_second": 1.144, |
| "eval_wer": 0.15872035283353794, |
| "step": 47000 |
| }, |
| { |
| "epoch": 5.090128059653105, |
| "grad_norm": 4.3733320236206055, |
| "learning_rate": 1.2955555555555557e-06, |
| "loss": 0.0809, |
| "step": 47100 |
| }, |
| { |
| "epoch": 5.100934781434052, |
| "grad_norm": 2.8010852336883545, |
| "learning_rate": 1.2511111111111112e-06, |
| "loss": 0.0733, |
| "step": 47200 |
| }, |
| { |
| "epoch": 5.111741503215, |
| "grad_norm": 10.156082153320312, |
| "learning_rate": 1.2066666666666668e-06, |
| "loss": 0.0736, |
| "step": 47300 |
| }, |
| { |
| "epoch": 5.122548224995947, |
| "grad_norm": 8.13224983215332, |
| "learning_rate": 1.1622222222222223e-06, |
| "loss": 0.0797, |
| "step": 47400 |
| }, |
| { |
| "epoch": 5.133354946776895, |
| "grad_norm": 3.623875856399536, |
| "learning_rate": 1.117777777777778e-06, |
| "loss": 0.0769, |
| "step": 47500 |
| }, |
| { |
| "epoch": 5.133354946776895, |
| "eval_loss": 0.18783515691757202, |
| "eval_runtime": 4090.0533, |
| "eval_samples_per_second": 4.524, |
| "eval_steps_per_second": 1.131, |
| "eval_wer": 0.15967437968704792, |
| "step": 47500 |
| }, |
| { |
| "epoch": 5.144161668557843, |
| "grad_norm": 4.278363227844238, |
| "learning_rate": 1.0733333333333334e-06, |
| "loss": 0.0765, |
| "step": 47600 |
| }, |
| { |
| "epoch": 5.154968390338791, |
| "grad_norm": 5.6777191162109375, |
| "learning_rate": 1.028888888888889e-06, |
| "loss": 0.074, |
| "step": 47700 |
| }, |
| { |
| "epoch": 5.165775112119738, |
| "grad_norm": 5.946367263793945, |
| "learning_rate": 9.844444444444445e-07, |
| "loss": 0.0777, |
| "step": 47800 |
| }, |
| { |
| "epoch": 5.176581833900686, |
| "grad_norm": 6.455644607543945, |
| "learning_rate": 9.400000000000001e-07, |
| "loss": 0.0804, |
| "step": 47900 |
| }, |
| { |
| "epoch": 5.187388555681634, |
| "grad_norm": 4.086187839508057, |
| "learning_rate": 8.955555555555557e-07, |
| "loss": 0.0744, |
| "step": 48000 |
| }, |
| { |
| "epoch": 5.187388555681634, |
| "eval_loss": 0.18759387731552124, |
| "eval_runtime": 4109.5273, |
| "eval_samples_per_second": 4.503, |
| "eval_steps_per_second": 1.126, |
| "eval_wer": 0.15845447649731387, |
| "step": 48000 |
| }, |
| { |
| "epoch": 5.198195277462582, |
| "grad_norm": 5.9285736083984375, |
| "learning_rate": 8.511111111111112e-07, |
| "loss": 0.0775, |
| "step": 48100 |
| }, |
| { |
| "epoch": 5.2090019992435295, |
| "grad_norm": 4.34613037109375, |
| "learning_rate": 8.066666666666667e-07, |
| "loss": 0.0772, |
| "step": 48200 |
| }, |
| { |
| "epoch": 5.219808721024477, |
| "grad_norm": 5.6380109786987305, |
| "learning_rate": 7.622222222222223e-07, |
| "loss": 0.0736, |
| "step": 48300 |
| }, |
| { |
| "epoch": 5.230615442805425, |
| "grad_norm": 6.854168891906738, |
| "learning_rate": 7.177777777777778e-07, |
| "loss": 0.0748, |
| "step": 48400 |
| }, |
| { |
| "epoch": 5.241422164586373, |
| "grad_norm": 5.549808502197266, |
| "learning_rate": 6.733333333333334e-07, |
| "loss": 0.0775, |
| "step": 48500 |
| }, |
| { |
| "epoch": 5.241422164586373, |
| "eval_loss": 0.18737368285655975, |
| "eval_runtime": 4061.569, |
| "eval_samples_per_second": 4.556, |
| "eval_steps_per_second": 1.139, |
| "eval_wer": 0.1595258017344521, |
| "step": 48500 |
| }, |
| { |
| "epoch": 5.2522288863673205, |
| "grad_norm": 4.706333160400391, |
| "learning_rate": 6.288888888888889e-07, |
| "loss": 0.0804, |
| "step": 48600 |
| }, |
| { |
| "epoch": 5.263035608148268, |
| "grad_norm": 6.192058563232422, |
| "learning_rate": 5.844444444444445e-07, |
| "loss": 0.0727, |
| "step": 48700 |
| }, |
| { |
| "epoch": 5.273842329929216, |
| "grad_norm": 6.242217540740967, |
| "learning_rate": 5.4e-07, |
| "loss": 0.0779, |
| "step": 48800 |
| }, |
| { |
| "epoch": 5.284649051710164, |
| "grad_norm": 4.3639326095581055, |
| "learning_rate": 4.955555555555556e-07, |
| "loss": 0.0763, |
| "step": 48900 |
| }, |
| { |
| "epoch": 5.295455773491112, |
| "grad_norm": 4.59783411026001, |
| "learning_rate": 4.511111111111111e-07, |
| "loss": 0.069, |
| "step": 49000 |
| }, |
| { |
| "epoch": 5.295455773491112, |
| "eval_loss": 0.1873014122247696, |
| "eval_runtime": 4056.6992, |
| "eval_samples_per_second": 4.561, |
| "eval_steps_per_second": 1.14, |
| "eval_wer": 0.15788362436365627, |
| "step": 49000 |
| }, |
| { |
| "epoch": 5.306262495272059, |
| "grad_norm": 2.637141227722168, |
| "learning_rate": 4.0666666666666666e-07, |
| "loss": 0.0755, |
| "step": 49100 |
| }, |
| { |
| "epoch": 5.317069217053007, |
| "grad_norm": 4.836416244506836, |
| "learning_rate": 3.622222222222223e-07, |
| "loss": 0.0792, |
| "step": 49200 |
| }, |
| { |
| "epoch": 5.327875938833955, |
| "grad_norm": 3.9841196537017822, |
| "learning_rate": 3.177777777777778e-07, |
| "loss": 0.0779, |
| "step": 49300 |
| }, |
| { |
| "epoch": 5.338682660614903, |
| "grad_norm": 5.457947731018066, |
| "learning_rate": 2.7333333333333335e-07, |
| "loss": 0.0745, |
| "step": 49400 |
| }, |
| { |
| "epoch": 5.34948938239585, |
| "grad_norm": 2.749351978302002, |
| "learning_rate": 2.2888888888888892e-07, |
| "loss": 0.0761, |
| "step": 49500 |
| }, |
| { |
| "epoch": 5.34948938239585, |
| "eval_loss": 0.18704187870025635, |
| "eval_runtime": 4067.5033, |
| "eval_samples_per_second": 4.549, |
| "eval_steps_per_second": 1.137, |
| "eval_wer": 0.15749262975156203, |
| "step": 49500 |
| }, |
| { |
| "epoch": 5.360404171394608, |
| "grad_norm": 4.275832653045654, |
| "learning_rate": 1.8444444444444446e-07, |
| "loss": 0.0745, |
| "step": 49600 |
| }, |
| { |
| "epoch": 5.371210893175555, |
| "grad_norm": 4.123929023742676, |
| "learning_rate": 1.4e-07, |
| "loss": 0.0791, |
| "step": 49700 |
| }, |
| { |
| "epoch": 5.382017614956503, |
| "grad_norm": 4.660633563995361, |
| "learning_rate": 9.555555555555556e-08, |
| "loss": 0.0788, |
| "step": 49800 |
| }, |
| { |
| "epoch": 5.39282433673745, |
| "grad_norm": 7.454755783081055, |
| "learning_rate": 5.111111111111112e-08, |
| "loss": 0.0782, |
| "step": 49900 |
| }, |
| { |
| "epoch": 5.403631058518398, |
| "grad_norm": 5.680870056152344, |
| "learning_rate": 6.666666666666667e-09, |
| "loss": 0.0711, |
| "step": 50000 |
| }, |
| { |
| "epoch": 5.403631058518398, |
| "eval_loss": 0.1868782788515091, |
| "eval_runtime": 3402.9501, |
| "eval_samples_per_second": 5.438, |
| "eval_steps_per_second": 1.359, |
| "eval_wer": 0.15829025876023428, |
| "step": 50000 |
| }, |
| { |
| "epoch": 5.403631058518398, |
| "step": 50000, |
| "total_flos": 1.969520657154048e+19, |
| "train_loss": 0.0007634645557403565, |
| "train_runtime": 4719.3215, |
| "train_samples_per_second": 169.516, |
| "train_steps_per_second": 10.595 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 50000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.969520657154048e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |