| { |
| "best_global_step": 5000, |
| "best_metric": 70.01703577512777, |
| "best_model_checkpoint": "./whisper-small-sdn-2025/asr_training_runs/lightning_cycle_2/checkpoint-5000", |
| "epoch": 11.737089201877934, |
| "eval_steps": 1000, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05868544600938967, |
| "grad_norm": 30.71144676208496, |
| "learning_rate": 9.960000000000001e-06, |
| "loss": 4.0849, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11737089201877934, |
| "grad_norm": 22.479434967041016, |
| "learning_rate": 9.912000000000001e-06, |
| "loss": 2.4516, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.176056338028169, |
| "grad_norm": 30.043546676635742, |
| "learning_rate": 9.862e-06, |
| "loss": 2.2164, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2347417840375587, |
| "grad_norm": 27.947431564331055, |
| "learning_rate": 9.812e-06, |
| "loss": 1.908, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2934272300469484, |
| "grad_norm": 33.64918899536133, |
| "learning_rate": 9.762e-06, |
| "loss": 1.7268, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.352112676056338, |
| "grad_norm": 27.36302947998047, |
| "learning_rate": 9.712e-06, |
| "loss": 1.4279, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4107981220657277, |
| "grad_norm": 32.08675003051758, |
| "learning_rate": 9.662e-06, |
| "loss": 1.5036, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4694835680751174, |
| "grad_norm": 21.85032081604004, |
| "learning_rate": 9.612000000000002e-06, |
| "loss": 1.3513, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.528169014084507, |
| "grad_norm": 18.02567481994629, |
| "learning_rate": 9.562000000000001e-06, |
| "loss": 1.2851, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5868544600938967, |
| "grad_norm": 22.885683059692383, |
| "learning_rate": 9.512000000000001e-06, |
| "loss": 1.4273, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6455399061032864, |
| "grad_norm": 24.009239196777344, |
| "learning_rate": 9.462000000000002e-06, |
| "loss": 1.2741, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.704225352112676, |
| "grad_norm": 19.683744430541992, |
| "learning_rate": 9.412e-06, |
| "loss": 1.164, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7629107981220657, |
| "grad_norm": 27.29810333251953, |
| "learning_rate": 9.362000000000001e-06, |
| "loss": 1.2817, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8215962441314554, |
| "grad_norm": 25.677413940429688, |
| "learning_rate": 9.312000000000002e-06, |
| "loss": 1.1265, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8802816901408451, |
| "grad_norm": 22.555444717407227, |
| "learning_rate": 9.262e-06, |
| "loss": 1.2558, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.9389671361502347, |
| "grad_norm": 27.330963134765625, |
| "learning_rate": 9.212000000000001e-06, |
| "loss": 1.0733, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9976525821596244, |
| "grad_norm": 25.466411590576172, |
| "learning_rate": 9.162000000000001e-06, |
| "loss": 1.0392, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.056338028169014, |
| "grad_norm": 20.50482940673828, |
| "learning_rate": 9.112e-06, |
| "loss": 0.8302, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1150234741784038, |
| "grad_norm": 18.399213790893555, |
| "learning_rate": 9.062e-06, |
| "loss": 0.6969, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.1737089201877935, |
| "grad_norm": 22.934707641601562, |
| "learning_rate": 9.012000000000001e-06, |
| "loss": 0.8019, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.232394366197183, |
| "grad_norm": 17.93931007385254, |
| "learning_rate": 8.962e-06, |
| "loss": 0.8902, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.2910798122065728, |
| "grad_norm": 13.139846801757812, |
| "learning_rate": 8.912e-06, |
| "loss": 0.7966, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.3497652582159625, |
| "grad_norm": 23.364347457885742, |
| "learning_rate": 8.862000000000001e-06, |
| "loss": 0.8172, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.408450704225352, |
| "grad_norm": 22.44937515258789, |
| "learning_rate": 8.812000000000001e-06, |
| "loss": 0.9067, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4671361502347418, |
| "grad_norm": 13.097396850585938, |
| "learning_rate": 8.762e-06, |
| "loss": 0.7597, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.5258215962441315, |
| "grad_norm": 17.809097290039062, |
| "learning_rate": 8.712e-06, |
| "loss": 0.8297, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.584507042253521, |
| "grad_norm": 21.2346248626709, |
| "learning_rate": 8.662000000000001e-06, |
| "loss": 0.762, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.6431924882629108, |
| "grad_norm": 20.58393669128418, |
| "learning_rate": 8.612e-06, |
| "loss": 0.9516, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7018779342723005, |
| "grad_norm": 15.944975852966309, |
| "learning_rate": 8.562e-06, |
| "loss": 0.7829, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.76056338028169, |
| "grad_norm": 18.670852661132812, |
| "learning_rate": 8.512e-06, |
| "loss": 0.8098, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.8192488262910798, |
| "grad_norm": 18.15910530090332, |
| "learning_rate": 8.462e-06, |
| "loss": 0.8253, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.8779342723004695, |
| "grad_norm": 17.510900497436523, |
| "learning_rate": 8.412e-06, |
| "loss": 0.7525, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.936619718309859, |
| "grad_norm": 11.208598136901855, |
| "learning_rate": 8.362e-06, |
| "loss": 0.7598, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.995305164319249, |
| "grad_norm": 14.952198028564453, |
| "learning_rate": 8.312000000000001e-06, |
| "loss": 0.8244, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.0539906103286385, |
| "grad_norm": 14.107500076293945, |
| "learning_rate": 8.262000000000002e-06, |
| "loss": 0.5531, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.112676056338028, |
| "grad_norm": 12.615099906921387, |
| "learning_rate": 8.212e-06, |
| "loss": 0.5232, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.171361502347418, |
| "grad_norm": 8.425407409667969, |
| "learning_rate": 8.162e-06, |
| "loss": 0.4375, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.2300469483568075, |
| "grad_norm": 10.094340324401855, |
| "learning_rate": 8.112000000000001e-06, |
| "loss": 0.5122, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.288732394366197, |
| "grad_norm": 12.052427291870117, |
| "learning_rate": 8.062000000000002e-06, |
| "loss": 0.4252, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.347417840375587, |
| "grad_norm": 12.115269660949707, |
| "learning_rate": 8.012e-06, |
| "loss": 0.5578, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.347417840375587, |
| "eval_loss": 1.0599621534347534, |
| "eval_runtime": 271.4248, |
| "eval_samples_per_second": 1.396, |
| "eval_steps_per_second": 0.177, |
| "eval_wer": 86.44439036261863, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.4061032863849765, |
| "grad_norm": 14.625807762145996, |
| "learning_rate": 7.962000000000001e-06, |
| "loss": 0.5382, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.464788732394366, |
| "grad_norm": 9.787211418151855, |
| "learning_rate": 7.912000000000001e-06, |
| "loss": 0.5301, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.523474178403756, |
| "grad_norm": 13.635499954223633, |
| "learning_rate": 7.862e-06, |
| "loss": 0.5697, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.5821596244131455, |
| "grad_norm": 12.006632804870605, |
| "learning_rate": 7.812e-06, |
| "loss": 0.4453, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.640845070422535, |
| "grad_norm": 16.6585636138916, |
| "learning_rate": 7.762000000000001e-06, |
| "loss": 0.4854, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.699530516431925, |
| "grad_norm": 15.70045280456543, |
| "learning_rate": 7.712e-06, |
| "loss": 0.5265, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.7582159624413145, |
| "grad_norm": 15.379609107971191, |
| "learning_rate": 7.662e-06, |
| "loss": 0.4894, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.816901408450704, |
| "grad_norm": 15.11367130279541, |
| "learning_rate": 7.612e-06, |
| "loss": 0.5602, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.875586854460094, |
| "grad_norm": 17.753530502319336, |
| "learning_rate": 7.562000000000001e-06, |
| "loss": 0.4709, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.9342723004694835, |
| "grad_norm": 12.208802223205566, |
| "learning_rate": 7.512e-06, |
| "loss": 0.5455, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.992957746478873, |
| "grad_norm": 9.159188270568848, |
| "learning_rate": 7.462000000000001e-06, |
| "loss": 0.5367, |
| "step": 1275 |
| }, |
| { |
| "epoch": 3.051643192488263, |
| "grad_norm": 6.392484664916992, |
| "learning_rate": 7.412e-06, |
| "loss": 0.3184, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.1103286384976525, |
| "grad_norm": 8.96312427520752, |
| "learning_rate": 7.362e-06, |
| "loss": 0.3772, |
| "step": 1325 |
| }, |
| { |
| "epoch": 3.169014084507042, |
| "grad_norm": 8.565118789672852, |
| "learning_rate": 7.3120000000000005e-06, |
| "loss": 0.3321, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.227699530516432, |
| "grad_norm": 8.298563957214355, |
| "learning_rate": 7.262e-06, |
| "loss": 0.3029, |
| "step": 1375 |
| }, |
| { |
| "epoch": 3.2863849765258215, |
| "grad_norm": 13.057472229003906, |
| "learning_rate": 7.212e-06, |
| "loss": 0.2809, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.345070422535211, |
| "grad_norm": 15.98492431640625, |
| "learning_rate": 7.162e-06, |
| "loss": 0.2694, |
| "step": 1425 |
| }, |
| { |
| "epoch": 3.403755868544601, |
| "grad_norm": 13.155710220336914, |
| "learning_rate": 7.1120000000000015e-06, |
| "loss": 0.3075, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.4624413145539905, |
| "grad_norm": 8.523398399353027, |
| "learning_rate": 7.062000000000001e-06, |
| "loss": 0.299, |
| "step": 1475 |
| }, |
| { |
| "epoch": 3.52112676056338, |
| "grad_norm": 6.524288177490234, |
| "learning_rate": 7.012000000000001e-06, |
| "loss": 0.3276, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.57981220657277, |
| "grad_norm": 8.676782608032227, |
| "learning_rate": 6.962000000000001e-06, |
| "loss": 0.3431, |
| "step": 1525 |
| }, |
| { |
| "epoch": 3.6384976525821595, |
| "grad_norm": 7.747554779052734, |
| "learning_rate": 6.912000000000001e-06, |
| "loss": 0.3369, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.697183098591549, |
| "grad_norm": 9.531723022460938, |
| "learning_rate": 6.8620000000000005e-06, |
| "loss": 0.3354, |
| "step": 1575 |
| }, |
| { |
| "epoch": 3.755868544600939, |
| "grad_norm": 13.466939926147461, |
| "learning_rate": 6.812000000000001e-06, |
| "loss": 0.3224, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.8145539906103285, |
| "grad_norm": 9.547689437866211, |
| "learning_rate": 6.762000000000001e-06, |
| "loss": 0.2504, |
| "step": 1625 |
| }, |
| { |
| "epoch": 3.873239436619718, |
| "grad_norm": 17.218488693237305, |
| "learning_rate": 6.712000000000001e-06, |
| "loss": 0.3484, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.931924882629108, |
| "grad_norm": 10.556262969970703, |
| "learning_rate": 6.662000000000001e-06, |
| "loss": 0.298, |
| "step": 1675 |
| }, |
| { |
| "epoch": 3.9906103286384975, |
| "grad_norm": 5.671838760375977, |
| "learning_rate": 6.612e-06, |
| "loss": 0.3222, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.049295774647887, |
| "grad_norm": 14.99199104309082, |
| "learning_rate": 6.562000000000001e-06, |
| "loss": 0.2189, |
| "step": 1725 |
| }, |
| { |
| "epoch": 4.107981220657277, |
| "grad_norm": 13.279620170593262, |
| "learning_rate": 6.5120000000000005e-06, |
| "loss": 0.1823, |
| "step": 1750 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 8.277289390563965, |
| "learning_rate": 6.462e-06, |
| "loss": 0.1381, |
| "step": 1775 |
| }, |
| { |
| "epoch": 4.225352112676056, |
| "grad_norm": 4.825685977935791, |
| "learning_rate": 6.412000000000001e-06, |
| "loss": 0.2302, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.284037558685446, |
| "grad_norm": 7.209333896636963, |
| "learning_rate": 6.362e-06, |
| "loss": 0.1838, |
| "step": 1825 |
| }, |
| { |
| "epoch": 4.342723004694836, |
| "grad_norm": 12.577840805053711, |
| "learning_rate": 6.312000000000001e-06, |
| "loss": 0.1596, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.401408450704225, |
| "grad_norm": 6.849479675292969, |
| "learning_rate": 6.262e-06, |
| "loss": 0.2349, |
| "step": 1875 |
| }, |
| { |
| "epoch": 4.460093896713615, |
| "grad_norm": 6.93465518951416, |
| "learning_rate": 6.212e-06, |
| "loss": 0.1934, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.518779342723005, |
| "grad_norm": 8.305404663085938, |
| "learning_rate": 6.1620000000000005e-06, |
| "loss": 0.1755, |
| "step": 1925 |
| }, |
| { |
| "epoch": 4.577464788732394, |
| "grad_norm": 8.874540328979492, |
| "learning_rate": 6.112e-06, |
| "loss": 0.1726, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.636150234741784, |
| "grad_norm": 5.533932685852051, |
| "learning_rate": 6.062e-06, |
| "loss": 0.1891, |
| "step": 1975 |
| }, |
| { |
| "epoch": 4.694835680751174, |
| "grad_norm": 13.135041236877441, |
| "learning_rate": 6.012e-06, |
| "loss": 0.1592, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.694835680751174, |
| "eval_loss": 1.117398977279663, |
| "eval_runtime": 220.7322, |
| "eval_samples_per_second": 1.717, |
| "eval_steps_per_second": 0.217, |
| "eval_wer": 82.793867120954, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.753521126760563, |
| "grad_norm": 10.941877365112305, |
| "learning_rate": 5.962e-06, |
| "loss": 0.1958, |
| "step": 2025 |
| }, |
| { |
| "epoch": 4.812206572769953, |
| "grad_norm": 8.460000991821289, |
| "learning_rate": 5.912e-06, |
| "loss": 0.1765, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.870892018779343, |
| "grad_norm": 13.924954414367676, |
| "learning_rate": 5.862000000000001e-06, |
| "loss": 0.2053, |
| "step": 2075 |
| }, |
| { |
| "epoch": 4.929577464788732, |
| "grad_norm": 8.0511474609375, |
| "learning_rate": 5.812000000000001e-06, |
| "loss": 0.2096, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.988262910798122, |
| "grad_norm": 6.871691703796387, |
| "learning_rate": 5.762000000000001e-06, |
| "loss": 0.1623, |
| "step": 2125 |
| }, |
| { |
| "epoch": 5.046948356807512, |
| "grad_norm": 7.501040458679199, |
| "learning_rate": 5.7120000000000005e-06, |
| "loss": 0.1228, |
| "step": 2150 |
| }, |
| { |
| "epoch": 5.105633802816901, |
| "grad_norm": 3.6400983333587646, |
| "learning_rate": 5.662000000000001e-06, |
| "loss": 0.1071, |
| "step": 2175 |
| }, |
| { |
| "epoch": 5.164319248826291, |
| "grad_norm": 4.942204475402832, |
| "learning_rate": 5.612000000000001e-06, |
| "loss": 0.1214, |
| "step": 2200 |
| }, |
| { |
| "epoch": 5.223004694835681, |
| "grad_norm": 7.270200729370117, |
| "learning_rate": 5.562000000000001e-06, |
| "loss": 0.1011, |
| "step": 2225 |
| }, |
| { |
| "epoch": 5.28169014084507, |
| "grad_norm": 8.258340835571289, |
| "learning_rate": 5.512000000000001e-06, |
| "loss": 0.1231, |
| "step": 2250 |
| }, |
| { |
| "epoch": 5.34037558685446, |
| "grad_norm": 3.134267807006836, |
| "learning_rate": 5.462e-06, |
| "loss": 0.1075, |
| "step": 2275 |
| }, |
| { |
| "epoch": 5.39906103286385, |
| "grad_norm": 8.155512809753418, |
| "learning_rate": 5.412000000000001e-06, |
| "loss": 0.1037, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.457746478873239, |
| "grad_norm": 8.14622974395752, |
| "learning_rate": 5.3620000000000005e-06, |
| "loss": 0.1026, |
| "step": 2325 |
| }, |
| { |
| "epoch": 5.516431924882629, |
| "grad_norm": 7.712051868438721, |
| "learning_rate": 5.312e-06, |
| "loss": 0.124, |
| "step": 2350 |
| }, |
| { |
| "epoch": 5.575117370892019, |
| "grad_norm": 7.989415645599365, |
| "learning_rate": 5.262000000000001e-06, |
| "loss": 0.0928, |
| "step": 2375 |
| }, |
| { |
| "epoch": 5.633802816901408, |
| "grad_norm": 7.154349327087402, |
| "learning_rate": 5.212e-06, |
| "loss": 0.1234, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.692488262910798, |
| "grad_norm": 6.29465913772583, |
| "learning_rate": 5.162000000000001e-06, |
| "loss": 0.1243, |
| "step": 2425 |
| }, |
| { |
| "epoch": 5.751173708920188, |
| "grad_norm": 9.395393371582031, |
| "learning_rate": 5.112e-06, |
| "loss": 0.1195, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.809859154929578, |
| "grad_norm": 5.384176731109619, |
| "learning_rate": 5.062e-06, |
| "loss": 0.0725, |
| "step": 2475 |
| }, |
| { |
| "epoch": 5.868544600938967, |
| "grad_norm": 10.592826843261719, |
| "learning_rate": 5.0120000000000005e-06, |
| "loss": 0.092, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.927230046948357, |
| "grad_norm": 10.480640411376953, |
| "learning_rate": 4.962e-06, |
| "loss": 0.0979, |
| "step": 2525 |
| }, |
| { |
| "epoch": 5.985915492957746, |
| "grad_norm": 8.233006477355957, |
| "learning_rate": 4.9120000000000006e-06, |
| "loss": 0.0923, |
| "step": 2550 |
| }, |
| { |
| "epoch": 6.044600938967136, |
| "grad_norm": 1.81440269947052, |
| "learning_rate": 4.862e-06, |
| "loss": 0.0549, |
| "step": 2575 |
| }, |
| { |
| "epoch": 6.103286384976526, |
| "grad_norm": 8.188456535339355, |
| "learning_rate": 4.812000000000001e-06, |
| "loss": 0.0619, |
| "step": 2600 |
| }, |
| { |
| "epoch": 6.161971830985916, |
| "grad_norm": 6.400794506072998, |
| "learning_rate": 4.762e-06, |
| "loss": 0.0678, |
| "step": 2625 |
| }, |
| { |
| "epoch": 6.220657276995305, |
| "grad_norm": 4.501795291900635, |
| "learning_rate": 4.712000000000001e-06, |
| "loss": 0.0358, |
| "step": 2650 |
| }, |
| { |
| "epoch": 6.279342723004695, |
| "grad_norm": 8.449444770812988, |
| "learning_rate": 4.6620000000000004e-06, |
| "loss": 0.0736, |
| "step": 2675 |
| }, |
| { |
| "epoch": 6.338028169014084, |
| "grad_norm": 7.520918369293213, |
| "learning_rate": 4.612e-06, |
| "loss": 0.0631, |
| "step": 2700 |
| }, |
| { |
| "epoch": 6.396713615023474, |
| "grad_norm": 2.6740336418151855, |
| "learning_rate": 4.5620000000000005e-06, |
| "loss": 0.0732, |
| "step": 2725 |
| }, |
| { |
| "epoch": 6.455399061032864, |
| "grad_norm": 7.82635498046875, |
| "learning_rate": 4.512e-06, |
| "loss": 0.0681, |
| "step": 2750 |
| }, |
| { |
| "epoch": 6.514084507042254, |
| "grad_norm": 4.215991973876953, |
| "learning_rate": 4.462e-06, |
| "loss": 0.0687, |
| "step": 2775 |
| }, |
| { |
| "epoch": 6.572769953051643, |
| "grad_norm": 8.543045997619629, |
| "learning_rate": 4.412e-06, |
| "loss": 0.0587, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.631455399061033, |
| "grad_norm": 1.273762583732605, |
| "learning_rate": 4.362e-06, |
| "loss": 0.0388, |
| "step": 2825 |
| }, |
| { |
| "epoch": 6.690140845070422, |
| "grad_norm": 7.219778060913086, |
| "learning_rate": 4.312e-06, |
| "loss": 0.0637, |
| "step": 2850 |
| }, |
| { |
| "epoch": 6.748826291079812, |
| "grad_norm": 3.4591705799102783, |
| "learning_rate": 4.262000000000001e-06, |
| "loss": 0.0512, |
| "step": 2875 |
| }, |
| { |
| "epoch": 6.807511737089202, |
| "grad_norm": 8.502069473266602, |
| "learning_rate": 4.2120000000000005e-06, |
| "loss": 0.0553, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.866197183098592, |
| "grad_norm": 9.043886184692383, |
| "learning_rate": 4.162e-06, |
| "loss": 0.0773, |
| "step": 2925 |
| }, |
| { |
| "epoch": 6.924882629107981, |
| "grad_norm": 7.274739742279053, |
| "learning_rate": 4.112000000000001e-06, |
| "loss": 0.0575, |
| "step": 2950 |
| }, |
| { |
| "epoch": 6.983568075117371, |
| "grad_norm": 1.4064300060272217, |
| "learning_rate": 4.062e-06, |
| "loss": 0.0527, |
| "step": 2975 |
| }, |
| { |
| "epoch": 7.042253521126761, |
| "grad_norm": 4.416920185089111, |
| "learning_rate": 4.012000000000001e-06, |
| "loss": 0.0282, |
| "step": 3000 |
| }, |
| { |
| "epoch": 7.042253521126761, |
| "eval_loss": 1.2262433767318726, |
| "eval_runtime": 197.2898, |
| "eval_samples_per_second": 1.921, |
| "eval_steps_per_second": 0.243, |
| "eval_wer": 81.09028960817717, |
| "step": 3000 |
| }, |
| { |
| "epoch": 7.10093896713615, |
| "grad_norm": 1.1444480419158936, |
| "learning_rate": 3.962e-06, |
| "loss": 0.0271, |
| "step": 3025 |
| }, |
| { |
| "epoch": 7.15962441314554, |
| "grad_norm": 3.13212513923645, |
| "learning_rate": 3.912e-06, |
| "loss": 0.0302, |
| "step": 3050 |
| }, |
| { |
| "epoch": 7.21830985915493, |
| "grad_norm": 1.2989273071289062, |
| "learning_rate": 3.8620000000000005e-06, |
| "loss": 0.0385, |
| "step": 3075 |
| }, |
| { |
| "epoch": 7.276995305164319, |
| "grad_norm": 8.594914436340332, |
| "learning_rate": 3.812e-06, |
| "loss": 0.0383, |
| "step": 3100 |
| }, |
| { |
| "epoch": 7.335680751173709, |
| "grad_norm": 3.2579174041748047, |
| "learning_rate": 3.762e-06, |
| "loss": 0.043, |
| "step": 3125 |
| }, |
| { |
| "epoch": 7.394366197183099, |
| "grad_norm": 2.448817014694214, |
| "learning_rate": 3.712e-06, |
| "loss": 0.0191, |
| "step": 3150 |
| }, |
| { |
| "epoch": 7.453051643192488, |
| "grad_norm": 1.0537528991699219, |
| "learning_rate": 3.6620000000000007e-06, |
| "loss": 0.026, |
| "step": 3175 |
| }, |
| { |
| "epoch": 7.511737089201878, |
| "grad_norm": 5.7627716064453125, |
| "learning_rate": 3.6120000000000003e-06, |
| "loss": 0.0334, |
| "step": 3200 |
| }, |
| { |
| "epoch": 7.570422535211268, |
| "grad_norm": 1.5681730508804321, |
| "learning_rate": 3.5620000000000004e-06, |
| "loss": 0.0303, |
| "step": 3225 |
| }, |
| { |
| "epoch": 7.629107981220657, |
| "grad_norm": 1.3271775245666504, |
| "learning_rate": 3.5120000000000004e-06, |
| "loss": 0.0318, |
| "step": 3250 |
| }, |
| { |
| "epoch": 7.687793427230047, |
| "grad_norm": 1.4985450506210327, |
| "learning_rate": 3.4620000000000005e-06, |
| "loss": 0.0287, |
| "step": 3275 |
| }, |
| { |
| "epoch": 7.746478873239437, |
| "grad_norm": 2.9170143604278564, |
| "learning_rate": 3.412e-06, |
| "loss": 0.0251, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.805164319248826, |
| "grad_norm": 3.5694615840911865, |
| "learning_rate": 3.362e-06, |
| "loss": 0.0303, |
| "step": 3325 |
| }, |
| { |
| "epoch": 7.863849765258216, |
| "grad_norm": 0.8620719313621521, |
| "learning_rate": 3.3120000000000002e-06, |
| "loss": 0.0374, |
| "step": 3350 |
| }, |
| { |
| "epoch": 7.922535211267606, |
| "grad_norm": 4.067095756530762, |
| "learning_rate": 3.2620000000000003e-06, |
| "loss": 0.0254, |
| "step": 3375 |
| }, |
| { |
| "epoch": 7.981220657276995, |
| "grad_norm": 4.516016960144043, |
| "learning_rate": 3.212e-06, |
| "loss": 0.0359, |
| "step": 3400 |
| }, |
| { |
| "epoch": 8.039906103286384, |
| "grad_norm": 4.685014247894287, |
| "learning_rate": 3.162e-06, |
| "loss": 0.0189, |
| "step": 3425 |
| }, |
| { |
| "epoch": 8.098591549295774, |
| "grad_norm": 4.836423873901367, |
| "learning_rate": 3.112e-06, |
| "loss": 0.0217, |
| "step": 3450 |
| }, |
| { |
| "epoch": 8.157276995305164, |
| "grad_norm": 0.42729654908180237, |
| "learning_rate": 3.0620000000000005e-06, |
| "loss": 0.0157, |
| "step": 3475 |
| }, |
| { |
| "epoch": 8.215962441314554, |
| "grad_norm": 2.2334694862365723, |
| "learning_rate": 3.0120000000000006e-06, |
| "loss": 0.015, |
| "step": 3500 |
| }, |
| { |
| "epoch": 8.274647887323944, |
| "grad_norm": 4.860167980194092, |
| "learning_rate": 2.9620000000000006e-06, |
| "loss": 0.0254, |
| "step": 3525 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 3.4534430503845215, |
| "learning_rate": 2.9120000000000002e-06, |
| "loss": 0.0154, |
| "step": 3550 |
| }, |
| { |
| "epoch": 8.392018779342724, |
| "grad_norm": 0.6198468208312988, |
| "learning_rate": 2.8620000000000003e-06, |
| "loss": 0.0195, |
| "step": 3575 |
| }, |
| { |
| "epoch": 8.450704225352112, |
| "grad_norm": 3.6635360717773438, |
| "learning_rate": 2.8120000000000004e-06, |
| "loss": 0.0132, |
| "step": 3600 |
| }, |
| { |
| "epoch": 8.509389671361502, |
| "grad_norm": 4.437213897705078, |
| "learning_rate": 2.7620000000000004e-06, |
| "loss": 0.0283, |
| "step": 3625 |
| }, |
| { |
| "epoch": 8.568075117370892, |
| "grad_norm": 2.0735621452331543, |
| "learning_rate": 2.712e-06, |
| "loss": 0.0141, |
| "step": 3650 |
| }, |
| { |
| "epoch": 8.626760563380282, |
| "grad_norm": 4.264507293701172, |
| "learning_rate": 2.662e-06, |
| "loss": 0.016, |
| "step": 3675 |
| }, |
| { |
| "epoch": 8.685446009389672, |
| "grad_norm": 9.613415718078613, |
| "learning_rate": 2.612e-06, |
| "loss": 0.0188, |
| "step": 3700 |
| }, |
| { |
| "epoch": 8.74413145539906, |
| "grad_norm": 1.1187382936477661, |
| "learning_rate": 2.562e-06, |
| "loss": 0.0111, |
| "step": 3725 |
| }, |
| { |
| "epoch": 8.80281690140845, |
| "grad_norm": 1.9176028966903687, |
| "learning_rate": 2.512e-06, |
| "loss": 0.0142, |
| "step": 3750 |
| }, |
| { |
| "epoch": 8.86150234741784, |
| "grad_norm": 2.139860153198242, |
| "learning_rate": 2.4620000000000003e-06, |
| "loss": 0.0122, |
| "step": 3775 |
| }, |
| { |
| "epoch": 8.92018779342723, |
| "grad_norm": 1.8670209646224976, |
| "learning_rate": 2.4120000000000004e-06, |
| "loss": 0.0136, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.97887323943662, |
| "grad_norm": 1.4191254377365112, |
| "learning_rate": 2.362e-06, |
| "loss": 0.0104, |
| "step": 3825 |
| }, |
| { |
| "epoch": 9.03755868544601, |
| "grad_norm": 1.3398191928863525, |
| "learning_rate": 2.312e-06, |
| "loss": 0.0097, |
| "step": 3850 |
| }, |
| { |
| "epoch": 9.0962441314554, |
| "grad_norm": 3.4211862087249756, |
| "learning_rate": 2.262e-06, |
| "loss": 0.0045, |
| "step": 3875 |
| }, |
| { |
| "epoch": 9.154929577464788, |
| "grad_norm": 0.4645729959011078, |
| "learning_rate": 2.212e-06, |
| "loss": 0.0069, |
| "step": 3900 |
| }, |
| { |
| "epoch": 9.213615023474178, |
| "grad_norm": 0.5300021767616272, |
| "learning_rate": 2.1620000000000002e-06, |
| "loss": 0.0064, |
| "step": 3925 |
| }, |
| { |
| "epoch": 9.272300469483568, |
| "grad_norm": 0.7054510712623596, |
| "learning_rate": 2.1120000000000003e-06, |
| "loss": 0.0077, |
| "step": 3950 |
| }, |
| { |
| "epoch": 9.330985915492958, |
| "grad_norm": 2.699523448944092, |
| "learning_rate": 2.062e-06, |
| "loss": 0.0169, |
| "step": 3975 |
| }, |
| { |
| "epoch": 9.389671361502348, |
| "grad_norm": 3.4975876808166504, |
| "learning_rate": 2.012e-06, |
| "loss": 0.0066, |
| "step": 4000 |
| }, |
| { |
| "epoch": 9.389671361502348, |
| "eval_loss": 1.3293064832687378, |
| "eval_runtime": 183.5937, |
| "eval_samples_per_second": 2.064, |
| "eval_steps_per_second": 0.261, |
| "eval_wer": 70.13871988318326, |
| "step": 4000 |
| }, |
| { |
| "epoch": 9.448356807511738, |
| "grad_norm": 0.23933370411396027, |
| "learning_rate": 1.9620000000000004e-06, |
| "loss": 0.0086, |
| "step": 4025 |
| }, |
| { |
| "epoch": 9.507042253521126, |
| "grad_norm": 4.528557777404785, |
| "learning_rate": 1.912e-06, |
| "loss": 0.0093, |
| "step": 4050 |
| }, |
| { |
| "epoch": 9.565727699530516, |
| "grad_norm": 0.5696691274642944, |
| "learning_rate": 1.8620000000000001e-06, |
| "loss": 0.0083, |
| "step": 4075 |
| }, |
| { |
| "epoch": 9.624413145539906, |
| "grad_norm": 0.42469522356987, |
| "learning_rate": 1.8120000000000002e-06, |
| "loss": 0.0093, |
| "step": 4100 |
| }, |
| { |
| "epoch": 9.683098591549296, |
| "grad_norm": 0.2512667179107666, |
| "learning_rate": 1.762e-06, |
| "loss": 0.0067, |
| "step": 4125 |
| }, |
| { |
| "epoch": 9.741784037558686, |
| "grad_norm": 8.73310661315918, |
| "learning_rate": 1.712e-06, |
| "loss": 0.0153, |
| "step": 4150 |
| }, |
| { |
| "epoch": 9.800469483568076, |
| "grad_norm": 0.35621461272239685, |
| "learning_rate": 1.662e-06, |
| "loss": 0.009, |
| "step": 4175 |
| }, |
| { |
| "epoch": 9.859154929577464, |
| "grad_norm": 0.45837071537971497, |
| "learning_rate": 1.6120000000000002e-06, |
| "loss": 0.0077, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.917840375586854, |
| "grad_norm": 2.0876801013946533, |
| "learning_rate": 1.5620000000000002e-06, |
| "loss": 0.0086, |
| "step": 4225 |
| }, |
| { |
| "epoch": 9.976525821596244, |
| "grad_norm": 0.18432964384555817, |
| "learning_rate": 1.512e-06, |
| "loss": 0.009, |
| "step": 4250 |
| }, |
| { |
| "epoch": 10.035211267605634, |
| "grad_norm": 1.0506223440170288, |
| "learning_rate": 1.4620000000000001e-06, |
| "loss": 0.0073, |
| "step": 4275 |
| }, |
| { |
| "epoch": 10.093896713615024, |
| "grad_norm": 1.3841832876205444, |
| "learning_rate": 1.412e-06, |
| "loss": 0.0058, |
| "step": 4300 |
| }, |
| { |
| "epoch": 10.152582159624414, |
| "grad_norm": 0.15871676802635193, |
| "learning_rate": 1.362e-06, |
| "loss": 0.0047, |
| "step": 4325 |
| }, |
| { |
| "epoch": 10.211267605633802, |
| "grad_norm": 0.42714136838912964, |
| "learning_rate": 1.3120000000000003e-06, |
| "loss": 0.01, |
| "step": 4350 |
| }, |
| { |
| "epoch": 10.269953051643192, |
| "grad_norm": 0.19602788984775543, |
| "learning_rate": 1.2620000000000002e-06, |
| "loss": 0.0055, |
| "step": 4375 |
| }, |
| { |
| "epoch": 10.328638497652582, |
| "grad_norm": 0.25925296545028687, |
| "learning_rate": 1.2120000000000002e-06, |
| "loss": 0.0042, |
| "step": 4400 |
| }, |
| { |
| "epoch": 10.387323943661972, |
| "grad_norm": 0.18303866684436798, |
| "learning_rate": 1.162e-06, |
| "loss": 0.0058, |
| "step": 4425 |
| }, |
| { |
| "epoch": 10.446009389671362, |
| "grad_norm": 0.2301306277513504, |
| "learning_rate": 1.1120000000000001e-06, |
| "loss": 0.0031, |
| "step": 4450 |
| }, |
| { |
| "epoch": 10.504694835680752, |
| "grad_norm": 0.23439094424247742, |
| "learning_rate": 1.0620000000000002e-06, |
| "loss": 0.005, |
| "step": 4475 |
| }, |
| { |
| "epoch": 10.56338028169014, |
| "grad_norm": 0.2046060562133789, |
| "learning_rate": 1.012e-06, |
| "loss": 0.0046, |
| "step": 4500 |
| }, |
| { |
| "epoch": 10.62206572769953, |
| "grad_norm": 0.17962978780269623, |
| "learning_rate": 9.62e-07, |
| "loss": 0.0055, |
| "step": 4525 |
| }, |
| { |
| "epoch": 10.68075117370892, |
| "grad_norm": 0.13209222257137299, |
| "learning_rate": 9.120000000000001e-07, |
| "loss": 0.0048, |
| "step": 4550 |
| }, |
| { |
| "epoch": 10.73943661971831, |
| "grad_norm": 0.1691015511751175, |
| "learning_rate": 8.620000000000001e-07, |
| "loss": 0.0044, |
| "step": 4575 |
| }, |
| { |
| "epoch": 10.7981220657277, |
| "grad_norm": 4.851961135864258, |
| "learning_rate": 8.12e-07, |
| "loss": 0.006, |
| "step": 4600 |
| }, |
| { |
| "epoch": 10.85680751173709, |
| "grad_norm": 0.1549525409936905, |
| "learning_rate": 7.620000000000001e-07, |
| "loss": 0.0064, |
| "step": 4625 |
| }, |
| { |
| "epoch": 10.915492957746478, |
| "grad_norm": 0.5663545727729797, |
| "learning_rate": 7.12e-07, |
| "loss": 0.0066, |
| "step": 4650 |
| }, |
| { |
| "epoch": 10.974178403755868, |
| "grad_norm": 0.2026844024658203, |
| "learning_rate": 6.62e-07, |
| "loss": 0.0051, |
| "step": 4675 |
| }, |
| { |
| "epoch": 11.032863849765258, |
| "grad_norm": 0.29603302478790283, |
| "learning_rate": 6.12e-07, |
| "loss": 0.0036, |
| "step": 4700 |
| }, |
| { |
| "epoch": 11.091549295774648, |
| "grad_norm": 0.46924281120300293, |
| "learning_rate": 5.620000000000001e-07, |
| "loss": 0.0037, |
| "step": 4725 |
| }, |
| { |
| "epoch": 11.150234741784038, |
| "grad_norm": 0.24668444693088531, |
| "learning_rate": 5.12e-07, |
| "loss": 0.0036, |
| "step": 4750 |
| }, |
| { |
| "epoch": 11.208920187793428, |
| "grad_norm": 0.1730998456478119, |
| "learning_rate": 4.6200000000000003e-07, |
| "loss": 0.0025, |
| "step": 4775 |
| }, |
| { |
| "epoch": 11.267605633802816, |
| "grad_norm": 0.2514236569404602, |
| "learning_rate": 4.1200000000000004e-07, |
| "loss": 0.0041, |
| "step": 4800 |
| }, |
| { |
| "epoch": 11.326291079812206, |
| "grad_norm": 0.3077276647090912, |
| "learning_rate": 3.6200000000000004e-07, |
| "loss": 0.0031, |
| "step": 4825 |
| }, |
| { |
| "epoch": 11.384976525821596, |
| "grad_norm": 0.13625359535217285, |
| "learning_rate": 3.12e-07, |
| "loss": 0.0029, |
| "step": 4850 |
| }, |
| { |
| "epoch": 11.443661971830986, |
| "grad_norm": 0.6590627431869507, |
| "learning_rate": 2.6200000000000004e-07, |
| "loss": 0.0046, |
| "step": 4875 |
| }, |
| { |
| "epoch": 11.502347417840376, |
| "grad_norm": 0.09400284290313721, |
| "learning_rate": 2.1200000000000002e-07, |
| "loss": 0.005, |
| "step": 4900 |
| }, |
| { |
| "epoch": 11.561032863849766, |
| "grad_norm": 0.14036104083061218, |
| "learning_rate": 1.62e-07, |
| "loss": 0.0045, |
| "step": 4925 |
| }, |
| { |
| "epoch": 11.619718309859154, |
| "grad_norm": 0.48431381583213806, |
| "learning_rate": 1.1200000000000001e-07, |
| "loss": 0.0066, |
| "step": 4950 |
| }, |
| { |
| "epoch": 11.678403755868544, |
| "grad_norm": 0.35120266675949097, |
| "learning_rate": 6.2e-08, |
| "loss": 0.0032, |
| "step": 4975 |
| }, |
| { |
| "epoch": 11.737089201877934, |
| "grad_norm": 0.20458117127418518, |
| "learning_rate": 1.2e-08, |
| "loss": 0.0031, |
| "step": 5000 |
| }, |
| { |
| "epoch": 11.737089201877934, |
| "eval_loss": 1.3679251670837402, |
| "eval_runtime": 188.6684, |
| "eval_samples_per_second": 2.009, |
| "eval_steps_per_second": 0.254, |
| "eval_wer": 70.01703577512777, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 12, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.153706713399296e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |