| { | |
| "best_metric": 12.174852488988614, | |
| "best_model_checkpoint": "all_lang_models/urdu_models/whisper-medium-ur_alldata_multigpu/checkpoint-1650", | |
| "epoch": 2.96229802513465, | |
| "global_step": 1650, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.4439086758819625e-06, | |
| "loss": 2.9322, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.556091324118037e-06, | |
| "loss": 1.5084, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 7.895096067413523e-06, | |
| "loss": 0.9982, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.760378708604572e-06, | |
| "loss": 0.7498, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.400891958404646e-06, | |
| "loss": 0.575, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 0.3818359375, | |
| "eval_runtime": 714.3133, | |
| "eval_samples_per_second": 4.52, | |
| "eval_steps_per_second": 0.036, | |
| "eval_wer": 25.978558962852155, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.909653498712722e-06, | |
| "loss": 0.4348, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.987373737373737e-06, | |
| "loss": 0.3921, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.96933621933622e-06, | |
| "loss": 0.3761, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.951298701298701e-06, | |
| "loss": 0.3596, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.933261183261184e-06, | |
| "loss": 0.3423, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 0.275390625, | |
| "eval_runtime": 235.7872, | |
| "eval_samples_per_second": 13.695, | |
| "eval_steps_per_second": 0.11, | |
| "eval_wer": 19.46591318319067, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.915223665223665e-06, | |
| "loss": 0.327, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 9.897186147186148e-06, | |
| "loss": 0.3276, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.87914862914863e-06, | |
| "loss": 0.3167, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 9.861111111111112e-06, | |
| "loss": 0.3081, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 9.843073593073595e-06, | |
| "loss": 0.2998, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 0.245849609375, | |
| "eval_runtime": 792.9567, | |
| "eval_samples_per_second": 4.072, | |
| "eval_steps_per_second": 0.033, | |
| "eval_wer": 17.673619767860604, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 9.825036075036076e-06, | |
| "loss": 0.2942, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 9.806998556998558e-06, | |
| "loss": 0.2866, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 9.78896103896104e-06, | |
| "loss": 0.2859, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 9.770923520923522e-06, | |
| "loss": 0.285, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 9.752886002886003e-06, | |
| "loss": 0.279, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 0.2276611328125, | |
| "eval_runtime": 350.8145, | |
| "eval_samples_per_second": 9.204, | |
| "eval_steps_per_second": 0.074, | |
| "eval_wer": 16.47137039807197, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 9.734848484848486e-06, | |
| "loss": 0.2675, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.716810966810967e-06, | |
| "loss": 0.2773, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 9.69877344877345e-06, | |
| "loss": 0.2711, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 9.680735930735931e-06, | |
| "loss": 0.2618, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 9.662698412698414e-06, | |
| "loss": 0.26, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 0.218017578125, | |
| "eval_runtime": 625.9012, | |
| "eval_samples_per_second": 5.159, | |
| "eval_steps_per_second": 0.042, | |
| "eval_wer": 16.003213385412337, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 9.644660894660895e-06, | |
| "loss": 0.2659, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 9.626623376623378e-06, | |
| "loss": 0.2624, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 9.608585858585859e-06, | |
| "loss": 0.258, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.590548340548342e-06, | |
| "loss": 0.2567, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.572510822510823e-06, | |
| "loss": 0.2433, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_loss": 0.208740234375, | |
| "eval_runtime": 603.9398, | |
| "eval_samples_per_second": 5.347, | |
| "eval_steps_per_second": 0.043, | |
| "eval_wer": 15.172165433945539, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.554473304473305e-06, | |
| "loss": 0.2555, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.536435786435786e-06, | |
| "loss": 0.2518, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 9.51839826839827e-06, | |
| "loss": 0.2428, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 9.50036075036075e-06, | |
| "loss": 0.2403, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 9.482323232323233e-06, | |
| "loss": 0.2366, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 0.2032470703125, | |
| "eval_runtime": 301.5552, | |
| "eval_samples_per_second": 10.708, | |
| "eval_steps_per_second": 0.086, | |
| "eval_wer": 15.014266323166847, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 9.464285714285714e-06, | |
| "loss": 0.245, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 9.446248196248197e-06, | |
| "loss": 0.2328, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.428210678210678e-06, | |
| "loss": 0.2323, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.41017316017316e-06, | |
| "loss": 0.2332, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 9.392135642135642e-06, | |
| "loss": 0.2325, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 0.19677734375, | |
| "eval_runtime": 738.5749, | |
| "eval_samples_per_second": 4.372, | |
| "eval_steps_per_second": 0.035, | |
| "eval_wer": 14.451923876007648, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.374098124098125e-06, | |
| "loss": 0.2376, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 9.356060606060606e-06, | |
| "loss": 0.2336, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 9.338023088023089e-06, | |
| "loss": 0.2348, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 9.319985569985571e-06, | |
| "loss": 0.2347, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.301948051948052e-06, | |
| "loss": 0.2252, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_loss": 0.1900634765625, | |
| "eval_runtime": 305.5238, | |
| "eval_samples_per_second": 10.569, | |
| "eval_steps_per_second": 0.085, | |
| "eval_wer": 14.269093326684951, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.283910533910535e-06, | |
| "loss": 0.2389, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 9.265873015873016e-06, | |
| "loss": 0.2235, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 9.247835497835499e-06, | |
| "loss": 0.2142, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 9.22979797979798e-06, | |
| "loss": 0.2257, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 9.211760461760463e-06, | |
| "loss": 0.2247, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 0.1873779296875, | |
| "eval_runtime": 648.3057, | |
| "eval_samples_per_second": 4.981, | |
| "eval_steps_per_second": 0.04, | |
| "eval_wer": 14.02808942075958, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 9.193722943722944e-06, | |
| "loss": 0.2273, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.175685425685427e-06, | |
| "loss": 0.2236, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.157647907647908e-06, | |
| "loss": 0.2288, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.13961038961039e-06, | |
| "loss": 0.2225, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 9.121572871572872e-06, | |
| "loss": 0.2225, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 0.182373046875, | |
| "eval_runtime": 802.2673, | |
| "eval_samples_per_second": 4.025, | |
| "eval_steps_per_second": 0.032, | |
| "eval_wer": 13.701210559849303, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 9.103535353535354e-06, | |
| "loss": 0.2139, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 9.085497835497836e-06, | |
| "loss": 0.1864, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 9.067460317460318e-06, | |
| "loss": 0.1895, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.0494227994228e-06, | |
| "loss": 0.1839, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.031385281385282e-06, | |
| "loss": 0.1836, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 0.1817626953125, | |
| "eval_runtime": 293.7776, | |
| "eval_samples_per_second": 10.991, | |
| "eval_steps_per_second": 0.089, | |
| "eval_wer": 13.928363666583563, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.013347763347763e-06, | |
| "loss": 0.185, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 8.995310245310246e-06, | |
| "loss": 0.1874, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 8.977272727272727e-06, | |
| "loss": 0.1844, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 8.95923520923521e-06, | |
| "loss": 0.1863, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.941197691197691e-06, | |
| "loss": 0.1847, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 0.181884765625, | |
| "eval_runtime": 485.4864, | |
| "eval_samples_per_second": 6.651, | |
| "eval_steps_per_second": 0.054, | |
| "eval_wer": 13.595944485996842, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.923160173160174e-06, | |
| "loss": 0.1773, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 8.905122655122655e-06, | |
| "loss": 0.182, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 8.887085137085138e-06, | |
| "loss": 0.1821, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 8.869047619047619e-06, | |
| "loss": 0.1889, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 8.851010101010101e-06, | |
| "loss": 0.1858, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_loss": 0.1776123046875, | |
| "eval_runtime": 792.7244, | |
| "eval_samples_per_second": 4.073, | |
| "eval_steps_per_second": 0.033, | |
| "eval_wer": 13.288456743954127, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 8.832972582972583e-06, | |
| "loss": 0.1861, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 8.814935064935065e-06, | |
| "loss": 0.1847, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 8.796897546897548e-06, | |
| "loss": 0.1824, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 8.778860028860031e-06, | |
| "loss": 0.1746, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 8.760822510822512e-06, | |
| "loss": 0.1812, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_loss": 0.1759033203125, | |
| "eval_runtime": 231.4397, | |
| "eval_samples_per_second": 13.952, | |
| "eval_steps_per_second": 0.112, | |
| "eval_wer": 13.360480899747914, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 8.742784992784995e-06, | |
| "loss": 0.1848, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 8.724747474747476e-06, | |
| "loss": 0.1828, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 8.706709956709959e-06, | |
| "loss": 0.1794, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 8.68867243867244e-06, | |
| "loss": 0.186, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 8.670634920634922e-06, | |
| "loss": 0.1882, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_loss": 0.1741943359375, | |
| "eval_runtime": 773.8012, | |
| "eval_samples_per_second": 4.173, | |
| "eval_steps_per_second": 0.034, | |
| "eval_wer": 13.269065625086569, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 8.652597402597404e-06, | |
| "loss": 0.1841, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 8.634559884559886e-06, | |
| "loss": 0.1811, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 8.616522366522367e-06, | |
| "loss": 0.1834, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 8.59848484848485e-06, | |
| "loss": 0.1821, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 8.580447330447331e-06, | |
| "loss": 0.1818, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 0.1715087890625, | |
| "eval_runtime": 401.969, | |
| "eval_samples_per_second": 8.033, | |
| "eval_steps_per_second": 0.065, | |
| "eval_wer": 13.318928502174575, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 8.562409812409814e-06, | |
| "loss": 0.1813, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 8.544372294372295e-06, | |
| "loss": 0.174, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 8.526334776334778e-06, | |
| "loss": 0.1768, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 8.508297258297259e-06, | |
| "loss": 0.183, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 8.49025974025974e-06, | |
| "loss": 0.1781, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_loss": 0.1710205078125, | |
| "eval_runtime": 515.2274, | |
| "eval_samples_per_second": 6.267, | |
| "eval_steps_per_second": 0.05, | |
| "eval_wer": 13.213662428322115, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 8.472222222222223e-06, | |
| "loss": 0.1808, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 8.454184704184704e-06, | |
| "loss": 0.18, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 8.436147186147187e-06, | |
| "loss": 0.1732, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 8.418109668109668e-06, | |
| "loss": 0.1716, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 8.40007215007215e-06, | |
| "loss": 0.1722, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_loss": 0.1700439453125, | |
| "eval_runtime": 611.9797, | |
| "eval_samples_per_second": 5.276, | |
| "eval_steps_per_second": 0.042, | |
| "eval_wer": 13.025291559322975, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 8.382034632034632e-06, | |
| "loss": 0.1801, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 8.363997113997114e-06, | |
| "loss": 0.1735, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 8.345959595959595e-06, | |
| "loss": 0.1731, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 8.327922077922078e-06, | |
| "loss": 0.1759, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 8.30988455988456e-06, | |
| "loss": 0.1819, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 0.1678466796875, | |
| "eval_runtime": 232.5897, | |
| "eval_samples_per_second": 13.883, | |
| "eval_steps_per_second": 0.112, | |
| "eval_wer": 13.080694756087427, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 8.291847041847042e-06, | |
| "loss": 0.1723, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 8.273809523809523e-06, | |
| "loss": 0.1765, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 8.255772005772008e-06, | |
| "loss": 0.1756, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 8.237734487734489e-06, | |
| "loss": 0.168, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 8.219696969696971e-06, | |
| "loss": 0.182, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 0.165771484375, | |
| "eval_runtime": 744.0885, | |
| "eval_samples_per_second": 4.34, | |
| "eval_steps_per_second": 0.035, | |
| "eval_wer": 12.629158702457133, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 8.201659451659453e-06, | |
| "loss": 0.1751, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 8.183621933621935e-06, | |
| "loss": 0.1749, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 8.165584415584416e-06, | |
| "loss": 0.178, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 8.1475468975469e-06, | |
| "loss": 0.1721, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 8.12950937950938e-06, | |
| "loss": 0.1703, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_loss": 0.1663818359375, | |
| "eval_runtime": 637.9066, | |
| "eval_samples_per_second": 5.062, | |
| "eval_steps_per_second": 0.041, | |
| "eval_wer": 12.734424776309595, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 8.111471861471863e-06, | |
| "loss": 0.1663, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 8.093434343434344e-06, | |
| "loss": 0.1534, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 8.075396825396827e-06, | |
| "loss": 0.1412, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 8.057359307359308e-06, | |
| "loss": 0.143, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 8.03932178932179e-06, | |
| "loss": 0.1354, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_loss": 0.16845703125, | |
| "eval_runtime": 260.6549, | |
| "eval_samples_per_second": 12.388, | |
| "eval_steps_per_second": 0.1, | |
| "eval_wer": 12.975428682234966, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 8.021284271284272e-06, | |
| "loss": 0.1394, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 8.003246753246755e-06, | |
| "loss": 0.1395, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 7.985209235209236e-06, | |
| "loss": 0.1343, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 7.967171717171718e-06, | |
| "loss": 0.1452, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 7.9491341991342e-06, | |
| "loss": 0.1426, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_loss": 0.16943359375, | |
| "eval_runtime": 653.3444, | |
| "eval_samples_per_second": 4.942, | |
| "eval_steps_per_second": 0.04, | |
| "eval_wer": 12.587606304883792, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 7.931096681096682e-06, | |
| "loss": 0.1394, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 7.913059163059163e-06, | |
| "loss": 0.1367, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 7.895021645021646e-06, | |
| "loss": 0.1424, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 7.876984126984127e-06, | |
| "loss": 0.1377, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 7.85894660894661e-06, | |
| "loss": 0.1414, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_loss": 0.1707763671875, | |
| "eval_runtime": 512.911, | |
| "eval_samples_per_second": 6.295, | |
| "eval_steps_per_second": 0.051, | |
| "eval_wer": 12.559904706501564, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 7.840909090909091e-06, | |
| "loss": 0.1473, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 7.822871572871574e-06, | |
| "loss": 0.1391, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 7.804834054834055e-06, | |
| "loss": 0.1412, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 7.786796536796538e-06, | |
| "loss": 0.1416, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 7.768759018759019e-06, | |
| "loss": 0.1404, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_loss": 0.1689453125, | |
| "eval_runtime": 426.0802, | |
| "eval_samples_per_second": 7.578, | |
| "eval_steps_per_second": 0.061, | |
| "eval_wer": 12.6374691819718, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 7.750721500721502e-06, | |
| "loss": 0.1437, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 7.732683982683983e-06, | |
| "loss": 0.1417, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 7.714646464646465e-06, | |
| "loss": 0.1438, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 7.696608946608948e-06, | |
| "loss": 0.1423, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 7.67857142857143e-06, | |
| "loss": 0.1379, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_loss": 0.1676025390625, | |
| "eval_runtime": 721.7662, | |
| "eval_samples_per_second": 4.474, | |
| "eval_steps_per_second": 0.036, | |
| "eval_wer": 12.914485165794066, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 7.660533910533912e-06, | |
| "loss": 0.1382, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 7.642496392496393e-06, | |
| "loss": 0.1383, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 7.624458874458875e-06, | |
| "loss": 0.1412, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 7.606421356421357e-06, | |
| "loss": 0.1416, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 7.588383838383839e-06, | |
| "loss": 0.1358, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 0.168212890625, | |
| "eval_runtime": 238.5103, | |
| "eval_samples_per_second": 13.538, | |
| "eval_steps_per_second": 0.109, | |
| "eval_wer": 12.61807806310424, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.570346320346321e-06, | |
| "loss": 0.1419, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.552308802308803e-06, | |
| "loss": 0.1375, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.534271284271285e-06, | |
| "loss": 0.1393, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.516233766233767e-06, | |
| "loss": 0.141, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.498196248196249e-06, | |
| "loss": 0.1409, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_loss": 0.167724609375, | |
| "eval_runtime": 738.5382, | |
| "eval_samples_per_second": 4.372, | |
| "eval_steps_per_second": 0.035, | |
| "eval_wer": 12.651319981162912, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 7.4801587301587306e-06, | |
| "loss": 0.1456, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 7.4621212121212125e-06, | |
| "loss": 0.1425, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 7.444083694083694e-06, | |
| "loss": 0.1421, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 7.426046176046176e-06, | |
| "loss": 0.1383, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 7.408008658008658e-06, | |
| "loss": 0.1423, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 0.16552734375, | |
| "eval_runtime": 539.6646, | |
| "eval_samples_per_second": 5.983, | |
| "eval_steps_per_second": 0.048, | |
| "eval_wer": 12.6374691819718, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 7.38997113997114e-06, | |
| "loss": 0.1443, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 7.371933621933622e-06, | |
| "loss": 0.1395, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 7.353896103896104e-06, | |
| "loss": 0.143, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 7.335858585858586e-06, | |
| "loss": 0.1423, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 7.317821067821068e-06, | |
| "loss": 0.1372, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_loss": 0.16650390625, | |
| "eval_runtime": 445.0527, | |
| "eval_samples_per_second": 7.255, | |
| "eval_steps_per_second": 0.058, | |
| "eval_wer": 12.587606304883792, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 7.29978354978355e-06, | |
| "loss": 0.1383, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 7.281746031746032e-06, | |
| "loss": 0.135, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 7.263708513708514e-06, | |
| "loss": 0.1356, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 7.245670995670996e-06, | |
| "loss": 0.1374, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 7.2276334776334776e-06, | |
| "loss": 0.1372, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_loss": 0.16552734375, | |
| "eval_runtime": 734.4433, | |
| "eval_samples_per_second": 4.397, | |
| "eval_steps_per_second": 0.035, | |
| "eval_wer": 12.507271669575335, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 7.2095959595959595e-06, | |
| "loss": 0.1394, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 7.191558441558441e-06, | |
| "loss": 0.1451, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 7.173520923520925e-06, | |
| "loss": 0.1408, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 7.155483405483407e-06, | |
| "loss": 0.1419, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.137445887445889e-06, | |
| "loss": 0.1422, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 0.1640625, | |
| "eval_runtime": 304.9057, | |
| "eval_samples_per_second": 10.59, | |
| "eval_steps_per_second": 0.085, | |
| "eval_wer": 12.174852488988614, | |
| "step": 1650 | |
| } | |
| ], | |
| "max_steps": 5600, | |
| "num_train_epochs": 11, | |
| "total_flos": 4.310300337493057e+20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |