{
  "best_metric": 19.119313582645297,
  "best_model_checkpoint": "./whisper-medium-hi32/checkpoint-1250",
  "epoch": 0.3970775095298602,
  "eval_steps": 1250,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0039707750952986025,
      "grad_norm": 5.025813579559326,
      "learning_rate": 2.5e-05,
      "loss": 1.0996,
      "step": 25
    },
    {
      "epoch": 0.007941550190597205,
      "grad_norm": 4.760560989379883,
      "learning_rate": 5e-05,
      "loss": 0.8774,
      "step": 50
    },
    {
      "epoch": 0.011912325285895807,
      "grad_norm": 4.298652648925781,
      "learning_rate": 7.5e-05,
      "loss": 0.3753,
      "step": 75
    },
    {
      "epoch": 0.01588310038119441,
      "grad_norm": 4.617311477661133,
      "learning_rate": 0.0001,
      "loss": 0.3013,
      "step": 100
    },
    {
      "epoch": 0.01985387547649301,
      "grad_norm": 9.100639343261719,
      "learning_rate": 0.000125,
      "loss": 0.2996,
      "step": 125
    },
    {
      "epoch": 0.023824650571791613,
      "grad_norm": 3.8302597999572754,
      "learning_rate": 0.00015,
      "loss": 0.2166,
      "step": 150
    },
    {
      "epoch": 0.027795425667090215,
      "grad_norm": 3.0683753490448,
      "learning_rate": 0.000175,
      "loss": 0.2249,
      "step": 175
    },
    {
      "epoch": 0.03176620076238882,
      "grad_norm": 3.0955100059509277,
      "learning_rate": 0.0002,
      "loss": 0.2256,
      "step": 200
    },
    {
      "epoch": 0.03573697585768742,
      "grad_norm": 8.42872142791748,
      "learning_rate": 0.00022500000000000002,
      "loss": 0.2054,
      "step": 225
    },
    {
      "epoch": 0.03970775095298602,
      "grad_norm": 1.8967641592025757,
      "learning_rate": 0.00025,
      "loss": 0.2378,
      "step": 250
    },
    {
      "epoch": 0.04367852604828463,
      "grad_norm": 3.759864568710327,
      "learning_rate": 0.000275,
      "loss": 0.2663,
      "step": 275
    },
    {
      "epoch": 0.04764930114358323,
      "grad_norm": 3.7866759300231934,
      "learning_rate": 0.0003,
      "loss": 0.2715,
      "step": 300
    },
    {
      "epoch": 0.05162007623888183,
      "grad_norm": 3.0431158542633057,
      "learning_rate": 0.00032500000000000004,
      "loss": 0.2401,
      "step": 325
    },
    {
      "epoch": 0.05559085133418043,
      "grad_norm": 5.701106071472168,
      "learning_rate": 0.00035,
      "loss": 0.2534,
      "step": 350
    },
    {
      "epoch": 0.059561626429479035,
      "grad_norm": 2.9990756511688232,
      "learning_rate": 0.000375,
      "loss": 0.204,
      "step": 375
    },
    {
      "epoch": 0.06353240152477764,
      "grad_norm": 5.0229573249816895,
      "learning_rate": 0.0004,
      "loss": 0.2318,
      "step": 400
    },
    {
      "epoch": 0.06750317662007624,
      "grad_norm": 5.128113746643066,
      "learning_rate": 0.000425,
      "loss": 0.2111,
      "step": 425
    },
    {
      "epoch": 0.07147395171537484,
      "grad_norm": 3.8650593757629395,
      "learning_rate": 0.000449,
      "loss": 0.2493,
      "step": 450
    },
    {
      "epoch": 0.07544472681067345,
      "grad_norm": 7.482765197753906,
      "learning_rate": 0.000474,
      "loss": 0.3081,
      "step": 475
    },
    {
      "epoch": 0.07941550190597205,
      "grad_norm": 4.78742790222168,
      "learning_rate": 0.000499,
      "loss": 0.2288,
      "step": 500
    },
    {
      "epoch": 0.08338627700127065,
      "grad_norm": 1.9080342054367065,
      "learning_rate": 0.00049731843575419,
      "loss": 0.2175,
      "step": 525
    },
    {
      "epoch": 0.08735705209656926,
      "grad_norm": 6.20205545425415,
      "learning_rate": 0.0004945251396648045,
      "loss": 0.2218,
      "step": 550
    },
    {
      "epoch": 0.09132782719186786,
      "grad_norm": 6.478557586669922,
      "learning_rate": 0.000491731843575419,
      "loss": 0.2954,
      "step": 575
    },
    {
      "epoch": 0.09529860228716645,
      "grad_norm": 8.531637191772461,
      "learning_rate": 0.0004889385474860335,
      "loss": 0.2223,
      "step": 600
    },
    {
      "epoch": 0.09926937738246505,
      "grad_norm": 4.512006759643555,
      "learning_rate": 0.0004861452513966481,
      "loss": 0.2208,
      "step": 625
    },
    {
      "epoch": 0.10324015247776366,
      "grad_norm": 2.6392629146575928,
      "learning_rate": 0.0004833519553072626,
      "loss": 0.2348,
      "step": 650
    },
    {
      "epoch": 0.10721092757306226,
      "grad_norm": 3.4534718990325928,
      "learning_rate": 0.00048055865921787713,
      "loss": 0.2385,
      "step": 675
    },
    {
      "epoch": 0.11118170266836086,
      "grad_norm": 3.7822482585906982,
      "learning_rate": 0.00047776536312849164,
      "loss": 0.288,
      "step": 700
    },
    {
      "epoch": 0.11515247776365947,
      "grad_norm": 4.008564472198486,
      "learning_rate": 0.0004749720670391062,
      "loss": 0.2389,
      "step": 725
    },
    {
      "epoch": 0.11912325285895807,
      "grad_norm": 4.458996772766113,
      "learning_rate": 0.0004721787709497207,
      "loss": 0.2875,
      "step": 750
    },
    {
      "epoch": 0.12309402795425667,
      "grad_norm": 6.05142068862915,
      "learning_rate": 0.00046938547486033525,
      "loss": 0.2694,
      "step": 775
    },
    {
      "epoch": 0.12706480304955528,
      "grad_norm": 8.252070426940918,
      "learning_rate": 0.00046659217877094975,
      "loss": 0.2158,
      "step": 800
    },
    {
      "epoch": 0.13103557814485386,
      "grad_norm": 3.173055410385132,
      "learning_rate": 0.0004637988826815643,
      "loss": 0.2154,
      "step": 825
    },
    {
      "epoch": 0.13500635324015248,
      "grad_norm": 1.4051086902618408,
      "learning_rate": 0.00046100558659217876,
      "loss": 0.2297,
      "step": 850
    },
    {
      "epoch": 0.1389771283354511,
      "grad_norm": 10.670642852783203,
      "learning_rate": 0.00045821229050279326,
      "loss": 0.2181,
      "step": 875
    },
    {
      "epoch": 0.14294790343074967,
      "grad_norm": 3.286973476409912,
      "learning_rate": 0.00045553072625698326,
      "loss": 0.2543,
      "step": 900
    },
    {
      "epoch": 0.14691867852604829,
      "grad_norm": 3.300940752029419,
      "learning_rate": 0.00045273743016759776,
      "loss": 0.222,
      "step": 925
    },
    {
      "epoch": 0.1508894536213469,
      "grad_norm": 3.951962947845459,
      "learning_rate": 0.00044994413407821226,
      "loss": 0.2792,
      "step": 950
    },
    {
      "epoch": 0.15486022871664548,
      "grad_norm": 4.659793853759766,
      "learning_rate": 0.0004471508379888268,
      "loss": 0.2409,
      "step": 975
    },
    {
      "epoch": 0.1588310038119441,
      "grad_norm": 8.52952766418457,
      "learning_rate": 0.0004443575418994413,
      "loss": 0.2745,
      "step": 1000
    },
    {
      "epoch": 0.1628017789072427,
      "grad_norm": 4.599229335784912,
      "learning_rate": 0.0004415642458100559,
      "loss": 0.2656,
      "step": 1025
    },
    {
      "epoch": 0.1667725540025413,
      "grad_norm": 3.206956386566162,
      "learning_rate": 0.00043888268156424583,
      "loss": 0.2268,
      "step": 1050
    },
    {
      "epoch": 0.1707433290978399,
      "grad_norm": 4.573523998260498,
      "learning_rate": 0.00043608938547486033,
      "loss": 0.2322,
      "step": 1075
    },
    {
      "epoch": 0.17471410419313851,
      "grad_norm": 2.311208963394165,
      "learning_rate": 0.0004332960893854749,
      "loss": 0.2445,
      "step": 1100
    },
    {
      "epoch": 0.1786848792884371,
      "grad_norm": 12.883535385131836,
      "learning_rate": 0.0004305027932960894,
      "loss": 0.2932,
      "step": 1125
    },
    {
      "epoch": 0.1826556543837357,
      "grad_norm": 3.4275145530700684,
      "learning_rate": 0.00042770949720670394,
      "loss": 0.2642,
      "step": 1150
    },
    {
      "epoch": 0.1866264294790343,
      "grad_norm": 4.927622318267822,
      "learning_rate": 0.00042491620111731845,
      "loss": 0.2854,
      "step": 1175
    },
    {
      "epoch": 0.1905972045743329,
      "grad_norm": 3.562868595123291,
      "learning_rate": 0.000422122905027933,
      "loss": 0.2408,
      "step": 1200
    },
    {
      "epoch": 0.19456797966963152,
      "grad_norm": 6.159071445465088,
      "learning_rate": 0.0004193296089385475,
      "loss": 0.262,
      "step": 1225
    },
    {
      "epoch": 0.1985387547649301,
      "grad_norm": 6.2040114402771,
      "learning_rate": 0.00041653631284916206,
      "loss": 0.3184,
      "step": 1250
    },
    {
      "epoch": 0.1985387547649301,
      "eval_loss": 0.2267104983329773,
      "eval_runtime": 1316.6693,
      "eval_samples_per_second": 0.787,
      "eval_steps_per_second": 0.393,
      "eval_wer": 19.119313582645297,
      "step": 1250
    },
    {
      "epoch": 0.20250952986022872,
      "grad_norm": 5.0747246742248535,
      "learning_rate": 0.00041374301675977656,
      "loss": 0.2806,
      "step": 1275
    },
    {
      "epoch": 0.20648030495552733,
      "grad_norm": 7.066751003265381,
      "learning_rate": 0.00041094972067039106,
      "loss": 0.2596,
      "step": 1300
    },
    {
      "epoch": 0.2104510800508259,
      "grad_norm": 3.551501989364624,
      "learning_rate": 0.0004081564245810056,
      "loss": 0.2609,
      "step": 1325
    },
    {
      "epoch": 0.21442185514612452,
      "grad_norm": 4.077850341796875,
      "learning_rate": 0.0004053631284916201,
      "loss": 0.2232,
      "step": 1350
    },
    {
      "epoch": 0.21839263024142314,
      "grad_norm": 3.4992265701293945,
      "learning_rate": 0.0004025698324022347,
      "loss": 0.2224,
      "step": 1375
    },
    {
      "epoch": 0.22236340533672172,
      "grad_norm": 3.0660696029663086,
      "learning_rate": 0.0003997765363128492,
      "loss": 0.2006,
      "step": 1400
    },
    {
      "epoch": 0.22633418043202033,
      "grad_norm": 8.794944763183594,
      "learning_rate": 0.00039698324022346374,
      "loss": 0.2199,
      "step": 1425
    },
    {
      "epoch": 0.23030495552731894,
      "grad_norm": 3.5407402515411377,
      "learning_rate": 0.00039418994413407824,
      "loss": 0.2547,
      "step": 1450
    },
    {
      "epoch": 0.23427573062261753,
      "grad_norm": 5.331219673156738,
      "learning_rate": 0.0003913966480446928,
      "loss": 0.2058,
      "step": 1475
    },
    {
      "epoch": 0.23824650571791614,
      "grad_norm": 7.421133995056152,
      "learning_rate": 0.0003886033519553073,
      "loss": 0.2284,
      "step": 1500
    },
    {
      "epoch": 0.24221728081321475,
      "grad_norm": 6.895657539367676,
      "learning_rate": 0.00038581005586592175,
      "loss": 0.2323,
      "step": 1525
    },
    {
      "epoch": 0.24618805590851334,
      "grad_norm": 1.489917278289795,
      "learning_rate": 0.0003830167597765363,
      "loss": 0.2418,
      "step": 1550
    },
    {
      "epoch": 0.2501588310038119,
      "grad_norm": 3.7984824180603027,
      "learning_rate": 0.0003802234636871508,
      "loss": 0.2223,
      "step": 1575
    },
    {
      "epoch": 0.25412960609911056,
      "grad_norm": 3.4480276107788086,
      "learning_rate": 0.00037743016759776536,
      "loss": 0.2672,
      "step": 1600
    },
    {
      "epoch": 0.25810038119440915,
      "grad_norm": 5.31698751449585,
      "learning_rate": 0.00037463687150837986,
      "loss": 0.2231,
      "step": 1625
    },
    {
      "epoch": 0.26207115628970773,
      "grad_norm": 3.1410892009735107,
      "learning_rate": 0.0003718435754189944,
      "loss": 0.248,
      "step": 1650
    },
    {
      "epoch": 0.26604193138500637,
      "grad_norm": 3.897778272628784,
      "learning_rate": 0.0003690502793296089,
      "loss": 0.271,
      "step": 1675
    },
    {
      "epoch": 0.27001270648030495,
      "grad_norm": 2.355808734893799,
      "learning_rate": 0.0003662569832402235,
      "loss": 0.2293,
      "step": 1700
    },
    {
      "epoch": 0.27398348157560354,
      "grad_norm": 3.172232151031494,
      "learning_rate": 0.000363463687150838,
      "loss": 0.1907,
      "step": 1725
    },
    {
      "epoch": 0.2779542566709022,
      "grad_norm": 5.099759578704834,
      "learning_rate": 0.0003606703910614525,
      "loss": 0.2054,
      "step": 1750
    },
    {
      "epoch": 0.28192503176620076,
      "grad_norm": 5.372403144836426,
      "learning_rate": 0.00035787709497206704,
      "loss": 0.2279,
      "step": 1775
    },
    {
      "epoch": 0.28589580686149935,
      "grad_norm": 7.999370574951172,
      "learning_rate": 0.00035508379888268154,
      "loss": 0.1572,
      "step": 1800
    },
    {
      "epoch": 0.289866581956798,
      "grad_norm": 12.339667320251465,
      "learning_rate": 0.0003522905027932961,
      "loss": 0.2401,
      "step": 1825
    },
    {
      "epoch": 0.29383735705209657,
      "grad_norm": 7.104443073272705,
      "learning_rate": 0.0003494972067039106,
      "loss": 0.2333,
      "step": 1850
    },
    {
      "epoch": 0.29780813214739515,
      "grad_norm": 8.25573444366455,
      "learning_rate": 0.00034670391061452515,
      "loss": 0.1648,
      "step": 1875
    },
    {
      "epoch": 0.3017789072426938,
      "grad_norm": 5.5282769203186035,
      "learning_rate": 0.00034391061452513966,
      "loss": 0.2446,
      "step": 1900
    },
    {
      "epoch": 0.3057496823379924,
      "grad_norm": 8.142168045043945,
      "learning_rate": 0.0003411173184357542,
      "loss": 0.2474,
      "step": 1925
    },
    {
      "epoch": 0.30972045743329096,
      "grad_norm": 7.110402584075928,
      "learning_rate": 0.0003383240223463687,
      "loss": 0.184,
      "step": 1950
    },
    {
      "epoch": 0.3136912325285896,
      "grad_norm": 6.967545509338379,
      "learning_rate": 0.0003355307262569832,
      "loss": 0.2391,
      "step": 1975
    },
    {
      "epoch": 0.3176620076238882,
      "grad_norm": 7.029871463775635,
      "learning_rate": 0.0003327374301675978,
      "loss": 0.2114,
      "step": 2000
    },
    {
      "epoch": 0.32163278271918677,
      "grad_norm": 8.513774871826172,
      "learning_rate": 0.0003299441340782123,
      "loss": 0.2483,
      "step": 2025
    },
    {
      "epoch": 0.3256035578144854,
      "grad_norm": 6.16885232925415,
      "learning_rate": 0.00032715083798882683,
      "loss": 0.2206,
      "step": 2050
    },
    {
      "epoch": 0.329574332909784,
      "grad_norm": 6.541902542114258,
      "learning_rate": 0.00032435754189944133,
      "loss": 0.223,
      "step": 2075
    },
    {
      "epoch": 0.3335451080050826,
      "grad_norm": 3.3565759658813477,
      "learning_rate": 0.0003215642458100559,
      "loss": 0.1957,
      "step": 2100
    },
    {
      "epoch": 0.3375158831003812,
      "grad_norm": 1.907651424407959,
      "learning_rate": 0.0003187709497206704,
      "loss": 0.271,
      "step": 2125
    },
    {
      "epoch": 0.3414866581956798,
      "grad_norm": 4.328953742980957,
      "learning_rate": 0.00031597765363128495,
      "loss": 0.2432,
      "step": 2150
    },
    {
      "epoch": 0.3454574332909784,
      "grad_norm": 2.5049920082092285,
      "learning_rate": 0.00031318435754189945,
      "loss": 0.2074,
      "step": 2175
    },
    {
      "epoch": 0.34942820838627703,
      "grad_norm": 7.09634256362915,
      "learning_rate": 0.00031039106145251395,
      "loss": 0.1959,
      "step": 2200
    },
    {
      "epoch": 0.3533989834815756,
      "grad_norm": 3.597811698913574,
      "learning_rate": 0.0003075977653631285,
      "loss": 0.2009,
      "step": 2225
    },
    {
      "epoch": 0.3573697585768742,
      "grad_norm": 4.403627872467041,
      "learning_rate": 0.000304804469273743,
      "loss": 0.1956,
      "step": 2250
    },
    {
      "epoch": 0.3613405336721728,
      "grad_norm": 14.672781944274902,
      "learning_rate": 0.00030201117318435757,
      "loss": 0.2425,
      "step": 2275
    },
    {
      "epoch": 0.3653113087674714,
      "grad_norm": 3.903442621231079,
      "learning_rate": 0.00029921787709497207,
      "loss": 0.2176,
      "step": 2300
    },
    {
      "epoch": 0.36928208386277,
      "grad_norm": 6.7449517250061035,
      "learning_rate": 0.0002964245810055866,
      "loss": 0.2667,
      "step": 2325
    },
    {
      "epoch": 0.3732528589580686,
      "grad_norm": 6.445186614990234,
      "learning_rate": 0.0002937430167597766,
      "loss": 0.2148,
      "step": 2350
    },
    {
      "epoch": 0.37722363405336723,
      "grad_norm": 5.590458869934082,
      "learning_rate": 0.0002909497206703911,
      "loss": 0.1853,
      "step": 2375
    },
    {
      "epoch": 0.3811944091486658,
      "grad_norm": 3.372239351272583,
      "learning_rate": 0.00028815642458100563,
      "loss": 0.2079,
      "step": 2400
    },
    {
      "epoch": 0.3851651842439644,
      "grad_norm": 8.069969177246094,
      "learning_rate": 0.0002853631284916201,
      "loss": 0.2099,
      "step": 2425
    },
    {
      "epoch": 0.38913595933926304,
      "grad_norm": 3.9822771549224854,
      "learning_rate": 0.00028256983240223464,
      "loss": 0.2716,
      "step": 2450
    },
    {
      "epoch": 0.3931067344345616,
      "grad_norm": 7.241533279418945,
      "learning_rate": 0.00027977653631284914,
      "loss": 0.2138,
      "step": 2475
    },
    {
      "epoch": 0.3970775095298602,
      "grad_norm": 4.6112542152404785,
      "learning_rate": 0.0002769832402234637,
      "loss": 0.1957,
      "step": 2500
    },
    {
      "epoch": 0.3970775095298602,
      "eval_loss": 0.20951329171657562,
      "eval_runtime": 1414.685,
      "eval_samples_per_second": 0.732,
      "eval_steps_per_second": 0.366,
      "eval_wer": 32.51578436134046,
      "step": 2500
    }
  ],
  "logging_steps": 25,
  "max_steps": 4975,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.09103814656e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}