| { |
| "best_metric": 0.5420793793724458, |
| "best_model_checkpoint": "md_d_l2_arctic/checkpoint-3700", |
| "epoch": 100.0, |
| "eval_steps": 100, |
| "global_step": 7900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.27, |
| "grad_norm": 92.3280258178711, |
| "learning_rate": 2.4050632911392408e-06, |
| "loss": 14.3657, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.27, |
| "eval_loss": 9.120972633361816, |
| "eval_runtime": 15.883, |
| "eval_samples_per_second": 28.332, |
| "eval_steps_per_second": 1.448, |
| "eval_wer": 2.4374870125372308, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.53, |
| "grad_norm": 1.9874589443206787, |
| "learning_rate": 4.9113924050632915e-06, |
| "loss": 4.382, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.53, |
| "eval_loss": 3.421921730041504, |
| "eval_runtime": 10.9345, |
| "eval_samples_per_second": 41.154, |
| "eval_steps_per_second": 2.103, |
| "eval_wer": 1.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 3.151137590408325, |
| "learning_rate": 7.443037974683544e-06, |
| "loss": 3.2514, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.8, |
| "eval_loss": 2.788123369216919, |
| "eval_runtime": 10.9315, |
| "eval_samples_per_second": 41.165, |
| "eval_steps_per_second": 2.104, |
| "eval_wer": 0.9979912724250191, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.06, |
| "grad_norm": 3.4454073905944824, |
| "learning_rate": 9.974683544303799e-06, |
| "loss": 2.4508, |
| "step": 400 |
| }, |
| { |
| "epoch": 5.06, |
| "eval_loss": 1.8000441789627075, |
| "eval_runtime": 10.9542, |
| "eval_samples_per_second": 41.08, |
| "eval_steps_per_second": 2.1, |
| "eval_wer": 0.7380342176352428, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.33, |
| "grad_norm": 2.928997039794922, |
| "learning_rate": 1.2506329113924051e-05, |
| "loss": 1.6168, |
| "step": 500 |
| }, |
| { |
| "epoch": 6.33, |
| "eval_loss": 1.131545901298523, |
| "eval_runtime": 11.2822, |
| "eval_samples_per_second": 39.886, |
| "eval_steps_per_second": 2.039, |
| "eval_wer": 0.967791092332202, |
| "step": 500 |
| }, |
| { |
| "epoch": 7.59, |
| "grad_norm": 2.174651861190796, |
| "learning_rate": 1.5037974683544306e-05, |
| "loss": 1.1212, |
| "step": 600 |
| }, |
| { |
| "epoch": 7.59, |
| "eval_loss": 0.8748846054077148, |
| "eval_runtime": 11.1275, |
| "eval_samples_per_second": 40.44, |
| "eval_steps_per_second": 2.067, |
| "eval_wer": 1.065803144697652, |
| "step": 600 |
| }, |
| { |
| "epoch": 8.86, |
| "grad_norm": 7.105869770050049, |
| "learning_rate": 1.7569620253164558e-05, |
| "loss": 0.8953, |
| "step": 700 |
| }, |
| { |
| "epoch": 8.86, |
| "eval_loss": 0.7655201554298401, |
| "eval_runtime": 11.2119, |
| "eval_samples_per_second": 40.136, |
| "eval_steps_per_second": 2.051, |
| "eval_wer": 0.9655052988848098, |
| "step": 700 |
| }, |
| { |
| "epoch": 10.13, |
| "grad_norm": 2.2692487239837646, |
| "learning_rate": 1.99887482419128e-05, |
| "loss": 0.7684, |
| "step": 800 |
| }, |
| { |
| "epoch": 10.13, |
| "eval_loss": 0.6687426567077637, |
| "eval_runtime": 11.2107, |
| "eval_samples_per_second": 40.14, |
| "eval_steps_per_second": 2.052, |
| "eval_wer": 0.7621389485350142, |
| "step": 800 |
| }, |
| { |
| "epoch": 11.39, |
| "grad_norm": 3.137871742248535, |
| "learning_rate": 1.970745428973277e-05, |
| "loss": 0.6661, |
| "step": 900 |
| }, |
| { |
| "epoch": 11.39, |
| "eval_loss": 0.6319410800933838, |
| "eval_runtime": 11.2724, |
| "eval_samples_per_second": 39.921, |
| "eval_steps_per_second": 2.04, |
| "eval_wer": 0.6755558634065248, |
| "step": 900 |
| }, |
| { |
| "epoch": 12.66, |
| "grad_norm": 2.894819498062134, |
| "learning_rate": 1.9426160337552744e-05, |
| "loss": 0.6306, |
| "step": 1000 |
| }, |
| { |
| "epoch": 12.66, |
| "eval_loss": 0.6195651292800903, |
| "eval_runtime": 11.3009, |
| "eval_samples_per_second": 39.82, |
| "eval_steps_per_second": 2.035, |
| "eval_wer": 0.6963358038373624, |
| "step": 1000 |
| }, |
| { |
| "epoch": 13.92, |
| "grad_norm": 3.4255077838897705, |
| "learning_rate": 1.9144866385372717e-05, |
| "loss": 0.5759, |
| "step": 1100 |
| }, |
| { |
| "epoch": 13.92, |
| "eval_loss": 0.5875259637832642, |
| "eval_runtime": 11.1702, |
| "eval_samples_per_second": 40.286, |
| "eval_steps_per_second": 2.059, |
| "eval_wer": 0.5965228233012398, |
| "step": 1100 |
| }, |
| { |
| "epoch": 15.19, |
| "grad_norm": 2.189448118209839, |
| "learning_rate": 1.8863572433192687e-05, |
| "loss": 0.5417, |
| "step": 1200 |
| }, |
| { |
| "epoch": 15.19, |
| "eval_loss": 0.5779715776443481, |
| "eval_runtime": 11.2889, |
| "eval_samples_per_second": 39.862, |
| "eval_steps_per_second": 2.037, |
| "eval_wer": 0.6528364618688093, |
| "step": 1200 |
| }, |
| { |
| "epoch": 16.46, |
| "grad_norm": 6.146909713745117, |
| "learning_rate": 1.858227848101266e-05, |
| "loss": 0.528, |
| "step": 1300 |
| }, |
| { |
| "epoch": 16.46, |
| "eval_loss": 0.5798078179359436, |
| "eval_runtime": 11.2225, |
| "eval_samples_per_second": 40.098, |
| "eval_steps_per_second": 2.049, |
| "eval_wer": 0.6539447253584539, |
| "step": 1300 |
| }, |
| { |
| "epoch": 17.72, |
| "grad_norm": 3.1426494121551514, |
| "learning_rate": 1.8300984528832633e-05, |
| "loss": 0.4857, |
| "step": 1400 |
| }, |
| { |
| "epoch": 17.72, |
| "eval_loss": 0.5568873286247253, |
| "eval_runtime": 11.201, |
| "eval_samples_per_second": 40.175, |
| "eval_steps_per_second": 2.053, |
| "eval_wer": 0.5724873588695713, |
| "step": 1400 |
| }, |
| { |
| "epoch": 18.99, |
| "grad_norm": 1.9612127542495728, |
| "learning_rate": 1.8019690576652603e-05, |
| "loss": 0.4655, |
| "step": 1500 |
| }, |
| { |
| "epoch": 18.99, |
| "eval_loss": 0.549960196018219, |
| "eval_runtime": 11.3494, |
| "eval_samples_per_second": 39.65, |
| "eval_steps_per_second": 2.027, |
| "eval_wer": 0.575535083466094, |
| "step": 1500 |
| }, |
| { |
| "epoch": 20.25, |
| "grad_norm": 2.946164131164551, |
| "learning_rate": 1.7738396624472576e-05, |
| "loss": 0.4526, |
| "step": 1600 |
| }, |
| { |
| "epoch": 20.25, |
| "eval_loss": 0.5582863092422485, |
| "eval_runtime": 11.3026, |
| "eval_samples_per_second": 39.814, |
| "eval_steps_per_second": 2.035, |
| "eval_wer": 0.5776130775091778, |
| "step": 1600 |
| }, |
| { |
| "epoch": 21.52, |
| "grad_norm": 2.338858127593994, |
| "learning_rate": 1.745710267229255e-05, |
| "loss": 0.4287, |
| "step": 1700 |
| }, |
| { |
| "epoch": 21.52, |
| "eval_loss": 0.555654764175415, |
| "eval_runtime": 11.2714, |
| "eval_samples_per_second": 39.924, |
| "eval_steps_per_second": 2.041, |
| "eval_wer": 0.5609891251645078, |
| "step": 1700 |
| }, |
| { |
| "epoch": 22.78, |
| "grad_norm": 2.5261471271514893, |
| "learning_rate": 1.717580872011252e-05, |
| "loss": 0.4149, |
| "step": 1800 |
| }, |
| { |
| "epoch": 22.78, |
| "eval_loss": 0.5575445294380188, |
| "eval_runtime": 11.3966, |
| "eval_samples_per_second": 39.486, |
| "eval_steps_per_second": 2.018, |
| "eval_wer": 0.5748424187850661, |
| "step": 1800 |
| }, |
| { |
| "epoch": 24.05, |
| "grad_norm": 3.1321146488189697, |
| "learning_rate": 1.689451476793249e-05, |
| "loss": 0.3983, |
| "step": 1900 |
| }, |
| { |
| "epoch": 24.05, |
| "eval_loss": 0.5648804306983948, |
| "eval_runtime": 11.191, |
| "eval_samples_per_second": 40.211, |
| "eval_steps_per_second": 2.055, |
| "eval_wer": 0.6003324790468934, |
| "step": 1900 |
| }, |
| { |
| "epoch": 25.32, |
| "grad_norm": 2.3534066677093506, |
| "learning_rate": 1.661322081575246e-05, |
| "loss": 0.4001, |
| "step": 2000 |
| }, |
| { |
| "epoch": 25.32, |
| "eval_loss": 0.567441463470459, |
| "eval_runtime": 11.1757, |
| "eval_samples_per_second": 40.266, |
| "eval_steps_per_second": 2.058, |
| "eval_wer": 0.5976310867908845, |
| "step": 2000 |
| }, |
| { |
| "epoch": 26.58, |
| "grad_norm": 2.417196750640869, |
| "learning_rate": 1.6331926863572434e-05, |
| "loss": 0.3649, |
| "step": 2100 |
| }, |
| { |
| "epoch": 26.58, |
| "eval_loss": 0.579703152179718, |
| "eval_runtime": 11.126, |
| "eval_samples_per_second": 40.446, |
| "eval_steps_per_second": 2.067, |
| "eval_wer": 0.5804530027013922, |
| "step": 2100 |
| }, |
| { |
| "epoch": 27.85, |
| "grad_norm": 2.8033077716827393, |
| "learning_rate": 1.6050632911392404e-05, |
| "loss": 0.3711, |
| "step": 2200 |
| }, |
| { |
| "epoch": 27.85, |
| "eval_loss": 0.5839091539382935, |
| "eval_runtime": 11.2012, |
| "eval_samples_per_second": 40.174, |
| "eval_steps_per_second": 2.053, |
| "eval_wer": 0.6546373900394818, |
| "step": 2200 |
| }, |
| { |
| "epoch": 29.11, |
| "grad_norm": 2.2273740768432617, |
| "learning_rate": 1.576933895921238e-05, |
| "loss": 0.3547, |
| "step": 2300 |
| }, |
| { |
| "epoch": 29.11, |
| "eval_loss": 0.5734866261482239, |
| "eval_runtime": 11.079, |
| "eval_samples_per_second": 40.617, |
| "eval_steps_per_second": 2.076, |
| "eval_wer": 0.5904273741081942, |
| "step": 2300 |
| }, |
| { |
| "epoch": 30.38, |
| "grad_norm": 2.2852895259857178, |
| "learning_rate": 1.548804500703235e-05, |
| "loss": 0.3402, |
| "step": 2400 |
| }, |
| { |
| "epoch": 30.38, |
| "eval_loss": 0.5698839426040649, |
| "eval_runtime": 11.0702, |
| "eval_samples_per_second": 40.65, |
| "eval_steps_per_second": 2.078, |
| "eval_wer": 0.5426335111172681, |
| "step": 2400 |
| }, |
| { |
| "epoch": 31.65, |
| "grad_norm": 1.869658350944519, |
| "learning_rate": 1.5206751054852323e-05, |
| "loss": 0.3414, |
| "step": 2500 |
| }, |
| { |
| "epoch": 31.65, |
| "eval_loss": 0.5700486302375793, |
| "eval_runtime": 11.3836, |
| "eval_samples_per_second": 39.531, |
| "eval_steps_per_second": 2.02, |
| "eval_wer": 0.5421486458405486, |
| "step": 2500 |
| }, |
| { |
| "epoch": 32.91, |
| "grad_norm": 1.7649214267730713, |
| "learning_rate": 1.4925457102672294e-05, |
| "loss": 0.3255, |
| "step": 2600 |
| }, |
| { |
| "epoch": 32.91, |
| "eval_loss": 0.5744786262512207, |
| "eval_runtime": 11.2135, |
| "eval_samples_per_second": 40.13, |
| "eval_steps_per_second": 2.051, |
| "eval_wer": 0.5663226432084228, |
| "step": 2600 |
| }, |
| { |
| "epoch": 34.18, |
| "grad_norm": 2.0651187896728516, |
| "learning_rate": 1.4644163150492266e-05, |
| "loss": 0.3093, |
| "step": 2700 |
| }, |
| { |
| "epoch": 34.18, |
| "eval_loss": 0.5957615971565247, |
| "eval_runtime": 11.1504, |
| "eval_samples_per_second": 40.357, |
| "eval_steps_per_second": 2.063, |
| "eval_wer": 0.5931980328323059, |
| "step": 2700 |
| }, |
| { |
| "epoch": 35.44, |
| "grad_norm": 2.1554198265075684, |
| "learning_rate": 1.4362869198312237e-05, |
| "loss": 0.315, |
| "step": 2800 |
| }, |
| { |
| "epoch": 35.44, |
| "eval_loss": 0.5933964848518372, |
| "eval_runtime": 11.202, |
| "eval_samples_per_second": 40.171, |
| "eval_steps_per_second": 2.053, |
| "eval_wer": 0.5905659070443998, |
| "step": 2800 |
| }, |
| { |
| "epoch": 36.71, |
| "grad_norm": 2.880059003829956, |
| "learning_rate": 1.4081575246132208e-05, |
| "loss": 0.31, |
| "step": 2900 |
| }, |
| { |
| "epoch": 36.71, |
| "eval_loss": 0.6071695685386658, |
| "eval_runtime": 11.2335, |
| "eval_samples_per_second": 40.059, |
| "eval_steps_per_second": 2.047, |
| "eval_wer": 0.601094410196024, |
| "step": 2900 |
| }, |
| { |
| "epoch": 37.97, |
| "grad_norm": 2.7784523963928223, |
| "learning_rate": 1.380028129395218e-05, |
| "loss": 0.3026, |
| "step": 3000 |
| }, |
| { |
| "epoch": 37.97, |
| "eval_loss": 0.6038002371788025, |
| "eval_runtime": 11.1544, |
| "eval_samples_per_second": 40.343, |
| "eval_steps_per_second": 2.062, |
| "eval_wer": 0.5760199487428136, |
| "step": 3000 |
| }, |
| { |
| "epoch": 39.24, |
| "grad_norm": 2.2978756427764893, |
| "learning_rate": 1.3518987341772155e-05, |
| "loss": 0.2802, |
| "step": 3100 |
| }, |
| { |
| "epoch": 39.24, |
| "eval_loss": 0.6079789400100708, |
| "eval_runtime": 11.1769, |
| "eval_samples_per_second": 40.262, |
| "eval_steps_per_second": 2.058, |
| "eval_wer": 0.5776823439772806, |
| "step": 3100 |
| }, |
| { |
| "epoch": 40.51, |
| "grad_norm": 2.1417360305786133, |
| "learning_rate": 1.3237693389592126e-05, |
| "loss": 0.2835, |
| "step": 3200 |
| }, |
| { |
| "epoch": 40.51, |
| "eval_loss": 0.6061974167823792, |
| "eval_runtime": 11.1226, |
| "eval_samples_per_second": 40.458, |
| "eval_steps_per_second": 2.068, |
| "eval_wer": 0.5743575535083466, |
| "step": 3200 |
| }, |
| { |
| "epoch": 41.77, |
| "grad_norm": 1.6934860944747925, |
| "learning_rate": 1.2956399437412097e-05, |
| "loss": 0.2585, |
| "step": 3300 |
| }, |
| { |
| "epoch": 41.77, |
| "eval_loss": 0.6224856972694397, |
| "eval_runtime": 11.0743, |
| "eval_samples_per_second": 40.634, |
| "eval_steps_per_second": 2.077, |
| "eval_wer": 0.5784442751264113, |
| "step": 3300 |
| }, |
| { |
| "epoch": 43.04, |
| "grad_norm": 1.937164068222046, |
| "learning_rate": 1.2675105485232069e-05, |
| "loss": 0.2699, |
| "step": 3400 |
| }, |
| { |
| "epoch": 43.04, |
| "eval_loss": 0.6225900650024414, |
| "eval_runtime": 11.1534, |
| "eval_samples_per_second": 40.347, |
| "eval_steps_per_second": 2.062, |
| "eval_wer": 0.5664611761446284, |
| "step": 3400 |
| }, |
| { |
| "epoch": 44.3, |
| "grad_norm": 2.5404953956604004, |
| "learning_rate": 1.239381153305204e-05, |
| "loss": 0.2785, |
| "step": 3500 |
| }, |
| { |
| "epoch": 44.3, |
| "eval_loss": 0.6240466833114624, |
| "eval_runtime": 11.1649, |
| "eval_samples_per_second": 40.305, |
| "eval_steps_per_second": 2.06, |
| "eval_wer": 0.5713790953799266, |
| "step": 3500 |
| }, |
| { |
| "epoch": 45.57, |
| "grad_norm": 5.070058822631836, |
| "learning_rate": 1.2112517580872011e-05, |
| "loss": 0.2689, |
| "step": 3600 |
| }, |
| { |
| "epoch": 45.57, |
| "eval_loss": 0.6294780969619751, |
| "eval_runtime": 11.118, |
| "eval_samples_per_second": 40.475, |
| "eval_steps_per_second": 2.069, |
| "eval_wer": 0.5648680473782642, |
| "step": 3600 |
| }, |
| { |
| "epoch": 46.84, |
| "grad_norm": 2.5456833839416504, |
| "learning_rate": 1.1831223628691983e-05, |
| "loss": 0.2514, |
| "step": 3700 |
| }, |
| { |
| "epoch": 46.84, |
| "eval_loss": 0.6424580812454224, |
| "eval_runtime": 11.271, |
| "eval_samples_per_second": 39.926, |
| "eval_steps_per_second": 2.041, |
| "eval_wer": 0.5420793793724458, |
| "step": 3700 |
| }, |
| { |
| "epoch": 48.1, |
| "grad_norm": 2.153717279434204, |
| "learning_rate": 1.1549929676511956e-05, |
| "loss": 0.2433, |
| "step": 3800 |
| }, |
| { |
| "epoch": 48.1, |
| "eval_loss": 0.6667928099632263, |
| "eval_runtime": 11.226, |
| "eval_samples_per_second": 40.086, |
| "eval_steps_per_second": 2.049, |
| "eval_wer": 0.606774260580453, |
| "step": 3800 |
| }, |
| { |
| "epoch": 49.37, |
| "grad_norm": 2.5784971714019775, |
| "learning_rate": 1.1268635724331929e-05, |
| "loss": 0.2403, |
| "step": 3900 |
| }, |
| { |
| "epoch": 49.37, |
| "eval_loss": 0.6562526226043701, |
| "eval_runtime": 11.112, |
| "eval_samples_per_second": 40.497, |
| "eval_steps_per_second": 2.07, |
| "eval_wer": 0.5749809517212717, |
| "step": 3900 |
| }, |
| { |
| "epoch": 50.63, |
| "grad_norm": 2.656663656234741, |
| "learning_rate": 1.09873417721519e-05, |
| "loss": 0.2287, |
| "step": 4000 |
| }, |
| { |
| "epoch": 50.63, |
| "eval_loss": 0.6695858240127563, |
| "eval_runtime": 11.0907, |
| "eval_samples_per_second": 40.575, |
| "eval_steps_per_second": 2.074, |
| "eval_wer": 0.5932672993004087, |
| "step": 4000 |
| }, |
| { |
| "epoch": 51.9, |
| "grad_norm": 2.9128212928771973, |
| "learning_rate": 1.0706047819971872e-05, |
| "loss": 0.2366, |
| "step": 4100 |
| }, |
| { |
| "epoch": 51.9, |
| "eval_loss": 0.6738879680633545, |
| "eval_runtime": 11.1372, |
| "eval_samples_per_second": 40.405, |
| "eval_steps_per_second": 2.065, |
| "eval_wer": 0.5731107570824964, |
| "step": 4100 |
| }, |
| { |
| "epoch": 53.16, |
| "grad_norm": 1.8941991329193115, |
| "learning_rate": 1.0424753867791843e-05, |
| "loss": 0.2295, |
| "step": 4200 |
| }, |
| { |
| "epoch": 53.16, |
| "eval_loss": 0.680944561958313, |
| "eval_runtime": 11.1339, |
| "eval_samples_per_second": 40.417, |
| "eval_steps_per_second": 2.066, |
| "eval_wer": 0.6090600540278451, |
| "step": 4200 |
| }, |
| { |
| "epoch": 54.43, |
| "grad_norm": 2.2944602966308594, |
| "learning_rate": 1.0143459915611814e-05, |
| "loss": 0.2274, |
| "step": 4300 |
| }, |
| { |
| "epoch": 54.43, |
| "eval_loss": 0.6874995827674866, |
| "eval_runtime": 11.1569, |
| "eval_samples_per_second": 40.334, |
| "eval_steps_per_second": 2.061, |
| "eval_wer": 0.5913971046616333, |
| "step": 4300 |
| }, |
| { |
| "epoch": 55.7, |
| "grad_norm": 2.4684228897094727, |
| "learning_rate": 9.862165963431787e-06, |
| "loss": 0.2178, |
| "step": 4400 |
| }, |
| { |
| "epoch": 55.7, |
| "eval_loss": 0.6899309158325195, |
| "eval_runtime": 11.21, |
| "eval_samples_per_second": 40.143, |
| "eval_steps_per_second": 2.052, |
| "eval_wer": 0.5949296945348757, |
| "step": 4400 |
| }, |
| { |
| "epoch": 56.96, |
| "grad_norm": 2.0573887825012207, |
| "learning_rate": 9.580872011251759e-06, |
| "loss": 0.2176, |
| "step": 4500 |
| }, |
| { |
| "epoch": 56.96, |
| "eval_loss": 0.6924750208854675, |
| "eval_runtime": 11.1413, |
| "eval_samples_per_second": 40.39, |
| "eval_steps_per_second": 2.064, |
| "eval_wer": 0.5828080626168871, |
| "step": 4500 |
| }, |
| { |
| "epoch": 58.23, |
| "grad_norm": 1.9555515050888062, |
| "learning_rate": 9.299578059071732e-06, |
| "loss": 0.2064, |
| "step": 4600 |
| }, |
| { |
| "epoch": 58.23, |
| "eval_loss": 0.7009023427963257, |
| "eval_runtime": 11.0931, |
| "eval_samples_per_second": 40.566, |
| "eval_steps_per_second": 2.073, |
| "eval_wer": 0.598462284408118, |
| "step": 4600 |
| }, |
| { |
| "epoch": 59.49, |
| "grad_norm": 3.208376169204712, |
| "learning_rate": 9.018284106891703e-06, |
| "loss": 0.2081, |
| "step": 4700 |
| }, |
| { |
| "epoch": 59.49, |
| "eval_loss": 0.701277494430542, |
| "eval_runtime": 11.216, |
| "eval_samples_per_second": 40.121, |
| "eval_steps_per_second": 2.051, |
| "eval_wer": 0.5995705478977627, |
| "step": 4700 |
| }, |
| { |
| "epoch": 60.76, |
| "grad_norm": 2.5926976203918457, |
| "learning_rate": 8.736990154711675e-06, |
| "loss": 0.2093, |
| "step": 4800 |
| }, |
| { |
| "epoch": 60.76, |
| "eval_loss": 0.725727379322052, |
| "eval_runtime": 11.1473, |
| "eval_samples_per_second": 40.369, |
| "eval_steps_per_second": 2.063, |
| "eval_wer": 0.6086444552192284, |
| "step": 4800 |
| }, |
| { |
| "epoch": 62.03, |
| "grad_norm": 2.2040176391601562, |
| "learning_rate": 8.455696202531646e-06, |
| "loss": 0.2024, |
| "step": 4900 |
| }, |
| { |
| "epoch": 62.03, |
| "eval_loss": 0.7215314507484436, |
| "eval_runtime": 11.4213, |
| "eval_samples_per_second": 39.4, |
| "eval_steps_per_second": 2.014, |
| "eval_wer": 0.6003324790468934, |
| "step": 4900 |
| }, |
| { |
| "epoch": 63.29, |
| "grad_norm": 1.6947568655014038, |
| "learning_rate": 8.174402250351619e-06, |
| "loss": 0.1999, |
| "step": 5000 |
| }, |
| { |
| "epoch": 63.29, |
| "eval_loss": 0.7332788109779358, |
| "eval_runtime": 11.2504, |
| "eval_samples_per_second": 39.999, |
| "eval_steps_per_second": 2.044, |
| "eval_wer": 0.6090600540278451, |
| "step": 5000 |
| }, |
| { |
| "epoch": 64.56, |
| "grad_norm": 2.657949686050415, |
| "learning_rate": 7.89310829817159e-06, |
| "loss": 0.2064, |
| "step": 5100 |
| }, |
| { |
| "epoch": 64.56, |
| "eval_loss": 0.7529835104942322, |
| "eval_runtime": 11.2168, |
| "eval_samples_per_second": 40.118, |
| "eval_steps_per_second": 2.05, |
| "eval_wer": 0.6397450993973818, |
| "step": 5100 |
| }, |
| { |
| "epoch": 65.82, |
| "grad_norm": 2.161647081375122, |
| "learning_rate": 7.611814345991562e-06, |
| "loss": 0.186, |
| "step": 5200 |
| }, |
| { |
| "epoch": 65.82, |
| "eval_loss": 0.7542085647583008, |
| "eval_runtime": 11.1628, |
| "eval_samples_per_second": 40.312, |
| "eval_steps_per_second": 2.06, |
| "eval_wer": 0.6348964466301863, |
| "step": 5200 |
| }, |
| { |
| "epoch": 67.09, |
| "grad_norm": 1.5503740310668945, |
| "learning_rate": 7.330520393811533e-06, |
| "loss": 0.186, |
| "step": 5300 |
| }, |
| { |
| "epoch": 67.09, |
| "eval_loss": 0.7416096925735474, |
| "eval_runtime": 11.1272, |
| "eval_samples_per_second": 40.441, |
| "eval_steps_per_second": 2.067, |
| "eval_wer": 0.6270000692664681, |
| "step": 5300 |
| }, |
| { |
| "epoch": 68.35, |
| "grad_norm": 2.8439977169036865, |
| "learning_rate": 7.049226441631506e-06, |
| "loss": 0.1807, |
| "step": 5400 |
| }, |
| { |
| "epoch": 68.35, |
| "eval_loss": 0.7548705339431763, |
| "eval_runtime": 11.1833, |
| "eval_samples_per_second": 40.239, |
| "eval_steps_per_second": 2.057, |
| "eval_wer": 0.6352427789707003, |
| "step": 5400 |
| }, |
| { |
| "epoch": 69.62, |
| "grad_norm": 3.1191818714141846, |
| "learning_rate": 6.7679324894514775e-06, |
| "loss": 0.1784, |
| "step": 5500 |
| }, |
| { |
| "epoch": 69.62, |
| "eval_loss": 0.7506438493728638, |
| "eval_runtime": 11.1231, |
| "eval_samples_per_second": 40.456, |
| "eval_steps_per_second": 2.068, |
| "eval_wer": 0.5844011913832514, |
| "step": 5500 |
| }, |
| { |
| "epoch": 70.89, |
| "grad_norm": 2.1088929176330566, |
| "learning_rate": 6.486638537271449e-06, |
| "loss": 0.1824, |
| "step": 5600 |
| }, |
| { |
| "epoch": 70.89, |
| "eval_loss": 0.7611370086669922, |
| "eval_runtime": 11.0844, |
| "eval_samples_per_second": 40.598, |
| "eval_steps_per_second": 2.075, |
| "eval_wer": 0.6252684075638983, |
| "step": 5600 |
| }, |
| { |
| "epoch": 72.15, |
| "grad_norm": 3.0879805088043213, |
| "learning_rate": 6.208157524613221e-06, |
| "loss": 0.1769, |
| "step": 5700 |
| }, |
| { |
| "epoch": 72.15, |
| "eval_loss": 0.771263837814331, |
| "eval_runtime": 11.226, |
| "eval_samples_per_second": 40.085, |
| "eval_steps_per_second": 2.049, |
| "eval_wer": 0.5927131675555863, |
| "step": 5700 |
| }, |
| { |
| "epoch": 73.42, |
| "grad_norm": 2.1844921112060547, |
| "learning_rate": 5.926863572433193e-06, |
| "loss": 0.1843, |
| "step": 5800 |
| }, |
| { |
| "epoch": 73.42, |
| "eval_loss": 0.7719753980636597, |
| "eval_runtime": 11.1675, |
| "eval_samples_per_second": 40.295, |
| "eval_steps_per_second": 2.06, |
| "eval_wer": 0.5955530927478008, |
| "step": 5800 |
| }, |
| { |
| "epoch": 74.68, |
| "grad_norm": 2.3081653118133545, |
| "learning_rate": 5.645569620253165e-06, |
| "loss": 0.1709, |
| "step": 5900 |
| }, |
| { |
| "epoch": 74.68, |
| "eval_loss": 0.7804738879203796, |
| "eval_runtime": 11.2408, |
| "eval_samples_per_second": 40.033, |
| "eval_steps_per_second": 2.046, |
| "eval_wer": 0.6258225393087207, |
| "step": 5900 |
| }, |
| { |
| "epoch": 75.95, |
| "grad_norm": 3.2150704860687256, |
| "learning_rate": 5.364275668073137e-06, |
| "loss": 0.1691, |
| "step": 6000 |
| }, |
| { |
| "epoch": 75.95, |
| "eval_loss": 0.7865281105041504, |
| "eval_runtime": 11.2778, |
| "eval_samples_per_second": 39.901, |
| "eval_steps_per_second": 2.039, |
| "eval_wer": 0.6281775992242156, |
| "step": 6000 |
| }, |
| { |
| "epoch": 77.22, |
| "grad_norm": 1.8031377792358398, |
| "learning_rate": 5.082981715893108e-06, |
| "loss": 0.1701, |
| "step": 6100 |
| }, |
| { |
| "epoch": 77.22, |
| "eval_loss": 0.7807941436767578, |
| "eval_runtime": 11.2568, |
| "eval_samples_per_second": 39.976, |
| "eval_steps_per_second": 2.043, |
| "eval_wer": 0.6218050841587588, |
| "step": 6100 |
| }, |
| { |
| "epoch": 78.48, |
| "grad_norm": 1.8435957431793213, |
| "learning_rate": 4.8016877637130805e-06, |
| "loss": 0.1735, |
| "step": 6200 |
| }, |
| { |
| "epoch": 78.48, |
| "eval_loss": 0.7789934873580933, |
| "eval_runtime": 11.2304, |
| "eval_samples_per_second": 40.07, |
| "eval_steps_per_second": 2.048, |
| "eval_wer": 0.5965920897693426, |
| "step": 6200 |
| }, |
| { |
| "epoch": 79.75, |
| "grad_norm": 1.9381072521209717, |
| "learning_rate": 4.520393811533053e-06, |
| "loss": 0.1746, |
| "step": 6300 |
| }, |
| { |
| "epoch": 79.75, |
| "eval_loss": 0.7949352264404297, |
| "eval_runtime": 11.4313, |
| "eval_samples_per_second": 39.366, |
| "eval_steps_per_second": 2.012, |
| "eval_wer": 0.6430698898663157, |
| "step": 6300 |
| }, |
| { |
| "epoch": 81.01, |
| "grad_norm": 1.8376802206039429, |
| "learning_rate": 4.239099859353024e-06, |
| "loss": 0.1745, |
| "step": 6400 |
| }, |
| { |
| "epoch": 81.01, |
| "eval_loss": 0.8125633001327515, |
| "eval_runtime": 11.456, |
| "eval_samples_per_second": 39.281, |
| "eval_steps_per_second": 2.008, |
| "eval_wer": 0.6284546650966267, |
| "step": 6400 |
| }, |
| { |
| "epoch": 82.28, |
| "grad_norm": 2.187868118286133, |
| "learning_rate": 3.957805907172996e-06, |
| "loss": 0.1605, |
| "step": 6500 |
| }, |
| { |
| "epoch": 82.28, |
| "eval_loss": 0.8113065361976624, |
| "eval_runtime": 11.3144, |
| "eval_samples_per_second": 39.772, |
| "eval_steps_per_second": 2.033, |
| "eval_wer": 0.6194500242432638, |
| "step": 6500 |
| }, |
| { |
| "epoch": 83.54, |
| "grad_norm": 2.5882511138916016, |
| "learning_rate": 3.676511954992968e-06, |
| "loss": 0.1579, |
| "step": 6600 |
| }, |
| { |
| "epoch": 83.54, |
| "eval_loss": 0.7976768612861633, |
| "eval_runtime": 11.4022, |
| "eval_samples_per_second": 39.466, |
| "eval_steps_per_second": 2.017, |
| "eval_wer": 0.6155018355614047, |
| "step": 6600 |
| }, |
| { |
| "epoch": 84.81, |
| "grad_norm": 2.416449546813965, |
| "learning_rate": 3.39521800281294e-06, |
| "loss": 0.1704, |
| "step": 6700 |
| }, |
| { |
| "epoch": 84.81, |
| "eval_loss": 0.8016535043716431, |
| "eval_runtime": 11.3366, |
| "eval_samples_per_second": 39.694, |
| "eval_steps_per_second": 2.029, |
| "eval_wer": 0.6139779732631433, |
| "step": 6700 |
| }, |
| { |
| "epoch": 86.08, |
| "grad_norm": 1.5159286260604858, |
| "learning_rate": 3.1139240506329116e-06, |
| "loss": 0.1659, |
| "step": 6800 |
| }, |
| { |
| "epoch": 86.08, |
| "eval_loss": 0.8146914839744568, |
| "eval_runtime": 11.289, |
| "eval_samples_per_second": 39.862, |
| "eval_steps_per_second": 2.037, |
| "eval_wer": 0.6279005333518044, |
| "step": 6800 |
| }, |
| { |
| "epoch": 87.34, |
| "grad_norm": 2.9167511463165283, |
| "learning_rate": 2.832630098452884e-06, |
| "loss": 0.166, |
| "step": 6900 |
| }, |
| { |
| "epoch": 87.34, |
| "eval_loss": 0.8088270425796509, |
| "eval_runtime": 11.28, |
| "eval_samples_per_second": 39.893, |
| "eval_steps_per_second": 2.039, |
| "eval_wer": 0.6350349795663919, |
| "step": 6900 |
| }, |
| { |
| "epoch": 88.61, |
| "grad_norm": 2.3707916736602783, |
| "learning_rate": 2.5513361462728552e-06, |
| "loss": 0.1539, |
| "step": 7000 |
| }, |
| { |
| "epoch": 88.61, |
| "eval_loss": 0.8052927255630493, |
| "eval_runtime": 11.3196, |
| "eval_samples_per_second": 39.754, |
| "eval_steps_per_second": 2.032, |
| "eval_wer": 0.616402299646741, |
| "step": 7000 |
| }, |
| { |
| "epoch": 89.87, |
| "grad_norm": 2.315516471862793, |
| "learning_rate": 2.270042194092827e-06, |
| "loss": 0.1589, |
| "step": 7100 |
| }, |
| { |
| "epoch": 89.87, |
| "eval_loss": 0.8188755512237549, |
| "eval_runtime": 11.3039, |
| "eval_samples_per_second": 39.809, |
| "eval_steps_per_second": 2.035, |
| "eval_wer": 0.6357276442474198, |
| "step": 7100 |
| }, |
| { |
| "epoch": 91.14, |
| "grad_norm": 1.863142967224121, |
| "learning_rate": 1.9887482419127992e-06, |
| "loss": 0.1559, |
| "step": 7200 |
| }, |
| { |
| "epoch": 91.14, |
| "eval_loss": 0.8152031898498535, |
| "eval_runtime": 11.2054, |
| "eval_samples_per_second": 40.159, |
| "eval_steps_per_second": 2.053, |
| "eval_wer": 0.6258225393087207, |
| "step": 7200 |
| }, |
| { |
| "epoch": 92.41, |
| "grad_norm": 3.034898519515991, |
| "learning_rate": 1.7074542897327708e-06, |
| "loss": 0.1564, |
| "step": 7300 |
| }, |
| { |
| "epoch": 92.41, |
| "eval_loss": 0.8190972208976746, |
| "eval_runtime": 11.3427, |
| "eval_samples_per_second": 39.673, |
| "eval_steps_per_second": 2.028, |
| "eval_wer": 0.6245064764147676, |
| "step": 7300 |
| }, |
| { |
| "epoch": 93.67, |
| "grad_norm": 1.673194408416748, |
| "learning_rate": 1.4261603375527428e-06, |
| "loss": 0.158, |
| "step": 7400 |
| }, |
| { |
| "epoch": 93.67, |
| "eval_loss": 0.8254526853561401, |
| "eval_runtime": 11.282, |
| "eval_samples_per_second": 39.887, |
| "eval_steps_per_second": 2.039, |
| "eval_wer": 0.6333033178638221, |
| "step": 7400 |
| }, |
| { |
| "epoch": 94.94, |
| "grad_norm": 1.6554739475250244, |
| "learning_rate": 1.1448663853727146e-06, |
| "loss": 0.1595, |
| "step": 7500 |
| }, |
| { |
| "epoch": 94.94, |
| "eval_loss": 0.8184179663658142, |
| "eval_runtime": 11.2214, |
| "eval_samples_per_second": 40.102, |
| "eval_steps_per_second": 2.05, |
| "eval_wer": 0.6205582877329086, |
| "step": 7500 |
| }, |
| { |
| "epoch": 96.2, |
| "grad_norm": 2.310234785079956, |
| "learning_rate": 8.635724331926865e-07, |
| "loss": 0.1638, |
| "step": 7600 |
| }, |
| { |
| "epoch": 96.2, |
| "eval_loss": 0.8229891061782837, |
| "eval_runtime": 11.1115, |
| "eval_samples_per_second": 40.499, |
| "eval_steps_per_second": 2.07, |
| "eval_wer": 0.6364203089284477, |
| "step": 7600 |
| }, |
| { |
| "epoch": 97.47, |
| "grad_norm": 1.6313074827194214, |
| "learning_rate": 5.822784810126583e-07, |
| "loss": 0.1629, |
| "step": 7700 |
| }, |
| { |
| "epoch": 97.47, |
| "eval_loss": 0.8244702219963074, |
| "eval_runtime": 11.1778, |
| "eval_samples_per_second": 40.258, |
| "eval_steps_per_second": 2.058, |
| "eval_wer": 0.6312253238207384, |
| "step": 7700 |
| }, |
| { |
| "epoch": 98.73, |
| "grad_norm": 2.2342050075531006, |
| "learning_rate": 3.009845288326301e-07, |
| "loss": 0.1531, |
| "step": 7800 |
| }, |
| { |
| "epoch": 98.73, |
| "eval_loss": 0.8226235508918762, |
| "eval_runtime": 11.0989, |
| "eval_samples_per_second": 40.545, |
| "eval_steps_per_second": 2.072, |
| "eval_wer": 0.626723003394057, |
| "step": 7800 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 3.3648197650909424, |
| "learning_rate": 1.9690576652601972e-08, |
| "loss": 0.1572, |
| "step": 7900 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_loss": 0.8258158564567566, |
| "eval_runtime": 11.1124, |
| "eval_samples_per_second": 40.495, |
| "eval_steps_per_second": 2.07, |
| "eval_wer": 0.6288702639052435, |
| "step": 7900 |
| }, |
| { |
| "epoch": 100.0, |
| "step": 7900, |
| "total_flos": 3.6740184088961606e+19, |
| "train_loss": 0.6056561310683625, |
| "train_runtime": 25369.8434, |
| "train_samples_per_second": 12.404, |
| "train_steps_per_second": 0.311 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 7900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 100, |
| "total_flos": 3.6740184088961606e+19, |
| "train_batch_size": 20, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|