{ "best_global_step": 8500, "best_metric": 0.3142754137516022, "best_model_checkpoint": "./Wav2vec2-fula/checkpoint-8500", "epoch": 7.565045540470871, "eval_steps": 500, "global_step": 11000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017185083347654236, "grad_norm": 10.79065990447998, "learning_rate": 1.2000000000000002e-06, "loss": 14.779, "step": 25 }, { "epoch": 0.03437016669530847, "grad_norm": 11.638880729675293, "learning_rate": 2.4000000000000003e-06, "loss": 13.4402, "step": 50 }, { "epoch": 0.05155525004296271, "grad_norm": 12.49268913269043, "learning_rate": 3.6499999999999998e-06, "loss": 14.1815, "step": 75 }, { "epoch": 0.06874033339061694, "grad_norm": 13.672736167907715, "learning_rate": 4.9000000000000005e-06, "loss": 12.3835, "step": 100 }, { "epoch": 0.08592541673827118, "grad_norm": 13.192852973937988, "learning_rate": 6.15e-06, "loss": 11.3532, "step": 125 }, { "epoch": 0.10311050008592541, "grad_norm": 13.134611129760742, "learning_rate": 7.4e-06, "loss": 7.4199, "step": 150 }, { "epoch": 0.12029558343357966, "grad_norm": 10.945942878723145, "learning_rate": 8.65e-06, "loss": 6.3555, "step": 175 }, { "epoch": 0.13748066678123388, "grad_norm": 10.094327926635742, "learning_rate": 9.900000000000002e-06, "loss": 5.2388, "step": 200 }, { "epoch": 0.1546657501288881, "grad_norm": 9.148414611816406, "learning_rate": 1.115e-05, "loss": 5.0771, "step": 225 }, { "epoch": 0.17185083347654237, "grad_norm": 7.876718997955322, "learning_rate": 1.24e-05, "loss": 4.6181, "step": 250 }, { "epoch": 0.1890359168241966, "grad_norm": 7.483435153961182, "learning_rate": 1.3650000000000001e-05, "loss": 4.5182, "step": 275 }, { "epoch": 0.20622100017185083, "grad_norm": 6.882397174835205, "learning_rate": 1.49e-05, "loss": 4.1879, "step": 300 }, { "epoch": 0.22340608351950508, "grad_norm": 6.444328308105469, "learning_rate": 1.6150000000000003e-05, "loss": 4.1217, "step": 325 }, { "epoch": 0.2405911668671593, "grad_norm": 4.407646179199219, "learning_rate": 1.74e-05, "loss": 3.8535, "step": 350 }, { "epoch": 0.25777625021481354, "grad_norm": 4.332294940948486, "learning_rate": 1.865e-05, "loss": 3.7725, "step": 375 }, { "epoch": 0.27496133356246777, "grad_norm": 3.5310115814208984, "learning_rate": 1.9900000000000003e-05, "loss": 3.5538, "step": 400 }, { "epoch": 0.292146416910122, "grad_norm": 2.884195566177368, "learning_rate": 2.115e-05, "loss": 3.4667, "step": 425 }, { "epoch": 0.3093315002577762, "grad_norm": 2.221975564956665, "learning_rate": 2.2400000000000002e-05, "loss": 3.3157, "step": 450 }, { "epoch": 0.3265165836054305, "grad_norm": 1.393004059791565, "learning_rate": 2.365e-05, "loss": 3.2574, "step": 475 }, { "epoch": 0.34370166695308474, "grad_norm": 1.5566725730895996, "learning_rate": 2.4900000000000002e-05, "loss": 3.1344, "step": 500 }, { "epoch": 0.34370166695308474, "eval_loss": 3.093510150909424, "eval_runtime": 148.9649, "eval_samples_per_second": 8.223, "eval_steps_per_second": 1.034, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.36088675030073897, "grad_norm": 1.1272865533828735, "learning_rate": 2.6150000000000002e-05, "loss": 3.0829, "step": 525 }, { "epoch": 0.3780718336483932, "grad_norm": 0.8798616528511047, "learning_rate": 2.7400000000000002e-05, "loss": 3.0138, "step": 550 }, { "epoch": 0.3952569169960474, "grad_norm": 1.233052134513855, "learning_rate": 2.865e-05, "loss": 2.9976, "step": 575 }, { "epoch": 0.41244200034370165, "grad_norm": 0.4763319492340088, "learning_rate": 2.9900000000000002e-05, "loss": 2.9609, "step": 600 }, { "epoch": 0.4296270836913559, "grad_norm": 0.35058021545410156, "learning_rate": 3.115e-05, "loss": 2.9375, "step": 625 }, { "epoch": 0.44681216703901017, "grad_norm": 0.9661968946456909, "learning_rate": 3.24e-05, "loss": 2.8961, "step": 650 }, { "epoch": 0.4639972503866644, "grad_norm": 1.0027278661727905, "learning_rate": 3.3650000000000005e-05, "loss": 2.8169, "step": 675 }, { "epoch": 0.4811823337343186, "grad_norm": 0.6780478358268738, "learning_rate": 3.49e-05, "loss": 2.674, "step": 700 }, { "epoch": 0.49836741708197285, "grad_norm": 0.7937625646591187, "learning_rate": 3.615e-05, "loss": 2.4667, "step": 725 }, { "epoch": 0.5155525004296271, "grad_norm": 1.0229036808013916, "learning_rate": 3.74e-05, "loss": 2.2186, "step": 750 }, { "epoch": 0.5327375837772813, "grad_norm": 1.1023578643798828, "learning_rate": 3.8650000000000004e-05, "loss": 1.9797, "step": 775 }, { "epoch": 0.5499226671249355, "grad_norm": 1.084370493888855, "learning_rate": 3.99e-05, "loss": 1.6958, "step": 800 }, { "epoch": 0.5671077504725898, "grad_norm": 0.8571304082870483, "learning_rate": 4.115e-05, "loss": 1.3521, "step": 825 }, { "epoch": 0.584292833820244, "grad_norm": 1.0564861297607422, "learning_rate": 4.24e-05, "loss": 1.1168, "step": 850 }, { "epoch": 0.6014779171678982, "grad_norm": 0.8157157301902771, "learning_rate": 4.3650000000000004e-05, "loss": 0.9829, "step": 875 }, { "epoch": 0.6186630005155525, "grad_norm": 1.098561406135559, "learning_rate": 4.49e-05, "loss": 0.904, "step": 900 }, { "epoch": 0.6358480838632067, "grad_norm": 0.6373503804206848, "learning_rate": 4.6150000000000004e-05, "loss": 0.8686, "step": 925 }, { "epoch": 0.653033167210861, "grad_norm": 1.2277697324752808, "learning_rate": 4.74e-05, "loss": 0.7969, "step": 950 }, { "epoch": 0.6702182505585152, "grad_norm": 0.5907439589500427, "learning_rate": 4.8650000000000003e-05, "loss": 0.7524, "step": 975 }, { "epoch": 0.6874033339061695, "grad_norm": 0.7881184220314026, "learning_rate": 4.99e-05, "loss": 0.7323, "step": 1000 }, { "epoch": 0.6874033339061695, "eval_loss": 0.630436360836029, "eval_runtime": 155.009, "eval_samples_per_second": 7.903, "eval_steps_per_second": 0.993, "eval_wer": 0.711954217888936, "step": 1000 }, { "epoch": 0.7045884172538237, "grad_norm": 0.7166395783424377, "learning_rate": 4.99866651205937e-05, "loss": 0.705, "step": 1025 }, { "epoch": 0.7217735006014779, "grad_norm": 0.7382345199584961, "learning_rate": 4.9972170686456406e-05, "loss": 0.7207, "step": 1050 }, { "epoch": 0.7389585839491322, "grad_norm": 0.6300435066223145, "learning_rate": 4.995767625231911e-05, "loss": 0.7097, "step": 1075 }, { "epoch": 0.7561436672967864, "grad_norm": 0.7419637441635132, "learning_rate": 4.994318181818182e-05, "loss": 0.6865, "step": 1100 }, { "epoch": 0.7733287506444406, "grad_norm": 0.5746726989746094, "learning_rate": 4.9928687384044535e-05, "loss": 0.6694, "step": 1125 }, { "epoch": 0.7905138339920948, "grad_norm": 0.7999011874198914, "learning_rate": 4.991419294990724e-05, "loss": 0.6729, "step": 1150 }, { "epoch": 0.8076989173397491, "grad_norm": 0.5744255781173706, "learning_rate": 4.989969851576995e-05, "loss": 0.6253, "step": 1175 }, { "epoch": 0.8248840006874033, "grad_norm": 0.8337430357933044, "learning_rate": 4.988520408163265e-05, "loss": 0.6616, "step": 1200 }, { "epoch": 0.8420690840350575, "grad_norm": 0.5431417226791382, "learning_rate": 4.9870709647495364e-05, "loss": 0.6129, "step": 1225 }, { "epoch": 0.8592541673827118, "grad_norm": 0.8542420864105225, "learning_rate": 4.985621521335807e-05, "loss": 0.6252, "step": 1250 }, { "epoch": 0.876439250730366, "grad_norm": 0.5819442272186279, "learning_rate": 4.984172077922078e-05, "loss": 0.5723, "step": 1275 }, { "epoch": 0.8936243340780203, "grad_norm": 0.9629399180412292, "learning_rate": 4.982722634508349e-05, "loss": 0.582, "step": 1300 }, { "epoch": 0.9108094174256746, "grad_norm": 0.5698046088218689, "learning_rate": 4.98127319109462e-05, "loss": 0.5745, "step": 1325 }, { "epoch": 0.9279945007733288, "grad_norm": 0.893267035484314, "learning_rate": 4.979823747680891e-05, "loss": 0.5887, "step": 1350 }, { "epoch": 0.945179584120983, "grad_norm": 0.4911533296108246, "learning_rate": 4.9783743042671616e-05, "loss": 0.5657, "step": 1375 }, { "epoch": 0.9623646674686372, "grad_norm": 0.9226717948913574, "learning_rate": 4.976924860853432e-05, "loss": 0.5615, "step": 1400 }, { "epoch": 0.9795497508162915, "grad_norm": 0.5409913659095764, "learning_rate": 4.975475417439703e-05, "loss": 0.5562, "step": 1425 }, { "epoch": 0.9967348341639457, "grad_norm": 0.7855440974235535, "learning_rate": 4.9740259740259745e-05, "loss": 0.5641, "step": 1450 }, { "epoch": 1.0144354700120295, "grad_norm": 0.48342418670654297, "learning_rate": 4.972576530612245e-05, "loss": 0.5216, "step": 1475 }, { "epoch": 1.0316205533596838, "grad_norm": 0.7128458023071289, "learning_rate": 4.971127087198516e-05, "loss": 0.5416, "step": 1500 }, { "epoch": 1.0316205533596838, "eval_loss": 0.47852450609207153, "eval_runtime": 156.3389, "eval_samples_per_second": 7.836, "eval_steps_per_second": 0.985, "eval_wer": 0.6490568037303942, "step": 1500 }, { "epoch": 1.048805636707338, "grad_norm": 0.4331744909286499, "learning_rate": 4.969677643784787e-05, "loss": 0.5337, "step": 1525 }, { "epoch": 1.0659907200549923, "grad_norm": 0.7347924113273621, "learning_rate": 4.968228200371058e-05, "loss": 0.5115, "step": 1550 }, { "epoch": 1.0831758034026464, "grad_norm": 0.47687920928001404, "learning_rate": 4.966778756957329e-05, "loss": 0.5525, "step": 1575 }, { "epoch": 1.1003608867503007, "grad_norm": 0.6743185520172119, "learning_rate": 4.9653293135435996e-05, "loss": 0.4931, "step": 1600 }, { "epoch": 1.117545970097955, "grad_norm": 0.5449199676513672, "learning_rate": 4.9638798701298704e-05, "loss": 0.4773, "step": 1625 }, { "epoch": 1.1347310534456092, "grad_norm": 0.6370054483413696, "learning_rate": 4.962430426716141e-05, "loss": 0.4937, "step": 1650 }, { "epoch": 1.1519161367932633, "grad_norm": 0.5630868077278137, "learning_rate": 4.9609809833024125e-05, "loss": 0.541, "step": 1675 }, { "epoch": 1.1691012201409177, "grad_norm": 0.6498411297798157, "learning_rate": 4.959531539888683e-05, "loss": 0.5057, "step": 1700 }, { "epoch": 1.186286303488572, "grad_norm": 0.468143105506897, "learning_rate": 4.9580820964749533e-05, "loss": 0.5155, "step": 1725 }, { "epoch": 1.2034713868362261, "grad_norm": 0.7214887738227844, "learning_rate": 4.956632653061225e-05, "loss": 0.5132, "step": 1750 }, { "epoch": 1.2206564701838805, "grad_norm": 0.4510962963104248, "learning_rate": 4.9551832096474955e-05, "loss": 0.4643, "step": 1775 }, { "epoch": 1.2378415535315346, "grad_norm": 0.6587559580802917, "learning_rate": 4.953733766233766e-05, "loss": 0.4701, "step": 1800 }, { "epoch": 1.255026636879189, "grad_norm": 0.5024493336677551, "learning_rate": 4.952284322820037e-05, "loss": 0.5198, "step": 1825 }, { "epoch": 1.272211720226843, "grad_norm": 0.7172912359237671, "learning_rate": 4.950834879406308e-05, "loss": 0.4555, "step": 1850 }, { "epoch": 1.2893968035744974, "grad_norm": 0.5447876453399658, "learning_rate": 4.949385435992579e-05, "loss": 0.5041, "step": 1875 }, { "epoch": 1.3065818869221515, "grad_norm": 0.6178200244903564, "learning_rate": 4.94793599257885e-05, "loss": 0.4686, "step": 1900 }, { "epoch": 1.3237669702698058, "grad_norm": 0.49054816365242004, "learning_rate": 4.9464865491651207e-05, "loss": 0.4803, "step": 1925 }, { "epoch": 1.34095205361746, "grad_norm": 0.6802988648414612, "learning_rate": 4.9450371057513914e-05, "loss": 0.4644, "step": 1950 }, { "epoch": 1.3581371369651143, "grad_norm": 0.41138285398483276, "learning_rate": 4.943587662337663e-05, "loss": 0.513, "step": 1975 }, { "epoch": 1.3753222203127686, "grad_norm": 0.8374213576316833, "learning_rate": 4.9421382189239336e-05, "loss": 0.4479, "step": 2000 }, { "epoch": 1.3753222203127686, "eval_loss": 0.42023956775665283, "eval_runtime": 156.1936, "eval_samples_per_second": 7.843, "eval_steps_per_second": 0.986, "eval_wer": 0.6206549385332768, "step": 2000 }, { "epoch": 1.3925073036604227, "grad_norm": 0.7108762860298157, "learning_rate": 4.940688775510204e-05, "loss": 0.465, "step": 2025 }, { "epoch": 1.4096923870080769, "grad_norm": 0.6862374544143677, "learning_rate": 4.939239332096475e-05, "loss": 0.4513, "step": 2050 }, { "epoch": 1.4268774703557312, "grad_norm": 0.5275014638900757, "learning_rate": 4.9377898886827465e-05, "loss": 0.5135, "step": 2075 }, { "epoch": 1.4440625537033855, "grad_norm": 0.7356075644493103, "learning_rate": 4.936340445269017e-05, "loss": 0.4843, "step": 2100 }, { "epoch": 1.4612476370510397, "grad_norm": 0.46985840797424316, "learning_rate": 4.934891001855288e-05, "loss": 0.544, "step": 2125 }, { "epoch": 1.478432720398694, "grad_norm": 0.8410568237304688, "learning_rate": 4.933441558441559e-05, "loss": 0.453, "step": 2150 }, { "epoch": 1.4956178037463481, "grad_norm": 0.5153818130493164, "learning_rate": 4.9319921150278294e-05, "loss": 0.5145, "step": 2175 }, { "epoch": 1.5128028870940025, "grad_norm": 0.8318967223167419, "learning_rate": 4.930542671614101e-05, "loss": 0.4583, "step": 2200 }, { "epoch": 1.5299879704416566, "grad_norm": 0.5471023321151733, "learning_rate": 4.9290932282003716e-05, "loss": 0.4622, "step": 2225 }, { "epoch": 1.547173053789311, "grad_norm": 0.6953750848770142, "learning_rate": 4.927643784786642e-05, "loss": 0.4712, "step": 2250 }, { "epoch": 1.5643581371369653, "grad_norm": 0.5455946326255798, "learning_rate": 4.926194341372913e-05, "loss": 0.4702, "step": 2275 }, { "epoch": 1.5815432204846194, "grad_norm": 0.6755653023719788, "learning_rate": 4.924744897959184e-05, "loss": 0.4356, "step": 2300 }, { "epoch": 1.5987283038322735, "grad_norm": 0.47134584188461304, "learning_rate": 4.9232954545454546e-05, "loss": 0.4333, "step": 2325 }, { "epoch": 1.6159133871799278, "grad_norm": 0.6077346205711365, "learning_rate": 4.921846011131725e-05, "loss": 0.4432, "step": 2350 }, { "epoch": 1.6330984705275822, "grad_norm": 0.5316899418830872, "learning_rate": 4.920396567717996e-05, "loss": 0.45, "step": 2375 }, { "epoch": 1.6502835538752363, "grad_norm": 0.79489666223526, "learning_rate": 4.9189471243042675e-05, "loss": 0.4637, "step": 2400 }, { "epoch": 1.6674686372228904, "grad_norm": 0.5166532397270203, "learning_rate": 4.917497680890538e-05, "loss": 0.4375, "step": 2425 }, { "epoch": 1.6846537205705447, "grad_norm": 0.6109660863876343, "learning_rate": 4.916048237476809e-05, "loss": 0.418, "step": 2450 }, { "epoch": 1.701838803918199, "grad_norm": 0.6634232401847839, "learning_rate": 4.91459879406308e-05, "loss": 0.4315, "step": 2475 }, { "epoch": 1.7190238872658532, "grad_norm": 0.5985594987869263, "learning_rate": 4.913149350649351e-05, "loss": 0.4541, "step": 2500 }, { "epoch": 1.7190238872658532, "eval_loss": 0.3850683569908142, "eval_runtime": 155.7689, "eval_samples_per_second": 7.864, "eval_steps_per_second": 0.989, "eval_wer": 0.6006252649427724, "step": 2500 }, { "epoch": 1.7362089706135073, "grad_norm": 0.5531134605407715, "learning_rate": 4.911699907235622e-05, "loss": 0.4558, "step": 2525 }, { "epoch": 1.7533940539611617, "grad_norm": 0.594007670879364, "learning_rate": 4.9102504638218926e-05, "loss": 0.4422, "step": 2550 }, { "epoch": 1.770579137308816, "grad_norm": 0.4865548312664032, "learning_rate": 4.9088010204081634e-05, "loss": 0.4216, "step": 2575 }, { "epoch": 1.7877642206564701, "grad_norm": 0.5752180814743042, "learning_rate": 4.907351576994435e-05, "loss": 0.429, "step": 2600 }, { "epoch": 1.8049493040041245, "grad_norm": 0.4255332350730896, "learning_rate": 4.9059021335807055e-05, "loss": 0.4074, "step": 2625 }, { "epoch": 1.8221343873517788, "grad_norm": 0.6160002946853638, "learning_rate": 4.904452690166976e-05, "loss": 0.4388, "step": 2650 }, { "epoch": 1.839319470699433, "grad_norm": 0.49583593010902405, "learning_rate": 4.903003246753247e-05, "loss": 0.4211, "step": 2675 }, { "epoch": 1.856504554047087, "grad_norm": 0.7490856647491455, "learning_rate": 4.901553803339518e-05, "loss": 0.4559, "step": 2700 }, { "epoch": 1.8736896373947414, "grad_norm": 0.4634897708892822, "learning_rate": 4.900104359925789e-05, "loss": 0.4323, "step": 2725 }, { "epoch": 1.8908747207423957, "grad_norm": 1.0682307481765747, "learning_rate": 4.89865491651206e-05, "loss": 0.4361, "step": 2750 }, { "epoch": 1.9080598040900498, "grad_norm": 0.41067609190940857, "learning_rate": 4.89720547309833e-05, "loss": 0.4627, "step": 2775 }, { "epoch": 1.925244887437704, "grad_norm": 0.674846887588501, "learning_rate": 4.8957560296846014e-05, "loss": 0.4357, "step": 2800 }, { "epoch": 1.9424299707853583, "grad_norm": 0.4671356678009033, "learning_rate": 4.894306586270872e-05, "loss": 0.429, "step": 2825 }, { "epoch": 1.9596150541330126, "grad_norm": 0.6439480781555176, "learning_rate": 4.892857142857143e-05, "loss": 0.3965, "step": 2850 }, { "epoch": 1.9768001374806667, "grad_norm": 0.5034067034721375, "learning_rate": 4.8914076994434137e-05, "loss": 0.4131, "step": 2875 }, { "epoch": 1.9939852208283209, "grad_norm": 0.67892986536026, "learning_rate": 4.8899582560296844e-05, "loss": 0.4306, "step": 2900 }, { "epoch": 2.011685856676405, "grad_norm": 0.4826701879501343, "learning_rate": 4.888508812615956e-05, "loss": 0.4437, "step": 2925 }, { "epoch": 2.028870940024059, "grad_norm": 0.5658535361289978, "learning_rate": 4.8870593692022266e-05, "loss": 0.3842, "step": 2950 }, { "epoch": 2.0460560233717136, "grad_norm": 0.5490546226501465, "learning_rate": 4.885609925788497e-05, "loss": 0.3872, "step": 2975 }, { "epoch": 2.0632411067193677, "grad_norm": 0.59776371717453, "learning_rate": 4.884160482374768e-05, "loss": 0.365, "step": 3000 }, { "epoch": 2.0632411067193677, "eval_loss": 0.37011492252349854, "eval_runtime": 150.8939, "eval_samples_per_second": 8.118, "eval_steps_per_second": 1.021, "eval_wer": 0.588543874523103, "step": 3000 }, { "epoch": 2.080426190067022, "grad_norm": 0.4145926237106323, "learning_rate": 4.8827110389610395e-05, "loss": 0.4335, "step": 3025 }, { "epoch": 2.097611273414676, "grad_norm": 0.6833218336105347, "learning_rate": 4.88126159554731e-05, "loss": 0.3801, "step": 3050 }, { "epoch": 2.1147963567623305, "grad_norm": 0.5245340466499329, "learning_rate": 4.879812152133581e-05, "loss": 0.4167, "step": 3075 }, { "epoch": 2.1319814401099846, "grad_norm": 0.5739388465881348, "learning_rate": 4.878362708719852e-05, "loss": 0.3793, "step": 3100 }, { "epoch": 2.1491665234576387, "grad_norm": 0.502185046672821, "learning_rate": 4.876913265306123e-05, "loss": 0.4444, "step": 3125 }, { "epoch": 2.166351606805293, "grad_norm": 0.635421633720398, "learning_rate": 4.875463821892394e-05, "loss": 0.3668, "step": 3150 }, { "epoch": 2.1835366901529474, "grad_norm": 0.4521035850048065, "learning_rate": 4.8740143784786646e-05, "loss": 0.3772, "step": 3175 }, { "epoch": 2.2007217735006015, "grad_norm": 0.7126047015190125, "learning_rate": 4.8725649350649354e-05, "loss": 0.374, "step": 3200 }, { "epoch": 2.2179068568482556, "grad_norm": 0.44621542096138, "learning_rate": 4.871115491651206e-05, "loss": 0.448, "step": 3225 }, { "epoch": 2.23509194019591, "grad_norm": 0.6418918967247009, "learning_rate": 4.8696660482374775e-05, "loss": 0.4099, "step": 3250 }, { "epoch": 2.2522770235435643, "grad_norm": 0.6630382537841797, "learning_rate": 4.8682166048237476e-05, "loss": 0.4028, "step": 3275 }, { "epoch": 2.2694621068912184, "grad_norm": 0.6084064841270447, "learning_rate": 4.866767161410018e-05, "loss": 0.3737, "step": 3300 }, { "epoch": 2.2866471902388725, "grad_norm": 0.49679034948349, "learning_rate": 4.86531771799629e-05, "loss": 0.4054, "step": 3325 }, { "epoch": 2.3038322735865266, "grad_norm": 0.5616837739944458, "learning_rate": 4.8638682745825605e-05, "loss": 0.3562, "step": 3350 }, { "epoch": 2.321017356934181, "grad_norm": 0.501042366027832, "learning_rate": 4.862418831168831e-05, "loss": 0.3613, "step": 3375 }, { "epoch": 2.3382024402818353, "grad_norm": 0.7408941984176636, "learning_rate": 4.860969387755102e-05, "loss": 0.3774, "step": 3400 }, { "epoch": 2.3553875236294894, "grad_norm": 0.4748440086841583, "learning_rate": 4.859519944341373e-05, "loss": 0.4066, "step": 3425 }, { "epoch": 2.372572606977144, "grad_norm": 0.6289178729057312, "learning_rate": 4.858070500927644e-05, "loss": 0.3748, "step": 3450 }, { "epoch": 2.389757690324798, "grad_norm": 0.4348820149898529, "learning_rate": 4.856621057513915e-05, "loss": 0.4605, "step": 3475 }, { "epoch": 2.4069427736724522, "grad_norm": 0.5051092505455017, "learning_rate": 4.8551716141001856e-05, "loss": 0.3433, "step": 3500 }, { "epoch": 2.4069427736724522, "eval_loss": 0.364750474691391, "eval_runtime": 151.4529, "eval_samples_per_second": 8.088, "eval_steps_per_second": 1.017, "eval_wer": 0.5796947859262399, "step": 3500 }, { "epoch": 2.4241278570201064, "grad_norm": 0.5361665487289429, "learning_rate": 4.8537221706864564e-05, "loss": 0.4373, "step": 3525 }, { "epoch": 2.441312940367761, "grad_norm": 0.5831518769264221, "learning_rate": 4.852272727272728e-05, "loss": 0.4052, "step": 3550 }, { "epoch": 2.458498023715415, "grad_norm": 0.42250296473503113, "learning_rate": 4.8508232838589985e-05, "loss": 0.3786, "step": 3575 }, { "epoch": 2.475683107063069, "grad_norm": 0.6438080072402954, "learning_rate": 4.849373840445269e-05, "loss": 0.3529, "step": 3600 }, { "epoch": 2.4928681904107233, "grad_norm": 0.41823628544807434, "learning_rate": 4.84792439703154e-05, "loss": 0.4122, "step": 3625 }, { "epoch": 2.510053273758378, "grad_norm": 0.6400181651115417, "learning_rate": 4.8464749536178115e-05, "loss": 0.3602, "step": 3650 }, { "epoch": 2.527238357106032, "grad_norm": 0.40071290731430054, "learning_rate": 4.845025510204082e-05, "loss": 0.3968, "step": 3675 }, { "epoch": 2.544423440453686, "grad_norm": 1.0052437782287598, "learning_rate": 4.843576066790353e-05, "loss": 0.3927, "step": 3700 }, { "epoch": 2.5616085238013406, "grad_norm": 0.43629390001296997, "learning_rate": 4.842126623376624e-05, "loss": 0.3858, "step": 3725 }, { "epoch": 2.5787936071489947, "grad_norm": 0.8912670016288757, "learning_rate": 4.8406771799628944e-05, "loss": 0.3688, "step": 3750 }, { "epoch": 2.595978690496649, "grad_norm": 0.4484070837497711, "learning_rate": 4.839227736549166e-05, "loss": 0.4143, "step": 3775 }, { "epoch": 2.613163773844303, "grad_norm": 0.6882378458976746, "learning_rate": 4.837778293135436e-05, "loss": 0.3575, "step": 3800 }, { "epoch": 2.6303488571919575, "grad_norm": 0.4403606653213501, "learning_rate": 4.8363288497217067e-05, "loss": 0.4099, "step": 3825 }, { "epoch": 2.6475339405396117, "grad_norm": 0.7600814700126648, "learning_rate": 4.834879406307978e-05, "loss": 0.3732, "step": 3850 }, { "epoch": 2.664719023887266, "grad_norm": 0.3970819115638733, "learning_rate": 4.833429962894249e-05, "loss": 0.3994, "step": 3875 }, { "epoch": 2.68190410723492, "grad_norm": 0.8571271300315857, "learning_rate": 4.8319805194805196e-05, "loss": 0.3354, "step": 3900 }, { "epoch": 2.6990891905825745, "grad_norm": 0.514602780342102, "learning_rate": 4.83053107606679e-05, "loss": 0.4109, "step": 3925 }, { "epoch": 2.7162742739302286, "grad_norm": 0.5113664269447327, "learning_rate": 4.829081632653061e-05, "loss": 0.3742, "step": 3950 }, { "epoch": 2.7334593572778827, "grad_norm": 0.44583624601364136, "learning_rate": 4.8276321892393325e-05, "loss": 0.3896, "step": 3975 }, { "epoch": 2.7506444406255373, "grad_norm": 0.5685079097747803, "learning_rate": 4.826182745825603e-05, "loss": 0.3561, "step": 4000 }, { "epoch": 2.7506444406255373, "eval_loss": 0.3437730669975281, "eval_runtime": 152.7484, "eval_samples_per_second": 8.02, "eval_steps_per_second": 1.008, "eval_wer": 0.5716405256464604, "step": 4000 }, { "epoch": 2.7678295239731914, "grad_norm": 0.48729953169822693, "learning_rate": 4.824733302411874e-05, "loss": 0.3752, "step": 4025 }, { "epoch": 2.7850146073208455, "grad_norm": 0.64439457654953, "learning_rate": 4.823283858998145e-05, "loss": 0.3513, "step": 4050 }, { "epoch": 2.8021996906684996, "grad_norm": 0.4153486490249634, "learning_rate": 4.821834415584416e-05, "loss": 0.3979, "step": 4075 }, { "epoch": 2.8193847740161537, "grad_norm": 0.5988856554031372, "learning_rate": 4.820384972170687e-05, "loss": 0.3825, "step": 4100 }, { "epoch": 2.8365698573638083, "grad_norm": 0.8136705160140991, "learning_rate": 4.8189355287569576e-05, "loss": 0.4321, "step": 4125 }, { "epoch": 2.8537549407114624, "grad_norm": 0.6228049993515015, "learning_rate": 4.8174860853432284e-05, "loss": 0.3569, "step": 4150 }, { "epoch": 2.8709400240591165, "grad_norm": 0.5307970643043518, "learning_rate": 4.816036641929499e-05, "loss": 0.3775, "step": 4175 }, { "epoch": 2.888125107406771, "grad_norm": 0.5696175694465637, "learning_rate": 4.8145871985157705e-05, "loss": 0.3306, "step": 4200 }, { "epoch": 2.905310190754425, "grad_norm": 0.46450933814048767, "learning_rate": 4.813137755102041e-05, "loss": 0.4307, "step": 4225 }, { "epoch": 2.9224952741020793, "grad_norm": 0.6127625703811646, "learning_rate": 4.811688311688312e-05, "loss": 0.3604, "step": 4250 }, { "epoch": 2.939680357449734, "grad_norm": 0.5017271637916565, "learning_rate": 4.810238868274583e-05, "loss": 0.3721, "step": 4275 }, { "epoch": 2.956865440797388, "grad_norm": 0.6709030866622925, "learning_rate": 4.808789424860854e-05, "loss": 0.3603, "step": 4300 }, { "epoch": 2.974050524145042, "grad_norm": 0.40175503492355347, "learning_rate": 4.807339981447124e-05, "loss": 0.376, "step": 4325 }, { "epoch": 2.9912356074926962, "grad_norm": 0.6254987716674805, "learning_rate": 4.805890538033395e-05, "loss": 0.3316, "step": 4350 }, { "epoch": 3.0089362433407802, "grad_norm": 0.4055463373661041, "learning_rate": 4.804441094619666e-05, "loss": 0.4126, "step": 4375 }, { "epoch": 3.0261213266884344, "grad_norm": 0.5347069501876831, "learning_rate": 4.802991651205937e-05, "loss": 0.3477, "step": 4400 }, { "epoch": 3.0433064100360885, "grad_norm": 0.4559672474861145, "learning_rate": 4.801542207792208e-05, "loss": 0.3458, "step": 4425 }, { "epoch": 3.060491493383743, "grad_norm": 0.46173930168151855, "learning_rate": 4.8000927643784786e-05, "loss": 0.3316, "step": 4450 }, { "epoch": 3.077676576731397, "grad_norm": 0.414719820022583, "learning_rate": 4.7986433209647494e-05, "loss": 0.3719, "step": 4475 }, { "epoch": 3.0948616600790513, "grad_norm": 0.7085908055305481, "learning_rate": 4.797193877551021e-05, "loss": 0.3237, "step": 4500 }, { "epoch": 3.0948616600790513, "eval_loss": 0.3646816313266754, "eval_runtime": 153.9661, "eval_samples_per_second": 7.956, "eval_steps_per_second": 1.0, "eval_wer": 0.5677193726155151, "step": 4500 }, { "epoch": 3.1120467434267054, "grad_norm": 0.4840669631958008, "learning_rate": 4.7957444341372916e-05, "loss": 0.3515, "step": 4525 }, { "epoch": 3.12923182677436, "grad_norm": 0.6030757427215576, "learning_rate": 4.794294990723562e-05, "loss": 0.3263, "step": 4550 }, { "epoch": 3.146416910122014, "grad_norm": 0.5091059803962708, "learning_rate": 4.792845547309833e-05, "loss": 0.3315, "step": 4575 }, { "epoch": 3.163601993469668, "grad_norm": 0.7523996829986572, "learning_rate": 4.7913961038961045e-05, "loss": 0.3368, "step": 4600 }, { "epoch": 3.1807870768173228, "grad_norm": 1.0035797357559204, "learning_rate": 4.789946660482375e-05, "loss": 0.3931, "step": 4625 }, { "epoch": 3.197972160164977, "grad_norm": 0.5936137437820435, "learning_rate": 4.788497217068646e-05, "loss": 0.2896, "step": 4650 }, { "epoch": 3.215157243512631, "grad_norm": 0.5628079771995544, "learning_rate": 4.787047773654917e-05, "loss": 0.3632, "step": 4675 }, { "epoch": 3.232342326860285, "grad_norm": 0.5175526738166809, "learning_rate": 4.7855983302411874e-05, "loss": 0.33, "step": 4700 }, { "epoch": 3.2495274102079397, "grad_norm": 0.36029067635536194, "learning_rate": 4.784148886827459e-05, "loss": 0.3487, "step": 4725 }, { "epoch": 3.266712493555594, "grad_norm": 0.6295140981674194, "learning_rate": 4.7826994434137296e-05, "loss": 0.3391, "step": 4750 }, { "epoch": 3.283897576903248, "grad_norm": 0.535555362701416, "learning_rate": 4.7812500000000003e-05, "loss": 0.3804, "step": 4775 }, { "epoch": 3.301082660250902, "grad_norm": 1.0075314044952393, "learning_rate": 4.779800556586271e-05, "loss": 0.3338, "step": 4800 }, { "epoch": 3.3182677435985566, "grad_norm": 0.3681296110153198, "learning_rate": 4.7783511131725425e-05, "loss": 0.3727, "step": 4825 }, { "epoch": 3.3354528269462107, "grad_norm": 0.5938307642936707, "learning_rate": 4.7769016697588126e-05, "loss": 0.3565, "step": 4850 }, { "epoch": 3.352637910293865, "grad_norm": 0.4364496171474457, "learning_rate": 4.775452226345083e-05, "loss": 0.3459, "step": 4875 }, { "epoch": 3.3698229936415194, "grad_norm": 0.5793933272361755, "learning_rate": 4.774002782931354e-05, "loss": 0.3653, "step": 4900 }, { "epoch": 3.3870080769891735, "grad_norm": 0.41033703088760376, "learning_rate": 4.7725533395176255e-05, "loss": 0.3629, "step": 4925 }, { "epoch": 3.4041931603368276, "grad_norm": 0.6783180236816406, "learning_rate": 4.771103896103896e-05, "loss": 0.3258, "step": 4950 }, { "epoch": 3.4213782436844817, "grad_norm": 0.4356047511100769, "learning_rate": 4.769654452690167e-05, "loss": 0.3423, "step": 4975 }, { "epoch": 3.4385633270321363, "grad_norm": 0.645196795463562, "learning_rate": 4.768205009276438e-05, "loss": 0.322, "step": 5000 }, { "epoch": 3.4385633270321363, "eval_loss": 0.3426838219165802, "eval_runtime": 154.5652, "eval_samples_per_second": 7.925, "eval_steps_per_second": 0.996, "eval_wer": 0.5637982195845698, "step": 5000 }, { "epoch": 3.4557484103797904, "grad_norm": 0.7585852742195129, "learning_rate": 4.766755565862709e-05, "loss": 0.3615, "step": 5025 }, { "epoch": 3.4729334937274445, "grad_norm": 0.6891105771064758, "learning_rate": 4.76530612244898e-05, "loss": 0.3016, "step": 5050 }, { "epoch": 3.4901185770750986, "grad_norm": 0.40171509981155396, "learning_rate": 4.7638566790352506e-05, "loss": 0.3389, "step": 5075 }, { "epoch": 3.507303660422753, "grad_norm": 0.6688668131828308, "learning_rate": 4.7624072356215214e-05, "loss": 0.3196, "step": 5100 }, { "epoch": 3.5244887437704073, "grad_norm": 0.7812600135803223, "learning_rate": 4.760957792207793e-05, "loss": 0.3795, "step": 5125 }, { "epoch": 3.5416738271180614, "grad_norm": 0.6127042174339294, "learning_rate": 4.7595083487940635e-05, "loss": 0.3422, "step": 5150 }, { "epoch": 3.558858910465716, "grad_norm": 0.4893074333667755, "learning_rate": 4.758058905380334e-05, "loss": 0.3759, "step": 5175 }, { "epoch": 3.57604399381337, "grad_norm": 0.7298163175582886, "learning_rate": 4.756609461966605e-05, "loss": 0.3388, "step": 5200 }, { "epoch": 3.5932290771610242, "grad_norm": 0.5726421475410461, "learning_rate": 4.755160018552876e-05, "loss": 0.4172, "step": 5225 }, { "epoch": 3.6104141605086784, "grad_norm": 0.5771546959877014, "learning_rate": 4.753710575139147e-05, "loss": 0.3217, "step": 5250 }, { "epoch": 3.6275992438563325, "grad_norm": 0.48307299613952637, "learning_rate": 4.752261131725418e-05, "loss": 0.3864, "step": 5275 }, { "epoch": 3.644784327203987, "grad_norm": 0.5440219044685364, "learning_rate": 4.750811688311689e-05, "loss": 0.3288, "step": 5300 }, { "epoch": 3.661969410551641, "grad_norm": 0.4851985275745392, "learning_rate": 4.7493622448979594e-05, "loss": 0.4105, "step": 5325 }, { "epoch": 3.6791544938992953, "grad_norm": 0.537399172782898, "learning_rate": 4.74791280148423e-05, "loss": 0.3309, "step": 5350 }, { "epoch": 3.69633957724695, "grad_norm": 0.4729978144168854, "learning_rate": 4.746463358070501e-05, "loss": 0.3604, "step": 5375 }, { "epoch": 3.713524660594604, "grad_norm": 0.8028717041015625, "learning_rate": 4.7450139146567716e-05, "loss": 0.2908, "step": 5400 }, { "epoch": 3.730709743942258, "grad_norm": 0.40672922134399414, "learning_rate": 4.7435644712430424e-05, "loss": 0.3871, "step": 5425 }, { "epoch": 3.747894827289912, "grad_norm": 0.6138872504234314, "learning_rate": 4.742115027829314e-05, "loss": 0.3166, "step": 5450 }, { "epoch": 3.7650799106375668, "grad_norm": 0.4270385801792145, "learning_rate": 4.7406655844155846e-05, "loss": 0.3449, "step": 5475 }, { "epoch": 3.782264993985221, "grad_norm": 0.535121500492096, "learning_rate": 4.739216141001855e-05, "loss": 0.2921, "step": 5500 }, { "epoch": 3.782264993985221, "eval_loss": 0.3344533145427704, "eval_runtime": 150.3102, "eval_samples_per_second": 8.15, "eval_steps_per_second": 1.025, "eval_wer": 0.5604069520983468, "step": 5500 }, { "epoch": 3.799450077332875, "grad_norm": 0.3871770203113556, "learning_rate": 4.737766697588126e-05, "loss": 0.349, "step": 5525 }, { "epoch": 3.816635160680529, "grad_norm": 0.503182053565979, "learning_rate": 4.7363172541743975e-05, "loss": 0.3707, "step": 5550 }, { "epoch": 3.8338202440281837, "grad_norm": 0.4528012275695801, "learning_rate": 4.734867810760668e-05, "loss": 0.359, "step": 5575 }, { "epoch": 3.851005327375838, "grad_norm": 0.630174458026886, "learning_rate": 4.733418367346939e-05, "loss": 0.3558, "step": 5600 }, { "epoch": 3.868190410723492, "grad_norm": 0.4319029450416565, "learning_rate": 4.73196892393321e-05, "loss": 0.3812, "step": 5625 }, { "epoch": 3.8853754940711465, "grad_norm": 0.5308706760406494, "learning_rate": 4.730519480519481e-05, "loss": 0.2885, "step": 5650 }, { "epoch": 3.9025605774188006, "grad_norm": 0.4054734408855438, "learning_rate": 4.729070037105752e-05, "loss": 0.363, "step": 5675 }, { "epoch": 3.9197456607664547, "grad_norm": 0.8012121319770813, "learning_rate": 4.7276205936920226e-05, "loss": 0.3398, "step": 5700 }, { "epoch": 3.936930744114109, "grad_norm": 0.4499848783016205, "learning_rate": 4.7261711502782933e-05, "loss": 0.3485, "step": 5725 }, { "epoch": 3.954115827461763, "grad_norm": 0.5845701098442078, "learning_rate": 4.724721706864564e-05, "loss": 0.3188, "step": 5750 }, { "epoch": 3.9713009108094175, "grad_norm": 0.4100358486175537, "learning_rate": 4.7232722634508355e-05, "loss": 0.3479, "step": 5775 }, { "epoch": 3.9884859941570716, "grad_norm": 0.42875462770462036, "learning_rate": 4.721822820037106e-05, "loss": 0.3043, "step": 5800 }, { "epoch": 4.006186630005155, "grad_norm": 0.4040299355983734, "learning_rate": 4.720373376623377e-05, "loss": 0.4045, "step": 5825 }, { "epoch": 4.02337171335281, "grad_norm": 0.4601922035217285, "learning_rate": 4.718923933209648e-05, "loss": 0.3044, "step": 5850 }, { "epoch": 4.040556796700464, "grad_norm": 0.5125726461410522, "learning_rate": 4.7174744897959185e-05, "loss": 0.342, "step": 5875 }, { "epoch": 4.057741880048118, "grad_norm": 0.5830023884773254, "learning_rate": 4.716025046382189e-05, "loss": 0.2916, "step": 5900 }, { "epoch": 4.0749269633957725, "grad_norm": 0.3486079275608063, "learning_rate": 4.71457560296846e-05, "loss": 0.3215, "step": 5925 }, { "epoch": 4.092112046743427, "grad_norm": 0.5681314468383789, "learning_rate": 4.713126159554731e-05, "loss": 0.3049, "step": 5950 }, { "epoch": 4.109297130091081, "grad_norm": 0.3579752743244171, "learning_rate": 4.711676716141002e-05, "loss": 0.3167, "step": 5975 }, { "epoch": 4.126482213438735, "grad_norm": 0.8702667355537415, "learning_rate": 4.710227272727273e-05, "loss": 0.3037, "step": 6000 }, { "epoch": 4.126482213438735, "eval_loss": 0.335175484418869, "eval_runtime": 150.3279, "eval_samples_per_second": 8.149, "eval_steps_per_second": 1.024, "eval_wer": 0.5541013141161509, "step": 6000 }, { "epoch": 4.143667296786389, "grad_norm": 0.6025490760803223, "learning_rate": 4.7087778293135436e-05, "loss": 0.3527, "step": 6025 }, { "epoch": 4.160852380134044, "grad_norm": 0.46763402223587036, "learning_rate": 4.7073283858998144e-05, "loss": 0.3182, "step": 6050 }, { "epoch": 4.178037463481698, "grad_norm": 0.38680383563041687, "learning_rate": 4.705878942486086e-05, "loss": 0.3234, "step": 6075 }, { "epoch": 4.195222546829352, "grad_norm": 0.45606276392936707, "learning_rate": 4.7044294990723565e-05, "loss": 0.3418, "step": 6100 }, { "epoch": 4.212407630177006, "grad_norm": 0.5080279111862183, "learning_rate": 4.702980055658627e-05, "loss": 0.3402, "step": 6125 }, { "epoch": 4.229592713524661, "grad_norm": 0.5734138488769531, "learning_rate": 4.701530612244898e-05, "loss": 0.3026, "step": 6150 }, { "epoch": 4.246777796872315, "grad_norm": 0.34839344024658203, "learning_rate": 4.7000811688311694e-05, "loss": 0.3422, "step": 6175 }, { "epoch": 4.263962880219969, "grad_norm": 0.5648381114006042, "learning_rate": 4.69863172541744e-05, "loss": 0.3075, "step": 6200 }, { "epoch": 4.281147963567623, "grad_norm": 0.5454294681549072, "learning_rate": 4.697182282003711e-05, "loss": 0.3528, "step": 6225 }, { "epoch": 4.298333046915277, "grad_norm": 0.5028226375579834, "learning_rate": 4.695732838589982e-05, "loss": 0.2952, "step": 6250 }, { "epoch": 4.315518130262932, "grad_norm": 0.45058056712150574, "learning_rate": 4.6942833951762524e-05, "loss": 0.3535, "step": 6275 }, { "epoch": 4.332703213610586, "grad_norm": 0.6654832363128662, "learning_rate": 4.692833951762524e-05, "loss": 0.3127, "step": 6300 }, { "epoch": 4.34988829695824, "grad_norm": 0.49009886384010315, "learning_rate": 4.6913845083487946e-05, "loss": 0.3419, "step": 6325 }, { "epoch": 4.367073380305895, "grad_norm": 0.5751463770866394, "learning_rate": 4.689935064935065e-05, "loss": 0.3069, "step": 6350 }, { "epoch": 4.384258463653548, "grad_norm": 0.7767444849014282, "learning_rate": 4.688485621521336e-05, "loss": 0.3666, "step": 6375 }, { "epoch": 4.401443547001203, "grad_norm": 0.5131709575653076, "learning_rate": 4.687036178107607e-05, "loss": 0.329, "step": 6400 }, { "epoch": 4.4186286303488576, "grad_norm": 0.4997400939464569, "learning_rate": 4.6855867346938776e-05, "loss": 0.365, "step": 6425 }, { "epoch": 4.435813713696511, "grad_norm": 0.5275589227676392, "learning_rate": 4.684137291280148e-05, "loss": 0.31, "step": 6450 }, { "epoch": 4.452998797044166, "grad_norm": 0.43651729822158813, "learning_rate": 4.682687847866419e-05, "loss": 0.3253, "step": 6475 }, { "epoch": 4.47018388039182, "grad_norm": 0.49254560470581055, "learning_rate": 4.6812384044526905e-05, "loss": 0.2695, "step": 6500 }, { "epoch": 4.47018388039182, "eval_loss": 0.32015639543533325, "eval_runtime": 154.5447, "eval_samples_per_second": 7.927, "eval_steps_per_second": 0.996, "eval_wer": 0.5515048749470114, "step": 6500 }, { "epoch": 4.487368963739474, "grad_norm": 0.3705599904060364, "learning_rate": 4.679788961038961e-05, "loss": 0.3453, "step": 6525 }, { "epoch": 4.504554047087129, "grad_norm": 0.5254660844802856, "learning_rate": 4.678339517625232e-05, "loss": 0.282, "step": 6550 }, { "epoch": 4.521739130434782, "grad_norm": 0.37494751811027527, "learning_rate": 4.676890074211503e-05, "loss": 0.3491, "step": 6575 }, { "epoch": 4.538924213782437, "grad_norm": 0.5620461702346802, "learning_rate": 4.675440630797774e-05, "loss": 0.3074, "step": 6600 }, { "epoch": 4.556109297130091, "grad_norm": 0.8100690245628357, "learning_rate": 4.673991187384045e-05, "loss": 0.3366, "step": 6625 }, { "epoch": 4.573294380477745, "grad_norm": 0.7091922760009766, "learning_rate": 4.6725417439703156e-05, "loss": 0.2864, "step": 6650 }, { "epoch": 4.5904794638254, "grad_norm": 0.38283970952033997, "learning_rate": 4.6710923005565864e-05, "loss": 0.3512, "step": 6675 }, { "epoch": 4.607664547173053, "grad_norm": 0.5631033182144165, "learning_rate": 4.669642857142857e-05, "loss": 0.2884, "step": 6700 }, { "epoch": 4.624849630520708, "grad_norm": 0.3868861794471741, "learning_rate": 4.6681934137291285e-05, "loss": 0.3413, "step": 6725 }, { "epoch": 4.642034713868362, "grad_norm": 0.6378294229507446, "learning_rate": 4.666743970315399e-05, "loss": 0.2949, "step": 6750 }, { "epoch": 4.659219797216016, "grad_norm": 0.5135634541511536, "learning_rate": 4.66529452690167e-05, "loss": 0.3463, "step": 6775 }, { "epoch": 4.676404880563671, "grad_norm": 0.4989064633846283, "learning_rate": 4.663845083487941e-05, "loss": 0.3027, "step": 6800 }, { "epoch": 4.693589963911325, "grad_norm": 0.5633465647697449, "learning_rate": 4.662395640074212e-05, "loss": 0.3626, "step": 6825 }, { "epoch": 4.710775047258979, "grad_norm": 0.6484938859939575, "learning_rate": 4.660946196660483e-05, "loss": 0.2881, "step": 6850 }, { "epoch": 4.7279601306066334, "grad_norm": 0.4283730983734131, "learning_rate": 4.6594967532467537e-05, "loss": 0.3452, "step": 6875 }, { "epoch": 4.745145213954288, "grad_norm": 0.5287323594093323, "learning_rate": 4.6580473098330244e-05, "loss": 0.3015, "step": 6900 }, { "epoch": 4.762330297301942, "grad_norm": 0.41760608553886414, "learning_rate": 4.656597866419295e-05, "loss": 0.3032, "step": 6925 }, { "epoch": 4.779515380649596, "grad_norm": 0.6855202317237854, "learning_rate": 4.655148423005566e-05, "loss": 0.3006, "step": 6950 }, { "epoch": 4.796700463997251, "grad_norm": 0.45387232303619385, "learning_rate": 4.6536989795918366e-05, "loss": 0.3428, "step": 6975 }, { "epoch": 4.8138855473449045, "grad_norm": 0.8281689286231995, "learning_rate": 4.6522495361781074e-05, "loss": 0.2804, "step": 7000 }, { "epoch": 4.8138855473449045, "eval_loss": 0.3352712392807007, "eval_runtime": 156.292, "eval_samples_per_second": 7.838, "eval_steps_per_second": 0.985, "eval_wer": 0.5525116574819839, "step": 7000 }, { "epoch": 4.831070630692559, "grad_norm": 0.4252523183822632, "learning_rate": 4.650800092764379e-05, "loss": 0.3456, "step": 7025 }, { "epoch": 4.848255714040213, "grad_norm": 0.536359429359436, "learning_rate": 4.6493506493506495e-05, "loss": 0.3016, "step": 7050 }, { "epoch": 4.865440797387867, "grad_norm": 0.48082077503204346, "learning_rate": 4.64790120593692e-05, "loss": 0.3323, "step": 7075 }, { "epoch": 4.882625880735522, "grad_norm": 0.7152004837989807, "learning_rate": 4.646451762523191e-05, "loss": 0.3102, "step": 7100 }, { "epoch": 4.8998109640831755, "grad_norm": 0.43397244811058044, "learning_rate": 4.6450023191094625e-05, "loss": 0.3671, "step": 7125 }, { "epoch": 4.91699604743083, "grad_norm": 1.093762755393982, "learning_rate": 4.643552875695733e-05, "loss": 0.299, "step": 7150 }, { "epoch": 4.934181130778485, "grad_norm": 0.3550453782081604, "learning_rate": 4.642103432282004e-05, "loss": 0.3351, "step": 7175 }, { "epoch": 4.951366214126138, "grad_norm": 0.6337935924530029, "learning_rate": 4.640653988868275e-05, "loss": 0.2952, "step": 7200 }, { "epoch": 4.968551297473793, "grad_norm": 0.5126771926879883, "learning_rate": 4.6392045454545454e-05, "loss": 0.3431, "step": 7225 }, { "epoch": 4.9857363808214465, "grad_norm": 0.4208792448043823, "learning_rate": 4.637755102040817e-05, "loss": 0.3025, "step": 7250 }, { "epoch": 5.003437016669531, "grad_norm": 0.5154265761375427, "learning_rate": 4.6363056586270876e-05, "loss": 0.3215, "step": 7275 }, { "epoch": 5.020622100017185, "grad_norm": 0.4937199652194977, "learning_rate": 4.634856215213358e-05, "loss": 0.2898, "step": 7300 }, { "epoch": 5.03780718336484, "grad_norm": 0.4737917482852936, "learning_rate": 4.633406771799629e-05, "loss": 0.3186, "step": 7325 }, { "epoch": 5.054992266712493, "grad_norm": 0.39069080352783203, "learning_rate": 4.6319573283859005e-05, "loss": 0.299, "step": 7350 }, { "epoch": 5.072177350060148, "grad_norm": 0.41207846999168396, "learning_rate": 4.630507884972171e-05, "loss": 0.3261, "step": 7375 }, { "epoch": 5.089362433407802, "grad_norm": 0.5971049070358276, "learning_rate": 4.629058441558442e-05, "loss": 0.279, "step": 7400 }, { "epoch": 5.106547516755456, "grad_norm": 0.41475459933280945, "learning_rate": 4.627608998144712e-05, "loss": 0.3126, "step": 7425 }, { "epoch": 5.123732600103111, "grad_norm": 0.5062717795372009, "learning_rate": 4.6261595547309835e-05, "loss": 0.2741, "step": 7450 }, { "epoch": 5.140917683450764, "grad_norm": 0.5244805812835693, "learning_rate": 4.624710111317254e-05, "loss": 0.2913, "step": 7475 }, { "epoch": 5.158102766798419, "grad_norm": 0.7847909927368164, "learning_rate": 4.623260667903525e-05, "loss": 0.2908, "step": 7500 }, { "epoch": 5.158102766798419, "eval_loss": 0.3383817970752716, "eval_runtime": 159.5151, "eval_samples_per_second": 7.68, "eval_steps_per_second": 0.965, "eval_wer": 0.5484845273420941, "step": 7500 }, { "epoch": 5.1752878501460735, "grad_norm": 0.44492971897125244, "learning_rate": 4.621811224489796e-05, "loss": 0.3359, "step": 7525 }, { "epoch": 5.192472933493727, "grad_norm": 0.8884369730949402, "learning_rate": 4.620361781076067e-05, "loss": 0.286, "step": 7550 }, { "epoch": 5.209658016841382, "grad_norm": 0.4650115966796875, "learning_rate": 4.618912337662338e-05, "loss": 0.3538, "step": 7575 }, { "epoch": 5.226843100189036, "grad_norm": 0.46531254053115845, "learning_rate": 4.6174628942486086e-05, "loss": 0.2828, "step": 7600 }, { "epoch": 5.24402818353669, "grad_norm": 0.5248584747314453, "learning_rate": 4.6160134508348794e-05, "loss": 0.3166, "step": 7625 }, { "epoch": 5.2612132668843445, "grad_norm": 0.676996648311615, "learning_rate": 4.614564007421151e-05, "loss": 0.3141, "step": 7650 }, { "epoch": 5.278398350231998, "grad_norm": 0.40085482597351074, "learning_rate": 4.6131145640074215e-05, "loss": 0.3099, "step": 7675 }, { "epoch": 5.295583433579653, "grad_norm": 0.5248492956161499, "learning_rate": 4.611665120593692e-05, "loss": 0.2905, "step": 7700 }, { "epoch": 5.312768516927307, "grad_norm": 0.3710981607437134, "learning_rate": 4.610215677179963e-05, "loss": 0.3114, "step": 7725 }, { "epoch": 5.329953600274961, "grad_norm": 0.607997477054596, "learning_rate": 4.608766233766234e-05, "loss": 0.2814, "step": 7750 }, { "epoch": 5.347138683622616, "grad_norm": 0.41846323013305664, "learning_rate": 4.607316790352505e-05, "loss": 0.3442, "step": 7775 }, { "epoch": 5.36432376697027, "grad_norm": 0.7187564373016357, "learning_rate": 4.605867346938776e-05, "loss": 0.3089, "step": 7800 }, { "epoch": 5.381508850317924, "grad_norm": 0.6370894312858582, "learning_rate": 4.604417903525047e-05, "loss": 0.2802, "step": 7825 }, { "epoch": 5.398693933665578, "grad_norm": 0.4946443736553192, "learning_rate": 4.6029684601113174e-05, "loss": 0.2752, "step": 7850 }, { "epoch": 5.415879017013232, "grad_norm": 0.5713298916816711, "learning_rate": 4.601519016697589e-05, "loss": 0.3335, "step": 7875 }, { "epoch": 5.433064100360887, "grad_norm": 0.5185278058052063, "learning_rate": 4.6000695732838596e-05, "loss": 0.2779, "step": 7900 }, { "epoch": 5.450249183708541, "grad_norm": 0.6777128577232361, "learning_rate": 4.59862012987013e-05, "loss": 0.2907, "step": 7925 }, { "epoch": 5.467434267056195, "grad_norm": 0.4836239814758301, "learning_rate": 4.5971706864564004e-05, "loss": 0.2726, "step": 7950 }, { "epoch": 5.484619350403849, "grad_norm": 0.4827396273612976, "learning_rate": 4.595721243042672e-05, "loss": 0.2956, "step": 7975 }, { "epoch": 5.501804433751504, "grad_norm": 0.4124370813369751, "learning_rate": 4.5942717996289425e-05, "loss": 0.2646, "step": 8000 }, { "epoch": 5.501804433751504, "eval_loss": 0.31637853384017944, "eval_runtime": 154.147, "eval_samples_per_second": 7.947, "eval_steps_per_second": 0.999, "eval_wer": 0.5461530309453159, "step": 8000 }, { "epoch": 5.518989517099158, "grad_norm": 0.4147077202796936, "learning_rate": 4.592822356215213e-05, "loss": 0.3084, "step": 8025 }, { "epoch": 5.536174600446812, "grad_norm": 0.5999482274055481, "learning_rate": 4.591372912801484e-05, "loss": 0.2824, "step": 8050 }, { "epoch": 5.553359683794467, "grad_norm": 0.4082586169242859, "learning_rate": 4.5899234693877555e-05, "loss": 0.3095, "step": 8075 }, { "epoch": 5.57054476714212, "grad_norm": 0.45266950130462646, "learning_rate": 4.588474025974026e-05, "loss": 0.2672, "step": 8100 }, { "epoch": 5.587729850489775, "grad_norm": 0.4024001955986023, "learning_rate": 4.587024582560297e-05, "loss": 0.3147, "step": 8125 }, { "epoch": 5.60491493383743, "grad_norm": 0.61323481798172, "learning_rate": 4.585575139146568e-05, "loss": 0.2461, "step": 8150 }, { "epoch": 5.622100017185083, "grad_norm": 0.7370169758796692, "learning_rate": 4.584125695732839e-05, "loss": 0.3364, "step": 8175 }, { "epoch": 5.639285100532738, "grad_norm": 0.5105010867118835, "learning_rate": 4.58267625231911e-05, "loss": 0.2749, "step": 8200 }, { "epoch": 5.6564701838803915, "grad_norm": 0.4862951636314392, "learning_rate": 4.5812268089053806e-05, "loss": 0.318, "step": 8225 }, { "epoch": 5.673655267228046, "grad_norm": 0.5929895639419556, "learning_rate": 4.5797773654916513e-05, "loss": 0.3215, "step": 8250 }, { "epoch": 5.690840350575701, "grad_norm": 0.5355464220046997, "learning_rate": 4.578327922077922e-05, "loss": 0.343, "step": 8275 }, { "epoch": 5.708025433923354, "grad_norm": 0.6044451594352722, "learning_rate": 4.5768784786641935e-05, "loss": 0.2761, "step": 8300 }, { "epoch": 5.725210517271009, "grad_norm": 0.5010135769844055, "learning_rate": 4.575429035250464e-05, "loss": 0.3525, "step": 8325 }, { "epoch": 5.742395600618663, "grad_norm": 0.592808723449707, "learning_rate": 4.573979591836735e-05, "loss": 0.2749, "step": 8350 }, { "epoch": 5.759580683966317, "grad_norm": 0.5672963261604309, "learning_rate": 4.572530148423006e-05, "loss": 0.3711, "step": 8375 }, { "epoch": 5.776765767313972, "grad_norm": 0.8346742391586304, "learning_rate": 4.571080705009277e-05, "loss": 0.272, "step": 8400 }, { "epoch": 5.793950850661625, "grad_norm": 0.5474342107772827, "learning_rate": 4.569631261595548e-05, "loss": 0.3022, "step": 8425 }, { "epoch": 5.81113593400928, "grad_norm": 0.5067320466041565, "learning_rate": 4.5681818181818186e-05, "loss": 0.2632, "step": 8450 }, { "epoch": 5.828321017356934, "grad_norm": 0.4363684356212616, "learning_rate": 4.566732374768089e-05, "loss": 0.2961, "step": 8475 }, { "epoch": 5.845506100704588, "grad_norm": 0.6637271642684937, "learning_rate": 4.56528293135436e-05, "loss": 0.2982, "step": 8500 }, { "epoch": 5.845506100704588, "eval_loss": 0.3142754137516022, "eval_runtime": 153.2422, "eval_samples_per_second": 7.994, "eval_steps_per_second": 1.005, "eval_wer": 0.5454641797371768, "step": 8500 }, { "epoch": 5.862691184052243, "grad_norm": 0.5597206950187683, "learning_rate": 4.563833487940631e-05, "loss": 0.3282, "step": 8525 }, { "epoch": 5.879876267399897, "grad_norm": 0.5069429278373718, "learning_rate": 4.5623840445269016e-05, "loss": 0.2819, "step": 8550 }, { "epoch": 5.897061350747551, "grad_norm": 0.4272073805332184, "learning_rate": 4.5609346011131724e-05, "loss": 0.3032, "step": 8575 }, { "epoch": 5.9142464340952055, "grad_norm": 0.46952158212661743, "learning_rate": 4.559485157699444e-05, "loss": 0.2979, "step": 8600 }, { "epoch": 5.93143151744286, "grad_norm": 0.46362563967704773, "learning_rate": 4.5580357142857145e-05, "loss": 0.3042, "step": 8625 }, { "epoch": 5.948616600790514, "grad_norm": 0.6578242778778076, "learning_rate": 4.556586270871985e-05, "loss": 0.2558, "step": 8650 }, { "epoch": 5.965801684138168, "grad_norm": 0.5568517446517944, "learning_rate": 4.555136827458256e-05, "loss": 0.3127, "step": 8675 }, { "epoch": 5.982986767485822, "grad_norm": 0.5202658772468567, "learning_rate": 4.5536873840445274e-05, "loss": 0.2956, "step": 8700 }, { "epoch": 6.000687403333906, "grad_norm": 0.5851805210113525, "learning_rate": 4.552237940630798e-05, "loss": 0.3197, "step": 8725 }, { "epoch": 6.0178724866815605, "grad_norm": 0.4245930314064026, "learning_rate": 4.550788497217069e-05, "loss": 0.2537, "step": 8750 }, { "epoch": 6.035057570029215, "grad_norm": 0.3843390643596649, "learning_rate": 4.54933905380334e-05, "loss": 0.2552, "step": 8775 }, { "epoch": 6.052242653376869, "grad_norm": 0.4880934953689575, "learning_rate": 4.5478896103896104e-05, "loss": 0.2669, "step": 8800 }, { "epoch": 6.069427736724523, "grad_norm": 0.44671300053596497, "learning_rate": 4.546440166975882e-05, "loss": 0.2894, "step": 8825 }, { "epoch": 6.086612820072177, "grad_norm": 0.6307169795036316, "learning_rate": 4.5449907235621526e-05, "loss": 0.3032, "step": 8850 }, { "epoch": 6.1037979034198315, "grad_norm": 0.4117954969406128, "learning_rate": 4.543541280148423e-05, "loss": 0.2863, "step": 8875 }, { "epoch": 6.120982986767486, "grad_norm": 0.41599756479263306, "learning_rate": 4.542091836734694e-05, "loss": 0.2804, "step": 8900 }, { "epoch": 6.13816807011514, "grad_norm": 0.5033993124961853, "learning_rate": 4.5406423933209655e-05, "loss": 0.2971, "step": 8925 }, { "epoch": 6.155353153462794, "grad_norm": 0.39483895897865295, "learning_rate": 4.539192949907236e-05, "loss": 0.2485, "step": 8950 }, { "epoch": 6.172538236810449, "grad_norm": 0.48659563064575195, "learning_rate": 4.537743506493506e-05, "loss": 0.2847, "step": 8975 }, { "epoch": 6.189723320158103, "grad_norm": 0.7809969186782837, "learning_rate": 4.536294063079777e-05, "loss": 0.2978, "step": 9000 }, { "epoch": 6.189723320158103, "eval_loss": 0.3218280076980591, "eval_runtime": 151.9921, "eval_samples_per_second": 8.06, "eval_steps_per_second": 1.013, "eval_wer": 0.5423908435777872, "step": 9000 }, { "epoch": 6.206908403505757, "grad_norm": 0.6053128242492676, "learning_rate": 4.5348446196660485e-05, "loss": 0.275, "step": 9025 }, { "epoch": 6.224093486853411, "grad_norm": 0.4554101526737213, "learning_rate": 4.533395176252319e-05, "loss": 0.2926, "step": 9050 }, { "epoch": 6.241278570201065, "grad_norm": 0.4731072187423706, "learning_rate": 4.53194573283859e-05, "loss": 0.2921, "step": 9075 }, { "epoch": 6.25846365354872, "grad_norm": 0.4384573996067047, "learning_rate": 4.530496289424861e-05, "loss": 0.2827, "step": 9100 }, { "epoch": 6.275648736896374, "grad_norm": 0.5838501453399658, "learning_rate": 4.529046846011132e-05, "loss": 0.2924, "step": 9125 }, { "epoch": 6.292833820244028, "grad_norm": 0.5555030703544617, "learning_rate": 4.527597402597403e-05, "loss": 0.2839, "step": 9150 }, { "epoch": 6.310018903591683, "grad_norm": 0.7978671193122864, "learning_rate": 4.5261479591836736e-05, "loss": 0.2808, "step": 9175 }, { "epoch": 6.327203986939336, "grad_norm": 0.4151977002620697, "learning_rate": 4.5246985157699443e-05, "loss": 0.2767, "step": 9200 }, { "epoch": 6.344389070286991, "grad_norm": 0.5371110439300537, "learning_rate": 4.523249072356215e-05, "loss": 0.3124, "step": 9225 }, { "epoch": 6.3615741536346455, "grad_norm": 0.4500742554664612, "learning_rate": 4.5217996289424865e-05, "loss": 0.2731, "step": 9250 }, { "epoch": 6.378759236982299, "grad_norm": 0.5459848642349243, "learning_rate": 4.520350185528757e-05, "loss": 0.3118, "step": 9275 }, { "epoch": 6.395944320329954, "grad_norm": 0.7165120244026184, "learning_rate": 4.518900742115028e-05, "loss": 0.2781, "step": 9300 }, { "epoch": 6.413129403677607, "grad_norm": 0.4492073655128479, "learning_rate": 4.517451298701299e-05, "loss": 0.2652, "step": 9325 }, { "epoch": 6.430314487025262, "grad_norm": 0.8615822196006775, "learning_rate": 4.51600185528757e-05, "loss": 0.265, "step": 9350 }, { "epoch": 6.4474995703729165, "grad_norm": 0.37190139293670654, "learning_rate": 4.514552411873841e-05, "loss": 0.2839, "step": 9375 }, { "epoch": 6.46468465372057, "grad_norm": 0.5044118762016296, "learning_rate": 4.5131029684601116e-05, "loss": 0.2737, "step": 9400 }, { "epoch": 6.481869737068225, "grad_norm": 0.8805606961250305, "learning_rate": 4.5116535250463824e-05, "loss": 0.2658, "step": 9425 }, { "epoch": 6.499054820415879, "grad_norm": 0.52882981300354, "learning_rate": 4.510204081632654e-05, "loss": 0.2875, "step": 9450 }, { "epoch": 6.516239903763533, "grad_norm": 0.8483859896659851, "learning_rate": 4.5087546382189246e-05, "loss": 0.3182, "step": 9475 }, { "epoch": 6.533424987111188, "grad_norm": 0.5920891165733337, "learning_rate": 4.5073051948051946e-05, "loss": 0.288, "step": 9500 }, { "epoch": 6.533424987111188, "eval_loss": 0.3151616156101227, "eval_runtime": 152.9497, "eval_samples_per_second": 8.009, "eval_steps_per_second": 1.007, "eval_wer": 0.5417549809241204, "step": 9500 }, { "epoch": 6.550610070458841, "grad_norm": 0.5297147035598755, "learning_rate": 4.5059137291280155e-05, "loss": 0.2972, "step": 9525 }, { "epoch": 6.567795153806496, "grad_norm": 0.43849292397499084, "learning_rate": 4.504464285714286e-05, "loss": 0.2753, "step": 9550 }, { "epoch": 6.58498023715415, "grad_norm": 0.5231007933616638, "learning_rate": 4.503014842300557e-05, "loss": 0.2916, "step": 9575 }, { "epoch": 6.602165320501804, "grad_norm": 0.3840227425098419, "learning_rate": 4.501565398886828e-05, "loss": 0.2569, "step": 9600 }, { "epoch": 6.619350403849459, "grad_norm": 0.4950826168060303, "learning_rate": 4.5001159554730984e-05, "loss": 0.3198, "step": 9625 }, { "epoch": 6.636535487197113, "grad_norm": 0.4441792368888855, "learning_rate": 4.49866651205937e-05, "loss": 0.2796, "step": 9650 }, { "epoch": 6.653720570544767, "grad_norm": 0.4779716730117798, "learning_rate": 4.4972170686456406e-05, "loss": 0.3165, "step": 9675 }, { "epoch": 6.670905653892421, "grad_norm": 0.6962916254997253, "learning_rate": 4.4957676252319113e-05, "loss": 0.3176, "step": 9700 }, { "epoch": 6.688090737240076, "grad_norm": 0.43603190779685974, "learning_rate": 4.494318181818182e-05, "loss": 0.3095, "step": 9725 }, { "epoch": 6.70527582058773, "grad_norm": 0.8133454918861389, "learning_rate": 4.492868738404453e-05, "loss": 0.2793, "step": 9750 }, { "epoch": 6.722460903935384, "grad_norm": 0.4596538245677948, "learning_rate": 4.4914192949907236e-05, "loss": 0.2626, "step": 9775 }, { "epoch": 6.739645987283039, "grad_norm": 0.6948014497756958, "learning_rate": 4.489969851576994e-05, "loss": 0.2546, "step": 9800 }, { "epoch": 6.756831070630692, "grad_norm": 0.40873172879219055, "learning_rate": 4.488520408163265e-05, "loss": 0.2867, "step": 9825 }, { "epoch": 6.774016153978347, "grad_norm": 0.36870279908180237, "learning_rate": 4.4870709647495365e-05, "loss": 0.3258, "step": 9850 }, { "epoch": 6.791201237326001, "grad_norm": 0.4158894121646881, "learning_rate": 4.485621521335807e-05, "loss": 0.3015, "step": 9875 }, { "epoch": 6.808386320673655, "grad_norm": 0.4422719180583954, "learning_rate": 4.484172077922078e-05, "loss": 0.285, "step": 9900 }, { "epoch": 6.82557140402131, "grad_norm": 0.7379534840583801, "learning_rate": 4.482780612244898e-05, "loss": 0.3093, "step": 9925 }, { "epoch": 6.8427564873689635, "grad_norm": 0.5119884014129639, "learning_rate": 4.481331168831169e-05, "loss": 0.2794, "step": 9950 }, { "epoch": 6.859941570716618, "grad_norm": 0.46535733342170715, "learning_rate": 4.4798817254174396e-05, "loss": 0.2784, "step": 9975 }, { "epoch": 6.877126654064273, "grad_norm": 0.49434012174606323, "learning_rate": 4.4784322820037104e-05, "loss": 0.2706, "step": 10000 }, { "epoch": 6.877126654064273, "eval_loss": 0.321118026971817, "eval_runtime": 152.9572, "eval_samples_per_second": 8.009, "eval_steps_per_second": 1.007, "eval_wer": 0.53984739296312, "step": 10000 }, { "epoch": 6.894311737411926, "grad_norm": 0.4334565997123718, "learning_rate": 4.476982838589982e-05, "loss": 0.2762, "step": 10025 }, { "epoch": 6.911496820759581, "grad_norm": 0.564243733882904, "learning_rate": 4.4755333951762525e-05, "loss": 0.2429, "step": 10050 }, { "epoch": 6.9286819041072345, "grad_norm": 0.4657536447048187, "learning_rate": 4.474083951762523e-05, "loss": 0.2528, "step": 10075 }, { "epoch": 6.945866987454889, "grad_norm": 0.5497089624404907, "learning_rate": 4.472634508348794e-05, "loss": 0.2982, "step": 10100 }, { "epoch": 6.963052070802544, "grad_norm": 0.7017095685005188, "learning_rate": 4.471185064935065e-05, "loss": 0.2762, "step": 10125 }, { "epoch": 6.980237154150197, "grad_norm": 0.4462623596191406, "learning_rate": 4.469735621521336e-05, "loss": 0.2814, "step": 10150 }, { "epoch": 6.997422237497852, "grad_norm": 0.42071837186813354, "learning_rate": 4.468286178107607e-05, "loss": 0.2461, "step": 10175 }, { "epoch": 7.015122873345936, "grad_norm": 0.5645279288291931, "learning_rate": 4.466836734693878e-05, "loss": 0.2862, "step": 10200 }, { "epoch": 7.0323079566935895, "grad_norm": 1.0430643558502197, "learning_rate": 4.4653872912801484e-05, "loss": 0.2689, "step": 10225 }, { "epoch": 7.049493040041244, "grad_norm": 0.5359970331192017, "learning_rate": 4.46393784786642e-05, "loss": 0.2685, "step": 10250 }, { "epoch": 7.066678123388899, "grad_norm": 0.7322932481765747, "learning_rate": 4.4624884044526906e-05, "loss": 0.2806, "step": 10275 }, { "epoch": 7.083863206736552, "grad_norm": 0.4568728804588318, "learning_rate": 4.461038961038961e-05, "loss": 0.2726, "step": 10300 }, { "epoch": 7.101048290084207, "grad_norm": 1.341957926750183, "learning_rate": 4.459589517625232e-05, "loss": 0.2647, "step": 10325 }, { "epoch": 7.1182333734318615, "grad_norm": 0.48318567872047424, "learning_rate": 4.4581400742115035e-05, "loss": 0.28, "step": 10350 }, { "epoch": 7.135418456779515, "grad_norm": 0.7370210289955139, "learning_rate": 4.456690630797774e-05, "loss": 0.2886, "step": 10375 }, { "epoch": 7.15260354012717, "grad_norm": 0.703504741191864, "learning_rate": 4.455241187384045e-05, "loss": 0.276, "step": 10400 }, { "epoch": 7.169788623474824, "grad_norm": 0.5735402703285217, "learning_rate": 4.453791743970316e-05, "loss": 0.2786, "step": 10425 }, { "epoch": 7.186973706822478, "grad_norm": 0.36307334899902344, "learning_rate": 4.4523423005565865e-05, "loss": 0.2714, "step": 10450 }, { "epoch": 7.2041587901701325, "grad_norm": 0.8428148031234741, "learning_rate": 4.450892857142857e-05, "loss": 0.2997, "step": 10475 }, { "epoch": 7.221343873517786, "grad_norm": 0.8433165550231934, "learning_rate": 4.449443413729128e-05, "loss": 0.3008, "step": 10500 }, { "epoch": 7.221343873517786, "eval_loss": 0.3265960216522217, "eval_runtime": 151.3692, "eval_samples_per_second": 8.093, "eval_steps_per_second": 1.017, "eval_wer": 0.53984739296312, "step": 10500 }, { "epoch": 7.238528956865441, "grad_norm": 0.7506297826766968, "learning_rate": 4.447993970315399e-05, "loss": 0.2789, "step": 10525 }, { "epoch": 7.255714040213095, "grad_norm": 0.4726732075214386, "learning_rate": 4.44654452690167e-05, "loss": 0.2734, "step": 10550 }, { "epoch": 7.272899123560749, "grad_norm": 0.5341284275054932, "learning_rate": 4.445095083487941e-05, "loss": 0.2281, "step": 10575 }, { "epoch": 7.2900842069084035, "grad_norm": 1.0162677764892578, "learning_rate": 4.4436456400742116e-05, "loss": 0.2581, "step": 10600 }, { "epoch": 7.307269290256058, "grad_norm": 0.6543082594871521, "learning_rate": 4.4421961966604823e-05, "loss": 0.245, "step": 10625 }, { "epoch": 7.324454373603712, "grad_norm": 0.46228596568107605, "learning_rate": 4.440746753246753e-05, "loss": 0.2835, "step": 10650 }, { "epoch": 7.341639456951366, "grad_norm": 1.2381787300109863, "learning_rate": 4.4392973098330245e-05, "loss": 0.241, "step": 10675 }, { "epoch": 7.35882454029902, "grad_norm": 0.6333453059196472, "learning_rate": 4.437847866419295e-05, "loss": 0.2691, "step": 10700 }, { "epoch": 7.376009623646675, "grad_norm": 0.5812168121337891, "learning_rate": 4.436398423005566e-05, "loss": 0.2679, "step": 10725 }, { "epoch": 7.393194706994329, "grad_norm": 0.4156525433063507, "learning_rate": 4.434948979591837e-05, "loss": 0.2355, "step": 10750 }, { "epoch": 7.410379790341983, "grad_norm": 0.7613847255706787, "learning_rate": 4.433499536178108e-05, "loss": 0.2578, "step": 10775 }, { "epoch": 7.427564873689637, "grad_norm": 0.4807249903678894, "learning_rate": 4.432050092764379e-05, "loss": 0.2873, "step": 10800 }, { "epoch": 7.444749957037292, "grad_norm": 0.6777732372283936, "learning_rate": 4.4306006493506496e-05, "loss": 0.2319, "step": 10825 }, { "epoch": 7.461935040384946, "grad_norm": 0.5690301060676575, "learning_rate": 4.4291512059369204e-05, "loss": 0.2933, "step": 10850 }, { "epoch": 7.4791201237326, "grad_norm": 0.6938736438751221, "learning_rate": 4.427701762523191e-05, "loss": 0.25, "step": 10875 }, { "epoch": 7.496305207080255, "grad_norm": 1.2933138608932495, "learning_rate": 4.4262523191094626e-05, "loss": 0.2932, "step": 10900 }, { "epoch": 7.513490290427908, "grad_norm": 0.6283312439918518, "learning_rate": 4.424802875695733e-05, "loss": 0.2703, "step": 10925 }, { "epoch": 7.530675373775563, "grad_norm": 0.6064092516899109, "learning_rate": 4.423353432282004e-05, "loss": 0.2762, "step": 10950 }, { "epoch": 7.5478604571232175, "grad_norm": 0.6407192349433899, "learning_rate": 4.421903988868275e-05, "loss": 0.2477, "step": 10975 }, { "epoch": 7.565045540470871, "grad_norm": 0.40185797214508057, "learning_rate": 4.4204545454545455e-05, "loss": 0.2674, "step": 11000 }, { "epoch": 7.565045540470871, "eval_loss": 0.318492591381073, "eval_runtime": 150.7169, "eval_samples_per_second": 8.128, "eval_steps_per_second": 1.022, "eval_wer": 0.5379398050021196, "step": 11000 }, { "epoch": 7.565045540470871, "step": 11000, "total_flos": 8.071230701734987e+19, "train_loss": 0.7137202631343494, "train_runtime": 97593.296, "train_samples_per_second": 28.619, "train_steps_per_second": 0.894 } ], "logging_steps": 25, "max_steps": 87240, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.071230701734987e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }