{ "best_global_step": 22000, "best_metric": 0.09801159451698542, "best_model_checkpoint": "w2v-bert-urmi-out-v3/checkpoint-22000", "epoch": 19.113814074717638, "eval_steps": 500, "global_step": 22000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.043440486533449174, "grad_norm": NaN, "learning_rate": 2.9400000000000002e-06, "loss": 17.6344189453125, "step": 50 }, { "epoch": 0.08688097306689835, "grad_norm": 41.381343841552734, "learning_rate": 5.940000000000001e-06, "loss": 8.653125, "step": 100 }, { "epoch": 0.13032145960034752, "grad_norm": 25.690353393554688, "learning_rate": 8.939999999999999e-06, "loss": 6.376434936523437, "step": 150 }, { "epoch": 0.1737619461337967, "grad_norm": 150.42007446289062, "learning_rate": 1.1940000000000001e-05, "loss": 5.640718994140625, "step": 200 }, { "epoch": 0.21720243266724587, "grad_norm": 31.25239372253418, "learning_rate": 1.4940000000000001e-05, "loss": 4.098101806640625, "step": 250 }, { "epoch": 0.26064291920069504, "grad_norm": 36.728145599365234, "learning_rate": 1.794e-05, "loss": 2.84884033203125, "step": 300 }, { "epoch": 0.3040834057341442, "grad_norm": 19.722448348999023, "learning_rate": 2.094e-05, "loss": 2.7515597534179688, "step": 350 }, { "epoch": 0.3475238922675934, "grad_norm": 72.23578643798828, "learning_rate": 2.394e-05, "loss": 2.2729856872558596, "step": 400 }, { "epoch": 0.39096437880104257, "grad_norm": 29.115379333496094, "learning_rate": 2.6940000000000003e-05, "loss": 2.120067443847656, "step": 450 }, { "epoch": 0.43440486533449174, "grad_norm": 41.9510612487793, "learning_rate": 2.994e-05, "loss": 2.0917138671875, "step": 500 }, { "epoch": 0.43440486533449174, "eval_cer": 0.2925177439453866, "eval_loss": 1.1169049739837646, "eval_runtime": 41.0307, "eval_samples_per_second": 24.323, "eval_steps_per_second": 12.162, "eval_wer": 0.860224586288416, "step": 500 }, { "epoch": 0.4778453518679409, "grad_norm": 56.66565704345703, "learning_rate": 2.995680282104026e-05, "loss": 2.0159649658203125, "step": 550 }, { "epoch": 0.5212858384013901, "grad_norm": 93.8177261352539, "learning_rate": 2.9912724066999708e-05, "loss": 2.4505059814453123, "step": 600 }, { "epoch": 0.5647263249348393, "grad_norm": 21.54208755493164, "learning_rate": 2.9868645312959155e-05, "loss": 1.9825759887695313, "step": 650 }, { "epoch": 0.6081668114682884, "grad_norm": 67.06742095947266, "learning_rate": 2.9824566558918603e-05, "loss": 1.764338836669922, "step": 700 }, { "epoch": 0.6516072980017377, "grad_norm": 52.83509063720703, "learning_rate": 2.978048780487805e-05, "loss": 1.8609919738769531, "step": 750 }, { "epoch": 0.6950477845351868, "grad_norm": 19.600238800048828, "learning_rate": 2.9736409050837498e-05, "loss": 1.800018310546875, "step": 800 }, { "epoch": 0.738488271068636, "grad_norm": 131.72647094726562, "learning_rate": 2.9692330296796945e-05, "loss": 1.884906005859375, "step": 850 }, { "epoch": 0.7819287576020851, "grad_norm": 90.5487060546875, "learning_rate": 2.9648251542756393e-05, "loss": 1.8686543273925782, "step": 900 }, { "epoch": 0.8253692441355344, "grad_norm": 109.63233184814453, "learning_rate": 2.960417278871584e-05, "loss": 1.5970869445800782, "step": 950 }, { "epoch": 0.8688097306689835, "grad_norm": 33.57727813720703, "learning_rate": 2.9560094034675285e-05, "loss": 1.7014007568359375, "step": 1000 }, { "epoch": 0.8688097306689835, "eval_cer": 0.2367123584547868, "eval_loss": 0.8549327850341797, "eval_runtime": 36.6894, "eval_samples_per_second": 27.201, "eval_steps_per_second": 13.601, "eval_wer": 0.7121749408983451, "step": 1000 }, { "epoch": 0.9122502172024327, "grad_norm": 43.89970016479492, "learning_rate": 2.9516015280634735e-05, "loss": 1.5748690795898437, "step": 1050 }, { "epoch": 0.9556907037358818, "grad_norm": 26.835451126098633, "learning_rate": 2.947193652659418e-05, "loss": 1.4466169738769532, "step": 1100 }, { "epoch": 0.9991311902693311, "grad_norm": 56.00555419921875, "learning_rate": 2.942785777255363e-05, "loss": 1.6500205993652344, "step": 1150 }, { "epoch": 1.0425716768027802, "grad_norm": 14.68392562866211, "learning_rate": 2.9383779018513075e-05, "loss": 1.589685821533203, "step": 1200 }, { "epoch": 1.0860121633362294, "grad_norm": 43.6255989074707, "learning_rate": 2.9339700264472526e-05, "loss": 1.181229705810547, "step": 1250 }, { "epoch": 1.1294526498696786, "grad_norm": 73.56681060791016, "learning_rate": 2.929562151043197e-05, "loss": 1.37734619140625, "step": 1300 }, { "epoch": 1.1728931364031276, "grad_norm": 12.448647499084473, "learning_rate": 2.925154275639142e-05, "loss": 1.4884031677246095, "step": 1350 }, { "epoch": 1.2163336229365769, "grad_norm": 68.00922393798828, "learning_rate": 2.9207464002350868e-05, "loss": 1.596350860595703, "step": 1400 }, { "epoch": 1.259774109470026, "grad_norm": 39.86298370361328, "learning_rate": 2.9163385248310316e-05, "loss": 1.3065278625488281, "step": 1450 }, { "epoch": 1.3032145960034751, "grad_norm": 15.449691772460938, "learning_rate": 2.9119306494269763e-05, "loss": 1.1519753265380859, "step": 1500 }, { "epoch": 1.3032145960034751, "eval_cer": 0.2101099853714038, "eval_loss": 0.790473997592926, "eval_runtime": 35.6641, "eval_samples_per_second": 27.983, "eval_steps_per_second": 13.992, "eval_wer": 0.6616430260047281, "step": 1500 }, { "epoch": 1.3466550825369243, "grad_norm": 24.626739501953125, "learning_rate": 2.907522774022921e-05, "loss": 1.427688751220703, "step": 1550 }, { "epoch": 1.3900955690703736, "grad_norm": 17.682024002075195, "learning_rate": 2.9031148986188658e-05, "loss": 1.4333070373535157, "step": 1600 }, { "epoch": 1.4335360556038228, "grad_norm": 23.89002227783203, "learning_rate": 2.8987070232148106e-05, "loss": 1.2861351013183593, "step": 1650 }, { "epoch": 1.476976542137272, "grad_norm": 30.092050552368164, "learning_rate": 2.8942991478107553e-05, "loss": 1.256303176879883, "step": 1700 }, { "epoch": 1.520417028670721, "grad_norm": 18.956981658935547, "learning_rate": 2.8898912724067e-05, "loss": 1.2424105834960937, "step": 1750 }, { "epoch": 1.5638575152041703, "grad_norm": 56.31697082519531, "learning_rate": 2.8854833970026448e-05, "loss": 1.1252889251708984, "step": 1800 }, { "epoch": 1.6072980017376195, "grad_norm": 42.870338439941406, "learning_rate": 2.8810755215985896e-05, "loss": 1.0607293701171876, "step": 1850 }, { "epoch": 1.6507384882710685, "grad_norm": 61.81471633911133, "learning_rate": 2.8766676461945343e-05, "loss": 1.077663116455078, "step": 1900 }, { "epoch": 1.694178974804518, "grad_norm": 14.434207916259766, "learning_rate": 2.872259770790479e-05, "loss": 1.0206593322753905, "step": 1950 }, { "epoch": 1.737619461337967, "grad_norm": 42.81059265136719, "learning_rate": 2.8678518953864238e-05, "loss": 1.1406269836425782, "step": 2000 }, { "epoch": 1.737619461337967, "eval_cer": 0.18090697296418703, "eval_loss": 0.7465401887893677, "eval_runtime": 35.7694, "eval_samples_per_second": 27.901, "eval_steps_per_second": 13.95, "eval_wer": 0.5706264775413712, "step": 2000 }, { "epoch": 1.7810599478714162, "grad_norm": 10.328418731689453, "learning_rate": 2.8634440199823686e-05, "loss": 1.0407346343994142, "step": 2050 }, { "epoch": 1.8245004344048654, "grad_norm": 57.201133728027344, "learning_rate": 2.8590361445783133e-05, "loss": 1.36724365234375, "step": 2100 }, { "epoch": 1.8679409209383144, "grad_norm": 63.23184585571289, "learning_rate": 2.854628269174258e-05, "loss": 1.1011062622070313, "step": 2150 }, { "epoch": 1.9113814074717637, "grad_norm": 12.780070304870605, "learning_rate": 2.850220393770203e-05, "loss": 1.0486875915527343, "step": 2200 }, { "epoch": 1.954821894005213, "grad_norm": 15.590168952941895, "learning_rate": 2.8458125183661476e-05, "loss": 1.4535511779785155, "step": 2250 }, { "epoch": 1.998262380538662, "grad_norm": 23.453882217407227, "learning_rate": 2.8414046429620923e-05, "loss": 1.2782644653320312, "step": 2300 }, { "epoch": 2.0417028670721113, "grad_norm": 96.71955108642578, "learning_rate": 2.836996767558037e-05, "loss": 0.843255615234375, "step": 2350 }, { "epoch": 2.0851433536055604, "grad_norm": 17.284881591796875, "learning_rate": 2.832588892153982e-05, "loss": 0.7496602630615234, "step": 2400 }, { "epoch": 2.1285838401390094, "grad_norm": 19.61467742919922, "learning_rate": 2.8281810167499266e-05, "loss": 0.8246018218994141, "step": 2450 }, { "epoch": 2.172024326672459, "grad_norm": 20.361276626586914, "learning_rate": 2.8237731413458713e-05, "loss": 1.0796304321289063, "step": 2500 }, { "epoch": 2.172024326672459, "eval_cer": 0.16687435661266728, "eval_loss": 0.6873559951782227, "eval_runtime": 35.5627, "eval_samples_per_second": 28.063, "eval_steps_per_second": 14.032, "eval_wer": 0.5325059101654847, "step": 2500 }, { "epoch": 2.215464813205908, "grad_norm": 41.729736328125, "learning_rate": 2.819365265941816e-05, "loss": 0.8797608947753907, "step": 2550 }, { "epoch": 2.2589052997393573, "grad_norm": 11.56946086883545, "learning_rate": 2.814957390537761e-05, "loss": 0.9233754730224609, "step": 2600 }, { "epoch": 2.3023457862728063, "grad_norm": 13.053935050964355, "learning_rate": 2.8105495151337056e-05, "loss": 1.0405730438232421, "step": 2650 }, { "epoch": 2.3457862728062553, "grad_norm": 27.34178352355957, "learning_rate": 2.8061416397296503e-05, "loss": 0.9149618530273438, "step": 2700 }, { "epoch": 2.3892267593397047, "grad_norm": 32.077274322509766, "learning_rate": 2.801733764325595e-05, "loss": 0.9278230285644531, "step": 2750 }, { "epoch": 2.4326672458731537, "grad_norm": 14.97318172454834, "learning_rate": 2.79732588892154e-05, "loss": 1.0948815155029297, "step": 2800 }, { "epoch": 2.4761077324066028, "grad_norm": 0.24952514469623566, "learning_rate": 2.7929180135174846e-05, "loss": 0.7527609252929688, "step": 2850 }, { "epoch": 2.519548218940052, "grad_norm": 42.21710205078125, "learning_rate": 2.7885101381134294e-05, "loss": 0.8132963562011719, "step": 2900 }, { "epoch": 2.562988705473501, "grad_norm": 10.806293487548828, "learning_rate": 2.784102262709374e-05, "loss": 1.0475637817382812, "step": 2950 }, { "epoch": 2.6064291920069502, "grad_norm": 24.606548309326172, "learning_rate": 2.779694387305319e-05, "loss": 0.8934781646728516, "step": 3000 }, { "epoch": 2.6064291920069502, "eval_cer": 0.20637156634339274, "eval_loss": 0.6298205256462097, "eval_runtime": 35.3882, "eval_samples_per_second": 28.201, "eval_steps_per_second": 14.101, "eval_wer": 0.567080378250591, "step": 3000 }, { "epoch": 2.6498696785403997, "grad_norm": 34.39348220825195, "learning_rate": 2.7752865119012636e-05, "loss": 0.920788803100586, "step": 3050 }, { "epoch": 2.6933101650738487, "grad_norm": 67.26911163330078, "learning_rate": 2.7708786364972084e-05, "loss": 0.8773422241210938, "step": 3100 }, { "epoch": 2.736750651607298, "grad_norm": 62.36620330810547, "learning_rate": 2.766470761093153e-05, "loss": 1.0072268676757812, "step": 3150 }, { "epoch": 2.780191138140747, "grad_norm": 28.642549514770508, "learning_rate": 2.7620628856890982e-05, "loss": 1.3133396911621094, "step": 3200 }, { "epoch": 2.8236316246741966, "grad_norm": 20.24125862121582, "learning_rate": 2.7576550102850426e-05, "loss": 0.9312178802490234, "step": 3250 }, { "epoch": 2.8670721112076456, "grad_norm": 87.64716339111328, "learning_rate": 2.7532471348809877e-05, "loss": 0.9268650817871094, "step": 3300 }, { "epoch": 2.9105125977410946, "grad_norm": 10.313736915588379, "learning_rate": 2.748839259476932e-05, "loss": 0.8256442260742187, "step": 3350 }, { "epoch": 2.953953084274544, "grad_norm": 31.871875762939453, "learning_rate": 2.7444313840728772e-05, "loss": 1.0591201782226562, "step": 3400 }, { "epoch": 2.997393570807993, "grad_norm": 51.40370559692383, "learning_rate": 2.7400235086688216e-05, "loss": 1.0520962524414061, "step": 3450 }, { "epoch": 3.040834057341442, "grad_norm": 7.437458515167236, "learning_rate": 2.7356156332647667e-05, "loss": 0.7271649169921875, "step": 3500 }, { "epoch": 3.040834057341442, "eval_cer": 0.15457549981037005, "eval_loss": 0.6895098090171814, "eval_runtime": 35.2334, "eval_samples_per_second": 28.325, "eval_steps_per_second": 14.163, "eval_wer": 0.5041371158392435, "step": 3500 }, { "epoch": 3.0842745438748915, "grad_norm": 2.256615161895752, "learning_rate": 2.731207757860711e-05, "loss": 0.6253271865844726, "step": 3550 }, { "epoch": 3.1277150304083405, "grad_norm": 24.19891357421875, "learning_rate": 2.7267998824566562e-05, "loss": 0.8554808807373047, "step": 3600 }, { "epoch": 3.1711555169417895, "grad_norm": 25.919506072998047, "learning_rate": 2.7223920070526006e-05, "loss": 0.7264094543457031, "step": 3650 }, { "epoch": 3.214596003475239, "grad_norm": 0.6518918871879578, "learning_rate": 2.7179841316485454e-05, "loss": 0.633333511352539, "step": 3700 }, { "epoch": 3.258036490008688, "grad_norm": 36.61137390136719, "learning_rate": 2.71357625624449e-05, "loss": 0.8081251525878906, "step": 3750 }, { "epoch": 3.3014769765421375, "grad_norm": 8.081766128540039, "learning_rate": 2.709168380840435e-05, "loss": 0.8773213195800781, "step": 3800 }, { "epoch": 3.3449174630755865, "grad_norm": 24.704824447631836, "learning_rate": 2.7047605054363796e-05, "loss": 0.8086146545410157, "step": 3850 }, { "epoch": 3.3883579496090355, "grad_norm": 0.8145921230316162, "learning_rate": 2.7003526300323244e-05, "loss": 0.6982787322998046, "step": 3900 }, { "epoch": 3.431798436142485, "grad_norm": 37.42679214477539, "learning_rate": 2.695944754628269e-05, "loss": 0.7531932067871093, "step": 3950 }, { "epoch": 3.475238922675934, "grad_norm": 32.73085403442383, "learning_rate": 2.691536879224214e-05, "loss": 0.6939554595947266, "step": 4000 }, { "epoch": 3.475238922675934, "eval_cer": 0.1360459446280544, "eval_loss": 0.6202276349067688, "eval_runtime": 34.9968, "eval_samples_per_second": 28.517, "eval_steps_per_second": 14.258, "eval_wer": 0.42671394799054374, "step": 4000 }, { "epoch": 3.5186794092093834, "grad_norm": 0.5364285111427307, "learning_rate": 2.6871290038201586e-05, "loss": 0.7825308227539063, "step": 4050 }, { "epoch": 3.5621198957428324, "grad_norm": 26.980627059936523, "learning_rate": 2.6827211284161034e-05, "loss": 0.7656624603271485, "step": 4100 }, { "epoch": 3.6055603822762814, "grad_norm": 10.756477355957031, "learning_rate": 2.6783132530120485e-05, "loss": 0.7668492889404297, "step": 4150 }, { "epoch": 3.649000868809731, "grad_norm": 13.8463773727417, "learning_rate": 2.673905377607993e-05, "loss": 0.7058528900146485, "step": 4200 }, { "epoch": 3.69244135534318, "grad_norm": 18.059154510498047, "learning_rate": 2.669497502203938e-05, "loss": 0.7425822448730469, "step": 4250 }, { "epoch": 3.735881841876629, "grad_norm": 14.087454795837402, "learning_rate": 2.6650896267998824e-05, "loss": 0.7796754455566406, "step": 4300 }, { "epoch": 3.7793223284100783, "grad_norm": 4.631764888763428, "learning_rate": 2.6606817513958275e-05, "loss": 0.7211798095703125, "step": 4350 }, { "epoch": 3.8227628149435273, "grad_norm": 0.7707765698432922, "learning_rate": 2.656273875991772e-05, "loss": 0.6928179168701172, "step": 4400 }, { "epoch": 3.8662033014769763, "grad_norm": 0.21713215112686157, "learning_rate": 2.651866000587717e-05, "loss": 0.7452503204345703, "step": 4450 }, { "epoch": 3.909643788010426, "grad_norm": 19.084728240966797, "learning_rate": 2.6474581251836614e-05, "loss": 0.6911500549316406, "step": 4500 }, { "epoch": 3.909643788010426, "eval_cer": 0.14384786259955573, "eval_loss": 0.6342427730560303, "eval_runtime": 35.3212, "eval_samples_per_second": 28.255, "eval_steps_per_second": 14.128, "eval_wer": 0.4435579196217494, "step": 4500 }, { "epoch": 3.953084274543875, "grad_norm": 0.5245521068572998, "learning_rate": 2.6430502497796065e-05, "loss": 0.6018388748168946, "step": 4550 }, { "epoch": 3.996524761077324, "grad_norm": 22.038259506225586, "learning_rate": 2.638642374375551e-05, "loss": 0.5375812149047852, "step": 4600 }, { "epoch": 4.039965247610773, "grad_norm": 11.167423248291016, "learning_rate": 2.634234498971496e-05, "loss": 0.5369546508789063, "step": 4650 }, { "epoch": 4.083405734144223, "grad_norm": 0.10995540767908096, "learning_rate": 2.6298266235674404e-05, "loss": 0.6637758636474609, "step": 4700 }, { "epoch": 4.126846220677671, "grad_norm": 596.5354614257812, "learning_rate": 2.6254187481633855e-05, "loss": 0.5059263610839844, "step": 4750 }, { "epoch": 4.170286707211121, "grad_norm": 49.69171905517578, "learning_rate": 2.62101087275933e-05, "loss": 0.5901547622680664, "step": 4800 }, { "epoch": 4.21372719374457, "grad_norm": 0.014338035136461258, "learning_rate": 2.616602997355275e-05, "loss": 0.6108988571166992, "step": 4850 }, { "epoch": 4.257167680278019, "grad_norm": 0.33344972133636475, "learning_rate": 2.6121951219512194e-05, "loss": 0.5445775985717773, "step": 4900 }, { "epoch": 4.300608166811468, "grad_norm": 0.7429609298706055, "learning_rate": 2.6077872465471645e-05, "loss": 0.4996451187133789, "step": 4950 }, { "epoch": 4.344048653344918, "grad_norm": 0.05325142666697502, "learning_rate": 2.603379371143109e-05, "loss": 0.5604157257080078, "step": 5000 }, { "epoch": 4.344048653344918, "eval_cer": 0.1346914449802243, "eval_loss": 0.5602818727493286, "eval_runtime": 35.3794, "eval_samples_per_second": 28.209, "eval_steps_per_second": 14.104, "eval_wer": 0.4231678486997636, "step": 5000 }, { "epoch": 4.387489139878367, "grad_norm": 0.4052943289279938, "learning_rate": 2.5989714957390537e-05, "loss": 0.571678810119629, "step": 5050 }, { "epoch": 4.430929626411816, "grad_norm": 7.114663600921631, "learning_rate": 2.5945636203349988e-05, "loss": 0.5940496826171875, "step": 5100 }, { "epoch": 4.474370112945265, "grad_norm": 3.226045846939087, "learning_rate": 2.5901557449309432e-05, "loss": 0.48196929931640625, "step": 5150 }, { "epoch": 4.5178105994787146, "grad_norm": 14.2632474899292, "learning_rate": 2.5857478695268883e-05, "loss": 0.4521299362182617, "step": 5200 }, { "epoch": 4.561251086012163, "grad_norm": 11.746747016906738, "learning_rate": 2.5813399941228327e-05, "loss": 0.6751963806152343, "step": 5250 }, { "epoch": 4.604691572545613, "grad_norm": 16.07468605041504, "learning_rate": 2.5769321187187778e-05, "loss": 0.45375862121582033, "step": 5300 }, { "epoch": 4.648132059079062, "grad_norm": 11.938125610351562, "learning_rate": 2.5725242433147222e-05, "loss": 0.5193147277832031, "step": 5350 }, { "epoch": 4.691572545612511, "grad_norm": 14.44975757598877, "learning_rate": 2.5681163679106673e-05, "loss": 0.5906137084960937, "step": 5400 }, { "epoch": 4.73501303214596, "grad_norm": 0.14901815354824066, "learning_rate": 2.5637084925066117e-05, "loss": 0.5064856338500977, "step": 5450 }, { "epoch": 4.7784535186794095, "grad_norm": 0.9449958801269531, "learning_rate": 2.5593006171025568e-05, "loss": 0.6325591278076171, "step": 5500 }, { "epoch": 4.7784535186794095, "eval_cer": 0.1300319661916888, "eval_loss": 0.5885463356971741, "eval_runtime": 35.2148, "eval_samples_per_second": 28.34, "eval_steps_per_second": 14.17, "eval_wer": 0.4078014184397163, "step": 5500 }, { "epoch": 4.821894005212858, "grad_norm": 0.016951393336057663, "learning_rate": 2.5548927416985012e-05, "loss": 0.5301705551147461, "step": 5550 }, { "epoch": 4.8653344917463075, "grad_norm": 31.289724349975586, "learning_rate": 2.5504848662944463e-05, "loss": 0.4865913009643555, "step": 5600 }, { "epoch": 4.908774978279757, "grad_norm": 0.2750867009162903, "learning_rate": 2.5460769908903907e-05, "loss": 0.6053089523315429, "step": 5650 }, { "epoch": 4.9522154648132055, "grad_norm": 0.15572036802768707, "learning_rate": 2.5416691154863358e-05, "loss": 0.44040061950683596, "step": 5700 }, { "epoch": 4.995655951346655, "grad_norm": 1.536003828048706, "learning_rate": 2.5372612400822802e-05, "loss": 0.6176298141479493, "step": 5750 }, { "epoch": 5.039096437880104, "grad_norm": 3.888091564178467, "learning_rate": 2.5328533646782253e-05, "loss": 0.6050854873657227, "step": 5800 }, { "epoch": 5.082536924413553, "grad_norm": 1.9056124687194824, "learning_rate": 2.5284454892741697e-05, "loss": 0.5753683090209961, "step": 5850 }, { "epoch": 5.125977410947002, "grad_norm": 0.10637835413217545, "learning_rate": 2.5240376138701148e-05, "loss": 0.47484302520751953, "step": 5900 }, { "epoch": 5.169417897480452, "grad_norm": 4.4535441398620605, "learning_rate": 2.5196297384660595e-05, "loss": 0.34999225616455076, "step": 5950 }, { "epoch": 5.212858384013901, "grad_norm": 0.6373205780982971, "learning_rate": 2.5152218630620043e-05, "loss": 0.38846492767333984, "step": 6000 }, { "epoch": 5.212858384013901, "eval_cer": 0.1255350273608929, "eval_loss": 0.6293100118637085, "eval_runtime": 35.1024, "eval_samples_per_second": 28.431, "eval_steps_per_second": 14.216, "eval_wer": 0.3983451536643026, "step": 6000 }, { "epoch": 5.25629887054735, "grad_norm": 0.3430880010128021, "learning_rate": 2.510813987657949e-05, "loss": 0.3233113479614258, "step": 6050 }, { "epoch": 5.299739357080799, "grad_norm": 0.023547176271677017, "learning_rate": 2.5064061122538938e-05, "loss": 0.5810712432861328, "step": 6100 }, { "epoch": 5.343179843614249, "grad_norm": 0.45001161098480225, "learning_rate": 2.5019982368498385e-05, "loss": 0.31497194290161135, "step": 6150 }, { "epoch": 5.386620330147697, "grad_norm": 0.07451729476451874, "learning_rate": 2.4975903614457833e-05, "loss": 0.3424281311035156, "step": 6200 }, { "epoch": 5.430060816681147, "grad_norm": 102.05135345458984, "learning_rate": 2.493182486041728e-05, "loss": 0.42556037902832033, "step": 6250 }, { "epoch": 5.473501303214596, "grad_norm": 1.4394115209579468, "learning_rate": 2.4887746106376728e-05, "loss": 0.39192684173583986, "step": 6300 }, { "epoch": 5.516941789748045, "grad_norm": 2.0899856090545654, "learning_rate": 2.4843667352336176e-05, "loss": 0.4351010513305664, "step": 6350 }, { "epoch": 5.560382276281494, "grad_norm": 0.12065482884645462, "learning_rate": 2.479958859829562e-05, "loss": 0.508093376159668, "step": 6400 }, { "epoch": 5.603822762814944, "grad_norm": 0.041007447987794876, "learning_rate": 2.475550984425507e-05, "loss": 0.4111709213256836, "step": 6450 }, { "epoch": 5.647263249348393, "grad_norm": 14.792854309082031, "learning_rate": 2.4711431090214515e-05, "loss": 0.34780517578125, "step": 6500 }, { "epoch": 5.647263249348393, "eval_cer": 0.1270520669664626, "eval_loss": 0.6646775007247925, "eval_runtime": 35.5266, "eval_samples_per_second": 28.092, "eval_steps_per_second": 14.046, "eval_wer": 0.3980496453900709, "step": 6500 }, { "epoch": 5.690703735881842, "grad_norm": 28.922000885009766, "learning_rate": 2.4667352336173966e-05, "loss": 0.4107795715332031, "step": 6550 }, { "epoch": 5.734144222415291, "grad_norm": 0.07848715782165527, "learning_rate": 2.462327358213341e-05, "loss": 0.5832571029663086, "step": 6600 }, { "epoch": 5.777584708948741, "grad_norm": 19.316383361816406, "learning_rate": 2.457919482809286e-05, "loss": 0.41104129791259764, "step": 6650 }, { "epoch": 5.821025195482189, "grad_norm": 0.20225679874420166, "learning_rate": 2.4535116074052305e-05, "loss": 0.4999349975585938, "step": 6700 }, { "epoch": 5.864465682015639, "grad_norm": 0.04317609593272209, "learning_rate": 2.4491037320011756e-05, "loss": 0.5584917449951172, "step": 6750 }, { "epoch": 5.907906168549088, "grad_norm": 0.3524606227874756, "learning_rate": 2.44469585659712e-05, "loss": 0.4921522521972656, "step": 6800 }, { "epoch": 5.951346655082537, "grad_norm": 29.436384201049805, "learning_rate": 2.440287981193065e-05, "loss": 0.5514765548706054, "step": 6850 }, { "epoch": 5.994787141615986, "grad_norm": 0.23278824985027313, "learning_rate": 2.4358801057890098e-05, "loss": 0.3556842422485352, "step": 6900 }, { "epoch": 6.038227628149436, "grad_norm": 0.08552414178848267, "learning_rate": 2.4314722303849546e-05, "loss": 0.33310401916503907, "step": 6950 }, { "epoch": 6.081668114682884, "grad_norm": 11.057211875915527, "learning_rate": 2.4270643549808993e-05, "loss": 0.36625064849853517, "step": 7000 }, { "epoch": 6.081668114682884, "eval_cer": 0.12483068754402124, "eval_loss": 0.6519187688827515, "eval_runtime": 35.4455, "eval_samples_per_second": 28.156, "eval_steps_per_second": 14.078, "eval_wer": 0.3945035460992908, "step": 7000 }, { "epoch": 6.125108601216334, "grad_norm": 0.06223779171705246, "learning_rate": 2.422656479576844e-05, "loss": 0.4004500198364258, "step": 7050 }, { "epoch": 6.168549087749783, "grad_norm": 0.009129839017987251, "learning_rate": 2.4182486041727888e-05, "loss": 0.26837165832519533, "step": 7100 }, { "epoch": 6.211989574283232, "grad_norm": 1.0068172216415405, "learning_rate": 2.4138407287687336e-05, "loss": 0.3842990112304687, "step": 7150 }, { "epoch": 6.255430060816681, "grad_norm": 0.03263875097036362, "learning_rate": 2.4094328533646783e-05, "loss": 0.46779460906982423, "step": 7200 }, { "epoch": 6.2988705473501305, "grad_norm": 0.025848915800452232, "learning_rate": 2.405024977960623e-05, "loss": 0.46671478271484373, "step": 7250 }, { "epoch": 6.342311033883579, "grad_norm": 0.032335590571165085, "learning_rate": 2.4006171025565678e-05, "loss": 0.2948387336730957, "step": 7300 }, { "epoch": 6.3857515204170285, "grad_norm": 0.07902107387781143, "learning_rate": 2.3962092271525126e-05, "loss": 0.2986873435974121, "step": 7350 }, { "epoch": 6.429192006950478, "grad_norm": 1.8951733112335205, "learning_rate": 2.3918013517484573e-05, "loss": 0.48029232025146484, "step": 7400 }, { "epoch": 6.4726324934839266, "grad_norm": 0.1298227608203888, "learning_rate": 2.387393476344402e-05, "loss": 0.45991172790527346, "step": 7450 }, { "epoch": 6.516072980017376, "grad_norm": 8.462530136108398, "learning_rate": 2.382985600940347e-05, "loss": 0.3584669876098633, "step": 7500 }, { "epoch": 6.516072980017376, "eval_cer": 0.12033374871322533, "eval_loss": 0.6832783818244934, "eval_runtime": 35.4555, "eval_samples_per_second": 28.148, "eval_steps_per_second": 14.074, "eval_wer": 0.3844562647754137, "step": 7500 }, { "epoch": 6.5595134665508255, "grad_norm": 0.03215891495347023, "learning_rate": 2.3785777255362916e-05, "loss": 0.3052578163146973, "step": 7550 }, { "epoch": 6.602953953084275, "grad_norm": 3.956105947494507, "learning_rate": 2.3741698501322363e-05, "loss": 0.3200105667114258, "step": 7600 }, { "epoch": 6.6463944396177235, "grad_norm": 10.573678016662598, "learning_rate": 2.369761974728181e-05, "loss": 0.3665552520751953, "step": 7650 }, { "epoch": 6.689834926151173, "grad_norm": 2.567551374435425, "learning_rate": 2.365354099324126e-05, "loss": 0.3944419479370117, "step": 7700 }, { "epoch": 6.733275412684622, "grad_norm": 2.1139237880706787, "learning_rate": 2.3609462239200703e-05, "loss": 0.36841018676757814, "step": 7750 }, { "epoch": 6.776715899218071, "grad_norm": 0.01942128874361515, "learning_rate": 2.3565383485160153e-05, "loss": 0.3560383987426758, "step": 7800 }, { "epoch": 6.82015638575152, "grad_norm": 1.474857211112976, "learning_rate": 2.35213047311196e-05, "loss": 0.46931259155273436, "step": 7850 }, { "epoch": 6.86359687228497, "grad_norm": 24.063940048217773, "learning_rate": 2.347722597707905e-05, "loss": 0.3131961250305176, "step": 7900 }, { "epoch": 6.907037358818418, "grad_norm": 0.024980274960398674, "learning_rate": 2.3433147223038496e-05, "loss": 0.3744655609130859, "step": 7950 }, { "epoch": 6.950477845351868, "grad_norm": 19.342248916625977, "learning_rate": 2.3389068468997944e-05, "loss": 0.38378406524658204, "step": 8000 }, { "epoch": 6.950477845351868, "eval_cer": 0.14720702172617436, "eval_loss": 0.6338760852813721, "eval_runtime": 35.924, "eval_samples_per_second": 27.781, "eval_steps_per_second": 13.89, "eval_wer": 0.40573286052009455, "step": 8000 }, { "epoch": 6.993918331885317, "grad_norm": 3.440767765045166, "learning_rate": 2.334498971495739e-05, "loss": 0.44374298095703124, "step": 8050 }, { "epoch": 7.037358818418766, "grad_norm": 78.23323822021484, "learning_rate": 2.330091096091684e-05, "loss": 0.4143082809448242, "step": 8100 }, { "epoch": 7.080799304952215, "grad_norm": 20.11145782470703, "learning_rate": 2.3256832206876286e-05, "loss": 0.2681180191040039, "step": 8150 }, { "epoch": 7.124239791485665, "grad_norm": 4.221235275268555, "learning_rate": 2.3212753452835734e-05, "loss": 0.33470783233642576, "step": 8200 }, { "epoch": 7.167680278019114, "grad_norm": 0.00418456643819809, "learning_rate": 2.316867469879518e-05, "loss": 0.29219053268432615, "step": 8250 }, { "epoch": 7.211120764552563, "grad_norm": 48.96384048461914, "learning_rate": 2.312459594475463e-05, "loss": 0.2650064277648926, "step": 8300 }, { "epoch": 7.254561251086012, "grad_norm": 0.4012812077999115, "learning_rate": 2.3080517190714076e-05, "loss": 0.2377411651611328, "step": 8350 }, { "epoch": 7.298001737619462, "grad_norm": 0.04035955294966698, "learning_rate": 2.3036438436673524e-05, "loss": 0.39625030517578125, "step": 8400 }, { "epoch": 7.34144222415291, "grad_norm": 0.015255268663167953, "learning_rate": 2.299235968263297e-05, "loss": 0.29354951858520506, "step": 8450 }, { "epoch": 7.38488271068636, "grad_norm": 0.1737648993730545, "learning_rate": 2.294828092859242e-05, "loss": 0.43962146759033205, "step": 8500 }, { "epoch": 7.38488271068636, "eval_cer": 0.11811236929078399, "eval_loss": 0.6835731863975525, "eval_runtime": 35.5953, "eval_samples_per_second": 28.037, "eval_steps_per_second": 14.019, "eval_wer": 0.37056737588652483, "step": 8500 }, { "epoch": 7.428323197219809, "grad_norm": 26.738134384155273, "learning_rate": 2.2904202174551866e-05, "loss": 0.32676326751708984, "step": 8550 }, { "epoch": 7.471763683753258, "grad_norm": 91.86631774902344, "learning_rate": 2.2860123420511314e-05, "loss": 0.27230093002319333, "step": 8600 }, { "epoch": 7.515204170286707, "grad_norm": 0.2025415152311325, "learning_rate": 2.281604466647076e-05, "loss": 0.34510005950927736, "step": 8650 }, { "epoch": 7.558644656820157, "grad_norm": 0.06521395593881607, "learning_rate": 2.2771965912430212e-05, "loss": 0.31739959716796873, "step": 8700 }, { "epoch": 7.602085143353605, "grad_norm": 0.023135656490921974, "learning_rate": 2.2727887158389656e-05, "loss": 0.4019832992553711, "step": 8750 }, { "epoch": 7.645525629887055, "grad_norm": 0.0029301783069968224, "learning_rate": 2.2683808404349107e-05, "loss": 0.3610734558105469, "step": 8800 }, { "epoch": 7.688966116420504, "grad_norm": 155.0016326904297, "learning_rate": 2.263972965030855e-05, "loss": 0.4037496566772461, "step": 8850 }, { "epoch": 7.732406602953953, "grad_norm": 25.407201766967773, "learning_rate": 2.2595650896268002e-05, "loss": 0.22004886627197265, "step": 8900 }, { "epoch": 7.775847089487402, "grad_norm": 0.04883955046534538, "learning_rate": 2.2551572142227446e-05, "loss": 0.4110527420043945, "step": 8950 }, { "epoch": 7.819287576020852, "grad_norm": 6.237477779388428, "learning_rate": 2.2507493388186897e-05, "loss": 0.21914356231689452, "step": 9000 }, { "epoch": 7.819287576020852, "eval_cer": 0.11708294955843311, "eval_loss": 0.6818587183952332, "eval_runtime": 35.3665, "eval_samples_per_second": 28.219, "eval_steps_per_second": 14.109, "eval_wer": 0.37706855791962174, "step": 9000 }, { "epoch": 7.8627280625543, "grad_norm": 2.864680528640747, "learning_rate": 2.246341463414634e-05, "loss": 0.3091525459289551, "step": 9050 }, { "epoch": 7.90616854908775, "grad_norm": 0.009744558483362198, "learning_rate": 2.241933588010579e-05, "loss": 0.18931781768798828, "step": 9100 }, { "epoch": 7.949609035621199, "grad_norm": 0.018469370901584625, "learning_rate": 2.2375257126065236e-05, "loss": 0.25914777755737306, "step": 9150 }, { "epoch": 7.9930495221546485, "grad_norm": 0.11463995277881622, "learning_rate": 2.2331178372024684e-05, "loss": 0.3924109649658203, "step": 9200 }, { "epoch": 8.036490008688098, "grad_norm": 0.18527474999427795, "learning_rate": 2.228709961798413e-05, "loss": 0.4139134979248047, "step": 9250 }, { "epoch": 8.079930495221546, "grad_norm": 0.012078936211764812, "learning_rate": 2.224302086394358e-05, "loss": 0.20489992141723634, "step": 9300 }, { "epoch": 8.123370981754995, "grad_norm": 0.026449766010046005, "learning_rate": 2.2198942109903026e-05, "loss": 0.23680988311767578, "step": 9350 }, { "epoch": 8.166811468288445, "grad_norm": 1.4742465019226074, "learning_rate": 2.2154863355862474e-05, "loss": 0.2400914192199707, "step": 9400 }, { "epoch": 8.210251954821894, "grad_norm": 0.0015448889462277293, "learning_rate": 2.211078460182192e-05, "loss": 0.31873985290527346, "step": 9450 }, { "epoch": 8.253692441355343, "grad_norm": 0.6546465158462524, "learning_rate": 2.206670584778137e-05, "loss": 0.16370586395263673, "step": 9500 }, { "epoch": 8.253692441355343, "eval_cer": 0.11431977027685973, "eval_loss": 0.722854495048523, "eval_runtime": 34.9616, "eval_samples_per_second": 28.546, "eval_steps_per_second": 14.273, "eval_wer": 0.3602245862884161, "step": 9500 }, { "epoch": 8.297132927888793, "grad_norm": 0.0220937579870224, "learning_rate": 2.2022627093740816e-05, "loss": 0.16179698944091797, "step": 9550 }, { "epoch": 8.340573414422241, "grad_norm": 0.8495884537696838, "learning_rate": 2.1978548339700264e-05, "loss": 0.2629365348815918, "step": 9600 }, { "epoch": 8.38401390095569, "grad_norm": 0.9506490230560303, "learning_rate": 2.1934469585659715e-05, "loss": 0.2445651626586914, "step": 9650 }, { "epoch": 8.42745438748914, "grad_norm": 0.26221564412117004, "learning_rate": 2.189039083161916e-05, "loss": 0.20401872634887697, "step": 9700 }, { "epoch": 8.470894874022589, "grad_norm": 0.00027192034758627415, "learning_rate": 2.184631207757861e-05, "loss": 0.2544666290283203, "step": 9750 }, { "epoch": 8.514335360556037, "grad_norm": 0.04336933791637421, "learning_rate": 2.1802233323538054e-05, "loss": 0.3723867797851563, "step": 9800 }, { "epoch": 8.557775847089488, "grad_norm": 0.14333416521549225, "learning_rate": 2.1758154569497505e-05, "loss": 0.25252397537231447, "step": 9850 }, { "epoch": 8.601216333622936, "grad_norm": 0.31019526720046997, "learning_rate": 2.171407581545695e-05, "loss": 0.23379629135131835, "step": 9900 }, { "epoch": 8.644656820156385, "grad_norm": 0.9922002553939819, "learning_rate": 2.16699970614164e-05, "loss": 0.3892123031616211, "step": 9950 }, { "epoch": 8.688097306689835, "grad_norm": 0.00887572392821312, "learning_rate": 2.1625918307375844e-05, "loss": 0.2666620254516602, "step": 10000 }, { "epoch": 8.688097306689835, "eval_cer": 0.11231511079807119, "eval_loss": 0.7393125891685486, "eval_runtime": 35.3264, "eval_samples_per_second": 28.251, "eval_steps_per_second": 14.125, "eval_wer": 0.35726950354609927, "step": 10000 }, { "epoch": 8.731537793223284, "grad_norm": 0.017117468640208244, "learning_rate": 2.1581839553335295e-05, "loss": 0.18703149795532226, "step": 10050 }, { "epoch": 8.774978279756734, "grad_norm": 0.20450972020626068, "learning_rate": 2.153776079929474e-05, "loss": 0.16164979934692383, "step": 10100 }, { "epoch": 8.818418766290183, "grad_norm": 0.00887273158878088, "learning_rate": 2.149368204525419e-05, "loss": 0.2993427085876465, "step": 10150 }, { "epoch": 8.861859252823631, "grad_norm": 0.00210910034365952, "learning_rate": 2.1449603291213634e-05, "loss": 0.2953006172180176, "step": 10200 }, { "epoch": 8.90529973935708, "grad_norm": 0.0051006837747991085, "learning_rate": 2.1405524537173085e-05, "loss": 0.24485448837280274, "step": 10250 }, { "epoch": 8.94874022589053, "grad_norm": 0.5796188712120056, "learning_rate": 2.136144578313253e-05, "loss": 0.3098959159851074, "step": 10300 }, { "epoch": 8.992180712423979, "grad_norm": 0.01159872580319643, "learning_rate": 2.131736702909198e-05, "loss": 0.27299707412719726, "step": 10350 }, { "epoch": 9.035621198957429, "grad_norm": 2.516123056411743, "learning_rate": 2.1273288275051424e-05, "loss": 0.25595357894897464, "step": 10400 }, { "epoch": 9.079061685490878, "grad_norm": 0.0016837273724377155, "learning_rate": 2.1229209521010872e-05, "loss": 0.2204635238647461, "step": 10450 }, { "epoch": 9.122502172024326, "grad_norm": 0.004055003169924021, "learning_rate": 2.118513076697032e-05, "loss": 0.29069057464599607, "step": 10500 }, { "epoch": 9.122502172024326, "eval_cer": 0.1140488703472937, "eval_loss": 0.7343300580978394, "eval_runtime": 35.4446, "eval_samples_per_second": 28.157, "eval_steps_per_second": 14.078, "eval_wer": 0.3472222222222222, "step": 10500 }, { "epoch": 9.165942658557777, "grad_norm": 10.814416885375977, "learning_rate": 2.1141052012929767e-05, "loss": 0.22457393646240234, "step": 10550 }, { "epoch": 9.209383145091225, "grad_norm": 0.531550407409668, "learning_rate": 2.1096973258889218e-05, "loss": 0.19709733963012696, "step": 10600 }, { "epoch": 9.252823631624674, "grad_norm": 0.02372005581855774, "learning_rate": 2.1052894504848662e-05, "loss": 0.21724346160888672, "step": 10650 }, { "epoch": 9.296264118158124, "grad_norm": 0.003351462772116065, "learning_rate": 2.1008815750808113e-05, "loss": 0.2724002838134766, "step": 10700 }, { "epoch": 9.339704604691573, "grad_norm": 0.2525140047073364, "learning_rate": 2.0964736996767557e-05, "loss": 0.23882347106933594, "step": 10750 }, { "epoch": 9.383145091225021, "grad_norm": 0.14738580584526062, "learning_rate": 2.0920658242727008e-05, "loss": 0.1564232349395752, "step": 10800 }, { "epoch": 9.426585577758472, "grad_norm": 0.10283453017473221, "learning_rate": 2.0876579488686452e-05, "loss": 0.14069479942321778, "step": 10850 }, { "epoch": 9.47002606429192, "grad_norm": 0.07120943069458008, "learning_rate": 2.0832500734645903e-05, "loss": 0.20460891723632812, "step": 10900 }, { "epoch": 9.513466550825369, "grad_norm": 0.229303777217865, "learning_rate": 2.0788421980605347e-05, "loss": 0.29092355728149416, "step": 10950 }, { "epoch": 9.556907037358819, "grad_norm": 0.011797781102359295, "learning_rate": 2.0744343226564798e-05, "loss": 0.14928483963012695, "step": 11000 }, { "epoch": 9.556907037358819, "eval_cer": 0.11589098986834263, "eval_loss": 0.6468539237976074, "eval_runtime": 35.2293, "eval_samples_per_second": 28.329, "eval_steps_per_second": 14.164, "eval_wer": 0.366725768321513, "step": 11000 }, { "epoch": 9.600347523892268, "grad_norm": 0.0010864798678085208, "learning_rate": 2.0700264472524242e-05, "loss": 0.22057802200317383, "step": 11050 }, { "epoch": 9.643788010425716, "grad_norm": 0.00047053879825398326, "learning_rate": 2.0656185718483693e-05, "loss": 0.1952187156677246, "step": 11100 }, { "epoch": 9.687228496959166, "grad_norm": 0.004543100483715534, "learning_rate": 2.0612106964443137e-05, "loss": 0.3174121856689453, "step": 11150 }, { "epoch": 9.730668983492615, "grad_norm": 0.0010513780871406198, "learning_rate": 2.0568028210402588e-05, "loss": 0.16007177352905275, "step": 11200 }, { "epoch": 9.774109470026064, "grad_norm": 0.0026681029703468084, "learning_rate": 2.0523949456362032e-05, "loss": 0.2065435218811035, "step": 11250 }, { "epoch": 9.817549956559514, "grad_norm": 0.2069607824087143, "learning_rate": 2.0479870702321483e-05, "loss": 0.22219644546508788, "step": 11300 }, { "epoch": 9.860990443092962, "grad_norm": 0.012031909078359604, "learning_rate": 2.0435791948280927e-05, "loss": 0.1956252098083496, "step": 11350 }, { "epoch": 9.904430929626411, "grad_norm": 0.0008321640198118985, "learning_rate": 2.0391713194240378e-05, "loss": 0.3007790565490723, "step": 11400 }, { "epoch": 9.947871416159861, "grad_norm": 0.00023682558094151318, "learning_rate": 2.0347634440199825e-05, "loss": 0.25140411376953126, "step": 11450 }, { "epoch": 9.99131190269331, "grad_norm": 0.18799935281276703, "learning_rate": 2.0303555686159273e-05, "loss": 0.20868509292602538, "step": 11500 }, { "epoch": 9.99131190269331, "eval_cer": 0.11296527062902964, "eval_loss": 0.871296226978302, "eval_runtime": 35.7614, "eval_samples_per_second": 27.907, "eval_steps_per_second": 13.954, "eval_wer": 0.35786052009456265, "step": 11500 }, { "epoch": 10.034752389226758, "grad_norm": 0.001729931216686964, "learning_rate": 2.025947693211872e-05, "loss": 0.2267488098144531, "step": 11550 }, { "epoch": 10.078192875760209, "grad_norm": 0.0033665213268250227, "learning_rate": 2.0215398178078168e-05, "loss": 0.11680364608764648, "step": 11600 }, { "epoch": 10.121633362293657, "grad_norm": 0.000843276153318584, "learning_rate": 2.0171319424037616e-05, "loss": 0.20855466842651368, "step": 11650 }, { "epoch": 10.165073848827106, "grad_norm": 0.0007557457429356873, "learning_rate": 2.0127240669997063e-05, "loss": 0.17802534103393555, "step": 11700 }, { "epoch": 10.208514335360556, "grad_norm": 0.08655949681997299, "learning_rate": 2.008316191595651e-05, "loss": 0.14240021705627443, "step": 11750 }, { "epoch": 10.251954821894005, "grad_norm": 55.311119079589844, "learning_rate": 2.0039083161915955e-05, "loss": 0.3166378211975098, "step": 11800 }, { "epoch": 10.295395308427455, "grad_norm": 0.04812853783369064, "learning_rate": 1.9995004407875406e-05, "loss": 0.1832990837097168, "step": 11850 }, { "epoch": 10.338835794960904, "grad_norm": 4.462372303009033, "learning_rate": 1.995092565383485e-05, "loss": 0.1998225212097168, "step": 11900 }, { "epoch": 10.382276281494352, "grad_norm": 0.030581099912524223, "learning_rate": 1.99068468997943e-05, "loss": 0.1649586296081543, "step": 11950 }, { "epoch": 10.425716768027803, "grad_norm": 0.0061181094497442245, "learning_rate": 1.9862768145753745e-05, "loss": 0.21640779495239257, "step": 12000 }, { "epoch": 10.425716768027803, "eval_cer": 0.11507829007964458, "eval_loss": 0.7006326913833618, "eval_runtime": 35.3681, "eval_samples_per_second": 28.218, "eval_steps_per_second": 14.109, "eval_wer": 0.3489952718676123, "step": 12000 }, { "epoch": 10.469157254561251, "grad_norm": 9.876059532165527, "learning_rate": 1.9818689391713196e-05, "loss": 0.24267179489135743, "step": 12050 }, { "epoch": 10.5125977410947, "grad_norm": 0.017044102773070335, "learning_rate": 1.977461063767264e-05, "loss": 0.17035614013671874, "step": 12100 }, { "epoch": 10.55603822762815, "grad_norm": 0.0013389646774157882, "learning_rate": 1.973053188363209e-05, "loss": 0.154972562789917, "step": 12150 }, { "epoch": 10.599478714161599, "grad_norm": 0.0071999249048531055, "learning_rate": 1.9686453129591535e-05, "loss": 0.08084283828735352, "step": 12200 }, { "epoch": 10.642919200695047, "grad_norm": 0.29191315174102783, "learning_rate": 1.9642374375550986e-05, "loss": 0.2428382682800293, "step": 12250 }, { "epoch": 10.686359687228498, "grad_norm": 2.9929769039154053, "learning_rate": 1.959829562151043e-05, "loss": 0.165596923828125, "step": 12300 }, { "epoch": 10.729800173761946, "grad_norm": 0.6568811535835266, "learning_rate": 1.955421686746988e-05, "loss": 0.24114521026611327, "step": 12350 }, { "epoch": 10.773240660295395, "grad_norm": 0.00521878432482481, "learning_rate": 1.9510138113429328e-05, "loss": 0.13222161293029785, "step": 12400 }, { "epoch": 10.816681146828845, "grad_norm": 0.004102786537259817, "learning_rate": 1.9466059359388776e-05, "loss": 0.2611697006225586, "step": 12450 }, { "epoch": 10.860121633362294, "grad_norm": 0.009258633479475975, "learning_rate": 1.9421980605348223e-05, "loss": 0.17743043899536132, "step": 12500 }, { "epoch": 10.860121633362294, "eval_cer": 0.11280273067129003, "eval_loss": 0.8380096554756165, "eval_runtime": 35.6349, "eval_samples_per_second": 28.006, "eval_steps_per_second": 14.003, "eval_wer": 0.3549054373522459, "step": 12500 }, { "epoch": 10.903562119895742, "grad_norm": 0.010119021870195866, "learning_rate": 1.937790185130767e-05, "loss": 0.2513529586791992, "step": 12550 }, { "epoch": 10.947002606429193, "grad_norm": 0.06954587996006012, "learning_rate": 1.9333823097267118e-05, "loss": 0.18938690185546875, "step": 12600 }, { "epoch": 10.990443092962641, "grad_norm": 0.012158134952187538, "learning_rate": 1.9289744343226566e-05, "loss": 0.1424751377105713, "step": 12650 }, { "epoch": 11.03388357949609, "grad_norm": 0.35711684823036194, "learning_rate": 1.9245665589186013e-05, "loss": 0.22175674438476561, "step": 12700 }, { "epoch": 11.07732406602954, "grad_norm": 0.029316997155547142, "learning_rate": 1.920158683514546e-05, "loss": 0.09745993614196777, "step": 12750 }, { "epoch": 11.120764552562989, "grad_norm": 0.0045172832906246185, "learning_rate": 1.915750808110491e-05, "loss": 0.14967589378356932, "step": 12800 }, { "epoch": 11.164205039096437, "grad_norm": 0.1485351026058197, "learning_rate": 1.9113429327064356e-05, "loss": 0.15214619636535645, "step": 12850 }, { "epoch": 11.207645525629887, "grad_norm": 0.013465415686368942, "learning_rate": 1.9069350573023803e-05, "loss": 0.20777603149414062, "step": 12900 }, { "epoch": 11.251086012163336, "grad_norm": 0.003324932884424925, "learning_rate": 1.902527181898325e-05, "loss": 0.14591985702514648, "step": 12950 }, { "epoch": 11.294526498696785, "grad_norm": 18.002288818359375, "learning_rate": 1.89811930649427e-05, "loss": 0.0729653549194336, "step": 13000 }, { "epoch": 11.294526498696785, "eval_cer": 0.10716801213631684, "eval_loss": 0.8233883380889893, "eval_runtime": 35.8606, "eval_samples_per_second": 27.83, "eval_steps_per_second": 13.915, "eval_wer": 0.3354018912529551, "step": 13000 }, { "epoch": 11.337966985230235, "grad_norm": 0.008703617379069328, "learning_rate": 1.8937114310902146e-05, "loss": 0.18068933486938477, "step": 13050 }, { "epoch": 11.381407471763684, "grad_norm": 0.016712911427021027, "learning_rate": 1.8893035556861593e-05, "loss": 0.18568845748901366, "step": 13100 }, { "epoch": 11.424847958297132, "grad_norm": 0.0025050437543541193, "learning_rate": 1.884895680282104e-05, "loss": 0.12276277542114258, "step": 13150 }, { "epoch": 11.468288444830582, "grad_norm": 0.0009163509821519256, "learning_rate": 1.880487804878049e-05, "loss": 0.14427170753479004, "step": 13200 }, { "epoch": 11.511728931364031, "grad_norm": 0.0007597589865326881, "learning_rate": 1.8760799294739933e-05, "loss": 0.21098020553588867, "step": 13250 }, { "epoch": 11.555169417897481, "grad_norm": 0.00016254196816589683, "learning_rate": 1.8716720540699384e-05, "loss": 0.18421314239501954, "step": 13300 }, { "epoch": 11.59860990443093, "grad_norm": 0.006345795933157206, "learning_rate": 1.867264178665883e-05, "loss": 0.20616317749023438, "step": 13350 }, { "epoch": 11.642050390964378, "grad_norm": 0.0005729036638513207, "learning_rate": 1.862856303261828e-05, "loss": 0.10284842491149902, "step": 13400 }, { "epoch": 11.685490877497829, "grad_norm": 0.014439227990806103, "learning_rate": 1.8584484278577726e-05, "loss": 0.17948501586914062, "step": 13450 }, { "epoch": 11.728931364031277, "grad_norm": 1.6784127950668335, "learning_rate": 1.8540405524537174e-05, "loss": 0.15696640014648439, "step": 13500 }, { "epoch": 11.728931364031277, "eval_cer": 0.1050549926857019, "eval_loss": 0.827880322933197, "eval_runtime": 35.4053, "eval_samples_per_second": 28.188, "eval_steps_per_second": 14.094, "eval_wer": 0.3271276595744681, "step": 13500 }, { "epoch": 11.772371850564726, "grad_norm": 0.0005249602254480124, "learning_rate": 1.849632677049662e-05, "loss": 0.1481422519683838, "step": 13550 }, { "epoch": 11.815812337098176, "grad_norm": 0.04822874069213867, "learning_rate": 1.845224801645607e-05, "loss": 0.17740755081176757, "step": 13600 }, { "epoch": 11.859252823631625, "grad_norm": 0.0025418957229703665, "learning_rate": 1.8408169262415516e-05, "loss": 0.12424736022949219, "step": 13650 }, { "epoch": 11.902693310165073, "grad_norm": 0.004390745423734188, "learning_rate": 1.8364090508374964e-05, "loss": 0.13344883918762207, "step": 13700 }, { "epoch": 11.946133796698524, "grad_norm": 32.29993438720703, "learning_rate": 1.832001175433441e-05, "loss": 0.08959797859191894, "step": 13750 }, { "epoch": 11.989574283231972, "grad_norm": 0.01902751810848713, "learning_rate": 1.827593300029386e-05, "loss": 0.15901991844177246, "step": 13800 }, { "epoch": 12.03301476976542, "grad_norm": 0.0059561156667768955, "learning_rate": 1.8231854246253306e-05, "loss": 0.17461122512817384, "step": 13850 }, { "epoch": 12.076455256298871, "grad_norm": 0.018380964174866676, "learning_rate": 1.8187775492212754e-05, "loss": 0.07262963771820069, "step": 13900 }, { "epoch": 12.11989574283232, "grad_norm": 0.0007720252615399659, "learning_rate": 1.81436967381722e-05, "loss": 0.12178866386413574, "step": 13950 }, { "epoch": 12.163336229365768, "grad_norm": 0.005173459183424711, "learning_rate": 1.809961798413165e-05, "loss": 0.18882158279418945, "step": 14000 }, { "epoch": 12.163336229365768, "eval_cer": 0.109335211572845, "eval_loss": 0.7686098217964172, "eval_runtime": 35.2553, "eval_samples_per_second": 28.308, "eval_steps_per_second": 14.154, "eval_wer": 0.3380614657210402, "step": 14000 }, { "epoch": 12.206776715899219, "grad_norm": 0.005660334601998329, "learning_rate": 1.8055539230091096e-05, "loss": 0.10836532592773437, "step": 14050 }, { "epoch": 12.250217202432667, "grad_norm": 0.24879610538482666, "learning_rate": 1.8011460476050544e-05, "loss": 0.06245335102081299, "step": 14100 }, { "epoch": 12.293657688966116, "grad_norm": 0.0002563217713031918, "learning_rate": 1.796738172200999e-05, "loss": 0.09659749031066894, "step": 14150 }, { "epoch": 12.337098175499566, "grad_norm": 0.0001977673382498324, "learning_rate": 1.792330296796944e-05, "loss": 0.08525155067443847, "step": 14200 }, { "epoch": 12.380538662033015, "grad_norm": 0.00024911269429139793, "learning_rate": 1.7879224213928886e-05, "loss": 0.1183913516998291, "step": 14250 }, { "epoch": 12.423979148566463, "grad_norm": 0.001824671751819551, "learning_rate": 1.7835145459888337e-05, "loss": 0.08873219490051269, "step": 14300 }, { "epoch": 12.467419635099914, "grad_norm": 0.004962866194546223, "learning_rate": 1.779106670584778e-05, "loss": 0.11354425430297851, "step": 14350 }, { "epoch": 12.510860121633362, "grad_norm": 0.0757075771689415, "learning_rate": 1.7746987951807232e-05, "loss": 0.11156253814697266, "step": 14400 }, { "epoch": 12.55430060816681, "grad_norm": 0.02478897199034691, "learning_rate": 1.7702909197766676e-05, "loss": 0.12282137870788574, "step": 14450 }, { "epoch": 12.597741094700261, "grad_norm": 2.5461020469665527, "learning_rate": 1.7658830443726127e-05, "loss": 0.11555877685546875, "step": 14500 }, { "epoch": 12.597741094700261, "eval_cer": 0.10830579184049412, "eval_loss": 0.92600417137146, "eval_runtime": 35.2867, "eval_samples_per_second": 28.283, "eval_steps_per_second": 14.141, "eval_wer": 0.3309692671394799, "step": 14500 }, { "epoch": 12.64118158123371, "grad_norm": 0.5628868341445923, "learning_rate": 1.761475168968557e-05, "loss": 0.2381545639038086, "step": 14550 }, { "epoch": 12.684622067767158, "grad_norm": 0.01276449766010046, "learning_rate": 1.757067293564502e-05, "loss": 0.10359532356262208, "step": 14600 }, { "epoch": 12.728062554300609, "grad_norm": 0.009611076675355434, "learning_rate": 1.7526594181604466e-05, "loss": 0.10290337562561035, "step": 14650 }, { "epoch": 12.771503040834057, "grad_norm": 0.00047707941848784685, "learning_rate": 1.7482515427563914e-05, "loss": 0.20995697021484375, "step": 14700 }, { "epoch": 12.814943527367507, "grad_norm": 10.169084548950195, "learning_rate": 1.743843667352336e-05, "loss": 0.15165854454040528, "step": 14750 }, { "epoch": 12.858384013900956, "grad_norm": 0.0020368106197565794, "learning_rate": 1.739435791948281e-05, "loss": 0.22781238555908204, "step": 14800 }, { "epoch": 12.901824500434405, "grad_norm": 0.04858289286494255, "learning_rate": 1.7350279165442256e-05, "loss": 0.13032222747802735, "step": 14850 }, { "epoch": 12.945264986967853, "grad_norm": 0.0008267110679298639, "learning_rate": 1.7306200411401704e-05, "loss": 0.06278028964996338, "step": 14900 }, { "epoch": 12.988705473501303, "grad_norm": 0.14715807139873505, "learning_rate": 1.726212165736115e-05, "loss": 0.16469184875488282, "step": 14950 }, { "epoch": 13.032145960034752, "grad_norm": 0.18887297809123993, "learning_rate": 1.72180429033206e-05, "loss": 0.248513126373291, "step": 15000 }, { "epoch": 13.032145960034752, "eval_cer": 0.11063553123476189, "eval_loss": 0.8484429717063904, "eval_runtime": 35.4635, "eval_samples_per_second": 28.142, "eval_steps_per_second": 14.071, "eval_wer": 0.33747044917257685, "step": 15000 }, { "epoch": 13.075586446568202, "grad_norm": 0.0044303713366389275, "learning_rate": 1.7173964149280047e-05, "loss": 0.13554862022399902, "step": 15050 }, { "epoch": 13.119026933101651, "grad_norm": 0.006357671692967415, "learning_rate": 1.7129885395239494e-05, "loss": 0.1657179069519043, "step": 15100 }, { "epoch": 13.1624674196351, "grad_norm": 0.004660587292164564, "learning_rate": 1.7085806641198945e-05, "loss": 0.07184979438781738, "step": 15150 }, { "epoch": 13.20590790616855, "grad_norm": 0.001002687495201826, "learning_rate": 1.704172788715839e-05, "loss": 0.11178950309753417, "step": 15200 }, { "epoch": 13.249348392701998, "grad_norm": 0.0017005419358611107, "learning_rate": 1.699764913311784e-05, "loss": 0.14817577362060547, "step": 15250 }, { "epoch": 13.292788879235447, "grad_norm": 30.164806365966797, "learning_rate": 1.6953570379077284e-05, "loss": 0.11133524894714356, "step": 15300 }, { "epoch": 13.336229365768897, "grad_norm": 0.20776331424713135, "learning_rate": 1.6909491625036735e-05, "loss": 0.08040478706359863, "step": 15350 }, { "epoch": 13.379669852302346, "grad_norm": 0.0001020112176775001, "learning_rate": 1.686541287099618e-05, "loss": 0.15835739135742188, "step": 15400 }, { "epoch": 13.423110338835794, "grad_norm": 0.020164845511317253, "learning_rate": 1.682133411695563e-05, "loss": 0.14341225624084472, "step": 15450 }, { "epoch": 13.466550825369245, "grad_norm": 0.0017340014455839992, "learning_rate": 1.6777255362915074e-05, "loss": 0.1316046142578125, "step": 15500 }, { "epoch": 13.466550825369245, "eval_cer": 0.10104567372812483, "eval_loss": 0.9770230650901794, "eval_runtime": 35.2978, "eval_samples_per_second": 28.274, "eval_steps_per_second": 14.137, "eval_wer": 0.32062647754137114, "step": 15500 }, { "epoch": 13.509991311902693, "grad_norm": 0.10325725376605988, "learning_rate": 1.6733176608874525e-05, "loss": 0.14392637252807616, "step": 15550 }, { "epoch": 13.553431798436142, "grad_norm": 7.639220714569092, "learning_rate": 1.668909785483397e-05, "loss": 0.11816396713256835, "step": 15600 }, { "epoch": 13.596872284969592, "grad_norm": 0.011842885985970497, "learning_rate": 1.664501910079342e-05, "loss": 0.06573171615600586, "step": 15650 }, { "epoch": 13.64031277150304, "grad_norm": 0.36505550146102905, "learning_rate": 1.6600940346752864e-05, "loss": 0.12598639488220215, "step": 15700 }, { "epoch": 13.68375325803649, "grad_norm": 0.01986199989914894, "learning_rate": 1.6556861592712315e-05, "loss": 0.08807419776916504, "step": 15750 }, { "epoch": 13.72719374456994, "grad_norm": 0.0006646508118137717, "learning_rate": 1.651278283867176e-05, "loss": 0.07460322380065917, "step": 15800 }, { "epoch": 13.770634231103388, "grad_norm": 0.017491919919848442, "learning_rate": 1.646870408463121e-05, "loss": 0.08792648315429688, "step": 15850 }, { "epoch": 13.814074717636837, "grad_norm": 64.46247863769531, "learning_rate": 1.6424625330590654e-05, "loss": 0.19781913757324218, "step": 15900 }, { "epoch": 13.857515204170287, "grad_norm": 0.004558779299259186, "learning_rate": 1.6380546576550102e-05, "loss": 0.10111617088317872, "step": 15950 }, { "epoch": 13.900955690703736, "grad_norm": 0.00020643201423808932, "learning_rate": 1.633646782250955e-05, "loss": 0.08666461944580078, "step": 16000 }, { "epoch": 13.900955690703736, "eval_cer": 0.10776399198136208, "eval_loss": 0.8977736234664917, "eval_runtime": 35.6835, "eval_samples_per_second": 27.968, "eval_steps_per_second": 13.984, "eval_wer": 0.3271276595744681, "step": 16000 }, { "epoch": 13.944396177237184, "grad_norm": 0.031363021582365036, "learning_rate": 1.6292389068468997e-05, "loss": 0.09195023536682129, "step": 16050 }, { "epoch": 13.987836663770635, "grad_norm": 0.024453002959489822, "learning_rate": 1.6248310314428448e-05, "loss": 0.05720340728759766, "step": 16100 }, { "epoch": 14.031277150304083, "grad_norm": 0.020940568298101425, "learning_rate": 1.6204231560387892e-05, "loss": 0.11965296745300293, "step": 16150 }, { "epoch": 14.074717636837532, "grad_norm": 0.020178375765681267, "learning_rate": 1.6160152806347343e-05, "loss": 0.11014815330505372, "step": 16200 }, { "epoch": 14.118158123370982, "grad_norm": 1.0401362180709839, "learning_rate": 1.6116074052306787e-05, "loss": 0.06974054336547851, "step": 16250 }, { "epoch": 14.16159860990443, "grad_norm": 0.007594361901283264, "learning_rate": 1.6071995298266238e-05, "loss": 0.041026763916015625, "step": 16300 }, { "epoch": 14.20503909643788, "grad_norm": 0.0018089961959049106, "learning_rate": 1.6027916544225682e-05, "loss": 0.05419292449951172, "step": 16350 }, { "epoch": 14.24847958297133, "grad_norm": 8.15002727508545, "learning_rate": 1.5983837790185133e-05, "loss": 0.0943959903717041, "step": 16400 }, { "epoch": 14.291920069504778, "grad_norm": 0.0004822172923013568, "learning_rate": 1.5939759036144577e-05, "loss": 0.09241563796997071, "step": 16450 }, { "epoch": 14.335360556038228, "grad_norm": 0.0005213666008785367, "learning_rate": 1.5895680282104028e-05, "loss": 0.08321575164794921, "step": 16500 }, { "epoch": 14.335360556038228, "eval_cer": 0.10608441241805278, "eval_loss": 0.936793327331543, "eval_runtime": 35.3702, "eval_samples_per_second": 28.216, "eval_steps_per_second": 14.108, "eval_wer": 0.32476359338061467, "step": 16500 }, { "epoch": 14.378801042571677, "grad_norm": 7.210012699943036e-05, "learning_rate": 1.5851601528063472e-05, "loss": 0.09619697570800781, "step": 16550 }, { "epoch": 14.422241529105126, "grad_norm": 4.460615158081055, "learning_rate": 1.5807522774022923e-05, "loss": 0.06220272541046143, "step": 16600 }, { "epoch": 14.465682015638576, "grad_norm": 3.5526578426361084, "learning_rate": 1.5763444019982367e-05, "loss": 0.07247277259826661, "step": 16650 }, { "epoch": 14.509122502172024, "grad_norm": 0.0009735809871926904, "learning_rate": 1.5719365265941818e-05, "loss": 0.14418716430664064, "step": 16700 }, { "epoch": 14.552562988705473, "grad_norm": 0.002880257787182927, "learning_rate": 1.5675286511901262e-05, "loss": 0.05156928539276123, "step": 16750 }, { "epoch": 14.596003475238923, "grad_norm": 0.009934864938259125, "learning_rate": 1.5631207757860713e-05, "loss": 0.1062159538269043, "step": 16800 }, { "epoch": 14.639443961772372, "grad_norm": 0.13457264006137848, "learning_rate": 1.5587129003820157e-05, "loss": 0.05868762016296387, "step": 16850 }, { "epoch": 14.68288444830582, "grad_norm": 0.00943897757679224, "learning_rate": 1.5543050249779608e-05, "loss": 0.07642593383789062, "step": 16900 }, { "epoch": 14.72632493483927, "grad_norm": 0.026743775233626366, "learning_rate": 1.5498971495739052e-05, "loss": 0.10913041114807129, "step": 16950 }, { "epoch": 14.76976542137272, "grad_norm": 0.003263711929321289, "learning_rate": 1.5454892741698503e-05, "loss": 0.12181022644042969, "step": 17000 }, { "epoch": 14.76976542137272, "eval_cer": 0.10294197323508696, "eval_loss": 0.8898913264274597, "eval_runtime": 35.138, "eval_samples_per_second": 28.402, "eval_steps_per_second": 14.201, "eval_wer": 0.3188534278959811, "step": 17000 }, { "epoch": 14.813205907906168, "grad_norm": 0.020024575293064117, "learning_rate": 1.541081398765795e-05, "loss": 0.06474356651306153, "step": 17050 }, { "epoch": 14.856646394439618, "grad_norm": 0.02727115899324417, "learning_rate": 1.5366735233617398e-05, "loss": 0.064862699508667, "step": 17100 }, { "epoch": 14.900086880973067, "grad_norm": 0.06588542461395264, "learning_rate": 1.5322656479576846e-05, "loss": 0.11551046371459961, "step": 17150 }, { "epoch": 14.943527367506515, "grad_norm": 1.188116431236267, "learning_rate": 1.5278577725536293e-05, "loss": 0.0937428092956543, "step": 17200 }, { "epoch": 14.986967854039966, "grad_norm": 0.003894130466505885, "learning_rate": 1.5234498971495739e-05, "loss": 0.11846747398376464, "step": 17250 }, { "epoch": 15.030408340573414, "grad_norm": 0.0014571856008842587, "learning_rate": 1.5190420217455185e-05, "loss": 0.05842185020446777, "step": 17300 }, { "epoch": 15.073848827106863, "grad_norm": 0.0016659823013469577, "learning_rate": 1.5146341463414634e-05, "loss": 0.055425772666931154, "step": 17350 }, { "epoch": 15.117289313640313, "grad_norm": 0.0008578883716836572, "learning_rate": 1.510226270937408e-05, "loss": 0.10561844825744629, "step": 17400 }, { "epoch": 15.160729800173762, "grad_norm": 0.028916161507368088, "learning_rate": 1.505818395533353e-05, "loss": 0.15631651878356934, "step": 17450 }, { "epoch": 15.20417028670721, "grad_norm": 0.01692270301282406, "learning_rate": 1.5014105201292976e-05, "loss": 0.04396585464477539, "step": 17500 }, { "epoch": 15.20417028670721, "eval_cer": 0.10250853334778133, "eval_loss": 0.9507099390029907, "eval_runtime": 35.4616, "eval_samples_per_second": 28.143, "eval_steps_per_second": 14.072, "eval_wer": 0.3200354609929078, "step": 17500 }, { "epoch": 15.24761077324066, "grad_norm": 0.00015645832172594965, "learning_rate": 1.4970026447252426e-05, "loss": 0.10505289077758789, "step": 17550 }, { "epoch": 15.29105125977411, "grad_norm": 6.809161277487874e-05, "learning_rate": 1.4925947693211873e-05, "loss": 0.06061763286590576, "step": 17600 }, { "epoch": 15.334491746307558, "grad_norm": 0.002175210742279887, "learning_rate": 1.488186893917132e-05, "loss": 0.08643261909484863, "step": 17650 }, { "epoch": 15.377932232841008, "grad_norm": 0.00033852062188088894, "learning_rate": 1.4837790185130768e-05, "loss": 0.058766045570373536, "step": 17700 }, { "epoch": 15.421372719374457, "grad_norm": 0.032032400369644165, "learning_rate": 1.4793711431090216e-05, "loss": 0.06575697422027588, "step": 17750 }, { "epoch": 15.464813205907905, "grad_norm": 0.007524843327701092, "learning_rate": 1.4749632677049663e-05, "loss": 0.039991099834442136, "step": 17800 }, { "epoch": 15.508253692441356, "grad_norm": 0.38591468334198, "learning_rate": 1.470555392300911e-05, "loss": 0.06621292591094971, "step": 17850 }, { "epoch": 15.551694178974804, "grad_norm": 0.005559583194553852, "learning_rate": 1.4661475168968558e-05, "loss": 0.05986703395843506, "step": 17900 }, { "epoch": 15.595134665508255, "grad_norm": 0.0015642516082152724, "learning_rate": 1.4617396414928004e-05, "loss": 0.05914860725402832, "step": 17950 }, { "epoch": 15.638575152041703, "grad_norm": 0.025494471192359924, "learning_rate": 1.4573317660887452e-05, "loss": 0.049571285247802736, "step": 18000 }, { "epoch": 15.638575152041703, "eval_cer": 0.10175001354499648, "eval_loss": 0.9704659581184387, "eval_runtime": 35.2476, "eval_samples_per_second": 28.314, "eval_steps_per_second": 14.157, "eval_wer": 0.31501182033096925, "step": 18000 }, { "epoch": 15.682015638575152, "grad_norm": 0.0005379091016948223, "learning_rate": 1.45292389068469e-05, "loss": 0.04489382266998291, "step": 18050 }, { "epoch": 15.725456125108602, "grad_norm": 0.0002697557501960546, "learning_rate": 1.4485160152806347e-05, "loss": 0.03827667951583862, "step": 18100 }, { "epoch": 15.76889661164205, "grad_norm": 0.04231059551239014, "learning_rate": 1.4441081398765794e-05, "loss": 0.06413057327270508, "step": 18150 }, { "epoch": 15.8123370981755, "grad_norm": 0.0001920364738907665, "learning_rate": 1.4397002644725242e-05, "loss": 0.06431771278381347, "step": 18200 }, { "epoch": 15.85577758470895, "grad_norm": 0.0009730961173772812, "learning_rate": 1.435292389068469e-05, "loss": 0.14641772270202635, "step": 18250 }, { "epoch": 15.899218071242398, "grad_norm": 0.04817694053053856, "learning_rate": 1.4308845136644137e-05, "loss": 0.09290631294250488, "step": 18300 }, { "epoch": 15.942658557775847, "grad_norm": 0.0002339025668334216, "learning_rate": 1.4264766382603586e-05, "loss": 0.04536252975463867, "step": 18350 }, { "epoch": 15.986099044309297, "grad_norm": 5.864691734313965, "learning_rate": 1.4220687628563033e-05, "loss": 0.07572299003601074, "step": 18400 }, { "epoch": 16.029539530842744, "grad_norm": 0.00020901852985844016, "learning_rate": 1.4176608874522481e-05, "loss": 0.04608057975769043, "step": 18450 }, { "epoch": 16.072980017376196, "grad_norm": 0.07316890358924866, "learning_rate": 1.4132530120481928e-05, "loss": 0.11023859977722168, "step": 18500 }, { "epoch": 16.072980017376196, "eval_cer": 0.11616188979790865, "eval_loss": 1.0090523958206177, "eval_runtime": 35.2425, "eval_samples_per_second": 28.318, "eval_steps_per_second": 14.159, "eval_wer": 0.33037825059101655, "step": 18500 }, { "epoch": 16.116420503909644, "grad_norm": 0.0020939745008945465, "learning_rate": 1.4088451366441376e-05, "loss": 0.05038735389709473, "step": 18550 }, { "epoch": 16.159860990443093, "grad_norm": 0.0004580508393701166, "learning_rate": 1.4044372612400824e-05, "loss": 0.0617540168762207, "step": 18600 }, { "epoch": 16.20330147697654, "grad_norm": 0.0010127995628863573, "learning_rate": 1.4000293858360271e-05, "loss": 0.0612303876876831, "step": 18650 }, { "epoch": 16.24674196350999, "grad_norm": 0.015361390076577663, "learning_rate": 1.3956215104319719e-05, "loss": 0.04825174331665039, "step": 18700 }, { "epoch": 16.290182450043442, "grad_norm": 0.0008976504323072731, "learning_rate": 1.3912136350279166e-05, "loss": 0.05854806423187256, "step": 18750 }, { "epoch": 16.33362293657689, "grad_norm": 0.013188125565648079, "learning_rate": 1.3868057596238614e-05, "loss": 0.04929457664489746, "step": 18800 }, { "epoch": 16.37706342311034, "grad_norm": 0.013670213520526886, "learning_rate": 1.3823978842198061e-05, "loss": 0.038565528392791745, "step": 18850 }, { "epoch": 16.420503909643788, "grad_norm": 0.08130084723234177, "learning_rate": 1.3779900088157509e-05, "loss": 0.04009881019592285, "step": 18900 }, { "epoch": 16.463944396177236, "grad_norm": 2.4593734741210938, "learning_rate": 1.3735821334116956e-05, "loss": 0.043494491577148436, "step": 18950 }, { "epoch": 16.507384882710685, "grad_norm": 0.0002031345502473414, "learning_rate": 1.3691742580076404e-05, "loss": 0.027528271675109864, "step": 19000 }, { "epoch": 16.507384882710685, "eval_cer": 0.10440483285474346, "eval_loss": 0.9773461818695068, "eval_runtime": 35.5819, "eval_samples_per_second": 28.048, "eval_steps_per_second": 14.024, "eval_wer": 0.3212174940898345, "step": 19000 }, { "epoch": 16.550825369244137, "grad_norm": 0.00017495028441771865, "learning_rate": 1.3647663826035851e-05, "loss": 0.06181173324584961, "step": 19050 }, { "epoch": 16.594265855777586, "grad_norm": 0.00031455489806830883, "learning_rate": 1.3603585071995299e-05, "loss": 0.046858110427856446, "step": 19100 }, { "epoch": 16.637706342311034, "grad_norm": 0.03978965803980827, "learning_rate": 1.3559506317954746e-05, "loss": 0.05867977142333984, "step": 19150 }, { "epoch": 16.681146828844483, "grad_norm": 0.00650749821215868, "learning_rate": 1.3515427563914194e-05, "loss": 0.06390885829925537, "step": 19200 }, { "epoch": 16.72458731537793, "grad_norm": 0.002027066657319665, "learning_rate": 1.3471348809873641e-05, "loss": 0.0747562313079834, "step": 19250 }, { "epoch": 16.76802780191138, "grad_norm": 0.00012768770102411509, "learning_rate": 1.3427270055833089e-05, "loss": 0.0417702579498291, "step": 19300 }, { "epoch": 16.811468288444832, "grad_norm": 4.3758605897892267e-05, "learning_rate": 1.3383191301792536e-05, "loss": 0.043452243804931644, "step": 19350 }, { "epoch": 16.85490877497828, "grad_norm": 0.009404808282852173, "learning_rate": 1.3339112547751984e-05, "loss": 0.07918959617614746, "step": 19400 }, { "epoch": 16.89834926151173, "grad_norm": 0.003255483927205205, "learning_rate": 1.3295033793711431e-05, "loss": 0.031140968799591065, "step": 19450 }, { "epoch": 16.941789748045178, "grad_norm": 0.046869829297065735, "learning_rate": 1.3250955039670879e-05, "loss": 0.053838644027709964, "step": 19500 }, { "epoch": 16.941789748045178, "eval_cer": 0.10142493362951725, "eval_loss": 1.045753836631775, "eval_runtime": 35.7261, "eval_samples_per_second": 27.935, "eval_steps_per_second": 13.967, "eval_wer": 0.3141252955082742, "step": 19500 }, { "epoch": 16.985230234578626, "grad_norm": 0.0014443215914070606, "learning_rate": 1.3206876285630326e-05, "loss": 0.055178966522216794, "step": 19550 }, { "epoch": 17.028670721112075, "grad_norm": 0.0004687681212089956, "learning_rate": 1.3162797531589774e-05, "loss": 0.09418526649475098, "step": 19600 }, { "epoch": 17.072111207645527, "grad_norm": 0.0004573004553094506, "learning_rate": 1.3118718777549221e-05, "loss": 0.026365480422973632, "step": 19650 }, { "epoch": 17.115551694178976, "grad_norm": 0.0036469711922109127, "learning_rate": 1.3074640023508669e-05, "loss": 0.058814377784729005, "step": 19700 }, { "epoch": 17.158992180712424, "grad_norm": 0.00524592399597168, "learning_rate": 1.3030561269468116e-05, "loss": 0.04088939189910889, "step": 19750 }, { "epoch": 17.202432667245873, "grad_norm": 0.00013877540186513215, "learning_rate": 1.2986482515427564e-05, "loss": 0.06733872890472412, "step": 19800 }, { "epoch": 17.24587315377932, "grad_norm": 0.04638398066163063, "learning_rate": 1.2942403761387011e-05, "loss": 0.02715529441833496, "step": 19850 }, { "epoch": 17.28931364031277, "grad_norm": 0.0002255926956422627, "learning_rate": 1.2898325007346459e-05, "loss": 0.024372515678405763, "step": 19900 }, { "epoch": 17.332754126846222, "grad_norm": 0.0001305036712437868, "learning_rate": 1.2854246253305906e-05, "loss": 0.05264826774597168, "step": 19950 }, { "epoch": 17.37619461337967, "grad_norm": 0.012165222316980362, "learning_rate": 1.2810167499265354e-05, "loss": 0.022559099197387696, "step": 20000 }, { "epoch": 17.37619461337967, "eval_cer": 0.10581351248848675, "eval_loss": 0.9943767786026001, "eval_runtime": 35.1598, "eval_samples_per_second": 28.385, "eval_steps_per_second": 14.192, "eval_wer": 0.3271276595744681, "step": 20000 }, { "epoch": 17.41963509991312, "grad_norm": 0.2545449733734131, "learning_rate": 1.2766088745224801e-05, "loss": 0.02598097801208496, "step": 20050 }, { "epoch": 17.463075586446568, "grad_norm": 0.5349053144454956, "learning_rate": 1.2722009991184249e-05, "loss": 0.02631650447845459, "step": 20100 }, { "epoch": 17.506516072980016, "grad_norm": 0.001936123939231038, "learning_rate": 1.2677931237143697e-05, "loss": 0.021945018768310547, "step": 20150 }, { "epoch": 17.54995655951347, "grad_norm": 0.000843520334456116, "learning_rate": 1.2633852483103146e-05, "loss": 0.09685382843017579, "step": 20200 }, { "epoch": 17.593397046046917, "grad_norm": 0.0006347526214085519, "learning_rate": 1.2589773729062593e-05, "loss": 0.07540733814239502, "step": 20250 }, { "epoch": 17.636837532580365, "grad_norm": 0.00029396990430541337, "learning_rate": 1.254569497502204e-05, "loss": 0.04331284999847412, "step": 20300 }, { "epoch": 17.680278019113814, "grad_norm": 0.0012669226853176951, "learning_rate": 1.2501616220981488e-05, "loss": 0.05464168548583984, "step": 20350 }, { "epoch": 17.723718505647263, "grad_norm": 8.315537706948817e-05, "learning_rate": 1.2457537466940936e-05, "loss": 0.043418560028076175, "step": 20400 }, { "epoch": 17.76715899218071, "grad_norm": 0.014166179113090038, "learning_rate": 1.2413458712900383e-05, "loss": 0.057585406303405764, "step": 20450 }, { "epoch": 17.810599478714163, "grad_norm": 0.0003503711777739227, "learning_rate": 1.236937995885983e-05, "loss": 0.06322105884552003, "step": 20500 }, { "epoch": 17.810599478714163, "eval_cer": 0.09947445413664192, "eval_loss": 0.9832805395126343, "eval_runtime": 35.3191, "eval_samples_per_second": 28.257, "eval_steps_per_second": 14.128, "eval_wer": 0.3076241134751773, "step": 20500 }, { "epoch": 17.854039965247612, "grad_norm": 0.00030440345290116966, "learning_rate": 1.2325301204819278e-05, "loss": 0.060886926651000976, "step": 20550 }, { "epoch": 17.89748045178106, "grad_norm": 0.007375710643827915, "learning_rate": 1.2281222450778726e-05, "loss": 0.04395482540130615, "step": 20600 }, { "epoch": 17.94092093831451, "grad_norm": 0.0019175054039806128, "learning_rate": 1.2237143696738172e-05, "loss": 0.023046765327453613, "step": 20650 }, { "epoch": 17.984361424847958, "grad_norm": 0.0014469270827248693, "learning_rate": 1.219306494269762e-05, "loss": 0.041912388801574704, "step": 20700 }, { "epoch": 18.027801911381406, "grad_norm": 0.008292295038700104, "learning_rate": 1.2148986188657067e-05, "loss": 0.05653272151947022, "step": 20750 }, { "epoch": 18.071242397914858, "grad_norm": 0.0011951219057664275, "learning_rate": 1.2104907434616514e-05, "loss": 0.03046605587005615, "step": 20800 }, { "epoch": 18.114682884448307, "grad_norm": 0.004597791470587254, "learning_rate": 1.2060828680575962e-05, "loss": 0.034540703296661375, "step": 20850 }, { "epoch": 18.158123370981755, "grad_norm": 0.0003544765349943191, "learning_rate": 1.201674992653541e-05, "loss": 0.015487746000289918, "step": 20900 }, { "epoch": 18.201563857515204, "grad_norm": 0.001666396390646696, "learning_rate": 1.1972671172494857e-05, "loss": 0.028139712810516356, "step": 20950 }, { "epoch": 18.245004344048652, "grad_norm": 0.4877508282661438, "learning_rate": 1.1928592418454304e-05, "loss": 0.045163874626159665, "step": 21000 }, { "epoch": 18.245004344048652, "eval_cer": 0.10554261255892074, "eval_loss": 1.0116287469863892, "eval_runtime": 35.5341, "eval_samples_per_second": 28.086, "eval_steps_per_second": 14.043, "eval_wer": 0.3182624113475177, "step": 21000 }, { "epoch": 18.2884448305821, "grad_norm": 4.470763451536186e-05, "learning_rate": 1.1884513664413752e-05, "loss": 0.023325955867767333, "step": 21050 }, { "epoch": 18.331885317115553, "grad_norm": 0.000781964510679245, "learning_rate": 1.18404349103732e-05, "loss": 0.025803213119506837, "step": 21100 }, { "epoch": 18.375325803649, "grad_norm": 19.389554977416992, "learning_rate": 1.1796356156332649e-05, "loss": 0.030954115390777588, "step": 21150 }, { "epoch": 18.41876629018245, "grad_norm": 7.068664126563817e-05, "learning_rate": 1.1752277402292096e-05, "loss": 0.04330010414123535, "step": 21200 }, { "epoch": 18.4622067767159, "grad_norm": 0.00017082026170101017, "learning_rate": 1.1708198648251544e-05, "loss": 0.04214978694915771, "step": 21250 }, { "epoch": 18.505647263249347, "grad_norm": 0.0019248217577114701, "learning_rate": 1.1664119894210991e-05, "loss": 0.021218812465667723, "step": 21300 }, { "epoch": 18.549087749782796, "grad_norm": 0.2257125824689865, "learning_rate": 1.1620041140170439e-05, "loss": 0.04343417644500733, "step": 21350 }, { "epoch": 18.592528236316248, "grad_norm": 0.004617325030267239, "learning_rate": 1.1575962386129886e-05, "loss": 0.031105964183807372, "step": 21400 }, { "epoch": 18.635968722849697, "grad_norm": 6.712381582474336e-05, "learning_rate": 1.1531883632089334e-05, "loss": 0.022620809078216553, "step": 21450 }, { "epoch": 18.679409209383145, "grad_norm": 11.555673599243164, "learning_rate": 1.1487804878048781e-05, "loss": 0.051412558555603026, "step": 21500 }, { "epoch": 18.679409209383145, "eval_cer": 0.09855339437611746, "eval_loss": 1.0771058797836304, "eval_runtime": 35.2368, "eval_samples_per_second": 28.323, "eval_steps_per_second": 14.161, "eval_wer": 0.3108747044917258, "step": 21500 }, { "epoch": 18.722849695916594, "grad_norm": 0.001295646419748664, "learning_rate": 1.1443726124008229e-05, "loss": 0.0214068603515625, "step": 21550 }, { "epoch": 18.766290182450042, "grad_norm": 0.022851450368762016, "learning_rate": 1.1399647369967676e-05, "loss": 0.03087963581085205, "step": 21600 }, { "epoch": 18.80973066898349, "grad_norm": 0.0012702015228569508, "learning_rate": 1.1355568615927124e-05, "loss": 0.046400198936462404, "step": 21650 }, { "epoch": 18.853171155516943, "grad_norm": 0.0037327518220990896, "learning_rate": 1.1311489861886571e-05, "loss": 0.024634184837341307, "step": 21700 }, { "epoch": 18.89661164205039, "grad_norm": 6.548186502186581e-05, "learning_rate": 1.1267411107846019e-05, "loss": 0.03668407678604126, "step": 21750 }, { "epoch": 18.94005212858384, "grad_norm": 3.877016544342041, "learning_rate": 1.1223332353805466e-05, "loss": 0.02262542963027954, "step": 21800 }, { "epoch": 18.98349261511729, "grad_norm": 0.0001716541883070022, "learning_rate": 1.1179253599764914e-05, "loss": 0.05151228427886963, "step": 21850 }, { "epoch": 19.026933101650737, "grad_norm": 0.0001898752962006256, "learning_rate": 1.1135174845724361e-05, "loss": 0.0242765212059021, "step": 21900 }, { "epoch": 19.07037358818419, "grad_norm": 0.10014080256223679, "learning_rate": 1.1091096091683809e-05, "loss": 0.037711410522460936, "step": 21950 }, { "epoch": 19.113814074717638, "grad_norm": 0.00022042197815608233, "learning_rate": 1.1047017337643256e-05, "loss": 0.025053555965423583, "step": 22000 }, { "epoch": 19.113814074717638, "eval_cer": 0.09801159451698542, "eval_loss": 1.0469719171524048, "eval_runtime": 35.8341, "eval_samples_per_second": 27.851, "eval_steps_per_second": 13.925, "eval_wer": 0.30112293144208035, "step": 22000 } ], "logging_steps": 50, "max_steps": 34530, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.810244226353391e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }