diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,20033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.29607698001480387, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00014803849000740192, + "grad_norm": 26.65081787109375, + "learning_rate": 0.0, + "loss": 2.0391, + "step": 1, + "train/speech_entropy": 2.65710903199251, + "train/text_entropy": 1.5672304041245404, + "train/token_acc": 0.3325892857142857 + }, + { + "epoch": 0.00029607698001480384, + "grad_norm": 28.154951095581055, + "learning_rate": 1.6929380759878146e-06, + "loss": 1.5234, + "step": 2, + "train/speech_entropy": 2.509311460331684, + "train/text_entropy": 0.8891578342603601, + "train/token_acc": 0.37912813738441214 + }, + { + "epoch": 0.00044411547002220575, + "grad_norm": 22.413196563720703, + "learning_rate": 2.68324336648371e-06, + "loss": 1.2891, + "step": 3, + "train/speech_entropy": 2.4842583454830547, + "train/text_entropy": 0.5761710435916216, + "train/token_acc": 0.38683602771362585 + }, + { + "epoch": 0.0005921539600296077, + "grad_norm": 26.925764083862305, + "learning_rate": 3.385876151975629e-06, + "loss": 1.4219, + "step": 4, + "train/speech_entropy": 2.8765005223891316, + "train/text_entropy": 1.4069841849703748, + "train/token_acc": 0.3212669683257919 + }, + { + "epoch": 0.0007401924500370096, + "grad_norm": 17.01216697692871, + "learning_rate": 3.930880481540663e-06, + "loss": 0.8477, + "step": 5, + "train/speech_entropy": 2.642801180752841, + "train/text_entropy": 0.27560723193584047, + "train/token_acc": 0.402187120291616 + }, + { + "epoch": 0.0008882309400444115, + "grad_norm": 17.91358184814453, + "learning_rate": 4.376181442471524e-06, + "loss": 1.7266, + "step": 6, + "train/speech_entropy": 2.9952047720927855, + "train/text_entropy": 0.8048725771100333, + "train/token_acc": 0.34206586826347307 + }, + { + "epoch": 0.0010362694300518134, + "grad_norm": 26.848417282104492, + "learning_rate": 4.7526780403631215e-06, + "loss": 1.4883, + "step": 7, + "train/speech_entropy": 2.7591665074445197, + "train/text_entropy": 0.9750215530395507, + "train/token_acc": 0.3240963855421687 + }, + { + "epoch": 0.0011843079200592153, + "grad_norm": 25.967880249023438, + "learning_rate": 5.078814227963444e-06, + "loss": 1.6172, + "step": 8, + "train/speech_entropy": 2.6645294273523588, + "train/text_entropy": 0.8700172787620908, + "train/token_acc": 0.3724832214765101 + }, + { + "epoch": 0.0013323464100666173, + "grad_norm": 38.743438720703125, + "learning_rate": 5.36648673296742e-06, + "loss": 0.7227, + "step": 9, + "train/speech_entropy": 3.140289939129291, + "train/text_entropy": 0.9065519332885742, + "train/token_acc": 0.3218884120171674 + }, + { + "epoch": 0.0014803849000740192, + "grad_norm": 24.221752166748047, + "learning_rate": 5.623818557528479e-06, + "loss": 1.6445, + "step": 10, + "train/speech_entropy": 2.7034304713510138, + "train/text_entropy": 1.5318762923086156, + "train/token_acc": 0.32238805970149254 + }, + { + "epoch": 0.001628423390081421, + "grad_norm": 25.87760353088379, + "learning_rate": 5.856603508467238e-06, + "loss": 1.0801, + "step": 11, + "train/speech_entropy": 2.6504871534264605, + "train/text_entropy": 0.7164372411267511, + "train/token_acc": 0.3758241758241758 + }, + { + "epoch": 0.001776461880088823, + "grad_norm": 25.32552146911621, + "learning_rate": 6.069119518459339e-06, + "loss": 1.2695, + "step": 12, + "train/speech_entropy": 3.42159615054151, + "train/text_entropy": 1.4786524941735233, + "train/token_acc": 0.3212520593080725 + }, + { + "epoch": 0.001924500370096225, + "grad_norm": 21.262847900390625, + "learning_rate": 6.264615296738672e-06, + "loss": 1.1055, + "step": 13, + "train/speech_entropy": 2.737188663122789, + "train/text_entropy": 0.6549776380307206, + "train/token_acc": 0.3870056497175141 + }, + { + "epoch": 0.002072538860103627, + "grad_norm": 17.222091674804688, + "learning_rate": 6.445616116350935e-06, + "loss": 1.5117, + "step": 14, + "train/speech_entropy": 2.7686665944923643, + "train/text_entropy": 1.2393510209753158, + "train/token_acc": 0.3455223880597015 + }, + { + "epoch": 0.0022205773501110288, + "grad_norm": 25.269365310668945, + "learning_rate": 6.614123848024373e-06, + "loss": 1.6504, + "step": 15, + "train/speech_entropy": 2.836352897507137, + "train/text_entropy": 1.5188847526175078, + "train/token_acc": 0.2925170068027211 + }, + { + "epoch": 0.0023686158401184307, + "grad_norm": 25.083860397338867, + "learning_rate": 6.771752303951258e-06, + "loss": 2.2656, + "step": 16, + "train/speech_entropy": 3.1907329830513183, + "train/text_entropy": 2.0436036028180804, + "train/token_acc": 0.25711481844946027 + }, + { + "epoch": 0.0025166543301258326, + "grad_norm": 13.803647994995117, + "learning_rate": 6.9198214781380354e-06, + "loss": 1.1543, + "step": 17, + "train/speech_entropy": 2.595293142611428, + "train/text_entropy": 1.1458764926407687, + "train/token_acc": 0.3808764940239044 + }, + { + "epoch": 0.0026646928201332345, + "grad_norm": 10.412036895751953, + "learning_rate": 7.059424808955233e-06, + "loss": 0.5352, + "step": 18, + "train/speech_entropy": 2.562082544962565, + "train/text_entropy": 0.5133582358418206, + "train/token_acc": 0.4183927091963546 + }, + { + "epoch": 0.0028127313101406364, + "grad_norm": 24.278940200805664, + "learning_rate": 7.191478231544885e-06, + "loss": 1.5703, + "step": 19, + "train/speech_entropy": 3.019741520091364, + "train/text_entropy": 1.585971862312377, + "train/token_acc": 0.3509036144578313 + }, + { + "epoch": 0.0029607698001480384, + "grad_norm": 14.470947265625, + "learning_rate": 7.316756633516292e-06, + "loss": 1.7539, + "step": 20, + "train/speech_entropy": 2.9488778841697565, + "train/text_entropy": 1.7658906159577545, + "train/token_acc": 0.3201149425287356 + }, + { + "epoch": 0.0031088082901554403, + "grad_norm": 18.676969528198242, + "learning_rate": 7.435921406846831e-06, + "loss": 2.0742, + "step": 21, + "train/speech_entropy": 3.042611497124863, + "train/text_entropy": 2.122990045021838, + "train/token_acc": 0.3324764353041988 + }, + { + "epoch": 0.003256846780162842, + "grad_norm": 15.74759578704834, + "learning_rate": 7.549541584455053e-06, + "loss": 1.6641, + "step": 22, + "train/speech_entropy": 3.030915153592211, + "train/text_entropy": 1.7436262022774174, + "train/token_acc": 0.34008683068017365 + }, + { + "epoch": 0.003404885270170244, + "grad_norm": 16.101818084716797, + "learning_rate": 7.658110274498834e-06, + "loss": 0.8145, + "step": 23, + "train/speech_entropy": 2.6174940045582367, + "train/text_entropy": 0.7757636392192476, + "train/token_acc": 0.40543735224586286 + }, + { + "epoch": 0.003552923760177646, + "grad_norm": 22.107669830322266, + "learning_rate": 7.762057594447154e-06, + "loss": 1.4766, + "step": 24, + "train/speech_entropy": 2.8636434107650945, + "train/text_entropy": 1.8280875665678396, + "train/token_acc": 0.33290978398983484 + }, + { + "epoch": 0.003700962250185048, + "grad_norm": 17.85451316833496, + "learning_rate": 7.861760963081327e-06, + "loss": 1.1953, + "step": 25, + "train/speech_entropy": 2.648275362667485, + "train/text_entropy": 1.1501866887124736, + "train/token_acc": 0.4015069967707212 + }, + { + "epoch": 0.00384900074019245, + "grad_norm": 23.26652717590332, + "learning_rate": 7.957553372726487e-06, + "loss": 1.4043, + "step": 26, + "train/speech_entropy": 2.7919408286490093, + "train/text_entropy": 1.863157884790263, + "train/token_acc": 0.3130841121495327 + }, + { + "epoch": 0.003997039230199852, + "grad_norm": 15.167733192443848, + "learning_rate": 8.049730099451128e-06, + "loss": 1.3945, + "step": 27, + "train/speech_entropy": 3.471995379805757, + "train/text_entropy": 1.5624275051817602, + "train/token_acc": 0.2885003362474781 + }, + { + "epoch": 0.004145077720207254, + "grad_norm": 20.175907135009766, + "learning_rate": 8.13855419233875e-06, + "loss": 1.2539, + "step": 28, + "train/speech_entropy": 2.951905517578125, + "train/text_entropy": 1.493891227145155, + "train/token_acc": 0.3427065026362039 + }, + { + "epoch": 0.004293116210214656, + "grad_norm": 19.319175720214844, + "learning_rate": 8.224260999076641e-06, + "loss": 1.3867, + "step": 29, + "train/speech_entropy": 2.5832042833314324, + "train/text_entropy": 1.386241310521176, + "train/token_acc": 0.35125448028673834 + }, + { + "epoch": 0.0044411547002220575, + "grad_norm": 19.964929580688477, + "learning_rate": 8.307061924012188e-06, + "loss": 1.2598, + "step": 30, + "train/speech_entropy": 3.206294768779691, + "train/text_entropy": 0.9954632048911237, + "train/token_acc": 0.3658310120705664 + }, + { + "epoch": 0.00458919319022946, + "grad_norm": 18.976987838745117, + "learning_rate": 8.387147569772287e-06, + "loss": 0.7676, + "step": 31, + "train/speech_entropy": 2.7764809718825343, + "train/text_entropy": 0.7277915522737323, + "train/token_acc": 0.3865546218487395 + }, + { + "epoch": 0.004737231680236861, + "grad_norm": 9.006917953491211, + "learning_rate": 8.464690379939073e-06, + "loss": 0.4092, + "step": 32, + "train/speech_entropy": 2.634807496083443, + "train/text_entropy": 0.5016499306391744, + "train/token_acc": 0.4470954356846473 + }, + { + "epoch": 0.004885270170244264, + "grad_norm": 16.353656768798828, + "learning_rate": 8.539846874950946e-06, + "loss": 1.3945, + "step": 33, + "train/speech_entropy": 3.385729663939191, + "train/text_entropy": 1.5693403977614182, + "train/token_acc": 0.2957486136783734 + }, + { + "epoch": 0.005033308660251665, + "grad_norm": 14.289937019348145, + "learning_rate": 8.612759554125852e-06, + "loss": 1.042, + "step": 34, + "train/speech_entropy": 2.9053532476721595, + "train/text_entropy": 1.3029769377274947, + "train/token_acc": 0.3564614050303556 + }, + { + "epoch": 0.0051813471502590676, + "grad_norm": 9.665493965148926, + "learning_rate": 8.683558521903785e-06, + "loss": 0.5332, + "step": 35, + "train/speech_entropy": 2.8228570037573313, + "train/text_entropy": 0.5121821118639661, + "train/token_acc": 0.4082934609250399 + }, + { + "epoch": 0.005329385640266469, + "grad_norm": 17.88351058959961, + "learning_rate": 8.752362884943047e-06, + "loss": 1.0127, + "step": 36, + "train/speech_entropy": 2.659584885651845, + "train/text_entropy": 1.0727695837253477, + "train/token_acc": 0.3952662721893491 + }, + { + "epoch": 0.005477424130273871, + "grad_norm": 18.988882064819336, + "learning_rate": 8.81928195775612e-06, + "loss": 0.8008, + "step": 37, + "train/speech_entropy": 2.8139575292838717, + "train/text_entropy": 0.7298374065774025, + "train/token_acc": 0.37662337662337664 + }, + { + "epoch": 0.005625462620281273, + "grad_norm": 12.04275131225586, + "learning_rate": 8.884416307532699e-06, + "loss": 0.6172, + "step": 38, + "train/speech_entropy": 3.3558692124310663, + "train/text_entropy": 0.5485218373974364, + "train/token_acc": 0.35271687321258344 + }, + { + "epoch": 0.005773501110288675, + "grad_norm": 16.62215232849121, + "learning_rate": 8.947858663222381e-06, + "loss": 0.9922, + "step": 39, + "train/speech_entropy": 2.794347817069905, + "train/text_entropy": 0.9162953954164673, + "train/token_acc": 0.37163814180929094 + }, + { + "epoch": 0.005921539600296077, + "grad_norm": 19.8236083984375, + "learning_rate": 9.009694709504107e-06, + "loss": 1.6055, + "step": 40, + "train/speech_entropy": 3.0251731068966854, + "train/text_entropy": 1.6424304417201452, + "train/token_acc": 0.2994871794871795 + }, + { + "epoch": 0.006069578090303479, + "grad_norm": 13.29216480255127, + "learning_rate": 9.070003782702798e-06, + "loss": 0.9219, + "step": 41, + "train/speech_entropy": 3.3458943194653616, + "train/text_entropy": 0.9146737598237538, + "train/token_acc": 0.3374888691006233 + }, + { + "epoch": 0.0062176165803108805, + "grad_norm": 19.51304817199707, + "learning_rate": 9.128859482834646e-06, + "loss": 1.4297, + "step": 42, + "train/speech_entropy": 3.018309841266257, + "train/text_entropy": 1.4148128255208334, + "train/token_acc": 0.33847980997624705 + }, + { + "epoch": 0.006365655070318283, + "grad_norm": 11.696044921875, + "learning_rate": 9.18633021362586e-06, + "loss": 0.7959, + "step": 43, + "train/speech_entropy": 3.2625145679900163, + "train/text_entropy": 0.7065791242262897, + "train/token_acc": 0.37250554323725055 + }, + { + "epoch": 0.006513693560325684, + "grad_norm": 14.52393627166748, + "learning_rate": 9.242479660442866e-06, + "loss": 1.085, + "step": 44, + "train/speech_entropy": 3.347622103491294, + "train/text_entropy": 1.056294842301128, + "train/token_acc": 0.31722880583409296 + }, + { + "epoch": 0.006661732050333087, + "grad_norm": 25.041366577148438, + "learning_rate": 9.297367214508082e-06, + "loss": 1.5508, + "step": 45, + "train/speech_entropy": 3.353548888375643, + "train/text_entropy": 1.5762575502748843, + "train/token_acc": 0.2913472070098576 + }, + { + "epoch": 0.006809770540340488, + "grad_norm": 21.935256958007812, + "learning_rate": 9.351048350486649e-06, + "loss": 1.5352, + "step": 46, + "train/speech_entropy": 3.2695098722041562, + "train/text_entropy": 1.4286928245489545, + "train/token_acc": 0.2901833872707659 + }, + { + "epoch": 0.0069578090303478906, + "grad_norm": 11.520458221435547, + "learning_rate": 9.403574963462505e-06, + "loss": 0.8477, + "step": 47, + "train/speech_entropy": 3.512798221699129, + "train/text_entropy": 0.9439076050199737, + "train/token_acc": 0.3585139318885449 + }, + { + "epoch": 0.007105847520355292, + "grad_norm": 13.773293495178223, + "learning_rate": 9.454995670434968e-06, + "loss": 1.0625, + "step": 48, + "train/speech_entropy": 2.9994179315289617, + "train/text_entropy": 0.9187587040407649, + "train/token_acc": 0.36921850079744817 + }, + { + "epoch": 0.007253886010362694, + "grad_norm": 22.352094650268555, + "learning_rate": 9.505356080726243e-06, + "loss": 1.207, + "step": 49, + "train/speech_entropy": 2.964687695494355, + "train/text_entropy": 1.2857474069746713, + "train/token_acc": 0.3333333333333333 + }, + { + "epoch": 0.007401924500370096, + "grad_norm": 14.959973335266113, + "learning_rate": 9.554699039069141e-06, + "loss": 0.543, + "step": 50, + "train/speech_entropy": 2.716988599937381, + "train/text_entropy": 0.4954503808317885, + "train/token_acc": 0.3842159916926272 + }, + { + "epoch": 0.007549962990377498, + "grad_norm": 17.8276424407959, + "learning_rate": 9.603064844621745e-06, + "loss": 1.8047, + "step": 51, + "train/speech_entropy": 3.6818985200264085, + "train/text_entropy": 1.8400897434779575, + "train/token_acc": 0.27440147329650094 + }, + { + "epoch": 0.0076980014803849, + "grad_norm": 20.50669288635254, + "learning_rate": 9.650491448714302e-06, + "loss": 1.8301, + "step": 52, + "train/speech_entropy": 2.8732857582225773, + "train/text_entropy": 1.4886649899366424, + "train/token_acc": 0.3808016877637131 + }, + { + "epoch": 0.007846039970392302, + "grad_norm": 7.677231311798096, + "learning_rate": 9.697014633759472e-06, + "loss": 0.3301, + "step": 53, + "train/speech_entropy": 2.7634819878472223, + "train/text_entropy": 0.3614090896966889, + "train/token_acc": 0.4377646062658764 + }, + { + "epoch": 0.007994078460399704, + "grad_norm": 20.992244720458984, + "learning_rate": 9.742668175438943e-06, + "loss": 1.8398, + "step": 54, + "train/speech_entropy": 3.7400206019158677, + "train/text_entropy": 1.7114155578613282, + "train/token_acc": 0.2629416598192276 + }, + { + "epoch": 0.008142116950407105, + "grad_norm": 17.355562210083008, + "learning_rate": 9.787483990007902e-06, + "loss": 0.8281, + "step": 55, + "train/speech_entropy": 3.199016871077291, + "train/text_entropy": 0.8002722137852719, + "train/token_acc": 0.3541666666666667 + }, + { + "epoch": 0.008290155440414507, + "grad_norm": 17.179462432861328, + "learning_rate": 9.831492268326567e-06, + "loss": 1.4961, + "step": 56, + "train/speech_entropy": 3.520965058924788, + "train/text_entropy": 1.1720839309692384, + "train/token_acc": 0.30230414746543777 + }, + { + "epoch": 0.00843819393042191, + "grad_norm": 22.513553619384766, + "learning_rate": 9.874721598028594e-06, + "loss": 2.0469, + "step": 57, + "train/speech_entropy": 3.588168526785714, + "train/text_entropy": 1.8304866851969837, + "train/token_acc": 0.2966101694915254 + }, + { + "epoch": 0.008586232420429312, + "grad_norm": 20.280309677124023, + "learning_rate": 9.917199075064456e-06, + "loss": 1.5625, + "step": 58, + "train/speech_entropy": 3.61470468255538, + "train/text_entropy": 1.6494887312488442, + "train/token_acc": 0.28884254431699685 + }, + { + "epoch": 0.008734270910436713, + "grad_norm": 24.084901809692383, + "learning_rate": 9.958950405709727e-06, + "loss": 1.6133, + "step": 59, + "train/speech_entropy": 3.7362595391027704, + "train/text_entropy": 1.4983285993537647, + "train/token_acc": 0.28019323671497587 + }, + { + "epoch": 0.008882309400444115, + "grad_norm": 20.420503616333008, + "learning_rate": 1e-05, + "loss": 1.5, + "step": 60, + "train/speech_entropy": 4.075035654554619, + "train/text_entropy": 1.6751991840119058, + "train/token_acc": 0.3 + }, + { + "epoch": 0.009030347890451517, + "grad_norm": 16.520116806030273, + "learning_rate": 9.999994099637303e-06, + "loss": 1.0156, + "step": 61, + "train/speech_entropy": 3.6208316363917694, + "train/text_entropy": 1.1904311810851729, + "train/token_acc": 0.34965034965034963 + }, + { + "epoch": 0.00917838638045892, + "grad_norm": 18.00417709350586, + "learning_rate": 9.999976398564682e-06, + "loss": 1.4492, + "step": 62, + "train/speech_entropy": 4.237001507231186, + "train/text_entropy": 1.6567105925484988, + "train/token_acc": 0.2704225352112676 + }, + { + "epoch": 0.00932642487046632, + "grad_norm": 19.235883712768555, + "learning_rate": 9.999946896828559e-06, + "loss": 0.5283, + "step": 63, + "train/speech_entropy": 3.1783508773922, + "train/text_entropy": 0.7796996888660249, + "train/token_acc": 0.346072186836518 + }, + { + "epoch": 0.009474463360473723, + "grad_norm": 14.355302810668945, + "learning_rate": 9.999905594506296e-06, + "loss": 1.2109, + "step": 64, + "train/speech_entropy": 3.8676749051472785, + "train/text_entropy": 1.2042918999989827, + "train/token_acc": 0.31776913099870296 + }, + { + "epoch": 0.009622501850481125, + "grad_norm": 11.437870979309082, + "learning_rate": 9.999852491706205e-06, + "loss": 0.8047, + "step": 65, + "train/speech_entropy": 3.677659194356894, + "train/text_entropy": 0.8693653693565956, + "train/token_acc": 0.3573825503355705 + }, + { + "epoch": 0.009770540340488527, + "grad_norm": 15.579059600830078, + "learning_rate": 9.99978758856754e-06, + "loss": 1.3984, + "step": 66, + "train/speech_entropy": 3.761014094118212, + "train/text_entropy": 1.533369980849229, + "train/token_acc": 0.3032311516155758 + }, + { + "epoch": 0.00991857883049593, + "grad_norm": 13.356656074523926, + "learning_rate": 9.999710885260506e-06, + "loss": 0.8691, + "step": 67, + "train/speech_entropy": 3.441499982561384, + "train/text_entropy": 0.9067702130573552, + "train/token_acc": 0.3509075194468453 + }, + { + "epoch": 0.01006661732050333, + "grad_norm": 14.831608772277832, + "learning_rate": 9.999622381986245e-06, + "loss": 1.1465, + "step": 68, + "train/speech_entropy": 3.526116503364718, + "train/text_entropy": 0.9617915520301232, + "train/token_acc": 0.349478390461997 + }, + { + "epoch": 0.010214655810510733, + "grad_norm": 26.15338706970215, + "learning_rate": 9.999522078976847e-06, + "loss": 2.2188, + "step": 69, + "train/speech_entropy": 2.8983091495428055, + "train/text_entropy": 2.095130566729615, + "train/token_acc": 0.3169469598965071 + }, + { + "epoch": 0.010362694300518135, + "grad_norm": 16.534503936767578, + "learning_rate": 9.999409976495346e-06, + "loss": 1.1787, + "step": 70, + "train/speech_entropy": 3.7839251111677448, + "train/text_entropy": 1.0595906217333297, + "train/token_acc": 0.3157894736842105 + }, + { + "epoch": 0.010510732790525537, + "grad_norm": 18.609149932861328, + "learning_rate": 9.999286074835716e-06, + "loss": 1.5156, + "step": 71, + "train/speech_entropy": 3.4945174162836143, + "train/text_entropy": 1.3522557026631123, + "train/token_acc": 0.32041102288650164 + }, + { + "epoch": 0.010658771280532938, + "grad_norm": 19.785852432250977, + "learning_rate": 9.999150374322878e-06, + "loss": 1.7227, + "step": 72, + "train/speech_entropy": 4.267144097222222, + "train/text_entropy": 1.857120733994704, + "train/token_acc": 0.2544333076329992 + }, + { + "epoch": 0.01080680977054034, + "grad_norm": 19.444740295410156, + "learning_rate": 9.999002875312686e-06, + "loss": 1.4375, + "step": 73, + "train/speech_entropy": 4.382020874023437, + "train/text_entropy": 1.6833521842956543, + "train/token_acc": 0.25 + }, + { + "epoch": 0.010954848260547743, + "grad_norm": 19.18793296813965, + "learning_rate": 9.998843578191943e-06, + "loss": 1.3594, + "step": 74, + "train/speech_entropy": 3.8276648412776897, + "train/text_entropy": 1.298198247359971, + "train/token_acc": 0.29782833505687695 + }, + { + "epoch": 0.011102886750555145, + "grad_norm": 21.52139663696289, + "learning_rate": 9.998672483378387e-06, + "loss": 1.6289, + "step": 75, + "train/speech_entropy": 4.15635775862069, + "train/text_entropy": 1.7597568821262668, + "train/token_acc": 0.27488151658767773 + }, + { + "epoch": 0.011250925240562546, + "grad_norm": 20.55527687072754, + "learning_rate": 9.998489591320691e-06, + "loss": 1.5586, + "step": 76, + "train/speech_entropy": 4.116953089617301, + "train/text_entropy": 1.2316962511111529, + "train/token_acc": 0.26595744680851063 + }, + { + "epoch": 0.011398963730569948, + "grad_norm": 27.19259262084961, + "learning_rate": 9.998294902498471e-06, + "loss": 1.0078, + "step": 77, + "train/speech_entropy": 4.015926223864659, + "train/text_entropy": 1.22780936697255, + "train/token_acc": 0.275049115913556 + }, + { + "epoch": 0.01154700222057735, + "grad_norm": 10.288987159729004, + "learning_rate": 9.998088417422275e-06, + "loss": 0.7305, + "step": 78, + "train/speech_entropy": 3.8515201754096196, + "train/text_entropy": 0.7700465308295356, + "train/token_acc": 0.3412029229904441 + }, + { + "epoch": 0.011695040710584753, + "grad_norm": 21.538204193115234, + "learning_rate": 9.997870136633585e-06, + "loss": 1.1758, + "step": 79, + "train/speech_entropy": 3.799452218061093, + "train/text_entropy": 1.2326283371239377, + "train/token_acc": 0.2828438948995363 + }, + { + "epoch": 0.011843079200592153, + "grad_norm": 18.910940170288086, + "learning_rate": 9.997640060704818e-06, + "loss": 1.5742, + "step": 80, + "train/speech_entropy": 4.43643844574396, + "train/text_entropy": 1.4963870729718889, + "train/token_acc": 0.24878836833602586 + }, + { + "epoch": 0.011991117690599556, + "grad_norm": 11.48192310333252, + "learning_rate": 9.99739819023932e-06, + "loss": 0.8418, + "step": 81, + "train/speech_entropy": 4.24639144165318, + "train/text_entropy": 0.8383301344493892, + "train/token_acc": 0.29671011793916824 + }, + { + "epoch": 0.012139156180606958, + "grad_norm": 19.161968231201172, + "learning_rate": 9.99714452587137e-06, + "loss": 1.4219, + "step": 82, + "train/speech_entropy": 4.389976083548965, + "train/text_entropy": 1.2987510638530029, + "train/token_acc": 0.2603734439834025 + }, + { + "epoch": 0.01228719467061436, + "grad_norm": 19.092405319213867, + "learning_rate": 9.996879068266173e-06, + "loss": 1.5, + "step": 83, + "train/speech_entropy": 4.30304261647403, + "train/text_entropy": 1.3504498615357035, + "train/token_acc": 0.2770083102493075 + }, + { + "epoch": 0.012435233160621761, + "grad_norm": 15.9556884765625, + "learning_rate": 9.996601818119858e-06, + "loss": 0.5176, + "step": 84, + "train/speech_entropy": 3.634000692721671, + "train/text_entropy": 0.4663086485588688, + "train/token_acc": 0.3486910994764398 + }, + { + "epoch": 0.012583271650629163, + "grad_norm": 16.814498901367188, + "learning_rate": 9.996312776159485e-06, + "loss": 1.5459, + "step": 85, + "train/speech_entropy": 4.151731773721717, + "train/text_entropy": 1.4486937596247746, + "train/token_acc": 0.2999279019466474 + }, + { + "epoch": 0.012731310140636566, + "grad_norm": 23.655147552490234, + "learning_rate": 9.996011943143032e-06, + "loss": 1.5195, + "step": 86, + "train/speech_entropy": 4.090528164690698, + "train/text_entropy": 1.4896155235751363, + "train/token_acc": 0.2665066026410564 + }, + { + "epoch": 0.012879348630643968, + "grad_norm": 15.538963317871094, + "learning_rate": 9.995699319859394e-06, + "loss": 1.002, + "step": 87, + "train/speech_entropy": 3.861784140969163, + "train/text_entropy": 0.92957015692019, + "train/token_acc": 0.34651762682717113 + }, + { + "epoch": 0.013027387120651369, + "grad_norm": 19.186567306518555, + "learning_rate": 9.995374907128396e-06, + "loss": 1.2656, + "step": 88, + "train/speech_entropy": 4.251786591990894, + "train/text_entropy": 1.3636616613134482, + "train/token_acc": 0.2952586206896552 + }, + { + "epoch": 0.013175425610658771, + "grad_norm": 26.369543075561523, + "learning_rate": 9.995038705800766e-06, + "loss": 1.3789, + "step": 89, + "train/speech_entropy": 4.231524709493172, + "train/text_entropy": 1.248952386099533, + "train/token_acc": 0.3178294573643411 + }, + { + "epoch": 0.013323464100666173, + "grad_norm": 12.274277687072754, + "learning_rate": 9.994690716758159e-06, + "loss": 0.8359, + "step": 90, + "train/speech_entropy": 3.919299067283163, + "train/text_entropy": 0.8272540443821957, + "train/token_acc": 0.33513513513513515 + }, + { + "epoch": 0.013471502590673576, + "grad_norm": 25.338237762451172, + "learning_rate": 9.994330940913131e-06, + "loss": 1.582, + "step": 91, + "train/speech_entropy": 4.349732009582049, + "train/text_entropy": 1.361894524615744, + "train/token_acc": 0.25412960609911056 + }, + { + "epoch": 0.013619541080680976, + "grad_norm": 19.105175018310547, + "learning_rate": 9.993959379209155e-06, + "loss": 1.3398, + "step": 92, + "train/speech_entropy": 3.882232526665953, + "train/text_entropy": 1.07718402998788, + "train/token_acc": 0.30636363636363634 + }, + { + "epoch": 0.013767579570688379, + "grad_norm": 14.872505187988281, + "learning_rate": 9.993576032620606e-06, + "loss": 1.0586, + "step": 93, + "train/speech_entropy": 4.4615660107825414, + "train/text_entropy": 1.2327000011097302, + "train/token_acc": 0.27441077441077444 + }, + { + "epoch": 0.013915618060695781, + "grad_norm": 22.260454177856445, + "learning_rate": 9.993180902152767e-06, + "loss": 1.2617, + "step": 94, + "train/speech_entropy": 4.359050556764764, + "train/text_entropy": 1.3149899311981867, + "train/token_acc": 0.25234441602728047 + }, + { + "epoch": 0.014063656550703183, + "grad_norm": 18.354936599731445, + "learning_rate": 9.992773988841822e-06, + "loss": 1.0977, + "step": 95, + "train/speech_entropy": 4.167662111047196, + "train/text_entropy": 1.4984910692487443, + "train/token_acc": 0.2776243093922652 + }, + { + "epoch": 0.014211695040710584, + "grad_norm": 18.81583023071289, + "learning_rate": 9.992355293754853e-06, + "loss": 1.7305, + "step": 96, + "train/speech_entropy": 4.325864324853404, + "train/text_entropy": 1.907604395416734, + "train/token_acc": 0.2539831302717901 + }, + { + "epoch": 0.014359733530717986, + "grad_norm": 16.192136764526367, + "learning_rate": 9.991924817989838e-06, + "loss": 0.9268, + "step": 97, + "train/speech_entropy": 3.8693466712688576, + "train/text_entropy": 0.9716153654662318, + "train/token_acc": 0.32476319350473615 + }, + { + "epoch": 0.014507772020725389, + "grad_norm": 14.210833549499512, + "learning_rate": 9.991482562675654e-06, + "loss": 1.3086, + "step": 98, + "train/speech_entropy": 4.317142163160751, + "train/text_entropy": 1.4118251645468114, + "train/token_acc": 0.2897959183673469 + }, + { + "epoch": 0.014655810510732791, + "grad_norm": 10.692156791687012, + "learning_rate": 9.991028528972058e-06, + "loss": 0.6543, + "step": 99, + "train/speech_entropy": 3.607872513422357, + "train/text_entropy": 0.7005994529054876, + "train/token_acc": 0.3389084507042254 + }, + { + "epoch": 0.014803849000740192, + "grad_norm": 15.090703010559082, + "learning_rate": 9.990562718069703e-06, + "loss": 1.2285, + "step": 100, + "train/speech_entropy": 4.055386381789138, + "train/text_entropy": 1.1311342819877293, + "train/token_acc": 0.3028229255774166 + }, + { + "epoch": 0.014951887490747594, + "grad_norm": 19.24198341369629, + "learning_rate": 9.990085131190125e-06, + "loss": 1.3398, + "step": 101, + "train/speech_entropy": 4.099089880331332, + "train/text_entropy": 1.5902461503681384, + "train/token_acc": 0.3002610966057441 + }, + { + "epoch": 0.015099925980754996, + "grad_norm": 15.953693389892578, + "learning_rate": 9.989595769585738e-06, + "loss": 0.9844, + "step": 102, + "train/speech_entropy": 3.741991853974556, + "train/text_entropy": 1.0434371948242187, + "train/token_acc": 0.30089285714285713 + }, + { + "epoch": 0.015247964470762399, + "grad_norm": 24.122079849243164, + "learning_rate": 9.989094634539837e-06, + "loss": 1.8711, + "step": 103, + "train/speech_entropy": 3.9547146923341927, + "train/text_entropy": 1.854280259874132, + "train/token_acc": 0.25059665871121717 + }, + { + "epoch": 0.0153960029607698, + "grad_norm": 8.521744728088379, + "learning_rate": 9.988581727366591e-06, + "loss": 0.312, + "step": 104, + "train/speech_entropy": 3.1307019721906926, + "train/text_entropy": 0.39322471618652344, + "train/token_acc": 0.40513290559120074 + }, + { + "epoch": 0.015544041450777202, + "grad_norm": 15.880602836608887, + "learning_rate": 9.988057049411038e-06, + "loss": 0.9736, + "step": 105, + "train/speech_entropy": 3.4027014061447103, + "train/text_entropy": 0.8269786340585027, + "train/token_acc": 0.3679144385026738 + }, + { + "epoch": 0.015692079940784604, + "grad_norm": 20.832653045654297, + "learning_rate": 9.987520602049084e-06, + "loss": 1.5117, + "step": 106, + "train/speech_entropy": 3.991196534959532, + "train/text_entropy": 1.4748034000396728, + "train/token_acc": 0.28421052631578947 + }, + { + "epoch": 0.015840118430792006, + "grad_norm": 21.47350311279297, + "learning_rate": 9.986972386687501e-06, + "loss": 1.8594, + "step": 107, + "train/speech_entropy": 4.028718570536247, + "train/text_entropy": 1.5800409417403372, + "train/token_acc": 0.28696498054474706 + }, + { + "epoch": 0.01598815692079941, + "grad_norm": 17.386119842529297, + "learning_rate": 9.98641240476392e-06, + "loss": 1.3906, + "step": 108, + "train/speech_entropy": 4.274378178743708, + "train/text_entropy": 1.5237040546654308, + "train/token_acc": 0.2754880694143167 + }, + { + "epoch": 0.01613619541080681, + "grad_norm": 18.810396194458008, + "learning_rate": 9.985840657746826e-06, + "loss": 1.5859, + "step": 109, + "train/speech_entropy": 3.7034117448311834, + "train/text_entropy": 1.6315255906846788, + "train/token_acc": 0.3253873659117998 + }, + { + "epoch": 0.01628423390081421, + "grad_norm": 27.263654708862305, + "learning_rate": 9.985257147135564e-06, + "loss": 1.8867, + "step": 110, + "train/speech_entropy": 3.4041162527603053, + "train/text_entropy": 1.451851222826087, + "train/token_acc": 0.3075601374570447 + }, + { + "epoch": 0.016432272390821612, + "grad_norm": 20.128877639770508, + "learning_rate": 9.984661874460316e-06, + "loss": 2.0078, + "step": 111, + "train/speech_entropy": 4.140070611904549, + "train/text_entropy": 1.9707735394119124, + "train/token_acc": 0.2592885375494071 + }, + { + "epoch": 0.016580310880829015, + "grad_norm": 21.895824432373047, + "learning_rate": 9.98405484128212e-06, + "loss": 1.8555, + "step": 112, + "train/speech_entropy": 4.037769040681718, + "train/text_entropy": 1.6537385534067623, + "train/token_acc": 0.2777242044358727 + }, + { + "epoch": 0.016728349370836417, + "grad_norm": 18.003520965576172, + "learning_rate": 9.983436049192848e-06, + "loss": 0.8223, + "step": 113, + "train/speech_entropy": 3.481001371177009, + "train/text_entropy": 0.6009201238184799, + "train/token_acc": 0.34842951059167276 + }, + { + "epoch": 0.01687638786084382, + "grad_norm": 16.534439086914062, + "learning_rate": 9.98280549981521e-06, + "loss": 1.3887, + "step": 114, + "train/speech_entropy": 3.6460613189627433, + "train/text_entropy": 1.5422115500913847, + "train/token_acc": 0.29853479853479853 + }, + { + "epoch": 0.017024426350851222, + "grad_norm": 11.613746643066406, + "learning_rate": 9.98216319480275e-06, + "loss": 1.7773, + "step": 115, + "train/speech_entropy": 4.445391803678109, + "train/text_entropy": 2.0476779870819626, + "train/token_acc": 0.25074037512339586 + }, + { + "epoch": 0.017172464840858624, + "grad_norm": 19.757719039916992, + "learning_rate": 9.981509135839835e-06, + "loss": 1.2422, + "step": 116, + "train/speech_entropy": 3.8901483851435024, + "train/text_entropy": 1.2480605154326467, + "train/token_acc": 0.2844574780058651 + }, + { + "epoch": 0.017320503330866027, + "grad_norm": 21.846017837524414, + "learning_rate": 9.980843324641659e-06, + "loss": 1.3574, + "step": 117, + "train/speech_entropy": 3.4292599103687853, + "train/text_entropy": 1.3786562942877048, + "train/token_acc": 0.3201219512195122 + }, + { + "epoch": 0.017468541820873425, + "grad_norm": 15.731741905212402, + "learning_rate": 9.980165762954237e-06, + "loss": 1.8359, + "step": 118, + "train/speech_entropy": 4.273757173868218, + "train/text_entropy": 1.7394236211907375, + "train/token_acc": 0.25664251207729466 + }, + { + "epoch": 0.017616580310880828, + "grad_norm": 16.661874771118164, + "learning_rate": 9.979476452554394e-06, + "loss": 1.582, + "step": 119, + "train/speech_entropy": 3.82767085025185, + "train/text_entropy": 1.639156857052365, + "train/token_acc": 0.28271186440677964 + }, + { + "epoch": 0.01776461880088823, + "grad_norm": 26.400941848754883, + "learning_rate": 9.978775395249763e-06, + "loss": 1.7266, + "step": 120, + "train/speech_entropy": 3.4764826100343353, + "train/text_entropy": 1.3938321052713598, + "train/token_acc": 0.2964889466840052 + }, + { + "epoch": 0.017912657290895632, + "grad_norm": 16.05152130126953, + "learning_rate": 9.978062592878792e-06, + "loss": 1.5703, + "step": 121, + "train/speech_entropy": 4.14646524528462, + "train/text_entropy": 1.7910851826237197, + "train/token_acc": 0.27896725440806047 + }, + { + "epoch": 0.018060695780903035, + "grad_norm": 19.709640502929688, + "learning_rate": 9.977338047310714e-06, + "loss": 1.0059, + "step": 122, + "train/speech_entropy": 3.3504029130872346, + "train/text_entropy": 0.69430238711908, + "train/token_acc": 0.3347921225382932 + }, + { + "epoch": 0.018208734270910437, + "grad_norm": 18.80482292175293, + "learning_rate": 9.976601760445571e-06, + "loss": 1.3301, + "step": 123, + "train/speech_entropy": 3.7957666852147445, + "train/text_entropy": 1.4170955511239858, + "train/token_acc": 0.2711864406779661 + }, + { + "epoch": 0.01835677276091784, + "grad_norm": 22.530481338500977, + "learning_rate": 9.97585373421419e-06, + "loss": 1.1719, + "step": 124, + "train/speech_entropy": 3.5247106244487147, + "train/text_entropy": 1.0057323455810547, + "train/token_acc": 0.32792207792207795 + }, + { + "epoch": 0.018504811250925242, + "grad_norm": 16.389606475830078, + "learning_rate": 9.975093970578177e-06, + "loss": 1.5195, + "step": 125, + "train/speech_entropy": 3.879809105282738, + "train/text_entropy": 1.518065417826417, + "train/token_acc": 0.2891287586738628 + }, + { + "epoch": 0.01865284974093264, + "grad_norm": 17.52190589904785, + "learning_rate": 9.974322471529929e-06, + "loss": 1.4062, + "step": 126, + "train/speech_entropy": 4.1191460176644075, + "train/text_entropy": 1.4635822401134246, + "train/token_acc": 0.2751235584843493 + }, + { + "epoch": 0.018800888230940043, + "grad_norm": 8.749944686889648, + "learning_rate": 9.97353923909261e-06, + "loss": 0.4385, + "step": 127, + "train/speech_entropy": 2.9459948315046223, + "train/text_entropy": 0.3608852863311768, + "train/token_acc": 0.4024896265560166 + }, + { + "epoch": 0.018948926720947445, + "grad_norm": 15.868559837341309, + "learning_rate": 9.972744275320156e-06, + "loss": 1.4023, + "step": 128, + "train/speech_entropy": 3.7338902127124327, + "train/text_entropy": 1.45824949202999, + "train/token_acc": 0.3103953147877013 + }, + { + "epoch": 0.019096965210954848, + "grad_norm": 15.339587211608887, + "learning_rate": 9.971937582297267e-06, + "loss": 1.5352, + "step": 129, + "train/speech_entropy": 4.138557775696712, + "train/text_entropy": 1.5127306292133946, + "train/token_acc": 0.2825757575757576 + }, + { + "epoch": 0.01924500370096225, + "grad_norm": 12.795344352722168, + "learning_rate": 9.971119162139401e-06, + "loss": 0.6914, + "step": 130, + "train/speech_entropy": 3.4449889817162958, + "train/text_entropy": 0.6732829927324174, + "train/token_acc": 0.33305012744265083 + }, + { + "epoch": 0.019393042190969653, + "grad_norm": 22.88527488708496, + "learning_rate": 9.970289016992768e-06, + "loss": 1.6289, + "step": 131, + "train/speech_entropy": 3.9483704978284746, + "train/text_entropy": 1.5188060705212578, + "train/token_acc": 0.26570680628272253 + }, + { + "epoch": 0.019541080680977055, + "grad_norm": 22.360509872436523, + "learning_rate": 9.96944714903433e-06, + "loss": 1.5547, + "step": 132, + "train/speech_entropy": 4.067053108571846, + "train/text_entropy": 1.6883467940596846, + "train/token_acc": 0.2585139318885449 + }, + { + "epoch": 0.019689119170984457, + "grad_norm": 25.37891387939453, + "learning_rate": 9.968593560471788e-06, + "loss": 2.25, + "step": 133, + "train/speech_entropy": 3.8020480097036726, + "train/text_entropy": 2.198157187619824, + "train/token_acc": 0.29396325459317585 + }, + { + "epoch": 0.01983715766099186, + "grad_norm": 19.359142303466797, + "learning_rate": 9.967728253543574e-06, + "loss": 1.3477, + "step": 134, + "train/speech_entropy": 3.6117659333202607, + "train/text_entropy": 1.4189739495935574, + "train/token_acc": 0.3014460511679644 + }, + { + "epoch": 0.01998519615099926, + "grad_norm": 10.559738159179688, + "learning_rate": 9.966851230518858e-06, + "loss": 0.5283, + "step": 135, + "train/speech_entropy": 2.952342834472656, + "train/text_entropy": 0.48428398912603204, + "train/token_acc": 0.3709016393442623 + }, + { + "epoch": 0.02013323464100666, + "grad_norm": 16.956701278686523, + "learning_rate": 9.965962493697531e-06, + "loss": 1.3965, + "step": 136, + "train/speech_entropy": 3.6221258396055642, + "train/text_entropy": 1.404509110884233, + "train/token_acc": 0.3235014272121789 + }, + { + "epoch": 0.020281273131014063, + "grad_norm": 40.714393615722656, + "learning_rate": 9.965062045410199e-06, + "loss": 1.6484, + "step": 137, + "train/speech_entropy": 3.949837677145678, + "train/text_entropy": 1.499860167503357, + "train/token_acc": 0.2601307189542484 + }, + { + "epoch": 0.020429311621021466, + "grad_norm": 20.434326171875, + "learning_rate": 9.964149888018187e-06, + "loss": 1.3203, + "step": 138, + "train/speech_entropy": 3.410084419971844, + "train/text_entropy": 1.2178199404761905, + "train/token_acc": 0.3226102941176471 + }, + { + "epoch": 0.020577350111028868, + "grad_norm": 20.73240089416504, + "learning_rate": 9.963226023913515e-06, + "loss": 1.5547, + "step": 139, + "train/speech_entropy": 3.940753761781465, + "train/text_entropy": 1.6421526973111642, + "train/token_acc": 0.3061032863849765 + }, + { + "epoch": 0.02072538860103627, + "grad_norm": 26.086261749267578, + "learning_rate": 9.962290455518914e-06, + "loss": 1.1641, + "step": 140, + "train/speech_entropy": 3.9787502375123727, + "train/text_entropy": 1.8203335076990261, + "train/token_acc": 0.3018633540372671 + }, + { + "epoch": 0.020873427091043673, + "grad_norm": 16.541744232177734, + "learning_rate": 9.9613431852878e-06, + "loss": 1.3594, + "step": 141, + "train/speech_entropy": 3.597602549266915, + "train/text_entropy": 1.47565610481031, + "train/token_acc": 0.2865800865800866 + }, + { + "epoch": 0.021021465581051075, + "grad_norm": 13.699913024902344, + "learning_rate": 9.960384215704284e-06, + "loss": 0.8887, + "step": 142, + "train/speech_entropy": 3.5062197429859316, + "train/text_entropy": 1.1514793647514594, + "train/token_acc": 0.3319371727748691 + }, + { + "epoch": 0.021169504071058474, + "grad_norm": 14.946200370788574, + "learning_rate": 9.959413549283145e-06, + "loss": 1.5781, + "step": 143, + "train/speech_entropy": 4.077611126637895, + "train/text_entropy": 1.5124346051897322, + "train/token_acc": 0.2840755735492578 + }, + { + "epoch": 0.021317542561065876, + "grad_norm": 21.750261306762695, + "learning_rate": 9.958431188569848e-06, + "loss": 1.7383, + "step": 144, + "train/speech_entropy": 3.458832569611378, + "train/text_entropy": 1.9327420025337032, + "train/token_acc": 0.29907621247113164 + }, + { + "epoch": 0.02146558105107328, + "grad_norm": 23.855594635009766, + "learning_rate": 9.957437136140516e-06, + "loss": 1.7344, + "step": 145, + "train/speech_entropy": 3.862258051809836, + "train/text_entropy": 1.8251743446401998, + "train/token_acc": 0.28233830845771146 + }, + { + "epoch": 0.02161361954108068, + "grad_norm": 14.88935661315918, + "learning_rate": 9.956431394601938e-06, + "loss": 1.3965, + "step": 146, + "train/speech_entropy": 4.0937044187579925, + "train/text_entropy": 1.5256728584884751, + "train/token_acc": 0.30515463917525776 + }, + { + "epoch": 0.021761658031088083, + "grad_norm": 22.87155532836914, + "learning_rate": 9.955413966591551e-06, + "loss": 1.375, + "step": 147, + "train/speech_entropy": 3.431805173992674, + "train/text_entropy": 1.014829549703512, + "train/token_acc": 0.3120243531202435 + }, + { + "epoch": 0.021909696521095486, + "grad_norm": 19.152799606323242, + "learning_rate": 9.954384854777444e-06, + "loss": 1.9297, + "step": 148, + "train/speech_entropy": 3.984301066032689, + "train/text_entropy": 1.7834714956619033, + "train/token_acc": 0.28680688336520077 + }, + { + "epoch": 0.022057735011102888, + "grad_norm": 9.089462280273438, + "learning_rate": 9.953344061858342e-06, + "loss": 0.5762, + "step": 149, + "train/speech_entropy": 3.0724396943829473, + "train/text_entropy": 0.688238287484774, + "train/token_acc": 0.3847826086956522 + }, + { + "epoch": 0.02220577350111029, + "grad_norm": 15.095109939575195, + "learning_rate": 9.952291590563604e-06, + "loss": 0.916, + "step": 150, + "train/speech_entropy": 2.9137790091529463, + "train/text_entropy": 0.7851511533143091, + "train/token_acc": 0.3562231759656652 + }, + { + "epoch": 0.02235381199111769, + "grad_norm": 13.042162895202637, + "learning_rate": 9.951227443653211e-06, + "loss": 0.835, + "step": 151, + "train/speech_entropy": 3.325519944903111, + "train/text_entropy": 1.114982675297469, + "train/token_acc": 0.3645189761694616 + }, + { + "epoch": 0.02250185048112509, + "grad_norm": 15.95380687713623, + "learning_rate": 9.950151623917765e-06, + "loss": 1.6758, + "step": 152, + "train/speech_entropy": 3.820236606898533, + "train/text_entropy": 1.5478910664795886, + "train/token_acc": 0.2903225806451613 + }, + { + "epoch": 0.022649888971132494, + "grad_norm": 19.31294822692871, + "learning_rate": 9.949064134178478e-06, + "loss": 1.5273, + "step": 153, + "train/speech_entropy": 3.9198113018281844, + "train/text_entropy": 1.4801388192684093, + "train/token_acc": 0.2992849846782431 + }, + { + "epoch": 0.022797927461139896, + "grad_norm": 16.784242630004883, + "learning_rate": 9.947964977287169e-06, + "loss": 1.5156, + "step": 154, + "train/speech_entropy": 4.025979863009443, + "train/text_entropy": 1.5623524083500415, + "train/token_acc": 0.2711038961038961 + }, + { + "epoch": 0.0229459659511473, + "grad_norm": 20.493833541870117, + "learning_rate": 9.946854156126242e-06, + "loss": 0.8652, + "step": 155, + "train/speech_entropy": 3.3479964687797055, + "train/text_entropy": 0.8679008658872832, + "train/token_acc": 0.3291338582677165 + }, + { + "epoch": 0.0230940044411547, + "grad_norm": 17.363046646118164, + "learning_rate": 9.945731673608698e-06, + "loss": 1.6641, + "step": 156, + "train/speech_entropy": 3.8055401083863813, + "train/text_entropy": 1.6699229949111238, + "train/token_acc": 0.28991596638655465 + }, + { + "epoch": 0.023242042931162103, + "grad_norm": 23.040096282958984, + "learning_rate": 9.94459753267812e-06, + "loss": 1.2129, + "step": 157, + "train/speech_entropy": 3.1463584059660654, + "train/text_entropy": 1.5103288750899466, + "train/token_acc": 0.29690346083788705 + }, + { + "epoch": 0.023390081421169506, + "grad_norm": 15.619634628295898, + "learning_rate": 9.94345173630866e-06, + "loss": 0.8672, + "step": 158, + "train/speech_entropy": 2.9037463816072657, + "train/text_entropy": 0.7809470440708312, + "train/token_acc": 0.39424141749723146 + }, + { + "epoch": 0.023538119911176904, + "grad_norm": 22.864696502685547, + "learning_rate": 9.94229428750503e-06, + "loss": 1.5508, + "step": 159, + "train/speech_entropy": 3.3799732955460695, + "train/text_entropy": 1.7369512117826023, + "train/token_acc": 0.35121951219512193 + }, + { + "epoch": 0.023686158401184307, + "grad_norm": 16.114728927612305, + "learning_rate": 9.941125189302508e-06, + "loss": 1.6211, + "step": 160, + "train/speech_entropy": 4.150910669443559, + "train/text_entropy": 1.508614327143697, + "train/token_acc": 0.2554806070826307 + }, + { + "epoch": 0.02383419689119171, + "grad_norm": 19.34571647644043, + "learning_rate": 9.939944444766919e-06, + "loss": 1.5, + "step": 161, + "train/speech_entropy": 3.458534492062965, + "train/text_entropy": 1.592423095703125, + "train/token_acc": 0.3032427695004382 + }, + { + "epoch": 0.02398223538119911, + "grad_norm": 17.96615982055664, + "learning_rate": 9.938752056994629e-06, + "loss": 1.6094, + "step": 162, + "train/speech_entropy": 3.7342534077696636, + "train/text_entropy": 1.5837572635208046, + "train/token_acc": 0.28827877507919747 + }, + { + "epoch": 0.024130273871206514, + "grad_norm": 15.549561500549316, + "learning_rate": 9.93754802911253e-06, + "loss": 0.6641, + "step": 163, + "train/speech_entropy": 3.3153030658087355, + "train/text_entropy": 0.9188502175467355, + "train/token_acc": 0.3354330708661417 + }, + { + "epoch": 0.024278312361213916, + "grad_norm": 18.20968246459961, + "learning_rate": 9.936332364278051e-06, + "loss": 1.6875, + "step": 164, + "train/speech_entropy": 3.6368010855789756, + "train/text_entropy": 1.6078026040086468, + "train/token_acc": 0.28171478565179353 + }, + { + "epoch": 0.02442635085122132, + "grad_norm": 13.679301261901855, + "learning_rate": 9.935105065679127e-06, + "loss": 0.958, + "step": 165, + "train/speech_entropy": 3.7725242654276636, + "train/text_entropy": 0.8365798376302803, + "train/token_acc": 0.4264705882352941 + }, + { + "epoch": 0.02457438934122872, + "grad_norm": 17.929277420043945, + "learning_rate": 9.933866136534208e-06, + "loss": 1.4805, + "step": 166, + "train/speech_entropy": 3.632219073523773, + "train/text_entropy": 1.3165888442206628, + "train/token_acc": 0.288404360753221 + }, + { + "epoch": 0.02472242783123612, + "grad_norm": 20.99834442138672, + "learning_rate": 9.93261558009224e-06, + "loss": 1.7383, + "step": 167, + "train/speech_entropy": 3.9379219171917192, + "train/text_entropy": 1.7063695225146933, + "train/token_acc": 0.2633559066967645 + }, + { + "epoch": 0.024870466321243522, + "grad_norm": 12.079826354980469, + "learning_rate": 9.931353399632661e-06, + "loss": 0.8633, + "step": 168, + "train/speech_entropy": 3.4104768506272674, + "train/text_entropy": 0.9653545781185753, + "train/token_acc": 0.3392330383480826 + }, + { + "epoch": 0.025018504811250925, + "grad_norm": 16.254112243652344, + "learning_rate": 9.930079598465395e-06, + "loss": 1.1523, + "step": 169, + "train/speech_entropy": 3.3183949507402453, + "train/text_entropy": 1.1646275432563267, + "train/token_acc": 0.3201803833145434 + }, + { + "epoch": 0.025166543301258327, + "grad_norm": 15.107691764831543, + "learning_rate": 9.928794179930836e-06, + "loss": 1.0508, + "step": 170, + "train/speech_entropy": 3.4738943917410716, + "train/text_entropy": 1.0409171428155461, + "train/token_acc": 0.33474936278674594 + }, + { + "epoch": 0.02531458179126573, + "grad_norm": 19.142250061035156, + "learning_rate": 9.927497147399841e-06, + "loss": 1.6484, + "step": 171, + "train/speech_entropy": 4.090767144097223, + "train/text_entropy": 1.4306994846888952, + "train/token_acc": 0.28467683369644153 + }, + { + "epoch": 0.02546262028127313, + "grad_norm": 16.931941986083984, + "learning_rate": 9.92618850427373e-06, + "loss": 0.8887, + "step": 172, + "train/speech_entropy": 3.1232756373153965, + "train/text_entropy": 0.72244873046875, + "train/token_acc": 0.3689217758985201 + }, + { + "epoch": 0.025610658771280534, + "grad_norm": 13.980072021484375, + "learning_rate": 9.924868253984265e-06, + "loss": 0.6602, + "step": 173, + "train/speech_entropy": 3.0939192571559873, + "train/text_entropy": 0.69334414130763, + "train/token_acc": 0.3510392609699769 + }, + { + "epoch": 0.025758697261287936, + "grad_norm": 22.294706344604492, + "learning_rate": 9.923536399993651e-06, + "loss": 1.4961, + "step": 174, + "train/speech_entropy": 4.006367178524242, + "train/text_entropy": 1.8183895404522235, + "train/token_acc": 0.26112759643916916 + }, + { + "epoch": 0.025906735751295335, + "grad_norm": 14.491503715515137, + "learning_rate": 9.922192945794517e-06, + "loss": 1.0762, + "step": 175, + "train/speech_entropy": 3.14862021471871, + "train/text_entropy": 1.1950568589083668, + "train/token_acc": 0.3313008130081301 + }, + { + "epoch": 0.026054774241302738, + "grad_norm": 18.961400985717773, + "learning_rate": 9.920837894909915e-06, + "loss": 1.5625, + "step": 176, + "train/speech_entropy": 3.6430484221272703, + "train/text_entropy": 1.5048126634968892, + "train/token_acc": 0.28217821782178215 + }, + { + "epoch": 0.02620281273131014, + "grad_norm": 10.386406898498535, + "learning_rate": 9.919471250893312e-06, + "loss": 0.5625, + "step": 177, + "train/speech_entropy": 2.9099846985114453, + "train/text_entropy": 0.5356508133903383, + "train/token_acc": 0.40604575163398693 + }, + { + "epoch": 0.026350851221317542, + "grad_norm": 9.936087608337402, + "learning_rate": 9.91809301732857e-06, + "loss": 0.4053, + "step": 178, + "train/speech_entropy": 2.795654899195621, + "train/text_entropy": 0.37558404171105586, + "train/token_acc": 0.4165588615782665 + }, + { + "epoch": 0.026498889711324945, + "grad_norm": 17.32583999633789, + "learning_rate": 9.916703197829945e-06, + "loss": 1.6953, + "step": 179, + "train/speech_entropy": 4.115280802484346, + "train/text_entropy": 1.701604332943512, + "train/token_acc": 0.2611657834973505 + }, + { + "epoch": 0.026646928201332347, + "grad_norm": 19.661821365356445, + "learning_rate": 9.915301796042076e-06, + "loss": 0.8262, + "step": 180, + "train/speech_entropy": 3.0727854761584052, + "train/text_entropy": 0.7417241076489428, + "train/token_acc": 0.3831258644536653 + }, + { + "epoch": 0.02679496669133975, + "grad_norm": 18.377384185791016, + "learning_rate": 9.913888815639979e-06, + "loss": 1.3555, + "step": 181, + "train/speech_entropy": 3.87146866987775, + "train/text_entropy": 1.3585599503427181, + "train/token_acc": 0.2924976258309592 + }, + { + "epoch": 0.02694300518134715, + "grad_norm": 27.315792083740234, + "learning_rate": 9.912464260329029e-06, + "loss": 0.8682, + "step": 182, + "train/speech_entropy": 3.2338980111871654, + "train/text_entropy": 1.078209889085987, + "train/token_acc": 0.33497536945812806 + }, + { + "epoch": 0.02709104367135455, + "grad_norm": 14.322318077087402, + "learning_rate": 9.911028133844953e-06, + "loss": 1.0186, + "step": 183, + "train/speech_entropy": 3.4783476042656845, + "train/text_entropy": 1.0939003809584373, + "train/token_acc": 0.32857142857142857 + }, + { + "epoch": 0.027239082161361953, + "grad_norm": 20.44219970703125, + "learning_rate": 9.90958043995383e-06, + "loss": 1.4375, + "step": 184, + "train/speech_entropy": 3.7657133643306904, + "train/text_entropy": 1.3896406693892045, + "train/token_acc": 0.28159645232815966 + }, + { + "epoch": 0.027387120651369355, + "grad_norm": 16.43271827697754, + "learning_rate": 9.908121182452066e-06, + "loss": 1.0449, + "step": 185, + "train/speech_entropy": 3.5465466729525863, + "train/text_entropy": 1.0410881972894437, + "train/token_acc": 0.2646766169154229 + }, + { + "epoch": 0.027535159141376758, + "grad_norm": 18.386709213256836, + "learning_rate": 9.906650365166394e-06, + "loss": 1.3086, + "step": 186, + "train/speech_entropy": 3.727820983013654, + "train/text_entropy": 1.371898351539492, + "train/token_acc": 0.3260416666666667 + }, + { + "epoch": 0.02768319763138416, + "grad_norm": 14.631078720092773, + "learning_rate": 9.905167991953862e-06, + "loss": 1.0352, + "step": 187, + "train/speech_entropy": 4.134019749547595, + "train/text_entropy": 1.1690489035754947, + "train/token_acc": 0.30439121756487025 + }, + { + "epoch": 0.027831236121391562, + "grad_norm": 16.738197326660156, + "learning_rate": 9.90367406670182e-06, + "loss": 1.0127, + "step": 188, + "train/speech_entropy": 3.7954254949280104, + "train/text_entropy": 1.1413173552482359, + "train/token_acc": 0.3392857142857143 + }, + { + "epoch": 0.027979274611398965, + "grad_norm": 18.33837127685547, + "learning_rate": 9.902168593327912e-06, + "loss": 1.5547, + "step": 189, + "train/speech_entropy": 4.277361532422303, + "train/text_entropy": 1.3065041587466286, + "train/token_acc": 0.23866790009250693 + }, + { + "epoch": 0.028127313101406367, + "grad_norm": 17.383333206176758, + "learning_rate": 9.90065157578007e-06, + "loss": 1.168, + "step": 190, + "train/speech_entropy": 4.050316162109375, + "train/text_entropy": 1.209292121112028, + "train/token_acc": 0.28334866605335784 + }, + { + "epoch": 0.02827535159141377, + "grad_norm": 15.31650161743164, + "learning_rate": 9.89912301803649e-06, + "loss": 0.9268, + "step": 191, + "train/speech_entropy": 3.6063705215071993, + "train/text_entropy": 0.9408154574307528, + "train/token_acc": 0.3481675392670157 + }, + { + "epoch": 0.028423390081421168, + "grad_norm": 14.91807746887207, + "learning_rate": 9.897582924105638e-06, + "loss": 0.9395, + "step": 192, + "train/speech_entropy": 4.131676361526268, + "train/text_entropy": 1.0497355633471386, + "train/token_acc": 0.308411214953271 + }, + { + "epoch": 0.02857142857142857, + "grad_norm": 15.369440078735352, + "learning_rate": 9.896031298026235e-06, + "loss": 1.0215, + "step": 193, + "train/speech_entropy": 3.5410968837453356, + "train/text_entropy": 0.9192207473480772, + "train/token_acc": 0.34528076463560337 + }, + { + "epoch": 0.028719467061435973, + "grad_norm": 21.659900665283203, + "learning_rate": 9.894468143867236e-06, + "loss": 1.7109, + "step": 194, + "train/speech_entropy": 3.758589923122279, + "train/text_entropy": 1.7498469606659117, + "train/token_acc": 0.30536130536130535 + }, + { + "epoch": 0.028867505551443375, + "grad_norm": 11.974047660827637, + "learning_rate": 9.892893465727831e-06, + "loss": 1.0508, + "step": 195, + "train/speech_entropy": 4.229051879046078, + "train/text_entropy": 1.1778652676669035, + "train/token_acc": 0.2840050377833753 + }, + { + "epoch": 0.029015544041450778, + "grad_norm": 14.285003662109375, + "learning_rate": 9.891307267737432e-06, + "loss": 1.1396, + "step": 196, + "train/speech_entropy": 3.630347578788952, + "train/text_entropy": 1.0202658803839433, + "train/token_acc": 0.3247863247863248 + }, + { + "epoch": 0.02916358253145818, + "grad_norm": 11.704471588134766, + "learning_rate": 9.889709554055654e-06, + "loss": 0.8359, + "step": 197, + "train/speech_entropy": 3.524643736552977, + "train/text_entropy": 0.8936214073031556, + "train/token_acc": 0.3174311926605505 + }, + { + "epoch": 0.029311621021465582, + "grad_norm": 15.819293975830078, + "learning_rate": 9.888100328872318e-06, + "loss": 1.5781, + "step": 198, + "train/speech_entropy": 4.130980606752944, + "train/text_entropy": 1.589056420996486, + "train/token_acc": 0.2807570977917981 + }, + { + "epoch": 0.029459659511472985, + "grad_norm": 19.090341567993164, + "learning_rate": 9.886479596407428e-06, + "loss": 1.2344, + "step": 199, + "train/speech_entropy": 3.698015037742821, + "train/text_entropy": 1.3397998562106839, + "train/token_acc": 0.2941834451901566 + }, + { + "epoch": 0.029607698001480384, + "grad_norm": 27.217750549316406, + "learning_rate": 9.884847360911168e-06, + "loss": 1.5195, + "step": 200, + "train/speech_entropy": 4.135105037953385, + "train/text_entropy": 1.532439859289872, + "train/token_acc": 0.2494279176201373 + }, + { + "epoch": 0.029755736491487786, + "grad_norm": 15.88379955291748, + "learning_rate": 9.883203626663882e-06, + "loss": 1.623, + "step": 201, + "train/speech_entropy": 4.265376744645365, + "train/text_entropy": 1.4592082950618717, + "train/token_acc": 0.29098966026587886 + }, + { + "epoch": 0.029903774981495188, + "grad_norm": 17.63105583190918, + "learning_rate": 9.881548397976077e-06, + "loss": 0.7393, + "step": 202, + "train/speech_entropy": 3.4113576883411687, + "train/text_entropy": 0.7676385673316749, + "train/token_acc": 0.3583535108958838 + }, + { + "epoch": 0.03005181347150259, + "grad_norm": 20.22199821472168, + "learning_rate": 9.87988167918839e-06, + "loss": 1.3281, + "step": 203, + "train/speech_entropy": 3.3266698443700395, + "train/text_entropy": 1.2372548519036708, + "train/token_acc": 0.3121319199057715 + }, + { + "epoch": 0.030199851961509993, + "grad_norm": 20.358642578125, + "learning_rate": 9.878203474671603e-06, + "loss": 0.9443, + "step": 204, + "train/speech_entropy": 3.2181704341841497, + "train/text_entropy": 0.8396807925801881, + "train/token_acc": 0.3769338959212377 + }, + { + "epoch": 0.030347890451517395, + "grad_norm": 16.17738151550293, + "learning_rate": 9.876513788826607e-06, + "loss": 1.8438, + "step": 205, + "train/speech_entropy": 4.428232884212151, + "train/text_entropy": 1.7452784881130807, + "train/token_acc": 0.2556976097832129 + }, + { + "epoch": 0.030495928941524798, + "grad_norm": 14.219298362731934, + "learning_rate": 9.87481262608441e-06, + "loss": 1.4316, + "step": 206, + "train/speech_entropy": 4.11250597244106, + "train/text_entropy": 1.385136226185581, + "train/token_acc": 0.31563421828908556 + }, + { + "epoch": 0.0306439674315322, + "grad_norm": 15.162027359008789, + "learning_rate": 9.873099990906109e-06, + "loss": 1.0723, + "step": 207, + "train/speech_entropy": 4.2987018660003065, + "train/text_entropy": 1.0309212522686653, + "train/token_acc": 0.29571984435797666 + }, + { + "epoch": 0.0307920059215396, + "grad_norm": 27.80650520324707, + "learning_rate": 9.871375887782894e-06, + "loss": 2.3555, + "step": 208, + "train/speech_entropy": 4.689799122355867, + "train/text_entropy": 1.8016305666917947, + "train/token_acc": 0.2708860759493671 + }, + { + "epoch": 0.030940044411547, + "grad_norm": 21.37019920349121, + "learning_rate": 9.869640321236024e-06, + "loss": 1.5156, + "step": 209, + "train/speech_entropy": 3.778087712477114, + "train/text_entropy": 1.2293604368804603, + "train/token_acc": 0.3244397011739594 + }, + { + "epoch": 0.031088082901554404, + "grad_norm": 19.975238800048828, + "learning_rate": 9.867893295816818e-06, + "loss": 1.7852, + "step": 210, + "train/speech_entropy": 4.252812876452665, + "train/text_entropy": 2.016330866523869, + "train/token_acc": 0.2596425211665099 + }, + { + "epoch": 0.031236121391561806, + "grad_norm": 14.038900375366211, + "learning_rate": 9.866134816106644e-06, + "loss": 1.0205, + "step": 211, + "train/speech_entropy": 3.431277804904514, + "train/text_entropy": 0.834742192471965, + "train/token_acc": 0.3436830835117773 + }, + { + "epoch": 0.03138415988156921, + "grad_norm": 14.091026306152344, + "learning_rate": 9.864364886716917e-06, + "loss": 1.4727, + "step": 212, + "train/speech_entropy": 4.464903539540816, + "train/text_entropy": 1.6464894961004388, + "train/token_acc": 0.2435118718939812 + }, + { + "epoch": 0.03153219837157661, + "grad_norm": 19.656312942504883, + "learning_rate": 9.862583512289065e-06, + "loss": 0.9219, + "step": 213, + "train/speech_entropy": 3.597453182444853, + "train/text_entropy": 1.078334137245461, + "train/token_acc": 0.3395872420262664 + }, + { + "epoch": 0.03168023686158401, + "grad_norm": 12.807794570922852, + "learning_rate": 9.860790697494537e-06, + "loss": 1.207, + "step": 214, + "train/speech_entropy": 4.143555788224188, + "train/text_entropy": 1.306239854043989, + "train/token_acc": 0.29121278140885987 + }, + { + "epoch": 0.03182827535159141, + "grad_norm": 16.624290466308594, + "learning_rate": 9.85898644703478e-06, + "loss": 1.1133, + "step": 215, + "train/speech_entropy": 4.286246389516542, + "train/text_entropy": 1.0499902898615057, + "train/token_acc": 0.29175946547884185 + }, + { + "epoch": 0.03197631384159882, + "grad_norm": 15.198729515075684, + "learning_rate": 9.857170765641232e-06, + "loss": 1.1992, + "step": 216, + "train/speech_entropy": 4.089330147663506, + "train/text_entropy": 1.2858003558534565, + "train/token_acc": 0.2647814910025707 + }, + { + "epoch": 0.03212435233160622, + "grad_norm": 15.258683204650879, + "learning_rate": 9.855343658075302e-06, + "loss": 1.5742, + "step": 217, + "train/speech_entropy": 4.010282195183179, + "train/text_entropy": 1.5081346299913194, + "train/token_acc": 0.2854877081681205 + }, + { + "epoch": 0.03227239082161362, + "grad_norm": 14.438471794128418, + "learning_rate": 9.853505129128372e-06, + "loss": 0.9941, + "step": 218, + "train/speech_entropy": 3.4344044093517105, + "train/text_entropy": 1.002074235927559, + "train/token_acc": 0.32486187845303865 + }, + { + "epoch": 0.03242042931162102, + "grad_norm": 16.158950805664062, + "learning_rate": 9.851655183621765e-06, + "loss": 0.6904, + "step": 219, + "train/speech_entropy": 3.4666124051089793, + "train/text_entropy": 0.6199728858391971, + "train/token_acc": 0.34549878345498786 + }, + { + "epoch": 0.03256846780162842, + "grad_norm": 13.61329174041748, + "learning_rate": 9.849793826406752e-06, + "loss": 1.0586, + "step": 220, + "train/speech_entropy": 4.774694416518296, + "train/text_entropy": 1.0630827747691762, + "train/token_acc": 0.3039443155452436 + }, + { + "epoch": 0.032716506291635826, + "grad_norm": 22.903051376342773, + "learning_rate": 9.84792106236452e-06, + "loss": 2.4141, + "step": 221, + "train/speech_entropy": 3.9127921778298824, + "train/text_entropy": 2.407526335647802, + "train/token_acc": 0.2599805258033106 + }, + { + "epoch": 0.032864544781643225, + "grad_norm": 15.943107604980469, + "learning_rate": 9.846036896406176e-06, + "loss": 1.2402, + "step": 222, + "train/speech_entropy": 4.03511921318692, + "train/text_entropy": 0.9580987318506781, + "train/token_acc": 0.3346839546191248 + }, + { + "epoch": 0.03301258327165063, + "grad_norm": 19.630525588989258, + "learning_rate": 9.844141333472731e-06, + "loss": 1.3242, + "step": 223, + "train/speech_entropy": 3.812871976741734, + "train/text_entropy": 1.28522270952346, + "train/token_acc": 0.2855579868708972 + }, + { + "epoch": 0.03316062176165803, + "grad_norm": 16.100122451782227, + "learning_rate": 9.84223437853507e-06, + "loss": 1.2383, + "step": 224, + "train/speech_entropy": 4.118604792177288, + "train/text_entropy": 1.2013861376368358, + "train/token_acc": 0.28969957081545067 + }, + { + "epoch": 0.033308660251665435, + "grad_norm": 13.177223205566406, + "learning_rate": 9.840316036593965e-06, + "loss": 0.7383, + "step": 225, + "train/speech_entropy": 3.812861493934792, + "train/text_entropy": 0.7221677421343209, + "train/token_acc": 0.32007575757575757 + }, + { + "epoch": 0.033456698741672834, + "grad_norm": 14.475889205932617, + "learning_rate": 9.838386312680043e-06, + "loss": 0.7495, + "step": 226, + "train/speech_entropy": 3.358440763387249, + "train/text_entropy": 0.7298914591471354, + "train/token_acc": 0.36720867208672087 + }, + { + "epoch": 0.03360473723168024, + "grad_norm": 21.173519134521484, + "learning_rate": 9.83644521185378e-06, + "loss": 0.9922, + "step": 227, + "train/speech_entropy": 4.08793314051667, + "train/text_entropy": 0.9761310686383928, + "train/token_acc": 0.3223350253807107 + }, + { + "epoch": 0.03375277572168764, + "grad_norm": 14.451682090759277, + "learning_rate": 9.834492739205484e-06, + "loss": 0.7246, + "step": 228, + "train/speech_entropy": 3.609160288890068, + "train/text_entropy": 0.6805776501378269, + "train/token_acc": 0.33424657534246577 + }, + { + "epoch": 0.03390081421169504, + "grad_norm": 13.507497787475586, + "learning_rate": 9.83252889985529e-06, + "loss": 0.9463, + "step": 229, + "train/speech_entropy": 4.154609930907057, + "train/text_entropy": 1.1927895890068763, + "train/token_acc": 0.29349904397705545 + }, + { + "epoch": 0.034048852701702444, + "grad_norm": 14.866497039794922, + "learning_rate": 9.830553698953136e-06, + "loss": 0.9746, + "step": 230, + "train/speech_entropy": 3.4467352885584677, + "train/text_entropy": 0.7125647968716091, + "train/token_acc": 0.33403141361256544 + }, + { + "epoch": 0.03419689119170984, + "grad_norm": 14.515915870666504, + "learning_rate": 9.828567141678758e-06, + "loss": 0.9287, + "step": 231, + "train/speech_entropy": 3.4353748353952724, + "train/text_entropy": 0.8662152497664742, + "train/token_acc": 0.3356401384083045 + }, + { + "epoch": 0.03434492968171725, + "grad_norm": 14.002631187438965, + "learning_rate": 9.826569233241671e-06, + "loss": 1.168, + "step": 232, + "train/speech_entropy": 3.898133638087084, + "train/text_entropy": 1.265190734166533, + "train/token_acc": 0.29572925060435135 + }, + { + "epoch": 0.03449296817172465, + "grad_norm": 19.381816864013672, + "learning_rate": 9.824559978881154e-06, + "loss": 1.4043, + "step": 233, + "train/speech_entropy": 3.768015360203927, + "train/text_entropy": 1.288986179926624, + "train/token_acc": 0.2779700115340254 + }, + { + "epoch": 0.03464100666173205, + "grad_norm": 17.96495246887207, + "learning_rate": 9.822539383866246e-06, + "loss": 1.0342, + "step": 234, + "train/speech_entropy": 3.5816434405237954, + "train/text_entropy": 1.0423579437053756, + "train/token_acc": 0.3469387755102041 + }, + { + "epoch": 0.03478904515173945, + "grad_norm": 21.293405532836914, + "learning_rate": 9.820507453495718e-06, + "loss": 1.3945, + "step": 235, + "train/speech_entropy": 3.7275824216332767, + "train/text_entropy": 0.9884970892043341, + "train/token_acc": 0.3290766208251473 + }, + { + "epoch": 0.03493708364174685, + "grad_norm": 18.07074737548828, + "learning_rate": 9.818464193098073e-06, + "loss": 1.3281, + "step": 236, + "train/speech_entropy": 3.149629407957218, + "train/text_entropy": 1.037757776345417, + "train/token_acc": 0.3431603773584906 + }, + { + "epoch": 0.03508512213175426, + "grad_norm": 20.506067276000977, + "learning_rate": 9.816409608031523e-06, + "loss": 1.6953, + "step": 237, + "train/speech_entropy": 4.145053395053797, + "train/text_entropy": 1.6381740794462316, + "train/token_acc": 0.23382624768946395 + }, + { + "epoch": 0.035233160621761656, + "grad_norm": 15.072114944458008, + "learning_rate": 9.814343703683977e-06, + "loss": 0.793, + "step": 238, + "train/speech_entropy": 3.1868014162686205, + "train/text_entropy": 0.7041601965270067, + "train/token_acc": 0.37874097007223945 + }, + { + "epoch": 0.03538119911176906, + "grad_norm": 9.069244384765625, + "learning_rate": 9.81226648547303e-06, + "loss": 0.3604, + "step": 239, + "train/speech_entropy": 3.411216846410779, + "train/text_entropy": 0.4251588280521222, + "train/token_acc": 0.3673469387755102 + }, + { + "epoch": 0.03552923760177646, + "grad_norm": 18.261547088623047, + "learning_rate": 9.810177958845942e-06, + "loss": 1.9336, + "step": 240, + "train/speech_entropy": 4.206288783617195, + "train/text_entropy": 1.5853403914333586, + "train/token_acc": 0.2987987987987988 + }, + { + "epoch": 0.035677276091783866, + "grad_norm": 15.431228637695312, + "learning_rate": 9.808078129279633e-06, + "loss": 0.7139, + "step": 241, + "train/speech_entropy": 3.167125040048304, + "train/text_entropy": 0.6709525751513105, + "train/token_acc": 0.4057071960297767 + }, + { + "epoch": 0.035825314581791265, + "grad_norm": 15.0645170211792, + "learning_rate": 9.80596700228066e-06, + "loss": 0.8809, + "step": 242, + "train/speech_entropy": 3.2379287372935903, + "train/text_entropy": 0.7333303021557759, + "train/token_acc": 0.35689851767388825 + }, + { + "epoch": 0.03597335307179867, + "grad_norm": 14.975324630737305, + "learning_rate": 9.803844583385207e-06, + "loss": 0.9082, + "step": 243, + "train/speech_entropy": 3.484467585417882, + "train/text_entropy": 0.772703559310348, + "train/token_acc": 0.3448275862068966 + }, + { + "epoch": 0.03612139156180607, + "grad_norm": 19.337793350219727, + "learning_rate": 9.801710878159072e-06, + "loss": 1.3008, + "step": 244, + "train/speech_entropy": 3.91242135317271, + "train/text_entropy": 1.2184481413468071, + "train/token_acc": 0.27364438839848676 + }, + { + "epoch": 0.03626943005181347, + "grad_norm": 18.79851722717285, + "learning_rate": 9.799565892197648e-06, + "loss": 1.6445, + "step": 245, + "train/speech_entropy": 3.688543439304868, + "train/text_entropy": 1.6231237854620424, + "train/token_acc": 0.2971996386630533 + }, + { + "epoch": 0.036417468541820874, + "grad_norm": 18.75339698791504, + "learning_rate": 9.797409631125908e-06, + "loss": 0.9863, + "step": 246, + "train/speech_entropy": 3.5348719099678223, + "train/text_entropy": 1.067495256940895, + "train/token_acc": 0.29721362229102166 + }, + { + "epoch": 0.03656550703182827, + "grad_norm": 22.06964874267578, + "learning_rate": 9.795242100598397e-06, + "loss": 1.3613, + "step": 247, + "train/speech_entropy": 3.583182155495823, + "train/text_entropy": 1.5613555530510326, + "train/token_acc": 0.31287128712871287 + }, + { + "epoch": 0.03671354552183568, + "grad_norm": 10.317774772644043, + "learning_rate": 9.793063306299211e-06, + "loss": 0.8091, + "step": 248, + "train/speech_entropy": 3.765242156611162, + "train/text_entropy": 0.8426946347898191, + "train/token_acc": 0.33237547892720304 + }, + { + "epoch": 0.03686158401184308, + "grad_norm": 11.870044708251953, + "learning_rate": 9.790873253941986e-06, + "loss": 0.6367, + "step": 249, + "train/speech_entropy": 3.5705236032468464, + "train/text_entropy": 0.7106512442402456, + "train/token_acc": 0.3135755258126195 + }, + { + "epoch": 0.037009622501850484, + "grad_norm": 22.117252349853516, + "learning_rate": 9.788671949269874e-06, + "loss": 1.5391, + "step": 250, + "train/speech_entropy": 3.3153053602605005, + "train/text_entropy": 1.390533753803798, + "train/token_acc": 0.2970027247956403 + }, + { + "epoch": 0.03715766099185788, + "grad_norm": 17.686002731323242, + "learning_rate": 9.786459398055546e-06, + "loss": 1.6094, + "step": 251, + "train/speech_entropy": 4.415896342787984, + "train/text_entropy": 1.421113998170883, + "train/token_acc": 0.27402700555996823 + }, + { + "epoch": 0.03730569948186528, + "grad_norm": 16.330148696899414, + "learning_rate": 9.784235606101155e-06, + "loss": 1.418, + "step": 252, + "train/speech_entropy": 3.888961773361187, + "train/text_entropy": 1.543044085059661, + "train/token_acc": 0.2834331337325349 + }, + { + "epoch": 0.03745373797187269, + "grad_norm": 22.581153869628906, + "learning_rate": 9.782000579238338e-06, + "loss": 1.5547, + "step": 253, + "train/speech_entropy": 4.085720440288922, + "train/text_entropy": 1.5804587029791497, + "train/token_acc": 0.2621951219512195 + }, + { + "epoch": 0.037601776461880086, + "grad_norm": 18.777231216430664, + "learning_rate": 9.779754323328192e-06, + "loss": 1.4082, + "step": 254, + "train/speech_entropy": 3.473818185247957, + "train/text_entropy": 1.3192666706285978, + "train/token_acc": 0.2884848484848485 + }, + { + "epoch": 0.03774981495188749, + "grad_norm": 19.488616943359375, + "learning_rate": 9.777496844261262e-06, + "loss": 0.832, + "step": 255, + "train/speech_entropy": 3.390472412109375, + "train/text_entropy": 0.9038229636775638, + "train/token_acc": 0.36059479553903345 + }, + { + "epoch": 0.03789785344189489, + "grad_norm": 20.993331909179688, + "learning_rate": 9.775228147957522e-06, + "loss": 1.9219, + "step": 256, + "train/speech_entropy": 4.29492757922021, + "train/text_entropy": 1.8211923946033826, + "train/token_acc": 0.24864864864864866 + }, + { + "epoch": 0.0380458919319023, + "grad_norm": 10.128350257873535, + "learning_rate": 9.772948240366365e-06, + "loss": 0.7188, + "step": 257, + "train/speech_entropy": 3.3765599206273036, + "train/text_entropy": 0.5414502386953316, + "train/token_acc": 0.36345776031434185 + }, + { + "epoch": 0.038193930421909696, + "grad_norm": 21.49756622314453, + "learning_rate": 9.770657127466583e-06, + "loss": 1.8633, + "step": 258, + "train/speech_entropy": 3.8758639946669162, + "train/text_entropy": 1.8596051924067771, + "train/token_acc": 0.2767749699157641 + }, + { + "epoch": 0.0383419689119171, + "grad_norm": 15.741171836853027, + "learning_rate": 9.768354815266353e-06, + "loss": 0.9199, + "step": 259, + "train/speech_entropy": 4.075684704489877, + "train/text_entropy": 0.7943573522403887, + "train/token_acc": 0.318705035971223 + }, + { + "epoch": 0.0384900074019245, + "grad_norm": 19.83437156677246, + "learning_rate": 9.766041309803218e-06, + "loss": 1.8203, + "step": 260, + "train/speech_entropy": 3.9584914161484086, + "train/text_entropy": 1.6513766024975067, + "train/token_acc": 0.2818455366098295 + }, + { + "epoch": 0.0386380458919319, + "grad_norm": 17.42316246032715, + "learning_rate": 9.763716617144078e-06, + "loss": 1.1875, + "step": 261, + "train/speech_entropy": 3.547016504392102, + "train/text_entropy": 1.2159606888449404, + "train/token_acc": 0.31551901336073995 + }, + { + "epoch": 0.038786084381939305, + "grad_norm": 16.597604751586914, + "learning_rate": 9.761380743385167e-06, + "loss": 0.959, + "step": 262, + "train/speech_entropy": 3.3283088780656644, + "train/text_entropy": 0.9664521379535701, + "train/token_acc": 0.36428571428571427 + }, + { + "epoch": 0.038934122871946704, + "grad_norm": 21.650373458862305, + "learning_rate": 9.759033694652045e-06, + "loss": 2.0117, + "step": 263, + "train/speech_entropy": 3.8578810647269277, + "train/text_entropy": 1.871297786110326, + "train/token_acc": 0.26574307304785894 + }, + { + "epoch": 0.03908216136195411, + "grad_norm": 14.58530330657959, + "learning_rate": 9.75667547709957e-06, + "loss": 1.5742, + "step": 264, + "train/speech_entropy": 4.415019696621546, + "train/text_entropy": 1.5750761071029975, + "train/token_acc": 0.24945135332845647 + }, + { + "epoch": 0.03923019985196151, + "grad_norm": 11.229324340820312, + "learning_rate": 9.754306096911896e-06, + "loss": 0.9121, + "step": 265, + "train/speech_entropy": 3.674200781460466, + "train/text_entropy": 0.9801870281413451, + "train/token_acc": 0.3290378006872852 + }, + { + "epoch": 0.039378238341968914, + "grad_norm": 13.35831069946289, + "learning_rate": 9.751925560302443e-06, + "loss": 0.7852, + "step": 266, + "train/speech_entropy": 3.911860539362981, + "train/text_entropy": 0.7488142747145433, + "train/token_acc": 0.3528205128205128 + }, + { + "epoch": 0.03952627683197631, + "grad_norm": 18.46710205078125, + "learning_rate": 9.749533873513893e-06, + "loss": 1.2715, + "step": 267, + "train/speech_entropy": 3.2955457476111545, + "train/text_entropy": 1.1254572424088947, + "train/token_acc": 0.3516209476309227 + }, + { + "epoch": 0.03967431532198372, + "grad_norm": 18.31432342529297, + "learning_rate": 9.747131042818164e-06, + "loss": 1.3867, + "step": 268, + "train/speech_entropy": 3.7400412853853204, + "train/text_entropy": 1.5700797481813293, + "train/token_acc": 0.30916030534351147 + }, + { + "epoch": 0.03982235381199112, + "grad_norm": 15.530065536499023, + "learning_rate": 9.744717074516402e-06, + "loss": 1.8633, + "step": 269, + "train/speech_entropy": 4.160364020360659, + "train/text_entropy": 1.668556098651169, + "train/token_acc": 0.25453277545327757 + }, + { + "epoch": 0.03997039230199852, + "grad_norm": 14.879589080810547, + "learning_rate": 9.742291974938954e-06, + "loss": 1.1758, + "step": 270, + "train/speech_entropy": 4.464546440972223, + "train/text_entropy": 1.391593189239502, + "train/token_acc": 0.26 + }, + { + "epoch": 0.04011843079200592, + "grad_norm": 20.783327102661133, + "learning_rate": 9.739855750445363e-06, + "loss": 1.1094, + "step": 271, + "train/speech_entropy": 3.751682795142785, + "train/text_entropy": 1.4559878652746028, + "train/token_acc": 0.3076923076923077 + }, + { + "epoch": 0.04026646928201332, + "grad_norm": 15.419761657714844, + "learning_rate": 9.737408407424346e-06, + "loss": 1.3828, + "step": 272, + "train/speech_entropy": 4.099091469113059, + "train/text_entropy": 1.442423366970486, + "train/token_acc": 0.27036770007209804 + }, + { + "epoch": 0.04041450777202073, + "grad_norm": 15.93834400177002, + "learning_rate": 9.73494995229377e-06, + "loss": 0.8555, + "step": 273, + "train/speech_entropy": 3.1746349683318376, + "train/text_entropy": 0.6933233069364356, + "train/token_acc": 0.34973821989528797 + }, + { + "epoch": 0.040562546262028126, + "grad_norm": 14.737724304199219, + "learning_rate": 9.732480391500648e-06, + "loss": 0.9102, + "step": 274, + "train/speech_entropy": 3.691804544156848, + "train/text_entropy": 1.0169662621061681, + "train/token_acc": 0.33060109289617484 + }, + { + "epoch": 0.04071058475203553, + "grad_norm": 23.255170822143555, + "learning_rate": 9.729999731521118e-06, + "loss": 1.7539, + "step": 275, + "train/speech_entropy": 3.496569661999066, + "train/text_entropy": 1.668846191898469, + "train/token_acc": 0.2741046831955923 + }, + { + "epoch": 0.04085862324204293, + "grad_norm": 14.0156888961792, + "learning_rate": 9.72750797886042e-06, + "loss": 0.9248, + "step": 276, + "train/speech_entropy": 3.825288486941425, + "train/text_entropy": 1.020462940648659, + "train/token_acc": 0.33659491193737767 + }, + { + "epoch": 0.04100666173205033, + "grad_norm": 13.236350059509277, + "learning_rate": 9.725005140052886e-06, + "loss": 0.8301, + "step": 277, + "train/speech_entropy": 3.366861609256629, + "train/text_entropy": 0.6858070711546307, + "train/token_acc": 0.34474327628361856 + }, + { + "epoch": 0.041154700222057736, + "grad_norm": 15.045985221862793, + "learning_rate": 9.722491221661918e-06, + "loss": 1.1797, + "step": 278, + "train/speech_entropy": 4.100140499502228, + "train/text_entropy": 1.653536968231201, + "train/token_acc": 0.2744227353463588 + }, + { + "epoch": 0.041302738712065135, + "grad_norm": 17.924699783325195, + "learning_rate": 9.719966230279973e-06, + "loss": 1.6523, + "step": 279, + "train/speech_entropy": 4.271382842279508, + "train/text_entropy": 1.5655494103064904, + "train/token_acc": 0.27559726962457337 + }, + { + "epoch": 0.04145077720207254, + "grad_norm": 14.684612274169922, + "learning_rate": 9.717430172528548e-06, + "loss": 1.6875, + "step": 280, + "train/speech_entropy": 3.894752919550173, + "train/text_entropy": 1.6684202988459347, + "train/token_acc": 0.2897406989853439 + }, + { + "epoch": 0.04159881569207994, + "grad_norm": 20.314056396484375, + "learning_rate": 9.714883055058156e-06, + "loss": 1.8125, + "step": 281, + "train/speech_entropy": 3.978392571382336, + "train/text_entropy": 1.6840273617235428, + "train/token_acc": 0.2771317829457364 + }, + { + "epoch": 0.041746854182087345, + "grad_norm": 17.620515823364258, + "learning_rate": 9.71232488454832e-06, + "loss": 1.1309, + "step": 282, + "train/speech_entropy": 3.538358699640579, + "train/text_entropy": 1.372458527727825, + "train/token_acc": 0.33214285714285713 + }, + { + "epoch": 0.041894892672094744, + "grad_norm": 17.724355697631836, + "learning_rate": 9.709755667707541e-06, + "loss": 1.8672, + "step": 283, + "train/speech_entropy": 4.203494497881848, + "train/text_entropy": 1.7304424467540922, + "train/token_acc": 0.26961926961926963 + }, + { + "epoch": 0.04204293116210215, + "grad_norm": 17.785001754760742, + "learning_rate": 9.707175411273292e-06, + "loss": 1.5859, + "step": 284, + "train/speech_entropy": 4.241005009279856, + "train/text_entropy": 1.603331847110037, + "train/token_acc": 0.29774127310061604 + }, + { + "epoch": 0.04219096965210955, + "grad_norm": 19.709165573120117, + "learning_rate": 9.704584122011994e-06, + "loss": 0.9668, + "step": 285, + "train/speech_entropy": 3.5526338973135316, + "train/text_entropy": 0.8772057003445095, + "train/token_acc": 0.29961089494163423 + }, + { + "epoch": 0.04233900814211695, + "grad_norm": 17.458072662353516, + "learning_rate": 9.701981806719001e-06, + "loss": 0.6465, + "step": 286, + "train/speech_entropy": 3.5410585702183734, + "train/text_entropy": 0.8251842298005757, + "train/token_acc": 0.3254901960784314 + }, + { + "epoch": 0.04248704663212435, + "grad_norm": 20.58736228942871, + "learning_rate": 9.699368472218583e-06, + "loss": 1.2012, + "step": 287, + "train/speech_entropy": 3.5507226058220933, + "train/text_entropy": 0.495786330279182, + "train/token_acc": 0.3508771929824561 + }, + { + "epoch": 0.04263508512213175, + "grad_norm": 17.70758628845215, + "learning_rate": 9.6967441253639e-06, + "loss": 1.1309, + "step": 288, + "train/speech_entropy": 4.424679032027893, + "train/text_entropy": 1.4082091305706952, + "train/token_acc": 0.2635933806146572 + }, + { + "epoch": 0.04278312361213916, + "grad_norm": 23.48309898376465, + "learning_rate": 9.694108773037004e-06, + "loss": 1.4531, + "step": 289, + "train/speech_entropy": 4.005645358540516, + "train/text_entropy": 1.4369301795959473, + "train/token_acc": 0.2578241430700447 + }, + { + "epoch": 0.04293116210214656, + "grad_norm": 15.74549674987793, + "learning_rate": 9.691462422148791e-06, + "loss": 1.6172, + "step": 290, + "train/speech_entropy": 4.013756747003612, + "train/text_entropy": 1.7317760877372805, + "train/token_acc": 0.28034682080924855 + }, + { + "epoch": 0.04307920059215396, + "grad_norm": 12.432733535766602, + "learning_rate": 9.688805079639013e-06, + "loss": 0.8633, + "step": 291, + "train/speech_entropy": 3.926314041604121, + "train/text_entropy": 0.9661376725381879, + "train/token_acc": 0.3088630259623993 + }, + { + "epoch": 0.04322723908216136, + "grad_norm": 22.07400131225586, + "learning_rate": 9.686136752476239e-06, + "loss": 1.7012, + "step": 292, + "train/speech_entropy": 3.732533794757357, + "train/text_entropy": 1.9697560628255208, + "train/token_acc": 0.274442538593482 + }, + { + "epoch": 0.04337527757216876, + "grad_norm": 16.636829376220703, + "learning_rate": 9.683457447657846e-06, + "loss": 1.1089, + "step": 293, + "train/speech_entropy": 3.5247612156927013, + "train/text_entropy": 1.059990644454956, + "train/token_acc": 0.3576965669988926 + }, + { + "epoch": 0.043523316062176166, + "grad_norm": 21.391611099243164, + "learning_rate": 9.680767172209998e-06, + "loss": 1.0938, + "step": 294, + "train/speech_entropy": 3.472386308666492, + "train/text_entropy": 0.7633560242191437, + "train/token_acc": 0.3328290468986384 + }, + { + "epoch": 0.043671354552183565, + "grad_norm": 20.90434455871582, + "learning_rate": 9.67806593318763e-06, + "loss": 1.9258, + "step": 295, + "train/speech_entropy": 4.33682936065051, + "train/text_entropy": 1.4054823294698193, + "train/token_acc": 0.26791277258566976 + }, + { + "epoch": 0.04381939304219097, + "grad_norm": 13.387065887451172, + "learning_rate": 9.675353737674426e-06, + "loss": 0.7085, + "step": 296, + "train/speech_entropy": 3.8234589739543634, + "train/text_entropy": 0.8172085270737157, + "train/token_acc": 0.3276492082825822 + }, + { + "epoch": 0.04396743153219837, + "grad_norm": 15.015064239501953, + "learning_rate": 9.672630592782803e-06, + "loss": 1.1699, + "step": 297, + "train/speech_entropy": 3.7650038775275734, + "train/text_entropy": 1.2972575803346271, + "train/token_acc": 0.2971360381861575 + }, + { + "epoch": 0.044115470022205776, + "grad_norm": 20.059852600097656, + "learning_rate": 9.66989650565389e-06, + "loss": 1.6621, + "step": 298, + "train/speech_entropy": 4.117213893581081, + "train/text_entropy": 1.6670186231090764, + "train/token_acc": 0.27989821882951654 + }, + { + "epoch": 0.044263508512213175, + "grad_norm": 12.110257148742676, + "learning_rate": 9.667151483457511e-06, + "loss": 1.0215, + "step": 299, + "train/speech_entropy": 3.634046184260671, + "train/text_entropy": 1.131915890086781, + "train/token_acc": 0.2987951807228916 + }, + { + "epoch": 0.04441154700222058, + "grad_norm": 16.438453674316406, + "learning_rate": 9.66439553339217e-06, + "loss": 2.043, + "step": 300, + "train/speech_entropy": 3.6804913194444446, + "train/text_entropy": 2.1146870429205786, + "train/token_acc": 0.27596439169139464 + }, + { + "epoch": 0.04455958549222798, + "grad_norm": 18.194894790649414, + "learning_rate": 9.661628662685023e-06, + "loss": 1.082, + "step": 301, + "train/speech_entropy": 3.6938611288931855, + "train/text_entropy": 0.6113059131811697, + "train/token_acc": 0.3342354533152909 + }, + { + "epoch": 0.04470762398223538, + "grad_norm": 7.469502925872803, + "learning_rate": 9.658850878591862e-06, + "loss": 0.3701, + "step": 302, + "train/speech_entropy": 3.441296990444059, + "train/text_entropy": 0.43859047410590796, + "train/token_acc": 0.36801541425818884 + }, + { + "epoch": 0.044855662472242784, + "grad_norm": 20.106548309326172, + "learning_rate": 9.656062188397107e-06, + "loss": 1.5547, + "step": 303, + "train/speech_entropy": 3.8832454550217075, + "train/text_entropy": 1.6368189631281673, + "train/token_acc": 0.264 + }, + { + "epoch": 0.04500370096225018, + "grad_norm": 23.97908592224121, + "learning_rate": 9.653262599413772e-06, + "loss": 1.2969, + "step": 304, + "train/speech_entropy": 4.027971835748335, + "train/text_entropy": 1.556134023164448, + "train/token_acc": 0.3187855787476281 + }, + { + "epoch": 0.04515173945225759, + "grad_norm": 12.855597496032715, + "learning_rate": 9.650452118983454e-06, + "loss": 0.6113, + "step": 305, + "train/speech_entropy": 3.6583450491879765, + "train/text_entropy": 0.6341314164419023, + "train/token_acc": 0.3709016393442623 + }, + { + "epoch": 0.04529977794226499, + "grad_norm": 15.229080200195312, + "learning_rate": 9.647630754476306e-06, + "loss": 0.6777, + "step": 306, + "train/speech_entropy": 3.665758822826629, + "train/text_entropy": 0.7585994050702975, + "train/token_acc": 0.339568345323741 + }, + { + "epoch": 0.045447816432272393, + "grad_norm": 11.222063064575195, + "learning_rate": 9.64479851329103e-06, + "loss": 0.8359, + "step": 307, + "train/speech_entropy": 4.036868600046395, + "train/text_entropy": 0.9220056444685035, + "train/token_acc": 0.3111495246326707 + }, + { + "epoch": 0.04559585492227979, + "grad_norm": 20.179698944091797, + "learning_rate": 9.641955402854848e-06, + "loss": 1.7109, + "step": 308, + "train/speech_entropy": 3.9584415106698057, + "train/text_entropy": 1.8834082174022295, + "train/token_acc": 0.25553319919517103 + }, + { + "epoch": 0.04574389341228719, + "grad_norm": 14.673447608947754, + "learning_rate": 9.639101430623488e-06, + "loss": 1.8125, + "step": 309, + "train/speech_entropy": 4.437142268906502, + "train/text_entropy": 1.760451184676381, + "train/token_acc": 0.267109634551495 + }, + { + "epoch": 0.0458919319022946, + "grad_norm": 19.40577507019043, + "learning_rate": 9.636236604081156e-06, + "loss": 1.3594, + "step": 310, + "train/speech_entropy": 3.988948780101734, + "train/text_entropy": 1.3189536040683962, + "train/token_acc": 0.28635851183765504 + }, + { + "epoch": 0.046039970392301996, + "grad_norm": 12.111400604248047, + "learning_rate": 9.633360930740528e-06, + "loss": 1.168, + "step": 311, + "train/speech_entropy": 3.9286784965451025, + "train/text_entropy": 1.43197698053324, + "train/token_acc": 0.3114215283483977 + }, + { + "epoch": 0.0461880088823094, + "grad_norm": 18.862504959106445, + "learning_rate": 9.63047441814272e-06, + "loss": 1.3828, + "step": 312, + "train/speech_entropy": 3.9745597091375613, + "train/text_entropy": 1.2444002955092996, + "train/token_acc": 0.3040473840078973 + }, + { + "epoch": 0.0463360473723168, + "grad_norm": 17.686325073242188, + "learning_rate": 9.627577073857275e-06, + "loss": 1.4102, + "step": 313, + "train/speech_entropy": 3.8882454776884483, + "train/text_entropy": 1.4374331171641093, + "train/token_acc": 0.2640918580375783 + }, + { + "epoch": 0.046484085862324206, + "grad_norm": 12.776817321777344, + "learning_rate": 9.624668905482144e-06, + "loss": 0.9014, + "step": 314, + "train/speech_entropy": 3.535795420860561, + "train/text_entropy": 0.9063388469607331, + "train/token_acc": 0.33240997229916897 + }, + { + "epoch": 0.046632124352331605, + "grad_norm": 16.098953247070312, + "learning_rate": 9.621749920643655e-06, + "loss": 1.1855, + "step": 315, + "train/speech_entropy": 3.963386545803512, + "train/text_entropy": 1.2589939718377101, + "train/token_acc": 0.2731326644370123 + }, + { + "epoch": 0.04678016284233901, + "grad_norm": 20.406204223632812, + "learning_rate": 9.618820126996509e-06, + "loss": 1.6328, + "step": 316, + "train/speech_entropy": 4.019427611764553, + "train/text_entropy": 1.6262857841722893, + "train/token_acc": 0.2713248638838475 + }, + { + "epoch": 0.04692820133234641, + "grad_norm": 18.90703773498535, + "learning_rate": 9.615879532223745e-06, + "loss": 1.293, + "step": 317, + "train/speech_entropy": 3.543457760027985, + "train/text_entropy": 1.3589464651571739, + "train/token_acc": 0.30317848410757947 + }, + { + "epoch": 0.04707623982235381, + "grad_norm": 18.88282012939453, + "learning_rate": 9.612928144036734e-06, + "loss": 1.2188, + "step": 318, + "train/speech_entropy": 3.5886256933858403, + "train/text_entropy": 1.1239292608997808, + "train/token_acc": 0.3300970873786408 + }, + { + "epoch": 0.047224278312361215, + "grad_norm": 24.864177703857422, + "learning_rate": 9.609965970175143e-06, + "loss": 1.6836, + "step": 319, + "train/speech_entropy": 3.727046409349763, + "train/text_entropy": 1.7130102724642366, + "train/token_acc": 0.2872093023255814 + }, + { + "epoch": 0.047372316802368614, + "grad_norm": 18.50275421142578, + "learning_rate": 9.606993018406931e-06, + "loss": 2.043, + "step": 320, + "train/speech_entropy": 3.749151896793221, + "train/text_entropy": 1.860822777999075, + "train/token_acc": 0.28596802841918295 + }, + { + "epoch": 0.04752035529237602, + "grad_norm": 18.926244735717773, + "learning_rate": 9.604009296528316e-06, + "loss": 1.2617, + "step": 321, + "train/speech_entropy": 3.5312605703343225, + "train/text_entropy": 1.3995937190643728, + "train/token_acc": 0.28074245939675174 + }, + { + "epoch": 0.04766839378238342, + "grad_norm": 16.863008499145508, + "learning_rate": 9.601014812363762e-06, + "loss": 1.0234, + "step": 322, + "train/speech_entropy": 3.3222850452769888, + "train/text_entropy": 1.2137949253485454, + "train/token_acc": 0.33278418451400327 + }, + { + "epoch": 0.047816432272390824, + "grad_norm": 15.309182167053223, + "learning_rate": 9.598009573765953e-06, + "loss": 1.5547, + "step": 323, + "train/speech_entropy": 4.036659347776836, + "train/text_entropy": 1.5672740447215545, + "train/token_acc": 0.2762854950115119 + }, + { + "epoch": 0.04796447076239822, + "grad_norm": 20.343008041381836, + "learning_rate": 9.594993588615778e-06, + "loss": 1.7188, + "step": 324, + "train/speech_entropy": 3.3297127402575084, + "train/text_entropy": 1.8590222012465167, + "train/token_acc": 0.2994652406417112 + }, + { + "epoch": 0.04811250925240563, + "grad_norm": 29.836759567260742, + "learning_rate": 9.591966864822307e-06, + "loss": 1.4863, + "step": 325, + "train/speech_entropy": 4.366499792525183, + "train/text_entropy": 1.717252524502306, + "train/token_acc": 0.3146067415730337 + }, + { + "epoch": 0.04826054774241303, + "grad_norm": 15.940043449401855, + "learning_rate": 9.588929410322767e-06, + "loss": 1.4004, + "step": 326, + "train/speech_entropy": 3.5815608999823447, + "train/text_entropy": 1.2979822917418047, + "train/token_acc": 0.34728506787330315 + }, + { + "epoch": 0.04840858623242043, + "grad_norm": 11.48173713684082, + "learning_rate": 9.58588123308253e-06, + "loss": 0.751, + "step": 327, + "train/speech_entropy": 3.4945831474823383, + "train/text_entropy": 0.8508941650390625, + "train/token_acc": 0.33270852858481725 + }, + { + "epoch": 0.04855662472242783, + "grad_norm": 13.351143836975098, + "learning_rate": 9.58282234109509e-06, + "loss": 0.6621, + "step": 328, + "train/speech_entropy": 3.34148652846631, + "train/text_entropy": 0.8707114266348885, + "train/token_acc": 0.35838926174496644 + }, + { + "epoch": 0.04870466321243523, + "grad_norm": 13.38762378692627, + "learning_rate": 9.579752742382026e-06, + "loss": 1.4922, + "step": 329, + "train/speech_entropy": 4.140726496664326, + "train/text_entropy": 1.5486877122407177, + "train/token_acc": 0.2944649446494465 + }, + { + "epoch": 0.04885270170244264, + "grad_norm": 15.602507591247559, + "learning_rate": 9.576672444993012e-06, + "loss": 1.4922, + "step": 330, + "train/speech_entropy": 4.351835093851874, + "train/text_entropy": 1.42110730599666, + "train/token_acc": 0.270791749833666 + }, + { + "epoch": 0.049000740192450036, + "grad_norm": 19.251911163330078, + "learning_rate": 9.573581457005762e-06, + "loss": 2.2891, + "step": 331, + "train/speech_entropy": 4.004342782660021, + "train/text_entropy": 1.8235979582134045, + "train/token_acc": 0.270956816257409 + }, + { + "epoch": 0.04914877868245744, + "grad_norm": 18.15921974182129, + "learning_rate": 9.570479786526035e-06, + "loss": 1.5234, + "step": 332, + "train/speech_entropy": 3.894077383753765, + "train/text_entropy": 1.6983766343858506, + "train/token_acc": 0.2891089108910891 + }, + { + "epoch": 0.04929681717246484, + "grad_norm": 12.17459774017334, + "learning_rate": 9.567367441687598e-06, + "loss": 0.9668, + "step": 333, + "train/speech_entropy": 3.8062257799198354, + "train/text_entropy": 1.092972764691103, + "train/token_acc": 0.3155473781048758 + }, + { + "epoch": 0.04944485566247224, + "grad_norm": 16.3585262298584, + "learning_rate": 9.564244430652214e-06, + "loss": 1.4688, + "step": 334, + "train/speech_entropy": 3.8860130901790333, + "train/text_entropy": 1.5423684950491683, + "train/token_acc": 0.2731164383561644 + }, + { + "epoch": 0.049592894152479645, + "grad_norm": 13.707807540893555, + "learning_rate": 9.561110761609615e-06, + "loss": 1.3828, + "step": 335, + "train/speech_entropy": 3.8127334742870147, + "train/text_entropy": 1.1878525888172313, + "train/token_acc": 0.29293544457978077 + }, + { + "epoch": 0.049740932642487044, + "grad_norm": 17.578752517700195, + "learning_rate": 9.557966442777484e-06, + "loss": 0.5903, + "step": 336, + "train/speech_entropy": 3.4656287260642085, + "train/text_entropy": 0.5729161441916286, + "train/token_acc": 0.3351851851851852 + }, + { + "epoch": 0.04988897113249445, + "grad_norm": 10.289787292480469, + "learning_rate": 9.554811482401429e-06, + "loss": 0.8486, + "step": 337, + "train/speech_entropy": 3.7937017531622024, + "train/text_entropy": 0.9621707794328803, + "train/token_acc": 0.3342776203966006 + }, + { + "epoch": 0.05003700962250185, + "grad_norm": 16.654788970947266, + "learning_rate": 9.551645888754964e-06, + "loss": 1.0664, + "step": 338, + "train/speech_entropy": 3.736707844393134, + "train/text_entropy": 0.8811663382393973, + "train/token_acc": 0.31986899563318777 + }, + { + "epoch": 0.050185048112509255, + "grad_norm": 17.571788787841797, + "learning_rate": 9.548469670139495e-06, + "loss": 1.418, + "step": 339, + "train/speech_entropy": 4.000675602360173, + "train/text_entropy": 1.3334751793401158, + "train/token_acc": 0.2808333333333333 + }, + { + "epoch": 0.050333086602516654, + "grad_norm": 16.912715911865234, + "learning_rate": 9.54528283488428e-06, + "loss": 1.2305, + "step": 340, + "train/speech_entropy": 4.066639387941534, + "train/text_entropy": 1.237627401584532, + "train/token_acc": 0.2649746192893401 + }, + { + "epoch": 0.05048112509252406, + "grad_norm": 12.165294647216797, + "learning_rate": 9.542085391346423e-06, + "loss": 1.3535, + "step": 341, + "train/speech_entropy": 4.077474062200862, + "train/text_entropy": 1.1605145714499734, + "train/token_acc": 0.2944241792600313 + }, + { + "epoch": 0.05062916358253146, + "grad_norm": 10.49349308013916, + "learning_rate": 9.538877347910851e-06, + "loss": 1.0215, + "step": 342, + "train/speech_entropy": 4.304815011261009, + "train/text_entropy": 1.0949258113252944, + "train/token_acc": 0.29723502304147464 + }, + { + "epoch": 0.05077720207253886, + "grad_norm": 22.682371139526367, + "learning_rate": 9.53565871299028e-06, + "loss": 1.3906, + "step": 343, + "train/speech_entropy": 3.975278760566086, + "train/text_entropy": 1.2715102578992041, + "train/token_acc": 0.27563025210084036 + }, + { + "epoch": 0.05092524056254626, + "grad_norm": 16.118776321411133, + "learning_rate": 9.532429495025206e-06, + "loss": 1.7031, + "step": 344, + "train/speech_entropy": 4.49583400114802, + "train/text_entropy": 1.5590954447901526, + "train/token_acc": 0.24465558194774348 + }, + { + "epoch": 0.05107327905255366, + "grad_norm": 18.501604080200195, + "learning_rate": 9.529189702483878e-06, + "loss": 1.4648, + "step": 345, + "train/speech_entropy": 4.161865958827893, + "train/text_entropy": 1.430374331590606, + "train/token_acc": 0.27088305489260145 + }, + { + "epoch": 0.05122131754256107, + "grad_norm": 16.425060272216797, + "learning_rate": 9.525939343862273e-06, + "loss": 1.1875, + "step": 346, + "train/speech_entropy": 4.317041737770193, + "train/text_entropy": 1.525474117648217, + "train/token_acc": 0.27981220657276995 + }, + { + "epoch": 0.05136935603256847, + "grad_norm": 14.821573257446289, + "learning_rate": 9.522678427684075e-06, + "loss": 1.5469, + "step": 347, + "train/speech_entropy": 4.23058025552967, + "train/text_entropy": 1.5739091007585027, + "train/token_acc": 0.2824858757062147 + }, + { + "epoch": 0.05151739452257587, + "grad_norm": 18.80592155456543, + "learning_rate": 9.519406962500662e-06, + "loss": 1.918, + "step": 348, + "train/speech_entropy": 3.9915906131628787, + "train/text_entropy": 1.8147432907767918, + "train/token_acc": 0.2487611496531219 + }, + { + "epoch": 0.05166543301258327, + "grad_norm": 13.45146369934082, + "learning_rate": 9.516124956891067e-06, + "loss": 1.4727, + "step": 349, + "train/speech_entropy": 3.8759440941288577, + "train/text_entropy": 1.553466229633771, + "train/token_acc": 0.3085889570552147 + }, + { + "epoch": 0.05181347150259067, + "grad_norm": 13.573195457458496, + "learning_rate": 9.512832419461966e-06, + "loss": 1.042, + "step": 350, + "train/speech_entropy": 3.783827574508765, + "train/text_entropy": 1.0824903600356157, + "train/token_acc": 0.30640243902439024 + }, + { + "epoch": 0.051961509992598076, + "grad_norm": 16.640907287597656, + "learning_rate": 9.509529358847655e-06, + "loss": 1.248, + "step": 351, + "train/speech_entropy": 4.49945772292656, + "train/text_entropy": 1.256184746237362, + "train/token_acc": 0.2911255411255411 + }, + { + "epoch": 0.052109548482605475, + "grad_norm": 21.803049087524414, + "learning_rate": 9.50621578371003e-06, + "loss": 1.5195, + "step": 352, + "train/speech_entropy": 4.098921185080339, + "train/text_entropy": 1.6613884641413104, + "train/token_acc": 0.29769736842105265 + }, + { + "epoch": 0.05225758697261288, + "grad_norm": 15.359241485595703, + "learning_rate": 9.502891702738547e-06, + "loss": 1.2617, + "step": 353, + "train/speech_entropy": 4.006237505744485, + "train/text_entropy": 1.2243934716592288, + "train/token_acc": 0.3051506316812439 + }, + { + "epoch": 0.05240562546262028, + "grad_norm": 14.910136222839355, + "learning_rate": 9.49955712465023e-06, + "loss": 1.1953, + "step": 354, + "train/speech_entropy": 4.107170794977391, + "train/text_entropy": 1.3081586061410568, + "train/token_acc": 0.3186022610483042 + }, + { + "epoch": 0.052553663952627686, + "grad_norm": 19.3306884765625, + "learning_rate": 9.496212058189615e-06, + "loss": 1.2773, + "step": 355, + "train/speech_entropy": 3.5217608590411325, + "train/text_entropy": 1.2767965382543103, + "train/token_acc": 0.3561643835616438 + }, + { + "epoch": 0.052701702442635084, + "grad_norm": 18.126216888427734, + "learning_rate": 9.49285651212875e-06, + "loss": 1.6445, + "step": 356, + "train/speech_entropy": 4.019580102612525, + "train/text_entropy": 1.7688343327219893, + "train/token_acc": 0.27121464226289516 + }, + { + "epoch": 0.05284974093264249, + "grad_norm": 23.65995216369629, + "learning_rate": 9.489490495267166e-06, + "loss": 1.9727, + "step": 357, + "train/speech_entropy": 4.192455575980392, + "train/text_entropy": 1.8291608174641927, + "train/token_acc": 0.25396825396825395 + }, + { + "epoch": 0.05299777942264989, + "grad_norm": 20.590171813964844, + "learning_rate": 9.486114016431847e-06, + "loss": 1.7852, + "step": 358, + "train/speech_entropy": 4.182641145224883, + "train/text_entropy": 1.4050475258863608, + "train/token_acc": 0.2690815006468305 + }, + { + "epoch": 0.05314581791265729, + "grad_norm": 13.392560958862305, + "learning_rate": 9.482727084477215e-06, + "loss": 1.4414, + "step": 359, + "train/speech_entropy": 4.217694876592688, + "train/text_entropy": 1.5415660493051258, + "train/token_acc": 0.25997782705099776 + }, + { + "epoch": 0.053293856402664694, + "grad_norm": 16.14320945739746, + "learning_rate": 9.479329708285107e-06, + "loss": 1.6328, + "step": 360, + "train/speech_entropy": 4.651463870344491, + "train/text_entropy": 1.638199987490315, + "train/token_acc": 0.27521367521367524 + }, + { + "epoch": 0.05344189489267209, + "grad_norm": 14.811869621276855, + "learning_rate": 9.475921896764745e-06, + "loss": 1.832, + "step": 361, + "train/speech_entropy": 4.3905315416430994, + "train/text_entropy": 1.9300993208451704, + "train/token_acc": 0.2746376811594203 + }, + { + "epoch": 0.0535899333826795, + "grad_norm": 11.389338493347168, + "learning_rate": 9.472503658852714e-06, + "loss": 0.7773, + "step": 362, + "train/speech_entropy": 3.495472925917393, + "train/text_entropy": 0.8847226339673239, + "train/token_acc": 0.3572192513368984 + }, + { + "epoch": 0.0537379718726869, + "grad_norm": 14.477021217346191, + "learning_rate": 9.469075003512948e-06, + "loss": 0.9121, + "step": 363, + "train/speech_entropy": 4.05490152315815, + "train/text_entropy": 0.8362895764802631, + "train/token_acc": 0.30612244897959184 + }, + { + "epoch": 0.0538860103626943, + "grad_norm": 21.533723831176758, + "learning_rate": 9.465635939736696e-06, + "loss": 1.2578, + "step": 364, + "train/speech_entropy": 3.672877054510367, + "train/text_entropy": 1.3917103252191654, + "train/token_acc": 0.30434782608695654 + }, + { + "epoch": 0.0540340488527017, + "grad_norm": 17.79517364501953, + "learning_rate": 9.462186476542499e-06, + "loss": 1.5312, + "step": 365, + "train/speech_entropy": 4.050807969252951, + "train/text_entropy": 1.5786299544773745, + "train/token_acc": 0.2998859749144812 + }, + { + "epoch": 0.0541820873427091, + "grad_norm": 15.197225570678711, + "learning_rate": 9.458726622976176e-06, + "loss": 1.1992, + "step": 366, + "train/speech_entropy": 3.8941771252320545, + "train/text_entropy": 0.9949442298666945, + "train/token_acc": 0.33727810650887574 + }, + { + "epoch": 0.05433012583271651, + "grad_norm": 14.182482719421387, + "learning_rate": 9.455256388110787e-06, + "loss": 0.9561, + "step": 367, + "train/speech_entropy": 3.7672226266251356, + "train/text_entropy": 0.9367487885231196, + "train/token_acc": 0.3233743409490334 + }, + { + "epoch": 0.054478164322723906, + "grad_norm": 16.644710540771484, + "learning_rate": 9.451775781046619e-06, + "loss": 1.7227, + "step": 368, + "train/speech_entropy": 4.1992724609375, + "train/text_entropy": 1.4589952382531186, + "train/token_acc": 0.271461716937355 + }, + { + "epoch": 0.05462620281273131, + "grad_norm": 14.510842323303223, + "learning_rate": 9.448284810911156e-06, + "loss": 1.0156, + "step": 369, + "train/speech_entropy": 3.621873465401786, + "train/text_entropy": 0.9782926177978516, + "train/token_acc": 0.3144186046511628 + }, + { + "epoch": 0.05477424130273871, + "grad_norm": 17.43585777282715, + "learning_rate": 9.444783486859066e-06, + "loss": 1.1777, + "step": 370, + "train/speech_entropy": 4.038216537084335, + "train/text_entropy": 1.2570979388463015, + "train/token_acc": 0.29800629590766003 + }, + { + "epoch": 0.054922279792746116, + "grad_norm": 18.93077278137207, + "learning_rate": 9.441271818072157e-06, + "loss": 1.7695, + "step": 371, + "train/speech_entropy": 3.8823455028044873, + "train/text_entropy": 1.7491591994826858, + "train/token_acc": 0.28290155440414505 + }, + { + "epoch": 0.055070318282753515, + "grad_norm": 17.80628204345703, + "learning_rate": 9.437749813759376e-06, + "loss": 1.1465, + "step": 372, + "train/speech_entropy": 4.164985646919241, + "train/text_entropy": 1.2488103299527555, + "train/token_acc": 0.26054054054054054 + }, + { + "epoch": 0.05521835677276092, + "grad_norm": 10.866501808166504, + "learning_rate": 9.434217483156763e-06, + "loss": 0.8257, + "step": 373, + "train/speech_entropy": 3.8703394980326187, + "train/text_entropy": 1.0280954471001258, + "train/token_acc": 0.30162810625535563 + }, + { + "epoch": 0.05536639526276832, + "grad_norm": 12.122952461242676, + "learning_rate": 9.43067483552745e-06, + "loss": 0.9082, + "step": 374, + "train/speech_entropy": 3.882535399087847, + "train/text_entropy": 0.9356230999010181, + "train/token_acc": 0.3324697754749568 + }, + { + "epoch": 0.05551443375277572, + "grad_norm": 16.710630416870117, + "learning_rate": 9.427121880161611e-06, + "loss": 1.7656, + "step": 375, + "train/speech_entropy": 4.086209647700472, + "train/text_entropy": 1.85628100409024, + "train/token_acc": 0.25109306683322924 + }, + { + "epoch": 0.055662472242783124, + "grad_norm": 17.929353713989258, + "learning_rate": 9.423558626376462e-06, + "loss": 1.2266, + "step": 376, + "train/speech_entropy": 3.9476475545804797, + "train/text_entropy": 1.2148130529067096, + "train/token_acc": 0.3162962962962963 + }, + { + "epoch": 0.05581051073279052, + "grad_norm": 13.366183280944824, + "learning_rate": 9.419985083516219e-06, + "loss": 1.0684, + "step": 377, + "train/speech_entropy": 3.8299678291861468, + "train/text_entropy": 1.1855822258594773, + "train/token_acc": 0.3273871983210913 + }, + { + "epoch": 0.05595854922279793, + "grad_norm": 14.759517669677734, + "learning_rate": 9.416401260952082e-06, + "loss": 1.0723, + "step": 378, + "train/speech_entropy": 3.9178788321358815, + "train/text_entropy": 1.0414700482342694, + "train/token_acc": 0.31388564760793464 + }, + { + "epoch": 0.05610658771280533, + "grad_norm": 16.859634399414062, + "learning_rate": 9.412807168082204e-06, + "loss": 1.707, + "step": 379, + "train/speech_entropy": 4.234972949441709, + "train/text_entropy": 1.680050702162192, + "train/token_acc": 0.26905417814508725 + }, + { + "epoch": 0.056254626202812734, + "grad_norm": 27.79043197631836, + "learning_rate": 9.40920281433168e-06, + "loss": 0.9707, + "step": 380, + "train/speech_entropy": 3.0588759422302245, + "train/text_entropy": 0.715592784266318, + "train/token_acc": 0.4188861985472155 + }, + { + "epoch": 0.05640266469282013, + "grad_norm": 16.910398483276367, + "learning_rate": 9.405588209152505e-06, + "loss": 1.75, + "step": 381, + "train/speech_entropy": 3.9274195451518565, + "train/text_entropy": 1.6835876734910813, + "train/token_acc": 0.2624633431085044 + }, + { + "epoch": 0.05655070318282754, + "grad_norm": 13.504857063293457, + "learning_rate": 9.401963362023562e-06, + "loss": 0.918, + "step": 382, + "train/speech_entropy": 3.4911585587721605, + "train/text_entropy": 1.0052558662965125, + "train/token_acc": 0.3333333333333333 + }, + { + "epoch": 0.05669874167283494, + "grad_norm": 8.609955787658691, + "learning_rate": 9.398328282450584e-06, + "loss": 0.6162, + "step": 383, + "train/speech_entropy": 3.2622760690588306, + "train/text_entropy": 0.5865477950484664, + "train/token_acc": 0.3908668730650155 + }, + { + "epoch": 0.056846780162842336, + "grad_norm": 16.3585147857666, + "learning_rate": 9.394682979966151e-06, + "loss": 1.4609, + "step": 384, + "train/speech_entropy": 3.8632372407634694, + "train/text_entropy": 1.3705744013056025, + "train/token_acc": 0.3012345679012346 + }, + { + "epoch": 0.05699481865284974, + "grad_norm": 15.71584415435791, + "learning_rate": 9.39102746412964e-06, + "loss": 1.1445, + "step": 385, + "train/speech_entropy": 4.002458805412189, + "train/text_entropy": 1.345979649087657, + "train/token_acc": 0.27247191011235955 + }, + { + "epoch": 0.05714285714285714, + "grad_norm": 80.12581634521484, + "learning_rate": 9.387361744527213e-06, + "loss": 3.2969, + "step": 386, + "train/speech_entropy": 4.266656233857682, + "train/text_entropy": 2.009849234715404, + "train/token_acc": 0.26649746192893403 + }, + { + "epoch": 0.05729089563286455, + "grad_norm": 12.6445894241333, + "learning_rate": 9.383685830771797e-06, + "loss": 0.6152, + "step": 387, + "train/speech_entropy": 3.386664552115352, + "train/text_entropy": 0.4996581325641257, + "train/token_acc": 0.35359116022099446 + }, + { + "epoch": 0.057438934122871946, + "grad_norm": 18.76934051513672, + "learning_rate": 9.379999732503043e-06, + "loss": 1.5781, + "step": 388, + "train/speech_entropy": 3.482846429436085, + "train/text_entropy": 1.7482650264616935, + "train/token_acc": 0.26961770623742454 + }, + { + "epoch": 0.05758697261287935, + "grad_norm": 15.572599411010742, + "learning_rate": 9.376303459387318e-06, + "loss": 1.4336, + "step": 389, + "train/speech_entropy": 4.186446325145262, + "train/text_entropy": 1.49657114306886, + "train/token_acc": 0.2610789980732177 + }, + { + "epoch": 0.05773501110288675, + "grad_norm": 17.984249114990234, + "learning_rate": 9.372597021117664e-06, + "loss": 1.3691, + "step": 390, + "train/speech_entropy": 4.138888888888889, + "train/text_entropy": 1.9436667793675473, + "train/token_acc": 0.3057671381936888 + }, + { + "epoch": 0.05788304959289415, + "grad_norm": 17.009374618530273, + "learning_rate": 9.368880427413785e-06, + "loss": 1.4297, + "step": 391, + "train/speech_entropy": 3.8023107242450473, + "train/text_entropy": 1.5546336027742162, + "train/token_acc": 0.271689497716895 + }, + { + "epoch": 0.058031088082901555, + "grad_norm": 13.057161331176758, + "learning_rate": 9.365153688022015e-06, + "loss": 0.6992, + "step": 392, + "train/speech_entropy": 3.441517746719123, + "train/text_entropy": 0.6347603480021159, + "train/token_acc": 0.37561455260570303 + }, + { + "epoch": 0.058179126572908954, + "grad_norm": 15.388545036315918, + "learning_rate": 9.36141681271529e-06, + "loss": 1.2617, + "step": 393, + "train/speech_entropy": 3.7946880742123255, + "train/text_entropy": 1.3163204552992336, + "train/token_acc": 0.3194263363754889 + }, + { + "epoch": 0.05832716506291636, + "grad_norm": 19.325843811035156, + "learning_rate": 9.357669811293138e-06, + "loss": 0.8008, + "step": 394, + "train/speech_entropy": 3.5290065812475886, + "train/text_entropy": 1.1719104641086453, + "train/token_acc": 0.3228915662650602 + }, + { + "epoch": 0.05847520355292376, + "grad_norm": 14.279224395751953, + "learning_rate": 9.353912693581624e-06, + "loss": 1.3242, + "step": 395, + "train/speech_entropy": 4.1242238343544795, + "train/text_entropy": 1.2667826948494747, + "train/token_acc": 0.28197226502311246 + }, + { + "epoch": 0.058623242042931165, + "grad_norm": 17.407459259033203, + "learning_rate": 9.350145469433356e-06, + "loss": 1.3984, + "step": 396, + "train/speech_entropy": 3.8863335113159474, + "train/text_entropy": 1.5871782302856445, + "train/token_acc": 0.2857142857142857 + }, + { + "epoch": 0.05877128053293856, + "grad_norm": 14.546235084533691, + "learning_rate": 9.34636814872744e-06, + "loss": 1.0303, + "step": 397, + "train/speech_entropy": 3.6516376825696915, + "train/text_entropy": 0.8962108332936357, + "train/token_acc": 0.32633587786259544 + }, + { + "epoch": 0.05891931902294597, + "grad_norm": 16.61185073852539, + "learning_rate": 9.34258074136946e-06, + "loss": 1.625, + "step": 398, + "train/speech_entropy": 3.884474190848214, + "train/text_entropy": 1.7074964912376593, + "train/token_acc": 0.2741635687732342 + }, + { + "epoch": 0.05906735751295337, + "grad_norm": 13.129621505737305, + "learning_rate": 9.338783257291446e-06, + "loss": 0.5498, + "step": 399, + "train/speech_entropy": 3.8010139830957965, + "train/text_entropy": 0.72541529154606, + "train/token_acc": 0.3241830065359477 + }, + { + "epoch": 0.05921539600296077, + "grad_norm": 15.321759223937988, + "learning_rate": 9.334975706451863e-06, + "loss": 0.6953, + "step": 400, + "train/speech_entropy": 3.4530755860886706, + "train/text_entropy": 0.7044858115059989, + "train/token_acc": 0.34478371501272265 + }, + { + "epoch": 0.05936343449296817, + "grad_norm": 15.654309272766113, + "learning_rate": 9.331158098835565e-06, + "loss": 0.7842, + "step": 401, + "train/speech_entropy": 3.9507287184163293, + "train/text_entropy": 0.6726488240559896, + "train/token_acc": 0.3341375150784077 + }, + { + "epoch": 0.05951147298297557, + "grad_norm": 14.116585731506348, + "learning_rate": 9.327330444453783e-06, + "loss": 0.8652, + "step": 402, + "train/speech_entropy": 3.591190199331477, + "train/text_entropy": 0.849443997737866, + "train/token_acc": 0.3376494023904382 + }, + { + "epoch": 0.05965951147298298, + "grad_norm": 13.59544849395752, + "learning_rate": 9.3234927533441e-06, + "loss": 0.8188, + "step": 403, + "train/speech_entropy": 3.815022301227108, + "train/text_entropy": 1.2925039834349694, + "train/token_acc": 0.2845303867403315 + }, + { + "epoch": 0.059807549962990376, + "grad_norm": 16.554109573364258, + "learning_rate": 9.319645035570408e-06, + "loss": 1.8047, + "step": 404, + "train/speech_entropy": 4.328583459149955, + "train/text_entropy": 1.4821685791015624, + "train/token_acc": 0.23949889462048637 + }, + { + "epoch": 0.05995558845299778, + "grad_norm": 13.44148063659668, + "learning_rate": 9.315787301222896e-06, + "loss": 0.8242, + "step": 405, + "train/speech_entropy": 3.484231606012658, + "train/text_entropy": 0.7788093313986425, + "train/token_acc": 0.329557157569516 + }, + { + "epoch": 0.06010362694300518, + "grad_norm": 14.645378112792969, + "learning_rate": 9.31191956041803e-06, + "loss": 0.8711, + "step": 406, + "train/speech_entropy": 3.5395026779689607, + "train/text_entropy": 0.8630743778113162, + "train/token_acc": 0.33774834437086093 + }, + { + "epoch": 0.06025166543301258, + "grad_norm": 13.841647148132324, + "learning_rate": 9.308041823298503e-06, + "loss": 1.1484, + "step": 407, + "train/speech_entropy": 3.729479450708391, + "train/text_entropy": 1.1371671924032427, + "train/token_acc": 0.3177004538577912 + }, + { + "epoch": 0.060399703923019986, + "grad_norm": 7.602635383605957, + "learning_rate": 9.304154100033233e-06, + "loss": 0.2925, + "step": 408, + "train/speech_entropy": 3.105920927403337, + "train/text_entropy": 0.3913320367986506, + "train/token_acc": 0.41456953642384103 + }, + { + "epoch": 0.060547742413027385, + "grad_norm": 14.15632152557373, + "learning_rate": 9.300256400817316e-06, + "loss": 0.8691, + "step": 409, + "train/speech_entropy": 3.389437751808669, + "train/text_entropy": 0.68570378512272, + "train/token_acc": 0.3598360655737705 + }, + { + "epoch": 0.06069578090303479, + "grad_norm": 10.060515403747559, + "learning_rate": 9.29634873587202e-06, + "loss": 0.4941, + "step": 410, + "train/speech_entropy": 3.1480151890921513, + "train/text_entropy": 0.46444528281283215, + "train/token_acc": 0.34926958831341304 + }, + { + "epoch": 0.06084381939304219, + "grad_norm": 21.57944679260254, + "learning_rate": 9.292431115444739e-06, + "loss": 1.1641, + "step": 411, + "train/speech_entropy": 3.9728412376101323, + "train/text_entropy": 1.099474572831658, + "train/token_acc": 0.3054892601431981 + }, + { + "epoch": 0.060991857883049595, + "grad_norm": 23.48649787902832, + "learning_rate": 9.288503549808973e-06, + "loss": 2.4844, + "step": 412, + "train/speech_entropy": 3.789411959682938, + "train/text_entropy": 2.235616257611443, + "train/token_acc": 0.26198830409356727 + }, + { + "epoch": 0.061139896373056994, + "grad_norm": 11.90766716003418, + "learning_rate": 9.284566049264305e-06, + "loss": 1.082, + "step": 413, + "train/speech_entropy": 4.118344776471997, + "train/text_entropy": 1.158545188071943, + "train/token_acc": 0.30501672240802674 + }, + { + "epoch": 0.0612879348630644, + "grad_norm": 16.589860916137695, + "learning_rate": 9.280618624136374e-06, + "loss": 1.2148, + "step": 414, + "train/speech_entropy": 3.495948153409091, + "train/text_entropy": 1.4249804328469668, + "train/token_acc": 0.2733668341708543 + }, + { + "epoch": 0.0614359733530718, + "grad_norm": 16.198219299316406, + "learning_rate": 9.276661284776841e-06, + "loss": 1.4414, + "step": 415, + "train/speech_entropy": 4.087979786582835, + "train/text_entropy": 1.3250554044481735, + "train/token_acc": 0.2898550724637681 + }, + { + "epoch": 0.0615840118430792, + "grad_norm": 18.345064163208008, + "learning_rate": 9.272694041563367e-06, + "loss": 1.6719, + "step": 416, + "train/speech_entropy": 3.8689716569767443, + "train/text_entropy": 1.4280266819230045, + "train/token_acc": 0.2930513595166163 + }, + { + "epoch": 0.061732050333086604, + "grad_norm": 9.600738525390625, + "learning_rate": 9.268716904899586e-06, + "loss": 0.8418, + "step": 417, + "train/speech_entropy": 3.848293696384804, + "train/text_entropy": 0.808533601982649, + "train/token_acc": 0.32674490426189007 + }, + { + "epoch": 0.061880088823094, + "grad_norm": 24.526947021484375, + "learning_rate": 9.264729885215072e-06, + "loss": 1.5098, + "step": 418, + "train/speech_entropy": 3.6621481220373946, + "train/text_entropy": 1.0279118464543269, + "train/token_acc": 0.32550335570469796 + }, + { + "epoch": 0.06202812731310141, + "grad_norm": 17.582677841186523, + "learning_rate": 9.260732992965323e-06, + "loss": 1.6094, + "step": 419, + "train/speech_entropy": 3.9189602628474285, + "train/text_entropy": 1.5012848844233246, + "train/token_acc": 0.3072463768115942 + }, + { + "epoch": 0.06217616580310881, + "grad_norm": 18.502689361572266, + "learning_rate": 9.256726238631721e-06, + "loss": 1.4082, + "step": 420, + "train/speech_entropy": 3.823209236004601, + "train/text_entropy": 1.3983127414780174, + "train/token_acc": 0.2721598002496879 + }, + { + "epoch": 0.06232420429311621, + "grad_norm": 11.170029640197754, + "learning_rate": 9.252709632721514e-06, + "loss": 0.7646, + "step": 421, + "train/speech_entropy": 3.502154405927754, + "train/text_entropy": 0.7636753340898934, + "train/token_acc": 0.35609756097560974 + }, + { + "epoch": 0.06247224278312361, + "grad_norm": 15.928250312805176, + "learning_rate": 9.248683185767778e-06, + "loss": 0.6704, + "step": 422, + "train/speech_entropy": 4.033078941932091, + "train/text_entropy": 0.7690501537137818, + "train/token_acc": 0.3448275862068966 + }, + { + "epoch": 0.06262028127313102, + "grad_norm": 16.50928497314453, + "learning_rate": 9.244646908329407e-06, + "loss": 1.6562, + "step": 423, + "train/speech_entropy": 4.1207674490334885, + "train/text_entropy": 1.6051323172279393, + "train/token_acc": 0.27257799671592775 + }, + { + "epoch": 0.06276831976313842, + "grad_norm": 11.946236610412598, + "learning_rate": 9.240600810991064e-06, + "loss": 0.8271, + "step": 424, + "train/speech_entropy": 3.7211519371542106, + "train/text_entropy": 0.8598040748624766, + "train/token_acc": 0.319375 + }, + { + "epoch": 0.06291635825314582, + "grad_norm": 18.490110397338867, + "learning_rate": 9.236544904363169e-06, + "loss": 1.7617, + "step": 425, + "train/speech_entropy": 3.9306830147367817, + "train/text_entropy": 1.6282497424984452, + "train/token_acc": 0.2826300294406281 + }, + { + "epoch": 0.06306439674315321, + "grad_norm": 18.528566360473633, + "learning_rate": 9.232479199081863e-06, + "loss": 1.3008, + "step": 426, + "train/speech_entropy": 3.484402431530899, + "train/text_entropy": 1.344400634765625, + "train/token_acc": 0.3141122913505311 + }, + { + "epoch": 0.06321243523316063, + "grad_norm": 14.132893562316895, + "learning_rate": 9.228403705808987e-06, + "loss": 1.3438, + "step": 427, + "train/speech_entropy": 4.1038605174379486, + "train/text_entropy": 1.457655791642601, + "train/token_acc": 0.27332892124420916 + }, + { + "epoch": 0.06336047372316803, + "grad_norm": 16.272506713867188, + "learning_rate": 9.224318435232044e-06, + "loss": 1.248, + "step": 428, + "train/speech_entropy": 3.8648889478841517, + "train/text_entropy": 1.1124905741273476, + "train/token_acc": 0.3007380073800738 + }, + { + "epoch": 0.06350851221317542, + "grad_norm": 16.327468872070312, + "learning_rate": 9.22022339806418e-06, + "loss": 1.1904, + "step": 429, + "train/speech_entropy": 3.5753861738173343, + "train/text_entropy": 0.9783692111858743, + "train/token_acc": 0.3366778149386845 + }, + { + "epoch": 0.06365655070318282, + "grad_norm": 19.715543746948242, + "learning_rate": 9.216118605044154e-06, + "loss": 1.1055, + "step": 430, + "train/speech_entropy": 4.132263689060882, + "train/text_entropy": 1.029812424271195, + "train/token_acc": 0.27411167512690354 + }, + { + "epoch": 0.06380458919319022, + "grad_norm": 15.982611656188965, + "learning_rate": 9.212004066936308e-06, + "loss": 0.9688, + "step": 431, + "train/speech_entropy": 4.227069978381977, + "train/text_entropy": 0.9197511440370141, + "train/token_acc": 0.3111753371868979 + }, + { + "epoch": 0.06395262768319764, + "grad_norm": 10.36747932434082, + "learning_rate": 9.207879794530536e-06, + "loss": 0.8418, + "step": 432, + "train/speech_entropy": 4.253231036822328, + "train/text_entropy": 0.8900966708078033, + "train/token_acc": 0.33726415094339623 + }, + { + "epoch": 0.06410066617320503, + "grad_norm": 12.396132469177246, + "learning_rate": 9.203745798642264e-06, + "loss": 0.7656, + "step": 433, + "train/speech_entropy": 3.7915443150861656, + "train/text_entropy": 0.8020531336466471, + "train/token_acc": 0.3132075471698113 + }, + { + "epoch": 0.06424870466321243, + "grad_norm": 14.157469749450684, + "learning_rate": 9.199602090112411e-06, + "loss": 0.748, + "step": 434, + "train/speech_entropy": 3.6356201171875, + "train/text_entropy": 0.538723562755724, + "train/token_acc": 0.3608108108108108 + }, + { + "epoch": 0.06439674315321983, + "grad_norm": 14.575669288635254, + "learning_rate": 9.195448679807374e-06, + "loss": 1.2031, + "step": 435, + "train/speech_entropy": 3.8507388233307185, + "train/text_entropy": 1.144551650337551, + "train/token_acc": 0.32519422863485015 + }, + { + "epoch": 0.06454478164322724, + "grad_norm": 15.852694511413574, + "learning_rate": 9.191285578618984e-06, + "loss": 1.0918, + "step": 436, + "train/speech_entropy": 4.2211172263481656, + "train/text_entropy": 0.9940753672857704, + "train/token_acc": 0.28153153153153154 + }, + { + "epoch": 0.06469282013323464, + "grad_norm": 15.746209144592285, + "learning_rate": 9.18711279746449e-06, + "loss": 1.2441, + "step": 437, + "train/speech_entropy": 4.178741455078125, + "train/text_entropy": 1.2949724021864815, + "train/token_acc": 0.29220023282887075 + }, + { + "epoch": 0.06484085862324204, + "grad_norm": 17.495019912719727, + "learning_rate": 9.182930347286522e-06, + "loss": 1.0957, + "step": 438, + "train/speech_entropy": 3.854247783224217, + "train/text_entropy": 0.9459253294855101, + "train/token_acc": 0.30517241379310345 + }, + { + "epoch": 0.06498889711324944, + "grad_norm": 18.13072967529297, + "learning_rate": 9.17873823905307e-06, + "loss": 0.8965, + "step": 439, + "train/speech_entropy": 3.7910376282694944, + "train/text_entropy": 0.8831490509269774, + "train/token_acc": 0.32073011734028684 + }, + { + "epoch": 0.06513693560325684, + "grad_norm": 14.092267990112305, + "learning_rate": 9.174536483757449e-06, + "loss": 0.9883, + "step": 440, + "train/speech_entropy": 3.8763385151707848, + "train/text_entropy": 0.8643340837387812, + "train/token_acc": 0.3142523364485981 + }, + { + "epoch": 0.06528497409326425, + "grad_norm": 9.640019416809082, + "learning_rate": 9.170325092418273e-06, + "loss": 0.9199, + "step": 441, + "train/speech_entropy": 4.378295313569473, + "train/text_entropy": 1.060416713571686, + "train/token_acc": 0.2870575221238938 + }, + { + "epoch": 0.06543301258327165, + "grad_norm": 22.408161163330078, + "learning_rate": 9.166104076079423e-06, + "loss": 1.8281, + "step": 442, + "train/speech_entropy": 3.865175320892098, + "train/text_entropy": 1.765076265990279, + "train/token_acc": 0.2827763496143959 + }, + { + "epoch": 0.06558105107327905, + "grad_norm": 16.376502990722656, + "learning_rate": 9.161873445810024e-06, + "loss": 1.25, + "step": 443, + "train/speech_entropy": 4.107552814953433, + "train/text_entropy": 1.0771735547536827, + "train/token_acc": 0.27096114519427406 + }, + { + "epoch": 0.06572908956328645, + "grad_norm": 21.03876304626465, + "learning_rate": 9.15763321270441e-06, + "loss": 1.8398, + "step": 444, + "train/speech_entropy": 4.716183796781035, + "train/text_entropy": 2.0107216153826033, + "train/token_acc": 0.26038781163434904 + }, + { + "epoch": 0.06587712805329386, + "grad_norm": 17.252883911132812, + "learning_rate": 9.153383387882097e-06, + "loss": 0.918, + "step": 445, + "train/speech_entropy": 4.113401437195156, + "train/text_entropy": 1.128805160522461, + "train/token_acc": 0.2844175491679274 + }, + { + "epoch": 0.06602516654330126, + "grad_norm": 11.993241310119629, + "learning_rate": 9.149123982487757e-06, + "loss": 0.8896, + "step": 446, + "train/speech_entropy": 3.9176248877939552, + "train/text_entropy": 0.9280488368443081, + "train/token_acc": 0.3267838676318511 + }, + { + "epoch": 0.06617320503330866, + "grad_norm": 18.995695114135742, + "learning_rate": 9.144855007691184e-06, + "loss": 1.6562, + "step": 447, + "train/speech_entropy": 4.1146576370018115, + "train/text_entropy": 1.4514680151845896, + "train/token_acc": 0.26238532110091745 + }, + { + "epoch": 0.06632124352331606, + "grad_norm": 21.631206512451172, + "learning_rate": 9.140576474687263e-06, + "loss": 1.3047, + "step": 448, + "train/speech_entropy": 4.044248330472696, + "train/text_entropy": 1.174551415950694, + "train/token_acc": 0.27755102040816326 + }, + { + "epoch": 0.06646928201332346, + "grad_norm": 12.475444793701172, + "learning_rate": 9.136288394695955e-06, + "loss": 0.7451, + "step": 449, + "train/speech_entropy": 3.7544179570371066, + "train/text_entropy": 0.86334480737385, + "train/token_acc": 0.33577533577533575 + }, + { + "epoch": 0.06661732050333087, + "grad_norm": 15.248793601989746, + "learning_rate": 9.131990778962241e-06, + "loss": 1.3633, + "step": 450, + "train/speech_entropy": 4.465963419873158, + "train/text_entropy": 1.3151138889732543, + "train/token_acc": 0.25481209899175067 + }, + { + "epoch": 0.06676535899333827, + "grad_norm": 13.648004531860352, + "learning_rate": 9.127683638756127e-06, + "loss": 1.125, + "step": 451, + "train/speech_entropy": 3.962617549189815, + "train/text_entropy": 1.1109502344955633, + "train/token_acc": 0.30687830687830686 + }, + { + "epoch": 0.06691339748334567, + "grad_norm": 13.778231620788574, + "learning_rate": 9.123366985372577e-06, + "loss": 1.043, + "step": 452, + "train/speech_entropy": 3.8904131308369254, + "train/text_entropy": 1.2060458579759918, + "train/token_acc": 0.32151029748283755 + }, + { + "epoch": 0.06706143597335307, + "grad_norm": 19.72351837158203, + "learning_rate": 9.119040830131517e-06, + "loss": 2.2148, + "step": 453, + "train/speech_entropy": 4.12447661181051, + "train/text_entropy": 1.9938721963025015, + "train/token_acc": 0.2674933569530558 + }, + { + "epoch": 0.06720947446336048, + "grad_norm": 13.864856719970703, + "learning_rate": 9.114705184377785e-06, + "loss": 1.0244, + "step": 454, + "train/speech_entropy": 3.8473298987563775, + "train/text_entropy": 0.9800884428478422, + "train/token_acc": 0.31776556776556775 + }, + { + "epoch": 0.06735751295336788, + "grad_norm": 12.72614860534668, + "learning_rate": 9.110360059481102e-06, + "loss": 1.6523, + "step": 455, + "train/speech_entropy": 4.499845482514196, + "train/text_entropy": 1.5708294022554732, + "train/token_acc": 0.24357034795763993 + }, + { + "epoch": 0.06750555144337528, + "grad_norm": 12.65306568145752, + "learning_rate": 9.106005466836055e-06, + "loss": 0.6641, + "step": 456, + "train/speech_entropy": 3.714398064713847, + "train/text_entropy": 0.5763227568199886, + "train/token_acc": 0.33903133903133903 + }, + { + "epoch": 0.06765358993338268, + "grad_norm": 13.291062355041504, + "learning_rate": 9.101641417862056e-06, + "loss": 0.4773, + "step": 457, + "train/speech_entropy": 3.509392960194014, + "train/text_entropy": 0.4089463632318038, + "train/token_acc": 0.35853658536585364 + }, + { + "epoch": 0.06780162842339008, + "grad_norm": 11.350860595703125, + "learning_rate": 9.097267924003312e-06, + "loss": 0.8633, + "step": 458, + "train/speech_entropy": 3.853514478201657, + "train/text_entropy": 0.8221077558729384, + "train/token_acc": 0.3127641589180051 + }, + { + "epoch": 0.06794966691339749, + "grad_norm": 13.777132987976074, + "learning_rate": 9.092884996728804e-06, + "loss": 1.2715, + "step": 459, + "train/speech_entropy": 3.87706836704183, + "train/text_entropy": 1.1700196329752603, + "train/token_acc": 0.2942122186495177 + }, + { + "epoch": 0.06809770540340489, + "grad_norm": 7.372880935668945, + "learning_rate": 9.088492647532244e-06, + "loss": 0.5366, + "step": 460, + "train/speech_entropy": 4.023942451180315, + "train/text_entropy": 0.6262725545214368, + "train/token_acc": 0.3180887372013652 + }, + { + "epoch": 0.06824574389341229, + "grad_norm": 18.38614273071289, + "learning_rate": 9.08409088793206e-06, + "loss": 2.043, + "step": 461, + "train/speech_entropy": 4.167337804668398, + "train/text_entropy": 1.8589077088891006, + "train/token_acc": 0.26588465298142716 + }, + { + "epoch": 0.06839378238341969, + "grad_norm": 19.063812255859375, + "learning_rate": 9.079679729471349e-06, + "loss": 1.0625, + "step": 462, + "train/speech_entropy": 4.053629126382848, + "train/text_entropy": 1.0361866501142394, + "train/token_acc": 0.3195121951219512 + }, + { + "epoch": 0.0685418208734271, + "grad_norm": 14.880696296691895, + "learning_rate": 9.075259183717862e-06, + "loss": 1.4688, + "step": 463, + "train/speech_entropy": 4.261360942453578, + "train/text_entropy": 1.7488382590585063, + "train/token_acc": 0.2763385146804836 + }, + { + "epoch": 0.0686898593634345, + "grad_norm": 11.949872970581055, + "learning_rate": 9.070829262263966e-06, + "loss": 0.8184, + "step": 464, + "train/speech_entropy": 3.8219811903691925, + "train/text_entropy": 0.9695733974780544, + "train/token_acc": 0.31620271234832265 + }, + { + "epoch": 0.0688378978534419, + "grad_norm": 12.123211860656738, + "learning_rate": 9.066389976726612e-06, + "loss": 0.7842, + "step": 465, + "train/speech_entropy": 3.5141891879535345, + "train/text_entropy": 0.8056801745766088, + "train/token_acc": 0.33796296296296297 + }, + { + "epoch": 0.0689859363434493, + "grad_norm": 18.32499122619629, + "learning_rate": 9.061941338747311e-06, + "loss": 1.6523, + "step": 466, + "train/speech_entropy": 4.2475315237643825, + "train/text_entropy": 2.134874897003174, + "train/token_acc": 0.24394463667820068 + }, + { + "epoch": 0.0691339748334567, + "grad_norm": 10.569002151489258, + "learning_rate": 9.057483359992094e-06, + "loss": 0.8666, + "step": 467, + "train/speech_entropy": 3.5684960336802503, + "train/text_entropy": 1.285348993586743, + "train/token_acc": 0.3045977011494253 + }, + { + "epoch": 0.0692820133234641, + "grad_norm": 12.807770729064941, + "learning_rate": 9.053016052151492e-06, + "loss": 1.1152, + "step": 468, + "train/speech_entropy": 4.196477770229468, + "train/text_entropy": 1.283655249554178, + "train/token_acc": 0.28180354267310787 + }, + { + "epoch": 0.0694300518134715, + "grad_norm": 11.492125511169434, + "learning_rate": 9.0485394269405e-06, + "loss": 0.8965, + "step": 469, + "train/speech_entropy": 4.3606008429276315, + "train/text_entropy": 1.2131153972828446, + "train/token_acc": 0.2731201382886776 + }, + { + "epoch": 0.0695780903034789, + "grad_norm": 13.572197914123535, + "learning_rate": 9.044053496098546e-06, + "loss": 0.9648, + "step": 470, + "train/speech_entropy": 3.395906902316969, + "train/text_entropy": 0.8084875583648682, + "train/token_acc": 0.3465503568596352 + }, + { + "epoch": 0.0697261287934863, + "grad_norm": 13.30854320526123, + "learning_rate": 9.039558271389459e-06, + "loss": 1.0234, + "step": 471, + "train/speech_entropy": 4.4597597974582674, + "train/text_entropy": 1.046462805201319, + "train/token_acc": 0.29831932773109243 + }, + { + "epoch": 0.0698741672834937, + "grad_norm": 12.895074844360352, + "learning_rate": 9.035053764601443e-06, + "loss": 0.6777, + "step": 472, + "train/speech_entropy": 3.56500458299068, + "train/text_entropy": 0.6595083319622538, + "train/token_acc": 0.3329938900203666 + }, + { + "epoch": 0.07002220577350111, + "grad_norm": 19.367008209228516, + "learning_rate": 9.03053998754704e-06, + "loss": 1.0879, + "step": 473, + "train/speech_entropy": 3.7721053657181765, + "train/text_entropy": 1.0374204847547743, + "train/token_acc": 0.28517110266159695 + }, + { + "epoch": 0.07017024426350851, + "grad_norm": 21.11648178100586, + "learning_rate": 9.026016952063107e-06, + "loss": 1.2539, + "step": 474, + "train/speech_entropy": 3.322666481170316, + "train/text_entropy": 1.1331722300539735, + "train/token_acc": 0.31066176470588236 + }, + { + "epoch": 0.07031828275351591, + "grad_norm": 33.6579704284668, + "learning_rate": 9.021484670010774e-06, + "loss": 1.9297, + "step": 475, + "train/speech_entropy": 4.1298822854261115, + "train/text_entropy": 1.3528162746106165, + "train/token_acc": 0.2582496413199426 + }, + { + "epoch": 0.07046632124352331, + "grad_norm": 14.402887344360352, + "learning_rate": 9.016943153275426e-06, + "loss": 1.3496, + "step": 476, + "train/speech_entropy": 4.341152239860373, + "train/text_entropy": 1.7030254154768674, + "train/token_acc": 0.29566694987255737 + }, + { + "epoch": 0.07061435973353072, + "grad_norm": 36.494117736816406, + "learning_rate": 9.012392413766659e-06, + "loss": 1.7598, + "step": 477, + "train/speech_entropy": 3.6346261843688805, + "train/text_entropy": 0.9092870133646419, + "train/token_acc": 0.3177966101694915 + }, + { + "epoch": 0.07076239822353812, + "grad_norm": 17.92113494873047, + "learning_rate": 9.007832463418256e-06, + "loss": 1.4043, + "step": 478, + "train/speech_entropy": 3.8092915852864584, + "train/text_entropy": 1.5579580613124517, + "train/token_acc": 0.28345498783454987 + }, + { + "epoch": 0.07091043671354552, + "grad_norm": 18.227624893188477, + "learning_rate": 9.003263314188157e-06, + "loss": 0.9512, + "step": 479, + "train/speech_entropy": 3.5146808561731557, + "train/text_entropy": 0.9398903165544782, + "train/token_acc": 0.30933333333333335 + }, + { + "epoch": 0.07105847520355292, + "grad_norm": 19.395797729492188, + "learning_rate": 8.998684978058423e-06, + "loss": 1.6992, + "step": 480, + "train/speech_entropy": 3.9538453090545755, + "train/text_entropy": 1.6499553298950196, + "train/token_acc": 0.2764227642276423 + }, + { + "epoch": 0.07120651369356032, + "grad_norm": 15.26407241821289, + "learning_rate": 8.994097467035206e-06, + "loss": 0.8213, + "step": 481, + "train/speech_entropy": 3.6330126953125, + "train/text_entropy": 1.241144819462553, + "train/token_acc": 0.2774566473988439 + }, + { + "epoch": 0.07135455218356773, + "grad_norm": 8.209039688110352, + "learning_rate": 8.989500793148719e-06, + "loss": 0.5361, + "step": 482, + "train/speech_entropy": 3.892996913441913, + "train/text_entropy": 0.6768008495898957, + "train/token_acc": 0.33074204946996466 + }, + { + "epoch": 0.07150259067357513, + "grad_norm": 21.573678970336914, + "learning_rate": 8.984894968453204e-06, + "loss": 1.0957, + "step": 483, + "train/speech_entropy": 3.3425811559029754, + "train/text_entropy": 1.2364111607617672, + "train/token_acc": 0.3296296296296296 + }, + { + "epoch": 0.07165062916358253, + "grad_norm": 16.805391311645508, + "learning_rate": 8.980280005026898e-06, + "loss": 1.2461, + "step": 484, + "train/speech_entropy": 4.341613053156177, + "train/text_entropy": 1.3527763328742033, + "train/token_acc": 0.26020892687559355 + }, + { + "epoch": 0.07179866765358993, + "grad_norm": 15.85704517364502, + "learning_rate": 8.975655914972006e-06, + "loss": 0.9414, + "step": 485, + "train/speech_entropy": 3.8520560306293277, + "train/text_entropy": 1.1827355311467098, + "train/token_acc": 0.26715686274509803 + }, + { + "epoch": 0.07194670614359734, + "grad_norm": 12.448310852050781, + "learning_rate": 8.971022710414666e-06, + "loss": 0.748, + "step": 486, + "train/speech_entropy": 3.9527071506857014, + "train/text_entropy": 0.9724467895315878, + "train/token_acc": 0.2892271662763466 + }, + { + "epoch": 0.07209474463360474, + "grad_norm": 16.446603775024414, + "learning_rate": 8.966380403504913e-06, + "loss": 1.9844, + "step": 487, + "train/speech_entropy": 3.9380766354712544, + "train/text_entropy": 1.7807188253798243, + "train/token_acc": 0.3005836575875486 + }, + { + "epoch": 0.07224278312361214, + "grad_norm": 10.692554473876953, + "learning_rate": 8.96172900641666e-06, + "loss": 1.1562, + "step": 488, + "train/speech_entropy": 4.872623486882091, + "train/text_entropy": 1.2867594494367594, + "train/token_acc": 0.26393337604099937 + }, + { + "epoch": 0.07239082161361954, + "grad_norm": 13.80526065826416, + "learning_rate": 8.95706853134765e-06, + "loss": 1.3359, + "step": 489, + "train/speech_entropy": 4.1235696002246005, + "train/text_entropy": 1.3510493430770745, + "train/token_acc": 0.2725274725274725 + }, + { + "epoch": 0.07253886010362694, + "grad_norm": 12.18487548828125, + "learning_rate": 8.952398990519438e-06, + "loss": 0.8379, + "step": 490, + "train/speech_entropy": 3.6974146830389825, + "train/text_entropy": 1.0920788004428525, + "train/token_acc": 0.3341260404280618 + }, + { + "epoch": 0.07268689859363435, + "grad_norm": 14.447022438049316, + "learning_rate": 8.947720396177349e-06, + "loss": 1.0352, + "step": 491, + "train/speech_entropy": 3.7090456894933497, + "train/text_entropy": 0.9573147174009343, + "train/token_acc": 0.31273408239700373 + }, + { + "epoch": 0.07283493708364175, + "grad_norm": 9.370771408081055, + "learning_rate": 8.943032760590453e-06, + "loss": 0.4971, + "step": 492, + "train/speech_entropy": 3.6406890197604675, + "train/text_entropy": 0.5039771847608613, + "train/token_acc": 0.36363636363636365 + }, + { + "epoch": 0.07298297557364915, + "grad_norm": 13.408470153808594, + "learning_rate": 8.938336096051528e-06, + "loss": 0.9888, + "step": 493, + "train/speech_entropy": 3.788172978940217, + "train/text_entropy": 0.8287703146146038, + "train/token_acc": 0.3359193173002327 + }, + { + "epoch": 0.07313101406365655, + "grad_norm": 15.878983497619629, + "learning_rate": 8.933630414877026e-06, + "loss": 1.5742, + "step": 494, + "train/speech_entropy": 3.8839737640188234, + "train/text_entropy": 1.4019462311885271, + "train/token_acc": 0.2823529411764706 + }, + { + "epoch": 0.07327905255366396, + "grad_norm": 19.640844345092773, + "learning_rate": 8.92891572940705e-06, + "loss": 1.0566, + "step": 495, + "train/speech_entropy": 3.5954699394030447, + "train/text_entropy": 0.8296411192262327, + "train/token_acc": 0.3380462724935733 + }, + { + "epoch": 0.07342709104367136, + "grad_norm": 13.050369262695312, + "learning_rate": 8.92419205200531e-06, + "loss": 1.2031, + "step": 496, + "train/speech_entropy": 4.30749611142534, + "train/text_entropy": 1.25269135386355, + "train/token_acc": 0.26701183431952663 + }, + { + "epoch": 0.07357512953367876, + "grad_norm": 13.293437957763672, + "learning_rate": 8.919459395059105e-06, + "loss": 0.9824, + "step": 497, + "train/speech_entropy": 3.895206630629981, + "train/text_entropy": 0.8634498204167181, + "train/token_acc": 0.2955017301038062 + }, + { + "epoch": 0.07372316802368616, + "grad_norm": 14.183831214904785, + "learning_rate": 8.914717770979271e-06, + "loss": 0.9824, + "step": 498, + "train/speech_entropy": 3.3290845174211037, + "train/text_entropy": 0.9634284275334056, + "train/token_acc": 0.32989690721649484 + }, + { + "epoch": 0.07387120651369355, + "grad_norm": 12.558869361877441, + "learning_rate": 8.909967192200166e-06, + "loss": 1.0361, + "step": 499, + "train/speech_entropy": 3.819638836398165, + "train/text_entropy": 0.9088602379454134, + "train/token_acc": 0.33620689655172414 + }, + { + "epoch": 0.07401924500370097, + "grad_norm": 16.83846092224121, + "learning_rate": 8.905207671179629e-06, + "loss": 1.7188, + "step": 500, + "train/speech_entropy": 4.079481863027401, + "train/text_entropy": 1.6663915460759944, + "train/token_acc": 0.2685851318944844 + }, + { + "epoch": 0.07416728349370837, + "grad_norm": 19.759334564208984, + "learning_rate": 8.900439220398949e-06, + "loss": 1.1836, + "step": 501, + "train/speech_entropy": 4.178812349759616, + "train/text_entropy": 1.0542904246937146, + "train/token_acc": 0.2672634271099744 + }, + { + "epoch": 0.07431532198371577, + "grad_norm": 13.927163124084473, + "learning_rate": 8.895661852362833e-06, + "loss": 0.6191, + "step": 502, + "train/speech_entropy": 3.432428334325953, + "train/text_entropy": 0.4690578042007074, + "train/token_acc": 0.375750300120048 + }, + { + "epoch": 0.07446336047372316, + "grad_norm": 14.344385147094727, + "learning_rate": 8.890875579599372e-06, + "loss": 1.457, + "step": 503, + "train/speech_entropy": 4.614656282258822, + "train/text_entropy": 1.405457403966252, + "train/token_acc": 0.2616596002422774 + }, + { + "epoch": 0.07461139896373056, + "grad_norm": 13.587658882141113, + "learning_rate": 8.886080414660007e-06, + "loss": 1.1133, + "step": 504, + "train/speech_entropy": 4.02698229152183, + "train/text_entropy": 1.10773420825447, + "train/token_acc": 0.2946020128087832 + }, + { + "epoch": 0.07475943745373798, + "grad_norm": 15.6091890335083, + "learning_rate": 8.8812763701195e-06, + "loss": 1.3687, + "step": 505, + "train/speech_entropy": 3.768699119500863, + "train/text_entropy": 0.9917803783806003, + "train/token_acc": 0.31419939577039274 + }, + { + "epoch": 0.07490747594374537, + "grad_norm": 20.533388137817383, + "learning_rate": 8.876463458575895e-06, + "loss": 1.5078, + "step": 506, + "train/speech_entropy": 3.8560321940596634, + "train/text_entropy": 1.4571215485873288, + "train/token_acc": 0.26270136307311026 + }, + { + "epoch": 0.07505551443375277, + "grad_norm": 13.162017822265625, + "learning_rate": 8.871641692650497e-06, + "loss": 1.0068, + "step": 507, + "train/speech_entropy": 4.046586111853039, + "train/text_entropy": 0.8270480473836263, + "train/token_acc": 0.2985305491105955 + }, + { + "epoch": 0.07520355292376017, + "grad_norm": 16.01091766357422, + "learning_rate": 8.866811084987818e-06, + "loss": 1.2305, + "step": 508, + "train/speech_entropy": 3.571202399822654, + "train/text_entropy": 1.215443721433886, + "train/token_acc": 0.3032886723507917 + }, + { + "epoch": 0.07535159141376759, + "grad_norm": 11.824396133422852, + "learning_rate": 8.86197164825557e-06, + "loss": 0.8184, + "step": 509, + "train/speech_entropy": 3.8785399076628093, + "train/text_entropy": 0.7840452194213867, + "train/token_acc": 0.3359442993907746 + }, + { + "epoch": 0.07549962990377498, + "grad_norm": 17.503271102905273, + "learning_rate": 8.857123395144609e-06, + "loss": 1.8984, + "step": 510, + "train/speech_entropy": 4.591875854239454, + "train/text_entropy": 1.532980729992263, + "train/token_acc": 0.2310315430520034 + }, + { + "epoch": 0.07564766839378238, + "grad_norm": 18.934886932373047, + "learning_rate": 8.85226633836891e-06, + "loss": 1.4609, + "step": 511, + "train/speech_entropy": 3.8587091619318183, + "train/text_entropy": 1.2560776654411765, + "train/token_acc": 0.2692307692307692 + }, + { + "epoch": 0.07579570688378978, + "grad_norm": 16.99107551574707, + "learning_rate": 8.847400490665547e-06, + "loss": 0.7676, + "step": 512, + "train/speech_entropy": 3.7049386940616613, + "train/text_entropy": 0.7233727914315683, + "train/token_acc": 0.3472622478386167 + }, + { + "epoch": 0.07594374537379718, + "grad_norm": 21.70642852783203, + "learning_rate": 8.842525864794627e-06, + "loss": 1.6055, + "step": 513, + "train/speech_entropy": 3.97141340302258, + "train/text_entropy": 1.447987030029297, + "train/token_acc": 0.24199743918053776 + }, + { + "epoch": 0.0760917838638046, + "grad_norm": 16.974424362182617, + "learning_rate": 8.8376424735393e-06, + "loss": 1.0352, + "step": 514, + "train/speech_entropy": 3.785834176199777, + "train/text_entropy": 0.8006357064276385, + "train/token_acc": 0.3075196408529742 + }, + { + "epoch": 0.07623982235381199, + "grad_norm": 18.564197540283203, + "learning_rate": 8.83275032970568e-06, + "loss": 1.6133, + "step": 515, + "train/speech_entropy": 4.06409750010586, + "train/text_entropy": 1.5307355858813758, + "train/token_acc": 0.2642543859649123 + }, + { + "epoch": 0.07638786084381939, + "grad_norm": 15.445816040039062, + "learning_rate": 8.827849446122849e-06, + "loss": 0.7812, + "step": 516, + "train/speech_entropy": 3.9221117267500882, + "train/text_entropy": 0.9982819140536113, + "train/token_acc": 0.33260393873085337 + }, + { + "epoch": 0.07653589933382679, + "grad_norm": 19.21135711669922, + "learning_rate": 8.822939835642802e-06, + "loss": 1.9844, + "step": 517, + "train/speech_entropy": 4.281517028808594, + "train/text_entropy": 1.9842396525593546, + "train/token_acc": 0.24939467312348668 + }, + { + "epoch": 0.0766839378238342, + "grad_norm": 16.902448654174805, + "learning_rate": 8.818021511140423e-06, + "loss": 0.8828, + "step": 518, + "train/speech_entropy": 3.903167447680354, + "train/text_entropy": 1.0427087319863808, + "train/token_acc": 0.2688821752265861 + }, + { + "epoch": 0.0768319763138416, + "grad_norm": 16.147809982299805, + "learning_rate": 8.813094485513436e-06, + "loss": 1.4297, + "step": 519, + "train/speech_entropy": 4.25172174348304, + "train/text_entropy": 1.5285166802807388, + "train/token_acc": 0.2746071133167907 + }, + { + "epoch": 0.076980014803849, + "grad_norm": 22.531160354614258, + "learning_rate": 8.808158771682402e-06, + "loss": 2.043, + "step": 520, + "train/speech_entropy": 4.520983014787946, + "train/text_entropy": 1.8992438360091743, + "train/token_acc": 0.22393162393162394 + }, + { + "epoch": 0.0771280532938564, + "grad_norm": 19.334774017333984, + "learning_rate": 8.80321438259065e-06, + "loss": 1.7695, + "step": 521, + "train/speech_entropy": 4.245598183386522, + "train/text_entropy": 1.9540206207626167, + "train/token_acc": 0.27370689655172414 + }, + { + "epoch": 0.0772760917838638, + "grad_norm": 12.835897445678711, + "learning_rate": 8.798261331204262e-06, + "loss": 0.8789, + "step": 522, + "train/speech_entropy": 3.9963409860834016, + "train/text_entropy": 1.1328113478452422, + "train/token_acc": 0.28888888888888886 + }, + { + "epoch": 0.07742413027387121, + "grad_norm": 11.724824905395508, + "learning_rate": 8.793299630512042e-06, + "loss": 0.793, + "step": 523, + "train/speech_entropy": 3.6468494049582234, + "train/text_entropy": 0.7171758322351908, + "train/token_acc": 0.3181818181818182 + }, + { + "epoch": 0.07757216876387861, + "grad_norm": 17.860843658447266, + "learning_rate": 8.788329293525468e-06, + "loss": 1.1797, + "step": 524, + "train/speech_entropy": 4.119564855883935, + "train/text_entropy": 1.456304441179548, + "train/token_acc": 0.2685643564356436 + }, + { + "epoch": 0.07772020725388601, + "grad_norm": 30.536922454833984, + "learning_rate": 8.783350333278674e-06, + "loss": 1.9219, + "step": 525, + "train/speech_entropy": 4.150353540348101, + "train/text_entropy": 1.7753201597225592, + "train/token_acc": 0.2591743119266055 + }, + { + "epoch": 0.07786824574389341, + "grad_norm": 16.948368072509766, + "learning_rate": 8.778362762828396e-06, + "loss": 1.1777, + "step": 526, + "train/speech_entropy": 4.1223223027907645, + "train/text_entropy": 1.4244024684663452, + "train/token_acc": 0.27593582887700535 + }, + { + "epoch": 0.07801628423390082, + "grad_norm": 13.427288055419922, + "learning_rate": 8.773366595253962e-06, + "loss": 1.0176, + "step": 527, + "train/speech_entropy": 4.106922392897064, + "train/text_entropy": 0.9660020555768695, + "train/token_acc": 0.32552404438964244 + }, + { + "epoch": 0.07816432272390822, + "grad_norm": 16.753015518188477, + "learning_rate": 8.768361843657235e-06, + "loss": 1.5, + "step": 528, + "train/speech_entropy": 4.346920021103181, + "train/text_entropy": 1.5753684289676626, + "train/token_acc": 0.25673400673400676 + }, + { + "epoch": 0.07831236121391562, + "grad_norm": 36.823974609375, + "learning_rate": 8.763348521162595e-06, + "loss": 2.2266, + "step": 529, + "train/speech_entropy": 4.2660647414800685, + "train/text_entropy": 1.7477947093822337, + "train/token_acc": 0.30171073094867806 + }, + { + "epoch": 0.07846039970392302, + "grad_norm": 21.07876968383789, + "learning_rate": 8.758326640916898e-06, + "loss": 1.7148, + "step": 530, + "train/speech_entropy": 4.020288054065356, + "train/text_entropy": 1.6381896047881155, + "train/token_acc": 0.2706766917293233 + }, + { + "epoch": 0.07860843819393042, + "grad_norm": 14.993850708007812, + "learning_rate": 8.753296216089433e-06, + "loss": 1.3047, + "step": 531, + "train/speech_entropy": 4.388588247275608, + "train/text_entropy": 2.0141701937642336, + "train/token_acc": 0.24864130434782608 + }, + { + "epoch": 0.07875647668393783, + "grad_norm": 18.84642791748047, + "learning_rate": 8.74825725987191e-06, + "loss": 0.6992, + "step": 532, + "train/speech_entropy": 3.465694952867702, + "train/text_entropy": 0.4077477564006091, + "train/token_acc": 0.3709677419354839 + }, + { + "epoch": 0.07890451517394523, + "grad_norm": 16.65995216369629, + "learning_rate": 8.7432097854784e-06, + "loss": 1.4727, + "step": 533, + "train/speech_entropy": 3.3967108123419054, + "train/text_entropy": 1.431201171875, + "train/token_acc": 0.29395604395604397 + }, + { + "epoch": 0.07905255366395263, + "grad_norm": 15.181427001953125, + "learning_rate": 8.738153806145315e-06, + "loss": 1.125, + "step": 534, + "train/speech_entropy": 4.58429157718086, + "train/text_entropy": 1.2883427877561742, + "train/token_acc": 0.2894190871369295 + }, + { + "epoch": 0.07920059215396003, + "grad_norm": 17.080245971679688, + "learning_rate": 8.733089335131373e-06, + "loss": 1.5469, + "step": 535, + "train/speech_entropy": 4.290552506710123, + "train/text_entropy": 1.603005173627068, + "train/token_acc": 0.2416243654822335 + }, + { + "epoch": 0.07934863064396744, + "grad_norm": 18.179922103881836, + "learning_rate": 8.728016385717561e-06, + "loss": 1.2773, + "step": 536, + "train/speech_entropy": 3.8416826997823383, + "train/text_entropy": 1.2735785812628073, + "train/token_acc": 0.29379310344827586 + }, + { + "epoch": 0.07949666913397484, + "grad_norm": 16.47119140625, + "learning_rate": 8.722934971207095e-06, + "loss": 1.4688, + "step": 537, + "train/speech_entropy": 4.53660561801133, + "train/text_entropy": 1.2965378497713176, + "train/token_acc": 0.26749760306807285 + }, + { + "epoch": 0.07964470762398224, + "grad_norm": 16.0432186126709, + "learning_rate": 8.717845104925393e-06, + "loss": 1.5273, + "step": 538, + "train/speech_entropy": 4.047390687643184, + "train/text_entropy": 1.584147190403294, + "train/token_acc": 0.2618147448015123 + }, + { + "epoch": 0.07979274611398963, + "grad_norm": 14.702898979187012, + "learning_rate": 8.712746800220036e-06, + "loss": 1.1836, + "step": 539, + "train/speech_entropy": 3.9335024626358694, + "train/text_entropy": 1.2783600676293467, + "train/token_acc": 0.28380782918149466 + }, + { + "epoch": 0.07994078460399703, + "grad_norm": 14.740190505981445, + "learning_rate": 8.707640070460733e-06, + "loss": 1.6328, + "step": 540, + "train/speech_entropy": 4.239224676251805, + "train/text_entropy": 1.6034780115514369, + "train/token_acc": 0.28437917222963954 + }, + { + "epoch": 0.08008882309400445, + "grad_norm": 14.600983619689941, + "learning_rate": 8.702524929039286e-06, + "loss": 1.248, + "step": 541, + "train/speech_entropy": 3.8774793965368284, + "train/text_entropy": 0.9633526424709842, + "train/token_acc": 0.30634715025906734 + }, + { + "epoch": 0.08023686158401185, + "grad_norm": 27.743534088134766, + "learning_rate": 8.697401389369562e-06, + "loss": 1.0742, + "step": 542, + "train/speech_entropy": 4.137421846937859, + "train/text_entropy": 1.1560229110717772, + "train/token_acc": 0.2822429906542056 + }, + { + "epoch": 0.08038490007401924, + "grad_norm": 16.845672607421875, + "learning_rate": 8.692269464887441e-06, + "loss": 1.9453, + "step": 543, + "train/speech_entropy": 4.361698739275426, + "train/text_entropy": 1.7727830599894565, + "train/token_acc": 0.24509803921568626 + }, + { + "epoch": 0.08053293856402664, + "grad_norm": 19.556306838989258, + "learning_rate": 8.6871291690508e-06, + "loss": 1.5234, + "step": 544, + "train/speech_entropy": 4.161226074667126, + "train/text_entropy": 1.5327414467696745, + "train/token_acc": 0.22734375 + }, + { + "epoch": 0.08068097705403404, + "grad_norm": 10.327014923095703, + "learning_rate": 8.681980515339464e-06, + "loss": 0.9141, + "step": 545, + "train/speech_entropy": 3.9948813154523175, + "train/text_entropy": 0.9550801496289695, + "train/token_acc": 0.3277428371767994 + }, + { + "epoch": 0.08082901554404145, + "grad_norm": 14.367766380310059, + "learning_rate": 8.676823517255178e-06, + "loss": 1.5, + "step": 546, + "train/speech_entropy": 3.9539815923219086, + "train/text_entropy": 1.6734522103164602, + "train/token_acc": 0.2781168265039233 + }, + { + "epoch": 0.08097705403404885, + "grad_norm": 19.006240844726562, + "learning_rate": 8.67165818832157e-06, + "loss": 1.4414, + "step": 547, + "train/speech_entropy": 3.9709170118876083, + "train/text_entropy": 1.6354872305181962, + "train/token_acc": 0.2863849765258216 + }, + { + "epoch": 0.08112509252405625, + "grad_norm": 12.187581062316895, + "learning_rate": 8.666484542084109e-06, + "loss": 1.1973, + "step": 548, + "train/speech_entropy": 4.106391549018784, + "train/text_entropy": 1.0687760456953899, + "train/token_acc": 0.30673316708229426 + }, + { + "epoch": 0.08127313101406365, + "grad_norm": 14.037609100341797, + "learning_rate": 8.661302592110083e-06, + "loss": 1.3203, + "step": 549, + "train/speech_entropy": 4.100133460082359, + "train/text_entropy": 1.2067815014439771, + "train/token_acc": 0.294468085106383 + }, + { + "epoch": 0.08142116950407106, + "grad_norm": 16.227781295776367, + "learning_rate": 8.656112351988547e-06, + "loss": 1.293, + "step": 550, + "train/speech_entropy": 4.28264740021343, + "train/text_entropy": 1.5220263725103333, + "train/token_acc": 0.25065047701647875 + }, + { + "epoch": 0.08156920799407846, + "grad_norm": 11.605751037597656, + "learning_rate": 8.650913835330304e-06, + "loss": 1.2031, + "step": 551, + "train/speech_entropy": 4.340095077102923, + "train/text_entropy": 1.415593044013734, + "train/token_acc": 0.28757319453480806 + }, + { + "epoch": 0.08171724648408586, + "grad_norm": 15.391646385192871, + "learning_rate": 8.64570705576786e-06, + "loss": 1.5703, + "step": 552, + "train/speech_entropy": 4.283189605174057, + "train/text_entropy": 1.6295103593306108, + "train/token_acc": 0.23601637107776263 + }, + { + "epoch": 0.08186528497409326, + "grad_norm": 12.153841018676758, + "learning_rate": 8.640492026955383e-06, + "loss": 0.6484, + "step": 553, + "train/speech_entropy": 4.076602579441041, + "train/text_entropy": 0.6032330271709396, + "train/token_acc": 0.33821733821733824 + }, + { + "epoch": 0.08201332346410066, + "grad_norm": 15.41162109375, + "learning_rate": 8.63526876256868e-06, + "loss": 1.0723, + "step": 554, + "train/speech_entropy": 3.9317641065299855, + "train/text_entropy": 1.0465582462779262, + "train/token_acc": 0.27123552123552125 + }, + { + "epoch": 0.08216136195410807, + "grad_norm": 18.756330490112305, + "learning_rate": 8.630037276305153e-06, + "loss": 2.0664, + "step": 555, + "train/speech_entropy": 3.958503698560016, + "train/text_entropy": 1.8006986345563616, + "train/token_acc": 0.24452234881682736 + }, + { + "epoch": 0.08230940044411547, + "grad_norm": 20.04177474975586, + "learning_rate": 8.624797581883763e-06, + "loss": 1.2891, + "step": 556, + "train/speech_entropy": 3.9519764293323862, + "train/text_entropy": 1.3841790371253841, + "train/token_acc": 0.2527932960893855 + }, + { + "epoch": 0.08245743893412287, + "grad_norm": 15.114104270935059, + "learning_rate": 8.619549693045004e-06, + "loss": 1.5859, + "step": 557, + "train/speech_entropy": 3.8316847420254736, + "train/text_entropy": 1.3637655827037074, + "train/token_acc": 0.27673896783844426 + }, + { + "epoch": 0.08260547742413027, + "grad_norm": 15.861329078674316, + "learning_rate": 8.614293623550842e-06, + "loss": 1.3164, + "step": 558, + "train/speech_entropy": 3.998292266025128, + "train/text_entropy": 1.2390554791930093, + "train/token_acc": 0.279874213836478 + }, + { + "epoch": 0.08275351591413768, + "grad_norm": 18.311307907104492, + "learning_rate": 8.609029387184716e-06, + "loss": 1.5234, + "step": 559, + "train/speech_entropy": 4.527049927824594, + "train/text_entropy": 1.4148536235728162, + "train/token_acc": 0.2517694641051567 + }, + { + "epoch": 0.08290155440414508, + "grad_norm": 15.465943336486816, + "learning_rate": 8.60375699775147e-06, + "loss": 1.2793, + "step": 560, + "train/speech_entropy": 4.098047600968819, + "train/text_entropy": 1.4966029357910156, + "train/token_acc": 0.26805054151624547 + }, + { + "epoch": 0.08304959289415248, + "grad_norm": 21.72735023498535, + "learning_rate": 8.598476469077327e-06, + "loss": 0.9863, + "step": 561, + "train/speech_entropy": 3.6189164959016393, + "train/text_entropy": 1.0802650062405332, + "train/token_acc": 0.28498293515358364 + }, + { + "epoch": 0.08319763138415988, + "grad_norm": 13.125933647155762, + "learning_rate": 8.593187815009863e-06, + "loss": 0.8262, + "step": 562, + "train/speech_entropy": 3.6590118724120084, + "train/text_entropy": 0.6564735350061636, + "train/token_acc": 0.3495867768595041 + }, + { + "epoch": 0.08334566987416728, + "grad_norm": 33.9587516784668, + "learning_rate": 8.587891049417952e-06, + "loss": 1.7422, + "step": 563, + "train/speech_entropy": 3.644913787841797, + "train/text_entropy": 1.2352607402395694, + "train/token_acc": 0.2975708502024291 + }, + { + "epoch": 0.08349370836417469, + "grad_norm": 11.736367225646973, + "learning_rate": 8.582586186191747e-06, + "loss": 1.0303, + "step": 564, + "train/speech_entropy": 3.8209149386208945, + "train/text_entropy": 1.2151670143821023, + "train/token_acc": 0.30935754189944137 + }, + { + "epoch": 0.08364174685418209, + "grad_norm": 16.059005737304688, + "learning_rate": 8.577273239242633e-06, + "loss": 1.1172, + "step": 565, + "train/speech_entropy": 3.836766818154183, + "train/text_entropy": 1.3719127074531887, + "train/token_acc": 0.3036649214659686 + }, + { + "epoch": 0.08378978534418949, + "grad_norm": 23.215829849243164, + "learning_rate": 8.571952222503194e-06, + "loss": 2.0625, + "step": 566, + "train/speech_entropy": 3.6691055297851562, + "train/text_entropy": 1.9293533988620923, + "train/token_acc": 0.2700490998363339 + }, + { + "epoch": 0.08393782383419689, + "grad_norm": 15.816657066345215, + "learning_rate": 8.566623149927173e-06, + "loss": 1.3555, + "step": 567, + "train/speech_entropy": 4.238341737264363, + "train/text_entropy": 1.5436405147518124, + "train/token_acc": 0.25449101796407186 + }, + { + "epoch": 0.0840858623242043, + "grad_norm": 20.002483367919922, + "learning_rate": 8.561286035489446e-06, + "loss": 1.7852, + "step": 568, + "train/speech_entropy": 3.5781109953579837, + "train/text_entropy": 1.9116970085949632, + "train/token_acc": 0.3208053691275168 + }, + { + "epoch": 0.0842339008142117, + "grad_norm": 11.442554473876953, + "learning_rate": 8.555940893185975e-06, + "loss": 0.918, + "step": 569, + "train/speech_entropy": 4.000070781020051, + "train/text_entropy": 0.8485796925657421, + "train/token_acc": 0.3018757327080891 + }, + { + "epoch": 0.0843819393042191, + "grad_norm": 16.076322555541992, + "learning_rate": 8.550587737033766e-06, + "loss": 1.0957, + "step": 570, + "train/speech_entropy": 3.46460205078125, + "train/text_entropy": 1.1924615186803482, + "train/token_acc": 0.3411949685534591 + }, + { + "epoch": 0.0845299777942265, + "grad_norm": 21.05974769592285, + "learning_rate": 8.54522658107085e-06, + "loss": 1.5195, + "step": 571, + "train/speech_entropy": 3.451864973461355, + "train/text_entropy": 1.6079195149739582, + "train/token_acc": 0.2968944099378882 + }, + { + "epoch": 0.0846780162842339, + "grad_norm": 19.463764190673828, + "learning_rate": 8.539857439356234e-06, + "loss": 1.6055, + "step": 572, + "train/speech_entropy": 3.966581931854326, + "train/text_entropy": 1.3623785642114017, + "train/token_acc": 0.28643724696356276 + }, + { + "epoch": 0.08482605477424131, + "grad_norm": 17.482685089111328, + "learning_rate": 8.534480325969867e-06, + "loss": 1.8711, + "step": 573, + "train/speech_entropy": 4.48593495686849, + "train/text_entropy": 1.59149728609821, + "train/token_acc": 0.27100494233937394 + }, + { + "epoch": 0.0849740932642487, + "grad_norm": 16.22322654724121, + "learning_rate": 8.529095255012602e-06, + "loss": 1.5469, + "step": 574, + "train/speech_entropy": 4.269898098713427, + "train/text_entropy": 1.6540451836340206, + "train/token_acc": 0.2554300608166811 + }, + { + "epoch": 0.0851221317542561, + "grad_norm": 17.97168731689453, + "learning_rate": 8.523702240606156e-06, + "loss": 1.2656, + "step": 575, + "train/speech_entropy": 3.8184619355411473, + "train/text_entropy": 1.265653455579603, + "train/token_acc": 0.29759036144578316 + }, + { + "epoch": 0.0852701702442635, + "grad_norm": 12.77726936340332, + "learning_rate": 8.518301296893085e-06, + "loss": 1.0508, + "step": 576, + "train/speech_entropy": 4.218258592082444, + "train/text_entropy": 1.0727546942625807, + "train/token_acc": 0.2965217391304348 + }, + { + "epoch": 0.08541820873427092, + "grad_norm": 13.954240798950195, + "learning_rate": 8.512892438036733e-06, + "loss": 1.0547, + "step": 577, + "train/speech_entropy": 3.8802312825520833, + "train/text_entropy": 1.1390891223220352, + "train/token_acc": 0.2864983534577388 + }, + { + "epoch": 0.08556624722427832, + "grad_norm": 21.254735946655273, + "learning_rate": 8.507475678221201e-06, + "loss": 2.0723, + "step": 578, + "train/speech_entropy": 3.8290476927157, + "train/text_entropy": 2.148910236868629, + "train/token_acc": 0.28523489932885904 + }, + { + "epoch": 0.08571428571428572, + "grad_norm": 13.043547630310059, + "learning_rate": 8.502051031651311e-06, + "loss": 1.0938, + "step": 579, + "train/speech_entropy": 3.6859043275294283, + "train/text_entropy": 0.8853532945787584, + "train/token_acc": 0.3441053261520048 + }, + { + "epoch": 0.08586232420429311, + "grad_norm": 18.139122009277344, + "learning_rate": 8.496618512552567e-06, + "loss": 1.2002, + "step": 580, + "train/speech_entropy": 3.608752866334553, + "train/text_entropy": 1.1161087036132813, + "train/token_acc": 0.3005181347150259 + }, + { + "epoch": 0.08601036269430051, + "grad_norm": 16.87082862854004, + "learning_rate": 8.491178135171113e-06, + "loss": 1.1719, + "step": 581, + "train/speech_entropy": 4.061704708051078, + "train/text_entropy": 1.453920522313209, + "train/token_acc": 0.2915082382762991 + }, + { + "epoch": 0.08615840118430793, + "grad_norm": 17.911527633666992, + "learning_rate": 8.485729913773707e-06, + "loss": 1.9062, + "step": 582, + "train/speech_entropy": 4.240347325865462, + "train/text_entropy": 1.7748188083454715, + "train/token_acc": 0.2549019607843137 + }, + { + "epoch": 0.08630643967431532, + "grad_norm": 18.889739990234375, + "learning_rate": 8.480273862647678e-06, + "loss": 1.3438, + "step": 583, + "train/speech_entropy": 4.0566587890625, + "train/text_entropy": 1.4447236872043738, + "train/token_acc": 0.27849740932642486 + }, + { + "epoch": 0.08645447816432272, + "grad_norm": 17.32180404663086, + "learning_rate": 8.474809996100878e-06, + "loss": 1.1113, + "step": 584, + "train/speech_entropy": 4.351185389927456, + "train/text_entropy": 1.3224054745265417, + "train/token_acc": 0.27040816326530615 + }, + { + "epoch": 0.08660251665433012, + "grad_norm": 11.711606979370117, + "learning_rate": 8.469338328461665e-06, + "loss": 0.7598, + "step": 585, + "train/speech_entropy": 4.103608950926988, + "train/text_entropy": 0.5913767099380494, + "train/token_acc": 0.31265356265356264 + }, + { + "epoch": 0.08675055514433752, + "grad_norm": 13.868403434753418, + "learning_rate": 8.463858874078847e-06, + "loss": 1.0293, + "step": 586, + "train/speech_entropy": 3.8659656390265074, + "train/text_entropy": 0.9731836484346775, + "train/token_acc": 0.3172338090010977 + }, + { + "epoch": 0.08689859363434493, + "grad_norm": 14.006662368774414, + "learning_rate": 8.458371647321653e-06, + "loss": 0.709, + "step": 587, + "train/speech_entropy": 3.854122634863124, + "train/text_entropy": 0.5566214376421117, + "train/token_acc": 0.3125827814569536 + }, + { + "epoch": 0.08704663212435233, + "grad_norm": 16.276168823242188, + "learning_rate": 8.452876662579698e-06, + "loss": 1.7383, + "step": 588, + "train/speech_entropy": 4.442248977674624, + "train/text_entropy": 1.5723641660033154, + "train/token_acc": 0.25858290723155586 + }, + { + "epoch": 0.08719467061435973, + "grad_norm": 11.508447647094727, + "learning_rate": 8.447373934262937e-06, + "loss": 1.0498, + "step": 589, + "train/speech_entropy": 4.021414092092803, + "train/text_entropy": 1.0993946124980976, + "train/token_acc": 0.31695331695331697 + }, + { + "epoch": 0.08734270910436713, + "grad_norm": 12.13176155090332, + "learning_rate": 8.441863476801638e-06, + "loss": 0.8662, + "step": 590, + "train/speech_entropy": 3.875885527012712, + "train/text_entropy": 1.1605404389210237, + "train/token_acc": 0.3037752414398595 + }, + { + "epoch": 0.08749074759437454, + "grad_norm": 12.0918550491333, + "learning_rate": 8.436345304646327e-06, + "loss": 0.9609, + "step": 591, + "train/speech_entropy": 3.727878642231537, + "train/text_entropy": 0.8739053244902709, + "train/token_acc": 0.3216723549488055 + }, + { + "epoch": 0.08763878608438194, + "grad_norm": 38.72270202636719, + "learning_rate": 8.43081943226777e-06, + "loss": 1.0977, + "step": 592, + "train/speech_entropy": 4.340042180170508, + "train/text_entropy": 1.6029189068338145, + "train/token_acc": 0.29238754325259514 + }, + { + "epoch": 0.08778682457438934, + "grad_norm": 21.036365509033203, + "learning_rate": 8.425285874156924e-06, + "loss": 1.5625, + "step": 593, + "train/speech_entropy": 3.994478834145385, + "train/text_entropy": 1.747663697200035, + "train/token_acc": 0.28901734104046245 + }, + { + "epoch": 0.08793486306439674, + "grad_norm": 17.890560150146484, + "learning_rate": 8.419744644824899e-06, + "loss": 1.1328, + "step": 594, + "train/speech_entropy": 3.7393711330173733, + "train/text_entropy": 1.108921397816051, + "train/token_acc": 0.32447552447552447 + }, + { + "epoch": 0.08808290155440414, + "grad_norm": 16.547361373901367, + "learning_rate": 8.414195758802922e-06, + "loss": 1.2734, + "step": 595, + "train/speech_entropy": 4.240100883329299, + "train/text_entropy": 1.5764416905934224, + "train/token_acc": 0.2644135188866799 + }, + { + "epoch": 0.08823094004441155, + "grad_norm": 11.611001968383789, + "learning_rate": 8.408639230642302e-06, + "loss": 0.9648, + "step": 596, + "train/speech_entropy": 4.186064358415274, + "train/text_entropy": 0.916311335266873, + "train/token_acc": 0.30966638152266895 + }, + { + "epoch": 0.08837897853441895, + "grad_norm": 20.04622459411621, + "learning_rate": 8.403075074914383e-06, + "loss": 1.7461, + "step": 597, + "train/speech_entropy": 4.054742596600507, + "train/text_entropy": 1.7474870385590546, + "train/token_acc": 0.27081021087680357 + }, + { + "epoch": 0.08852701702442635, + "grad_norm": 15.17646312713623, + "learning_rate": 8.397503306210519e-06, + "loss": 1.666, + "step": 598, + "train/speech_entropy": 3.6139289151804617, + "train/text_entropy": 1.6928448835422192, + "train/token_acc": 0.284435261707989 + }, + { + "epoch": 0.08867505551443375, + "grad_norm": 15.438324928283691, + "learning_rate": 8.391923939142022e-06, + "loss": 1.2695, + "step": 599, + "train/speech_entropy": 4.105599539620536, + "train/text_entropy": 1.4423826780613298, + "train/token_acc": 0.2688259109311741 + }, + { + "epoch": 0.08882309400444116, + "grad_norm": 17.370447158813477, + "learning_rate": 8.386336988340132e-06, + "loss": 1.4141, + "step": 600, + "train/speech_entropy": 4.052480381914757, + "train/text_entropy": 1.5451823578783745, + "train/token_acc": 0.2722222222222222 + }, + { + "epoch": 0.08897113249444856, + "grad_norm": 24.76219367980957, + "learning_rate": 8.380742468455972e-06, + "loss": 1.4287, + "step": 601, + "train/speech_entropy": 3.0490643175579697, + "train/text_entropy": 1.1886648869776464, + "train/token_acc": 0.3192771084337349 + }, + { + "epoch": 0.08911917098445596, + "grad_norm": 31.6841983795166, + "learning_rate": 8.375140394160526e-06, + "loss": 0.6484, + "step": 602, + "train/speech_entropy": 3.503179771021793, + "train/text_entropy": 0.5260259594236102, + "train/token_acc": 0.3780487804878049 + }, + { + "epoch": 0.08926720947446336, + "grad_norm": 32.297393798828125, + "learning_rate": 8.369530780144574e-06, + "loss": 0.8496, + "step": 603, + "train/speech_entropy": 4.138485168457032, + "train/text_entropy": 1.3958197845501845, + "train/token_acc": 0.28987068965517243 + }, + { + "epoch": 0.08941524796447076, + "grad_norm": 20.016347885131836, + "learning_rate": 8.363913641118677e-06, + "loss": 1.3242, + "step": 604, + "train/speech_entropy": 3.7757060406750136, + "train/text_entropy": 1.5167834526669663, + "train/token_acc": 0.30212014134275617 + }, + { + "epoch": 0.08956328645447817, + "grad_norm": 25.20494270324707, + "learning_rate": 8.35828899181313e-06, + "loss": 2.4844, + "step": 605, + "train/speech_entropy": 4.0302718410069795, + "train/text_entropy": 2.4832434185480667, + "train/token_acc": 0.3134212567882079 + }, + { + "epoch": 0.08971132494448557, + "grad_norm": 19.807064056396484, + "learning_rate": 8.352656846977916e-06, + "loss": 1.6719, + "step": 606, + "train/speech_entropy": 4.142920831715839, + "train/text_entropy": 1.8603791180778952, + "train/token_acc": 0.2594871794871795 + }, + { + "epoch": 0.08985936343449297, + "grad_norm": 9.511260986328125, + "learning_rate": 8.347017221382679e-06, + "loss": 0.7861, + "step": 607, + "train/speech_entropy": 4.3021410844457435, + "train/text_entropy": 0.8013142110847751, + "train/token_acc": 0.2953285827395091 + }, + { + "epoch": 0.09000740192450037, + "grad_norm": 13.799107551574707, + "learning_rate": 8.341370129816682e-06, + "loss": 0.8926, + "step": 608, + "train/speech_entropy": 3.8714535975071125, + "train/text_entropy": 0.7499641111527366, + "train/token_acc": 0.34269005847953216 + }, + { + "epoch": 0.09015544041450778, + "grad_norm": 13.44991397857666, + "learning_rate": 8.335715587088763e-06, + "loss": 0.8115, + "step": 609, + "train/speech_entropy": 3.8032981337321643, + "train/text_entropy": 0.9673969641975735, + "train/token_acc": 0.2897560975609756 + }, + { + "epoch": 0.09030347890451518, + "grad_norm": 22.69342803955078, + "learning_rate": 8.3300536080273e-06, + "loss": 1.8125, + "step": 610, + "train/speech_entropy": 4.6178353930276534, + "train/text_entropy": 1.6146958295036764, + "train/token_acc": 0.22894168466522677 + }, + { + "epoch": 0.09045151739452258, + "grad_norm": 15.00654125213623, + "learning_rate": 8.324384207480176e-06, + "loss": 1.1699, + "step": 611, + "train/speech_entropy": 4.13502127110273, + "train/text_entropy": 1.1044325828552246, + "train/token_acc": 0.3119266055045872 + }, + { + "epoch": 0.09059955588452998, + "grad_norm": 6.092386245727539, + "learning_rate": 8.31870740031473e-06, + "loss": 0.3135, + "step": 612, + "train/speech_entropy": 3.5530713630445074, + "train/text_entropy": 0.384829255401111, + "train/token_acc": 0.38256227758007116 + }, + { + "epoch": 0.09074759437453737, + "grad_norm": 19.638641357421875, + "learning_rate": 8.313023201417727e-06, + "loss": 2.1797, + "step": 613, + "train/speech_entropy": 4.139657984024439, + "train/text_entropy": 1.9835561688026686, + "train/token_acc": 0.2661795407098121 + }, + { + "epoch": 0.09089563286454479, + "grad_norm": 15.821990966796875, + "learning_rate": 8.307331625695319e-06, + "loss": 1.4277, + "step": 614, + "train/speech_entropy": 3.7484084470005428, + "train/text_entropy": 1.0641476327922488, + "train/token_acc": 0.31370826010544817 + }, + { + "epoch": 0.09104367135455219, + "grad_norm": 14.72890567779541, + "learning_rate": 8.301632688072996e-06, + "loss": 1.4648, + "step": 615, + "train/speech_entropy": 4.207644349125804, + "train/text_entropy": 1.3966940174932065, + "train/token_acc": 0.2708512467755804 + }, + { + "epoch": 0.09119170984455958, + "grad_norm": 18.227680206298828, + "learning_rate": 8.29592640349556e-06, + "loss": 1.207, + "step": 616, + "train/speech_entropy": 4.181333156818515, + "train/text_entropy": 1.111070728302002, + "train/token_acc": 0.2878598247809762 + }, + { + "epoch": 0.09133974833456698, + "grad_norm": 22.086505889892578, + "learning_rate": 8.290212786927076e-06, + "loss": 1.8125, + "step": 617, + "train/speech_entropy": 4.242654584544574, + "train/text_entropy": 1.3594970703125, + "train/token_acc": 0.29036144578313255 + }, + { + "epoch": 0.09148778682457438, + "grad_norm": 11.30693531036377, + "learning_rate": 8.284491853350838e-06, + "loss": 0.5771, + "step": 618, + "train/speech_entropy": 3.970509068247234, + "train/text_entropy": 0.5041633172057817, + "train/token_acc": 0.3419293218720153 + }, + { + "epoch": 0.0916358253145818, + "grad_norm": 10.610136032104492, + "learning_rate": 8.278763617769328e-06, + "loss": 0.8379, + "step": 619, + "train/speech_entropy": 3.8524896309856227, + "train/text_entropy": 0.9880683411943152, + "train/token_acc": 0.3084693084693085 + }, + { + "epoch": 0.0917838638045892, + "grad_norm": 21.798519134521484, + "learning_rate": 8.273028095204174e-06, + "loss": 1.6953, + "step": 620, + "train/speech_entropy": 4.06170654296875, + "train/text_entropy": 1.5992531246609158, + "train/token_acc": 0.2603626943005181 + }, + { + "epoch": 0.09193190229459659, + "grad_norm": 16.426956176757812, + "learning_rate": 8.267285300696115e-06, + "loss": 1.1895, + "step": 621, + "train/speech_entropy": 4.241773770824415, + "train/text_entropy": 1.2609113626812227, + "train/token_acc": 0.25998142989786444 + }, + { + "epoch": 0.09207994078460399, + "grad_norm": 16.985321044921875, + "learning_rate": 8.261535249304966e-06, + "loss": 1.5039, + "step": 622, + "train/speech_entropy": 3.9381487191721365, + "train/text_entropy": 1.3079293903551603, + "train/token_acc": 0.31823671497584544 + }, + { + "epoch": 0.0922279792746114, + "grad_norm": 18.132339477539062, + "learning_rate": 8.255777956109558e-06, + "loss": 1.0664, + "step": 623, + "train/speech_entropy": 4.189067188656673, + "train/text_entropy": 1.0107797141966781, + "train/token_acc": 0.2927170868347339 + }, + { + "epoch": 0.0923760177646188, + "grad_norm": 15.220593452453613, + "learning_rate": 8.250013436207727e-06, + "loss": 1.2969, + "step": 624, + "train/speech_entropy": 4.245310604844463, + "train/text_entropy": 0.9803612480336185, + "train/token_acc": 0.28988149498632637 + }, + { + "epoch": 0.0925240562546262, + "grad_norm": 17.921791076660156, + "learning_rate": 8.244241704716252e-06, + "loss": 1.6133, + "step": 625, + "train/speech_entropy": 4.528661791391552, + "train/text_entropy": 1.6787796020507812, + "train/token_acc": 0.26025791324736225 + }, + { + "epoch": 0.0926720947446336, + "grad_norm": 31.24349021911621, + "learning_rate": 8.238462776770828e-06, + "loss": 1.0215, + "step": 626, + "train/speech_entropy": 3.9593418262622975, + "train/text_entropy": 1.547707580384754, + "train/token_acc": 0.2672672672672673 + }, + { + "epoch": 0.092820133234641, + "grad_norm": 18.06903839111328, + "learning_rate": 8.232676667526014e-06, + "loss": 0.8262, + "step": 627, + "train/speech_entropy": 3.6467794489960013, + "train/text_entropy": 0.7106470226568017, + "train/token_acc": 0.36607142857142855 + }, + { + "epoch": 0.09296817172464841, + "grad_norm": 19.328123092651367, + "learning_rate": 8.226883392155215e-06, + "loss": 0.791, + "step": 628, + "train/speech_entropy": 3.381070368594974, + "train/text_entropy": 0.785187079892521, + "train/token_acc": 0.34044823906083244 + }, + { + "epoch": 0.09311621021465581, + "grad_norm": 19.154850006103516, + "learning_rate": 8.221082965850618e-06, + "loss": 1.2422, + "step": 629, + "train/speech_entropy": 4.093753152461442, + "train/text_entropy": 1.1654673422536543, + "train/token_acc": 0.25704225352112675 + }, + { + "epoch": 0.09326424870466321, + "grad_norm": 12.077876091003418, + "learning_rate": 8.215275403823162e-06, + "loss": 1.1348, + "step": 630, + "train/speech_entropy": 3.952589696767379, + "train/text_entropy": 0.9152293602625529, + "train/token_acc": 0.32889344262295084 + }, + { + "epoch": 0.09341228719467061, + "grad_norm": 11.482194900512695, + "learning_rate": 8.209460721302503e-06, + "loss": 0.5146, + "step": 631, + "train/speech_entropy": 3.6411831839559987, + "train/text_entropy": 0.5937812853159394, + "train/token_acc": 0.33002481389578164 + }, + { + "epoch": 0.09356032568467802, + "grad_norm": 12.080677032470703, + "learning_rate": 8.203638933536967e-06, + "loss": 0.7695, + "step": 632, + "train/speech_entropy": 3.9099519690688775, + "train/text_entropy": 0.813315976749767, + "train/token_acc": 0.30625 + }, + { + "epoch": 0.09370836417468542, + "grad_norm": 21.791112899780273, + "learning_rate": 8.197810055793515e-06, + "loss": 1.3203, + "step": 633, + "train/speech_entropy": 4.214810564801409, + "train/text_entropy": 1.3265821603628305, + "train/token_acc": 0.28062678062678065 + }, + { + "epoch": 0.09385640266469282, + "grad_norm": 16.16947364807129, + "learning_rate": 8.191974103357699e-06, + "loss": 1.5234, + "step": 634, + "train/speech_entropy": 4.253477053010845, + "train/text_entropy": 1.3786915641518969, + "train/token_acc": 0.2757660167130919 + }, + { + "epoch": 0.09400444115470022, + "grad_norm": 18.87871551513672, + "learning_rate": 8.186131091533625e-06, + "loss": 1.7539, + "step": 635, + "train/speech_entropy": 4.268939601412876, + "train/text_entropy": 1.6558391864483173, + "train/token_acc": 0.276013143483023 + }, + { + "epoch": 0.09415247964470762, + "grad_norm": 11.657986640930176, + "learning_rate": 8.180281035643907e-06, + "loss": 1.0757, + "step": 636, + "train/speech_entropy": 4.121569682725237, + "train/text_entropy": 0.8449424662488572, + "train/token_acc": 0.30827702702702703 + }, + { + "epoch": 0.09430051813471503, + "grad_norm": 15.632102966308594, + "learning_rate": 8.174423951029639e-06, + "loss": 1.0625, + "step": 637, + "train/speech_entropy": 4.280301595670944, + "train/text_entropy": 1.4763669819411835, + "train/token_acc": 0.28716904276985744 + }, + { + "epoch": 0.09444855662472243, + "grad_norm": 10.090970039367676, + "learning_rate": 8.168559853050338e-06, + "loss": 0.4033, + "step": 638, + "train/speech_entropy": 3.451995731780888, + "train/text_entropy": 0.4888987011379666, + "train/token_acc": 0.3555900621118012 + }, + { + "epoch": 0.09459659511472983, + "grad_norm": 16.967321395874023, + "learning_rate": 8.162688757083923e-06, + "loss": 1.1094, + "step": 639, + "train/speech_entropy": 4.096692230323372, + "train/text_entropy": 1.359477590342037, + "train/token_acc": 0.2945945945945946 + }, + { + "epoch": 0.09474463360473723, + "grad_norm": 18.42877197265625, + "learning_rate": 8.156810678526652e-06, + "loss": 1.5, + "step": 640, + "train/speech_entropy": 4.184633175056137, + "train/text_entropy": 1.4981505539243585, + "train/token_acc": 0.27472527472527475 + }, + { + "epoch": 0.09489267209474464, + "grad_norm": 17.492631912231445, + "learning_rate": 8.150925632793106e-06, + "loss": 1.3828, + "step": 641, + "train/speech_entropy": 4.114429080338721, + "train/text_entropy": 1.3370258163356181, + "train/token_acc": 0.2799097065462754 + }, + { + "epoch": 0.09504071058475204, + "grad_norm": 13.305543899536133, + "learning_rate": 8.14503363531613e-06, + "loss": 0.9961, + "step": 642, + "train/speech_entropy": 3.961267974055269, + "train/text_entropy": 1.1568615683193864, + "train/token_acc": 0.30505709624796085 + }, + { + "epoch": 0.09518874907475944, + "grad_norm": 12.38676643371582, + "learning_rate": 8.1391347015468e-06, + "loss": 1.0469, + "step": 643, + "train/speech_entropy": 4.20050527943686, + "train/text_entropy": 1.1245926232646695, + "train/token_acc": 0.30137931034482757 + }, + { + "epoch": 0.09533678756476684, + "grad_norm": 16.936681747436523, + "learning_rate": 8.13322884695438e-06, + "loss": 1.4805, + "step": 644, + "train/speech_entropy": 4.433398868970913, + "train/text_entropy": 1.4543755009489239, + "train/token_acc": 0.2398568019093079 + }, + { + "epoch": 0.09548482605477424, + "grad_norm": 16.770071029663086, + "learning_rate": 8.127316087026289e-06, + "loss": 1.4883, + "step": 645, + "train/speech_entropy": 4.3315992842807995, + "train/text_entropy": 1.457369512441207, + "train/token_acc": 0.2773722627737226 + }, + { + "epoch": 0.09563286454478165, + "grad_norm": 16.685134887695312, + "learning_rate": 8.121396437268049e-06, + "loss": 1.4375, + "step": 646, + "train/speech_entropy": 4.219095865885417, + "train/text_entropy": 1.4230608670216687, + "train/token_acc": 0.24919093851132687 + }, + { + "epoch": 0.09578090303478905, + "grad_norm": 11.900941848754883, + "learning_rate": 8.115469913203252e-06, + "loss": 0.6641, + "step": 647, + "train/speech_entropy": 3.805007638283146, + "train/text_entropy": 0.6582821932705966, + "train/token_acc": 0.298 + }, + { + "epoch": 0.09592894152479645, + "grad_norm": 16.102394104003906, + "learning_rate": 8.109536530373515e-06, + "loss": 0.9023, + "step": 648, + "train/speech_entropy": 3.9821715967615225, + "train/text_entropy": 0.7919681235535504, + "train/token_acc": 0.3004640371229698 + }, + { + "epoch": 0.09607698001480384, + "grad_norm": 19.27621078491211, + "learning_rate": 8.103596304338446e-06, + "loss": 1.0352, + "step": 649, + "train/speech_entropy": 4.391229621731505, + "train/text_entropy": 1.22401856921968, + "train/token_acc": 0.292436974789916 + }, + { + "epoch": 0.09622501850481126, + "grad_norm": 17.923837661743164, + "learning_rate": 8.097649250675588e-06, + "loss": 1.668, + "step": 650, + "train/speech_entropy": 4.217238396351289, + "train/text_entropy": 1.4557186889648437, + "train/token_acc": 0.24261275272161742 + }, + { + "epoch": 0.09637305699481866, + "grad_norm": 15.863324165344238, + "learning_rate": 8.091695384980404e-06, + "loss": 1.1133, + "step": 651, + "train/speech_entropy": 4.578557340074924, + "train/text_entropy": 1.2382856474982367, + "train/token_acc": 0.26650062266500624 + }, + { + "epoch": 0.09652109548482606, + "grad_norm": 14.767213821411133, + "learning_rate": 8.085734722866207e-06, + "loss": 1.3906, + "step": 652, + "train/speech_entropy": 4.018647720968935, + "train/text_entropy": 1.374399876856542, + "train/token_acc": 0.2862706913339825 + }, + { + "epoch": 0.09666913397483345, + "grad_norm": 19.141368865966797, + "learning_rate": 8.079767279964145e-06, + "loss": 1.3477, + "step": 653, + "train/speech_entropy": 4.251264180222603, + "train/text_entropy": 1.2926380203430912, + "train/token_acc": 0.28236607142857145 + }, + { + "epoch": 0.09681717246484085, + "grad_norm": 14.845393180847168, + "learning_rate": 8.073793071923134e-06, + "loss": 0.957, + "step": 654, + "train/speech_entropy": 3.7916633053102355, + "train/text_entropy": 1.020785967508952, + "train/token_acc": 0.36281179138321995 + }, + { + "epoch": 0.09696521095484827, + "grad_norm": 12.692044258117676, + "learning_rate": 8.067812114409842e-06, + "loss": 0.7314, + "step": 655, + "train/speech_entropy": 3.407999827449483, + "train/text_entropy": 0.568097515219062, + "train/token_acc": 0.3597122302158273 + }, + { + "epoch": 0.09711324944485566, + "grad_norm": 11.748571395874023, + "learning_rate": 8.061824423108633e-06, + "loss": 0.9658, + "step": 656, + "train/speech_entropy": 3.9090584716796877, + "train/text_entropy": 0.9436905943317178, + "train/token_acc": 0.32984714400643605 + }, + { + "epoch": 0.09726128793486306, + "grad_norm": 16.4638671875, + "learning_rate": 8.055830013721528e-06, + "loss": 0.8096, + "step": 657, + "train/speech_entropy": 4.072131887368945, + "train/text_entropy": 0.5920903099907769, + "train/token_acc": 0.31612223393045313 + }, + { + "epoch": 0.09740932642487046, + "grad_norm": 18.443641662597656, + "learning_rate": 8.049828901968169e-06, + "loss": 1.0625, + "step": 658, + "train/speech_entropy": 4.287163697321806, + "train/text_entropy": 1.3646214429069967, + "train/token_acc": 0.2591549295774648 + }, + { + "epoch": 0.09755736491487786, + "grad_norm": 17.12950325012207, + "learning_rate": 8.043821103585768e-06, + "loss": 1.1914, + "step": 659, + "train/speech_entropy": 4.089920529207156, + "train/text_entropy": 1.2566366402999214, + "train/token_acc": 0.27202323330106487 + }, + { + "epoch": 0.09770540340488527, + "grad_norm": 9.418413162231445, + "learning_rate": 8.037806634329079e-06, + "loss": 0.5332, + "step": 660, + "train/speech_entropy": 3.4603953310747873, + "train/text_entropy": 0.47801265548750804, + "train/token_acc": 0.3668571428571429 + }, + { + "epoch": 0.09785344189489267, + "grad_norm": 17.147144317626953, + "learning_rate": 8.031785509970347e-06, + "loss": 1.3535, + "step": 661, + "train/speech_entropy": 4.254660769497113, + "train/text_entropy": 1.3078633046520807, + "train/token_acc": 0.2776659959758551 + }, + { + "epoch": 0.09800148038490007, + "grad_norm": 13.674064636230469, + "learning_rate": 8.025757746299267e-06, + "loss": 0.9668, + "step": 662, + "train/speech_entropy": 3.690793389132406, + "train/text_entropy": 0.8248151526393661, + "train/token_acc": 0.28700906344410876 + }, + { + "epoch": 0.09814951887490747, + "grad_norm": 19.1749324798584, + "learning_rate": 8.019723359122947e-06, + "loss": 1.4961, + "step": 663, + "train/speech_entropy": 4.115256334738756, + "train/text_entropy": 1.7451312852942424, + "train/token_acc": 0.2695035460992908 + }, + { + "epoch": 0.09829755736491488, + "grad_norm": 15.422938346862793, + "learning_rate": 8.013682364265863e-06, + "loss": 1.2383, + "step": 664, + "train/speech_entropy": 4.398524672896774, + "train/text_entropy": 1.1729894658570648, + "train/token_acc": 0.26285714285714284 + }, + { + "epoch": 0.09844559585492228, + "grad_norm": 18.46556854248047, + "learning_rate": 8.00763477756982e-06, + "loss": 1.1426, + "step": 665, + "train/speech_entropy": 4.206204042262193, + "train/text_entropy": 1.2776959049809087, + "train/token_acc": 0.3132530120481928 + }, + { + "epoch": 0.09859363434492968, + "grad_norm": 19.378149032592773, + "learning_rate": 8.001580614893912e-06, + "loss": 2.2969, + "step": 666, + "train/speech_entropy": 4.147415894087746, + "train/text_entropy": 2.1672209240141367, + "train/token_acc": 0.25843694493783304 + }, + { + "epoch": 0.09874167283493708, + "grad_norm": 12.975531578063965, + "learning_rate": 7.995519892114472e-06, + "loss": 1.4414, + "step": 667, + "train/speech_entropy": 4.295218808091338, + "train/text_entropy": 1.353796909343941, + "train/token_acc": 0.2743475493316359 + }, + { + "epoch": 0.09888971132494448, + "grad_norm": 13.56026840209961, + "learning_rate": 7.989452625125039e-06, + "loss": 1.5547, + "step": 668, + "train/speech_entropy": 4.439614432198661, + "train/text_entropy": 1.436805865861393, + "train/token_acc": 0.24705882352941178 + }, + { + "epoch": 0.09903774981495189, + "grad_norm": 19.125253677368164, + "learning_rate": 7.983378829836314e-06, + "loss": 0.8047, + "step": 669, + "train/speech_entropy": 3.2520384537546256, + "train/text_entropy": 0.6080958048502604, + "train/token_acc": 0.3131868131868132 + }, + { + "epoch": 0.09918578830495929, + "grad_norm": 23.20052146911621, + "learning_rate": 7.977298522176115e-06, + "loss": 2.1953, + "step": 670, + "train/speech_entropy": 4.23191560696525, + "train/text_entropy": 2.052773023173757, + "train/token_acc": 0.2770897832817337 + }, + { + "epoch": 0.09933382679496669, + "grad_norm": 18.6257381439209, + "learning_rate": 7.971211718089346e-06, + "loss": 1.418, + "step": 671, + "train/speech_entropy": 3.6313646454172037, + "train/text_entropy": 1.3858095056870405, + "train/token_acc": 0.27715877437325903 + }, + { + "epoch": 0.09948186528497409, + "grad_norm": 9.28011703491211, + "learning_rate": 7.965118433537934e-06, + "loss": 1.0625, + "step": 672, + "train/speech_entropy": 4.518016978130485, + "train/text_entropy": 1.2339004144435977, + "train/token_acc": 0.26380368098159507 + }, + { + "epoch": 0.0996299037749815, + "grad_norm": 17.91817283630371, + "learning_rate": 7.959018684500812e-06, + "loss": 1.793, + "step": 673, + "train/speech_entropy": 4.024237427424864, + "train/text_entropy": 1.792564685528095, + "train/token_acc": 0.25734639358860195 + }, + { + "epoch": 0.0997779422649889, + "grad_norm": 20.207117080688477, + "learning_rate": 7.952912486973859e-06, + "loss": 1.5469, + "step": 674, + "train/speech_entropy": 3.93358642578125, + "train/text_entropy": 1.5433331008785027, + "train/token_acc": 0.28691275167785235 + }, + { + "epoch": 0.0999259807549963, + "grad_norm": 13.649026870727539, + "learning_rate": 7.946799856969864e-06, + "loss": 0.9375, + "step": 675, + "train/speech_entropy": 4.093686229374562, + "train/text_entropy": 0.7967872181157957, + "train/token_acc": 0.3108108108108108 + }, + { + "epoch": 0.1000740192450037, + "grad_norm": 12.507515907287598, + "learning_rate": 7.94068081051849e-06, + "loss": 0.7588, + "step": 676, + "train/speech_entropy": 4.236168747484265, + "train/text_entropy": 0.8347662420220714, + "train/token_acc": 0.2844744455159113 + }, + { + "epoch": 0.1002220577350111, + "grad_norm": 8.550621032714844, + "learning_rate": 7.934555363666224e-06, + "loss": 0.7061, + "step": 677, + "train/speech_entropy": 3.885409230558817, + "train/text_entropy": 0.794732250579416, + "train/token_acc": 0.32604237867395763 + }, + { + "epoch": 0.10037009622501851, + "grad_norm": 17.44499397277832, + "learning_rate": 7.928423532476332e-06, + "loss": 1.1934, + "step": 678, + "train/speech_entropy": 4.322056924143145, + "train/text_entropy": 1.5007029639350042, + "train/token_acc": 0.28240109140518416 + }, + { + "epoch": 0.10051813471502591, + "grad_norm": 16.540376663208008, + "learning_rate": 7.92228533302883e-06, + "loss": 1.3711, + "step": 679, + "train/speech_entropy": 4.189332967016155, + "train/text_entropy": 1.4006896682932406, + "train/token_acc": 0.25 + }, + { + "epoch": 0.10066617320503331, + "grad_norm": 13.984689712524414, + "learning_rate": 7.916140781420428e-06, + "loss": 0.8467, + "step": 680, + "train/speech_entropy": 3.733980655670166, + "train/text_entropy": 0.8376972892067649, + "train/token_acc": 0.3389830508474576 + }, + { + "epoch": 0.1008142116950407, + "grad_norm": 15.291882514953613, + "learning_rate": 7.909989893764497e-06, + "loss": 1.2793, + "step": 681, + "train/speech_entropy": 4.552168712839552, + "train/text_entropy": 1.1470676392547845, + "train/token_acc": 0.274323335771763 + }, + { + "epoch": 0.10096225018504812, + "grad_norm": 12.789134979248047, + "learning_rate": 7.903832686191026e-06, + "loss": 1.2969, + "step": 682, + "train/speech_entropy": 4.382833005249344, + "train/text_entropy": 1.6493376579837522, + "train/token_acc": 0.2861169837914024 + }, + { + "epoch": 0.10111028867505552, + "grad_norm": 21.92783546447754, + "learning_rate": 7.897669174846568e-06, + "loss": 1.2188, + "step": 683, + "train/speech_entropy": 4.539855682921266, + "train/text_entropy": 1.1391830235882534, + "train/token_acc": 0.30434782608695654 + }, + { + "epoch": 0.10125832716506292, + "grad_norm": 18.43501853942871, + "learning_rate": 7.891499375894217e-06, + "loss": 1.8008, + "step": 684, + "train/speech_entropy": 4.311276241645335, + "train/text_entropy": 1.4852147550664396, + "train/token_acc": 0.27942497753818507 + }, + { + "epoch": 0.10140636565507032, + "grad_norm": 18.980072021484375, + "learning_rate": 7.885323305513551e-06, + "loss": 1.0195, + "step": 685, + "train/speech_entropy": 3.771190414159121, + "train/text_entropy": 1.0331533258611505, + "train/token_acc": 0.28254437869822485 + }, + { + "epoch": 0.10155440414507771, + "grad_norm": 5.549124240875244, + "learning_rate": 7.879140979900595e-06, + "loss": 0.2034, + "step": 686, + "train/speech_entropy": 3.6244619269120064, + "train/text_entropy": 0.3332660174128985, + "train/token_acc": 0.3491295938104449 + }, + { + "epoch": 0.10170244263508513, + "grad_norm": 12.305464744567871, + "learning_rate": 7.872952415267775e-06, + "loss": 0.5039, + "step": 687, + "train/speech_entropy": 3.438108178633678, + "train/text_entropy": 0.39922957885556104, + "train/token_acc": 0.3706030150753769 + }, + { + "epoch": 0.10185048112509253, + "grad_norm": 13.264803886413574, + "learning_rate": 7.866757627843883e-06, + "loss": 1.0215, + "step": 688, + "train/speech_entropy": 3.6372955387058785, + "train/text_entropy": 1.25004180771353, + "train/token_acc": 0.3090128755364807 + }, + { + "epoch": 0.10199851961509993, + "grad_norm": 18.242809295654297, + "learning_rate": 7.860556633874026e-06, + "loss": 1.2852, + "step": 689, + "train/speech_entropy": 4.140062200010212, + "train/text_entropy": 1.2972667804662732, + "train/token_acc": 0.26688815060908083 + }, + { + "epoch": 0.10214655810510732, + "grad_norm": 12.377289772033691, + "learning_rate": 7.854349449619586e-06, + "loss": 0.5918, + "step": 690, + "train/speech_entropy": 3.658132842092803, + "train/text_entropy": 0.5396208329634233, + "train/token_acc": 0.35244755244755244 + }, + { + "epoch": 0.10229459659511472, + "grad_norm": 17.614213943481445, + "learning_rate": 7.848136091358182e-06, + "loss": 1.0176, + "step": 691, + "train/speech_entropy": 3.955398790375525, + "train/text_entropy": 1.1603942172217916, + "train/token_acc": 0.2975206611570248 + }, + { + "epoch": 0.10244263508512214, + "grad_norm": 13.06287670135498, + "learning_rate": 7.841916575383622e-06, + "loss": 1.0547, + "step": 692, + "train/speech_entropy": 3.9272527667736803, + "train/text_entropy": 1.0842655441523847, + "train/token_acc": 0.2659744408945687 + }, + { + "epoch": 0.10259067357512953, + "grad_norm": 15.808326721191406, + "learning_rate": 7.83569091800586e-06, + "loss": 1.7031, + "step": 693, + "train/speech_entropy": 4.075709063307683, + "train/text_entropy": 1.8422794102624869, + "train/token_acc": 0.261703760552571 + }, + { + "epoch": 0.10273871206513693, + "grad_norm": 17.640783309936523, + "learning_rate": 7.829459135550957e-06, + "loss": 1.6445, + "step": 694, + "train/speech_entropy": 4.109065108771423, + "train/text_entropy": 1.5562127330315807, + "train/token_acc": 0.2518050541516246 + }, + { + "epoch": 0.10288675055514433, + "grad_norm": 18.893674850463867, + "learning_rate": 7.823221244361037e-06, + "loss": 1.0693, + "step": 695, + "train/speech_entropy": 4.1202955470162745, + "train/text_entropy": 1.4678756936082562, + "train/token_acc": 0.2530487804878049 + }, + { + "epoch": 0.10303478904515175, + "grad_norm": 25.073665618896484, + "learning_rate": 7.816977260794242e-06, + "loss": 1.293, + "step": 696, + "train/speech_entropy": 3.4784943153118264, + "train/text_entropy": 1.080352783203125, + "train/token_acc": 0.3109048723897912 + }, + { + "epoch": 0.10318282753515914, + "grad_norm": 15.499256134033203, + "learning_rate": 7.81072720122469e-06, + "loss": 1.1992, + "step": 697, + "train/speech_entropy": 4.317771072022652, + "train/text_entropy": 1.1774643168729895, + "train/token_acc": 0.27403846153846156 + }, + { + "epoch": 0.10333086602516654, + "grad_norm": 19.88504981994629, + "learning_rate": 7.804471082042432e-06, + "loss": 0.7832, + "step": 698, + "train/speech_entropy": 3.3584502121497843, + "train/text_entropy": 0.7114071778848138, + "train/token_acc": 0.3601108033240997 + }, + { + "epoch": 0.10347890451517394, + "grad_norm": 15.871016502380371, + "learning_rate": 7.798208919653418e-06, + "loss": 1.4219, + "step": 699, + "train/speech_entropy": 4.196228750042048, + "train/text_entropy": 1.4543512174398592, + "train/token_acc": 0.25552608311229 + }, + { + "epoch": 0.10362694300518134, + "grad_norm": 11.504782676696777, + "learning_rate": 7.791940730479435e-06, + "loss": 0.9727, + "step": 700, + "train/speech_entropy": 3.930038807003997, + "train/text_entropy": 1.0158566294262588, + "train/token_acc": 0.30420969023034156 + }, + { + "epoch": 0.10377498149518875, + "grad_norm": 17.42679214477539, + "learning_rate": 7.785666530958074e-06, + "loss": 1.1074, + "step": 701, + "train/speech_entropy": 4.016568493613093, + "train/text_entropy": 0.7779079266448519, + "train/token_acc": 0.30423280423280424 + }, + { + "epoch": 0.10392301998519615, + "grad_norm": 18.555810928344727, + "learning_rate": 7.7793863375427e-06, + "loss": 1.9648, + "step": 702, + "train/speech_entropy": 3.858250668174342, + "train/text_entropy": 2.04783434288524, + "train/token_acc": 0.2695238095238095 + }, + { + "epoch": 0.10407105847520355, + "grad_norm": 15.20937728881836, + "learning_rate": 7.77310016670238e-06, + "loss": 1.0195, + "step": 703, + "train/speech_entropy": 3.7531359741731865, + "train/text_entropy": 1.191191648825621, + "train/token_acc": 0.30267753201396974 + }, + { + "epoch": 0.10421909696521095, + "grad_norm": 19.168479919433594, + "learning_rate": 7.76680803492187e-06, + "loss": 1.4805, + "step": 704, + "train/speech_entropy": 4.067990309598521, + "train/text_entropy": 1.2444872887305964, + "train/token_acc": 0.2727272727272727 + }, + { + "epoch": 0.10436713545521836, + "grad_norm": 18.3392391204834, + "learning_rate": 7.760509958701549e-06, + "loss": 1.6367, + "step": 705, + "train/speech_entropy": 3.995153422818496, + "train/text_entropy": 1.6185016016806326, + "train/token_acc": 0.27705223880597013 + }, + { + "epoch": 0.10451517394522576, + "grad_norm": 10.478803634643555, + "learning_rate": 7.75420595455739e-06, + "loss": 0.8428, + "step": 706, + "train/speech_entropy": 3.3219841780632797, + "train/text_entropy": 0.8178909883370848, + "train/token_acc": 0.3341772151898734 + }, + { + "epoch": 0.10466321243523316, + "grad_norm": 10.639156341552734, + "learning_rate": 7.747896039020905e-06, + "loss": 0.9336, + "step": 707, + "train/speech_entropy": 3.7552152211885397, + "train/text_entropy": 1.0875371356343113, + "train/token_acc": 0.3047696038803557 + }, + { + "epoch": 0.10481125092524056, + "grad_norm": 9.853907585144043, + "learning_rate": 7.741580228639118e-06, + "loss": 0.752, + "step": 708, + "train/speech_entropy": 3.6921088883966924, + "train/text_entropy": 0.7085320272563417, + "train/token_acc": 0.3575268817204301 + }, + { + "epoch": 0.10495928941524796, + "grad_norm": 11.322315216064453, + "learning_rate": 7.7352585399745e-06, + "loss": 0.9341, + "step": 709, + "train/speech_entropy": 3.5715736887562812, + "train/text_entropy": 1.1969852743988827, + "train/token_acc": 0.32861476238624876 + }, + { + "epoch": 0.10510732790525537, + "grad_norm": 13.85293960571289, + "learning_rate": 7.728930989604945e-06, + "loss": 1.3125, + "step": 710, + "train/speech_entropy": 4.005296043113426, + "train/text_entropy": 1.1938199885624392, + "train/token_acc": 0.29693343305908754 + }, + { + "epoch": 0.10525536639526277, + "grad_norm": 18.677135467529297, + "learning_rate": 7.722597594123715e-06, + "loss": 0.9004, + "step": 711, + "train/speech_entropy": 3.804860044678356, + "train/text_entropy": 0.9112366800722869, + "train/token_acc": 0.30721649484536084 + }, + { + "epoch": 0.10540340488527017, + "grad_norm": 16.25985336303711, + "learning_rate": 7.716258370139403e-06, + "loss": 1.6328, + "step": 712, + "train/speech_entropy": 4.10502279201804, + "train/text_entropy": 1.667858184330047, + "train/token_acc": 0.2670414617006325 + }, + { + "epoch": 0.10555144337527757, + "grad_norm": 16.000411987304688, + "learning_rate": 7.709913334275884e-06, + "loss": 1.0879, + "step": 713, + "train/speech_entropy": 3.82076256603589, + "train/text_entropy": 1.2402440963252899, + "train/token_acc": 0.28550512445095166 + }, + { + "epoch": 0.10569948186528498, + "grad_norm": 17.070701599121094, + "learning_rate": 7.703562503172273e-06, + "loss": 1.5898, + "step": 714, + "train/speech_entropy": 3.6776144652240044, + "train/text_entropy": 1.531927130195532, + "train/token_acc": 0.286214953271028 + }, + { + "epoch": 0.10584752035529238, + "grad_norm": 18.08481788635254, + "learning_rate": 7.697205893482889e-06, + "loss": 1.7617, + "step": 715, + "train/speech_entropy": 4.000969609127769, + "train/text_entropy": 2.1579137142235636, + "train/token_acc": 0.2631578947368421 + }, + { + "epoch": 0.10599555884529978, + "grad_norm": 12.772119522094727, + "learning_rate": 7.690843521877194e-06, + "loss": 1.2246, + "step": 716, + "train/speech_entropy": 3.896038765893625, + "train/text_entropy": 1.1986440843151462, + "train/token_acc": 0.32123552123552124 + }, + { + "epoch": 0.10614359733530718, + "grad_norm": 18.12458610534668, + "learning_rate": 7.68447540503977e-06, + "loss": 1.0664, + "step": 717, + "train/speech_entropy": 3.549038242965161, + "train/text_entropy": 1.049156430741431, + "train/token_acc": 0.3169811320754717 + }, + { + "epoch": 0.10629163582531458, + "grad_norm": 8.065598487854004, + "learning_rate": 7.678101559670259e-06, + "loss": 0.6655, + "step": 718, + "train/speech_entropy": 3.690476904431141, + "train/text_entropy": 0.7101355072454358, + "train/token_acc": 0.33648943918426805 + }, + { + "epoch": 0.10643967431532199, + "grad_norm": 21.20306396484375, + "learning_rate": 7.671722002483328e-06, + "loss": 1.7422, + "step": 719, + "train/speech_entropy": 3.6707914977514444, + "train/text_entropy": 1.7422219925567883, + "train/token_acc": 0.27899159663865547 + }, + { + "epoch": 0.10658771280532939, + "grad_norm": 14.407920837402344, + "learning_rate": 7.665336750208624e-06, + "loss": 1.2422, + "step": 720, + "train/speech_entropy": 4.147839437395135, + "train/text_entropy": 1.1903106875535918, + "train/token_acc": 0.27399165507649514 + }, + { + "epoch": 0.10673575129533679, + "grad_norm": 16.50201988220215, + "learning_rate": 7.658945819590722e-06, + "loss": 1.0352, + "step": 721, + "train/speech_entropy": 3.550180787267596, + "train/text_entropy": 1.077999259852156, + "train/token_acc": 0.30735455543358947 + }, + { + "epoch": 0.10688378978534419, + "grad_norm": 14.496567726135254, + "learning_rate": 7.652549227389097e-06, + "loss": 1.1133, + "step": 722, + "train/speech_entropy": 3.6671481092436973, + "train/text_entropy": 1.1100308707590854, + "train/token_acc": 0.3172645739910314 + }, + { + "epoch": 0.1070318282753516, + "grad_norm": 18.72528648376465, + "learning_rate": 7.646146990378065e-06, + "loss": 1.0352, + "step": 723, + "train/speech_entropy": 3.7729217696669335, + "train/text_entropy": 1.1073504901323161, + "train/token_acc": 0.2795216741405082 + }, + { + "epoch": 0.107179866765359, + "grad_norm": 15.15335464477539, + "learning_rate": 7.639739125346745e-06, + "loss": 0.71, + "step": 724, + "train/speech_entropy": 3.703686491398048, + "train/text_entropy": 0.5913852314616359, + "train/token_acc": 0.33832976445396146 + }, + { + "epoch": 0.1073279052553664, + "grad_norm": 19.506956100463867, + "learning_rate": 7.633325649099017e-06, + "loss": 1.2285, + "step": 725, + "train/speech_entropy": 3.849755648908944, + "train/text_entropy": 1.2409589684527853, + "train/token_acc": 0.30915371329879104 + }, + { + "epoch": 0.1074759437453738, + "grad_norm": 17.01750373840332, + "learning_rate": 7.626906578453476e-06, + "loss": 0.8271, + "step": 726, + "train/speech_entropy": 3.906907063231994, + "train/text_entropy": 0.638968335903757, + "train/token_acc": 0.30979827089337175 + }, + { + "epoch": 0.1076239822353812, + "grad_norm": 15.267739295959473, + "learning_rate": 7.620481930243383e-06, + "loss": 1.0703, + "step": 727, + "train/speech_entropy": 4.13156392768483, + "train/text_entropy": 0.9860856756200931, + "train/token_acc": 0.3392857142857143 + }, + { + "epoch": 0.1077720207253886, + "grad_norm": 16.523788452148438, + "learning_rate": 7.614051721316631e-06, + "loss": 1.5039, + "step": 728, + "train/speech_entropy": 3.777907067789532, + "train/text_entropy": 1.737965082105302, + "train/token_acc": 0.2768259693417493 + }, + { + "epoch": 0.107920059215396, + "grad_norm": 13.447721481323242, + "learning_rate": 7.607615968535694e-06, + "loss": 1.3086, + "step": 729, + "train/speech_entropy": 3.9400749238200206, + "train/text_entropy": 1.1508273027074618, + "train/token_acc": 0.26936257710760797 + }, + { + "epoch": 0.1080680977054034, + "grad_norm": 15.622806549072266, + "learning_rate": 7.601174688777584e-06, + "loss": 1.2207, + "step": 730, + "train/speech_entropy": 4.241506483142342, + "train/text_entropy": 1.4059862459086936, + "train/token_acc": 0.28031634446397186 + }, + { + "epoch": 0.1082161361954108, + "grad_norm": 18.524269104003906, + "learning_rate": 7.5947278989338035e-06, + "loss": 1.3633, + "step": 731, + "train/speech_entropy": 3.9746281367795477, + "train/text_entropy": 1.5135785616361177, + "train/token_acc": 0.2686335403726708 + }, + { + "epoch": 0.1083641746854182, + "grad_norm": 12.711840629577637, + "learning_rate": 7.588275615910309e-06, + "loss": 1.04, + "step": 732, + "train/speech_entropy": 3.7544589783093945, + "train/text_entropy": 1.1134824054997141, + "train/token_acc": 0.30572160546541416 + }, + { + "epoch": 0.10851221317542561, + "grad_norm": 11.14571762084961, + "learning_rate": 7.58181785662746e-06, + "loss": 0.7334, + "step": 733, + "train/speech_entropy": 4.142983695056951, + "train/text_entropy": 0.7713932080215282, + "train/token_acc": 0.29497098646034814 + }, + { + "epoch": 0.10866025166543301, + "grad_norm": 14.877224922180176, + "learning_rate": 7.5753546380199785e-06, + "loss": 0.8008, + "step": 734, + "train/speech_entropy": 3.4906762285930353, + "train/text_entropy": 1.0043389881756288, + "train/token_acc": 0.30614805520702637 + }, + { + "epoch": 0.10880829015544041, + "grad_norm": 12.055302619934082, + "learning_rate": 7.568885977036901e-06, + "loss": 1.1016, + "step": 735, + "train/speech_entropy": 4.120246530562557, + "train/text_entropy": 1.1491605111680891, + "train/token_acc": 0.2723823975720789 + }, + { + "epoch": 0.10895632864544781, + "grad_norm": 13.358073234558105, + "learning_rate": 7.5624118906415355e-06, + "loss": 0.8711, + "step": 736, + "train/speech_entropy": 3.457660552338287, + "train/text_entropy": 0.5840149624809545, + "train/token_acc": 0.3675496688741722 + }, + { + "epoch": 0.10910436713545522, + "grad_norm": 14.761870384216309, + "learning_rate": 7.555932395811422e-06, + "loss": 1.0, + "step": 737, + "train/speech_entropy": 3.414969828591418, + "train/text_entropy": 0.9863901871901292, + "train/token_acc": 0.2990314769975787 + }, + { + "epoch": 0.10925240562546262, + "grad_norm": 10.156814575195312, + "learning_rate": 7.549447509538278e-06, + "loss": 0.4336, + "step": 738, + "train/speech_entropy": 3.846614342231255, + "train/text_entropy": 0.5149405717849731, + "train/token_acc": 0.3492268041237113 + }, + { + "epoch": 0.10940044411547002, + "grad_norm": 20.70349884033203, + "learning_rate": 7.5429572488279615e-06, + "loss": 1.6406, + "step": 739, + "train/speech_entropy": 4.093012894372002, + "train/text_entropy": 1.8092099302303717, + "train/token_acc": 0.24824120603015076 + }, + { + "epoch": 0.10954848260547742, + "grad_norm": 12.412102699279785, + "learning_rate": 7.536461630700426e-06, + "loss": 1.1836, + "step": 740, + "train/speech_entropy": 4.172618686136534, + "train/text_entropy": 1.1591609716415405, + "train/token_acc": 0.2841181165203512 + }, + { + "epoch": 0.10969652109548482, + "grad_norm": 21.480274200439453, + "learning_rate": 7.529960672189672e-06, + "loss": 2.0, + "step": 741, + "train/speech_entropy": 4.47268769780912, + "train/text_entropy": 1.8675863576489826, + "train/token_acc": 0.23549201009251472 + }, + { + "epoch": 0.10984455958549223, + "grad_norm": 12.709410667419434, + "learning_rate": 7.5234543903437065e-06, + "loss": 0.4727, + "step": 742, + "train/speech_entropy": 3.3279404656145486, + "train/text_entropy": 0.4409225990032328, + "train/token_acc": 0.35397039030955585 + }, + { + "epoch": 0.10999259807549963, + "grad_norm": 11.308231353759766, + "learning_rate": 7.516942802224489e-06, + "loss": 0.9946, + "step": 743, + "train/speech_entropy": 3.7031685163831076, + "train/text_entropy": 1.197200692218283, + "train/token_acc": 0.30016863406408095 + }, + { + "epoch": 0.11014063656550703, + "grad_norm": 14.788912773132324, + "learning_rate": 7.5104259249079115e-06, + "loss": 1.1758, + "step": 744, + "train/speech_entropy": 3.821070260712595, + "train/text_entropy": 0.9798568259668714, + "train/token_acc": 0.3242811501597444 + }, + { + "epoch": 0.11028867505551443, + "grad_norm": 15.355035781860352, + "learning_rate": 7.503903775483716e-06, + "loss": 1.084, + "step": 745, + "train/speech_entropy": 3.882904052734375, + "train/text_entropy": 1.269510117252316, + "train/token_acc": 0.33054003724394787 + }, + { + "epoch": 0.11043671354552184, + "grad_norm": 24.35964012145996, + "learning_rate": 7.497376371055481e-06, + "loss": 1.9609, + "step": 746, + "train/speech_entropy": 3.614183123207457, + "train/text_entropy": 1.8327000141143799, + "train/token_acc": 0.2780337941628264 + }, + { + "epoch": 0.11058475203552924, + "grad_norm": 16.1589298248291, + "learning_rate": 7.4908437287405666e-06, + "loss": 0.8008, + "step": 747, + "train/speech_entropy": 3.4143539268092105, + "train/text_entropy": 1.0994154888650645, + "train/token_acc": 0.3703099510603589 + }, + { + "epoch": 0.11073279052553664, + "grad_norm": 16.694713592529297, + "learning_rate": 7.484305865670064e-06, + "loss": 1.7461, + "step": 748, + "train/speech_entropy": 4.106234652534772, + "train/text_entropy": 1.3837963397686297, + "train/token_acc": 0.2898389783453637 + }, + { + "epoch": 0.11088082901554404, + "grad_norm": 18.51128387451172, + "learning_rate": 7.477762798988755e-06, + "loss": 1.4492, + "step": 749, + "train/speech_entropy": 4.486918908039122, + "train/text_entropy": 1.3360067030962777, + "train/token_acc": 0.2583682008368201 + }, + { + "epoch": 0.11102886750555144, + "grad_norm": 14.558300018310547, + "learning_rate": 7.471214545855071e-06, + "loss": 1.0996, + "step": 750, + "train/speech_entropy": 3.692387129620808, + "train/text_entropy": 0.9635487984919894, + "train/token_acc": 0.34079844206426485 + }, + { + "epoch": 0.11117690599555885, + "grad_norm": 17.610078811645508, + "learning_rate": 7.464661123441043e-06, + "loss": 1.3555, + "step": 751, + "train/speech_entropy": 3.9797303734756095, + "train/text_entropy": 1.3898003030810835, + "train/token_acc": 0.270042194092827 + }, + { + "epoch": 0.11132494448556625, + "grad_norm": 15.477089881896973, + "learning_rate": 7.458102548932255e-06, + "loss": 1.625, + "step": 752, + "train/speech_entropy": 3.688978647177667, + "train/text_entropy": 1.253312413093975, + "train/token_acc": 0.3 + }, + { + "epoch": 0.11147298297557365, + "grad_norm": 12.055418014526367, + "learning_rate": 7.451538839527808e-06, + "loss": 1.0859, + "step": 753, + "train/speech_entropy": 4.039075264198652, + "train/text_entropy": 1.0695324436764875, + "train/token_acc": 0.2899728997289973 + }, + { + "epoch": 0.11162102146558105, + "grad_norm": 13.893319129943848, + "learning_rate": 7.444970012440259e-06, + "loss": 1.1777, + "step": 754, + "train/speech_entropy": 3.7138868149880055, + "train/text_entropy": 1.142130053606156, + "train/token_acc": 0.3091060985797828 + }, + { + "epoch": 0.11176905995558846, + "grad_norm": 12.316691398620605, + "learning_rate": 7.438396084895597e-06, + "loss": 1.3525, + "step": 755, + "train/speech_entropy": 4.217874919398549, + "train/text_entropy": 1.6755442255193538, + "train/token_acc": 0.3258325832583258 + }, + { + "epoch": 0.11191709844559586, + "grad_norm": 15.783905982971191, + "learning_rate": 7.431817074133178e-06, + "loss": 1.5781, + "step": 756, + "train/speech_entropy": 3.959910094855108, + "train/text_entropy": 1.476636653572186, + "train/token_acc": 0.25746569814366427 + }, + { + "epoch": 0.11206513693560326, + "grad_norm": 19.105894088745117, + "learning_rate": 7.425232997405689e-06, + "loss": 1.1211, + "step": 757, + "train/speech_entropy": 3.5342839505153276, + "train/text_entropy": 1.0637813144259982, + "train/token_acc": 0.31153184165232356 + }, + { + "epoch": 0.11221317542561066, + "grad_norm": 16.99700164794922, + "learning_rate": 7.4186438719791066e-06, + "loss": 1.5508, + "step": 758, + "train/speech_entropy": 4.128013273256015, + "train/text_entropy": 1.5395133349360253, + "train/token_acc": 0.27 + }, + { + "epoch": 0.11236121391561806, + "grad_norm": 19.068653106689453, + "learning_rate": 7.412049715132643e-06, + "loss": 1.4238, + "step": 759, + "train/speech_entropy": 4.110874403595804, + "train/text_entropy": 1.566278584798177, + "train/token_acc": 0.26013513513513514 + }, + { + "epoch": 0.11250925240562547, + "grad_norm": 15.751558303833008, + "learning_rate": 7.4054505441587075e-06, + "loss": 0.6504, + "step": 760, + "train/speech_entropy": 3.1665043686375474, + "train/text_entropy": 0.5089231473263179, + "train/token_acc": 0.30393700787401573 + }, + { + "epoch": 0.11265729089563287, + "grad_norm": 17.882219314575195, + "learning_rate": 7.398846376362853e-06, + "loss": 1.3164, + "step": 761, + "train/speech_entropy": 3.9233136205616113, + "train/text_entropy": 1.2912476546280867, + "train/token_acc": 0.2638717632552404 + }, + { + "epoch": 0.11280532938564027, + "grad_norm": 15.199311256408691, + "learning_rate": 7.392237229063741e-06, + "loss": 1.0664, + "step": 762, + "train/speech_entropy": 4.0107692253542755, + "train/text_entropy": 1.401525995005732, + "train/token_acc": 0.2893347412882788 + }, + { + "epoch": 0.11295336787564766, + "grad_norm": 16.880578994750977, + "learning_rate": 7.385623119593093e-06, + "loss": 1.1406, + "step": 763, + "train/speech_entropy": 3.885811899943524, + "train/text_entropy": 1.1545399618412249, + "train/token_acc": 0.28486646884273 + }, + { + "epoch": 0.11310140636565508, + "grad_norm": 7.4900946617126465, + "learning_rate": 7.379004065295636e-06, + "loss": 0.3213, + "step": 764, + "train/speech_entropy": 3.445163898078763, + "train/text_entropy": 0.3285877086498119, + "train/token_acc": 0.3793949304987735 + }, + { + "epoch": 0.11324944485566248, + "grad_norm": 18.33951187133789, + "learning_rate": 7.372380083529068e-06, + "loss": 1.1348, + "step": 765, + "train/speech_entropy": 4.128254542374182, + "train/text_entropy": 0.973923669345137, + "train/token_acc": 0.27770360480640854 + }, + { + "epoch": 0.11339748334566987, + "grad_norm": 13.660920143127441, + "learning_rate": 7.365751191664012e-06, + "loss": 1.2109, + "step": 766, + "train/speech_entropy": 4.2139731099286415, + "train/text_entropy": 1.1266534777655117, + "train/token_acc": 0.26196636481241914 + }, + { + "epoch": 0.11354552183567727, + "grad_norm": 16.786405563354492, + "learning_rate": 7.359117407083964e-06, + "loss": 1.6758, + "step": 767, + "train/speech_entropy": 4.137699570766715, + "train/text_entropy": 1.5834127446656585, + "train/token_acc": 0.25812619502868067 + }, + { + "epoch": 0.11369356032568467, + "grad_norm": 15.964873313903809, + "learning_rate": 7.352478747185247e-06, + "loss": 1.5781, + "step": 768, + "train/speech_entropy": 3.7852562894615494, + "train/text_entropy": 1.4100324190579927, + "train/token_acc": 0.27137546468401486 + }, + { + "epoch": 0.11384159881569209, + "grad_norm": 18.083375930786133, + "learning_rate": 7.345835229376979e-06, + "loss": 1.0449, + "step": 769, + "train/speech_entropy": 3.515976769939747, + "train/text_entropy": 0.8649024106143566, + "train/token_acc": 0.3361611876988335 + }, + { + "epoch": 0.11398963730569948, + "grad_norm": 10.865363121032715, + "learning_rate": 7.339186871081005e-06, + "loss": 1.0908, + "step": 770, + "train/speech_entropy": 3.920989802351308, + "train/text_entropy": 1.1177268233953739, + "train/token_acc": 0.28951149425287354 + }, + { + "epoch": 0.11413767579570688, + "grad_norm": 18.756258010864258, + "learning_rate": 7.33253368973187e-06, + "loss": 1.9688, + "step": 771, + "train/speech_entropy": 4.055225890359761, + "train/text_entropy": 1.5877866423531863, + "train/token_acc": 0.288135593220339 + }, + { + "epoch": 0.11428571428571428, + "grad_norm": 35.02879333496094, + "learning_rate": 7.325875702776769e-06, + "loss": 2.2305, + "step": 772, + "train/speech_entropy": 3.956913630167643, + "train/text_entropy": 1.353790363231739, + "train/token_acc": 0.2846625766871166 + }, + { + "epoch": 0.11443375277572168, + "grad_norm": 18.192873001098633, + "learning_rate": 7.319212927675498e-06, + "loss": 2.0781, + "step": 773, + "train/speech_entropy": 3.9932868835524293, + "train/text_entropy": 2.0267525467218137, + "train/token_acc": 0.25958702064896755 + }, + { + "epoch": 0.1145817912657291, + "grad_norm": 22.976364135742188, + "learning_rate": 7.312545381900402e-06, + "loss": 1.5, + "step": 774, + "train/speech_entropy": 3.4629268018170496, + "train/text_entropy": 1.4923784714336543, + "train/token_acc": 0.2988668555240793 + }, + { + "epoch": 0.11472982975573649, + "grad_norm": 19.267301559448242, + "learning_rate": 7.3058730829363485e-06, + "loss": 1.0234, + "step": 775, + "train/speech_entropy": 3.7254021097217085, + "train/text_entropy": 1.1383157046336048, + "train/token_acc": 0.31484502446982054 + }, + { + "epoch": 0.11487786824574389, + "grad_norm": 11.770009994506836, + "learning_rate": 7.299196048280661e-06, + "loss": 0.957, + "step": 776, + "train/speech_entropy": 4.347596176027313, + "train/text_entropy": 0.9662623260960435, + "train/token_acc": 0.31875 + }, + { + "epoch": 0.11502590673575129, + "grad_norm": 13.242656707763672, + "learning_rate": 7.2925142954430846e-06, + "loss": 0.875, + "step": 777, + "train/speech_entropy": 3.5646998891336126, + "train/text_entropy": 0.604683190061335, + "train/token_acc": 0.340956340956341 + }, + { + "epoch": 0.1151739452257587, + "grad_norm": 13.734225273132324, + "learning_rate": 7.285827841945739e-06, + "loss": 1.0332, + "step": 778, + "train/speech_entropy": 3.912596181575084, + "train/text_entropy": 1.095219989155614, + "train/token_acc": 0.3064340239912759 + }, + { + "epoch": 0.1153219837157661, + "grad_norm": 16.82982635498047, + "learning_rate": 7.279136705323067e-06, + "loss": 1.6016, + "step": 779, + "train/speech_entropy": 4.09452138894544, + "train/text_entropy": 1.4726476810248614, + "train/token_acc": 0.2713523131672598 + }, + { + "epoch": 0.1154700222057735, + "grad_norm": 14.644168853759766, + "learning_rate": 7.272440903121792e-06, + "loss": 1.1846, + "step": 780, + "train/speech_entropy": 3.3770082895992366, + "train/text_entropy": 1.2629214885622957, + "train/token_acc": 0.34099153567110035 + }, + { + "epoch": 0.1156180606957809, + "grad_norm": 19.87251091003418, + "learning_rate": 7.26574045290088e-06, + "loss": 1.3096, + "step": 781, + "train/speech_entropy": 3.922981770833333, + "train/text_entropy": 0.9485219410487584, + "train/token_acc": 0.3062068965517241 + }, + { + "epoch": 0.1157660991857883, + "grad_norm": 19.845008850097656, + "learning_rate": 7.259035372231479e-06, + "loss": 1.4531, + "step": 782, + "train/speech_entropy": 3.8521509646487986, + "train/text_entropy": 1.67085180611446, + "train/token_acc": 0.2754946727549467 + }, + { + "epoch": 0.11591413767579571, + "grad_norm": 14.924542427062988, + "learning_rate": 7.252325678696879e-06, + "loss": 0.7461, + "step": 783, + "train/speech_entropy": 3.381531134642312, + "train/text_entropy": 0.7736411223540435, + "train/token_acc": 0.34219269102990035 + }, + { + "epoch": 0.11606217616580311, + "grad_norm": 15.447098731994629, + "learning_rate": 7.24561138989247e-06, + "loss": 0.8643, + "step": 784, + "train/speech_entropy": 3.439208279784452, + "train/text_entropy": 0.8875329260732613, + "train/token_acc": 0.33451536643026003 + }, + { + "epoch": 0.11621021465581051, + "grad_norm": 15.22691822052002, + "learning_rate": 7.238892523425694e-06, + "loss": 0.9277, + "step": 785, + "train/speech_entropy": 3.437126226397367, + "train/text_entropy": 0.7959452046976461, + "train/token_acc": 0.3202380952380952 + }, + { + "epoch": 0.11635825314581791, + "grad_norm": 17.47816276550293, + "learning_rate": 7.23216909691599e-06, + "loss": 1.6602, + "step": 786, + "train/speech_entropy": 4.087404334703109, + "train/text_entropy": 1.6684145649904927, + "train/token_acc": 0.258974358974359 + }, + { + "epoch": 0.11650629163582532, + "grad_norm": 16.35817527770996, + "learning_rate": 7.225441127994766e-06, + "loss": 1.2734, + "step": 787, + "train/speech_entropy": 3.7619249297351374, + "train/text_entropy": 1.5120078012452904, + "train/token_acc": 0.2973651191969887 + }, + { + "epoch": 0.11665433012583272, + "grad_norm": 16.086862564086914, + "learning_rate": 7.218708634305334e-06, + "loss": 1.1992, + "step": 788, + "train/speech_entropy": 3.994505359280494, + "train/text_entropy": 1.2252082368767119, + "train/token_acc": 0.31134352373290425 + }, + { + "epoch": 0.11680236861584012, + "grad_norm": 15.474823951721191, + "learning_rate": 7.211971633502872e-06, + "loss": 1.3418, + "step": 789, + "train/speech_entropy": 4.251760940551758, + "train/text_entropy": 1.5786038672017773, + "train/token_acc": 0.2698249227600412 + }, + { + "epoch": 0.11695040710584752, + "grad_norm": 25.70890998840332, + "learning_rate": 7.20523014325438e-06, + "loss": 1.4766, + "step": 790, + "train/speech_entropy": 3.6602783203125, + "train/text_entropy": 0.8576681954520089, + "train/token_acc": 0.313373253493014 + }, + { + "epoch": 0.11709844559585492, + "grad_norm": 18.94228172302246, + "learning_rate": 7.198484181238633e-06, + "loss": 1.2129, + "step": 791, + "train/speech_entropy": 4.325296994747622, + "train/text_entropy": 1.292820912487102, + "train/token_acc": 0.2711571675302245 + }, + { + "epoch": 0.11724648408586233, + "grad_norm": 20.087923049926758, + "learning_rate": 7.191733765146126e-06, + "loss": 1.5625, + "step": 792, + "train/speech_entropy": 3.8354578018188477, + "train/text_entropy": 1.4763200089738175, + "train/token_acc": 0.28436516264428124 + }, + { + "epoch": 0.11739452257586973, + "grad_norm": 14.724649429321289, + "learning_rate": 7.184978912679041e-06, + "loss": 1.0645, + "step": 793, + "train/speech_entropy": 4.0200640529157, + "train/text_entropy": 1.3842151708770216, + "train/token_acc": 0.2951351351351351 + }, + { + "epoch": 0.11754256106587713, + "grad_norm": 11.315794944763184, + "learning_rate": 7.17821964155119e-06, + "loss": 0.9922, + "step": 794, + "train/speech_entropy": 4.261680035143608, + "train/text_entropy": 1.1757595078627952, + "train/token_acc": 0.2939655172413793 + }, + { + "epoch": 0.11769059955588453, + "grad_norm": 10.9509859085083, + "learning_rate": 7.171455969487974e-06, + "loss": 0.8311, + "step": 795, + "train/speech_entropy": 4.095633397241099, + "train/text_entropy": 0.8284465471903483, + "train/token_acc": 0.3111668757841907 + }, + { + "epoch": 0.11783863804589194, + "grad_norm": 16.40129280090332, + "learning_rate": 7.164687914226335e-06, + "loss": 1.5156, + "step": 796, + "train/speech_entropy": 3.954764388283615, + "train/text_entropy": 1.1702618498339432, + "train/token_acc": 0.28257328990228014 + }, + { + "epoch": 0.11798667653589934, + "grad_norm": 15.083370208740234, + "learning_rate": 7.157915493514706e-06, + "loss": 1.3984, + "step": 797, + "train/speech_entropy": 3.677646874735349, + "train/text_entropy": 1.5430752984408675, + "train/token_acc": 0.27631578947368424 + }, + { + "epoch": 0.11813471502590674, + "grad_norm": 18.35961151123047, + "learning_rate": 7.1511387251129745e-06, + "loss": 1.2402, + "step": 798, + "train/speech_entropy": 3.518530882405045, + "train/text_entropy": 1.4408097585042319, + "train/token_acc": 0.31135531135531136 + }, + { + "epoch": 0.11828275351591414, + "grad_norm": 25.67911148071289, + "learning_rate": 7.144357626792424e-06, + "loss": 2.2773, + "step": 799, + "train/speech_entropy": 3.612577051148379, + "train/text_entropy": 2.4174186918470593, + "train/token_acc": 0.2973372781065089 + }, + { + "epoch": 0.11843079200592153, + "grad_norm": 13.958096504211426, + "learning_rate": 7.137572216335695e-06, + "loss": 1.0098, + "step": 800, + "train/speech_entropy": 3.8880854587928924, + "train/text_entropy": 1.2668581917172386, + "train/token_acc": 0.2774390243902439 + }, + { + "epoch": 0.11857883049592895, + "grad_norm": 14.468646049499512, + "learning_rate": 7.130782511536734e-06, + "loss": 1.0586, + "step": 801, + "train/speech_entropy": 3.734510515738225, + "train/text_entropy": 1.234379928304542, + "train/token_acc": 0.32432432432432434 + }, + { + "epoch": 0.11872686898593635, + "grad_norm": 14.579344749450684, + "learning_rate": 7.12398853020075e-06, + "loss": 1.0918, + "step": 802, + "train/speech_entropy": 4.063594588314194, + "train/text_entropy": 1.3088887532552083, + "train/token_acc": 0.28869778869778867 + }, + { + "epoch": 0.11887490747594374, + "grad_norm": 15.816568374633789, + "learning_rate": 7.117190290144169e-06, + "loss": 0.9492, + "step": 803, + "train/speech_entropy": 3.4672628504629355, + "train/text_entropy": 1.3092295537229444, + "train/token_acc": 0.3080495356037152 + }, + { + "epoch": 0.11902294596595114, + "grad_norm": 15.45766830444336, + "learning_rate": 7.1103878091945845e-06, + "loss": 1.1895, + "step": 804, + "train/speech_entropy": 3.9612670032690604, + "train/text_entropy": 1.4832946575240584, + "train/token_acc": 0.2733644859813084 + }, + { + "epoch": 0.11917098445595854, + "grad_norm": 11.292400360107422, + "learning_rate": 7.103581105190702e-06, + "loss": 0.5547, + "step": 805, + "train/speech_entropy": 4.047763753820349, + "train/text_entropy": 0.5292625614241058, + "train/token_acc": 0.32865168539325845 + }, + { + "epoch": 0.11931902294596596, + "grad_norm": 15.766887664794922, + "learning_rate": 7.0967701959823185e-06, + "loss": 0.7607, + "step": 806, + "train/speech_entropy": 3.7858216408217884, + "train/text_entropy": 0.5300553061745383, + "train/token_acc": 0.30886426592797783 + }, + { + "epoch": 0.11946706143597335, + "grad_norm": 12.26786994934082, + "learning_rate": 7.089955099430245e-06, + "loss": 0.9111, + "step": 807, + "train/speech_entropy": 3.93316943359375, + "train/text_entropy": 0.8275433564797426, + "train/token_acc": 0.31766612641815234 + }, + { + "epoch": 0.11961509992598075, + "grad_norm": 13.893891334533691, + "learning_rate": 7.083135833406276e-06, + "loss": 1.6797, + "step": 808, + "train/speech_entropy": 4.218410456189167, + "train/text_entropy": 1.635566412233839, + "train/token_acc": 0.2587123449497933 + }, + { + "epoch": 0.11976313841598815, + "grad_norm": 12.364026069641113, + "learning_rate": 7.076312415793147e-06, + "loss": 0.8691, + "step": 809, + "train/speech_entropy": 3.827325680835099, + "train/text_entropy": 0.8218367277686276, + "train/token_acc": 0.33383458646616543 + }, + { + "epoch": 0.11991117690599556, + "grad_norm": 16.436922073364258, + "learning_rate": 7.0694848644844715e-06, + "loss": 1.5938, + "step": 810, + "train/speech_entropy": 4.3033922327754865, + "train/text_entropy": 1.6672520011798289, + "train/token_acc": 0.2615823235923022 + }, + { + "epoch": 0.12005921539600296, + "grad_norm": 13.261826515197754, + "learning_rate": 7.062653197384706e-06, + "loss": 1.0244, + "step": 811, + "train/speech_entropy": 3.4178459270961774, + "train/text_entropy": 0.9458991113653038, + "train/token_acc": 0.3482142857142857 + }, + { + "epoch": 0.12020725388601036, + "grad_norm": 18.14591407775879, + "learning_rate": 7.055817432409103e-06, + "loss": 1.3906, + "step": 812, + "train/speech_entropy": 3.6534158414660527, + "train/text_entropy": 1.3689897823957056, + "train/token_acc": 0.28683181225554105 + }, + { + "epoch": 0.12035529237601776, + "grad_norm": 15.149886131286621, + "learning_rate": 7.04897758748366e-06, + "loss": 1.1738, + "step": 813, + "train/speech_entropy": 3.8909313618156762, + "train/text_entropy": 1.1482950695873013, + "train/token_acc": 0.29236276849642007 + }, + { + "epoch": 0.12050333086602516, + "grad_norm": 12.267088890075684, + "learning_rate": 7.04213368054507e-06, + "loss": 0.9199, + "step": 814, + "train/speech_entropy": 3.79299152029008, + "train/text_entropy": 0.910957377889882, + "train/token_acc": 0.29357798165137616 + }, + { + "epoch": 0.12065136935603257, + "grad_norm": 21.3205623626709, + "learning_rate": 7.035285729540683e-06, + "loss": 1.9805, + "step": 815, + "train/speech_entropy": 3.850219999446954, + "train/text_entropy": 1.844064827424934, + "train/token_acc": 0.2628434886499403 + }, + { + "epoch": 0.12079940784603997, + "grad_norm": 14.597343444824219, + "learning_rate": 7.028433752428453e-06, + "loss": 1.4609, + "step": 816, + "train/speech_entropy": 4.164335388544749, + "train/text_entropy": 1.3676781548394097, + "train/token_acc": 0.28130841121495326 + }, + { + "epoch": 0.12094744633604737, + "grad_norm": 25.276565551757812, + "learning_rate": 7.021577767176888e-06, + "loss": 1.2871, + "step": 817, + "train/speech_entropy": 4.567336352903451, + "train/text_entropy": 1.3437284080727587, + "train/token_acc": 0.2237871674491393 + }, + { + "epoch": 0.12109548482605477, + "grad_norm": 15.0615816116333, + "learning_rate": 7.0147177917650145e-06, + "loss": 0.9102, + "step": 818, + "train/speech_entropy": 3.899219391586705, + "train/text_entropy": 1.065258004811889, + "train/token_acc": 0.3093126385809313 + }, + { + "epoch": 0.12124352331606218, + "grad_norm": 18.26667022705078, + "learning_rate": 7.0078538441823155e-06, + "loss": 1.375, + "step": 819, + "train/speech_entropy": 3.947535759959838, + "train/text_entropy": 1.235950939471905, + "train/token_acc": 0.277526395173454 + }, + { + "epoch": 0.12139156180606958, + "grad_norm": 11.826743125915527, + "learning_rate": 7.000985942428693e-06, + "loss": 0.8848, + "step": 820, + "train/speech_entropy": 3.7207714242582934, + "train/text_entropy": 0.8891021612918738, + "train/token_acc": 0.32142857142857145 + }, + { + "epoch": 0.12153960029607698, + "grad_norm": 21.17845344543457, + "learning_rate": 6.994114104514421e-06, + "loss": 1.8281, + "step": 821, + "train/speech_entropy": 3.644469323096337, + "train/text_entropy": 1.3688900994916333, + "train/token_acc": 0.27080581241743723 + }, + { + "epoch": 0.12168763878608438, + "grad_norm": 16.088863372802734, + "learning_rate": 6.98723834846009e-06, + "loss": 1.3516, + "step": 822, + "train/speech_entropy": 4.148795497096597, + "train/text_entropy": 1.5073421780425724, + "train/token_acc": 0.2696917808219178 + }, + { + "epoch": 0.12183567727609178, + "grad_norm": 13.043249130249023, + "learning_rate": 6.9803586922965706e-06, + "loss": 1.2227, + "step": 823, + "train/speech_entropy": 4.030532374526516, + "train/text_entropy": 0.9624043017362072, + "train/token_acc": 0.3274907749077491 + }, + { + "epoch": 0.12198371576609919, + "grad_norm": 16.996931076049805, + "learning_rate": 6.97347515406496e-06, + "loss": 1.3945, + "step": 824, + "train/speech_entropy": 4.187308933423913, + "train/text_entropy": 1.3521455315982593, + "train/token_acc": 0.26023778071334214 + }, + { + "epoch": 0.12213175425610659, + "grad_norm": 18.342021942138672, + "learning_rate": 6.966587751816535e-06, + "loss": 1.3848, + "step": 825, + "train/speech_entropy": 3.900625466435535, + "train/text_entropy": 1.0759888114510001, + "train/token_acc": 0.2893954410307235 + }, + { + "epoch": 0.12227979274611399, + "grad_norm": 11.457801818847656, + "learning_rate": 6.9596965036127015e-06, + "loss": 1.0898, + "step": 826, + "train/speech_entropy": 4.6353792854281135, + "train/text_entropy": 0.9535886362979287, + "train/token_acc": 0.26842420611144396 + }, + { + "epoch": 0.12242783123612139, + "grad_norm": 15.246195793151855, + "learning_rate": 6.952801427524958e-06, + "loss": 1.1094, + "step": 827, + "train/speech_entropy": 3.8838861779321596, + "train/text_entropy": 1.1808699199131556, + "train/token_acc": 0.2828810020876827 + }, + { + "epoch": 0.1225758697261288, + "grad_norm": 16.595420837402344, + "learning_rate": 6.945902541634836e-06, + "loss": 0.8516, + "step": 828, + "train/speech_entropy": 3.789691707952236, + "train/text_entropy": 0.6256546809755522, + "train/token_acc": 0.34116541353383456 + }, + { + "epoch": 0.1227239082161362, + "grad_norm": 17.270606994628906, + "learning_rate": 6.93899986403386e-06, + "loss": 1.6953, + "step": 829, + "train/speech_entropy": 4.062376377348266, + "train/text_entropy": 1.6506609011919071, + "train/token_acc": 0.25849056603773585 + }, + { + "epoch": 0.1228719467061436, + "grad_norm": 13.342480659484863, + "learning_rate": 6.932093412823498e-06, + "loss": 0.7617, + "step": 830, + "train/speech_entropy": 3.7873003303660133, + "train/text_entropy": 0.808692769306462, + "train/token_acc": 0.3235981308411215 + }, + { + "epoch": 0.123019985196151, + "grad_norm": 15.880027770996094, + "learning_rate": 6.925183206115112e-06, + "loss": 1.3398, + "step": 831, + "train/speech_entropy": 4.0663074840198865, + "train/text_entropy": 1.3742136584662403, + "train/token_acc": 0.2712022367194781 + }, + { + "epoch": 0.1231680236861584, + "grad_norm": 17.570514678955078, + "learning_rate": 6.918269262029914e-06, + "loss": 1.4609, + "step": 832, + "train/speech_entropy": 4.0738626534598215, + "train/text_entropy": 1.3285140235825341, + "train/token_acc": 0.2906220984215413 + }, + { + "epoch": 0.12331606217616581, + "grad_norm": 18.000612258911133, + "learning_rate": 6.911351598698918e-06, + "loss": 1.6836, + "step": 833, + "train/speech_entropy": 4.0247309602649, + "train/text_entropy": 1.7570424450493847, + "train/token_acc": 0.2502274795268426 + }, + { + "epoch": 0.12346410066617321, + "grad_norm": 17.286937713623047, + "learning_rate": 6.904430234262886e-06, + "loss": 1.6328, + "step": 834, + "train/speech_entropy": 3.7754191946476063, + "train/text_entropy": 1.5000752416150323, + "train/token_acc": 0.3161057692307692 + }, + { + "epoch": 0.1236121391561806, + "grad_norm": 11.452483177185059, + "learning_rate": 6.897505186872292e-06, + "loss": 0.7598, + "step": 835, + "train/speech_entropy": 4.359274147326468, + "train/text_entropy": 0.8819368406783703, + "train/token_acc": 0.2924613987284287 + }, + { + "epoch": 0.123760177646188, + "grad_norm": 15.758251190185547, + "learning_rate": 6.890576474687264e-06, + "loss": 1.0957, + "step": 836, + "train/speech_entropy": 4.4917152806332235, + "train/text_entropy": 1.0630437585691743, + "train/token_acc": 0.24822695035460993 + }, + { + "epoch": 0.12390821613619542, + "grad_norm": 12.033262252807617, + "learning_rate": 6.883644115877544e-06, + "loss": 0.9941, + "step": 837, + "train/speech_entropy": 4.339819151779701, + "train/text_entropy": 1.2879560586992005, + "train/token_acc": 0.27607361963190186 + }, + { + "epoch": 0.12405625462620282, + "grad_norm": 18.661977767944336, + "learning_rate": 6.876708128622432e-06, + "loss": 1.8711, + "step": 838, + "train/speech_entropy": 3.9762650455336974, + "train/text_entropy": 2.084018827234424, + "train/token_acc": 0.2515188335358445 + }, + { + "epoch": 0.12420429311621022, + "grad_norm": 16.043556213378906, + "learning_rate": 6.869768531110749e-06, + "loss": 1.4688, + "step": 839, + "train/speech_entropy": 4.543252189997933, + "train/text_entropy": 1.6301264444986978, + "train/token_acc": 0.23827046918123276 + }, + { + "epoch": 0.12435233160621761, + "grad_norm": 7.166507720947266, + "learning_rate": 6.86282534154078e-06, + "loss": 0.3906, + "step": 840, + "train/speech_entropy": 3.787443042166752, + "train/text_entropy": 0.34567643788235247, + "train/token_acc": 0.3407043407043407 + }, + { + "epoch": 0.12450037009622501, + "grad_norm": 17.18818473815918, + "learning_rate": 6.855878578120228e-06, + "loss": 1.6211, + "step": 841, + "train/speech_entropy": 4.354201558958064, + "train/text_entropy": 1.4068036658741603, + "train/token_acc": 0.28093306288032455 + }, + { + "epoch": 0.12464840858623243, + "grad_norm": 12.486544609069824, + "learning_rate": 6.848928259066171e-06, + "loss": 1.332, + "step": 842, + "train/speech_entropy": 4.354899022646761, + "train/text_entropy": 1.3873538165025308, + "train/token_acc": 0.2764976958525346 + }, + { + "epoch": 0.12479644707623982, + "grad_norm": 19.193836212158203, + "learning_rate": 6.841974402605012e-06, + "loss": 1.3213, + "step": 843, + "train/speech_entropy": 3.7178449397097717, + "train/text_entropy": 1.2843478698730468, + "train/token_acc": 0.3048780487804878 + }, + { + "epoch": 0.12494448556624722, + "grad_norm": 16.323440551757812, + "learning_rate": 6.835017026972427e-06, + "loss": 1.0332, + "step": 844, + "train/speech_entropy": 4.407207411162707, + "train/text_entropy": 1.2244288979507074, + "train/token_acc": 0.2563291139240506 + }, + { + "epoch": 0.12509252405625462, + "grad_norm": 13.860257148742676, + "learning_rate": 6.828056150413323e-06, + "loss": 1.3789, + "step": 845, + "train/speech_entropy": 4.573814238794436, + "train/text_entropy": 1.3861938353009553, + "train/token_acc": 0.23101881894873458 + }, + { + "epoch": 0.12524056254626204, + "grad_norm": 15.712248802185059, + "learning_rate": 6.821091791181788e-06, + "loss": 1.2539, + "step": 846, + "train/speech_entropy": 4.097328964544802, + "train/text_entropy": 1.3768899410585813, + "train/token_acc": 0.25583864118895966 + }, + { + "epoch": 0.12538860103626942, + "grad_norm": 15.865267753601074, + "learning_rate": 6.814123967541043e-06, + "loss": 0.7676, + "step": 847, + "train/speech_entropy": 3.4272311690950046, + "train/text_entropy": 0.6022382589486929, + "train/token_acc": 0.34513274336283184 + }, + { + "epoch": 0.12553663952627683, + "grad_norm": 16.529382705688477, + "learning_rate": 6.807152697763391e-06, + "loss": 1.1699, + "step": 848, + "train/speech_entropy": 3.8542798506345783, + "train/text_entropy": 1.0124598878802675, + "train/token_acc": 0.3090717299578059 + }, + { + "epoch": 0.12568467801628425, + "grad_norm": 17.8050594329834, + "learning_rate": 6.800178000130176e-06, + "loss": 1.1289, + "step": 849, + "train/speech_entropy": 4.094745611183355, + "train/text_entropy": 0.9857151092044891, + "train/token_acc": 0.2954070981210856 + }, + { + "epoch": 0.12583271650629163, + "grad_norm": 11.071603775024414, + "learning_rate": 6.793199892931727e-06, + "loss": 1.1445, + "step": 850, + "train/speech_entropy": 4.2850093074522455, + "train/text_entropy": 1.2490175096251124, + "train/token_acc": 0.28203292770221905 + }, + { + "epoch": 0.12598075499629904, + "grad_norm": 15.173887252807617, + "learning_rate": 6.78621839446732e-06, + "loss": 1.3438, + "step": 851, + "train/speech_entropy": 4.442390375716388, + "train/text_entropy": 1.150642204284668, + "train/token_acc": 0.28500496524329694 + }, + { + "epoch": 0.12612879348630643, + "grad_norm": 21.56964683532715, + "learning_rate": 6.779233523045118e-06, + "loss": 1.8789, + "step": 852, + "train/speech_entropy": 4.149128229766127, + "train/text_entropy": 1.6845724381024565, + "train/token_acc": 0.23259911894273128 + }, + { + "epoch": 0.12627683197631384, + "grad_norm": 16.584287643432617, + "learning_rate": 6.772245296982135e-06, + "loss": 1.0156, + "step": 853, + "train/speech_entropy": 3.889220587097772, + "train/text_entropy": 0.961007488660576, + "train/token_acc": 0.2987220447284345 + }, + { + "epoch": 0.12642487046632125, + "grad_norm": 17.688161849975586, + "learning_rate": 6.765253734604175e-06, + "loss": 1.5664, + "step": 854, + "train/speech_entropy": 4.2163629401220035, + "train/text_entropy": 1.5832095929046175, + "train/token_acc": 0.2530434782608696 + }, + { + "epoch": 0.12657290895632864, + "grad_norm": 15.229533195495605, + "learning_rate": 6.758258854245801e-06, + "loss": 1.1094, + "step": 855, + "train/speech_entropy": 4.121108925589558, + "train/text_entropy": 1.1162782930860333, + "train/token_acc": 0.27653880463871544 + }, + { + "epoch": 0.12672094744633605, + "grad_norm": 16.772470474243164, + "learning_rate": 6.751260674250266e-06, + "loss": 1.4531, + "step": 856, + "train/speech_entropy": 4.0992850733458175, + "train/text_entropy": 1.4181779295533568, + "train/token_acc": 0.2807731434384537 + }, + { + "epoch": 0.12686898593634344, + "grad_norm": 19.928020477294922, + "learning_rate": 6.744259212969484e-06, + "loss": 1.1914, + "step": 857, + "train/speech_entropy": 3.807336155015307, + "train/text_entropy": 1.0217269448673023, + "train/token_acc": 0.2924242424242424 + }, + { + "epoch": 0.12701702442635085, + "grad_norm": 7.63237190246582, + "learning_rate": 6.7372544887639694e-06, + "loss": 0.335, + "step": 858, + "train/speech_entropy": 3.360938189106603, + "train/text_entropy": 0.28331874012947084, + "train/token_acc": 0.36666666666666664 + }, + { + "epoch": 0.12716506291635826, + "grad_norm": 25.887624740600586, + "learning_rate": 6.730246520002797e-06, + "loss": 1.4062, + "step": 859, + "train/speech_entropy": 4.197263708437123, + "train/text_entropy": 1.017076470386023, + "train/token_acc": 0.26746506986027946 + }, + { + "epoch": 0.12731310140636565, + "grad_norm": 27.89869499206543, + "learning_rate": 6.723235325063543e-06, + "loss": 1.8516, + "step": 860, + "train/speech_entropy": 4.044464805595481, + "train/text_entropy": 1.2378992374706645, + "train/token_acc": 0.29667721518987344 + }, + { + "epoch": 0.12746113989637306, + "grad_norm": 9.973913192749023, + "learning_rate": 6.716220922332255e-06, + "loss": 0.7188, + "step": 861, + "train/speech_entropy": 3.7410849609375, + "train/text_entropy": 0.9313172279520238, + "train/token_acc": 0.327027027027027 + }, + { + "epoch": 0.12760917838638045, + "grad_norm": 13.521400451660156, + "learning_rate": 6.709203330203383e-06, + "loss": 0.9873, + "step": 862, + "train/speech_entropy": 3.943119092475901, + "train/text_entropy": 1.0829128886378088, + "train/token_acc": 0.28556593977154726 + }, + { + "epoch": 0.12775721687638786, + "grad_norm": 14.243709564208984, + "learning_rate": 6.702182567079745e-06, + "loss": 1.4102, + "step": 863, + "train/speech_entropy": 4.105741537598512, + "train/text_entropy": 1.5868626256142893, + "train/token_acc": 0.27913669064748203 + }, + { + "epoch": 0.12790525536639527, + "grad_norm": 19.81629753112793, + "learning_rate": 6.695158651372474e-06, + "loss": 0.9902, + "step": 864, + "train/speech_entropy": 3.5970088958740236, + "train/text_entropy": 0.7967555706317608, + "train/token_acc": 0.3203517587939699 + }, + { + "epoch": 0.12805329385640266, + "grad_norm": 14.897217750549316, + "learning_rate": 6.68813160150097e-06, + "loss": 0.751, + "step": 865, + "train/speech_entropy": 3.632895832829431, + "train/text_entropy": 0.6111360051829344, + "train/token_acc": 0.3449564134495641 + }, + { + "epoch": 0.12820133234641007, + "grad_norm": 12.279254913330078, + "learning_rate": 6.68110143589285e-06, + "loss": 0.918, + "step": 866, + "train/speech_entropy": 3.8208437102833432, + "train/text_entropy": 1.0949809906330514, + "train/token_acc": 0.30078125 + }, + { + "epoch": 0.12834937083641748, + "grad_norm": 16.432466506958008, + "learning_rate": 6.674068172983907e-06, + "loss": 1.6758, + "step": 867, + "train/speech_entropy": 3.9028307662726682, + "train/text_entropy": 1.6184891010152882, + "train/token_acc": 0.28237259816207183 + }, + { + "epoch": 0.12849740932642487, + "grad_norm": 9.861013412475586, + "learning_rate": 6.667031831218054e-06, + "loss": 0.7061, + "step": 868, + "train/speech_entropy": 4.172754358362268, + "train/text_entropy": 0.9440531875147964, + "train/token_acc": 0.2937853107344633 + }, + { + "epoch": 0.12864544781643228, + "grad_norm": 21.687341690063477, + "learning_rate": 6.659992429047271e-06, + "loss": 1.6484, + "step": 869, + "train/speech_entropy": 4.050209067915745, + "train/text_entropy": 1.4681146220796426, + "train/token_acc": 0.26706586826347306 + }, + { + "epoch": 0.12879348630643966, + "grad_norm": 16.844219207763672, + "learning_rate": 6.652949984931577e-06, + "loss": 1.0488, + "step": 870, + "train/speech_entropy": 3.46329833984375, + "train/text_entropy": 0.8082707811902453, + "train/token_acc": 0.31399747793190413 + }, + { + "epoch": 0.12894152479644708, + "grad_norm": 14.40903091430664, + "learning_rate": 6.645904517338955e-06, + "loss": 1.3711, + "step": 871, + "train/speech_entropy": 3.9250529544454227, + "train/text_entropy": 1.4125175476074219, + "train/token_acc": 0.26233359436178544 + }, + { + "epoch": 0.1290895632864545, + "grad_norm": 17.850317001342773, + "learning_rate": 6.638856044745324e-06, + "loss": 1.1152, + "step": 872, + "train/speech_entropy": 3.9423018721846845, + "train/text_entropy": 1.3546422222467858, + "train/token_acc": 0.2844574780058651 + }, + { + "epoch": 0.12923760177646187, + "grad_norm": 20.45468521118164, + "learning_rate": 6.6318045856344825e-06, + "loss": 1.5664, + "step": 873, + "train/speech_entropy": 3.8041802989204796, + "train/text_entropy": 1.4407420004567792, + "train/token_acc": 0.2776978417266187 + }, + { + "epoch": 0.1293856402664693, + "grad_norm": 22.02005958557129, + "learning_rate": 6.62475015849806e-06, + "loss": 2.3281, + "step": 874, + "train/speech_entropy": 3.5450999190167685, + "train/text_entropy": 2.3223495009523, + "train/token_acc": 0.2860824742268041 + }, + { + "epoch": 0.12953367875647667, + "grad_norm": 13.669872283935547, + "learning_rate": 6.617692781835468e-06, + "loss": 1.3516, + "step": 875, + "train/speech_entropy": 3.8855296947337963, + "train/text_entropy": 1.430628153483073, + "train/token_acc": 0.3079710144927536 + }, + { + "epoch": 0.12968171724648409, + "grad_norm": 15.216891288757324, + "learning_rate": 6.610632474153854e-06, + "loss": 1.5547, + "step": 876, + "train/speech_entropy": 4.181024785967981, + "train/text_entropy": 1.7994357838350183, + "train/token_acc": 0.24841213832039521 + }, + { + "epoch": 0.1298297557364915, + "grad_norm": 14.891727447509766, + "learning_rate": 6.603569253968055e-06, + "loss": 1.0215, + "step": 877, + "train/speech_entropy": 4.11044209681117, + "train/text_entropy": 1.2074202457106258, + "train/token_acc": 0.263768115942029 + }, + { + "epoch": 0.12997779422649888, + "grad_norm": 11.102131843566895, + "learning_rate": 6.596503139800538e-06, + "loss": 1.1699, + "step": 878, + "train/speech_entropy": 4.188002791335522, + "train/text_entropy": 1.2019218746289657, + "train/token_acc": 0.27927927927927926 + }, + { + "epoch": 0.1301258327165063, + "grad_norm": 13.86330509185791, + "learning_rate": 6.589434150181366e-06, + "loss": 1.1367, + "step": 879, + "train/speech_entropy": 3.530148547469021, + "train/text_entropy": 0.9181275431315105, + "train/token_acc": 0.30854430379746833 + }, + { + "epoch": 0.13027387120651368, + "grad_norm": 17.47272300720215, + "learning_rate": 6.582362303648142e-06, + "loss": 0.9199, + "step": 880, + "train/speech_entropy": 3.9132599496949068, + "train/text_entropy": 1.0656405289967854, + "train/token_acc": 0.2894248608534323 + }, + { + "epoch": 0.1304219096965211, + "grad_norm": 11.987382888793945, + "learning_rate": 6.575287618745957e-06, + "loss": 0.4365, + "step": 881, + "train/speech_entropy": 3.6729866190159575, + "train/text_entropy": 0.4230865337230541, + "train/token_acc": 0.3376251788268956 + }, + { + "epoch": 0.1305699481865285, + "grad_norm": 13.659384727478027, + "learning_rate": 6.568210114027345e-06, + "loss": 0.9404, + "step": 882, + "train/speech_entropy": 3.64466827552356, + "train/text_entropy": 0.80835478769708, + "train/token_acc": 0.3299319727891156 + }, + { + "epoch": 0.1307179866765359, + "grad_norm": 8.512717247009277, + "learning_rate": 6.561129808052241e-06, + "loss": 0.498, + "step": 883, + "train/speech_entropy": 3.375063583597012, + "train/text_entropy": 0.40165248159634864, + "train/token_acc": 0.36432160804020103 + }, + { + "epoch": 0.1308660251665433, + "grad_norm": 11.13382339477539, + "learning_rate": 6.554046719387921e-06, + "loss": 0.7959, + "step": 884, + "train/speech_entropy": 3.903926738972066, + "train/text_entropy": 0.7214899724072749, + "train/token_acc": 0.3198237885462555 + }, + { + "epoch": 0.13101406365655072, + "grad_norm": 21.470378875732422, + "learning_rate": 6.546960866608958e-06, + "loss": 1.7422, + "step": 885, + "train/speech_entropy": 4.013310321753279, + "train/text_entropy": 1.600174296985973, + "train/token_acc": 0.24967148488830487 + }, + { + "epoch": 0.1311621021465581, + "grad_norm": 15.288581848144531, + "learning_rate": 6.539872268297176e-06, + "loss": 1.2383, + "step": 886, + "train/speech_entropy": 4.159242013950106, + "train/text_entropy": 1.2340726801418365, + "train/token_acc": 0.3046875 + }, + { + "epoch": 0.13131014063656551, + "grad_norm": 15.878676414489746, + "learning_rate": 6.532780943041598e-06, + "loss": 0.7109, + "step": 887, + "train/speech_entropy": 3.7011137337520204, + "train/text_entropy": 0.7256467286930528, + "train/token_acc": 0.3018181818181818 + }, + { + "epoch": 0.1314581791265729, + "grad_norm": 12.040822982788086, + "learning_rate": 6.525686909438397e-06, + "loss": 1.0254, + "step": 888, + "train/speech_entropy": 4.082769895854749, + "train/text_entropy": 0.9960254448047583, + "train/token_acc": 0.28150134048257375 + }, + { + "epoch": 0.1316062176165803, + "grad_norm": 11.779393196105957, + "learning_rate": 6.518590186090847e-06, + "loss": 0.752, + "step": 889, + "train/speech_entropy": 4.005034119405864, + "train/text_entropy": 0.578394777031355, + "train/token_acc": 0.3313253012048193 + }, + { + "epoch": 0.13175425610658772, + "grad_norm": 16.482900619506836, + "learning_rate": 6.511490791609283e-06, + "loss": 1.3711, + "step": 890, + "train/speech_entropy": 3.73063203125, + "train/text_entropy": 1.3806119944951305, + "train/token_acc": 0.3009079118028534 + }, + { + "epoch": 0.1319022945965951, + "grad_norm": 10.392497062683105, + "learning_rate": 6.504388744611036e-06, + "loss": 0.7344, + "step": 891, + "train/speech_entropy": 3.936062942683001, + "train/text_entropy": 0.8310499261926722, + "train/token_acc": 0.325115562403698 + }, + { + "epoch": 0.13205033308660252, + "grad_norm": 14.620426177978516, + "learning_rate": 6.497284063720394e-06, + "loss": 1.3984, + "step": 892, + "train/speech_entropy": 4.077356718435614, + "train/text_entropy": 1.395276956890353, + "train/token_acc": 0.29623430962343095 + }, + { + "epoch": 0.1321983715766099, + "grad_norm": 20.119197845458984, + "learning_rate": 6.49017676756856e-06, + "loss": 1.6328, + "step": 893, + "train/speech_entropy": 3.6112620900550616, + "train/text_entropy": 1.8522516498128876, + "train/token_acc": 0.2699228791773779 + }, + { + "epoch": 0.13234641006661732, + "grad_norm": 26.95081901550293, + "learning_rate": 6.483066874793584e-06, + "loss": 0.8135, + "step": 894, + "train/speech_entropy": 3.3508954931188515, + "train/text_entropy": 1.3991804399352143, + "train/token_acc": 0.29793510324483774 + }, + { + "epoch": 0.13249444855662473, + "grad_norm": 16.43600845336914, + "learning_rate": 6.475954404040336e-06, + "loss": 1.3203, + "step": 895, + "train/speech_entropy": 3.620266078673687, + "train/text_entropy": 1.0656662368774414, + "train/token_acc": 0.32889344262295084 + }, + { + "epoch": 0.13264248704663212, + "grad_norm": 21.26524543762207, + "learning_rate": 6.468839373960437e-06, + "loss": 1.4766, + "step": 896, + "train/speech_entropy": 4.160706841860586, + "train/text_entropy": 1.5723451845573657, + "train/token_acc": 0.2755020080321285 + }, + { + "epoch": 0.13279052553663953, + "grad_norm": 18.71293830871582, + "learning_rate": 6.461721803212227e-06, + "loss": 1.1562, + "step": 897, + "train/speech_entropy": 3.7417294238614547, + "train/text_entropy": 1.4331923562127191, + "train/token_acc": 0.3079019073569482 + }, + { + "epoch": 0.13293856402664692, + "grad_norm": 13.955531120300293, + "learning_rate": 6.454601710460704e-06, + "loss": 0.9805, + "step": 898, + "train/speech_entropy": 4.284257825052217, + "train/text_entropy": 1.4167957305908203, + "train/token_acc": 0.28776978417266186 + }, + { + "epoch": 0.13308660251665433, + "grad_norm": 7.304295539855957, + "learning_rate": 6.447479114377483e-06, + "loss": 0.3628, + "step": 899, + "train/speech_entropy": 3.5160566512902123, + "train/text_entropy": 0.5109198778525166, + "train/token_acc": 0.3584474885844749 + }, + { + "epoch": 0.13323464100666174, + "grad_norm": 12.08039665222168, + "learning_rate": 6.440354033640738e-06, + "loss": 1.1836, + "step": 900, + "train/speech_entropy": 3.8223944180253624, + "train/text_entropy": 1.2369510082488364, + "train/token_acc": 0.3 + }, + { + "epoch": 0.13338267949666913, + "grad_norm": 17.284427642822266, + "learning_rate": 6.433226486935167e-06, + "loss": 1.2656, + "step": 901, + "train/speech_entropy": 3.669654162962045, + "train/text_entropy": 1.3384794659084744, + "train/token_acc": 0.31470588235294117 + }, + { + "epoch": 0.13353071798667654, + "grad_norm": 48.985836029052734, + "learning_rate": 6.426096492951928e-06, + "loss": 1.5391, + "step": 902, + "train/speech_entropy": 4.01876813394052, + "train/text_entropy": 1.4899978071156115, + "train/token_acc": 0.275797373358349 + }, + { + "epoch": 0.13367875647668392, + "grad_norm": 11.540924072265625, + "learning_rate": 6.418964070388598e-06, + "loss": 0.8076, + "step": 903, + "train/speech_entropy": 3.6578837855580892, + "train/text_entropy": 0.7106678721668956, + "train/token_acc": 0.33595284872298625 + }, + { + "epoch": 0.13382679496669134, + "grad_norm": 9.52652645111084, + "learning_rate": 6.411829237949126e-06, + "loss": 0.7129, + "step": 904, + "train/speech_entropy": 3.9466700864641853, + "train/text_entropy": 0.6974198328305597, + "train/token_acc": 0.31199278629395855 + }, + { + "epoch": 0.13397483345669875, + "grad_norm": 16.565107345581055, + "learning_rate": 6.4046920143437775e-06, + "loss": 1.6406, + "step": 905, + "train/speech_entropy": 4.4058709172219, + "train/text_entropy": 1.362032091295397, + "train/token_acc": 0.24618320610687022 + }, + { + "epoch": 0.13412287194670613, + "grad_norm": 15.154189109802246, + "learning_rate": 6.397552418289089e-06, + "loss": 0.9668, + "step": 906, + "train/speech_entropy": 3.760888690176396, + "train/text_entropy": 0.9281033736008865, + "train/token_acc": 0.3183475091130012 + }, + { + "epoch": 0.13427091043671355, + "grad_norm": 12.829575538635254, + "learning_rate": 6.390410468507819e-06, + "loss": 0.5034, + "step": 907, + "train/speech_entropy": 3.3880514317741453, + "train/text_entropy": 0.40262616391213524, + "train/token_acc": 0.3525641025641026 + }, + { + "epoch": 0.13441894892672096, + "grad_norm": 14.602790832519531, + "learning_rate": 6.383266183728899e-06, + "loss": 1.2637, + "step": 908, + "train/speech_entropy": 3.9259381487652973, + "train/text_entropy": 1.2461504767922793, + "train/token_acc": 0.29717514124293787 + }, + { + "epoch": 0.13456698741672835, + "grad_norm": 16.292936325073242, + "learning_rate": 6.376119582687383e-06, + "loss": 1.4395, + "step": 909, + "train/speech_entropy": 4.053677004192947, + "train/text_entropy": 1.3469167336326684, + "train/token_acc": 0.2814107274063189 + }, + { + "epoch": 0.13471502590673576, + "grad_norm": 17.07648468017578, + "learning_rate": 6.368970684124397e-06, + "loss": 0.8682, + "step": 910, + "train/speech_entropy": 3.4494332561922225, + "train/text_entropy": 0.9165369497763144, + "train/token_acc": 0.3181818181818182 + }, + { + "epoch": 0.13486306439674314, + "grad_norm": 17.482479095458984, + "learning_rate": 6.361819506787094e-06, + "loss": 0.9805, + "step": 911, + "train/speech_entropy": 3.7200818184094553, + "train/text_entropy": 0.8906463125477666, + "train/token_acc": 0.29174664107485604 + }, + { + "epoch": 0.13501110288675056, + "grad_norm": 11.515512466430664, + "learning_rate": 6.354666069428606e-06, + "loss": 0.834, + "step": 912, + "train/speech_entropy": 4.393525172470434, + "train/text_entropy": 0.9166706103633567, + "train/token_acc": 0.3212809917355372 + }, + { + "epoch": 0.13515914137675797, + "grad_norm": 20.0139217376709, + "learning_rate": 6.347510390807987e-06, + "loss": 1.293, + "step": 913, + "train/speech_entropy": 3.7732703220321837, + "train/text_entropy": 1.2802806783605505, + "train/token_acc": 0.28032786885245903 + }, + { + "epoch": 0.13530717986676535, + "grad_norm": 4.959285736083984, + "learning_rate": 6.3403524896901695e-06, + "loss": 0.1758, + "step": 914, + "train/speech_entropy": 3.35098233927496, + "train/text_entropy": 0.23565457009861612, + "train/token_acc": 0.3835616438356164 + }, + { + "epoch": 0.13545521835677277, + "grad_norm": 15.407012939453125, + "learning_rate": 6.333192384845915e-06, + "loss": 1.6172, + "step": 915, + "train/speech_entropy": 4.49008297098094, + "train/text_entropy": 1.5089265177072573, + "train/token_acc": 0.27521367521367524 + }, + { + "epoch": 0.13560325684678015, + "grad_norm": 18.599300384521484, + "learning_rate": 6.326030095051763e-06, + "loss": 1.2734, + "step": 916, + "train/speech_entropy": 4.096170176630435, + "train/text_entropy": 1.4610346385410853, + "train/token_acc": 0.2851511169513798 + }, + { + "epoch": 0.13575129533678756, + "grad_norm": 17.777645111083984, + "learning_rate": 6.318865639089986e-06, + "loss": 0.9902, + "step": 917, + "train/speech_entropy": 3.5110570952648255, + "train/text_entropy": 0.9734256035458725, + "train/token_acc": 0.28180354267310787 + }, + { + "epoch": 0.13589933382679498, + "grad_norm": 14.072577476501465, + "learning_rate": 6.311699035748531e-06, + "loss": 1.2969, + "step": 918, + "train/speech_entropy": 4.167633278011658, + "train/text_entropy": 1.209141981155477, + "train/token_acc": 0.2690972222222222 + }, + { + "epoch": 0.13604737231680236, + "grad_norm": 14.07784652709961, + "learning_rate": 6.3045303038209815e-06, + "loss": 0.8486, + "step": 919, + "train/speech_entropy": 3.7285233276534635, + "train/text_entropy": 0.7074614508303604, + "train/token_acc": 0.3215077605321508 + }, + { + "epoch": 0.13619541080680977, + "grad_norm": 14.894777297973633, + "learning_rate": 6.297359462106504e-06, + "loss": 1.0586, + "step": 920, + "train/speech_entropy": 3.901987397735651, + "train/text_entropy": 1.2057716369628906, + "train/token_acc": 0.2757078986587183 + }, + { + "epoch": 0.13634344929681716, + "grad_norm": 15.019207000732422, + "learning_rate": 6.2901865294097895e-06, + "loss": 0.998, + "step": 921, + "train/speech_entropy": 4.194065662140542, + "train/text_entropy": 1.0609815485337202, + "train/token_acc": 0.28524945770065074 + }, + { + "epoch": 0.13649148778682457, + "grad_norm": 13.372662544250488, + "learning_rate": 6.283011524541021e-06, + "loss": 0.8828, + "step": 922, + "train/speech_entropy": 4.061653222293216, + "train/text_entropy": 1.1852485748108275, + "train/token_acc": 0.2802874743326489 + }, + { + "epoch": 0.13663952627683199, + "grad_norm": 15.343899726867676, + "learning_rate": 6.275834466315813e-06, + "loss": 1.1406, + "step": 923, + "train/speech_entropy": 3.930706190417035, + "train/text_entropy": 0.7566620934869825, + "train/token_acc": 0.3068072866730585 + }, + { + "epoch": 0.13678756476683937, + "grad_norm": 15.188273429870605, + "learning_rate": 6.268655373555163e-06, + "loss": 0.8555, + "step": 924, + "train/speech_entropy": 3.6517515980113635, + "train/text_entropy": 1.1402841664976993, + "train/token_acc": 0.27694610778443113 + }, + { + "epoch": 0.13693560325684678, + "grad_norm": 19.592578887939453, + "learning_rate": 6.261474265085403e-06, + "loss": 1.3984, + "step": 925, + "train/speech_entropy": 3.988006047723359, + "train/text_entropy": 1.2683973178057604, + "train/token_acc": 0.28194993412384717 + }, + { + "epoch": 0.1370836417468542, + "grad_norm": 12.020665168762207, + "learning_rate": 6.254291159738155e-06, + "loss": 0.7197, + "step": 926, + "train/speech_entropy": 3.9077828380549064, + "train/text_entropy": 0.6782210479348393, + "train/token_acc": 0.3093415007656968 + }, + { + "epoch": 0.13723168023686158, + "grad_norm": 15.834071159362793, + "learning_rate": 6.247106076350276e-06, + "loss": 1.8906, + "step": 927, + "train/speech_entropy": 4.272165779147982, + "train/text_entropy": 1.8944803456791113, + "train/token_acc": 0.25533480500367917 + }, + { + "epoch": 0.137379718726869, + "grad_norm": 17.646577835083008, + "learning_rate": 6.239919033763809e-06, + "loss": 1.5469, + "step": 928, + "train/speech_entropy": 4.2272144262341484, + "train/text_entropy": 1.7599070853020946, + "train/token_acc": 0.24981245311327832 + }, + { + "epoch": 0.13752775721687638, + "grad_norm": 13.323060035705566, + "learning_rate": 6.2327300508259325e-06, + "loss": 1.2227, + "step": 929, + "train/speech_entropy": 3.9495188815534608, + "train/text_entropy": 1.320160613869721, + "train/token_acc": 0.2940677966101695 + }, + { + "epoch": 0.1376757957068838, + "grad_norm": 7.287905693054199, + "learning_rate": 6.225539146388919e-06, + "loss": 0.5664, + "step": 930, + "train/speech_entropy": 3.7381066884299035, + "train/text_entropy": 0.5470812880341263, + "train/token_acc": 0.35970744680851063 + }, + { + "epoch": 0.1378238341968912, + "grad_norm": 15.908134460449219, + "learning_rate": 6.218346339310078e-06, + "loss": 1.5703, + "step": 931, + "train/speech_entropy": 4.1551999280052465, + "train/text_entropy": 1.4733817230367885, + "train/token_acc": 0.28695652173913044 + }, + { + "epoch": 0.1379718726868986, + "grad_norm": 9.403017044067383, + "learning_rate": 6.2111516484517064e-06, + "loss": 0.748, + "step": 932, + "train/speech_entropy": 3.9529174130265883, + "train/text_entropy": 0.667870361328125, + "train/token_acc": 0.32727272727272727 + }, + { + "epoch": 0.138119911176906, + "grad_norm": 11.500594139099121, + "learning_rate": 6.20395509268104e-06, + "loss": 1.3477, + "step": 933, + "train/speech_entropy": 4.592010187574806, + "train/text_entropy": 1.3353177447651707, + "train/token_acc": 0.22679900744416873 + }, + { + "epoch": 0.1382679496669134, + "grad_norm": 16.87989616394043, + "learning_rate": 6.196756690870209e-06, + "loss": 1.3418, + "step": 934, + "train/speech_entropy": 4.252433411001462, + "train/text_entropy": 1.0864487542046442, + "train/token_acc": 0.2531400966183575 + }, + { + "epoch": 0.1384159881569208, + "grad_norm": 18.084211349487305, + "learning_rate": 6.189556461896185e-06, + "loss": 1.6797, + "step": 935, + "train/speech_entropy": 4.2298189095172205, + "train/text_entropy": 1.6483366224500868, + "train/token_acc": 0.27101879327398615 + }, + { + "epoch": 0.1385640266469282, + "grad_norm": 15.764739036560059, + "learning_rate": 6.182354424640726e-06, + "loss": 1.2588, + "step": 936, + "train/speech_entropy": 3.6912355103900443, + "train/text_entropy": 1.253661616318891, + "train/token_acc": 0.3445255474452555 + }, + { + "epoch": 0.1387120651369356, + "grad_norm": 12.706223487854004, + "learning_rate": 6.175150597990336e-06, + "loss": 0.7686, + "step": 937, + "train/speech_entropy": 3.857184078647638, + "train/text_entropy": 1.1047467645609155, + "train/token_acc": 0.2852173913043478 + }, + { + "epoch": 0.138860103626943, + "grad_norm": 6.745974540710449, + "learning_rate": 6.1679450008362126e-06, + "loss": 0.3906, + "step": 938, + "train/speech_entropy": 3.496959786490258, + "train/text_entropy": 0.6807285471165434, + "train/token_acc": 0.35814606741573035 + }, + { + "epoch": 0.1390081421169504, + "grad_norm": 16.591421127319336, + "learning_rate": 6.160737652074192e-06, + "loss": 1.5156, + "step": 939, + "train/speech_entropy": 3.847609925047853, + "train/text_entropy": 1.2871202539514612, + "train/token_acc": 0.2727272727272727 + }, + { + "epoch": 0.1391561806069578, + "grad_norm": 14.602993965148926, + "learning_rate": 6.153528570604707e-06, + "loss": 1.2383, + "step": 940, + "train/speech_entropy": 3.9311845913495604, + "train/text_entropy": 1.5053305809314435, + "train/token_acc": 0.2779816513761468 + }, + { + "epoch": 0.13930421909696522, + "grad_norm": 14.617132186889648, + "learning_rate": 6.146317775332733e-06, + "loss": 1.1582, + "step": 941, + "train/speech_entropy": 4.191514389935662, + "train/text_entropy": 1.2512110852180642, + "train/token_acc": 0.25722543352601157 + }, + { + "epoch": 0.1394522575869726, + "grad_norm": 13.34124755859375, + "learning_rate": 6.13910528516774e-06, + "loss": 1.2402, + "step": 942, + "train/speech_entropy": 4.355350052960384, + "train/text_entropy": 1.1809800575519431, + "train/token_acc": 0.24820143884892087 + }, + { + "epoch": 0.13960029607698002, + "grad_norm": 10.187499046325684, + "learning_rate": 6.131891119023642e-06, + "loss": 0.7793, + "step": 943, + "train/speech_entropy": 4.033306730895483, + "train/text_entropy": 0.9635499018543171, + "train/token_acc": 0.2860824742268041 + }, + { + "epoch": 0.1397483345669874, + "grad_norm": 13.797541618347168, + "learning_rate": 6.12467529581875e-06, + "loss": 1.1914, + "step": 944, + "train/speech_entropy": 4.086181185986964, + "train/text_entropy": 1.189154027923336, + "train/token_acc": 0.2833333333333333 + }, + { + "epoch": 0.13989637305699482, + "grad_norm": 14.459935188293457, + "learning_rate": 6.1174578344757196e-06, + "loss": 0.5459, + "step": 945, + "train/speech_entropy": 4.223613352349955, + "train/text_entropy": 0.7993238590381764, + "train/token_acc": 0.3025516403402187 + }, + { + "epoch": 0.14004441154700223, + "grad_norm": 9.796091079711914, + "learning_rate": 6.1102387539215005e-06, + "loss": 1.0781, + "step": 946, + "train/speech_entropy": 3.98775863042913, + "train/text_entropy": 1.3386237117222377, + "train/token_acc": 0.32444168734491313 + }, + { + "epoch": 0.1401924500370096, + "grad_norm": 12.04282283782959, + "learning_rate": 6.103018073087288e-06, + "loss": 0.8398, + "step": 947, + "train/speech_entropy": 3.8868070828591854, + "train/text_entropy": 0.8217315001634774, + "train/token_acc": 0.3020030816640986 + }, + { + "epoch": 0.14034048852701703, + "grad_norm": 8.558350563049316, + "learning_rate": 6.09579581090848e-06, + "loss": 0.8887, + "step": 948, + "train/speech_entropy": 4.0665797369505, + "train/text_entropy": 1.0159494510063758, + "train/token_acc": 0.2987861811391223 + }, + { + "epoch": 0.14048852701702444, + "grad_norm": 11.559159278869629, + "learning_rate": 6.0885719863246155e-06, + "loss": 1.1758, + "step": 949, + "train/speech_entropy": 4.265391110453367, + "train/text_entropy": 1.0375604175385975, + "train/token_acc": 0.27639751552795033 + }, + { + "epoch": 0.14063656550703182, + "grad_norm": 17.55818748474121, + "learning_rate": 6.081346618279329e-06, + "loss": 0.6719, + "step": 950, + "train/speech_entropy": 3.380849555463821, + "train/text_entropy": 0.7555734008105833, + "train/token_acc": 0.3098958333333333 + }, + { + "epoch": 0.14078460399703924, + "grad_norm": 22.275327682495117, + "learning_rate": 6.074119725720311e-06, + "loss": 0.9707, + "step": 951, + "train/speech_entropy": 3.6344168878370717, + "train/text_entropy": 1.157883381021434, + "train/token_acc": 0.28104575163398693 + }, + { + "epoch": 0.14093264248704662, + "grad_norm": 12.021417617797852, + "learning_rate": 6.066891327599242e-06, + "loss": 1.4727, + "step": 952, + "train/speech_entropy": 4.468593174214507, + "train/text_entropy": 1.5016247501193916, + "train/token_acc": 0.2767412935323383 + }, + { + "epoch": 0.14108068097705403, + "grad_norm": 14.146052360534668, + "learning_rate": 6.059661442871754e-06, + "loss": 1.0254, + "step": 953, + "train/speech_entropy": 3.7539318707155553, + "train/text_entropy": 1.3111805230080085, + "train/token_acc": 0.32713963963963966 + }, + { + "epoch": 0.14122871946706145, + "grad_norm": 15.866910934448242, + "learning_rate": 6.0524300904973764e-06, + "loss": 1.0586, + "step": 954, + "train/speech_entropy": 4.2802023558490045, + "train/text_entropy": 1.1636629573634414, + "train/token_acc": 0.2765647743813683 + }, + { + "epoch": 0.14137675795706883, + "grad_norm": 12.254260063171387, + "learning_rate": 6.045197289439488e-06, + "loss": 0.6963, + "step": 955, + "train/speech_entropy": 4.3193046130807735, + "train/text_entropy": 0.9471650934543739, + "train/token_acc": 0.2974683544303797 + }, + { + "epoch": 0.14152479644707625, + "grad_norm": 13.411200523376465, + "learning_rate": 6.037963058665266e-06, + "loss": 0.9727, + "step": 956, + "train/speech_entropy": 3.719272205778652, + "train/text_entropy": 1.0456356010814705, + "train/token_acc": 0.30842911877394635 + }, + { + "epoch": 0.14167283493708363, + "grad_norm": 17.864734649658203, + "learning_rate": 6.030727417145638e-06, + "loss": 1.3223, + "step": 957, + "train/speech_entropy": 4.267453008007488, + "train/text_entropy": 1.312031628334359, + "train/token_acc": 0.26506024096385544 + }, + { + "epoch": 0.14182087342709104, + "grad_norm": 16.329322814941406, + "learning_rate": 6.023490383855231e-06, + "loss": 1.3027, + "step": 958, + "train/speech_entropy": 4.303853998739252, + "train/text_entropy": 1.6925655651643785, + "train/token_acc": 0.27341227125941875 + }, + { + "epoch": 0.14196891191709846, + "grad_norm": 12.207736015319824, + "learning_rate": 6.01625197777232e-06, + "loss": 0.7339, + "step": 959, + "train/speech_entropy": 3.8635406674291617, + "train/text_entropy": 0.7709341391538962, + "train/token_acc": 0.3253358925143954 + }, + { + "epoch": 0.14211695040710584, + "grad_norm": 15.301828384399414, + "learning_rate": 6.00901221787878e-06, + "loss": 1.625, + "step": 960, + "train/speech_entropy": 3.9727107363197427, + "train/text_entropy": 1.5086935051965498, + "train/token_acc": 0.3061578490893322 + }, + { + "epoch": 0.14226498889711325, + "grad_norm": 18.530986785888672, + "learning_rate": 6.00177112316004e-06, + "loss": 1.6758, + "step": 961, + "train/speech_entropy": 3.885814196074471, + "train/text_entropy": 1.4070058495107323, + "train/token_acc": 0.28503336510962823 + }, + { + "epoch": 0.14241302738712064, + "grad_norm": 11.43635368347168, + "learning_rate": 5.994528712605024e-06, + "loss": 1.0552, + "step": 962, + "train/speech_entropy": 4.131244441331808, + "train/text_entropy": 1.0367638017343177, + "train/token_acc": 0.284789644012945 + }, + { + "epoch": 0.14256106587712805, + "grad_norm": 17.046573638916016, + "learning_rate": 5.987285005206112e-06, + "loss": 0.7871, + "step": 963, + "train/speech_entropy": 3.9229166666666666, + "train/text_entropy": 0.9994931428328805, + "train/token_acc": 0.2872531418312388 + }, + { + "epoch": 0.14270910436713546, + "grad_norm": 14.120583534240723, + "learning_rate": 5.98004001995908e-06, + "loss": 0.6709, + "step": 964, + "train/speech_entropy": 3.574771799939744, + "train/text_entropy": 0.7741368418992168, + "train/token_acc": 0.3473684210526316 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 15.68652057647705, + "learning_rate": 5.972793775863059e-06, + "loss": 0.8594, + "step": 965, + "train/speech_entropy": 3.9383526141826923, + "train/text_entropy": 0.6818866526826899, + "train/token_acc": 0.31714876033057854 + }, + { + "epoch": 0.14300518134715026, + "grad_norm": 20.28297996520996, + "learning_rate": 5.965546291920478e-06, + "loss": 2.0469, + "step": 966, + "train/speech_entropy": 4.003380557789522, + "train/text_entropy": 1.980090303241082, + "train/token_acc": 0.24670433145009416 + }, + { + "epoch": 0.14315321983715767, + "grad_norm": 14.65700626373291, + "learning_rate": 5.958297587137019e-06, + "loss": 1.3008, + "step": 967, + "train/speech_entropy": 4.391058590249776, + "train/text_entropy": 1.2691389719645183, + "train/token_acc": 0.24624624624624625 + }, + { + "epoch": 0.14330125832716506, + "grad_norm": 17.01019287109375, + "learning_rate": 5.951047680521565e-06, + "loss": 1.4551, + "step": 968, + "train/speech_entropy": 3.972854308791111, + "train/text_entropy": 1.2605213001013942, + "train/token_acc": 0.2933227344992051 + }, + { + "epoch": 0.14344929681717247, + "grad_norm": 15.420723915100098, + "learning_rate": 5.9437965910861515e-06, + "loss": 1.3047, + "step": 969, + "train/speech_entropy": 4.508907956999492, + "train/text_entropy": 1.3762315299006418, + "train/token_acc": 0.22433718558803534 + }, + { + "epoch": 0.14359733530717986, + "grad_norm": 16.695877075195312, + "learning_rate": 5.9365443378459174e-06, + "loss": 1.5078, + "step": 970, + "train/speech_entropy": 3.876304703529435, + "train/text_entropy": 1.4206241916965794, + "train/token_acc": 0.2743091095189355 + }, + { + "epoch": 0.14374537379718727, + "grad_norm": 9.969259262084961, + "learning_rate": 5.9292909398190465e-06, + "loss": 0.9639, + "step": 971, + "train/speech_entropy": 3.9902927138240463, + "train/text_entropy": 0.965458731720413, + "train/token_acc": 0.2695595003287311 + }, + { + "epoch": 0.14389341228719468, + "grad_norm": 17.152822494506836, + "learning_rate": 5.922036416026734e-06, + "loss": 1.582, + "step": 972, + "train/speech_entropy": 4.3806088355279735, + "train/text_entropy": 1.6256934831727226, + "train/token_acc": 0.24501661129568106 + }, + { + "epoch": 0.14404145077720207, + "grad_norm": 15.241676330566406, + "learning_rate": 5.9147807854931195e-06, + "loss": 1.4863, + "step": 973, + "train/speech_entropy": 3.90649087725825, + "train/text_entropy": 1.14662914411396, + "train/token_acc": 0.29749830966869506 + }, + { + "epoch": 0.14418948926720948, + "grad_norm": 15.235798835754395, + "learning_rate": 5.907524067245251e-06, + "loss": 1.1836, + "step": 974, + "train/speech_entropy": 4.4609532107858785, + "train/text_entropy": 1.27298023867509, + "train/token_acc": 0.2823240589198036 + }, + { + "epoch": 0.14433752775721687, + "grad_norm": 12.472108840942383, + "learning_rate": 5.900266280313024e-06, + "loss": 0.6143, + "step": 975, + "train/speech_entropy": 3.63683841390849, + "train/text_entropy": 0.43407227939232845, + "train/token_acc": 0.34008683068017365 + }, + { + "epoch": 0.14448556624722428, + "grad_norm": 16.448898315429688, + "learning_rate": 5.89300744372914e-06, + "loss": 1.7227, + "step": 976, + "train/speech_entropy": 4.361871683372642, + "train/text_entropy": 1.5287829505072699, + "train/token_acc": 0.23746081504702193 + }, + { + "epoch": 0.1446336047372317, + "grad_norm": 15.924328804016113, + "learning_rate": 5.885747576529048e-06, + "loss": 1.0176, + "step": 977, + "train/speech_entropy": 4.1151909828186035, + "train/text_entropy": 1.007933759689331, + "train/token_acc": 0.26939655172413796 + }, + { + "epoch": 0.14478164322723908, + "grad_norm": 14.091202735900879, + "learning_rate": 5.87848669775091e-06, + "loss": 1.0547, + "step": 978, + "train/speech_entropy": 3.6094757457874826, + "train/text_entropy": 0.9922393074248756, + "train/token_acc": 0.3057395143487859 + }, + { + "epoch": 0.1449296817172465, + "grad_norm": 15.159602165222168, + "learning_rate": 5.871224826435531e-06, + "loss": 0.5371, + "step": 979, + "train/speech_entropy": 3.5197237862481012, + "train/text_entropy": 0.5566931685356244, + "train/token_acc": 0.33933518005540164 + }, + { + "epoch": 0.14507772020725387, + "grad_norm": 9.94163703918457, + "learning_rate": 5.863961981626319e-06, + "loss": 0.5518, + "step": 980, + "train/speech_entropy": 3.677234498680739, + "train/text_entropy": 0.4732672909656203, + "train/token_acc": 0.3409090909090909 + }, + { + "epoch": 0.1452257586972613, + "grad_norm": 22.163408279418945, + "learning_rate": 5.8566981823692426e-06, + "loss": 1.2461, + "step": 981, + "train/speech_entropy": 4.0818436451447315, + "train/text_entropy": 1.2819232060359074, + "train/token_acc": 0.25596816976127323 + }, + { + "epoch": 0.1453737971872687, + "grad_norm": 20.92543601989746, + "learning_rate": 5.849433447712766e-06, + "loss": 1.6055, + "step": 982, + "train/speech_entropy": 4.038701799902843, + "train/text_entropy": 1.5763851589626736, + "train/token_acc": 0.27008310249307477 + }, + { + "epoch": 0.14552183567727608, + "grad_norm": 14.29271125793457, + "learning_rate": 5.842167796707808e-06, + "loss": 0.9531, + "step": 983, + "train/speech_entropy": 3.603269403631037, + "train/text_entropy": 1.133030464978722, + "train/token_acc": 0.30721966205837176 + }, + { + "epoch": 0.1456698741672835, + "grad_norm": 12.402803421020508, + "learning_rate": 5.834901248407693e-06, + "loss": 0.9961, + "step": 984, + "train/speech_entropy": 4.1003807631619456, + "train/text_entropy": 1.0726355287038027, + "train/token_acc": 0.2893030794165316 + }, + { + "epoch": 0.14581791265729088, + "grad_norm": 11.718006134033203, + "learning_rate": 5.8276338218680975e-06, + "loss": 1.2266, + "step": 985, + "train/speech_entropy": 4.109801838804814, + "train/text_entropy": 1.1676264199182447, + "train/token_acc": 0.291350531107739 + }, + { + "epoch": 0.1459659511472983, + "grad_norm": 15.204816818237305, + "learning_rate": 5.820365536146999e-06, + "loss": 1.6484, + "step": 986, + "train/speech_entropy": 4.278270630107619, + "train/text_entropy": 1.8528229007876016, + "train/token_acc": 0.24774774774774774 + }, + { + "epoch": 0.1461139896373057, + "grad_norm": 14.973648071289062, + "learning_rate": 5.813096410304628e-06, + "loss": 1.084, + "step": 987, + "train/speech_entropy": 3.978110157147301, + "train/text_entropy": 1.3519828179303337, + "train/token_acc": 0.26362484157160965 + }, + { + "epoch": 0.1462620281273131, + "grad_norm": 14.027508735656738, + "learning_rate": 5.8058264634034235e-06, + "loss": 1.3008, + "step": 988, + "train/speech_entropy": 4.616693036551853, + "train/text_entropy": 1.3970796396066476, + "train/token_acc": 0.22324159021406728 + }, + { + "epoch": 0.1464100666173205, + "grad_norm": 12.432865142822266, + "learning_rate": 5.798555714507971e-06, + "loss": 1.2012, + "step": 989, + "train/speech_entropy": 4.185841550109207, + "train/text_entropy": 1.2155619864228107, + "train/token_acc": 0.28426395939086296 + }, + { + "epoch": 0.14655810510732792, + "grad_norm": 16.189023971557617, + "learning_rate": 5.791284182684962e-06, + "loss": 1.668, + "step": 990, + "train/speech_entropy": 4.295802143895349, + "train/text_entropy": 2.0525731449633575, + "train/token_acc": 0.23981552651806304 + }, + { + "epoch": 0.1467061435973353, + "grad_norm": 16.920869827270508, + "learning_rate": 5.784011887003141e-06, + "loss": 1.2344, + "step": 991, + "train/speech_entropy": 4.202040706204115, + "train/text_entropy": 1.6398015123732546, + "train/token_acc": 0.27918781725888325 + }, + { + "epoch": 0.14685418208734272, + "grad_norm": 19.373626708984375, + "learning_rate": 5.776738846533258e-06, + "loss": 2.1367, + "step": 992, + "train/speech_entropy": 3.9104381777205557, + "train/text_entropy": 2.2259144998675016, + "train/token_acc": 0.25692454632282713 + }, + { + "epoch": 0.1470022205773501, + "grad_norm": 16.81647300720215, + "learning_rate": 5.76946508034801e-06, + "loss": 1.1582, + "step": 993, + "train/speech_entropy": 3.9648387810591927, + "train/text_entropy": 1.1772334705699574, + "train/token_acc": 0.29490022172949004 + }, + { + "epoch": 0.14715025906735751, + "grad_norm": 15.715492248535156, + "learning_rate": 5.7621906075220035e-06, + "loss": 1.6758, + "step": 994, + "train/speech_entropy": 4.235873656653462, + "train/text_entropy": 1.6805035112682818, + "train/token_acc": 0.2596825396825397 + }, + { + "epoch": 0.14729829755736493, + "grad_norm": 17.434402465820312, + "learning_rate": 5.754915447131694e-06, + "loss": 1.8359, + "step": 995, + "train/speech_entropy": 4.333204700100806, + "train/text_entropy": 1.8135711531833285, + "train/token_acc": 0.26160714285714287 + }, + { + "epoch": 0.1474463360473723, + "grad_norm": 13.161947250366211, + "learning_rate": 5.7476396182553425e-06, + "loss": 1.1914, + "step": 996, + "train/speech_entropy": 4.0081628787878785, + "train/text_entropy": 0.9839339231833434, + "train/token_acc": 0.31176470588235294 + }, + { + "epoch": 0.14759437453737972, + "grad_norm": 18.336841583251953, + "learning_rate": 5.740363139972962e-06, + "loss": 1.2461, + "step": 997, + "train/speech_entropy": 4.159571574070915, + "train/text_entropy": 1.1857318878173828, + "train/token_acc": 0.2801377726750861 + }, + { + "epoch": 0.1477424130273871, + "grad_norm": 12.32546329498291, + "learning_rate": 5.733086031366268e-06, + "loss": 1.1494, + "step": 998, + "train/speech_entropy": 4.251921649703139, + "train/text_entropy": 1.3705431032011093, + "train/token_acc": 0.2924657534246575 + }, + { + "epoch": 0.14789045151739452, + "grad_norm": 17.056472778320312, + "learning_rate": 5.725808311518628e-06, + "loss": 1.4453, + "step": 999, + "train/speech_entropy": 4.615890436297421, + "train/text_entropy": 1.5140285588763087, + "train/token_acc": 0.22641509433962265 + }, + { + "epoch": 0.14803849000740193, + "grad_norm": 16.340436935424805, + "learning_rate": 5.718529999515018e-06, + "loss": 1.4844, + "step": 1000, + "train/speech_entropy": 4.21273027956152, + "train/text_entropy": 1.3084751188446202, + "train/token_acc": 0.2578268876611418 + }, + { + "epoch": 0.14818652849740932, + "grad_norm": 12.517424583435059, + "learning_rate": 5.711251114441959e-06, + "loss": 1.0781, + "step": 1001, + "train/speech_entropy": 4.2905853351583, + "train/text_entropy": 1.265802513242392, + "train/token_acc": 0.28436516264428124 + }, + { + "epoch": 0.14833456698741673, + "grad_norm": 14.272148132324219, + "learning_rate": 5.703971675387479e-06, + "loss": 1.2617, + "step": 1002, + "train/speech_entropy": 4.207808573700813, + "train/text_entropy": 1.278546187129334, + "train/token_acc": 0.2668052668052668 + }, + { + "epoch": 0.14848260547742412, + "grad_norm": 13.765541076660156, + "learning_rate": 5.696691701441061e-06, + "loss": 1.4922, + "step": 1003, + "train/speech_entropy": 4.602686819422659, + "train/text_entropy": 1.4503224690755208, + "train/token_acc": 0.2669260700389105 + }, + { + "epoch": 0.14863064396743153, + "grad_norm": 19.259553909301758, + "learning_rate": 5.6894112116935855e-06, + "loss": 1.4727, + "step": 1004, + "train/speech_entropy": 4.1093472089654535, + "train/text_entropy": 1.9603204962068004, + "train/token_acc": 0.254416961130742 + }, + { + "epoch": 0.14877868245743894, + "grad_norm": 17.841552734375, + "learning_rate": 5.682130225237288e-06, + "loss": 1.7578, + "step": 1005, + "train/speech_entropy": 4.358167262258893, + "train/text_entropy": 1.7460452118610366, + "train/token_acc": 0.2551271534044299 + }, + { + "epoch": 0.14892672094744633, + "grad_norm": 12.09514331817627, + "learning_rate": 5.674848761165706e-06, + "loss": 0.8931, + "step": 1006, + "train/speech_entropy": 4.018306942894346, + "train/text_entropy": 1.1270123395052822, + "train/token_acc": 0.2732283464566929 + }, + { + "epoch": 0.14907475943745374, + "grad_norm": 14.494524002075195, + "learning_rate": 5.667566838573635e-06, + "loss": 0.7539, + "step": 1007, + "train/speech_entropy": 3.973508060216586, + "train/text_entropy": 0.7661805956551199, + "train/token_acc": 0.33153928955866524 + }, + { + "epoch": 0.14922279792746113, + "grad_norm": 16.45669174194336, + "learning_rate": 5.660284476557062e-06, + "loss": 1.3828, + "step": 1008, + "train/speech_entropy": 3.6174118194798375, + "train/text_entropy": 1.4946235946462125, + "train/token_acc": 0.2718786464410735 + }, + { + "epoch": 0.14937083641746854, + "grad_norm": 18.093162536621094, + "learning_rate": 5.653001694213136e-06, + "loss": 0.873, + "step": 1009, + "train/speech_entropy": 3.824553494626376, + "train/text_entropy": 1.1757405062755906, + "train/token_acc": 0.26652452025586354 + }, + { + "epoch": 0.14951887490747595, + "grad_norm": 19.45709800720215, + "learning_rate": 5.645718510640102e-06, + "loss": 0.9629, + "step": 1010, + "train/speech_entropy": 3.6928245065292873, + "train/text_entropy": 0.9089675672126539, + "train/token_acc": 0.3157894736842105 + }, + { + "epoch": 0.14966691339748334, + "grad_norm": 15.8876314163208, + "learning_rate": 5.638434944937263e-06, + "loss": 0.7275, + "step": 1011, + "train/speech_entropy": 3.9384893627117967, + "train/text_entropy": 0.5904744765337776, + "train/token_acc": 0.3224789915966387 + }, + { + "epoch": 0.14981495188749075, + "grad_norm": 20.51972770690918, + "learning_rate": 5.63115101620492e-06, + "loss": 1.5078, + "step": 1012, + "train/speech_entropy": 4.0614776611328125, + "train/text_entropy": 1.3563530718693968, + "train/token_acc": 0.24258760107816713 + }, + { + "epoch": 0.14996299037749816, + "grad_norm": 17.21762466430664, + "learning_rate": 5.623866743544325e-06, + "loss": 1.2637, + "step": 1013, + "train/speech_entropy": 4.074711548481874, + "train/text_entropy": 1.300135850906372, + "train/token_acc": 0.25952045133991536 + }, + { + "epoch": 0.15011102886750555, + "grad_norm": 14.187652587890625, + "learning_rate": 5.6165821460576355e-06, + "loss": 1.1367, + "step": 1014, + "train/speech_entropy": 3.968913857570706, + "train/text_entropy": 0.7240999259200751, + "train/token_acc": 0.3393251340271208 + }, + { + "epoch": 0.15025906735751296, + "grad_norm": 47.678985595703125, + "learning_rate": 5.609297242847859e-06, + "loss": 3.1953, + "step": 1015, + "train/speech_entropy": 4.104863191036374, + "train/text_entropy": 1.908417326031309, + "train/token_acc": 0.231203007518797 + }, + { + "epoch": 0.15040710584752034, + "grad_norm": 14.853105545043945, + "learning_rate": 5.602012053018806e-06, + "loss": 1.5781, + "step": 1016, + "train/speech_entropy": 4.370960558597937, + "train/text_entropy": 1.347209145041073, + "train/token_acc": 0.25505716798592787 + }, + { + "epoch": 0.15055514433752776, + "grad_norm": 21.33881378173828, + "learning_rate": 5.594726595675035e-06, + "loss": 1.7578, + "step": 1017, + "train/speech_entropy": 4.304331461588542, + "train/text_entropy": 1.4298308118641805, + "train/token_acc": 0.27692307692307694 + }, + { + "epoch": 0.15070318282753517, + "grad_norm": 15.836353302001953, + "learning_rate": 5.587440889921811e-06, + "loss": 1.082, + "step": 1018, + "train/speech_entropy": 3.897545628680407, + "train/text_entropy": 0.9895091547030155, + "train/token_acc": 0.29965156794425085 + }, + { + "epoch": 0.15085122131754256, + "grad_norm": 15.156307220458984, + "learning_rate": 5.580154954865047e-06, + "loss": 0.7119, + "step": 1019, + "train/speech_entropy": 3.7750381391742973, + "train/text_entropy": 0.6316166697321711, + "train/token_acc": 0.33197278911564626 + }, + { + "epoch": 0.15099925980754997, + "grad_norm": 13.878114700317383, + "learning_rate": 5.572868809611259e-06, + "loss": 1.375, + "step": 1020, + "train/speech_entropy": 4.486953893310042, + "train/text_entropy": 1.5780602380427347, + "train/token_acc": 0.24936601859678784 + }, + { + "epoch": 0.15114729829755735, + "grad_norm": 17.416704177856445, + "learning_rate": 5.565582473267509e-06, + "loss": 1.2148, + "step": 1021, + "train/speech_entropy": 3.960480813419118, + "train/text_entropy": 1.026336423812374, + "train/token_acc": 0.2543478260869565 + }, + { + "epoch": 0.15129533678756477, + "grad_norm": 19.222360610961914, + "learning_rate": 5.558295964941373e-06, + "loss": 1.8594, + "step": 1022, + "train/speech_entropy": 4.044416648256748, + "train/text_entropy": 1.8376218486764577, + "train/token_acc": 0.2560083594566353 + }, + { + "epoch": 0.15144337527757218, + "grad_norm": 16.290327072143555, + "learning_rate": 5.551009303740863e-06, + "loss": 1.1074, + "step": 1023, + "train/speech_entropy": 4.286198449513269, + "train/text_entropy": 0.9706727788689431, + "train/token_acc": 0.30745098039215685 + }, + { + "epoch": 0.15159141376757956, + "grad_norm": 11.741918563842773, + "learning_rate": 5.543722508774398e-06, + "loss": 0.8965, + "step": 1024, + "train/speech_entropy": 4.187051578443878, + "train/text_entropy": 0.9103144829583275, + "train/token_acc": 0.2876142975893599 + }, + { + "epoch": 0.15173945225758698, + "grad_norm": 16.089345932006836, + "learning_rate": 5.536435599150754e-06, + "loss": 1.6602, + "step": 1025, + "train/speech_entropy": 4.388618352706649, + "train/text_entropy": 1.7316536903381348, + "train/token_acc": 0.2236350037397158 + }, + { + "epoch": 0.15188749074759436, + "grad_norm": 15.76720905303955, + "learning_rate": 5.529148593978999e-06, + "loss": 1.5176, + "step": 1026, + "train/speech_entropy": 4.302211379115676, + "train/text_entropy": 1.6188152606670674, + "train/token_acc": 0.26970560303893637 + }, + { + "epoch": 0.15203552923760177, + "grad_norm": 17.6241455078125, + "learning_rate": 5.521861512368454e-06, + "loss": 1.3906, + "step": 1027, + "train/speech_entropy": 3.9187112510751145, + "train/text_entropy": 1.3294970269739275, + "train/token_acc": 0.31227217496962334 + }, + { + "epoch": 0.1521835677276092, + "grad_norm": 20.803058624267578, + "learning_rate": 5.514574373428641e-06, + "loss": 1.2227, + "step": 1028, + "train/speech_entropy": 4.29057252407074, + "train/text_entropy": 1.3941880576630943, + "train/token_acc": 0.28139904610492844 + }, + { + "epoch": 0.15233160621761657, + "grad_norm": 11.701780319213867, + "learning_rate": 5.507287196269232e-06, + "loss": 1.3633, + "step": 1029, + "train/speech_entropy": 4.405494307531327, + "train/text_entropy": 1.396402508732402, + "train/token_acc": 0.2721854304635762 + }, + { + "epoch": 0.15247964470762398, + "grad_norm": 15.957204818725586, + "learning_rate": 5.500000000000001e-06, + "loss": 1.9297, + "step": 1030, + "train/speech_entropy": 4.079610871920621, + "train/text_entropy": 1.8947237163570756, + "train/token_acc": 0.2687651331719128 + }, + { + "epoch": 0.1526276831976314, + "grad_norm": 10.581807136535645, + "learning_rate": 5.49271280373077e-06, + "loss": 0.8525, + "step": 1031, + "train/speech_entropy": 4.188758139362634, + "train/text_entropy": 1.003302896433863, + "train/token_acc": 0.30206254158349966 + }, + { + "epoch": 0.15277572168763878, + "grad_norm": 12.254356384277344, + "learning_rate": 5.485425626571362e-06, + "loss": 0.5371, + "step": 1032, + "train/speech_entropy": 3.731430146941828, + "train/text_entropy": 0.439098637278487, + "train/token_acc": 0.32771693344566133 + }, + { + "epoch": 0.1529237601776462, + "grad_norm": 14.269817352294922, + "learning_rate": 5.478138487631548e-06, + "loss": 1.582, + "step": 1033, + "train/speech_entropy": 4.118396864869195, + "train/text_entropy": 1.4327636567672881, + "train/token_acc": 0.26867119301648884 + }, + { + "epoch": 0.15307179866765358, + "grad_norm": 15.448887825012207, + "learning_rate": 5.4708514060210036e-06, + "loss": 0.9531, + "step": 1034, + "train/speech_entropy": 4.022855511372754, + "train/text_entropy": 0.9689810717547381, + "train/token_acc": 0.27948990435706694 + }, + { + "epoch": 0.153219837157661, + "grad_norm": 14.629405975341797, + "learning_rate": 5.463564400849247e-06, + "loss": 1.0566, + "step": 1035, + "train/speech_entropy": 4.279158882472826, + "train/text_entropy": 1.2736474225621024, + "train/token_acc": 0.24747010119595217 + }, + { + "epoch": 0.1533678756476684, + "grad_norm": 16.416433334350586, + "learning_rate": 5.4562774912256025e-06, + "loss": 0.7559, + "step": 1036, + "train/speech_entropy": 3.9289570385689907, + "train/text_entropy": 0.7913187117803664, + "train/token_acc": 0.2885662431941924 + }, + { + "epoch": 0.1535159141376758, + "grad_norm": 9.452640533447266, + "learning_rate": 5.4489906962591395e-06, + "loss": 1.3086, + "step": 1037, + "train/speech_entropy": 4.018872103619543, + "train/text_entropy": 1.2376270043881235, + "train/token_acc": 0.30319436924742826 + }, + { + "epoch": 0.1536639526276832, + "grad_norm": 19.527345657348633, + "learning_rate": 5.44170403505863e-06, + "loss": 2.7656, + "step": 1038, + "train/speech_entropy": 4.542805408296131, + "train/text_entropy": 2.79035288274544, + "train/token_acc": 0.2618825722273998 + }, + { + "epoch": 0.1538119911176906, + "grad_norm": 19.782569885253906, + "learning_rate": 5.434417526732491e-06, + "loss": 1.7891, + "step": 1039, + "train/speech_entropy": 4.1454779296875, + "train/text_entropy": 1.7377656436135582, + "train/token_acc": 0.2402088772845953 + }, + { + "epoch": 0.153960029607698, + "grad_norm": 20.90387725830078, + "learning_rate": 5.427131190388743e-06, + "loss": 1.375, + "step": 1040, + "train/speech_entropy": 4.03663387729402, + "train/text_entropy": 1.2160810582778032, + "train/token_acc": 0.29207920792079206 + }, + { + "epoch": 0.15410806809770541, + "grad_norm": 18.9847412109375, + "learning_rate": 5.419845045134955e-06, + "loss": 1.4492, + "step": 1041, + "train/speech_entropy": 4.175988849185614, + "train/text_entropy": 1.3907724829281078, + "train/token_acc": 0.27102803738317754 + }, + { + "epoch": 0.1542561065877128, + "grad_norm": 12.234392166137695, + "learning_rate": 5.412559110078189e-06, + "loss": 0.8496, + "step": 1042, + "train/speech_entropy": 3.983014121752107, + "train/text_entropy": 0.9535903569081383, + "train/token_acc": 0.3278837420526794 + }, + { + "epoch": 0.1544041450777202, + "grad_norm": 10.95749568939209, + "learning_rate": 5.405273404324965e-06, + "loss": 0.8535, + "step": 1043, + "train/speech_entropy": 4.246629853938424, + "train/text_entropy": 0.6947104910145635, + "train/token_acc": 0.3007646559048428 + }, + { + "epoch": 0.1545521835677276, + "grad_norm": 25.27839469909668, + "learning_rate": 5.397987946981196e-06, + "loss": 1.4414, + "step": 1044, + "train/speech_entropy": 3.8102350494741004, + "train/text_entropy": 1.4972232695548766, + "train/token_acc": 0.2727272727272727 + }, + { + "epoch": 0.154700222057735, + "grad_norm": 12.108386039733887, + "learning_rate": 5.390702757152142e-06, + "loss": 0.9707, + "step": 1045, + "train/speech_entropy": 4.21355589104235, + "train/text_entropy": 1.0117186783624927, + "train/token_acc": 0.28046875 + }, + { + "epoch": 0.15484826054774242, + "grad_norm": 14.470213890075684, + "learning_rate": 5.383417853942365e-06, + "loss": 0.9941, + "step": 1046, + "train/speech_entropy": 3.974353838223743, + "train/text_entropy": 1.2379918981481481, + "train/token_acc": 0.29624277456647397 + }, + { + "epoch": 0.1549962990377498, + "grad_norm": 12.856100082397461, + "learning_rate": 5.376133256455677e-06, + "loss": 1.1738, + "step": 1047, + "train/speech_entropy": 4.217667065530157, + "train/text_entropy": 1.1611141414078983, + "train/token_acc": 0.28453267162944584 + }, + { + "epoch": 0.15514433752775722, + "grad_norm": 10.122862815856934, + "learning_rate": 5.3688489837950815e-06, + "loss": 0.8682, + "step": 1048, + "train/speech_entropy": 4.245102223282155, + "train/text_entropy": 1.071537194564996, + "train/token_acc": 0.2864864864864865 + }, + { + "epoch": 0.1552923760177646, + "grad_norm": 16.53183364868164, + "learning_rate": 5.3615650550627385e-06, + "loss": 1.2617, + "step": 1049, + "train/speech_entropy": 4.021502060098277, + "train/text_entropy": 1.2203201611836751, + "train/token_acc": 0.27696793002915454 + }, + { + "epoch": 0.15544041450777202, + "grad_norm": 12.307141304016113, + "learning_rate": 5.354281489359899e-06, + "loss": 1.3281, + "step": 1050, + "train/speech_entropy": 4.488366203655758, + "train/text_entropy": 1.3584271665021177, + "train/token_acc": 0.2554054054054054 + }, + { + "epoch": 0.15558845299777943, + "grad_norm": 34.24435806274414, + "learning_rate": 5.3469983057868656e-06, + "loss": 1.7637, + "step": 1051, + "train/speech_entropy": 3.6610742646698666, + "train/text_entropy": 2.0419947504997253, + "train/token_acc": 0.26370757180156656 + }, + { + "epoch": 0.15573649148778682, + "grad_norm": 10.157325744628906, + "learning_rate": 5.33971552344294e-06, + "loss": 0.3223, + "step": 1052, + "train/speech_entropy": 3.34147149375536, + "train/text_entropy": 0.3995448841768153, + "train/token_acc": 0.34075104311543813 + }, + { + "epoch": 0.15588452997779423, + "grad_norm": 22.33800506591797, + "learning_rate": 5.332433161426368e-06, + "loss": 1.5566, + "step": 1053, + "train/speech_entropy": 3.5566954469966316, + "train/text_entropy": 1.6894236719885538, + "train/token_acc": 0.2823086574654956 + }, + { + "epoch": 0.15603256846780164, + "grad_norm": 12.978653907775879, + "learning_rate": 5.325151238834294e-06, + "loss": 0.8027, + "step": 1054, + "train/speech_entropy": 4.3628223078863595, + "train/text_entropy": 0.8905580118632638, + "train/token_acc": 0.33572895277207393 + }, + { + "epoch": 0.15618060695780903, + "grad_norm": 16.305736541748047, + "learning_rate": 5.3178697747627136e-06, + "loss": 1.1152, + "step": 1055, + "train/speech_entropy": 4.0127910328331495, + "train/text_entropy": 1.2408060709635416, + "train/token_acc": 0.2659313725490196 + }, + { + "epoch": 0.15632864544781644, + "grad_norm": 18.097572326660156, + "learning_rate": 5.3105887883064176e-06, + "loss": 1.2891, + "step": 1056, + "train/speech_entropy": 3.953076352052583, + "train/text_entropy": 1.32259032546833, + "train/token_acc": 0.3033033033033033 + }, + { + "epoch": 0.15647668393782382, + "grad_norm": 17.762493133544922, + "learning_rate": 5.30330829855894e-06, + "loss": 1.6836, + "step": 1057, + "train/speech_entropy": 4.157064921261051, + "train/text_entropy": 1.7639172931050144, + "train/token_acc": 0.24168694241686942 + }, + { + "epoch": 0.15662472242783124, + "grad_norm": 11.077001571655273, + "learning_rate": 5.296028324612521e-06, + "loss": 0.9561, + "step": 1058, + "train/speech_entropy": 4.1908285196824835, + "train/text_entropy": 1.042336900104848, + "train/token_acc": 0.30447987851176916 + }, + { + "epoch": 0.15677276091783865, + "grad_norm": 13.848963737487793, + "learning_rate": 5.288748885558042e-06, + "loss": 0.832, + "step": 1059, + "train/speech_entropy": 3.4869557570879097, + "train/text_entropy": 1.2605760643281132, + "train/token_acc": 0.3103057757644394 + }, + { + "epoch": 0.15692079940784603, + "grad_norm": 16.90800666809082, + "learning_rate": 5.281470000484985e-06, + "loss": 1.0977, + "step": 1060, + "train/speech_entropy": 4.118197343712367, + "train/text_entropy": 1.2422131704627921, + "train/token_acc": 0.2786624203821656 + }, + { + "epoch": 0.15706883789785345, + "grad_norm": 16.722698211669922, + "learning_rate": 5.274191688481373e-06, + "loss": 0.8467, + "step": 1061, + "train/speech_entropy": 4.0417772417098, + "train/text_entropy": 0.7976140690420916, + "train/token_acc": 0.3124231242312423 + }, + { + "epoch": 0.15721687638786083, + "grad_norm": 16.071035385131836, + "learning_rate": 5.266913968633734e-06, + "loss": 0.8477, + "step": 1062, + "train/speech_entropy": 3.9517071238237684, + "train/text_entropy": 0.9543040068253227, + "train/token_acc": 0.30781010719754975 + }, + { + "epoch": 0.15736491487786824, + "grad_norm": 11.430985450744629, + "learning_rate": 5.2596368600270406e-06, + "loss": 0.7305, + "step": 1063, + "train/speech_entropy": 3.688372727149042, + "train/text_entropy": 1.058635108611163, + "train/token_acc": 0.3132992327365729 + }, + { + "epoch": 0.15751295336787566, + "grad_norm": 18.29827117919922, + "learning_rate": 5.252360381744658e-06, + "loss": 1.2773, + "step": 1064, + "train/speech_entropy": 4.038999384099787, + "train/text_entropy": 1.3830546503481658, + "train/token_acc": 0.2565789473684211 + }, + { + "epoch": 0.15766099185788304, + "grad_norm": 15.707037925720215, + "learning_rate": 5.245084552868308e-06, + "loss": 1.3125, + "step": 1065, + "train/speech_entropy": 3.821484586793918, + "train/text_entropy": 1.3949327140018857, + "train/token_acc": 0.26957637997432604 + }, + { + "epoch": 0.15780903034789046, + "grad_norm": 17.458118438720703, + "learning_rate": 5.237809392478e-06, + "loss": 0.8379, + "step": 1066, + "train/speech_entropy": 3.87664354618509, + "train/text_entropy": 0.9010617589376059, + "train/token_acc": 0.2948453608247423 + }, + { + "epoch": 0.15795706883789784, + "grad_norm": 16.914222717285156, + "learning_rate": 5.230534919651992e-06, + "loss": 1.1699, + "step": 1067, + "train/speech_entropy": 3.778687033658242, + "train/text_entropy": 0.9191502910826851, + "train/token_acc": 0.3226890756302521 + }, + { + "epoch": 0.15810510732790525, + "grad_norm": 9.191329956054688, + "learning_rate": 5.223261153466744e-06, + "loss": 1.1914, + "step": 1068, + "train/speech_entropy": 4.079319372882552, + "train/text_entropy": 0.9944541819190579, + "train/token_acc": 0.32161921708185054 + }, + { + "epoch": 0.15825314581791267, + "grad_norm": 15.488332748413086, + "learning_rate": 5.21598811299686e-06, + "loss": 1.4336, + "step": 1069, + "train/speech_entropy": 4.2430475463263475, + "train/text_entropy": 1.3377064157039562, + "train/token_acc": 0.27046548956661315 + }, + { + "epoch": 0.15840118430792005, + "grad_norm": 11.077091217041016, + "learning_rate": 5.20871581731504e-06, + "loss": 0.9863, + "step": 1070, + "train/speech_entropy": 4.114773941694361, + "train/text_entropy": 1.0815378076889937, + "train/token_acc": 0.28303198887343534 + }, + { + "epoch": 0.15854922279792746, + "grad_norm": 13.830517768859863, + "learning_rate": 5.2014442854920324e-06, + "loss": 0.7627, + "step": 1071, + "train/speech_entropy": 3.7248268617402522, + "train/text_entropy": 0.8421596828982126, + "train/token_acc": 0.32456140350877194 + }, + { + "epoch": 0.15869726128793488, + "grad_norm": 16.85113525390625, + "learning_rate": 5.194173536596579e-06, + "loss": 1.6875, + "step": 1072, + "train/speech_entropy": 4.224956272201421, + "train/text_entropy": 1.362455466358932, + "train/token_acc": 0.30177514792899407 + }, + { + "epoch": 0.15884529977794226, + "grad_norm": 13.51894474029541, + "learning_rate": 5.186903589695372e-06, + "loss": 1.2285, + "step": 1073, + "train/speech_entropy": 3.7231502089389537, + "train/text_entropy": 0.9251359693047141, + "train/token_acc": 0.3470790378006873 + }, + { + "epoch": 0.15899333826794967, + "grad_norm": 14.995171546936035, + "learning_rate": 5.179634463853004e-06, + "loss": 1.4727, + "step": 1074, + "train/speech_entropy": 4.382549141308309, + "train/text_entropy": 1.4052732410349047, + "train/token_acc": 0.2654516640253566 + }, + { + "epoch": 0.15914137675795706, + "grad_norm": 18.493797302246094, + "learning_rate": 5.172366178131905e-06, + "loss": 1.6797, + "step": 1075, + "train/speech_entropy": 4.172944017231758, + "train/text_entropy": 1.455342992881819, + "train/token_acc": 0.2568527918781726 + }, + { + "epoch": 0.15928941524796447, + "grad_norm": 16.292078018188477, + "learning_rate": 5.165098751592308e-06, + "loss": 1.3438, + "step": 1076, + "train/speech_entropy": 4.290626525878906, + "train/text_entropy": 1.3648687547253024, + "train/token_acc": 0.26360338573155984 + }, + { + "epoch": 0.15943745373797188, + "grad_norm": 16.97925567626953, + "learning_rate": 5.157832203292193e-06, + "loss": 1.5664, + "step": 1077, + "train/speech_entropy": 4.444386650534237, + "train/text_entropy": 1.5273347898971203, + "train/token_acc": 0.24493927125506074 + }, + { + "epoch": 0.15958549222797927, + "grad_norm": 13.954797744750977, + "learning_rate": 5.1505665522872374e-06, + "loss": 0.7559, + "step": 1078, + "train/speech_entropy": 4.093753952463419, + "train/text_entropy": 0.9881311742270865, + "train/token_acc": 0.2926587301587302 + }, + { + "epoch": 0.15973353071798668, + "grad_norm": 18.09267234802246, + "learning_rate": 5.143301817630759e-06, + "loss": 1.5625, + "step": 1079, + "train/speech_entropy": 4.469421453133501, + "train/text_entropy": 1.417034564990003, + "train/token_acc": 0.2584070796460177 + }, + { + "epoch": 0.15988156920799407, + "grad_norm": 17.547739028930664, + "learning_rate": 5.136038018373682e-06, + "loss": 1.3398, + "step": 1080, + "train/speech_entropy": 3.9440831693491543, + "train/text_entropy": 1.491154858855163, + "train/token_acc": 0.2733990147783251 + }, + { + "epoch": 0.16002960769800148, + "grad_norm": 13.15269660949707, + "learning_rate": 5.128775173564472e-06, + "loss": 1.1641, + "step": 1081, + "train/speech_entropy": 4.3130354251501695, + "train/text_entropy": 1.1479880327735443, + "train/token_acc": 0.25315227934044615 + }, + { + "epoch": 0.1601776461880089, + "grad_norm": 15.84166145324707, + "learning_rate": 5.121513302249091e-06, + "loss": 1.5977, + "step": 1082, + "train/speech_entropy": 4.1718924753289475, + "train/text_entropy": 1.751677815507098, + "train/token_acc": 0.2766685681688534 + }, + { + "epoch": 0.16032568467801628, + "grad_norm": 18.039560317993164, + "learning_rate": 5.114252423470952e-06, + "loss": 1.2812, + "step": 1083, + "train/speech_entropy": 3.9564305956118573, + "train/text_entropy": 1.140799302321214, + "train/token_acc": 0.28721804511278193 + }, + { + "epoch": 0.1604737231680237, + "grad_norm": 5.859569549560547, + "learning_rate": 5.1069925562708634e-06, + "loss": 0.2109, + "step": 1084, + "train/speech_entropy": 3.495872618660094, + "train/text_entropy": 0.28934756458034006, + "train/token_acc": 0.3900709219858156 + }, + { + "epoch": 0.16062176165803108, + "grad_norm": 12.593971252441406, + "learning_rate": 5.099733719686978e-06, + "loss": 1.6055, + "step": 1085, + "train/speech_entropy": 4.266694613759256, + "train/text_entropy": 1.6168570812837577, + "train/token_acc": 0.28375855631611696 + }, + { + "epoch": 0.1607698001480385, + "grad_norm": 12.48733139038086, + "learning_rate": 5.09247593275475e-06, + "loss": 1.291, + "step": 1086, + "train/speech_entropy": 4.17322508640553, + "train/text_entropy": 1.2770429424211092, + "train/token_acc": 0.29328358208955224 + }, + { + "epoch": 0.1609178386380459, + "grad_norm": 21.091344833374023, + "learning_rate": 5.085219214506883e-06, + "loss": 1.7695, + "step": 1087, + "train/speech_entropy": 4.050418380403171, + "train/text_entropy": 1.7799270076136435, + "train/token_acc": 0.2648809523809524 + }, + { + "epoch": 0.1610658771280533, + "grad_norm": 12.978901863098145, + "learning_rate": 5.077963583973268e-06, + "loss": 0.8809, + "step": 1088, + "train/speech_entropy": 3.935369330512153, + "train/text_entropy": 0.8134584606818434, + "train/token_acc": 0.3120503597122302 + }, + { + "epoch": 0.1612139156180607, + "grad_norm": 16.460100173950195, + "learning_rate": 5.070709060180955e-06, + "loss": 1.2461, + "step": 1089, + "train/speech_entropy": 4.199021417025862, + "train/text_entropy": 1.1856217560944733, + "train/token_acc": 0.27846674182638104 + }, + { + "epoch": 0.16136195410806808, + "grad_norm": 12.99067497253418, + "learning_rate": 5.063455662154084e-06, + "loss": 0.5254, + "step": 1090, + "train/speech_entropy": 3.8212197361523117, + "train/text_entropy": 0.5615067392025354, + "train/token_acc": 0.34935064935064936 + }, + { + "epoch": 0.1615099925980755, + "grad_norm": 19.1197566986084, + "learning_rate": 5.056203408913848e-06, + "loss": 1.3242, + "step": 1091, + "train/speech_entropy": 3.8656282026051567, + "train/text_entropy": 1.36404293873271, + "train/token_acc": 0.2932098765432099 + }, + { + "epoch": 0.1616580310880829, + "grad_norm": 18.734941482543945, + "learning_rate": 5.048952319478436e-06, + "loss": 1.7031, + "step": 1092, + "train/speech_entropy": 4.292470145296273, + "train/text_entropy": 1.431509847694871, + "train/token_acc": 0.2679200940070505 + }, + { + "epoch": 0.1618060695780903, + "grad_norm": 12.723854064941406, + "learning_rate": 5.041702412862984e-06, + "loss": 0.9414, + "step": 1093, + "train/speech_entropy": 4.164680678717599, + "train/text_entropy": 0.8028331735280639, + "train/token_acc": 0.3201754385964912 + }, + { + "epoch": 0.1619541080680977, + "grad_norm": 13.310798645019531, + "learning_rate": 5.034453708079523e-06, + "loss": 1.0957, + "step": 1094, + "train/speech_entropy": 3.927188123678016, + "train/text_entropy": 0.9602587577624199, + "train/token_acc": 0.2826282628262826 + }, + { + "epoch": 0.16210214655810512, + "grad_norm": 12.207396507263184, + "learning_rate": 5.027206224136944e-06, + "loss": 0.9316, + "step": 1095, + "train/speech_entropy": 4.2202440968819985, + "train/text_entropy": 1.0197952738348044, + "train/token_acc": 0.3013574660633484 + }, + { + "epoch": 0.1622501850481125, + "grad_norm": 9.798235893249512, + "learning_rate": 5.019959980040921e-06, + "loss": 0.6904, + "step": 1096, + "train/speech_entropy": 3.81762909470943, + "train/text_entropy": 0.7058428218791578, + "train/token_acc": 0.3347940403155127 + }, + { + "epoch": 0.16239822353811992, + "grad_norm": 12.191113471984863, + "learning_rate": 5.0127149947938895e-06, + "loss": 0.6543, + "step": 1097, + "train/speech_entropy": 3.803979780684546, + "train/text_entropy": 0.7611286113788556, + "train/token_acc": 0.3489771359807461 + }, + { + "epoch": 0.1625462620281273, + "grad_norm": 6.999958515167236, + "learning_rate": 5.005471287394978e-06, + "loss": 0.2817, + "step": 1098, + "train/speech_entropy": 3.4374715054489156, + "train/text_entropy": 0.2655147780542788, + "train/token_acc": 0.38633818589025753 + }, + { + "epoch": 0.16269430051813472, + "grad_norm": 15.555116653442383, + "learning_rate": 4.998228876839962e-06, + "loss": 0.9219, + "step": 1099, + "train/speech_entropy": 3.7982305482376453, + "train/text_entropy": 0.6785555300505265, + "train/token_acc": 0.3333333333333333 + }, + { + "epoch": 0.16284233900814213, + "grad_norm": 17.92732048034668, + "learning_rate": 4.990987782121222e-06, + "loss": 1.4648, + "step": 1100, + "train/speech_entropy": 4.248528471826108, + "train/text_entropy": 1.5397925845460396, + "train/token_acc": 0.26170212765957446 + }, + { + "epoch": 0.1629903774981495, + "grad_norm": 19.35114097595215, + "learning_rate": 4.983748022227683e-06, + "loss": 1.5234, + "step": 1101, + "train/speech_entropy": 4.01956221417683, + "train/text_entropy": 1.6880905919795415, + "train/token_acc": 0.2692307692307692 + }, + { + "epoch": 0.16313841598815693, + "grad_norm": 17.687801361083984, + "learning_rate": 4.976509616144771e-06, + "loss": 1.2773, + "step": 1102, + "train/speech_entropy": 4.318240749616565, + "train/text_entropy": 1.4021892660468287, + "train/token_acc": 0.2703252032520325 + }, + { + "epoch": 0.1632864544781643, + "grad_norm": 13.804495811462402, + "learning_rate": 4.969272582854364e-06, + "loss": 0.9482, + "step": 1103, + "train/speech_entropy": 3.7993798906500804, + "train/text_entropy": 1.0915048387315538, + "train/token_acc": 0.30670103092783507 + }, + { + "epoch": 0.16343449296817172, + "grad_norm": 14.545113563537598, + "learning_rate": 4.962036941334736e-06, + "loss": 1.3633, + "step": 1104, + "train/speech_entropy": 4.231247373170491, + "train/text_entropy": 1.444103298474796, + "train/token_acc": 0.30738119312436807 + }, + { + "epoch": 0.16358253145817914, + "grad_norm": 14.168485641479492, + "learning_rate": 4.954802710560514e-06, + "loss": 1.0781, + "step": 1105, + "train/speech_entropy": 3.8220203273104265, + "train/text_entropy": 0.9937410496953708, + "train/token_acc": 0.32248803827751193 + }, + { + "epoch": 0.16373056994818652, + "grad_norm": 12.088395118713379, + "learning_rate": 4.947569909502627e-06, + "loss": 0.7441, + "step": 1106, + "train/speech_entropy": 3.568883787711791, + "train/text_entropy": 0.5181931625177831, + "train/token_acc": 0.3502824858757062 + }, + { + "epoch": 0.16387860843819393, + "grad_norm": 12.2276611328125, + "learning_rate": 4.940338557128248e-06, + "loss": 0.7656, + "step": 1107, + "train/speech_entropy": 4.268730835965041, + "train/text_entropy": 0.8489920524171162, + "train/token_acc": 0.30725190839694655 + }, + { + "epoch": 0.16402664692820132, + "grad_norm": 16.613832473754883, + "learning_rate": 4.93310867240076e-06, + "loss": 1.2305, + "step": 1108, + "train/speech_entropy": 4.38431583180147, + "train/text_entropy": 1.6554268262662941, + "train/token_acc": 0.24442289039767218 + }, + { + "epoch": 0.16417468541820873, + "grad_norm": 17.63896942138672, + "learning_rate": 4.925880274279691e-06, + "loss": 1.207, + "step": 1109, + "train/speech_entropy": 4.243201816204897, + "train/text_entropy": 0.8956576184014589, + "train/token_acc": 0.31511627906976747 + }, + { + "epoch": 0.16432272390821615, + "grad_norm": 17.656448364257812, + "learning_rate": 4.918653381720672e-06, + "loss": 1.543, + "step": 1110, + "train/speech_entropy": 4.068832270656596, + "train/text_entropy": 1.5888612484503648, + "train/token_acc": 0.2542182227221597 + }, + { + "epoch": 0.16447076239822353, + "grad_norm": 17.940120697021484, + "learning_rate": 4.911428013675388e-06, + "loss": 1.8984, + "step": 1111, + "train/speech_entropy": 4.125967599736621, + "train/text_entropy": 1.8594983537620473, + "train/token_acc": 0.26418289585097376 + }, + { + "epoch": 0.16461880088823094, + "grad_norm": 16.28749656677246, + "learning_rate": 4.904204189091521e-06, + "loss": 1.252, + "step": 1112, + "train/speech_entropy": 4.358922246676772, + "train/text_entropy": 1.1816393018842817, + "train/token_acc": 0.28448275862068967 + }, + { + "epoch": 0.16476683937823836, + "grad_norm": 14.86812686920166, + "learning_rate": 4.8969819269127125e-06, + "loss": 1.4473, + "step": 1113, + "train/speech_entropy": 4.154738216972792, + "train/text_entropy": 1.7901514188839438, + "train/token_acc": 0.25357483317445184 + }, + { + "epoch": 0.16491487786824574, + "grad_norm": 15.9089937210083, + "learning_rate": 4.889761246078502e-06, + "loss": 1.1055, + "step": 1114, + "train/speech_entropy": 4.1217469685035155, + "train/text_entropy": 1.1549023691813152, + "train/token_acc": 0.26507394766780434 + }, + { + "epoch": 0.16506291635825315, + "grad_norm": 18.87456512451172, + "learning_rate": 4.8825421655242835e-06, + "loss": 1.5, + "step": 1115, + "train/speech_entropy": 4.443340070182447, + "train/text_entropy": 1.732585241855719, + "train/token_acc": 0.2557915057915058 + }, + { + "epoch": 0.16521095484826054, + "grad_norm": 15.280374526977539, + "learning_rate": 4.87532470418125e-06, + "loss": 1.4902, + "step": 1116, + "train/speech_entropy": 4.408587668439467, + "train/text_entropy": 2.158500804651252, + "train/token_acc": 0.23862788963460105 + }, + { + "epoch": 0.16535899333826795, + "grad_norm": 18.679100036621094, + "learning_rate": 4.868108880976359e-06, + "loss": 1.8633, + "step": 1117, + "train/speech_entropy": 4.309589229356374, + "train/text_entropy": 1.787944550209857, + "train/token_acc": 0.25556733828207845 + }, + { + "epoch": 0.16550703182827536, + "grad_norm": 12.536413192749023, + "learning_rate": 4.860894714832263e-06, + "loss": 0.6143, + "step": 1118, + "train/speech_entropy": 3.88034154968685, + "train/text_entropy": 0.6079066534462215, + "train/token_acc": 0.33181818181818185 + }, + { + "epoch": 0.16565507031828275, + "grad_norm": 16.446975708007812, + "learning_rate": 4.853682224667268e-06, + "loss": 1.418, + "step": 1119, + "train/speech_entropy": 4.15644188133446, + "train/text_entropy": 1.5559498996020638, + "train/token_acc": 0.25719424460431656 + }, + { + "epoch": 0.16580310880829016, + "grad_norm": 15.165767669677734, + "learning_rate": 4.846471429395294e-06, + "loss": 0.9956, + "step": 1120, + "train/speech_entropy": 3.800391671316964, + "train/text_entropy": 0.7249961201148697, + "train/token_acc": 0.317016317016317 + }, + { + "epoch": 0.16595114729829755, + "grad_norm": 14.526034355163574, + "learning_rate": 4.839262347925809e-06, + "loss": 1.1133, + "step": 1121, + "train/speech_entropy": 3.9488677323631975, + "train/text_entropy": 1.2230336904525756, + "train/token_acc": 0.2852153667054715 + }, + { + "epoch": 0.16609918578830496, + "grad_norm": 20.075151443481445, + "learning_rate": 4.832054999163789e-06, + "loss": 1.7266, + "step": 1122, + "train/speech_entropy": 4.10254032095683, + "train/text_entropy": 1.8207686360677082, + "train/token_acc": 0.2814207650273224 + }, + { + "epoch": 0.16624722427831237, + "grad_norm": 11.876943588256836, + "learning_rate": 4.824849402009664e-06, + "loss": 0.917, + "step": 1123, + "train/speech_entropy": 4.0383433494937435, + "train/text_entropy": 1.0867038940896794, + "train/token_acc": 0.2986666666666667 + }, + { + "epoch": 0.16639526276831976, + "grad_norm": 16.458885192871094, + "learning_rate": 4.817645575359275e-06, + "loss": 1.8516, + "step": 1124, + "train/speech_entropy": 4.736341638318727, + "train/text_entropy": 1.9183671583823108, + "train/token_acc": 0.22580645161290322 + }, + { + "epoch": 0.16654330125832717, + "grad_norm": 19.248918533325195, + "learning_rate": 4.8104435381038165e-06, + "loss": 1.3438, + "step": 1125, + "train/speech_entropy": 3.839658056783397, + "train/text_entropy": 1.409163548396184, + "train/token_acc": 0.2726190476190476 + }, + { + "epoch": 0.16669133974833455, + "grad_norm": 14.32660961151123, + "learning_rate": 4.803243309129793e-06, + "loss": 1.207, + "step": 1126, + "train/speech_entropy": 4.331783188487019, + "train/text_entropy": 1.3948884186921295, + "train/token_acc": 0.25645592163846836 + }, + { + "epoch": 0.16683937823834197, + "grad_norm": 16.232746124267578, + "learning_rate": 4.796044907318962e-06, + "loss": 1.2012, + "step": 1127, + "train/speech_entropy": 4.125611597177934, + "train/text_entropy": 1.419043316560633, + "train/token_acc": 0.26520681265206814 + }, + { + "epoch": 0.16698741672834938, + "grad_norm": 10.984463691711426, + "learning_rate": 4.788848351548295e-06, + "loss": 0.8994, + "step": 1128, + "train/speech_entropy": 4.046760175870434, + "train/text_entropy": 1.4491806150232471, + "train/token_acc": 0.26666666666666666 + }, + { + "epoch": 0.16713545521835677, + "grad_norm": 14.995131492614746, + "learning_rate": 4.781653660689925e-06, + "loss": 1.0078, + "step": 1129, + "train/speech_entropy": 3.974205931813035, + "train/text_entropy": 1.079665297982078, + "train/token_acc": 0.27450980392156865 + }, + { + "epoch": 0.16728349370836418, + "grad_norm": 15.010198593139648, + "learning_rate": 4.774460853611082e-06, + "loss": 1.0586, + "step": 1130, + "train/speech_entropy": 4.357549758184524, + "train/text_entropy": 0.913477285284745, + "train/token_acc": 0.3075221238938053 + }, + { + "epoch": 0.16743153219837156, + "grad_norm": 21.151430130004883, + "learning_rate": 4.767269949174068e-06, + "loss": 1.4043, + "step": 1131, + "train/speech_entropy": 4.210565686225891, + "train/text_entropy": 1.3040816280819953, + "train/token_acc": 0.27697262479871176 + }, + { + "epoch": 0.16757957068837898, + "grad_norm": 15.653547286987305, + "learning_rate": 4.760080966236194e-06, + "loss": 1.0156, + "step": 1132, + "train/speech_entropy": 4.2777357421875, + "train/text_entropy": 0.822325447733088, + "train/token_acc": 0.2838196286472148 + }, + { + "epoch": 0.1677276091783864, + "grad_norm": 14.843795776367188, + "learning_rate": 4.752893923649726e-06, + "loss": 0.9609, + "step": 1133, + "train/speech_entropy": 4.276537053419168, + "train/text_entropy": 1.057097033450478, + "train/token_acc": 0.27796610169491526 + }, + { + "epoch": 0.16787564766839377, + "grad_norm": 14.05532455444336, + "learning_rate": 4.745708840261844e-06, + "loss": 0.665, + "step": 1134, + "train/speech_entropy": 3.7881655738467264, + "train/text_entropy": 0.6662456038730596, + "train/token_acc": 0.34738186462324394 + }, + { + "epoch": 0.1680236861584012, + "grad_norm": 17.841493606567383, + "learning_rate": 4.738525734914598e-06, + "loss": 1.207, + "step": 1135, + "train/speech_entropy": 4.233457997981334, + "train/text_entropy": 1.4969443236889481, + "train/token_acc": 0.265695067264574 + }, + { + "epoch": 0.1681717246484086, + "grad_norm": 21.413082122802734, + "learning_rate": 4.7313446264448395e-06, + "loss": 1.4531, + "step": 1136, + "train/speech_entropy": 4.0203876287286935, + "train/text_entropy": 1.1939270275981486, + "train/token_acc": 0.2705530642750374 + }, + { + "epoch": 0.16831976313841598, + "grad_norm": 18.241098403930664, + "learning_rate": 4.72416553368419e-06, + "loss": 1.3633, + "step": 1137, + "train/speech_entropy": 4.238819438449585, + "train/text_entropy": 1.4963167463030134, + "train/token_acc": 0.25462962962962965 + }, + { + "epoch": 0.1684678016284234, + "grad_norm": 17.99925422668457, + "learning_rate": 4.716988475458979e-06, + "loss": 1.6445, + "step": 1138, + "train/speech_entropy": 4.2798187389123905, + "train/text_entropy": 1.4927020978324022, + "train/token_acc": 0.2603550295857988 + }, + { + "epoch": 0.16861584011843078, + "grad_norm": 15.840570449829102, + "learning_rate": 4.709813470590212e-06, + "loss": 1.1035, + "step": 1139, + "train/speech_entropy": 3.727928109720451, + "train/text_entropy": 0.8389500704678622, + "train/token_acc": 0.32407407407407407 + }, + { + "epoch": 0.1687638786084382, + "grad_norm": 19.501399993896484, + "learning_rate": 4.702640537893499e-06, + "loss": 1.4336, + "step": 1140, + "train/speech_entropy": 4.310491356651274, + "train/text_entropy": 1.6424597035283628, + "train/token_acc": 0.2529137529137529 + }, + { + "epoch": 0.1689119170984456, + "grad_norm": 9.46358585357666, + "learning_rate": 4.695469696179018e-06, + "loss": 0.5293, + "step": 1141, + "train/speech_entropy": 3.5011458808395464, + "train/text_entropy": 0.4585032414908361, + "train/token_acc": 0.3671399594320487 + }, + { + "epoch": 0.169059955588453, + "grad_norm": 11.625032424926758, + "learning_rate": 4.68830096425147e-06, + "loss": 0.9824, + "step": 1142, + "train/speech_entropy": 4.814391622499522, + "train/text_entropy": 0.9818737681319074, + "train/token_acc": 0.2594859241126071 + }, + { + "epoch": 0.1692079940784604, + "grad_norm": 16.66145133972168, + "learning_rate": 4.681134360910017e-06, + "loss": 1.4414, + "step": 1143, + "train/speech_entropy": 4.229075470168612, + "train/text_entropy": 1.4386938248557606, + "train/token_acc": 0.25925925925925924 + }, + { + "epoch": 0.1693560325684678, + "grad_norm": 17.862808227539062, + "learning_rate": 4.673969904948238e-06, + "loss": 0.9707, + "step": 1144, + "train/speech_entropy": 4.050381071671196, + "train/text_entropy": 1.1296327339028411, + "train/token_acc": 0.2950530035335689 + }, + { + "epoch": 0.1695040710584752, + "grad_norm": 19.357261657714844, + "learning_rate": 4.6668076151540864e-06, + "loss": 1.6719, + "step": 1145, + "train/speech_entropy": 4.06534189573476, + "train/text_entropy": 1.8585828742724937, + "train/token_acc": 0.2928870292887029 + }, + { + "epoch": 0.16965210954848262, + "grad_norm": 15.30122184753418, + "learning_rate": 4.659647510309833e-06, + "loss": 0.8242, + "step": 1146, + "train/speech_entropy": 4.184874178513948, + "train/text_entropy": 0.8093894688429031, + "train/token_acc": 0.3013816925734024 + }, + { + "epoch": 0.16980014803849, + "grad_norm": 18.750354766845703, + "learning_rate": 4.6524896091920146e-06, + "loss": 1.3594, + "step": 1147, + "train/speech_entropy": 3.796087816389521, + "train/text_entropy": 1.4852379885586826, + "train/token_acc": 0.3105175292153589 + }, + { + "epoch": 0.1699481865284974, + "grad_norm": 14.073445320129395, + "learning_rate": 4.645333930571394e-06, + "loss": 1.3984, + "step": 1148, + "train/speech_entropy": 4.077366452796437, + "train/text_entropy": 1.2083755674816312, + "train/token_acc": 0.273071104387292 + }, + { + "epoch": 0.1700962250185048, + "grad_norm": 13.591127395629883, + "learning_rate": 4.638180493212907e-06, + "loss": 0.998, + "step": 1149, + "train/speech_entropy": 3.9389973372864207, + "train/text_entropy": 1.1922481678150318, + "train/token_acc": 0.3045507584597433 + }, + { + "epoch": 0.1702442635085122, + "grad_norm": 14.823503494262695, + "learning_rate": 4.631029315875605e-06, + "loss": 0.8926, + "step": 1150, + "train/speech_entropy": 4.329081526131466, + "train/text_entropy": 0.934132645769817, + "train/token_acc": 0.28008998875140606 + }, + { + "epoch": 0.17039230199851962, + "grad_norm": 11.643534660339355, + "learning_rate": 4.62388041731262e-06, + "loss": 0.4971, + "step": 1151, + "train/speech_entropy": 3.719694323417468, + "train/text_entropy": 0.644437977965449, + "train/token_acc": 0.33012379642365886 + }, + { + "epoch": 0.170540340488527, + "grad_norm": 15.23923397064209, + "learning_rate": 4.616733816271101e-06, + "loss": 0.9004, + "step": 1152, + "train/speech_entropy": 3.73779640320413, + "train/text_entropy": 0.6086532029529546, + "train/token_acc": 0.33073929961089493 + }, + { + "epoch": 0.17068837897853442, + "grad_norm": 10.980063438415527, + "learning_rate": 4.609589531492181e-06, + "loss": 0.4453, + "step": 1153, + "train/speech_entropy": 3.6471367457404207, + "train/text_entropy": 0.5182998798511647, + "train/token_acc": 0.337431693989071 + }, + { + "epoch": 0.17083641746854183, + "grad_norm": 13.530533790588379, + "learning_rate": 4.602447581710911e-06, + "loss": 1.0723, + "step": 1154, + "train/speech_entropy": 4.2926188888626635, + "train/text_entropy": 1.3378822462899345, + "train/token_acc": 0.26787181594083814 + }, + { + "epoch": 0.17098445595854922, + "grad_norm": 24.94620132446289, + "learning_rate": 4.595307985656225e-06, + "loss": 1.5273, + "step": 1155, + "train/speech_entropy": 3.542756873750526, + "train/text_entropy": 1.297321286975828, + "train/token_acc": 0.279887482419128 + }, + { + "epoch": 0.17113249444855663, + "grad_norm": 14.348893165588379, + "learning_rate": 4.5881707620508745e-06, + "loss": 0.8838, + "step": 1156, + "train/speech_entropy": 3.6093779390925964, + "train/text_entropy": 0.9374775856918429, + "train/token_acc": 0.30739795918367346 + }, + { + "epoch": 0.17128053293856402, + "grad_norm": 14.695094108581543, + "learning_rate": 4.581035929611404e-06, + "loss": 0.7402, + "step": 1157, + "train/speech_entropy": 3.8205190683340096, + "train/text_entropy": 0.6891668030104712, + "train/token_acc": 0.32362122788761705 + }, + { + "epoch": 0.17142857142857143, + "grad_norm": 17.67706871032715, + "learning_rate": 4.573903507048074e-06, + "loss": 1.0547, + "step": 1158, + "train/speech_entropy": 4.05688242470694, + "train/text_entropy": 0.7803708384473036, + "train/token_acc": 0.32805429864253394 + }, + { + "epoch": 0.17157660991857884, + "grad_norm": 15.62559986114502, + "learning_rate": 4.566773513064834e-06, + "loss": 0.7842, + "step": 1159, + "train/speech_entropy": 3.785909055864982, + "train/text_entropy": 0.551480285520476, + "train/token_acc": 0.343144848954299 + }, + { + "epoch": 0.17172464840858623, + "grad_norm": 18.699153900146484, + "learning_rate": 4.559645966359263e-06, + "loss": 0.9512, + "step": 1160, + "train/speech_entropy": 3.9460108280181885, + "train/text_entropy": 1.0975459769920066, + "train/token_acc": 0.2693548387096774 + }, + { + "epoch": 0.17187268689859364, + "grad_norm": 30.141481399536133, + "learning_rate": 4.552520885622519e-06, + "loss": 2.0586, + "step": 1161, + "train/speech_entropy": 4.09983112544366, + "train/text_entropy": 1.512331719491996, + "train/token_acc": 0.2845360824742268 + }, + { + "epoch": 0.17202072538860103, + "grad_norm": 13.327495574951172, + "learning_rate": 4.545398289539297e-06, + "loss": 1.543, + "step": 1162, + "train/speech_entropy": 4.130309608149792, + "train/text_entropy": 1.4780057958654456, + "train/token_acc": 0.29637681159420287 + }, + { + "epoch": 0.17216876387860844, + "grad_norm": 14.505005836486816, + "learning_rate": 4.538278196787773e-06, + "loss": 1.2109, + "step": 1163, + "train/speech_entropy": 4.298662282590784, + "train/text_entropy": 1.1553472474563953, + "train/token_acc": 0.3013066871637202 + }, + { + "epoch": 0.17231680236861585, + "grad_norm": 14.724333763122559, + "learning_rate": 4.531160626039563e-06, + "loss": 1.4492, + "step": 1164, + "train/speech_entropy": 4.192397313293061, + "train/text_entropy": 1.3653953738328888, + "train/token_acc": 0.2634836427939876 + }, + { + "epoch": 0.17246484085862324, + "grad_norm": 12.314724922180176, + "learning_rate": 4.524045595959665e-06, + "loss": 0.8589, + "step": 1165, + "train/speech_entropy": 4.104710502998909, + "train/text_entropy": 0.9598618589375354, + "train/token_acc": 0.3145869947275923 + }, + { + "epoch": 0.17261287934863065, + "grad_norm": 15.10959529876709, + "learning_rate": 4.5169331252064165e-06, + "loss": 1.3867, + "step": 1166, + "train/speech_entropy": 4.275033158354226, + "train/text_entropy": 1.450902790783747, + "train/token_acc": 0.26540284360189575 + }, + { + "epoch": 0.17276091783863803, + "grad_norm": 8.450788497924805, + "learning_rate": 4.509823232431441e-06, + "loss": 0.4355, + "step": 1167, + "train/speech_entropy": 3.833602888953877, + "train/text_entropy": 0.46084617899957103, + "train/token_acc": 0.32550043516100957 + }, + { + "epoch": 0.17290895632864545, + "grad_norm": 14.706772804260254, + "learning_rate": 4.502715936279606e-06, + "loss": 1.6836, + "step": 1168, + "train/speech_entropy": 4.476423564710115, + "train/text_entropy": 1.4796245574951172, + "train/token_acc": 0.2438186813186813 + }, + { + "epoch": 0.17305699481865286, + "grad_norm": 19.161035537719727, + "learning_rate": 4.4956112553889665e-06, + "loss": 1.3516, + "step": 1169, + "train/speech_entropy": 4.209102025525323, + "train/text_entropy": 1.44476198049692, + "train/token_acc": 0.2676056338028169 + }, + { + "epoch": 0.17320503330866024, + "grad_norm": 22.07659149169922, + "learning_rate": 4.488509208390719e-06, + "loss": 1.4844, + "step": 1170, + "train/speech_entropy": 4.1016578851744185, + "train/text_entropy": 1.3652174161828083, + "train/token_acc": 0.2739463601532567 + }, + { + "epoch": 0.17335307179866766, + "grad_norm": 32.21261978149414, + "learning_rate": 4.481409813909153e-06, + "loss": 2.5312, + "step": 1171, + "train/speech_entropy": 4.1091611341076755, + "train/text_entropy": 2.3126790263865256, + "train/token_acc": 0.24267782426778242 + }, + { + "epoch": 0.17350111028867504, + "grad_norm": 15.505005836486816, + "learning_rate": 4.474313090561605e-06, + "loss": 0.7617, + "step": 1172, + "train/speech_entropy": 3.639272663450461, + "train/text_entropy": 0.6917330915756648, + "train/token_acc": 0.32507149666348906 + }, + { + "epoch": 0.17364914877868246, + "grad_norm": 8.199718475341797, + "learning_rate": 4.467219056958404e-06, + "loss": 0.5146, + "step": 1173, + "train/speech_entropy": 3.57451597892198, + "train/text_entropy": 0.726310899104978, + "train/token_acc": 0.35443037974683544 + }, + { + "epoch": 0.17379718726868987, + "grad_norm": 16.347389221191406, + "learning_rate": 4.4601277317028245e-06, + "loss": 1.625, + "step": 1174, + "train/speech_entropy": 4.262875146985462, + "train/text_entropy": 1.761984566661799, + "train/token_acc": 0.2685609532538955 + }, + { + "epoch": 0.17394522575869725, + "grad_norm": 19.404861450195312, + "learning_rate": 4.453039133391043e-06, + "loss": 1.3125, + "step": 1175, + "train/speech_entropy": 4.106949825321608, + "train/text_entropy": 1.1282504313700907, + "train/token_acc": 0.2696969696969697 + }, + { + "epoch": 0.17409326424870467, + "grad_norm": 16.579898834228516, + "learning_rate": 4.445953280612081e-06, + "loss": 1.7969, + "step": 1176, + "train/speech_entropy": 4.439809095139472, + "train/text_entropy": 1.8666370533130787, + "train/token_acc": 0.23623693379790942 + }, + { + "epoch": 0.17424130273871208, + "grad_norm": 6.248777866363525, + "learning_rate": 4.43887019194776e-06, + "loss": 0.2471, + "step": 1177, + "train/speech_entropy": 3.466531021588323, + "train/text_entropy": 0.3227402938039679, + "train/token_acc": 0.37942477876106195 + }, + { + "epoch": 0.17438934122871946, + "grad_norm": 17.50254249572754, + "learning_rate": 4.431789885972656e-06, + "loss": 1.4531, + "step": 1178, + "train/speech_entropy": 4.149545076905953, + "train/text_entropy": 1.4413973657708419, + "train/token_acc": 0.25534308211473566 + }, + { + "epoch": 0.17453737971872688, + "grad_norm": 21.045591354370117, + "learning_rate": 4.424712381254046e-06, + "loss": 1.2383, + "step": 1179, + "train/speech_entropy": 3.882052055694772, + "train/text_entropy": 1.3463363647460938, + "train/token_acc": 0.28739495798319326 + }, + { + "epoch": 0.17468541820873426, + "grad_norm": 21.372900009155273, + "learning_rate": 4.417637696351861e-06, + "loss": 1.1855, + "step": 1180, + "train/speech_entropy": 3.800218246075872, + "train/text_entropy": 1.2958729619565217, + "train/token_acc": 0.3188405797101449 + }, + { + "epoch": 0.17483345669874167, + "grad_norm": 19.19580841064453, + "learning_rate": 4.4105658498186335e-06, + "loss": 1.4121, + "step": 1181, + "train/speech_entropy": 3.6617256604981376, + "train/text_entropy": 1.2358980178833008, + "train/token_acc": 0.32585949177877427 + }, + { + "epoch": 0.1749814951887491, + "grad_norm": 14.474696159362793, + "learning_rate": 4.403496860199463e-06, + "loss": 1.3203, + "step": 1182, + "train/speech_entropy": 4.001173539595171, + "train/text_entropy": 1.4149283669211647, + "train/token_acc": 0.30303030303030304 + }, + { + "epoch": 0.17512953367875647, + "grad_norm": 16.71173095703125, + "learning_rate": 4.396430746031948e-06, + "loss": 0.6416, + "step": 1183, + "train/speech_entropy": 3.6699305419921875, + "train/text_entropy": 0.6550889671395678, + "train/token_acc": 0.3169129720853859 + }, + { + "epoch": 0.17527757216876388, + "grad_norm": 14.387316703796387, + "learning_rate": 4.389367525846147e-06, + "loss": 1.0869, + "step": 1184, + "train/speech_entropy": 4.270934251961729, + "train/text_entropy": 1.5060878257228905, + "train/token_acc": 0.2708585247883918 + }, + { + "epoch": 0.17542561065877127, + "grad_norm": 16.90350341796875, + "learning_rate": 4.382307218164533e-06, + "loss": 1.5898, + "step": 1185, + "train/speech_entropy": 4.098143058749246, + "train/text_entropy": 1.3149461534288194, + "train/token_acc": 0.301707779886148 + }, + { + "epoch": 0.17557364914877868, + "grad_norm": 33.87230682373047, + "learning_rate": 4.375249841501943e-06, + "loss": 1.0215, + "step": 1186, + "train/speech_entropy": 3.887887918110341, + "train/text_entropy": 0.9975529063831676, + "train/token_acc": 0.2763385146804836 + }, + { + "epoch": 0.1757216876387861, + "grad_norm": 16.25630760192871, + "learning_rate": 4.368195414365519e-06, + "loss": 0.5371, + "step": 1187, + "train/speech_entropy": 3.7260438661317568, + "train/text_entropy": 0.41167135718915104, + "train/token_acc": 0.345821325648415 + }, + { + "epoch": 0.17586972612879348, + "grad_norm": 19.256723403930664, + "learning_rate": 4.361143955254678e-06, + "loss": 1.1914, + "step": 1188, + "train/speech_entropy": 4.193696426007918, + "train/text_entropy": 1.096898885873648, + "train/token_acc": 0.2753623188405797 + }, + { + "epoch": 0.1760177646188009, + "grad_norm": 16.841583251953125, + "learning_rate": 4.354095482661047e-06, + "loss": 1.1445, + "step": 1189, + "train/speech_entropy": 4.006563762644101, + "train/text_entropy": 1.0760853675103956, + "train/token_acc": 0.2673559822747415 + }, + { + "epoch": 0.17616580310880828, + "grad_norm": 13.35125732421875, + "learning_rate": 4.347050015068425e-06, + "loss": 0.6523, + "step": 1190, + "train/speech_entropy": 3.3301429580486817, + "train/text_entropy": 0.5151963957941345, + "train/token_acc": 0.4201550387596899 + }, + { + "epoch": 0.1763138415988157, + "grad_norm": 10.43675708770752, + "learning_rate": 4.34000757095273e-06, + "loss": 0.6406, + "step": 1191, + "train/speech_entropy": 3.676374066184687, + "train/text_entropy": 0.6264140542629546, + "train/token_acc": 0.3500482160077146 + }, + { + "epoch": 0.1764618800888231, + "grad_norm": 10.460848808288574, + "learning_rate": 4.332968168781948e-06, + "loss": 0.375, + "step": 1192, + "train/speech_entropy": 3.441908626233117, + "train/text_entropy": 0.4336381672847609, + "train/token_acc": 0.35852225020990763 + }, + { + "epoch": 0.1766099185788305, + "grad_norm": 14.630643844604492, + "learning_rate": 4.325931827016092e-06, + "loss": 0.5049, + "step": 1193, + "train/speech_entropy": 3.6129264886589105, + "train/text_entropy": 0.510677026242626, + "train/token_acc": 0.3392857142857143 + }, + { + "epoch": 0.1767579570688379, + "grad_norm": 7.907648086547852, + "learning_rate": 4.31889856410715e-06, + "loss": 0.5913, + "step": 1194, + "train/speech_entropy": 4.335753041731176, + "train/text_entropy": 0.892349375145776, + "train/token_acc": 0.31786074672048437 + }, + { + "epoch": 0.17690599555884529, + "grad_norm": 13.08505630493164, + "learning_rate": 4.311868398499033e-06, + "loss": 1.001, + "step": 1195, + "train/speech_entropy": 4.240092604907591, + "train/text_entropy": 1.0928947611058013, + "train/token_acc": 0.2739097169089518 + }, + { + "epoch": 0.1770540340488527, + "grad_norm": 17.619537353515625, + "learning_rate": 4.304841348627527e-06, + "loss": 1.4648, + "step": 1196, + "train/speech_entropy": 4.1924320127905865, + "train/text_entropy": 1.615531497531467, + "train/token_acc": 0.2666005946481665 + }, + { + "epoch": 0.1772020725388601, + "grad_norm": 20.909530639648438, + "learning_rate": 4.297817432920256e-06, + "loss": 2.2109, + "step": 1197, + "train/speech_entropy": 4.099021476785136, + "train/text_entropy": 2.2103749563002726, + "train/token_acc": 0.24760892667375134 + }, + { + "epoch": 0.1773501110288675, + "grad_norm": 12.290267944335938, + "learning_rate": 4.2907966697966186e-06, + "loss": 0.8652, + "step": 1198, + "train/speech_entropy": 4.1372945121754645, + "train/text_entropy": 0.9925686636088807, + "train/token_acc": 0.27896512935883017 + }, + { + "epoch": 0.1774981495188749, + "grad_norm": 12.003408432006836, + "learning_rate": 4.283779077667747e-06, + "loss": 0.9736, + "step": 1199, + "train/speech_entropy": 3.9006614685058594, + "train/text_entropy": 0.9927332951472356, + "train/token_acc": 0.3141405588484335 + }, + { + "epoch": 0.17764618800888232, + "grad_norm": 17.633216857910156, + "learning_rate": 4.2767646749364574e-06, + "loss": 1.0195, + "step": 1200, + "train/speech_entropy": 3.7887609039933103, + "train/text_entropy": 0.7609135622234013, + "train/token_acc": 0.33567251461988307 + }, + { + "epoch": 0.1777942264988897, + "grad_norm": 17.463396072387695, + "learning_rate": 4.2697534799972065e-06, + "loss": 1.5, + "step": 1201, + "train/speech_entropy": 4.227721010904177, + "train/text_entropy": 1.4915531181289765, + "train/token_acc": 0.2344213649851632 + }, + { + "epoch": 0.17794226498889712, + "grad_norm": 18.77127456665039, + "learning_rate": 4.262745511236032e-06, + "loss": 1.3301, + "step": 1202, + "train/speech_entropy": 4.219320348791174, + "train/text_entropy": 1.3834133763467111, + "train/token_acc": 0.23743016759776536 + }, + { + "epoch": 0.1780903034789045, + "grad_norm": 14.030489921569824, + "learning_rate": 4.255740787030518e-06, + "loss": 0.8066, + "step": 1203, + "train/speech_entropy": 3.750682146426482, + "train/text_entropy": 0.7014342517387576, + "train/token_acc": 0.35031055900621116 + }, + { + "epoch": 0.17823834196891192, + "grad_norm": 15.677839279174805, + "learning_rate": 4.248739325749736e-06, + "loss": 0.9414, + "step": 1204, + "train/speech_entropy": 3.7276250161415287, + "train/text_entropy": 0.7511586386059957, + "train/token_acc": 0.2859097127222982 + }, + { + "epoch": 0.17838638045891933, + "grad_norm": 16.53599739074707, + "learning_rate": 4.2417411457542e-06, + "loss": 1.2148, + "step": 1205, + "train/speech_entropy": 4.113827462923729, + "train/text_entropy": 1.371155758561759, + "train/token_acc": 0.2619647355163728 + }, + { + "epoch": 0.17853441894892672, + "grad_norm": 14.484917640686035, + "learning_rate": 4.234746265395825e-06, + "loss": 1.4336, + "step": 1206, + "train/speech_entropy": 4.346955249696757, + "train/text_entropy": 1.0340316078879617, + "train/token_acc": 0.2618849040867389 + }, + { + "epoch": 0.17868245743893413, + "grad_norm": 17.315431594848633, + "learning_rate": 4.2277547030178666e-06, + "loss": 1.9297, + "step": 1207, + "train/speech_entropy": 4.346293422154018, + "train/text_entropy": 1.7957654206649116, + "train/token_acc": 0.26611570247933886 + }, + { + "epoch": 0.1788304959289415, + "grad_norm": 14.510443687438965, + "learning_rate": 4.220766476954882e-06, + "loss": 1.1025, + "step": 1208, + "train/speech_entropy": 3.796566041758363, + "train/text_entropy": 0.9493688839917992, + "train/token_acc": 0.300794551645857 + }, + { + "epoch": 0.17897853441894893, + "grad_norm": 11.907999992370605, + "learning_rate": 4.213781605532681e-06, + "loss": 0.7993, + "step": 1209, + "train/speech_entropy": 3.879751231907017, + "train/text_entropy": 0.7713206286997522, + "train/token_acc": 0.31443298969072164 + }, + { + "epoch": 0.17912657290895634, + "grad_norm": 14.549643516540527, + "learning_rate": 4.206800107068275e-06, + "loss": 0.7773, + "step": 1210, + "train/speech_entropy": 4.4196891632688, + "train/text_entropy": 0.9209761061109938, + "train/token_acc": 0.26916802610114193 + }, + { + "epoch": 0.17927461139896372, + "grad_norm": 21.458877563476562, + "learning_rate": 4.199821999869826e-06, + "loss": 1.5977, + "step": 1211, + "train/speech_entropy": 3.7726843607009153, + "train/text_entropy": 1.6242810101576255, + "train/token_acc": 0.2732166890982503 + }, + { + "epoch": 0.17942264988897114, + "grad_norm": 19.317670822143555, + "learning_rate": 4.19284730223661e-06, + "loss": 1.7148, + "step": 1212, + "train/speech_entropy": 3.9872506916765604, + "train/text_entropy": 1.8274705242958798, + "train/token_acc": 0.2639821029082774 + }, + { + "epoch": 0.17957068837897852, + "grad_norm": 14.914149284362793, + "learning_rate": 4.18587603245896e-06, + "loss": 0.9434, + "step": 1213, + "train/speech_entropy": 3.9664591270910305, + "train/text_entropy": 0.9930897631676369, + "train/token_acc": 0.3123359580052493 + }, + { + "epoch": 0.17971872686898593, + "grad_norm": 16.327655792236328, + "learning_rate": 4.1789082088182135e-06, + "loss": 1.6953, + "step": 1214, + "train/speech_entropy": 4.530711795535125, + "train/text_entropy": 1.950961124195772, + "train/token_acc": 0.21479885057471265 + }, + { + "epoch": 0.17986676535899335, + "grad_norm": 8.546072006225586, + "learning_rate": 4.171943849586678e-06, + "loss": 0.3643, + "step": 1215, + "train/speech_entropy": 3.4754026357767285, + "train/text_entropy": 0.3835141923692491, + "train/token_acc": 0.35389888603256214 + }, + { + "epoch": 0.18001480384900073, + "grad_norm": 18.51434326171875, + "learning_rate": 4.164982973027576e-06, + "loss": 2.1367, + "step": 1216, + "train/speech_entropy": 4.2283143195917425, + "train/text_entropy": 1.9572654947576165, + "train/token_acc": 0.23950447350309703 + }, + { + "epoch": 0.18016284233900814, + "grad_norm": 17.52898406982422, + "learning_rate": 4.158025597394991e-06, + "loss": 1.3516, + "step": 1217, + "train/speech_entropy": 3.7998079633982167, + "train/text_entropy": 1.346176444710075, + "train/token_acc": 0.27958236658932717 + }, + { + "epoch": 0.18031088082901556, + "grad_norm": 15.46154499053955, + "learning_rate": 4.1510717409338305e-06, + "loss": 1.3047, + "step": 1218, + "train/speech_entropy": 4.3400880601671, + "train/text_entropy": 1.2934688113514006, + "train/token_acc": 0.30120481927710846 + }, + { + "epoch": 0.18045891931902294, + "grad_norm": 11.953706741333008, + "learning_rate": 4.1441214218797745e-06, + "loss": 0.7773, + "step": 1219, + "train/speech_entropy": 3.770057981531919, + "train/text_entropy": 0.7954547925927173, + "train/token_acc": 0.3357487922705314 + }, + { + "epoch": 0.18060695780903036, + "grad_norm": 16.166101455688477, + "learning_rate": 4.137174658459223e-06, + "loss": 1.7773, + "step": 1220, + "train/speech_entropy": 4.3953107027888985, + "train/text_entropy": 1.533453589087134, + "train/token_acc": 0.2547318611987382 + }, + { + "epoch": 0.18075499629903774, + "grad_norm": 11.20684814453125, + "learning_rate": 4.130231468889252e-06, + "loss": 1.3047, + "step": 1221, + "train/speech_entropy": 4.285718358205464, + "train/text_entropy": 1.3678363923103578, + "train/token_acc": 0.2799188640973631 + }, + { + "epoch": 0.18090303478904515, + "grad_norm": 17.78098487854004, + "learning_rate": 4.123291871377568e-06, + "loss": 1.1895, + "step": 1222, + "train/speech_entropy": 4.061849267059465, + "train/text_entropy": 1.0736918449401855, + "train/token_acc": 0.2617534942820839 + }, + { + "epoch": 0.18105107327905257, + "grad_norm": 17.599912643432617, + "learning_rate": 4.116355884122458e-06, + "loss": 1.1074, + "step": 1223, + "train/speech_entropy": 3.8419827670517304, + "train/text_entropy": 0.8196198566850409, + "train/token_acc": 0.3136094674556213 + }, + { + "epoch": 0.18119911176905995, + "grad_norm": 11.208226203918457, + "learning_rate": 4.109423525312738e-06, + "loss": 1.1914, + "step": 1224, + "train/speech_entropy": 4.234014509733237, + "train/text_entropy": 1.2132438618869068, + "train/token_acc": 0.3077796706416809 + }, + { + "epoch": 0.18134715025906736, + "grad_norm": 17.169954299926758, + "learning_rate": 4.1024948131277095e-06, + "loss": 1.1348, + "step": 1225, + "train/speech_entropy": 4.170446325231482, + "train/text_entropy": 1.5270121457765429, + "train/token_acc": 0.2617534942820839 + }, + { + "epoch": 0.18149518874907475, + "grad_norm": 17.918516159057617, + "learning_rate": 4.095569765737117e-06, + "loss": 1.582, + "step": 1226, + "train/speech_entropy": 4.2684357837325715, + "train/text_entropy": 1.568403995398319, + "train/token_acc": 0.2631578947368421 + }, + { + "epoch": 0.18164322723908216, + "grad_norm": 13.141963958740234, + "learning_rate": 4.0886484013010845e-06, + "loss": 0.916, + "step": 1227, + "train/speech_entropy": 4.005674456605817, + "train/text_entropy": 0.8980891014918808, + "train/token_acc": 0.28993610223642174 + }, + { + "epoch": 0.18179126572908957, + "grad_norm": 15.099160194396973, + "learning_rate": 4.081730737970088e-06, + "loss": 1.5898, + "step": 1228, + "train/speech_entropy": 4.317025733219583, + "train/text_entropy": 1.6680367942748031, + "train/token_acc": 0.248403122782115 + }, + { + "epoch": 0.18193930421909696, + "grad_norm": 13.172755241394043, + "learning_rate": 4.074816793884889e-06, + "loss": 1.1445, + "step": 1229, + "train/speech_entropy": 4.067222152217742, + "train/text_entropy": 1.2688338216145834, + "train/token_acc": 0.2891774891774892 + }, + { + "epoch": 0.18208734270910437, + "grad_norm": 12.473115921020508, + "learning_rate": 4.067906587176503e-06, + "loss": 0.7246, + "step": 1230, + "train/speech_entropy": 3.808201381138393, + "train/text_entropy": 0.7437351111209753, + "train/token_acc": 0.3238566131025958 + }, + { + "epoch": 0.18223538119911176, + "grad_norm": 14.237232208251953, + "learning_rate": 4.061000135966141e-06, + "loss": 1.1309, + "step": 1231, + "train/speech_entropy": 4.2561212184625274, + "train/text_entropy": 1.2943499850243638, + "train/token_acc": 0.2558139534883721 + }, + { + "epoch": 0.18238341968911917, + "grad_norm": 20.315587997436523, + "learning_rate": 4.054097458365166e-06, + "loss": 2.0078, + "step": 1232, + "train/speech_entropy": 4.239570020592731, + "train/text_entropy": 1.5680448358709163, + "train/token_acc": 0.24087591240875914 + }, + { + "epoch": 0.18253145817912658, + "grad_norm": 13.845185279846191, + "learning_rate": 4.047198572475043e-06, + "loss": 1.1621, + "step": 1233, + "train/speech_entropy": 4.235290785967293, + "train/text_entropy": 1.2436220633551247, + "train/token_acc": 0.23746701846965698 + }, + { + "epoch": 0.18267949666913397, + "grad_norm": 15.10904312133789, + "learning_rate": 4.0403034963873e-06, + "loss": 0.6943, + "step": 1234, + "train/speech_entropy": 3.9150669761892063, + "train/text_entropy": 1.188954878860796, + "train/token_acc": 0.30494505494505497 + }, + { + "epoch": 0.18282753515914138, + "grad_norm": 16.41204261779785, + "learning_rate": 4.033412248183468e-06, + "loss": 1.248, + "step": 1235, + "train/speech_entropy": 4.34082959361912, + "train/text_entropy": 1.6970788138253348, + "train/token_acc": 0.26603575184016826 + }, + { + "epoch": 0.18297557364914876, + "grad_norm": 22.89790916442871, + "learning_rate": 4.02652484593504e-06, + "loss": 2.4141, + "step": 1236, + "train/speech_entropy": 3.7481881886469615, + "train/text_entropy": 2.53583668854277, + "train/token_acc": 0.24801587301587302 + }, + { + "epoch": 0.18312361213915618, + "grad_norm": 9.639363288879395, + "learning_rate": 4.019641307703429e-06, + "loss": 0.7393, + "step": 1237, + "train/speech_entropy": 3.8662671660243313, + "train/text_entropy": 0.7599779890708798, + "train/token_acc": 0.30442692540933897 + }, + { + "epoch": 0.1832716506291636, + "grad_norm": 12.60541820526123, + "learning_rate": 4.012761651539911e-06, + "loss": 1.3281, + "step": 1238, + "train/speech_entropy": 4.302314509357005, + "train/text_entropy": 1.2073181632935532, + "train/token_acc": 0.27237654320987653 + }, + { + "epoch": 0.18341968911917098, + "grad_norm": 13.398696899414062, + "learning_rate": 4.0058858954855805e-06, + "loss": 1.4531, + "step": 1239, + "train/speech_entropy": 4.354928544780798, + "train/text_entropy": 1.5878974734328863, + "train/token_acc": 0.25466666666666665 + }, + { + "epoch": 0.1835677276091784, + "grad_norm": 13.561866760253906, + "learning_rate": 3.9990140575713074e-06, + "loss": 1.0, + "step": 1240, + "train/speech_entropy": 4.866911102453044, + "train/text_entropy": 1.0344319124834254, + "train/token_acc": 0.2519019442096365 + }, + { + "epoch": 0.1837157660991858, + "grad_norm": 13.417729377746582, + "learning_rate": 3.992146155817687e-06, + "loss": 0.832, + "step": 1241, + "train/speech_entropy": 4.47319915077903, + "train/text_entropy": 0.8768395295014253, + "train/token_acc": 0.2890888638920135 + }, + { + "epoch": 0.18386380458919319, + "grad_norm": 12.701887130737305, + "learning_rate": 3.985282208234986e-06, + "loss": 0.6865, + "step": 1242, + "train/speech_entropy": 3.8541340335071608, + "train/text_entropy": 0.6407593195162826, + "train/token_acc": 0.3333333333333333 + }, + { + "epoch": 0.1840118430792006, + "grad_norm": 16.973752975463867, + "learning_rate": 3.978422232823113e-06, + "loss": 1.1953, + "step": 1243, + "train/speech_entropy": 4.443207937967328, + "train/text_entropy": 1.2684230389802351, + "train/token_acc": 0.26128016789087094 + }, + { + "epoch": 0.18415988156920798, + "grad_norm": 16.780014038085938, + "learning_rate": 3.9715662475715485e-06, + "loss": 1.043, + "step": 1244, + "train/speech_entropy": 4.128169475536872, + "train/text_entropy": 1.2518452809269267, + "train/token_acc": 0.264 + }, + { + "epoch": 0.1843079200592154, + "grad_norm": 18.210651397705078, + "learning_rate": 3.964714270459318e-06, + "loss": 2.2891, + "step": 1245, + "train/speech_entropy": 4.741942336796466, + "train/text_entropy": 2.1696990835820147, + "train/token_acc": 0.2356902356902357 + }, + { + "epoch": 0.1844559585492228, + "grad_norm": 14.297505378723145, + "learning_rate": 3.957866319454931e-06, + "loss": 1.25, + "step": 1246, + "train/speech_entropy": 4.4714160505438025, + "train/text_entropy": 1.3074139569256757, + "train/token_acc": 0.2451171875 + }, + { + "epoch": 0.1846039970392302, + "grad_norm": 9.963027000427246, + "learning_rate": 3.951022412516342e-06, + "loss": 0.2888, + "step": 1247, + "train/speech_entropy": 3.794817879086449, + "train/text_entropy": 0.4093599955240885, + "train/token_acc": 0.33814102564102566 + }, + { + "epoch": 0.1847520355292376, + "grad_norm": 14.223852157592773, + "learning_rate": 3.944182567590897e-06, + "loss": 1.5117, + "step": 1248, + "train/speech_entropy": 4.19327258816868, + "train/text_entropy": 1.4957342579531572, + "train/token_acc": 0.25392296718972895 + }, + { + "epoch": 0.184900074019245, + "grad_norm": 17.65026092529297, + "learning_rate": 3.937346802615295e-06, + "loss": 1.582, + "step": 1249, + "train/speech_entropy": 4.518037047567247, + "train/text_entropy": 1.5745108032226562, + "train/token_acc": 0.2508710801393728 + }, + { + "epoch": 0.1850481125092524, + "grad_norm": 16.82684898376465, + "learning_rate": 3.930515135515532e-06, + "loss": 1.3359, + "step": 1250, + "train/speech_entropy": 4.467894894393845, + "train/text_entropy": 1.6523895702142826, + "train/token_acc": 0.24020618556701032 + }, + { + "epoch": 0.18519615099925982, + "grad_norm": 14.416971206665039, + "learning_rate": 3.923687584206855e-06, + "loss": 0.6162, + "step": 1251, + "train/speech_entropy": 3.6153678823269364, + "train/text_entropy": 0.5951177535518524, + "train/token_acc": 0.31221719457013575 + }, + { + "epoch": 0.1853441894892672, + "grad_norm": 15.635307312011719, + "learning_rate": 3.916864166593724e-06, + "loss": 0.9844, + "step": 1252, + "train/speech_entropy": 4.086850241506743, + "train/text_entropy": 0.9735360959681069, + "train/token_acc": 0.26719278466741825 + }, + { + "epoch": 0.18549222797927462, + "grad_norm": 13.205692291259766, + "learning_rate": 3.910044900569757e-06, + "loss": 0.5684, + "step": 1253, + "train/speech_entropy": 3.626955963844477, + "train/text_entropy": 0.6010696505322869, + "train/token_acc": 0.35340314136125656 + }, + { + "epoch": 0.185640266469282, + "grad_norm": 7.535663604736328, + "learning_rate": 3.903229804017684e-06, + "loss": 0.334, + "step": 1254, + "train/speech_entropy": 3.7834526783109683, + "train/text_entropy": 0.39534958671121034, + "train/token_acc": 0.3393025447690858 + }, + { + "epoch": 0.1857883049592894, + "grad_norm": 17.99622917175293, + "learning_rate": 3.8964188948092974e-06, + "loss": 1.1934, + "step": 1255, + "train/speech_entropy": 4.45071191265535, + "train/text_entropy": 1.2958394527435302, + "train/token_acc": 0.2459016393442623 + }, + { + "epoch": 0.18593634344929683, + "grad_norm": 15.907035827636719, + "learning_rate": 3.8896121908054194e-06, + "loss": 1.4258, + "step": 1256, + "train/speech_entropy": 4.164142331952289, + "train/text_entropy": 1.3343757103229392, + "train/token_acc": 0.24433497536945814 + }, + { + "epoch": 0.1860843819393042, + "grad_norm": 15.820682525634766, + "learning_rate": 3.882809709855833e-06, + "loss": 1.3789, + "step": 1257, + "train/speech_entropy": 4.421406565637243, + "train/text_entropy": 1.504853621773098, + "train/token_acc": 0.2717825739408473 + }, + { + "epoch": 0.18623242042931162, + "grad_norm": 16.24913787841797, + "learning_rate": 3.87601146979925e-06, + "loss": 1.2148, + "step": 1258, + "train/speech_entropy": 4.267829189015858, + "train/text_entropy": 1.2188019222683377, + "train/token_acc": 0.2751842751842752 + }, + { + "epoch": 0.18638045891931904, + "grad_norm": 9.670218467712402, + "learning_rate": 3.869217488463269e-06, + "loss": 0.7651, + "step": 1259, + "train/speech_entropy": 4.107717841522502, + "train/text_entropy": 0.9721950985136486, + "train/token_acc": 0.28266438941076005 + }, + { + "epoch": 0.18652849740932642, + "grad_norm": 13.997713088989258, + "learning_rate": 3.862427783664308e-06, + "loss": 0.9131, + "step": 1260, + "train/speech_entropy": 3.762310457084679, + "train/text_entropy": 1.068328937450489, + "train/token_acc": 0.2833957553058677 + }, + { + "epoch": 0.18667653589933383, + "grad_norm": 14.105849266052246, + "learning_rate": 3.8556423732075776e-06, + "loss": 0.5762, + "step": 1261, + "train/speech_entropy": 3.6890472129539207, + "train/text_entropy": 0.7990606748140775, + "train/token_acc": 0.30975143403441685 + }, + { + "epoch": 0.18682457438934122, + "grad_norm": 16.885019302368164, + "learning_rate": 3.848861274887026e-06, + "loss": 0.7188, + "step": 1262, + "train/speech_entropy": 4.096953491210938, + "train/text_entropy": 0.554730305906202, + "train/token_acc": 0.28938906752411575 + }, + { + "epoch": 0.18697261287934863, + "grad_norm": 17.0046443939209, + "learning_rate": 3.842084506485295e-06, + "loss": 1.5117, + "step": 1263, + "train/speech_entropy": 4.468966216043709, + "train/text_entropy": 1.779133451157722, + "train/token_acc": 0.23555555555555555 + }, + { + "epoch": 0.18712065136935604, + "grad_norm": 15.98563003540039, + "learning_rate": 3.835312085773667e-06, + "loss": 1.4141, + "step": 1264, + "train/speech_entropy": 4.252303626466904, + "train/text_entropy": 1.6043094137440557, + "train/token_acc": 0.26570458404074704 + }, + { + "epoch": 0.18726868985936343, + "grad_norm": 20.896724700927734, + "learning_rate": 3.828544030512028e-06, + "loss": 1.2402, + "step": 1265, + "train/speech_entropy": 4.190060822860055, + "train/text_entropy": 1.3974007428702662, + "train/token_acc": 0.28525641025641024 + }, + { + "epoch": 0.18741672834937084, + "grad_norm": 21.613248825073242, + "learning_rate": 3.821780358448811e-06, + "loss": 1.0469, + "step": 1266, + "train/speech_entropy": 3.9274017978721942, + "train/text_entropy": 1.4131633117135647, + "train/token_acc": 0.2657856093979442 + }, + { + "epoch": 0.18756476683937823, + "grad_norm": 17.43717384338379, + "learning_rate": 3.815021087320959e-06, + "loss": 1.5586, + "step": 1267, + "train/speech_entropy": 4.345161394491163, + "train/text_entropy": 1.524490686365076, + "train/token_acc": 0.2542372881355932 + }, + { + "epoch": 0.18771280532938564, + "grad_norm": 19.238216400146484, + "learning_rate": 3.8082662348538746e-06, + "loss": 1.5352, + "step": 1268, + "train/speech_entropy": 4.200166110700626, + "train/text_entropy": 1.59853610476932, + "train/token_acc": 0.2560553633217993 + }, + { + "epoch": 0.18786084381939305, + "grad_norm": 14.066154479980469, + "learning_rate": 3.8015158187613686e-06, + "loss": 0.4873, + "step": 1269, + "train/speech_entropy": 3.6443960420753925, + "train/text_entropy": 0.6220803260803223, + "train/token_acc": 0.3081180811808118 + }, + { + "epoch": 0.18800888230940044, + "grad_norm": 17.43730354309082, + "learning_rate": 3.7947698567456202e-06, + "loss": 1.3125, + "step": 1270, + "train/speech_entropy": 4.09479648195847, + "train/text_entropy": 1.356707271452873, + "train/token_acc": 0.2672209026128266 + }, + { + "epoch": 0.18815692079940785, + "grad_norm": 18.16974449157715, + "learning_rate": 3.7880283664971297e-06, + "loss": 2.1406, + "step": 1271, + "train/speech_entropy": 4.699209676493626, + "train/text_entropy": 2.1144657801914883, + "train/token_acc": 0.2390852390852391 + }, + { + "epoch": 0.18830495928941524, + "grad_norm": 19.203683853149414, + "learning_rate": 3.781291365694669e-06, + "loss": 1.2383, + "step": 1272, + "train/speech_entropy": 4.30761370475214, + "train/text_entropy": 1.379118171105018, + "train/token_acc": 0.259927797833935 + }, + { + "epoch": 0.18845299777942265, + "grad_norm": 23.844867706298828, + "learning_rate": 3.7745588720052352e-06, + "loss": 1.1055, + "step": 1273, + "train/speech_entropy": 3.92118393928036, + "train/text_entropy": 1.0292092300042874, + "train/token_acc": 0.32731648616125153 + }, + { + "epoch": 0.18860103626943006, + "grad_norm": 30.085535049438477, + "learning_rate": 3.767830903084011e-06, + "loss": 2.1719, + "step": 1274, + "train/speech_entropy": 4.467010159901965, + "train/text_entropy": 1.5561692875587136, + "train/token_acc": 0.2305084745762712 + }, + { + "epoch": 0.18874907475943745, + "grad_norm": 9.490952491760254, + "learning_rate": 3.7611074765743094e-06, + "loss": 0.6523, + "step": 1275, + "train/speech_entropy": 3.748256850173105, + "train/text_entropy": 0.6837804639661634, + "train/token_acc": 0.33967391304347827 + }, + { + "epoch": 0.18889711324944486, + "grad_norm": 17.611581802368164, + "learning_rate": 3.7543886101075312e-06, + "loss": 0.9238, + "step": 1276, + "train/speech_entropy": 4.145085982946091, + "train/text_entropy": 0.812063825891373, + "train/token_acc": 0.2643884892086331 + }, + { + "epoch": 0.18904515173945224, + "grad_norm": 12.81491470336914, + "learning_rate": 3.747674321303123e-06, + "loss": 1.2852, + "step": 1277, + "train/speech_entropy": 4.103770750926461, + "train/text_entropy": 1.0800733306976964, + "train/token_acc": 0.2835526315789474 + }, + { + "epoch": 0.18919319022945966, + "grad_norm": 10.427773475646973, + "learning_rate": 3.740964627768524e-06, + "loss": 0.4336, + "step": 1278, + "train/speech_entropy": 3.5831842201386204, + "train/text_entropy": 0.37318756349625126, + "train/token_acc": 0.3403990024937656 + }, + { + "epoch": 0.18934122871946707, + "grad_norm": 13.21310806274414, + "learning_rate": 3.7342595470991205e-06, + "loss": 0.9102, + "step": 1279, + "train/speech_entropy": 3.7583072159641473, + "train/text_entropy": 0.9865942426547882, + "train/token_acc": 0.300498753117207 + }, + { + "epoch": 0.18948926720947445, + "grad_norm": 18.411060333251953, + "learning_rate": 3.7275590968782084e-06, + "loss": 1.6328, + "step": 1280, + "train/speech_entropy": 4.056970572996256, + "train/text_entropy": 1.5686583746047247, + "train/token_acc": 0.2565922920892495 + }, + { + "epoch": 0.18963730569948187, + "grad_norm": 15.710298538208008, + "learning_rate": 3.7208632946769363e-06, + "loss": 1.4785, + "step": 1281, + "train/speech_entropy": 3.892355128193562, + "train/text_entropy": 1.454317972540311, + "train/token_acc": 0.3050691244239631 + }, + { + "epoch": 0.18978534418948928, + "grad_norm": 14.761791229248047, + "learning_rate": 3.714172158054262e-06, + "loss": 1.3711, + "step": 1282, + "train/speech_entropy": 4.176083952160403, + "train/text_entropy": 1.447979652718322, + "train/token_acc": 0.25859375 + }, + { + "epoch": 0.18993338267949667, + "grad_norm": 18.419031143188477, + "learning_rate": 3.7074857045569156e-06, + "loss": 1.4805, + "step": 1283, + "train/speech_entropy": 4.289302681587838, + "train/text_entropy": 1.357647214617048, + "train/token_acc": 0.2566079295154185 + }, + { + "epoch": 0.19008142116950408, + "grad_norm": 15.97525405883789, + "learning_rate": 3.7008039517193396e-06, + "loss": 1.248, + "step": 1284, + "train/speech_entropy": 4.639541764873212, + "train/text_entropy": 1.3889113772999158, + "train/token_acc": 0.2529100529100529 + }, + { + "epoch": 0.19022945965951146, + "grad_norm": 16.14881706237793, + "learning_rate": 3.6941269170636516e-06, + "loss": 1.3086, + "step": 1285, + "train/speech_entropy": 4.3007165460638666, + "train/text_entropy": 1.141277907730697, + "train/token_acc": 0.29658605974395447 + }, + { + "epoch": 0.19037749814951888, + "grad_norm": 12.361824035644531, + "learning_rate": 3.6874546180995994e-06, + "loss": 1.25, + "step": 1286, + "train/speech_entropy": 4.672672142172759, + "train/text_entropy": 1.3514647872359664, + "train/token_acc": 0.26090225563909775 + }, + { + "epoch": 0.1905255366395263, + "grad_norm": 10.954602241516113, + "learning_rate": 3.680787072324505e-06, + "loss": 0.8027, + "step": 1287, + "train/speech_entropy": 4.105428269999641, + "train/text_entropy": 0.6795043225558299, + "train/token_acc": 0.3174896169820028 + }, + { + "epoch": 0.19067357512953367, + "grad_norm": 14.426238059997559, + "learning_rate": 3.6741242972232326e-06, + "loss": 1.3789, + "step": 1288, + "train/speech_entropy": 3.606850102262677, + "train/text_entropy": 1.3233187315893955, + "train/token_acc": 0.3232600732600733 + }, + { + "epoch": 0.1908216136195411, + "grad_norm": 17.805294036865234, + "learning_rate": 3.6674663102681317e-06, + "loss": 1.3008, + "step": 1289, + "train/speech_entropy": 4.397803759637467, + "train/text_entropy": 1.5017994948423625, + "train/token_acc": 0.24973544973544973 + }, + { + "epoch": 0.19096965210954847, + "grad_norm": 11.201162338256836, + "learning_rate": 3.6608131289189985e-06, + "loss": 0.9395, + "step": 1290, + "train/speech_entropy": 4.214165412658474, + "train/text_entropy": 1.0709088046628215, + "train/token_acc": 0.2705210563882941 + }, + { + "epoch": 0.19111769059955588, + "grad_norm": 25.256376266479492, + "learning_rate": 3.6541647706230245e-06, + "loss": 2.2109, + "step": 1291, + "train/speech_entropy": 4.117143903459821, + "train/text_entropy": 2.1969980771849755, + "train/token_acc": 0.24639580602883354 + }, + { + "epoch": 0.1912657290895633, + "grad_norm": 17.57863426208496, + "learning_rate": 3.647521252814753e-06, + "loss": 1.4766, + "step": 1292, + "train/speech_entropy": 4.258849011621441, + "train/text_entropy": 1.2436171637641058, + "train/token_acc": 0.26993275696445723 + }, + { + "epoch": 0.19141376757957068, + "grad_norm": 18.398427963256836, + "learning_rate": 3.6408825929160387e-06, + "loss": 1.1765, + "step": 1293, + "train/speech_entropy": 3.8663807392822624, + "train/text_entropy": 0.9602812072377146, + "train/token_acc": 0.3103448275862069 + }, + { + "epoch": 0.1915618060695781, + "grad_norm": 12.667377471923828, + "learning_rate": 3.6342488083359895e-06, + "loss": 0.6631, + "step": 1294, + "train/speech_entropy": 3.891984183833284, + "train/text_entropy": 0.697448980612833, + "train/token_acc": 0.3200775945683802 + }, + { + "epoch": 0.19170984455958548, + "grad_norm": 16.80979347229004, + "learning_rate": 3.6276199164709326e-06, + "loss": 1.1758, + "step": 1295, + "train/speech_entropy": 4.467037538938884, + "train/text_entropy": 1.323630410271722, + "train/token_acc": 0.27307692307692305 + }, + { + "epoch": 0.1918578830495929, + "grad_norm": 17.543731689453125, + "learning_rate": 3.6209959347043655e-06, + "loss": 1.4375, + "step": 1296, + "train/speech_entropy": 4.243363358045408, + "train/text_entropy": 1.368450299431296, + "train/token_acc": 0.25048923679060664 + }, + { + "epoch": 0.1920059215396003, + "grad_norm": 14.501235961914062, + "learning_rate": 3.614376880406909e-06, + "loss": 1.3633, + "step": 1297, + "train/speech_entropy": 4.685324269731661, + "train/text_entropy": 1.1490118148479056, + "train/token_acc": 0.2641780330222541 + }, + { + "epoch": 0.1921539600296077, + "grad_norm": 15.582624435424805, + "learning_rate": 3.6077627709362597e-06, + "loss": 1.2695, + "step": 1298, + "train/speech_entropy": 4.353525813395817, + "train/text_entropy": 1.201742152163857, + "train/token_acc": 0.2972322503008424 + }, + { + "epoch": 0.1923019985196151, + "grad_norm": 16.55918312072754, + "learning_rate": 3.601153623637148e-06, + "loss": 1.0957, + "step": 1299, + "train/speech_entropy": 3.652435302734375, + "train/text_entropy": 1.0478289450368574, + "train/token_acc": 0.2998661311914324 + }, + { + "epoch": 0.19245003700962252, + "grad_norm": 14.67280387878418, + "learning_rate": 3.5945494558412943e-06, + "loss": 0.7827, + "step": 1300, + "train/speech_entropy": 3.801490345220456, + "train/text_entropy": 0.6624533874166887, + "train/token_acc": 0.33906071019473083 + }, + { + "epoch": 0.1925980754996299, + "grad_norm": 17.72713279724121, + "learning_rate": 3.5879502848673574e-06, + "loss": 1.543, + "step": 1301, + "train/speech_entropy": 4.376698890461988, + "train/text_entropy": 1.635573715460105, + "train/token_acc": 0.257661038148843 + }, + { + "epoch": 0.1927461139896373, + "grad_norm": 15.185938835144043, + "learning_rate": 3.581356128020893e-06, + "loss": 1.0898, + "step": 1302, + "train/speech_entropy": 3.9442651985977104, + "train/text_entropy": 0.9783190886179606, + "train/token_acc": 0.3 + }, + { + "epoch": 0.1928941524796447, + "grad_norm": 11.728065490722656, + "learning_rate": 3.5747670025943114e-06, + "loss": 1.0146, + "step": 1303, + "train/speech_entropy": 4.047952643347002, + "train/text_entropy": 1.1817726475177426, + "train/token_acc": 0.289134438305709 + }, + { + "epoch": 0.1930421909696521, + "grad_norm": 19.80327033996582, + "learning_rate": 3.5681829258668245e-06, + "loss": 1.3594, + "step": 1304, + "train/speech_entropy": 3.9863223874974674, + "train/text_entropy": 1.4506437329278477, + "train/token_acc": 0.2609271523178808 + }, + { + "epoch": 0.19319022945965952, + "grad_norm": 18.791507720947266, + "learning_rate": 3.5616039151044047e-06, + "loss": 1.9531, + "step": 1305, + "train/speech_entropy": 4.4149955587184175, + "train/text_entropy": 1.753080289225933, + "train/token_acc": 0.24450084602368866 + }, + { + "epoch": 0.1933382679496669, + "grad_norm": 6.2028303146362305, + "learning_rate": 3.5550299875597404e-06, + "loss": 0.3252, + "step": 1306, + "train/speech_entropy": 3.5807098753423805, + "train/text_entropy": 0.37395036220550537, + "train/token_acc": 0.3523809523809524 + }, + { + "epoch": 0.19348630643967432, + "grad_norm": 13.764986038208008, + "learning_rate": 3.5484611604721943e-06, + "loss": 1.543, + "step": 1307, + "train/speech_entropy": 4.423305051171333, + "train/text_entropy": 1.4884883127394755, + "train/token_acc": 0.25569476082004555 + }, + { + "epoch": 0.1936343449296817, + "grad_norm": 18.65243911743164, + "learning_rate": 3.5418974510677462e-06, + "loss": 1.4961, + "step": 1308, + "train/speech_entropy": 4.135598000515713, + "train/text_entropy": 1.4170829662378284, + "train/token_acc": 0.2529411764705882 + }, + { + "epoch": 0.19378238341968912, + "grad_norm": 16.73041534423828, + "learning_rate": 3.535338876558958e-06, + "loss": 1.4883, + "step": 1309, + "train/speech_entropy": 4.02149979291896, + "train/text_entropy": 1.3995323756232334, + "train/token_acc": 0.2768361581920904 + }, + { + "epoch": 0.19393042190969653, + "grad_norm": 23.777591705322266, + "learning_rate": 3.5287854541449294e-06, + "loss": 0.9658, + "step": 1310, + "train/speech_entropy": 3.8047483659900743, + "train/text_entropy": 1.4468381534923207, + "train/token_acc": 0.29233511586452765 + }, + { + "epoch": 0.19407846039970392, + "grad_norm": 16.400230407714844, + "learning_rate": 3.522237201011247e-06, + "loss": 1.6445, + "step": 1311, + "train/speech_entropy": 4.462280655305006, + "train/text_entropy": 1.5487950521222713, + "train/token_acc": 0.2457191780821918 + }, + { + "epoch": 0.19422649888971133, + "grad_norm": 20.229795455932617, + "learning_rate": 3.515694134329939e-06, + "loss": 1.1953, + "step": 1312, + "train/speech_entropy": 4.240751434484294, + "train/text_entropy": 1.2041593753032802, + "train/token_acc": 0.27438370846730975 + }, + { + "epoch": 0.19437453737971871, + "grad_norm": 13.417625427246094, + "learning_rate": 3.5091562712594327e-06, + "loss": 1.1562, + "step": 1313, + "train/speech_entropy": 4.204018208959748, + "train/text_entropy": 0.9902956709902511, + "train/token_acc": 0.2648221343873518 + }, + { + "epoch": 0.19452257586972613, + "grad_norm": 15.120375633239746, + "learning_rate": 3.5026236289445183e-06, + "loss": 1.1958, + "step": 1314, + "train/speech_entropy": 3.8271015145186054, + "train/text_entropy": 1.0799761215845745, + "train/token_acc": 0.3190045248868778 + }, + { + "epoch": 0.19467061435973354, + "grad_norm": 14.434449195861816, + "learning_rate": 3.4960962245162854e-06, + "loss": 0.8779, + "step": 1315, + "train/speech_entropy": 3.8648223876953125, + "train/text_entropy": 0.6345921392026155, + "train/token_acc": 0.2925292529252925 + }, + { + "epoch": 0.19481865284974093, + "grad_norm": 19.263042449951172, + "learning_rate": 3.4895740750920916e-06, + "loss": 1.5391, + "step": 1316, + "train/speech_entropy": 3.8676141387787624, + "train/text_entropy": 1.3899924493605091, + "train/token_acc": 0.2502606882168926 + }, + { + "epoch": 0.19496669133974834, + "grad_norm": 17.80942153930664, + "learning_rate": 3.48305719777551e-06, + "loss": 1.1807, + "step": 1317, + "train/speech_entropy": 3.6546162256126196, + "train/text_entropy": 1.3352690456069518, + "train/token_acc": 0.2959830866807611 + }, + { + "epoch": 0.19511472982975572, + "grad_norm": 12.631470680236816, + "learning_rate": 3.476545609656297e-06, + "loss": 0.9932, + "step": 1318, + "train/speech_entropy": 3.9836995111220723, + "train/text_entropy": 1.0342382217343913, + "train/token_acc": 0.30498773507767785 + }, + { + "epoch": 0.19526276831976314, + "grad_norm": 22.61341667175293, + "learning_rate": 3.4700393278103313e-06, + "loss": 1.5508, + "step": 1319, + "train/speech_entropy": 3.997691167787064, + "train/text_entropy": 1.3060938835144043, + "train/token_acc": 0.2727272727272727 + }, + { + "epoch": 0.19541080680977055, + "grad_norm": 15.135601043701172, + "learning_rate": 3.463538369299576e-06, + "loss": 1.4375, + "step": 1320, + "train/speech_entropy": 4.202000629471009, + "train/text_entropy": 1.5479849613074101, + "train/token_acc": 0.26459143968871596 + }, + { + "epoch": 0.19555884529977793, + "grad_norm": 23.486221313476562, + "learning_rate": 3.45704275117204e-06, + "loss": 1.4492, + "step": 1321, + "train/speech_entropy": 4.059306171123798, + "train/text_entropy": 1.4348114542878432, + "train/token_acc": 0.2867557715674362 + }, + { + "epoch": 0.19570688378978535, + "grad_norm": 13.872692108154297, + "learning_rate": 3.4505524904617247e-06, + "loss": 1.082, + "step": 1322, + "train/speech_entropy": 3.94523210894337, + "train/text_entropy": 1.1112255297209088, + "train/token_acc": 0.26666666666666666 + }, + { + "epoch": 0.19585492227979276, + "grad_norm": 14.347296714782715, + "learning_rate": 3.444067604188581e-06, + "loss": 1.082, + "step": 1323, + "train/speech_entropy": 4.100105223181652, + "train/text_entropy": 1.184232893033265, + "train/token_acc": 0.2899628252788104 + }, + { + "epoch": 0.19600296076980014, + "grad_norm": 14.72423267364502, + "learning_rate": 3.437588109358465e-06, + "loss": 1.25, + "step": 1324, + "train/speech_entropy": 4.591693408305063, + "train/text_entropy": 1.2955159070020053, + "train/token_acc": 0.24366812227074236 + }, + { + "epoch": 0.19615099925980756, + "grad_norm": 12.118396759033203, + "learning_rate": 3.4311140229631013e-06, + "loss": 1.2344, + "step": 1325, + "train/speech_entropy": 4.305828869869561, + "train/text_entropy": 1.2588573866316026, + "train/token_acc": 0.28563714902807774 + }, + { + "epoch": 0.19629903774981494, + "grad_norm": 11.39831829071045, + "learning_rate": 3.4246453619800234e-06, + "loss": 1.0303, + "step": 1326, + "train/speech_entropy": 4.117767924647177, + "train/text_entropy": 1.2119042066284655, + "train/token_acc": 0.27655310621242485 + }, + { + "epoch": 0.19644707623982235, + "grad_norm": 13.508459091186523, + "learning_rate": 3.41818214337254e-06, + "loss": 0.9414, + "step": 1327, + "train/speech_entropy": 3.5678246483269977, + "train/text_entropy": 1.134597116812116, + "train/token_acc": 0.3465909090909091 + }, + { + "epoch": 0.19659511472982977, + "grad_norm": 18.5377254486084, + "learning_rate": 3.411724384089693e-06, + "loss": 1.1055, + "step": 1328, + "train/speech_entropy": 3.986183392648206, + "train/text_entropy": 0.9328606173118449, + "train/token_acc": 0.2817955112219451 + }, + { + "epoch": 0.19674315321983715, + "grad_norm": 14.820624351501465, + "learning_rate": 3.405272101066199e-06, + "loss": 1.1465, + "step": 1329, + "train/speech_entropy": 4.030689403168832, + "train/text_entropy": 1.077267846395803, + "train/token_acc": 0.2965986394557823 + }, + { + "epoch": 0.19689119170984457, + "grad_norm": 12.751899719238281, + "learning_rate": 3.3988253112224177e-06, + "loss": 1.1562, + "step": 1330, + "train/speech_entropy": 4.051116819806427, + "train/text_entropy": 1.088759271267852, + "train/token_acc": 0.2746071133167907 + }, + { + "epoch": 0.19703923019985195, + "grad_norm": 18.947513580322266, + "learning_rate": 3.3923840314643063e-06, + "loss": 1.4414, + "step": 1331, + "train/speech_entropy": 4.0952301025390625, + "train/text_entropy": 1.4881044175889757, + "train/token_acc": 0.23290845886442643 + }, + { + "epoch": 0.19718726868985936, + "grad_norm": 13.439532279968262, + "learning_rate": 3.385948278683371e-06, + "loss": 0.958, + "step": 1332, + "train/speech_entropy": 3.9659900074273766, + "train/text_entropy": 1.1231196372739731, + "train/token_acc": 0.29595375722543354 + }, + { + "epoch": 0.19733530717986678, + "grad_norm": 12.64651870727539, + "learning_rate": 3.3795180697566188e-06, + "loss": 1.4883, + "step": 1333, + "train/speech_entropy": 4.2885284423828125, + "train/text_entropy": 1.3945411538178065, + "train/token_acc": 0.24780553679945982 + }, + { + "epoch": 0.19748334566987416, + "grad_norm": 16.061391830444336, + "learning_rate": 3.3730934215465265e-06, + "loss": 1.3945, + "step": 1334, + "train/speech_entropy": 4.432717846285913, + "train/text_entropy": 1.4836087686761381, + "train/token_acc": 0.24222035323801513 + }, + { + "epoch": 0.19763138415988157, + "grad_norm": 16.6766414642334, + "learning_rate": 3.3666743509009845e-06, + "loss": 1.5039, + "step": 1335, + "train/speech_entropy": 4.311969937983247, + "train/text_entropy": 1.4912614152238175, + "train/token_acc": 0.23982683982683983 + }, + { + "epoch": 0.19777942264988896, + "grad_norm": 12.780088424682617, + "learning_rate": 3.360260874653255e-06, + "loss": 0.79, + "step": 1336, + "train/speech_entropy": 4.6806447190504805, + "train/text_entropy": 0.8683770362367021, + "train/token_acc": 0.3138424821002387 + }, + { + "epoch": 0.19792746113989637, + "grad_norm": 10.716604232788086, + "learning_rate": 3.3538530096219355e-06, + "loss": 0.6475, + "step": 1337, + "train/speech_entropy": 3.608566575219261, + "train/text_entropy": 0.6879920959472656, + "train/token_acc": 0.3297180043383948 + }, + { + "epoch": 0.19807549962990378, + "grad_norm": 26.627029418945312, + "learning_rate": 3.347450772610904e-06, + "loss": 2.6094, + "step": 1338, + "train/speech_entropy": 4.331860573508523, + "train/text_entropy": 2.1151410082837083, + "train/token_acc": 0.2582972582972583 + }, + { + "epoch": 0.19822353811991117, + "grad_norm": 13.380576133728027, + "learning_rate": 3.3410541804092777e-06, + "loss": 0.9385, + "step": 1339, + "train/speech_entropy": 3.890535662827503, + "train/text_entropy": 1.011459191640218, + "train/token_acc": 0.3317929759704251 + }, + { + "epoch": 0.19837157660991858, + "grad_norm": 15.806732177734375, + "learning_rate": 3.334663249791378e-06, + "loss": 1.0059, + "step": 1340, + "train/speech_entropy": 4.061603167340472, + "train/text_entropy": 1.193205629546067, + "train/token_acc": 0.27325581395348836 + }, + { + "epoch": 0.198519615099926, + "grad_norm": 13.161589622497559, + "learning_rate": 3.3282779975166724e-06, + "loss": 1.001, + "step": 1341, + "train/speech_entropy": 4.478430402807582, + "train/text_entropy": 1.217997214733026, + "train/token_acc": 0.25029239766081873 + }, + { + "epoch": 0.19866765358993338, + "grad_norm": 11.853296279907227, + "learning_rate": 3.3218984403297404e-06, + "loss": 0.9233, + "step": 1342, + "train/speech_entropy": 4.353766263664449, + "train/text_entropy": 1.0815207286230846, + "train/token_acc": 0.2631578947368421 + }, + { + "epoch": 0.1988156920799408, + "grad_norm": 15.448352813720703, + "learning_rate": 3.315524594960231e-06, + "loss": 0.96, + "step": 1343, + "train/speech_entropy": 3.898376087617815, + "train/text_entropy": 0.7759414258585796, + "train/token_acc": 0.3194174757281553 + }, + { + "epoch": 0.19896373056994818, + "grad_norm": 12.04487133026123, + "learning_rate": 3.3091564781228074e-06, + "loss": 1.0371, + "step": 1344, + "train/speech_entropy": 4.256464177911932, + "train/text_entropy": 0.9498491723553028, + "train/token_acc": 0.27005988023952093 + }, + { + "epoch": 0.1991117690599556, + "grad_norm": 16.66502571105957, + "learning_rate": 3.3027941065171136e-06, + "loss": 1.0977, + "step": 1345, + "train/speech_entropy": 4.070039034234805, + "train/text_entropy": 1.2014727761559452, + "train/token_acc": 0.265625 + }, + { + "epoch": 0.199259807549963, + "grad_norm": 15.281628608703613, + "learning_rate": 3.2964374968277265e-06, + "loss": 1.0352, + "step": 1346, + "train/speech_entropy": 4.055072966601845, + "train/text_entropy": 1.1174993515014648, + "train/token_acc": 0.2937720329024677 + }, + { + "epoch": 0.1994078460399704, + "grad_norm": 7.115726470947266, + "learning_rate": 3.290086665724118e-06, + "loss": 0.355, + "step": 1347, + "train/speech_entropy": 3.526806458624534, + "train/text_entropy": 0.3524196342735439, + "train/token_acc": 0.3712374581939799 + }, + { + "epoch": 0.1995558845299778, + "grad_norm": 12.464864730834961, + "learning_rate": 3.2837416298605983e-06, + "loss": 0.5879, + "step": 1348, + "train/speech_entropy": 3.7358276974502487, + "train/text_entropy": 0.5352592598902036, + "train/token_acc": 0.29906542056074764 + }, + { + "epoch": 0.19970392301998519, + "grad_norm": 12.366423606872559, + "learning_rate": 3.2774024058762864e-06, + "loss": 0.7832, + "step": 1349, + "train/speech_entropy": 3.854968835619838, + "train/text_entropy": 0.8161645332972208, + "train/token_acc": 0.31869254341164455 + }, + { + "epoch": 0.1998519615099926, + "grad_norm": 15.808449745178223, + "learning_rate": 3.2710690103950565e-06, + "loss": 1.2852, + "step": 1350, + "train/speech_entropy": 4.05692879813058, + "train/text_entropy": 1.4898712723343461, + "train/token_acc": 0.2714570858283433 + }, + { + "epoch": 0.2, + "grad_norm": 14.257306098937988, + "learning_rate": 3.264741460025501e-06, + "loss": 1.3828, + "step": 1351, + "train/speech_entropy": 4.260101027715773, + "train/text_entropy": 1.5119639756944445, + "train/token_acc": 0.24784313725490195 + }, + { + "epoch": 0.2001480384900074, + "grad_norm": 14.8094482421875, + "learning_rate": 3.2584197713608846e-06, + "loss": 1.7734, + "step": 1352, + "train/speech_entropy": 4.2418651416099955, + "train/text_entropy": 1.3931387716264867, + "train/token_acc": 0.24333800841514727 + }, + { + "epoch": 0.2002960769800148, + "grad_norm": 17.0958251953125, + "learning_rate": 3.252103960979095e-06, + "loss": 0.8848, + "step": 1353, + "train/speech_entropy": 3.916046224835748, + "train/text_entropy": 0.7772214850600885, + "train/token_acc": 0.3358208955223881 + }, + { + "epoch": 0.2004441154700222, + "grad_norm": 13.995932579040527, + "learning_rate": 3.2457940454426118e-06, + "loss": 0.9775, + "step": 1354, + "train/speech_entropy": 3.723129785753536, + "train/text_entropy": 0.904811622193141, + "train/token_acc": 0.30920245398773005 + }, + { + "epoch": 0.2005921539600296, + "grad_norm": 17.390775680541992, + "learning_rate": 3.2394900412984527e-06, + "loss": 1.3125, + "step": 1355, + "train/speech_entropy": 4.183224580977383, + "train/text_entropy": 1.4153907931580836, + "train/token_acc": 0.2754569190600522 + }, + { + "epoch": 0.20074019245003702, + "grad_norm": 12.462621688842773, + "learning_rate": 3.2331919650781322e-06, + "loss": 0.9023, + "step": 1356, + "train/speech_entropy": 4.151480972841104, + "train/text_entropy": 1.143988030494293, + "train/token_acc": 0.24487704918032788 + }, + { + "epoch": 0.2008882309400444, + "grad_norm": 12.742107391357422, + "learning_rate": 3.226899833297621e-06, + "loss": 1.1406, + "step": 1357, + "train/speech_entropy": 4.306924432793587, + "train/text_entropy": 1.3100319764004695, + "train/token_acc": 0.2629144178874325 + }, + { + "epoch": 0.20103626943005182, + "grad_norm": 14.695509910583496, + "learning_rate": 3.2206136624573024e-06, + "loss": 0.8184, + "step": 1358, + "train/speech_entropy": 3.82014171441641, + "train/text_entropy": 0.8919369220733643, + "train/token_acc": 0.3362175525339926 + }, + { + "epoch": 0.2011843079200592, + "grad_norm": 14.519669532775879, + "learning_rate": 3.214333469041927e-06, + "loss": 0.6914, + "step": 1359, + "train/speech_entropy": 4.130479236577181, + "train/text_entropy": 0.8690312703450521, + "train/token_acc": 0.2767732962447844 + }, + { + "epoch": 0.20133234641006661, + "grad_norm": 9.399330139160156, + "learning_rate": 3.208059269520568e-06, + "loss": 0.7202, + "step": 1360, + "train/speech_entropy": 4.15904390381712, + "train/text_entropy": 1.0285636010254628, + "train/token_acc": 0.2704280155642023 + }, + { + "epoch": 0.20148038490007403, + "grad_norm": 18.307470321655273, + "learning_rate": 3.2017910803465824e-06, + "loss": 1.2148, + "step": 1361, + "train/speech_entropy": 4.006617171041081, + "train/text_entropy": 1.6140249181676793, + "train/token_acc": 0.3034379671150972 + }, + { + "epoch": 0.2016284233900814, + "grad_norm": 16.893739700317383, + "learning_rate": 3.1955289179575677e-06, + "loss": 1.3047, + "step": 1362, + "train/speech_entropy": 4.245468258990922, + "train/text_entropy": 1.3225291295705108, + "train/token_acc": 0.27289377289377287 + }, + { + "epoch": 0.20177646188008883, + "grad_norm": 12.69690990447998, + "learning_rate": 3.1892727987753125e-06, + "loss": 0.8721, + "step": 1363, + "train/speech_entropy": 4.107955875468165, + "train/text_entropy": 0.990480717250279, + "train/token_acc": 0.30020491803278687 + }, + { + "epoch": 0.20192450037009624, + "grad_norm": 19.742280960083008, + "learning_rate": 3.18302273920576e-06, + "loss": 2.3516, + "step": 1364, + "train/speech_entropy": 4.086795963357496, + "train/text_entropy": 2.1505685735631874, + "train/token_acc": 0.2468354430379747 + }, + { + "epoch": 0.20207253886010362, + "grad_norm": 15.545560836791992, + "learning_rate": 3.176778755638965e-06, + "loss": 0.8359, + "step": 1365, + "train/speech_entropy": 4.108106291695927, + "train/text_entropy": 1.0449426120385192, + "train/token_acc": 0.294973544973545 + }, + { + "epoch": 0.20222057735011104, + "grad_norm": 19.413711547851562, + "learning_rate": 3.170540864449044e-06, + "loss": 1.2109, + "step": 1366, + "train/speech_entropy": 3.7986746574452606, + "train/text_entropy": 1.0473118672328712, + "train/token_acc": 0.299645390070922 + }, + { + "epoch": 0.20236861584011842, + "grad_norm": 22.601226806640625, + "learning_rate": 3.164309081994141e-06, + "loss": 1.7578, + "step": 1367, + "train/speech_entropy": 3.9127065621170343, + "train/text_entropy": 1.9454391354420146, + "train/token_acc": 0.25316455696202533 + }, + { + "epoch": 0.20251665433012583, + "grad_norm": 19.789331436157227, + "learning_rate": 3.1580834246163807e-06, + "loss": 1.543, + "step": 1368, + "train/speech_entropy": 4.431060907494931, + "train/text_entropy": 1.5677173783626464, + "train/token_acc": 0.23421262989608313 + }, + { + "epoch": 0.20266469282013325, + "grad_norm": 13.8507080078125, + "learning_rate": 3.15186390864182e-06, + "loss": 0.8018, + "step": 1369, + "train/speech_entropy": 3.7823990360383064, + "train/text_entropy": 0.5960753228929307, + "train/token_acc": 0.31282722513089006 + }, + { + "epoch": 0.20281273131014063, + "grad_norm": 17.18282699584961, + "learning_rate": 3.1456505503804146e-06, + "loss": 1.1934, + "step": 1370, + "train/speech_entropy": 4.147467719184028, + "train/text_entropy": 1.5870654944217566, + "train/token_acc": 0.27005649717514124 + }, + { + "epoch": 0.20296076980014804, + "grad_norm": 15.96893310546875, + "learning_rate": 3.139443366125976e-06, + "loss": 0.9609, + "step": 1371, + "train/speech_entropy": 3.8465880713195166, + "train/text_entropy": 0.7100686345781598, + "train/token_acc": 0.30522765598650925 + }, + { + "epoch": 0.20310880829015543, + "grad_norm": 10.521167755126953, + "learning_rate": 3.1332423721561182e-06, + "loss": 0.9619, + "step": 1372, + "train/speech_entropy": 4.291484701882845, + "train/text_entropy": 1.138902857787627, + "train/token_acc": 0.2758384668035592 + }, + { + "epoch": 0.20325684678016284, + "grad_norm": 20.2841854095459, + "learning_rate": 3.1270475847322245e-06, + "loss": 1.5039, + "step": 1373, + "train/speech_entropy": 3.8159472071482035, + "train/text_entropy": 1.5981176467168898, + "train/token_acc": 0.25818639798488663 + }, + { + "epoch": 0.20340488527017025, + "grad_norm": 12.954930305480957, + "learning_rate": 3.1208590200994066e-06, + "loss": 1.0332, + "step": 1374, + "train/speech_entropy": 4.428543913522063, + "train/text_entropy": 1.244980723587508, + "train/token_acc": 0.25913338997451146 + }, + { + "epoch": 0.20355292376017764, + "grad_norm": 12.911598205566406, + "learning_rate": 3.1146766944864506e-06, + "loss": 1.0078, + "step": 1375, + "train/speech_entropy": 4.1281459670664145, + "train/text_entropy": 0.9764346395220075, + "train/token_acc": 0.27787307032590053 + }, + { + "epoch": 0.20370096225018505, + "grad_norm": 15.843660354614258, + "learning_rate": 3.1085006241057835e-06, + "loss": 1.209, + "step": 1376, + "train/speech_entropy": 4.435982685529529, + "train/text_entropy": 0.9983940124511719, + "train/token_acc": 0.26906474820143883 + }, + { + "epoch": 0.20384900074019244, + "grad_norm": 19.33914566040039, + "learning_rate": 3.1023308251534335e-06, + "loss": 1.1367, + "step": 1377, + "train/speech_entropy": 3.9209070304412528, + "train/text_entropy": 1.2294810248202965, + "train/token_acc": 0.2859504132231405 + }, + { + "epoch": 0.20399703923019985, + "grad_norm": 9.013228416442871, + "learning_rate": 3.0961673138089766e-06, + "loss": 1.0039, + "step": 1378, + "train/speech_entropy": 4.441503324501092, + "train/text_entropy": 1.3751838275464423, + "train/token_acc": 0.26116479498563594 + }, + { + "epoch": 0.20414507772020726, + "grad_norm": 14.694845199584961, + "learning_rate": 3.0900101062355028e-06, + "loss": 1.5391, + "step": 1379, + "train/speech_entropy": 4.228690355343926, + "train/text_entropy": 1.4682953883784478, + "train/token_acc": 0.23570869224745497 + }, + { + "epoch": 0.20429311621021465, + "grad_norm": 19.188339233398438, + "learning_rate": 3.0838592185795746e-06, + "loss": 1.4805, + "step": 1380, + "train/speech_entropy": 4.195891415886357, + "train/text_entropy": 1.6039617671522983, + "train/token_acc": 0.2572283150548355 + }, + { + "epoch": 0.20444115470022206, + "grad_norm": 14.236336708068848, + "learning_rate": 3.0777146669711722e-06, + "loss": 1.293, + "step": 1381, + "train/speech_entropy": 4.883294236456641, + "train/text_entropy": 1.2599537823651288, + "train/token_acc": 0.275049115913556 + }, + { + "epoch": 0.20458919319022945, + "grad_norm": 19.055261611938477, + "learning_rate": 3.071576467523668e-06, + "loss": 1.7812, + "step": 1382, + "train/speech_entropy": 4.091733825482158, + "train/text_entropy": 1.8553076730647557, + "train/token_acc": 0.2606951871657754 + }, + { + "epoch": 0.20473723168023686, + "grad_norm": 14.352970123291016, + "learning_rate": 3.0654446363337793e-06, + "loss": 0.7178, + "step": 1383, + "train/speech_entropy": 3.9273214363992364, + "train/text_entropy": 0.5400414736765735, + "train/token_acc": 0.35404339250493094 + }, + { + "epoch": 0.20488527017024427, + "grad_norm": 7.66849422454834, + "learning_rate": 3.05931918948151e-06, + "loss": 0.374, + "step": 1384, + "train/speech_entropy": 3.9370967360626166, + "train/text_entropy": 0.3952630023567044, + "train/token_acc": 0.3282789992418499 + }, + { + "epoch": 0.20503330866025166, + "grad_norm": 16.479076385498047, + "learning_rate": 3.053200143030136e-06, + "loss": 1.0469, + "step": 1385, + "train/speech_entropy": 3.687144362081693, + "train/text_entropy": 1.160533206153462, + "train/token_acc": 0.29112271540469975 + }, + { + "epoch": 0.20518134715025907, + "grad_norm": 17.420866012573242, + "learning_rate": 3.0470875130261444e-06, + "loss": 1.457, + "step": 1386, + "train/speech_entropy": 4.233724203454443, + "train/text_entropy": 1.2716153772865855, + "train/token_acc": 0.2739383846794338 + }, + { + "epoch": 0.20532938564026648, + "grad_norm": 12.128999710083008, + "learning_rate": 3.04098131549919e-06, + "loss": 0.8809, + "step": 1387, + "train/speech_entropy": 4.192563841943027, + "train/text_entropy": 0.8856969073535951, + "train/token_acc": 0.3058510638297872 + }, + { + "epoch": 0.20547742413027387, + "grad_norm": 17.75270652770996, + "learning_rate": 3.0348815664620677e-06, + "loss": 1.1895, + "step": 1388, + "train/speech_entropy": 4.316017830910409, + "train/text_entropy": 1.4949342604966191, + "train/token_acc": 0.2606060606060606 + }, + { + "epoch": 0.20562546262028128, + "grad_norm": 9.219197273254395, + "learning_rate": 3.028788281910657e-06, + "loss": 0.5596, + "step": 1389, + "train/speech_entropy": 4.411161106233538, + "train/text_entropy": 0.8055492529839826, + "train/token_acc": 0.3119453924914676 + }, + { + "epoch": 0.20577350111028866, + "grad_norm": 15.4245023727417, + "learning_rate": 3.0227014778238844e-06, + "loss": 1.4336, + "step": 1390, + "train/speech_entropy": 4.502210344587054, + "train/text_entropy": 1.4498123574045907, + "train/token_acc": 0.23410852713178296 + }, + { + "epoch": 0.20592153960029608, + "grad_norm": 20.849706649780273, + "learning_rate": 3.0166211701636895e-06, + "loss": 1.6055, + "step": 1391, + "train/speech_entropy": 4.121858883527369, + "train/text_entropy": 1.387037325306099, + "train/token_acc": 0.2558139534883721 + }, + { + "epoch": 0.2060695780903035, + "grad_norm": 20.70317840576172, + "learning_rate": 3.010547374874963e-06, + "loss": 1.5352, + "step": 1392, + "train/speech_entropy": 4.3097390091387595, + "train/text_entropy": 1.6413304032950566, + "train/token_acc": 0.22024471635150167 + }, + { + "epoch": 0.20621761658031088, + "grad_norm": 10.397799491882324, + "learning_rate": 3.004480107885529e-06, + "loss": 0.502, + "step": 1393, + "train/speech_entropy": 4.057804273522419, + "train/text_entropy": 0.8328304175870964, + "train/token_acc": 0.3072429906542056 + }, + { + "epoch": 0.2063656550703183, + "grad_norm": 16.319866180419922, + "learning_rate": 2.99841938510609e-06, + "loss": 0.7266, + "step": 1394, + "train/speech_entropy": 4.128580729166667, + "train/text_entropy": 0.8806558088822798, + "train/token_acc": 0.25199362041467305 + }, + { + "epoch": 0.20651369356032567, + "grad_norm": 12.818305015563965, + "learning_rate": 2.9923652224301803e-06, + "loss": 1.3301, + "step": 1395, + "train/speech_entropy": 4.198040178754511, + "train/text_entropy": 1.2306634696425907, + "train/token_acc": 0.2622415669205658 + }, + { + "epoch": 0.20666173205033309, + "grad_norm": 11.94715404510498, + "learning_rate": 2.9863176357341372e-06, + "loss": 0.9512, + "step": 1396, + "train/speech_entropy": 3.983921713611931, + "train/text_entropy": 0.9965471670447118, + "train/token_acc": 0.2971468336812804 + }, + { + "epoch": 0.2068097705403405, + "grad_norm": 9.32165241241455, + "learning_rate": 2.9802766408770556e-06, + "loss": 0.4238, + "step": 1397, + "train/speech_entropy": 3.4728108093104004, + "train/text_entropy": 0.49593996471828883, + "train/token_acc": 0.3532494758909853 + }, + { + "epoch": 0.20695780903034788, + "grad_norm": 16.66611099243164, + "learning_rate": 2.9742422537007344e-06, + "loss": 0.9785, + "step": 1398, + "train/speech_entropy": 4.163751833441982, + "train/text_entropy": 0.9427087420508975, + "train/token_acc": 0.28449197860962566 + }, + { + "epoch": 0.2071058475203553, + "grad_norm": 13.512340545654297, + "learning_rate": 2.968214490029653e-06, + "loss": 1.1543, + "step": 1399, + "train/speech_entropy": 4.041968699410268, + "train/text_entropy": 1.1622907222543724, + "train/token_acc": 0.2719033232628399 + }, + { + "epoch": 0.20725388601036268, + "grad_norm": 12.575355529785156, + "learning_rate": 2.9621933656709223e-06, + "loss": 0.8745, + "step": 1400, + "train/speech_entropy": 3.641991489955357, + "train/text_entropy": 0.8363325652353006, + "train/token_acc": 0.3268876611418048 + }, + { + "epoch": 0.2074019245003701, + "grad_norm": 18.566261291503906, + "learning_rate": 2.9561788964142324e-06, + "loss": 1.168, + "step": 1401, + "train/speech_entropy": 3.968509793281555, + "train/text_entropy": 1.3767629529609056, + "train/token_acc": 0.28391167192429023 + }, + { + "epoch": 0.2075499629903775, + "grad_norm": 21.960844039916992, + "learning_rate": 2.9501710980318333e-06, + "loss": 2.0977, + "step": 1402, + "train/speech_entropy": 4.1346806086546986, + "train/text_entropy": 1.5418258346877731, + "train/token_acc": 0.22287735849056603 + }, + { + "epoch": 0.2076980014803849, + "grad_norm": 16.912288665771484, + "learning_rate": 2.9441699862784734e-06, + "loss": 1.2969, + "step": 1403, + "train/speech_entropy": 4.017138384849369, + "train/text_entropy": 1.2782864290125229, + "train/token_acc": 0.2591324200913242 + }, + { + "epoch": 0.2078460399703923, + "grad_norm": 17.27135467529297, + "learning_rate": 2.938175576891368e-06, + "loss": 1.1211, + "step": 1404, + "train/speech_entropy": 4.091995039796518, + "train/text_entropy": 1.1611366809253962, + "train/token_acc": 0.27586206896551724 + }, + { + "epoch": 0.20799407846039972, + "grad_norm": 14.898608207702637, + "learning_rate": 2.932187885590159e-06, + "loss": 1.5195, + "step": 1405, + "train/speech_entropy": 4.37033212858181, + "train/text_entropy": 1.5204684705619353, + "train/token_acc": 0.23331039229181005 + }, + { + "epoch": 0.2081421169504071, + "grad_norm": 7.481289386749268, + "learning_rate": 2.926206928076868e-06, + "loss": 0.3457, + "step": 1406, + "train/speech_entropy": 3.839844203792983, + "train/text_entropy": 0.42563702983240925, + "train/token_acc": 0.32628398791540786 + }, + { + "epoch": 0.20829015544041452, + "grad_norm": 14.9284086227417, + "learning_rate": 2.920232720035857e-06, + "loss": 1.5625, + "step": 1407, + "train/speech_entropy": 4.282013732117492, + "train/text_entropy": 1.7047319546551771, + "train/token_acc": 0.28034300791556727 + }, + { + "epoch": 0.2084381939304219, + "grad_norm": 15.528785705566406, + "learning_rate": 2.914265277133793e-06, + "loss": 1.4805, + "step": 1408, + "train/speech_entropy": 4.403320113038705, + "train/text_entropy": 1.5240077244416448, + "train/token_acc": 0.24427994616419918 + }, + { + "epoch": 0.2085862324204293, + "grad_norm": 15.86761474609375, + "learning_rate": 2.9083046150195983e-06, + "loss": 1.4805, + "step": 1409, + "train/speech_entropy": 4.197982671606631, + "train/text_entropy": 1.5953357146817004, + "train/token_acc": 0.2638997650743931 + }, + { + "epoch": 0.20873427091043673, + "grad_norm": 15.07756519317627, + "learning_rate": 2.9023507493244123e-06, + "loss": 1.2031, + "step": 1410, + "train/speech_entropy": 4.029031801463372, + "train/text_entropy": 1.194720004467254, + "train/token_acc": 0.2865853658536585 + }, + { + "epoch": 0.2088823094004441, + "grad_norm": 17.035722732543945, + "learning_rate": 2.896403695661557e-06, + "loss": 1.0078, + "step": 1411, + "train/speech_entropy": 3.947679554332386, + "train/text_entropy": 1.110508194676152, + "train/token_acc": 0.2773722627737226 + }, + { + "epoch": 0.20903034789045152, + "grad_norm": 21.31913948059082, + "learning_rate": 2.890463469626487e-06, + "loss": 1.5547, + "step": 1412, + "train/speech_entropy": 3.917927644690689, + "train/text_entropy": 1.566358443229429, + "train/token_acc": 0.27247191011235955 + }, + { + "epoch": 0.2091783863804589, + "grad_norm": 47.536136627197266, + "learning_rate": 2.8845300867967494e-06, + "loss": 2.3867, + "step": 1413, + "train/speech_entropy": 3.926976627334025, + "train/text_entropy": 1.405564202202691, + "train/token_acc": 0.26440677966101694 + }, + { + "epoch": 0.20932642487046632, + "grad_norm": 19.74312973022461, + "learning_rate": 2.8786035627319507e-06, + "loss": 1.7852, + "step": 1414, + "train/speech_entropy": 4.140663028134735, + "train/text_entropy": 1.8793099796070771, + "train/token_acc": 0.28201970443349755 + }, + { + "epoch": 0.20947446336047373, + "grad_norm": 15.59313678741455, + "learning_rate": 2.8726839129737115e-06, + "loss": 0.9639, + "step": 1415, + "train/speech_entropy": 4.114820055923257, + "train/text_entropy": 1.0268908455258323, + "train/token_acc": 0.3063457330415755 + }, + { + "epoch": 0.20962250185048112, + "grad_norm": 17.90682601928711, + "learning_rate": 2.866771153045621e-06, + "loss": 1.5977, + "step": 1416, + "train/speech_entropy": 4.251896149358227, + "train/text_entropy": 1.5114816420571098, + "train/token_acc": 0.2519305019305019 + }, + { + "epoch": 0.20977054034048853, + "grad_norm": 19.96002960205078, + "learning_rate": 2.8608652984532013e-06, + "loss": 1.3203, + "step": 1417, + "train/speech_entropy": 4.047927513294134, + "train/text_entropy": 1.43823147516181, + "train/token_acc": 0.2599502487562189 + }, + { + "epoch": 0.20991857883049592, + "grad_norm": 12.123340606689453, + "learning_rate": 2.854966364683872e-06, + "loss": 1.1016, + "step": 1418, + "train/speech_entropy": 4.185500730185782, + "train/text_entropy": 1.0056030685837205, + "train/token_acc": 0.2925851703406814 + }, + { + "epoch": 0.21006661732050333, + "grad_norm": 14.94388198852539, + "learning_rate": 2.8490743672068953e-06, + "loss": 1.3594, + "step": 1419, + "train/speech_entropy": 4.359058254791491, + "train/text_entropy": 1.6618592828141208, + "train/token_acc": 0.2668067226890756 + }, + { + "epoch": 0.21021465581051074, + "grad_norm": 14.3712797164917, + "learning_rate": 2.8431893214733485e-06, + "loss": 1.0664, + "step": 1420, + "train/speech_entropy": 4.245962721791433, + "train/text_entropy": 1.1888738622002424, + "train/token_acc": 0.28355196770938446 + }, + { + "epoch": 0.21036269430051813, + "grad_norm": 20.123964309692383, + "learning_rate": 2.8373112429160787e-06, + "loss": 0.8594, + "step": 1421, + "train/speech_entropy": 4.00348490292264, + "train/text_entropy": 0.5902787883107256, + "train/token_acc": 0.2872340425531915 + }, + { + "epoch": 0.21051073279052554, + "grad_norm": 9.144773483276367, + "learning_rate": 2.831440146949663e-06, + "loss": 0.4326, + "step": 1422, + "train/speech_entropy": 3.9930111578179064, + "train/text_entropy": 0.422354736713448, + "train/token_acc": 0.3136150234741784 + }, + { + "epoch": 0.21065877128053292, + "grad_norm": 15.934069633483887, + "learning_rate": 2.8255760489703623e-06, + "loss": 1.1777, + "step": 1423, + "train/speech_entropy": 4.127948708060966, + "train/text_entropy": 1.1571531035209615, + "train/token_acc": 0.2580357142857143 + }, + { + "epoch": 0.21080680977054034, + "grad_norm": 24.20587730407715, + "learning_rate": 2.8197189643560952e-06, + "loss": 1.75, + "step": 1424, + "train/speech_entropy": 4.244256554954728, + "train/text_entropy": 1.4771753741848854, + "train/token_acc": 0.2834138486312399 + }, + { + "epoch": 0.21095484826054775, + "grad_norm": 16.257909774780273, + "learning_rate": 2.8138689084663773e-06, + "loss": 1.0039, + "step": 1425, + "train/speech_entropy": 4.164798643530869, + "train/text_entropy": 1.1682182265205618, + "train/token_acc": 0.2807991120976693 + }, + { + "epoch": 0.21110288675055514, + "grad_norm": 12.397199630737305, + "learning_rate": 2.8080258966423012e-06, + "loss": 0.9482, + "step": 1426, + "train/speech_entropy": 3.8943781846984056, + "train/text_entropy": 1.01445503432516, + "train/token_acc": 0.3037109375 + }, + { + "epoch": 0.21125092524056255, + "grad_norm": 17.298961639404297, + "learning_rate": 2.8021899442064876e-06, + "loss": 1.6055, + "step": 1427, + "train/speech_entropy": 4.266448872086716, + "train/text_entropy": 1.941702161516462, + "train/token_acc": 0.26170798898071623 + }, + { + "epoch": 0.21139896373056996, + "grad_norm": 11.26915454864502, + "learning_rate": 2.796361066463035e-06, + "loss": 0.5835, + "step": 1428, + "train/speech_entropy": 3.9809443711193366, + "train/text_entropy": 0.6303118830141814, + "train/token_acc": 0.3027604630454141 + }, + { + "epoch": 0.21154700222057735, + "grad_norm": 17.578935623168945, + "learning_rate": 2.7905392786974984e-06, + "loss": 1.2812, + "step": 1429, + "train/speech_entropy": 4.159281412760417, + "train/text_entropy": 1.3134782197045498, + "train/token_acc": 0.26646706586826346 + }, + { + "epoch": 0.21169504071058476, + "grad_norm": 13.42276668548584, + "learning_rate": 2.784724596176841e-06, + "loss": 0.6582, + "step": 1430, + "train/speech_entropy": 3.7886747226093482, + "train/text_entropy": 0.5770428073021674, + "train/token_acc": 0.33739837398373984 + }, + { + "epoch": 0.21184307920059214, + "grad_norm": 9.346821784973145, + "learning_rate": 2.778917034149384e-06, + "loss": 0.5713, + "step": 1431, + "train/speech_entropy": 3.9356623946909606, + "train/text_entropy": 0.6515642303267336, + "train/token_acc": 0.3216216216216216 + }, + { + "epoch": 0.21199111769059956, + "grad_norm": 16.56108856201172, + "learning_rate": 2.773116607844784e-06, + "loss": 1.3125, + "step": 1432, + "train/speech_entropy": 4.25712126647534, + "train/text_entropy": 1.4518189018752974, + "train/token_acc": 0.2982832618025751 + }, + { + "epoch": 0.21213915618060697, + "grad_norm": 17.892837524414062, + "learning_rate": 2.7673233324739866e-06, + "loss": 1.1758, + "step": 1433, + "train/speech_entropy": 4.158436217718379, + "train/text_entropy": 1.464394613029131, + "train/token_acc": 0.27448609431680776 + }, + { + "epoch": 0.21228719467061435, + "grad_norm": 13.912860870361328, + "learning_rate": 2.7615372232291747e-06, + "loss": 1.0, + "step": 1434, + "train/speech_entropy": 4.200264587002375, + "train/text_entropy": 1.1484551125384392, + "train/token_acc": 0.29547844374342797 + }, + { + "epoch": 0.21243523316062177, + "grad_norm": 14.880099296569824, + "learning_rate": 2.7557582952837496e-06, + "loss": 1.5547, + "step": 1435, + "train/speech_entropy": 4.183373939540378, + "train/text_entropy": 1.6765886320100798, + "train/token_acc": 0.26827586206896553 + }, + { + "epoch": 0.21258327165062915, + "grad_norm": 15.797558784484863, + "learning_rate": 2.7499865637922757e-06, + "loss": 1.2383, + "step": 1436, + "train/speech_entropy": 4.109728035947825, + "train/text_entropy": 1.6148040294647217, + "train/token_acc": 0.26 + }, + { + "epoch": 0.21273131014063656, + "grad_norm": 9.799695014953613, + "learning_rate": 2.7442220438904433e-06, + "loss": 0.9097, + "step": 1437, + "train/speech_entropy": 4.1945528940384955, + "train/text_entropy": 1.0706161263944054, + "train/token_acc": 0.31024774774774777 + }, + { + "epoch": 0.21287934863064398, + "grad_norm": 18.76363182067871, + "learning_rate": 2.7384647506950366e-06, + "loss": 1.4883, + "step": 1438, + "train/speech_entropy": 4.041690442996699, + "train/text_entropy": 1.207360705813846, + "train/token_acc": 0.2693877551020408 + }, + { + "epoch": 0.21302738712065136, + "grad_norm": 18.006366729736328, + "learning_rate": 2.7327146993038855e-06, + "loss": 0.9785, + "step": 1439, + "train/speech_entropy": 3.912704148065476, + "train/text_entropy": 0.9634213698537726, + "train/token_acc": 0.3082942097026604 + }, + { + "epoch": 0.21317542561065878, + "grad_norm": 17.5358943939209, + "learning_rate": 2.726971904795827e-06, + "loss": 1.7461, + "step": 1440, + "train/speech_entropy": 4.487296722628546, + "train/text_entropy": 1.6347430721744076, + "train/token_acc": 0.24239007891770012 + }, + { + "epoch": 0.21332346410066616, + "grad_norm": 11.794878005981445, + "learning_rate": 2.721236382230674e-06, + "loss": 0.9766, + "step": 1441, + "train/speech_entropy": 3.8438369904628193, + "train/text_entropy": 0.9419973645891462, + "train/token_acc": 0.2870782940802037 + }, + { + "epoch": 0.21347150259067357, + "grad_norm": 9.339743614196777, + "learning_rate": 2.715508146649164e-06, + "loss": 0.6592, + "step": 1442, + "train/speech_entropy": 3.7647007063860527, + "train/text_entropy": 0.5696325796621817, + "train/token_acc": 0.33670520231213874 + }, + { + "epoch": 0.21361954108068099, + "grad_norm": 14.274385452270508, + "learning_rate": 2.7097872130729253e-06, + "loss": 1.3867, + "step": 1443, + "train/speech_entropy": 4.364756778949768, + "train/text_entropy": 1.4887540123679421, + "train/token_acc": 0.26093189964157704 + }, + { + "epoch": 0.21376757957068837, + "grad_norm": 124.58570861816406, + "learning_rate": 2.7040735965044416e-06, + "loss": 0.8438, + "step": 1444, + "train/speech_entropy": 3.745951843261719, + "train/text_entropy": 0.5588912732071347, + "train/token_acc": 0.32670454545454547 + }, + { + "epoch": 0.21391561806069578, + "grad_norm": 16.58477210998535, + "learning_rate": 2.698367311927006e-06, + "loss": 1.543, + "step": 1445, + "train/speech_entropy": 4.2530614656038495, + "train/text_entropy": 1.4055679321289063, + "train/token_acc": 0.2517580872011252 + }, + { + "epoch": 0.2140636565507032, + "grad_norm": 15.223508834838867, + "learning_rate": 2.692668374304684e-06, + "loss": 1.6602, + "step": 1446, + "train/speech_entropy": 4.405952866170031, + "train/text_entropy": 1.7263599164558179, + "train/token_acc": 0.2403297400126823 + }, + { + "epoch": 0.21421169504071058, + "grad_norm": 17.040950775146484, + "learning_rate": 2.6869767985822735e-06, + "loss": 0.8613, + "step": 1447, + "train/speech_entropy": 3.8508809205314987, + "train/text_entropy": 0.9215457392673866, + "train/token_acc": 0.2743801652892562 + }, + { + "epoch": 0.214359733530718, + "grad_norm": 17.917377471923828, + "learning_rate": 2.6812925996852717e-06, + "loss": 1.4668, + "step": 1448, + "train/speech_entropy": 4.466236114501953, + "train/text_entropy": 1.4784420683183743, + "train/token_acc": 0.26222222222222225 + }, + { + "epoch": 0.21450777202072538, + "grad_norm": 15.787331581115723, + "learning_rate": 2.675615792519826e-06, + "loss": 1.2559, + "step": 1449, + "train/speech_entropy": 4.128433476053543, + "train/text_entropy": 1.26728609085083, + "train/token_acc": 0.3050847457627119 + }, + { + "epoch": 0.2146558105107328, + "grad_norm": 16.04866600036621, + "learning_rate": 2.6699463919727e-06, + "loss": 1.2861, + "step": 1450, + "train/speech_entropy": 3.877707197081368, + "train/text_entropy": 0.9260738318046314, + "train/token_acc": 0.33665338645418325 + }, + { + "epoch": 0.2148038490007402, + "grad_norm": 6.775094985961914, + "learning_rate": 2.664284412911238e-06, + "loss": 0.4375, + "step": 1451, + "train/speech_entropy": 3.8741221224324085, + "train/text_entropy": 0.5710395412399547, + "train/token_acc": 0.3310595725329695 + }, + { + "epoch": 0.2149518874907476, + "grad_norm": 20.344539642333984, + "learning_rate": 2.658629870183319e-06, + "loss": 1.4434, + "step": 1452, + "train/speech_entropy": 4.016532389322917, + "train/text_entropy": 1.6320130157470703, + "train/token_acc": 0.2638655462184874 + }, + { + "epoch": 0.215099925980755, + "grad_norm": 19.501554489135742, + "learning_rate": 2.652982778617321e-06, + "loss": 1.4414, + "step": 1453, + "train/speech_entropy": 3.932740812848328, + "train/text_entropy": 1.3204706656832654, + "train/token_acc": 0.27796610169491526 + }, + { + "epoch": 0.2152479644707624, + "grad_norm": 14.784769058227539, + "learning_rate": 2.6473431530220857e-06, + "loss": 1.2246, + "step": 1454, + "train/speech_entropy": 4.623717721584624, + "train/text_entropy": 1.177434257839037, + "train/token_acc": 0.24724264705882354 + }, + { + "epoch": 0.2153960029607698, + "grad_norm": 5.899946689605713, + "learning_rate": 2.6417110081868726e-06, + "loss": 0.2031, + "step": 1455, + "train/speech_entropy": 3.3607001247176207, + "train/text_entropy": 0.24710634019639757, + "train/token_acc": 0.3664670658682635 + }, + { + "epoch": 0.2155440414507772, + "grad_norm": 12.129964828491211, + "learning_rate": 2.6360863588813224e-06, + "loss": 1.2129, + "step": 1456, + "train/speech_entropy": 4.526889430569225, + "train/text_entropy": 0.946094831894074, + "train/token_acc": 0.2787794729542302 + }, + { + "epoch": 0.2156920799407846, + "grad_norm": 15.585841178894043, + "learning_rate": 2.6304692198554295e-06, + "loss": 1.1875, + "step": 1457, + "train/speech_entropy": 4.111678707558391, + "train/text_entropy": 1.2306444207016303, + "train/token_acc": 0.26528692380056446 + }, + { + "epoch": 0.215840118430792, + "grad_norm": 10.438115119934082, + "learning_rate": 2.6248596058394765e-06, + "loss": 0.9917, + "step": 1458, + "train/speech_entropy": 4.0194107757951425, + "train/text_entropy": 1.3024425128149608, + "train/token_acc": 0.2913907284768212 + }, + { + "epoch": 0.2159881569207994, + "grad_norm": 10.38787841796875, + "learning_rate": 2.619257531544027e-06, + "loss": 0.9229, + "step": 1459, + "train/speech_entropy": 3.846661031640338, + "train/text_entropy": 0.7608995945495967, + "train/token_acc": 0.31842260153031193 + }, + { + "epoch": 0.2161361954108068, + "grad_norm": 19.14491081237793, + "learning_rate": 2.6136630116598715e-06, + "loss": 1.8125, + "step": 1460, + "train/speech_entropy": 4.496091454429964, + "train/text_entropy": 1.7204690104379703, + "train/token_acc": 0.2319634703196347 + }, + { + "epoch": 0.21628423390081422, + "grad_norm": 15.05092716217041, + "learning_rate": 2.608076060857979e-06, + "loss": 1.5, + "step": 1461, + "train/speech_entropy": 4.242507063092225, + "train/text_entropy": 1.432177007765997, + "train/token_acc": 0.2625745950554135 + }, + { + "epoch": 0.2164322723908216, + "grad_norm": 12.800251960754395, + "learning_rate": 2.6024966937894824e-06, + "loss": 1.4258, + "step": 1462, + "train/speech_entropy": 4.260251098137263, + "train/text_entropy": 1.5910980130997172, + "train/token_acc": 0.24556541019955655 + }, + { + "epoch": 0.21658031088082902, + "grad_norm": 9.848516464233398, + "learning_rate": 2.5969249250856183e-06, + "loss": 0.2988, + "step": 1463, + "train/speech_entropy": 3.4024182653134587, + "train/text_entropy": 0.34943892547438815, + "train/token_acc": 0.37695078031212487 + }, + { + "epoch": 0.2167283493708364, + "grad_norm": 18.527976989746094, + "learning_rate": 2.5913607693577005e-06, + "loss": 1.6562, + "step": 1464, + "train/speech_entropy": 3.973098874278658, + "train/text_entropy": 1.5225224586049462, + "train/token_acc": 0.26884422110552764 + }, + { + "epoch": 0.21687638786084382, + "grad_norm": 19.855161666870117, + "learning_rate": 2.5858042411970803e-06, + "loss": 1.7109, + "step": 1465, + "train/speech_entropy": 4.244886522169237, + "train/text_entropy": 1.4424503262482542, + "train/token_acc": 0.2571127502634352 + }, + { + "epoch": 0.21702442635085123, + "grad_norm": 13.780298233032227, + "learning_rate": 2.580255355175104e-06, + "loss": 0.7812, + "step": 1466, + "train/speech_entropy": 3.8759976285631743, + "train/text_entropy": 0.8699826240539551, + "train/token_acc": 0.30284301606922126 + }, + { + "epoch": 0.21717246484085861, + "grad_norm": 17.290769577026367, + "learning_rate": 2.5747141258430773e-06, + "loss": 1.5391, + "step": 1467, + "train/speech_entropy": 4.152004355424056, + "train/text_entropy": 1.4922683004270563, + "train/token_acc": 0.274601686972821 + }, + { + "epoch": 0.21732050333086603, + "grad_norm": 15.014178276062012, + "learning_rate": 2.5691805677322335e-06, + "loss": 0.793, + "step": 1468, + "train/speech_entropy": 3.970621092009247, + "train/text_entropy": 0.7986239543003318, + "train/token_acc": 0.28338278931750743 + }, + { + "epoch": 0.21746854182087344, + "grad_norm": 13.34211540222168, + "learning_rate": 2.5636546953536754e-06, + "loss": 1.4453, + "step": 1469, + "train/speech_entropy": 4.245963336752473, + "train/text_entropy": 1.5413374494212542, + "train/token_acc": 0.2671532846715328 + }, + { + "epoch": 0.21761658031088082, + "grad_norm": 25.78984260559082, + "learning_rate": 2.5581365231983634e-06, + "loss": 1.5977, + "step": 1470, + "train/speech_entropy": 3.9852491072299143, + "train/text_entropy": 1.449155535016741, + "train/token_acc": 0.2714468629961588 + }, + { + "epoch": 0.21776461880088824, + "grad_norm": 21.736812591552734, + "learning_rate": 2.552626065737064e-06, + "loss": 2.2344, + "step": 1471, + "train/speech_entropy": 4.0386918963808, + "train/text_entropy": 1.7702686184055203, + "train/token_acc": 0.26078028747433263 + }, + { + "epoch": 0.21791265729089562, + "grad_norm": 16.317733764648438, + "learning_rate": 2.5471233374203037e-06, + "loss": 1.0098, + "step": 1472, + "train/speech_entropy": 3.998324437842823, + "train/text_entropy": 1.0989861695662788, + "train/token_acc": 0.28700906344410876 + }, + { + "epoch": 0.21806069578090304, + "grad_norm": 16.416156768798828, + "learning_rate": 2.541628352678347e-06, + "loss": 1.084, + "step": 1473, + "train/speech_entropy": 3.5760339866758017, + "train/text_entropy": 0.8319051162056301, + "train/token_acc": 0.3217299578059072 + }, + { + "epoch": 0.21820873427091045, + "grad_norm": 18.765520095825195, + "learning_rate": 2.5361411259211565e-06, + "loss": 1.6875, + "step": 1474, + "train/speech_entropy": 4.084197102497178, + "train/text_entropy": 1.760620349554846, + "train/token_acc": 0.26223453370267774 + }, + { + "epoch": 0.21835677276091783, + "grad_norm": 14.053464889526367, + "learning_rate": 2.5306616715383358e-06, + "loss": 1.291, + "step": 1475, + "train/speech_entropy": 4.035374630293942, + "train/text_entropy": 0.9278107355775791, + "train/token_acc": 0.3216723549488055 + }, + { + "epoch": 0.21850481125092525, + "grad_norm": 13.329625129699707, + "learning_rate": 2.525190003899121e-06, + "loss": 1.2637, + "step": 1476, + "train/speech_entropy": 4.118769166696126, + "train/text_entropy": 1.1654244280875998, + "train/token_acc": 0.28328865058087577 + }, + { + "epoch": 0.21865284974093263, + "grad_norm": 16.192195892333984, + "learning_rate": 2.5197261373523242e-06, + "loss": 0.75, + "step": 1477, + "train/speech_entropy": 3.7537110537574403, + "train/text_entropy": 0.5956964416503906, + "train/token_acc": 0.32461538461538464 + }, + { + "epoch": 0.21880088823094004, + "grad_norm": 15.849969863891602, + "learning_rate": 2.5142700862262924e-06, + "loss": 1.291, + "step": 1478, + "train/speech_entropy": 4.348652970900229, + "train/text_entropy": 1.7233621315036654, + "train/token_acc": 0.246001523229246 + }, + { + "epoch": 0.21894892672094746, + "grad_norm": 18.214738845825195, + "learning_rate": 2.508821864828887e-06, + "loss": 1.6289, + "step": 1479, + "train/speech_entropy": 4.168544495959891, + "train/text_entropy": 1.7604419225933907, + "train/token_acc": 0.2696078431372549 + }, + { + "epoch": 0.21909696521095484, + "grad_norm": 9.427971839904785, + "learning_rate": 2.503381487447436e-06, + "loss": 1.2188, + "step": 1480, + "train/speech_entropy": 3.9951101652733794, + "train/text_entropy": 1.3633660584168692, + "train/token_acc": 0.4344073156140501 + }, + { + "epoch": 0.21924500370096225, + "grad_norm": 13.619255065917969, + "learning_rate": 2.49794896834869e-06, + "loss": 0.8994, + "step": 1481, + "train/speech_entropy": 3.8682503377584587, + "train/text_entropy": 0.8952890283921185, + "train/token_acc": 0.3333333333333333 + }, + { + "epoch": 0.21939304219096964, + "grad_norm": 13.78024959564209, + "learning_rate": 2.4925243217788005e-06, + "loss": 1.5508, + "step": 1482, + "train/speech_entropy": 4.2786146619938945, + "train/text_entropy": 1.71291636341023, + "train/token_acc": 0.2534916201117318 + }, + { + "epoch": 0.21954108068097705, + "grad_norm": 18.409212112426758, + "learning_rate": 2.4871075619632684e-06, + "loss": 1.3477, + "step": 1483, + "train/speech_entropy": 4.346217513773483, + "train/text_entropy": 1.3236403606347078, + "train/token_acc": 0.2504835589941973 + }, + { + "epoch": 0.21968911917098446, + "grad_norm": 17.01850700378418, + "learning_rate": 2.481698703106915e-06, + "loss": 0.8438, + "step": 1484, + "train/speech_entropy": 4.105458062409348, + "train/text_entropy": 0.9809183393205915, + "train/token_acc": 0.25663716814159293 + }, + { + "epoch": 0.21983715766099185, + "grad_norm": 11.745408058166504, + "learning_rate": 2.4762977593938444e-06, + "loss": 1.0254, + "step": 1485, + "train/speech_entropy": 4.058298520688657, + "train/text_entropy": 1.0127651099871873, + "train/token_acc": 0.3016344725111441 + }, + { + "epoch": 0.21998519615099926, + "grad_norm": 8.473690032958984, + "learning_rate": 2.470904744987401e-06, + "loss": 0.3291, + "step": 1486, + "train/speech_entropy": 3.2680331880012443, + "train/text_entropy": 0.31363469716664905, + "train/token_acc": 0.36964078794901506 + }, + { + "epoch": 0.22013323464100668, + "grad_norm": 18.948402404785156, + "learning_rate": 2.465519674030133e-06, + "loss": 1.8203, + "step": 1487, + "train/speech_entropy": 4.111263678842152, + "train/text_entropy": 1.8733178216057855, + "train/token_acc": 0.24074074074074073 + }, + { + "epoch": 0.22028127313101406, + "grad_norm": 19.91520118713379, + "learning_rate": 2.460142560643767e-06, + "loss": 1.6445, + "step": 1488, + "train/speech_entropy": 4.3562341575731445, + "train/text_entropy": 1.3927349301438834, + "train/token_acc": 0.228 + }, + { + "epoch": 0.22042931162102147, + "grad_norm": 16.14565658569336, + "learning_rate": 2.4547734189291516e-06, + "loss": 1.2183, + "step": 1489, + "train/speech_entropy": 3.947572738405258, + "train/text_entropy": 1.5058876673380535, + "train/token_acc": 0.26929392446633826 + }, + { + "epoch": 0.22057735011102886, + "grad_norm": 21.91966438293457, + "learning_rate": 2.4494122629662355e-06, + "loss": 2.5312, + "step": 1490, + "train/speech_entropy": 3.938143908235542, + "train/text_entropy": 2.370159467061361, + "train/token_acc": 0.2763578274760383 + }, + { + "epoch": 0.22072538860103627, + "grad_norm": 20.58660125732422, + "learning_rate": 2.4440591068140276e-06, + "loss": 1.2891, + "step": 1491, + "train/speech_entropy": 4.068802894257757, + "train/text_entropy": 1.61784452124487, + "train/token_acc": 0.26936416184971096 + }, + { + "epoch": 0.22087342709104368, + "grad_norm": 16.586444854736328, + "learning_rate": 2.4387139645105545e-06, + "loss": 0.7256, + "step": 1492, + "train/speech_entropy": 3.707657590825507, + "train/text_entropy": 0.6814641428517771, + "train/token_acc": 0.33083511777301927 + }, + { + "epoch": 0.22102146558105107, + "grad_norm": 13.29111099243164, + "learning_rate": 2.433376850072828e-06, + "loss": 0.918, + "step": 1493, + "train/speech_entropy": 3.63867071400518, + "train/text_entropy": 0.787747642208386, + "train/token_acc": 0.32233223322332233 + }, + { + "epoch": 0.22116950407105848, + "grad_norm": 12.320931434631348, + "learning_rate": 2.4280477774968084e-06, + "loss": 0.8867, + "step": 1494, + "train/speech_entropy": 4.067823876564278, + "train/text_entropy": 1.0393962860107422, + "train/token_acc": 0.30578512396694213 + }, + { + "epoch": 0.22131754256106587, + "grad_norm": 12.86713695526123, + "learning_rate": 2.422726760757369e-06, + "loss": 0.7686, + "step": 1495, + "train/speech_entropy": 4.0941568359375, + "train/text_entropy": 0.6480921281350626, + "train/token_acc": 0.31565329883570503 + }, + { + "epoch": 0.22146558105107328, + "grad_norm": 14.45120620727539, + "learning_rate": 2.417413813808255e-06, + "loss": 0.9902, + "step": 1496, + "train/speech_entropy": 4.058991924085115, + "train/text_entropy": 0.9414974011872944, + "train/token_acc": 0.2738990332975295 + }, + { + "epoch": 0.2216136195410807, + "grad_norm": 18.431215286254883, + "learning_rate": 2.412108950582049e-06, + "loss": 1.3223, + "step": 1497, + "train/speech_entropy": 3.8689951502290882, + "train/text_entropy": 1.6720578408654714, + "train/token_acc": 0.27909371781668385 + }, + { + "epoch": 0.22176165803108808, + "grad_norm": 16.6649169921875, + "learning_rate": 2.406812184990139e-06, + "loss": 1.1709, + "step": 1498, + "train/speech_entropy": 3.732770477096326, + "train/text_entropy": 0.6432662300441576, + "train/token_acc": 0.3385416666666667 + }, + { + "epoch": 0.2219096965210955, + "grad_norm": 13.73571491241455, + "learning_rate": 2.401523530922674e-06, + "loss": 0.7803, + "step": 1499, + "train/speech_entropy": 3.771318956435825, + "train/text_entropy": 0.8295231767602869, + "train/token_acc": 0.2994350282485876 + }, + { + "epoch": 0.22205773501110287, + "grad_norm": 16.996341705322266, + "learning_rate": 2.396243002248531e-06, + "loss": 1.1738, + "step": 1500, + "train/speech_entropy": 4.304650278079982, + "train/text_entropy": 1.5068484684608503, + "train/token_acc": 0.26778656126482214 + }, + { + "epoch": 0.2222057735011103, + "grad_norm": 21.35678482055664, + "learning_rate": 2.3909706128152842e-06, + "loss": 1.2637, + "step": 1501, + "train/speech_entropy": 3.982683976094919, + "train/text_entropy": 0.9369538047096946, + "train/token_acc": 0.318 + }, + { + "epoch": 0.2223538119911177, + "grad_norm": 18.201953887939453, + "learning_rate": 2.3857063764491584e-06, + "loss": 1.9219, + "step": 1502, + "train/speech_entropy": 4.251862397065034, + "train/text_entropy": 1.7139110020228794, + "train/token_acc": 0.2586520947176685 + }, + { + "epoch": 0.22250185048112509, + "grad_norm": 20.06148338317871, + "learning_rate": 2.3804503069549986e-06, + "loss": 1.3906, + "step": 1503, + "train/speech_entropy": 4.242465700803103, + "train/text_entropy": 1.5019767628928828, + "train/token_acc": 0.2725225225225225 + }, + { + "epoch": 0.2226498889711325, + "grad_norm": 18.222990036010742, + "learning_rate": 2.3752024181162386e-06, + "loss": 0.8457, + "step": 1504, + "train/speech_entropy": 3.91133735360588, + "train/text_entropy": 0.7375132242838541, + "train/token_acc": 0.275 + }, + { + "epoch": 0.22279792746113988, + "grad_norm": 17.959049224853516, + "learning_rate": 2.3699627236948492e-06, + "loss": 1.3242, + "step": 1505, + "train/speech_entropy": 4.202299778078167, + "train/text_entropy": 1.2223791863508284, + "train/token_acc": 0.2671156004489338 + }, + { + "epoch": 0.2229459659511473, + "grad_norm": 9.420804023742676, + "learning_rate": 2.364731237431321e-06, + "loss": 0.4277, + "step": 1506, + "train/speech_entropy": 3.8446227116371268, + "train/text_entropy": 0.43744169728139815, + "train/token_acc": 0.307103825136612 + }, + { + "epoch": 0.2230940044411547, + "grad_norm": 13.151285171508789, + "learning_rate": 2.35950797304462e-06, + "loss": 0.9297, + "step": 1507, + "train/speech_entropy": 4.038939178967085, + "train/text_entropy": 0.6756831295085404, + "train/token_acc": 0.2777777777777778 + }, + { + "epoch": 0.2232420429311621, + "grad_norm": 15.865554809570312, + "learning_rate": 2.354292944232142e-06, + "loss": 0.8965, + "step": 1508, + "train/speech_entropy": 4.290640041204868, + "train/text_entropy": 0.9340219128039456, + "train/token_acc": 0.2869822485207101 + }, + { + "epoch": 0.2233900814211695, + "grad_norm": 16.518779754638672, + "learning_rate": 2.349086164669695e-06, + "loss": 1.2676, + "step": 1509, + "train/speech_entropy": 4.227506138654764, + "train/text_entropy": 1.5008600223699264, + "train/token_acc": 0.25 + }, + { + "epoch": 0.22353811991117692, + "grad_norm": 18.37651824951172, + "learning_rate": 2.343887648011455e-06, + "loss": 1.3516, + "step": 1510, + "train/speech_entropy": 4.386088404162177, + "train/text_entropy": 1.3057564307903422, + "train/token_acc": 0.2786206896551724 + }, + { + "epoch": 0.2236861584011843, + "grad_norm": 13.155543327331543, + "learning_rate": 2.33869740788992e-06, + "loss": 1.4805, + "step": 1511, + "train/speech_entropy": 4.344569887907609, + "train/text_entropy": 1.7796317968475686, + "train/token_acc": 0.2575864502470007 + }, + { + "epoch": 0.22383419689119172, + "grad_norm": 16.923173904418945, + "learning_rate": 2.333515457915891e-06, + "loss": 1.0371, + "step": 1512, + "train/speech_entropy": 4.05668664827316, + "train/text_entropy": 1.2078511314670535, + "train/token_acc": 0.3033112582781457 + }, + { + "epoch": 0.2239822353811991, + "grad_norm": 16.403308868408203, + "learning_rate": 2.3283418116784327e-06, + "loss": 1.3047, + "step": 1513, + "train/speech_entropy": 4.261810920499115, + "train/text_entropy": 1.0220205245479461, + "train/token_acc": 0.290453074433657 + }, + { + "epoch": 0.22413027387120651, + "grad_norm": 11.929006576538086, + "learning_rate": 2.323176482744822e-06, + "loss": 1.0215, + "step": 1514, + "train/speech_entropy": 4.346049086402093, + "train/text_entropy": 1.4436550546199718, + "train/token_acc": 0.2685897435897436 + }, + { + "epoch": 0.22427831236121393, + "grad_norm": 14.741454124450684, + "learning_rate": 2.3180194846605367e-06, + "loss": 1.125, + "step": 1515, + "train/speech_entropy": 4.392370768955776, + "train/text_entropy": 1.2157853799422986, + "train/token_acc": 0.24519670631290028 + }, + { + "epoch": 0.2244263508512213, + "grad_norm": 13.707925796508789, + "learning_rate": 2.3128708309492014e-06, + "loss": 0.8242, + "step": 1516, + "train/speech_entropy": 3.8612731262877746, + "train/text_entropy": 1.2163203794564774, + "train/token_acc": 0.3161764705882353 + }, + { + "epoch": 0.22457438934122873, + "grad_norm": 10.628701210021973, + "learning_rate": 2.3077305351125595e-06, + "loss": 0.8403, + "step": 1517, + "train/speech_entropy": 4.0829539905894885, + "train/text_entropy": 0.9204007120965754, + "train/token_acc": 0.27532228360957645 + }, + { + "epoch": 0.2247224278312361, + "grad_norm": 65.99266815185547, + "learning_rate": 2.30259861063044e-06, + "loss": 1.4805, + "step": 1518, + "train/speech_entropy": 4.39719630350993, + "train/text_entropy": 1.833910337769159, + "train/token_acc": 0.21929824561403508 + }, + { + "epoch": 0.22487046632124352, + "grad_norm": 67.07527160644531, + "learning_rate": 2.2974750709607145e-06, + "loss": 2.0898, + "step": 1519, + "train/speech_entropy": 4.251038869222005, + "train/text_entropy": 1.9352057017977276, + "train/token_acc": 0.25333333333333335 + }, + { + "epoch": 0.22501850481125094, + "grad_norm": 13.792023658752441, + "learning_rate": 2.2923599295392683e-06, + "loss": 1.4844, + "step": 1520, + "train/speech_entropy": 4.5165275390625, + "train/text_entropy": 1.4895669260332662, + "train/token_acc": 0.24432576769025366 + }, + { + "epoch": 0.22516654330125832, + "grad_norm": 15.793828010559082, + "learning_rate": 2.287253199779965e-06, + "loss": 0.875, + "step": 1521, + "train/speech_entropy": 4.555723574316244, + "train/text_entropy": 1.1077829599380493, + "train/token_acc": 0.24828532235939643 + }, + { + "epoch": 0.22531458179126573, + "grad_norm": 12.224534034729004, + "learning_rate": 2.282154895074608e-06, + "loss": 1.0156, + "step": 1522, + "train/speech_entropy": 4.0041702835648145, + "train/text_entropy": 0.9150011264360868, + "train/token_acc": 0.2888117953165655 + }, + { + "epoch": 0.22546262028127312, + "grad_norm": 15.760653495788574, + "learning_rate": 2.277065028792905e-06, + "loss": 0.6963, + "step": 1523, + "train/speech_entropy": 3.782376197076613, + "train/text_entropy": 0.6809687648259156, + "train/token_acc": 0.33287671232876714 + }, + { + "epoch": 0.22561065877128053, + "grad_norm": 16.522926330566406, + "learning_rate": 2.271983614282439e-06, + "loss": 1.1191, + "step": 1524, + "train/speech_entropy": 4.286551556981973, + "train/text_entropy": 1.5224505615234376, + "train/token_acc": 0.2513721185510428 + }, + { + "epoch": 0.22575869726128794, + "grad_norm": 11.42010498046875, + "learning_rate": 2.2669106648686268e-06, + "loss": 0.9824, + "step": 1525, + "train/speech_entropy": 4.022349061129386, + "train/text_entropy": 0.8837605526572779, + "train/token_acc": 0.2760233918128655 + }, + { + "epoch": 0.22590673575129533, + "grad_norm": 17.98077392578125, + "learning_rate": 2.2618461938546866e-06, + "loss": 1.0215, + "step": 1526, + "train/speech_entropy": 4.089925173773755, + "train/text_entropy": 0.9790495746540573, + "train/token_acc": 0.2757417102966841 + }, + { + "epoch": 0.22605477424130274, + "grad_norm": 15.694536209106445, + "learning_rate": 2.256790214521601e-06, + "loss": 0.9023, + "step": 1527, + "train/speech_entropy": 3.8342612257281554, + "train/text_entropy": 1.0396361275324746, + "train/token_acc": 0.31201248049922 + }, + { + "epoch": 0.22620281273131015, + "grad_norm": 9.45860481262207, + "learning_rate": 2.2517427401280913e-06, + "loss": 0.6768, + "step": 1528, + "train/speech_entropy": 4.022090744344024, + "train/text_entropy": 0.8422912289743466, + "train/token_acc": 0.32941176470588235 + }, + { + "epoch": 0.22635085122131754, + "grad_norm": 8.264986991882324, + "learning_rate": 2.246703783910567e-06, + "loss": 0.5234, + "step": 1529, + "train/speech_entropy": 3.8270099803167557, + "train/text_entropy": 0.47425944850129903, + "train/token_acc": 0.3008064516129032 + }, + { + "epoch": 0.22649888971132495, + "grad_norm": 14.745135307312012, + "learning_rate": 2.241673359083103e-06, + "loss": 1.3086, + "step": 1530, + "train/speech_entropy": 4.3212857027666285, + "train/text_entropy": 1.4159460390247585, + "train/token_acc": 0.2668350168350168 + }, + { + "epoch": 0.22664692820133234, + "grad_norm": 11.084878921508789, + "learning_rate": 2.236651478837405e-06, + "loss": 0.7439, + "step": 1531, + "train/speech_entropy": 3.993541322905442, + "train/text_entropy": 0.8904296993591625, + "train/token_acc": 0.28059701492537314 + }, + { + "epoch": 0.22679496669133975, + "grad_norm": 9.279070854187012, + "learning_rate": 2.231638156342766e-06, + "loss": 0.4961, + "step": 1532, + "train/speech_entropy": 3.6286666812968167, + "train/text_entropy": 0.38283122579256695, + "train/token_acc": 0.3333333333333333 + }, + { + "epoch": 0.22694300518134716, + "grad_norm": 17.732961654663086, + "learning_rate": 2.226633404746039e-06, + "loss": 1.5703, + "step": 1533, + "train/speech_entropy": 4.441265091162943, + "train/text_entropy": 1.589088841488487, + "train/token_acc": 0.2649484536082474 + }, + { + "epoch": 0.22709104367135455, + "grad_norm": 15.831135749816895, + "learning_rate": 2.2216372371716054e-06, + "loss": 1.8242, + "step": 1534, + "train/speech_entropy": 4.269140888461106, + "train/text_entropy": 1.7307215205958633, + "train/token_acc": 0.24631268436578171 + }, + { + "epoch": 0.22723908216136196, + "grad_norm": 18.521488189697266, + "learning_rate": 2.216649666721329e-06, + "loss": 0.9629, + "step": 1535, + "train/speech_entropy": 3.6611017794222445, + "train/text_entropy": 1.2904937285229676, + "train/token_acc": 0.3020689655172414 + }, + { + "epoch": 0.22738712065136935, + "grad_norm": 17.386598587036133, + "learning_rate": 2.2116707064745335e-06, + "loss": 1.082, + "step": 1536, + "train/speech_entropy": 4.004635351971103, + "train/text_entropy": 1.1039156679247246, + "train/token_acc": 0.30833333333333335 + }, + { + "epoch": 0.22753515914137676, + "grad_norm": 15.559500694274902, + "learning_rate": 2.206700369487961e-06, + "loss": 1.3789, + "step": 1537, + "train/speech_entropy": 4.261806092411988, + "train/text_entropy": 1.3409664619259718, + "train/token_acc": 0.24044585987261147 + }, + { + "epoch": 0.22768319763138417, + "grad_norm": 10.07961654663086, + "learning_rate": 2.201738668795739e-06, + "loss": 0.686, + "step": 1538, + "train/speech_entropy": 3.9343707898898783, + "train/text_entropy": 0.6583933629909483, + "train/token_acc": 0.2996168582375479 + }, + { + "epoch": 0.22783123612139156, + "grad_norm": 11.644322395324707, + "learning_rate": 2.196785617409352e-06, + "loss": 1.2852, + "step": 1539, + "train/speech_entropy": 4.750874726410506, + "train/text_entropy": 1.358434326171875, + "train/token_acc": 0.28012048192771083 + }, + { + "epoch": 0.22797927461139897, + "grad_norm": 16.433427810668945, + "learning_rate": 2.1918412283175996e-06, + "loss": 1.1113, + "step": 1540, + "train/speech_entropy": 4.200482858193888, + "train/text_entropy": 1.2155937222770719, + "train/token_acc": 0.26630061770761837 + }, + { + "epoch": 0.22812731310140635, + "grad_norm": 9.021756172180176, + "learning_rate": 2.186905514486563e-06, + "loss": 0.4697, + "step": 1541, + "train/speech_entropy": 3.8986252632556986, + "train/text_entropy": 0.5505546859548062, + "train/token_acc": 0.32985553772070625 + }, + { + "epoch": 0.22827535159141377, + "grad_norm": 14.20545482635498, + "learning_rate": 2.1819784888595804e-06, + "loss": 1.2734, + "step": 1542, + "train/speech_entropy": 4.740897806424562, + "train/text_entropy": 1.257140519554049, + "train/token_acc": 0.2863247863247863 + }, + { + "epoch": 0.22842339008142118, + "grad_norm": 11.632659912109375, + "learning_rate": 2.177060164357199e-06, + "loss": 1.0215, + "step": 1543, + "train/speech_entropy": 4.418716239377411, + "train/text_entropy": 1.020039049018339, + "train/token_acc": 0.2713841368584759 + }, + { + "epoch": 0.22857142857142856, + "grad_norm": 16.214168548583984, + "learning_rate": 2.1721505538771516e-06, + "loss": 1.1172, + "step": 1544, + "train/speech_entropy": 3.9673095521742665, + "train/text_entropy": 1.0469997521602745, + "train/token_acc": 0.29952267303102625 + }, + { + "epoch": 0.22871946706143598, + "grad_norm": 10.483433723449707, + "learning_rate": 2.1672496702943212e-06, + "loss": 0.9316, + "step": 1545, + "train/speech_entropy": 4.145333196547859, + "train/text_entropy": 0.9754501827179439, + "train/token_acc": 0.30275229357798167 + }, + { + "epoch": 0.22886750555144336, + "grad_norm": 12.044052124023438, + "learning_rate": 2.1623575264607035e-06, + "loss": 0.5781, + "step": 1546, + "train/speech_entropy": 3.9450732360916043, + "train/text_entropy": 0.3866807089911567, + "train/token_acc": 0.32432432432432434 + }, + { + "epoch": 0.22901554404145077, + "grad_norm": 10.317663192749023, + "learning_rate": 2.1574741352053724e-06, + "loss": 0.6436, + "step": 1547, + "train/speech_entropy": 3.918803943887395, + "train/text_entropy": 0.7038318579145473, + "train/token_acc": 0.3370452858203415 + }, + { + "epoch": 0.2291635825314582, + "grad_norm": 11.319279670715332, + "learning_rate": 2.152599509334457e-06, + "loss": 0.8037, + "step": 1548, + "train/speech_entropy": 4.443210268980705, + "train/text_entropy": 0.852786907933867, + "train/token_acc": 0.30606860158311344 + }, + { + "epoch": 0.22931162102146557, + "grad_norm": 16.677093505859375, + "learning_rate": 2.14773366163109e-06, + "loss": 1.3711, + "step": 1549, + "train/speech_entropy": 4.103946897992804, + "train/text_entropy": 1.1280586543341575, + "train/token_acc": 0.29815573770491804 + }, + { + "epoch": 0.22945965951147299, + "grad_norm": 15.145459175109863, + "learning_rate": 2.142876604855393e-06, + "loss": 1.0723, + "step": 1550, + "train/speech_entropy": 4.271517739120119, + "train/text_entropy": 0.971384535274745, + "train/token_acc": 0.3290267011197244 + }, + { + "epoch": 0.2296076980014804, + "grad_norm": 20.678861618041992, + "learning_rate": 2.138028351744433e-06, + "loss": 1.9844, + "step": 1551, + "train/speech_entropy": 4.413517853829981, + "train/text_entropy": 1.6629939742152466, + "train/token_acc": 0.21985294117647058 + }, + { + "epoch": 0.22975573649148778, + "grad_norm": 15.538779258728027, + "learning_rate": 2.1331889150121828e-06, + "loss": 1.416, + "step": 1552, + "train/speech_entropy": 4.417125629381535, + "train/text_entropy": 1.6679157013588763, + "train/token_acc": 0.2463092463092463 + }, + { + "epoch": 0.2299037749814952, + "grad_norm": 16.48090362548828, + "learning_rate": 2.128358307349505e-06, + "loss": 0.9668, + "step": 1553, + "train/speech_entropy": 4.143980818796376, + "train/text_entropy": 0.8618297765750697, + "train/token_acc": 0.29684601113172543 + }, + { + "epoch": 0.23005181347150258, + "grad_norm": 14.573893547058105, + "learning_rate": 2.1235365414241072e-06, + "loss": 0.8584, + "step": 1554, + "train/speech_entropy": 3.7192336686066128, + "train/text_entropy": 0.7770775428185096, + "train/token_acc": 0.3333333333333333 + }, + { + "epoch": 0.23019985196151, + "grad_norm": 17.36809539794922, + "learning_rate": 2.1187236298805024e-06, + "loss": 1.3242, + "step": 1555, + "train/speech_entropy": 3.9402829585767947, + "train/text_entropy": 1.2887350847931947, + "train/token_acc": 0.2828418230563003 + }, + { + "epoch": 0.2303478904515174, + "grad_norm": 14.206912994384766, + "learning_rate": 2.113919585339994e-06, + "loss": 0.9785, + "step": 1556, + "train/speech_entropy": 4.442562349380985, + "train/text_entropy": 1.053693704937228, + "train/token_acc": 0.2548197820620285 + }, + { + "epoch": 0.2304959289415248, + "grad_norm": 15.92665958404541, + "learning_rate": 2.10912442040063e-06, + "loss": 1.3164, + "step": 1557, + "train/speech_entropy": 4.292536227318548, + "train/text_entropy": 1.337447738647461, + "train/token_acc": 0.28205128205128205 + }, + { + "epoch": 0.2306439674315322, + "grad_norm": 16.841650009155273, + "learning_rate": 2.1043381476371673e-06, + "loss": 1.3555, + "step": 1558, + "train/speech_entropy": 4.245427632503373, + "train/text_entropy": 1.1611160909322868, + "train/token_acc": 0.29027576197387517 + }, + { + "epoch": 0.2307920059215396, + "grad_norm": 14.415751457214355, + "learning_rate": 2.0995607796010515e-06, + "loss": 0.5864, + "step": 1559, + "train/speech_entropy": 3.739605676560175, + "train/text_entropy": 0.5924271065116729, + "train/token_acc": 0.30450669914738127 + }, + { + "epoch": 0.230940044411547, + "grad_norm": 14.915228843688965, + "learning_rate": 2.0947923288203725e-06, + "loss": 1.1172, + "step": 1560, + "train/speech_entropy": 4.522024801340652, + "train/text_entropy": 1.1065932812333918, + "train/token_acc": 0.23443983402489627 + }, + { + "epoch": 0.23108808290155441, + "grad_norm": 6.423327445983887, + "learning_rate": 2.090032807799835e-06, + "loss": 0.2046, + "step": 1561, + "train/speech_entropy": 3.584843151200833, + "train/text_entropy": 0.3217246578585717, + "train/token_acc": 0.3523447401774398 + }, + { + "epoch": 0.2312361213915618, + "grad_norm": 12.222740173339844, + "learning_rate": 2.0852822290207307e-06, + "loss": 1.1523, + "step": 1562, + "train/speech_entropy": 4.133234377887103, + "train/text_entropy": 1.139623666111427, + "train/token_acc": 0.29478729778310364 + }, + { + "epoch": 0.2313841598815692, + "grad_norm": 15.001111030578613, + "learning_rate": 2.080540604940898e-06, + "loss": 0.6914, + "step": 1563, + "train/speech_entropy": 3.590529082153511, + "train/text_entropy": 0.616215025583903, + "train/token_acc": 0.3193916349809886 + }, + { + "epoch": 0.2315321983715766, + "grad_norm": 12.194300651550293, + "learning_rate": 2.07580794799469e-06, + "loss": 0.4336, + "step": 1564, + "train/speech_entropy": 3.5174588787021923, + "train/text_entropy": 0.3569911810067984, + "train/token_acc": 0.3365617433414044 + }, + { + "epoch": 0.231680236861584, + "grad_norm": 14.297590255737305, + "learning_rate": 2.0710842705929526e-06, + "loss": 1.2402, + "step": 1565, + "train/speech_entropy": 4.233149044083255, + "train/text_entropy": 1.262975758519666, + "train/token_acc": 0.25595695618754805 + }, + { + "epoch": 0.23182827535159142, + "grad_norm": 20.61637306213379, + "learning_rate": 2.0663695851229764e-06, + "loss": 1.1758, + "step": 1566, + "train/speech_entropy": 3.900299667730564, + "train/text_entropy": 1.2558532636992785, + "train/token_acc": 0.2854330708661417 + }, + { + "epoch": 0.2319763138415988, + "grad_norm": 12.836311340332031, + "learning_rate": 2.061663903948474e-06, + "loss": 1.1621, + "step": 1567, + "train/speech_entropy": 4.290305290919755, + "train/text_entropy": 1.185601534418097, + "train/token_acc": 0.24680511182108625 + }, + { + "epoch": 0.23212435233160622, + "grad_norm": 10.583239555358887, + "learning_rate": 2.0569672394095474e-06, + "loss": 0.7373, + "step": 1568, + "train/speech_entropy": 4.0905747215346535, + "train/text_entropy": 0.8381342104343181, + "train/token_acc": 0.2820932134096484 + }, + { + "epoch": 0.2322723908216136, + "grad_norm": 14.54407787322998, + "learning_rate": 2.0522796038226516e-06, + "loss": 1.0244, + "step": 1569, + "train/speech_entropy": 4.228793325148211, + "train/text_entropy": 1.110169924222506, + "train/token_acc": 0.2802056555269923 + }, + { + "epoch": 0.23242042931162102, + "grad_norm": 18.26278305053711, + "learning_rate": 2.0476010094805636e-06, + "loss": 0.877, + "step": 1570, + "train/speech_entropy": 4.099090208490211, + "train/text_entropy": 0.9268665313720703, + "train/token_acc": 0.2807308970099668 + }, + { + "epoch": 0.23256846780162843, + "grad_norm": 14.716747283935547, + "learning_rate": 2.042931468652351e-06, + "loss": 1.4219, + "step": 1571, + "train/speech_entropy": 4.152788665868571, + "train/text_entropy": 1.297374814888681, + "train/token_acc": 0.263840830449827 + }, + { + "epoch": 0.23271650629163582, + "grad_norm": 14.552248001098633, + "learning_rate": 2.0382709935833424e-06, + "loss": 1.4062, + "step": 1572, + "train/speech_entropy": 4.607101947928215, + "train/text_entropy": 1.4170002885298296, + "train/token_acc": 0.23894348894348894 + }, + { + "epoch": 0.23286454478164323, + "grad_norm": 13.761801719665527, + "learning_rate": 2.0336195964950885e-06, + "loss": 1.0586, + "step": 1573, + "train/speech_entropy": 4.338481691504727, + "train/text_entropy": 0.9022605582459332, + "train/token_acc": 0.2773279352226721 + }, + { + "epoch": 0.23301258327165064, + "grad_norm": 13.885335922241211, + "learning_rate": 2.0289772895853354e-06, + "loss": 1.0215, + "step": 1574, + "train/speech_entropy": 4.182593319394173, + "train/text_entropy": 1.3458285204229508, + "train/token_acc": 0.2738432483474976 + }, + { + "epoch": 0.23316062176165803, + "grad_norm": 13.28445053100586, + "learning_rate": 2.024344085027995e-06, + "loss": 0.9922, + "step": 1575, + "train/speech_entropy": 4.086963674686808, + "train/text_entropy": 0.9215036688003948, + "train/token_acc": 0.2743119266055046 + }, + { + "epoch": 0.23330866025166544, + "grad_norm": 14.00797176361084, + "learning_rate": 2.019719994973103e-06, + "loss": 1.1367, + "step": 1576, + "train/speech_entropy": 4.588725689760189, + "train/text_entropy": 1.6547642047588642, + "train/token_acc": 0.2554067971163749 + }, + { + "epoch": 0.23345669874167282, + "grad_norm": 18.75572395324707, + "learning_rate": 2.0151050315467967e-06, + "loss": 1.1641, + "step": 1577, + "train/speech_entropy": 4.466705054902612, + "train/text_entropy": 1.2120800018310547, + "train/token_acc": 0.25651720542231493 + }, + { + "epoch": 0.23360473723168024, + "grad_norm": 18.629844665527344, + "learning_rate": 2.0104992068512824e-06, + "loss": 1.3672, + "step": 1578, + "train/speech_entropy": 4.416608838958213, + "train/text_entropy": 1.255531223842076, + "train/token_acc": 0.24233716475095785 + }, + { + "epoch": 0.23375277572168765, + "grad_norm": 14.141447067260742, + "learning_rate": 2.005902532964796e-06, + "loss": 0.9746, + "step": 1579, + "train/speech_entropy": 3.9321396246189027, + "train/text_entropy": 1.1567852454271146, + "train/token_acc": 0.2998986828774063 + }, + { + "epoch": 0.23390081421169504, + "grad_norm": 11.982250213623047, + "learning_rate": 2.001315021941578e-06, + "loss": 0.7598, + "step": 1580, + "train/speech_entropy": 4.055748189450059, + "train/text_entropy": 1.1225444062115395, + "train/token_acc": 0.3065134099616858 + }, + { + "epoch": 0.23404885270170245, + "grad_norm": 15.231525421142578, + "learning_rate": 1.9967366858118454e-06, + "loss": 0.7383, + "step": 1581, + "train/speech_entropy": 4.1925624202520595, + "train/text_entropy": 0.5451783277317436, + "train/token_acc": 0.30982658959537573 + }, + { + "epoch": 0.23419689119170983, + "grad_norm": 14.507062911987305, + "learning_rate": 1.9921675365817458e-06, + "loss": 0.8809, + "step": 1582, + "train/speech_entropy": 3.88195235134062, + "train/text_entropy": 0.9258814021519253, + "train/token_acc": 0.3424170616113744 + }, + { + "epoch": 0.23434492968171725, + "grad_norm": 12.467716217041016, + "learning_rate": 1.9876075862333424e-06, + "loss": 1.3438, + "step": 1583, + "train/speech_entropy": 4.61570323052247, + "train/text_entropy": 1.2435812171236926, + "train/token_acc": 0.2631993695823483 + }, + { + "epoch": 0.23449296817172466, + "grad_norm": 21.065799713134766, + "learning_rate": 1.983056846724577e-06, + "loss": 1.8438, + "step": 1584, + "train/speech_entropy": 4.245418660780963, + "train/text_entropy": 1.897207260131836, + "train/token_acc": 0.29017857142857145 + }, + { + "epoch": 0.23464100666173204, + "grad_norm": 50.038841247558594, + "learning_rate": 1.9785153299892276e-06, + "loss": 3.0273, + "step": 1585, + "train/speech_entropy": 3.437796338111567, + "train/text_entropy": 2.115592892964681, + "train/token_acc": 0.2669039145907473 + }, + { + "epoch": 0.23478904515173946, + "grad_norm": 15.448467254638672, + "learning_rate": 1.9739830479368943e-06, + "loss": 1.293, + "step": 1586, + "train/speech_entropy": 4.301512359290995, + "train/text_entropy": 1.1231122646691665, + "train/token_acc": 0.26430722891566266 + }, + { + "epoch": 0.23493708364174684, + "grad_norm": 16.263084411621094, + "learning_rate": 1.969460012452963e-06, + "loss": 1.0059, + "step": 1587, + "train/speech_entropy": 4.204330733918796, + "train/text_entropy": 1.4810409340807187, + "train/token_acc": 0.3019517795637199 + }, + { + "epoch": 0.23508512213175425, + "grad_norm": 17.457124710083008, + "learning_rate": 1.964946235398559e-06, + "loss": 1.2852, + "step": 1588, + "train/speech_entropy": 3.9197682399375764, + "train/text_entropy": 1.2405042012532552, + "train/token_acc": 0.2505175983436853 + }, + { + "epoch": 0.23523316062176167, + "grad_norm": 19.44792366027832, + "learning_rate": 1.960441728610542e-06, + "loss": 1.3438, + "step": 1589, + "train/speech_entropy": 4.030357211243873, + "train/text_entropy": 1.41114037555197, + "train/token_acc": 0.2485207100591716 + }, + { + "epoch": 0.23538119911176905, + "grad_norm": 14.925347328186035, + "learning_rate": 1.9559465039014565e-06, + "loss": 1.127, + "step": 1590, + "train/speech_entropy": 3.9243137272259663, + "train/text_entropy": 1.0912242639260215, + "train/token_acc": 0.2678018575851393 + }, + { + "epoch": 0.23552923760177646, + "grad_norm": 20.280122756958008, + "learning_rate": 1.9514605730595006e-06, + "loss": 1.2578, + "step": 1591, + "train/speech_entropy": 3.7481080537683824, + "train/text_entropy": 1.210060790344909, + "train/token_acc": 0.26356589147286824 + }, + { + "epoch": 0.23567727609178388, + "grad_norm": 16.829683303833008, + "learning_rate": 1.9469839478485085e-06, + "loss": 1.5391, + "step": 1592, + "train/speech_entropy": 4.23396743408735, + "train/text_entropy": 1.5236045116215795, + "train/token_acc": 0.2609472743521001 + }, + { + "epoch": 0.23582531458179126, + "grad_norm": 9.045679092407227, + "learning_rate": 1.9425166400079077e-06, + "loss": 0.5889, + "step": 1593, + "train/speech_entropy": 4.301808911700581, + "train/text_entropy": 0.721630350748698, + "train/token_acc": 0.2824858757062147 + }, + { + "epoch": 0.23597335307179867, + "grad_norm": 20.773651123046875, + "learning_rate": 1.9380586612526907e-06, + "loss": 1.0547, + "step": 1594, + "train/speech_entropy": 4.273385212233293, + "train/text_entropy": 1.3361568172482678, + "train/token_acc": 0.2717678100263852 + }, + { + "epoch": 0.23612139156180606, + "grad_norm": 11.834531784057617, + "learning_rate": 1.933610023273388e-06, + "loss": 0.8135, + "step": 1595, + "train/speech_entropy": 3.760068908761481, + "train/text_entropy": 0.8760913099561419, + "train/token_acc": 0.3132420091324201 + }, + { + "epoch": 0.23626943005181347, + "grad_norm": 15.526971817016602, + "learning_rate": 1.9291707377360354e-06, + "loss": 1.3516, + "step": 1596, + "train/speech_entropy": 4.287303259017619, + "train/text_entropy": 1.46575615115282, + "train/token_acc": 0.2611353711790393 + }, + { + "epoch": 0.23641746854182089, + "grad_norm": 12.52772045135498, + "learning_rate": 1.9247408162821376e-06, + "loss": 1.3867, + "step": 1597, + "train/speech_entropy": 4.146597787168745, + "train/text_entropy": 1.4453155820732875, + "train/token_acc": 0.277602523659306 + }, + { + "epoch": 0.23656550703182827, + "grad_norm": 19.592388153076172, + "learning_rate": 1.920320270528652e-06, + "loss": 1.5742, + "step": 1598, + "train/speech_entropy": 3.9283087148969367, + "train/text_entropy": 1.5077745944638796, + "train/token_acc": 0.26495726495726496 + }, + { + "epoch": 0.23671354552183568, + "grad_norm": 17.17169761657715, + "learning_rate": 1.9159091120679424e-06, + "loss": 1.6992, + "step": 1599, + "train/speech_entropy": 4.276133812677809, + "train/text_entropy": 1.8734559775584967, + "train/token_acc": 0.2754424778761062 + }, + { + "epoch": 0.23686158401184307, + "grad_norm": 13.314691543579102, + "learning_rate": 1.911507352467756e-06, + "loss": 1.1963, + "step": 1600, + "train/speech_entropy": 3.9631585618996015, + "train/text_entropy": 1.1563549657021799, + "train/token_acc": 0.28669201520912546 + }, + { + "epoch": 0.23700962250185048, + "grad_norm": 15.01711368560791, + "learning_rate": 1.9071150032711977e-06, + "loss": 0.623, + "step": 1601, + "train/speech_entropy": 3.728211893837833, + "train/text_entropy": 0.6595827952152541, + "train/token_acc": 0.3274760383386581 + }, + { + "epoch": 0.2371576609918579, + "grad_norm": 14.16907787322998, + "learning_rate": 1.9027320759966882e-06, + "loss": 0.6318, + "step": 1602, + "train/speech_entropy": 3.827817234165994, + "train/text_entropy": 0.9247648052140778, + "train/token_acc": 0.3061986557132188 + }, + { + "epoch": 0.23730569948186528, + "grad_norm": 13.191862106323242, + "learning_rate": 1.8983585821379455e-06, + "loss": 1.2227, + "step": 1603, + "train/speech_entropy": 4.5397609889722474, + "train/text_entropy": 1.2061344111731294, + "train/token_acc": 0.24283559577677225 + }, + { + "epoch": 0.2374537379718727, + "grad_norm": 14.84710693359375, + "learning_rate": 1.8939945331639447e-06, + "loss": 0.4277, + "step": 1604, + "train/speech_entropy": 3.49445153859036, + "train/text_entropy": 0.3444801356111254, + "train/token_acc": 0.31543624161073824 + }, + { + "epoch": 0.23760177646188008, + "grad_norm": 12.725722312927246, + "learning_rate": 1.8896399405188986e-06, + "loss": 1.0215, + "step": 1605, + "train/speech_entropy": 3.7342843925003457, + "train/text_entropy": 1.254476175268656, + "train/token_acc": 0.3170305676855895 + }, + { + "epoch": 0.2377498149518875, + "grad_norm": 15.891491889953613, + "learning_rate": 1.8852948156222172e-06, + "loss": 1.7539, + "step": 1606, + "train/speech_entropy": 4.866066839461936, + "train/text_entropy": 1.7346635762617677, + "train/token_acc": 0.227168073676132 + }, + { + "epoch": 0.2378978534418949, + "grad_norm": 14.992721557617188, + "learning_rate": 1.8809591698684827e-06, + "loss": 0.9023, + "step": 1607, + "train/speech_entropy": 3.7453132208259787, + "train/text_entropy": 0.8749801446192013, + "train/token_acc": 0.2904255319148936 + }, + { + "epoch": 0.2380458919319023, + "grad_norm": 13.578885078430176, + "learning_rate": 1.876633014627423e-06, + "loss": 0.6875, + "step": 1608, + "train/speech_entropy": 3.8950173214877886, + "train/text_entropy": 0.500052783800208, + "train/token_acc": 0.33705583756345175 + }, + { + "epoch": 0.2381939304219097, + "grad_norm": 20.13263702392578, + "learning_rate": 1.872316361243875e-06, + "loss": 0.9297, + "step": 1609, + "train/speech_entropy": 4.122094925744828, + "train/text_entropy": 1.2149069181052587, + "train/token_acc": 0.28735632183908044 + }, + { + "epoch": 0.23834196891191708, + "grad_norm": 17.361406326293945, + "learning_rate": 1.8680092210377593e-06, + "loss": 1.1973, + "step": 1610, + "train/speech_entropy": 3.6256740553927185, + "train/text_entropy": 0.9674799480135479, + "train/token_acc": 0.32293986636971045 + }, + { + "epoch": 0.2384900074019245, + "grad_norm": 12.730656623840332, + "learning_rate": 1.8637116053040466e-06, + "loss": 0.9141, + "step": 1611, + "train/speech_entropy": 4.057772730486425, + "train/text_entropy": 1.1459125302872568, + "train/token_acc": 0.2824601366742597 + }, + { + "epoch": 0.2386380458919319, + "grad_norm": 19.73232650756836, + "learning_rate": 1.8594235253127373e-06, + "loss": 1.127, + "step": 1612, + "train/speech_entropy": 3.8127410939449877, + "train/text_entropy": 0.7922002730831024, + "train/token_acc": 0.29517241379310344 + }, + { + "epoch": 0.2387860843819393, + "grad_norm": 8.41976261138916, + "learning_rate": 1.8551449923088182e-06, + "loss": 0.4072, + "step": 1613, + "train/speech_entropy": 3.781864097229052, + "train/text_entropy": 0.4180825049944857, + "train/token_acc": 0.34697732997481107 + }, + { + "epoch": 0.2389341228719467, + "grad_norm": 7.520501613616943, + "learning_rate": 1.8508760175122443e-06, + "loss": 0.1914, + "step": 1614, + "train/speech_entropy": 3.446167134793005, + "train/text_entropy": 0.27678513994403914, + "train/token_acc": 0.35471698113207545 + }, + { + "epoch": 0.23908216136195412, + "grad_norm": 15.496133804321289, + "learning_rate": 1.846616612117903e-06, + "loss": 1.0039, + "step": 1615, + "train/speech_entropy": 4.066624469067677, + "train/text_entropy": 1.1107446176034432, + "train/token_acc": 0.27784730913642053 + }, + { + "epoch": 0.2392301998519615, + "grad_norm": 14.987935066223145, + "learning_rate": 1.8423667872955915e-06, + "loss": 0.8613, + "step": 1616, + "train/speech_entropy": 3.9477731805098686, + "train/text_entropy": 0.7560048935905336, + "train/token_acc": 0.31225905936777176 + }, + { + "epoch": 0.23937823834196892, + "grad_norm": 13.957518577575684, + "learning_rate": 1.838126554189977e-06, + "loss": 0.9512, + "step": 1617, + "train/speech_entropy": 4.214340424408784, + "train/text_entropy": 1.1429618701600193, + "train/token_acc": 0.31222896790980054 + }, + { + "epoch": 0.2395262768319763, + "grad_norm": 12.234195709228516, + "learning_rate": 1.8338959239205773e-06, + "loss": 1.1973, + "step": 1618, + "train/speech_entropy": 4.632768349233438, + "train/text_entropy": 1.2627586884932085, + "train/token_acc": 0.2556118754525706 + }, + { + "epoch": 0.23967431532198372, + "grad_norm": 12.479501724243164, + "learning_rate": 1.8296749075817278e-06, + "loss": 0.8096, + "step": 1619, + "train/speech_entropy": 4.068468857726319, + "train/text_entropy": 0.6186914249342315, + "train/token_acc": 0.3145631067961165 + }, + { + "epoch": 0.23982235381199113, + "grad_norm": 19.405330657958984, + "learning_rate": 1.825463516242552e-06, + "loss": 2.3047, + "step": 1620, + "train/speech_entropy": 4.38218371150845, + "train/text_entropy": 2.2535585628326285, + "train/token_acc": 0.2638095238095238 + }, + { + "epoch": 0.23997039230199851, + "grad_norm": 14.933260917663574, + "learning_rate": 1.8212617609469301e-06, + "loss": 1.0371, + "step": 1621, + "train/speech_entropy": 4.111310165229885, + "train/text_entropy": 1.0916705805029572, + "train/token_acc": 0.26456542502387775 + }, + { + "epoch": 0.24011843079200593, + "grad_norm": 15.25728702545166, + "learning_rate": 1.8170696527134793e-06, + "loss": 0.7461, + "step": 1622, + "train/speech_entropy": 4.047432000688512, + "train/text_entropy": 0.9360486162387258, + "train/token_acc": 0.2857142857142857 + }, + { + "epoch": 0.2402664692820133, + "grad_norm": 6.341729640960693, + "learning_rate": 1.8128872025355124e-06, + "loss": 0.1968, + "step": 1623, + "train/speech_entropy": 3.6127336090686275, + "train/text_entropy": 0.27431942252225655, + "train/token_acc": 0.3622448979591837 + }, + { + "epoch": 0.24041450777202072, + "grad_norm": 13.135727882385254, + "learning_rate": 1.8087144213810169e-06, + "loss": 1.1934, + "step": 1624, + "train/speech_entropy": 4.246261677414021, + "train/text_entropy": 1.0754002538220635, + "train/token_acc": 0.2727272727272727 + }, + { + "epoch": 0.24056254626202814, + "grad_norm": 13.8828763961792, + "learning_rate": 1.8045513201926285e-06, + "loss": 1.2383, + "step": 1625, + "train/speech_entropy": 4.35544819322079, + "train/text_entropy": 1.2951076731962317, + "train/token_acc": 0.24960876369327073 + }, + { + "epoch": 0.24071058475203552, + "grad_norm": 18.56888771057129, + "learning_rate": 1.8003979098875896e-06, + "loss": 1.5312, + "step": 1626, + "train/speech_entropy": 3.969820639244596, + "train/text_entropy": 1.7374499349883108, + "train/token_acc": 0.28313253012048195 + }, + { + "epoch": 0.24085862324204294, + "grad_norm": 9.662891387939453, + "learning_rate": 1.7962542013577372e-06, + "loss": 0.7598, + "step": 1627, + "train/speech_entropy": 4.1510539615182855, + "train/text_entropy": 0.7699568353850266, + "train/token_acc": 0.31613508442776733 + }, + { + "epoch": 0.24100666173205032, + "grad_norm": 14.307636260986328, + "learning_rate": 1.7921202054694658e-06, + "loss": 1.2422, + "step": 1628, + "train/speech_entropy": 4.1221768798828125, + "train/text_entropy": 1.2605464117867606, + "train/token_acc": 0.27450980392156865 + }, + { + "epoch": 0.24115470022205773, + "grad_norm": 26.161041259765625, + "learning_rate": 1.7879959330636931e-06, + "loss": 2.1211, + "step": 1629, + "train/speech_entropy": 3.7593968197269994, + "train/text_entropy": 2.044903773181843, + "train/token_acc": 0.2857142857142857 + }, + { + "epoch": 0.24130273871206515, + "grad_norm": 16.444181442260742, + "learning_rate": 1.7838813949558454e-06, + "loss": 1.5117, + "step": 1630, + "train/speech_entropy": 4.584343521683304, + "train/text_entropy": 1.5884196557731272, + "train/token_acc": 0.24582560296846012 + }, + { + "epoch": 0.24145077720207253, + "grad_norm": 17.85723304748535, + "learning_rate": 1.7797766019358217e-06, + "loss": 1.6797, + "step": 1631, + "train/speech_entropy": 4.182787544668903, + "train/text_entropy": 1.5069601424617471, + "train/token_acc": 0.26857142857142857 + }, + { + "epoch": 0.24159881569207994, + "grad_norm": 17.861919403076172, + "learning_rate": 1.7756815647679576e-06, + "loss": 1.2773, + "step": 1632, + "train/speech_entropy": 3.847367481518817, + "train/text_entropy": 1.2782094923116394, + "train/token_acc": 0.2869822485207101 + }, + { + "epoch": 0.24174685418208736, + "grad_norm": 19.542705535888672, + "learning_rate": 1.7715962941910142e-06, + "loss": 1.8945, + "step": 1633, + "train/speech_entropy": 4.162098934775905, + "train/text_entropy": 1.8328693874633115, + "train/token_acc": 0.2605703048180924 + }, + { + "epoch": 0.24189489267209474, + "grad_norm": 16.18543243408203, + "learning_rate": 1.7675208009181383e-06, + "loss": 0.957, + "step": 1634, + "train/speech_entropy": 4.181941481370193, + "train/text_entropy": 0.8019779397429322, + "train/token_acc": 0.27249683143219267 + }, + { + "epoch": 0.24204293116210215, + "grad_norm": 19.35887908935547, + "learning_rate": 1.7634550956368324e-06, + "loss": 1.248, + "step": 1635, + "train/speech_entropy": 3.883109700804388, + "train/text_entropy": 1.3108660888671875, + "train/token_acc": 0.290414878397711 + }, + { + "epoch": 0.24219096965210954, + "grad_norm": 11.697325706481934, + "learning_rate": 1.7593991890089379e-06, + "loss": 1.0205, + "step": 1636, + "train/speech_entropy": 3.971991960392442, + "train/text_entropy": 0.9493226193367167, + "train/token_acc": 0.27442650807136787 + }, + { + "epoch": 0.24233900814211695, + "grad_norm": 7.845122814178467, + "learning_rate": 1.7553530916705953e-06, + "loss": 0.4307, + "step": 1637, + "train/speech_entropy": 3.7491884527839, + "train/text_entropy": 0.6244288810157022, + "train/token_acc": 0.32409972299168976 + }, + { + "epoch": 0.24248704663212436, + "grad_norm": 18.81888771057129, + "learning_rate": 1.7513168142322224e-06, + "loss": 1.9844, + "step": 1638, + "train/speech_entropy": 4.597348432606065, + "train/text_entropy": 1.7610985892159599, + "train/token_acc": 0.24022346368715083 + }, + { + "epoch": 0.24263508512213175, + "grad_norm": 14.764925956726074, + "learning_rate": 1.747290367278489e-06, + "loss": 1.2539, + "step": 1639, + "train/speech_entropy": 4.343661279449622, + "train/text_entropy": 1.5143117824522387, + "train/token_acc": 0.27401837928153716 + }, + { + "epoch": 0.24278312361213916, + "grad_norm": 19.7214298248291, + "learning_rate": 1.743273761368281e-06, + "loss": 1.2383, + "step": 1640, + "train/speech_entropy": 3.6240895188699556, + "train/text_entropy": 1.2867606811523438, + "train/token_acc": 0.2843601895734597 + }, + { + "epoch": 0.24293116210214655, + "grad_norm": 12.092062950134277, + "learning_rate": 1.7392670070346776e-06, + "loss": 1.0723, + "step": 1641, + "train/speech_entropy": 4.299985667366293, + "train/text_entropy": 1.170215695403343, + "train/token_acc": 0.281076801266825 + }, + { + "epoch": 0.24307920059215396, + "grad_norm": 13.997483253479004, + "learning_rate": 1.7352701147849284e-06, + "loss": 0.8652, + "step": 1642, + "train/speech_entropy": 4.239038021006483, + "train/text_entropy": 0.8154668634588068, + "train/token_acc": 0.2900763358778626 + }, + { + "epoch": 0.24322723908216137, + "grad_norm": 12.18002700805664, + "learning_rate": 1.7312830951004162e-06, + "loss": 1.0508, + "step": 1643, + "train/speech_entropy": 4.309754619018336, + "train/text_entropy": 1.1669499275615676, + "train/token_acc": 0.2703894195444526 + }, + { + "epoch": 0.24337527757216876, + "grad_norm": 15.82498550415039, + "learning_rate": 1.7273059584366334e-06, + "loss": 0.5605, + "step": 1644, + "train/speech_entropy": 3.9545566754245547, + "train/text_entropy": 0.5251364208403088, + "train/token_acc": 0.3285198555956679 + }, + { + "epoch": 0.24352331606217617, + "grad_norm": 18.338909149169922, + "learning_rate": 1.7233387152231597e-06, + "loss": 1.4922, + "step": 1645, + "train/speech_entropy": 4.1797381737504145, + "train/text_entropy": 1.1217448525799962, + "train/token_acc": 0.26384364820846906 + }, + { + "epoch": 0.24367135455218356, + "grad_norm": 13.777345657348633, + "learning_rate": 1.7193813758636268e-06, + "loss": 1.1914, + "step": 1646, + "train/speech_entropy": 4.031750579378498, + "train/text_entropy": 1.1379657592773438, + "train/token_acc": 0.2934947049924357 + }, + { + "epoch": 0.24381939304219097, + "grad_norm": 8.932965278625488, + "learning_rate": 1.7154339507356964e-06, + "loss": 0.5859, + "step": 1647, + "train/speech_entropy": 3.820668091556309, + "train/text_entropy": 0.6694347449711391, + "train/token_acc": 0.3296321998612075 + }, + { + "epoch": 0.24396743153219838, + "grad_norm": 16.9738712310791, + "learning_rate": 1.7114964501910289e-06, + "loss": 1.1094, + "step": 1648, + "train/speech_entropy": 4.362085292896323, + "train/text_entropy": 1.0251119277056526, + "train/token_acc": 0.2781124497991968 + }, + { + "epoch": 0.24411547002220577, + "grad_norm": 18.177047729492188, + "learning_rate": 1.7075688845552629e-06, + "loss": 1.6641, + "step": 1649, + "train/speech_entropy": 4.4174144329580525, + "train/text_entropy": 1.4488654795272575, + "train/token_acc": 0.24067164179104478 + }, + { + "epoch": 0.24426350851221318, + "grad_norm": 12.218692779541016, + "learning_rate": 1.7036512641279807e-06, + "loss": 0.6602, + "step": 1650, + "train/speech_entropy": 3.8336013720936752, + "train/text_entropy": 0.6900721602703459, + "train/token_acc": 0.3217391304347826 + }, + { + "epoch": 0.24441154700222056, + "grad_norm": 17.8107967376709, + "learning_rate": 1.6997435991826826e-06, + "loss": 1.4219, + "step": 1651, + "train/speech_entropy": 4.176992303297925, + "train/text_entropy": 1.5744749771921258, + "train/token_acc": 0.2781875658587987 + }, + { + "epoch": 0.24455958549222798, + "grad_norm": 13.819441795349121, + "learning_rate": 1.6958458999667688e-06, + "loss": 1.042, + "step": 1652, + "train/speech_entropy": 4.4615708604991005, + "train/text_entropy": 1.3162839438325615, + "train/token_acc": 0.2542565266742338 + }, + { + "epoch": 0.2447076239822354, + "grad_norm": 11.079713821411133, + "learning_rate": 1.6919581767014985e-06, + "loss": 0.4219, + "step": 1653, + "train/speech_entropy": 3.645476319803994, + "train/text_entropy": 0.3491393154107251, + "train/token_acc": 0.3368220742150333 + }, + { + "epoch": 0.24485566247224277, + "grad_norm": 17.332365036010742, + "learning_rate": 1.6880804395819718e-06, + "loss": 1.3984, + "step": 1654, + "train/speech_entropy": 4.168535551278597, + "train/text_entropy": 1.4628683431259055, + "train/token_acc": 0.2585635359116022 + }, + { + "epoch": 0.2450037009622502, + "grad_norm": 12.899675369262695, + "learning_rate": 1.684212698777105e-06, + "loss": 1.0898, + "step": 1655, + "train/speech_entropy": 4.077708946776849, + "train/text_entropy": 1.1168225262616132, + "train/token_acc": 0.3041733547351525 + }, + { + "epoch": 0.2451517394522576, + "grad_norm": 11.526646614074707, + "learning_rate": 1.6803549644295951e-06, + "loss": 0.5791, + "step": 1656, + "train/speech_entropy": 3.8562060375146543, + "train/text_entropy": 0.5445073718116397, + "train/token_acc": 0.3264346190028222 + }, + { + "epoch": 0.24529977794226498, + "grad_norm": 14.838871955871582, + "learning_rate": 1.6765072466559014e-06, + "loss": 0.8027, + "step": 1657, + "train/speech_entropy": 3.9208233173076925, + "train/text_entropy": 0.6344491421819447, + "train/token_acc": 0.3125659978880676 + }, + { + "epoch": 0.2454478164322724, + "grad_norm": 18.31206703186035, + "learning_rate": 1.6726695555462175e-06, + "loss": 1.1836, + "step": 1658, + "train/speech_entropy": 4.156830658783784, + "train/text_entropy": 1.1822851725987025, + "train/token_acc": 0.2774566473988439 + }, + { + "epoch": 0.24559585492227978, + "grad_norm": 31.24664878845215, + "learning_rate": 1.6688419011644363e-06, + "loss": 1.582, + "step": 1659, + "train/speech_entropy": 3.378099837393131, + "train/text_entropy": 1.5741340136918864, + "train/token_acc": 0.2967032967032967 + }, + { + "epoch": 0.2457438934122872, + "grad_norm": 30.691953659057617, + "learning_rate": 1.6650242935481382e-06, + "loss": 0.9756, + "step": 1660, + "train/speech_entropy": 4.014405580786054, + "train/text_entropy": 1.1625203782861884, + "train/token_acc": 0.24725274725274726 + }, + { + "epoch": 0.2458919319022946, + "grad_norm": 17.09681510925293, + "learning_rate": 1.6612167427085554e-06, + "loss": 1.5547, + "step": 1661, + "train/speech_entropy": 4.247329167627614, + "train/text_entropy": 1.422144292475103, + "train/token_acc": 0.25829145728643216 + }, + { + "epoch": 0.246039970392302, + "grad_norm": 12.145601272583008, + "learning_rate": 1.6574192586305421e-06, + "loss": 0.8652, + "step": 1662, + "train/speech_entropy": 4.229170642451777, + "train/text_entropy": 1.0930392423415567, + "train/token_acc": 0.2906350914962325 + }, + { + "epoch": 0.2461880088823094, + "grad_norm": 14.673998832702637, + "learning_rate": 1.65363185127256e-06, + "loss": 1.3555, + "step": 1663, + "train/speech_entropy": 4.202291373464687, + "train/text_entropy": 1.5120297840663366, + "train/token_acc": 0.2622814321398834 + }, + { + "epoch": 0.2463360473723168, + "grad_norm": 11.735212326049805, + "learning_rate": 1.6498545305666452e-06, + "loss": 0.7744, + "step": 1664, + "train/speech_entropy": 4.350384447948042, + "train/text_entropy": 1.0186218660931254, + "train/token_acc": 0.27944111776447106 + }, + { + "epoch": 0.2464840858623242, + "grad_norm": 16.5424861907959, + "learning_rate": 1.6460873064183768e-06, + "loss": 0.958, + "step": 1665, + "train/speech_entropy": 3.934060489430147, + "train/text_entropy": 0.765786255605122, + "train/token_acc": 0.29336188436830835 + }, + { + "epoch": 0.24663212435233162, + "grad_norm": 10.186201095581055, + "learning_rate": 1.6423301887068638e-06, + "loss": 0.7642, + "step": 1666, + "train/speech_entropy": 3.9369264246115017, + "train/text_entropy": 0.927059710671325, + "train/token_acc": 0.3103698332124728 + }, + { + "epoch": 0.246780162842339, + "grad_norm": 14.23086929321289, + "learning_rate": 1.6385831872847098e-06, + "loss": 0.8906, + "step": 1667, + "train/speech_entropy": 4.013080484472435, + "train/text_entropy": 0.9804628246326736, + "train/token_acc": 0.29101019462465244 + }, + { + "epoch": 0.24692820133234641, + "grad_norm": 9.744648933410645, + "learning_rate": 1.6348463119779861e-06, + "loss": 0.7783, + "step": 1668, + "train/speech_entropy": 3.9937249806301653, + "train/text_entropy": 0.7989998910941329, + "train/token_acc": 0.3117559523809524 + }, + { + "epoch": 0.2470762398223538, + "grad_norm": 12.801681518554688, + "learning_rate": 1.6311195725862156e-06, + "loss": 0.7686, + "step": 1669, + "train/speech_entropy": 3.445672253292982, + "train/text_entropy": 0.9513262890754862, + "train/token_acc": 0.3177441540577717 + }, + { + "epoch": 0.2472242783123612, + "grad_norm": 15.530674934387207, + "learning_rate": 1.6274029788823376e-06, + "loss": 1.0811, + "step": 1670, + "train/speech_entropy": 4.010075860132826, + "train/text_entropy": 0.8268709364970019, + "train/token_acc": 0.32734806629834257 + }, + { + "epoch": 0.24737231680236862, + "grad_norm": 8.178643226623535, + "learning_rate": 1.6236965406126828e-06, + "loss": 0.5991, + "step": 1671, + "train/speech_entropy": 4.352923686277933, + "train/text_entropy": 0.6652246526374671, + "train/token_acc": 0.33217993079584773 + }, + { + "epoch": 0.247520355292376, + "grad_norm": 20.333398818969727, + "learning_rate": 1.6200002674969574e-06, + "loss": 1.4258, + "step": 1672, + "train/speech_entropy": 4.259591576360887, + "train/text_entropy": 1.4501673380533855, + "train/token_acc": 0.24629878869448182 + }, + { + "epoch": 0.24766839378238342, + "grad_norm": 15.824920654296875, + "learning_rate": 1.6163141692282048e-06, + "loss": 1.0078, + "step": 1673, + "train/speech_entropy": 4.105101878330092, + "train/text_entropy": 1.30144288804796, + "train/token_acc": 0.29596412556053814 + }, + { + "epoch": 0.24781643227239084, + "grad_norm": 20.043394088745117, + "learning_rate": 1.6126382554727873e-06, + "loss": 1.5586, + "step": 1674, + "train/speech_entropy": 4.249952185169422, + "train/text_entropy": 1.4300290194424716, + "train/token_acc": 0.2467948717948718 + }, + { + "epoch": 0.24796447076239822, + "grad_norm": 6.713189125061035, + "learning_rate": 1.6089725358703621e-06, + "loss": 0.1987, + "step": 1675, + "train/speech_entropy": 3.773522846391772, + "train/text_entropy": 0.28244552612304685, + "train/token_acc": 0.3530552861299709 + }, + { + "epoch": 0.24811250925240563, + "grad_norm": 18.888994216918945, + "learning_rate": 1.6053170200338502e-06, + "loss": 1.2617, + "step": 1676, + "train/speech_entropy": 3.9427835215692935, + "train/text_entropy": 1.3131451993375212, + "train/token_acc": 0.27088305489260145 + }, + { + "epoch": 0.24826054774241302, + "grad_norm": 12.897749900817871, + "learning_rate": 1.601671717549415e-06, + "loss": 1.1484, + "step": 1677, + "train/speech_entropy": 4.48061580785969, + "train/text_entropy": 1.2184325119544719, + "train/token_acc": 0.2719298245614035 + }, + { + "epoch": 0.24840858623242043, + "grad_norm": 11.948651313781738, + "learning_rate": 1.5980366379764401e-06, + "loss": 0.9385, + "step": 1678, + "train/speech_entropy": 3.891198233003527, + "train/text_entropy": 0.9618181526114088, + "train/token_acc": 0.26978723404255317 + }, + { + "epoch": 0.24855662472242784, + "grad_norm": 17.906824111938477, + "learning_rate": 1.5944117908474958e-06, + "loss": 1.7129, + "step": 1679, + "train/speech_entropy": 3.910894459691541, + "train/text_entropy": 1.7344637568692984, + "train/token_acc": 0.29959514170040485 + }, + { + "epoch": 0.24870466321243523, + "grad_norm": 14.357337951660156, + "learning_rate": 1.590797185668321e-06, + "loss": 1.3066, + "step": 1680, + "train/speech_entropy": 3.9711904186261124, + "train/text_entropy": 1.2466349710944955, + "train/token_acc": 0.29572763684913217 + }, + { + "epoch": 0.24885270170244264, + "grad_norm": 16.446205139160156, + "learning_rate": 1.5871928319177966e-06, + "loss": 1.5156, + "step": 1681, + "train/speech_entropy": 4.337947281194642, + "train/text_entropy": 1.5259545453116619, + "train/token_acc": 0.25477239353891334 + }, + { + "epoch": 0.24900074019245003, + "grad_norm": 15.410811424255371, + "learning_rate": 1.5835987390479206e-06, + "loss": 0.8813, + "step": 1682, + "train/speech_entropy": 3.833709716796875, + "train/text_entropy": 1.1020586943823445, + "train/token_acc": 0.3 + }, + { + "epoch": 0.24914877868245744, + "grad_norm": 12.21681022644043, + "learning_rate": 1.5800149164837825e-06, + "loss": 0.9238, + "step": 1683, + "train/speech_entropy": 4.053945180532095, + "train/text_entropy": 0.9317288142378612, + "train/token_acc": 0.30218068535825543 + }, + { + "epoch": 0.24929681717246485, + "grad_norm": 16.595054626464844, + "learning_rate": 1.5764413736235396e-06, + "loss": 1.6719, + "step": 1684, + "train/speech_entropy": 4.476001325334821, + "train/text_entropy": 1.7047732338976505, + "train/token_acc": 0.22002398081534771 + }, + { + "epoch": 0.24944485566247224, + "grad_norm": 17.753082275390625, + "learning_rate": 1.5728781198383893e-06, + "loss": 1.2891, + "step": 1685, + "train/speech_entropy": 4.403389749898539, + "train/text_entropy": 1.079201511402226, + "train/token_acc": 0.30030959752321984 + }, + { + "epoch": 0.24959289415247965, + "grad_norm": 14.447945594787598, + "learning_rate": 1.569325164472552e-06, + "loss": 0.6328, + "step": 1686, + "train/speech_entropy": 3.883741333586345, + "train/text_entropy": 0.7035457786472364, + "train/token_acc": 0.30844793713163066 + }, + { + "epoch": 0.24974093264248703, + "grad_norm": 16.20041847229004, + "learning_rate": 1.5657825168432376e-06, + "loss": 0.9023, + "step": 1687, + "train/speech_entropy": 4.045836232680477, + "train/text_entropy": 0.8698996596449003, + "train/token_acc": 0.2710413694721826 + }, + { + "epoch": 0.24988897113249445, + "grad_norm": 14.615100860595703, + "learning_rate": 1.562250186240626e-06, + "loss": 0.7773, + "step": 1688, + "train/speech_entropy": 4.221160632221639, + "train/text_entropy": 1.0851760691842116, + "train/token_acc": 0.2871287128712871 + }, + { + "epoch": 0.25003700962250186, + "grad_norm": 12.680094718933105, + "learning_rate": 1.5587281819278438e-06, + "loss": 0.7305, + "step": 1689, + "train/speech_entropy": 4.004770914713542, + "train/text_entropy": 0.5313285178983742, + "train/token_acc": 0.3409691629955947 + }, + { + "epoch": 0.25018504811250925, + "grad_norm": 18.286951065063477, + "learning_rate": 1.5552165131409361e-06, + "loss": 1.3008, + "step": 1690, + "train/speech_entropy": 4.647129423550549, + "train/text_entropy": 1.4507751108329987, + "train/token_acc": 0.25101214574898784 + }, + { + "epoch": 0.25033308660251663, + "grad_norm": 14.373581886291504, + "learning_rate": 1.5517151890888447e-06, + "loss": 1.5391, + "step": 1691, + "train/speech_entropy": 4.509716664189877, + "train/text_entropy": 1.584355354309082, + "train/token_acc": 0.2687074829931973 + }, + { + "epoch": 0.25048112509252407, + "grad_norm": 17.400928497314453, + "learning_rate": 1.5482242189533833e-06, + "loss": 1.4102, + "step": 1692, + "train/speech_entropy": 4.090374852997555, + "train/text_entropy": 1.6533820704409952, + "train/token_acc": 0.2934322033898305 + }, + { + "epoch": 0.25062916358253146, + "grad_norm": 17.38201904296875, + "learning_rate": 1.544743611889215e-06, + "loss": 1.457, + "step": 1693, + "train/speech_entropy": 4.301203168835723, + "train/text_entropy": 1.3759395110301482, + "train/token_acc": 0.25376128385155466 + }, + { + "epoch": 0.25077720207253884, + "grad_norm": 16.121583938598633, + "learning_rate": 1.541273377023826e-06, + "loss": 0.7637, + "step": 1694, + "train/speech_entropy": 3.7002347857038553, + "train/text_entropy": 0.6658712702689411, + "train/token_acc": 0.3486646884272997 + }, + { + "epoch": 0.2509252405625463, + "grad_norm": 9.666200637817383, + "learning_rate": 1.5378135234575015e-06, + "loss": 0.3789, + "step": 1695, + "train/speech_entropy": 3.640988463374196, + "train/text_entropy": 0.389900858002591, + "train/token_acc": 0.37735849056603776 + }, + { + "epoch": 0.25107327905255367, + "grad_norm": 18.557714462280273, + "learning_rate": 1.5343640602633065e-06, + "loss": 1.3574, + "step": 1696, + "train/speech_entropy": 4.454639979771206, + "train/text_entropy": 1.4947006781369645, + "train/token_acc": 0.2571103526734926 + }, + { + "epoch": 0.25122131754256105, + "grad_norm": 11.4076566696167, + "learning_rate": 1.5309249964870538e-06, + "loss": 0.8066, + "step": 1697, + "train/speech_entropy": 4.653521311241671, + "train/text_entropy": 0.9000319384653633, + "train/token_acc": 0.27752081406105455 + }, + { + "epoch": 0.2513693560325685, + "grad_norm": 15.968243598937988, + "learning_rate": 1.5274963411472873e-06, + "loss": 1.3125, + "step": 1698, + "train/speech_entropy": 4.230848697087314, + "train/text_entropy": 1.3190293783669944, + "train/token_acc": 0.2491103202846975 + }, + { + "epoch": 0.2515173945225759, + "grad_norm": 14.61162281036377, + "learning_rate": 1.5240781032352575e-06, + "loss": 0.9507, + "step": 1699, + "train/speech_entropy": 3.8214583528255988, + "train/text_entropy": 1.0226254222368953, + "train/token_acc": 0.31268151016456924 + }, + { + "epoch": 0.25166543301258326, + "grad_norm": 11.857842445373535, + "learning_rate": 1.5206702917148948e-06, + "loss": 1.2031, + "step": 1700, + "train/speech_entropy": 4.391173912793901, + "train/text_entropy": 1.1882928390740606, + "train/token_acc": 0.272294887039239 + }, + { + "epoch": 0.25181347150259065, + "grad_norm": 12.282464027404785, + "learning_rate": 1.5172729155227849e-06, + "loss": 0.5518, + "step": 1701, + "train/speech_entropy": 3.662384452528626, + "train/text_entropy": 0.4767060107495411, + "train/token_acc": 0.3313032886723508 + }, + { + "epoch": 0.2519615099925981, + "grad_norm": 20.216520309448242, + "learning_rate": 1.5138859835681555e-06, + "loss": 0.9551, + "step": 1702, + "train/speech_entropy": 3.427684783935547, + "train/text_entropy": 0.8601832487145249, + "train/token_acc": 0.3514056224899598 + }, + { + "epoch": 0.25210954848260547, + "grad_norm": 15.531679153442383, + "learning_rate": 1.5105095047328355e-06, + "loss": 1.1562, + "step": 1703, + "train/speech_entropy": 4.416283092073931, + "train/text_entropy": 1.1557567555417296, + "train/token_acc": 0.2666025024061598 + }, + { + "epoch": 0.25225758697261286, + "grad_norm": 17.17267417907715, + "learning_rate": 1.50714348787125e-06, + "loss": 1.2266, + "step": 1704, + "train/speech_entropy": 4.207560424128772, + "train/text_entropy": 1.4876505533854167, + "train/token_acc": 0.2507042253521127 + }, + { + "epoch": 0.2524056254626203, + "grad_norm": 9.346424102783203, + "learning_rate": 1.503787941810387e-06, + "loss": 0.6309, + "step": 1705, + "train/speech_entropy": 4.074122012240215, + "train/text_entropy": 0.5893991117793802, + "train/token_acc": 0.30821256038647343 + }, + { + "epoch": 0.2525536639526277, + "grad_norm": 22.250333786010742, + "learning_rate": 1.5004428753497715e-06, + "loss": 1.3477, + "step": 1706, + "train/speech_entropy": 4.279141069601138, + "train/text_entropy": 1.2957291212238249, + "train/token_acc": 0.2446524064171123 + }, + { + "epoch": 0.25270170244263507, + "grad_norm": 12.508161544799805, + "learning_rate": 1.4971082972614523e-06, + "loss": 0.8477, + "step": 1707, + "train/speech_entropy": 4.201304704051907, + "train/text_entropy": 0.9399132267121346, + "train/token_acc": 0.2913165266106443 + }, + { + "epoch": 0.2528497409326425, + "grad_norm": 17.135881423950195, + "learning_rate": 1.493784216289973e-06, + "loss": 1.2422, + "step": 1708, + "train/speech_entropy": 3.8967383092317336, + "train/text_entropy": 1.2248089130108173, + "train/token_acc": 0.28337236533957844 + }, + { + "epoch": 0.2529977794226499, + "grad_norm": 16.055328369140625, + "learning_rate": 1.490470641152345e-06, + "loss": 1.1777, + "step": 1709, + "train/speech_entropy": 3.6618456229185448, + "train/text_entropy": 1.1507135615951714, + "train/token_acc": 0.3182957393483709 + }, + { + "epoch": 0.2531458179126573, + "grad_norm": 18.67158317565918, + "learning_rate": 1.487167580538035e-06, + "loss": 1.125, + "step": 1710, + "train/speech_entropy": 4.000115137313007, + "train/text_entropy": 1.3280912075402602, + "train/token_acc": 0.27879799666110183 + }, + { + "epoch": 0.2532938564026647, + "grad_norm": 15.916793823242188, + "learning_rate": 1.4838750431089355e-06, + "loss": 1.4805, + "step": 1711, + "train/speech_entropy": 4.033997369972691, + "train/text_entropy": 1.3406139572649771, + "train/token_acc": 0.24876847290640394 + }, + { + "epoch": 0.2534418948926721, + "grad_norm": 24.260334014892578, + "learning_rate": 1.4805930374993394e-06, + "loss": 0.7012, + "step": 1712, + "train/speech_entropy": 3.8346419587152165, + "train/text_entropy": 0.7635998656784279, + "train/token_acc": 0.32386363636363635 + }, + { + "epoch": 0.2535899333826795, + "grad_norm": 11.366179466247559, + "learning_rate": 1.4773215723159256e-06, + "loss": 0.6812, + "step": 1713, + "train/speech_entropy": 4.15593554290275, + "train/text_entropy": 0.9176605890488484, + "train/token_acc": 0.265495867768595 + }, + { + "epoch": 0.2537379718726869, + "grad_norm": 12.289874076843262, + "learning_rate": 1.4740606561377299e-06, + "loss": 1.0234, + "step": 1714, + "train/speech_entropy": 4.229541395980404, + "train/text_entropy": 1.1840665808551387, + "train/token_acc": 0.2696629213483146 + }, + { + "epoch": 0.2538860103626943, + "grad_norm": 19.3662109375, + "learning_rate": 1.4708102975161232e-06, + "loss": 1.4922, + "step": 1715, + "train/speech_entropy": 4.021271969872434, + "train/text_entropy": 1.6098084222702753, + "train/token_acc": 0.24401913875598086 + }, + { + "epoch": 0.2540340488527017, + "grad_norm": 17.19317626953125, + "learning_rate": 1.4675705049747948e-06, + "loss": 1.2949, + "step": 1716, + "train/speech_entropy": 3.918326619002705, + "train/text_entropy": 1.1596795639189164, + "train/token_acc": 0.3074074074074074 + }, + { + "epoch": 0.2541820873427091, + "grad_norm": 12.345892906188965, + "learning_rate": 1.4643412870097215e-06, + "loss": 0.8389, + "step": 1717, + "train/speech_entropy": 3.983423176001869, + "train/text_entropy": 0.8683624649047852, + "train/token_acc": 0.2973708068902992 + }, + { + "epoch": 0.2543301258327165, + "grad_norm": 15.925397872924805, + "learning_rate": 1.4611226520891497e-06, + "loss": 1.5039, + "step": 1718, + "train/speech_entropy": 4.30998026966537, + "train/text_entropy": 1.4212165832519532, + "train/token_acc": 0.24646643109540636 + }, + { + "epoch": 0.2544781643227239, + "grad_norm": 14.320005416870117, + "learning_rate": 1.4579146086535773e-06, + "loss": 1.2266, + "step": 1719, + "train/speech_entropy": 4.224284427266725, + "train/text_entropy": 1.2431621177523744, + "train/token_acc": 0.2897727272727273 + }, + { + "epoch": 0.2546262028127313, + "grad_norm": 16.033123016357422, + "learning_rate": 1.4547171651157216e-06, + "loss": 0.8232, + "step": 1720, + "train/speech_entropy": 3.99587437941411, + "train/text_entropy": 0.6078713119670909, + "train/token_acc": 0.29732408325074333 + }, + { + "epoch": 0.25477424130273874, + "grad_norm": 13.753265380859375, + "learning_rate": 1.451530329860506e-06, + "loss": 1.3398, + "step": 1721, + "train/speech_entropy": 3.9328822667992926, + "train/text_entropy": 1.3271377674047498, + "train/token_acc": 0.2956645344705046 + }, + { + "epoch": 0.2549222797927461, + "grad_norm": 11.895515441894531, + "learning_rate": 1.4483541112450355e-06, + "loss": 0.9219, + "step": 1722, + "train/speech_entropy": 4.2668725317651095, + "train/text_entropy": 1.2267450068859345, + "train/token_acc": 0.2740814299900695 + }, + { + "epoch": 0.2550703182827535, + "grad_norm": 17.161819458007812, + "learning_rate": 1.445188517598573e-06, + "loss": 1.1973, + "step": 1723, + "train/speech_entropy": 4.19719755859375, + "train/text_entropy": 1.2880427805582682, + "train/token_acc": 0.25806451612903225 + }, + { + "epoch": 0.2552183567727609, + "grad_norm": 18.504135131835938, + "learning_rate": 1.442033557222517e-06, + "loss": 1.2246, + "step": 1724, + "train/speech_entropy": 3.9308685202016296, + "train/text_entropy": 1.4058164265341389, + "train/token_acc": 0.3221216041397154 + }, + { + "epoch": 0.25536639526276833, + "grad_norm": 15.733894348144531, + "learning_rate": 1.4388892383903854e-06, + "loss": 0.9395, + "step": 1725, + "train/speech_entropy": 4.2962324748667635, + "train/text_entropy": 1.1623070610894097, + "train/token_acc": 0.2743589743589744 + }, + { + "epoch": 0.2555144337527757, + "grad_norm": 14.538578987121582, + "learning_rate": 1.435755569347787e-06, + "loss": 0.6768, + "step": 1726, + "train/speech_entropy": 3.7769911024305554, + "train/text_entropy": 0.47633597606749045, + "train/token_acc": 0.34632683658170915 + }, + { + "epoch": 0.2556624722427831, + "grad_norm": 20.31402015686035, + "learning_rate": 1.4326325583124037e-06, + "loss": 1.5312, + "step": 1727, + "train/speech_entropy": 3.949185803687284, + "train/text_entropy": 1.620758397792413, + "train/token_acc": 0.26105563480741795 + }, + { + "epoch": 0.25581051073279054, + "grad_norm": 9.988040924072266, + "learning_rate": 1.429520213473967e-06, + "loss": 0.6929, + "step": 1728, + "train/speech_entropy": 4.359368902462353, + "train/text_entropy": 0.8356012603611622, + "train/token_acc": 0.2762836185819071 + }, + { + "epoch": 0.2559585492227979, + "grad_norm": 18.845685958862305, + "learning_rate": 1.4264185429942396e-06, + "loss": 1.5195, + "step": 1729, + "train/speech_entropy": 4.369760439377423, + "train/text_entropy": 1.5844965345076931, + "train/token_acc": 0.26584234930448225 + }, + { + "epoch": 0.2561065877128053, + "grad_norm": 13.112375259399414, + "learning_rate": 1.4233275550069902e-06, + "loss": 1.2188, + "step": 1730, + "train/speech_entropy": 4.777967247596154, + "train/text_entropy": 1.3786209106445313, + "train/token_acc": 0.25020576131687244 + }, + { + "epoch": 0.25625462620281275, + "grad_norm": 19.203020095825195, + "learning_rate": 1.4202472576179734e-06, + "loss": 2.0039, + "step": 1731, + "train/speech_entropy": 4.150447608572002, + "train/text_entropy": 1.9846179997809579, + "train/token_acc": 0.2508710801393728 + }, + { + "epoch": 0.25640266469282014, + "grad_norm": 12.738309860229492, + "learning_rate": 1.4171776589049122e-06, + "loss": 0.6074, + "step": 1732, + "train/speech_entropy": 3.7894583748770763, + "train/text_entropy": 0.4636769150242661, + "train/token_acc": 0.34232954545454547 + }, + { + "epoch": 0.2565507031828275, + "grad_norm": 8.854534149169922, + "learning_rate": 1.4141187669174703e-06, + "loss": 0.5547, + "step": 1733, + "train/speech_entropy": 3.877650752763048, + "train/text_entropy": 0.5542900907949001, + "train/token_acc": 0.33611532625189683 + }, + { + "epoch": 0.25669874167283496, + "grad_norm": 11.14729118347168, + "learning_rate": 1.4110705896772345e-06, + "loss": 1.1172, + "step": 1734, + "train/speech_entropy": 4.155757868665932, + "train/text_entropy": 0.9513967318797675, + "train/token_acc": 0.30255255255255253 + }, + { + "epoch": 0.25684678016284235, + "grad_norm": 18.31046485900879, + "learning_rate": 1.4080331351776954e-06, + "loss": 1.7051, + "step": 1735, + "train/speech_entropy": 4.122669120770538, + "train/text_entropy": 1.4712978066084628, + "train/token_acc": 0.24975320829220138 + }, + { + "epoch": 0.25699481865284973, + "grad_norm": 12.12997817993164, + "learning_rate": 1.4050064113842234e-06, + "loss": 0.7368, + "step": 1736, + "train/speech_entropy": 3.975293699709955, + "train/text_entropy": 0.6551601409912109, + "train/token_acc": 0.30634146341463414 + }, + { + "epoch": 0.2571428571428571, + "grad_norm": 11.574541091918945, + "learning_rate": 1.4019904262340471e-06, + "loss": 0.8398, + "step": 1737, + "train/speech_entropy": 3.943572480799788, + "train/text_entropy": 0.7420420539110226, + "train/token_acc": 0.3091286307053942 + }, + { + "epoch": 0.25729089563286456, + "grad_norm": 18.984840393066406, + "learning_rate": 1.3989851876362397e-06, + "loss": 1.3711, + "step": 1738, + "train/speech_entropy": 3.634835247553423, + "train/text_entropy": 1.2290781047982229, + "train/token_acc": 0.2777085927770859 + }, + { + "epoch": 0.25743893412287194, + "grad_norm": 7.356720924377441, + "learning_rate": 1.3959907034716844e-06, + "loss": 0.4316, + "step": 1739, + "train/speech_entropy": 3.847793758616728, + "train/text_entropy": 0.4507745297369129, + "train/token_acc": 0.3347107438016529 + }, + { + "epoch": 0.25758697261287933, + "grad_norm": 12.872446060180664, + "learning_rate": 1.393006981593069e-06, + "loss": 1.125, + "step": 1740, + "train/speech_entropy": 4.028935119183394, + "train/text_entropy": 1.1665519442954082, + "train/token_acc": 0.2839140103780578 + }, + { + "epoch": 0.25773501110288677, + "grad_norm": 15.228079795837402, + "learning_rate": 1.3900340298248582e-06, + "loss": 0.9531, + "step": 1741, + "train/speech_entropy": 4.420559873675356, + "train/text_entropy": 1.0529485947592965, + "train/token_acc": 0.2624113475177305 + }, + { + "epoch": 0.25788304959289415, + "grad_norm": 13.013802528381348, + "learning_rate": 1.3870718559632676e-06, + "loss": 0.9834, + "step": 1742, + "train/speech_entropy": 4.12021595457777, + "train/text_entropy": 1.0277085003552136, + "train/token_acc": 0.29764801297648014 + }, + { + "epoch": 0.25803108808290154, + "grad_norm": 7.425604820251465, + "learning_rate": 1.384120467776255e-06, + "loss": 0.291, + "step": 1743, + "train/speech_entropy": 3.802450304833528, + "train/text_entropy": 0.32774645872790403, + "train/token_acc": 0.3484688489968321 + }, + { + "epoch": 0.258179126572909, + "grad_norm": 18.66985511779785, + "learning_rate": 1.3811798730034931e-06, + "loss": 1.457, + "step": 1744, + "train/speech_entropy": 4.113321940104167, + "train/text_entropy": 1.4843896564684416, + "train/token_acc": 0.2513542795232936 + }, + { + "epoch": 0.25832716506291636, + "grad_norm": 12.228553771972656, + "learning_rate": 1.378250079356346e-06, + "loss": 0.9688, + "step": 1745, + "train/speech_entropy": 4.059834058790665, + "train/text_entropy": 1.0561948922964244, + "train/token_acc": 0.29123468426013194 + }, + { + "epoch": 0.25847520355292375, + "grad_norm": 12.565536499023438, + "learning_rate": 1.3753310945178574e-06, + "loss": 1.0498, + "step": 1746, + "train/speech_entropy": 3.9663527665391385, + "train/text_entropy": 1.2166299860876517, + "train/token_acc": 0.30190311418685123 + }, + { + "epoch": 0.2586232420429312, + "grad_norm": 13.177011489868164, + "learning_rate": 1.372422926142726e-06, + "loss": 0.8696, + "step": 1747, + "train/speech_entropy": 3.829302533888659, + "train/text_entropy": 0.8096116807725694, + "train/token_acc": 0.30267379679144385 + }, + { + "epoch": 0.2587712805329386, + "grad_norm": 18.235246658325195, + "learning_rate": 1.3695255818572817e-06, + "loss": 1.2246, + "step": 1748, + "train/speech_entropy": 4.186066431781046, + "train/text_entropy": 1.2542757825786566, + "train/token_acc": 0.24671052631578946 + }, + { + "epoch": 0.25891931902294596, + "grad_norm": 13.522236824035645, + "learning_rate": 1.3666390692594745e-06, + "loss": 1.125, + "step": 1749, + "train/speech_entropy": 4.232130917058808, + "train/text_entropy": 1.1076595322768577, + "train/token_acc": 0.2515679442508711 + }, + { + "epoch": 0.25906735751295334, + "grad_norm": 17.005056381225586, + "learning_rate": 1.3637633959188457e-06, + "loss": 1.0391, + "step": 1750, + "train/speech_entropy": 4.31446289703289, + "train/text_entropy": 1.1505398327791239, + "train/token_acc": 0.26304347826086955 + }, + { + "epoch": 0.2592153960029608, + "grad_norm": 24.625953674316406, + "learning_rate": 1.360898569376513e-06, + "loss": 1.8164, + "step": 1751, + "train/speech_entropy": 3.952675290338869, + "train/text_entropy": 1.3671571532292153, + "train/token_acc": 0.2778581765557164 + }, + { + "epoch": 0.25936343449296817, + "grad_norm": 10.747248649597168, + "learning_rate": 1.3580445971451523e-06, + "loss": 0.5107, + "step": 1752, + "train/speech_entropy": 3.7396043551439058, + "train/text_entropy": 0.5590827011480564, + "train/token_acc": 0.30762639245929735 + }, + { + "epoch": 0.25951147298297556, + "grad_norm": 16.354751586914062, + "learning_rate": 1.355201486708971e-06, + "loss": 1.3086, + "step": 1753, + "train/speech_entropy": 3.973108415772929, + "train/text_entropy": 1.3287810853073716, + "train/token_acc": 0.2548828125 + }, + { + "epoch": 0.259659511472983, + "grad_norm": 13.2157621383667, + "learning_rate": 1.352369245523695e-06, + "loss": 0.9707, + "step": 1754, + "train/speech_entropy": 4.500442688723645, + "train/text_entropy": 0.8592430330672354, + "train/token_acc": 0.2965451055662188 + }, + { + "epoch": 0.2598075499629904, + "grad_norm": 11.644731521606445, + "learning_rate": 1.3495478810165477e-06, + "loss": 0.7441, + "step": 1755, + "train/speech_entropy": 4.049321255618579, + "train/text_entropy": 1.0232326130808136, + "train/token_acc": 0.289237668161435 + }, + { + "epoch": 0.25995558845299777, + "grad_norm": 18.568470001220703, + "learning_rate": 1.3467374005862282e-06, + "loss": 1.6289, + "step": 1756, + "train/speech_entropy": 4.244284972015644, + "train/text_entropy": 1.6789815203743692, + "train/token_acc": 0.23148148148148148 + }, + { + "epoch": 0.2601036269430052, + "grad_norm": 16.06402587890625, + "learning_rate": 1.3439378116028929e-06, + "loss": 1.5078, + "step": 1757, + "train/speech_entropy": 4.460863690043604, + "train/text_entropy": 1.4114372253417968, + "train/token_acc": 0.24083769633507854 + }, + { + "epoch": 0.2602516654330126, + "grad_norm": 16.01540184020996, + "learning_rate": 1.341149121408139e-06, + "loss": 0.8242, + "step": 1758, + "train/speech_entropy": 4.0621450294227675, + "train/text_entropy": 0.7693061828613281, + "train/token_acc": 0.3003003003003003 + }, + { + "epoch": 0.26039970392302, + "grad_norm": 11.663203239440918, + "learning_rate": 1.3383713373149799e-06, + "loss": 0.5254, + "step": 1759, + "train/speech_entropy": 3.644320005469782, + "train/text_entropy": 0.3624642840503941, + "train/token_acc": 0.32397408207343414 + }, + { + "epoch": 0.26054774241302736, + "grad_norm": 14.966594696044922, + "learning_rate": 1.3356044666078316e-06, + "loss": 1.0684, + "step": 1760, + "train/speech_entropy": 4.095904541015625, + "train/text_entropy": 1.1264285107592602, + "train/token_acc": 0.2680901542111507 + }, + { + "epoch": 0.2606957809030348, + "grad_norm": 17.388057708740234, + "learning_rate": 1.3328485165424899e-06, + "loss": 1.6289, + "step": 1761, + "train/speech_entropy": 4.206032754395251, + "train/text_entropy": 1.6175920734297757, + "train/token_acc": 0.2897196261682243 + }, + { + "epoch": 0.2608438193930422, + "grad_norm": 13.694125175476074, + "learning_rate": 1.3301034943461114e-06, + "loss": 1.374, + "step": 1762, + "train/speech_entropy": 4.3838267321298385, + "train/text_entropy": 1.5039187344637783, + "train/token_acc": 0.24715162138475022 + }, + { + "epoch": 0.26099185788304957, + "grad_norm": 14.557168960571289, + "learning_rate": 1.3273694072171988e-06, + "loss": 0.8662, + "step": 1763, + "train/speech_entropy": 3.8369716866546524, + "train/text_entropy": 0.8116457647610205, + "train/token_acc": 0.2872340425531915 + }, + { + "epoch": 0.261139896373057, + "grad_norm": 11.502212524414062, + "learning_rate": 1.3246462623255752e-06, + "loss": 0.7695, + "step": 1764, + "train/speech_entropy": 3.779780570183011, + "train/text_entropy": 0.860238974949099, + "train/token_acc": 0.3133393017009848 + }, + { + "epoch": 0.2612879348630644, + "grad_norm": 13.890863418579102, + "learning_rate": 1.321934066812371e-06, + "loss": 0.8691, + "step": 1765, + "train/speech_entropy": 4.387060316863644, + "train/text_entropy": 1.2621857191988095, + "train/token_acc": 0.2725274725274725 + }, + { + "epoch": 0.2614359733530718, + "grad_norm": 9.96358871459961, + "learning_rate": 1.319232827790003e-06, + "loss": 0.4341, + "step": 1766, + "train/speech_entropy": 3.564409977908689, + "train/text_entropy": 0.46742412356511215, + "train/token_acc": 0.32422802850356297 + }, + { + "epoch": 0.2615840118430792, + "grad_norm": 12.78554630279541, + "learning_rate": 1.3165425523421558e-06, + "loss": 1.0654, + "step": 1767, + "train/speech_entropy": 3.958264871245449, + "train/text_entropy": 1.0645596651851383, + "train/token_acc": 0.2978723404255319 + }, + { + "epoch": 0.2617320503330866, + "grad_norm": 12.653590202331543, + "learning_rate": 1.3138632475237622e-06, + "loss": 0.5918, + "step": 1768, + "train/speech_entropy": 3.8578131018875195, + "train/text_entropy": 0.5074453889653924, + "train/token_acc": 0.3482466747279323 + }, + { + "epoch": 0.261880088823094, + "grad_norm": 12.400683403015137, + "learning_rate": 1.3111949203609885e-06, + "loss": 0.8691, + "step": 1769, + "train/speech_entropy": 4.190629069010416, + "train/text_entropy": 0.7297341482979911, + "train/token_acc": 0.3246527777777778 + }, + { + "epoch": 0.26202812731310143, + "grad_norm": 10.097867012023926, + "learning_rate": 1.30853757785121e-06, + "loss": 0.7031, + "step": 1770, + "train/speech_entropy": 3.8589692398023754, + "train/text_entropy": 0.6606567094910819, + "train/token_acc": 0.34283387622149836 + }, + { + "epoch": 0.2621761658031088, + "grad_norm": 17.150941848754883, + "learning_rate": 1.305891226962999e-06, + "loss": 0.8984, + "step": 1771, + "train/speech_entropy": 4.036138998495566, + "train/text_entropy": 1.028714341937371, + "train/token_acc": 0.2818181818181818 + }, + { + "epoch": 0.2623242042931162, + "grad_norm": 17.36729621887207, + "learning_rate": 1.3032558746360998e-06, + "loss": 1.2383, + "step": 1772, + "train/speech_entropy": 4.586739676339286, + "train/text_entropy": 1.105179472892515, + "train/token_acc": 0.25338253382533826 + }, + { + "epoch": 0.2624722427831236, + "grad_norm": 13.558695793151855, + "learning_rate": 1.3006315277814197e-06, + "loss": 0.5879, + "step": 1773, + "train/speech_entropy": 3.810387696655777, + "train/text_entropy": 0.5694870599886266, + "train/token_acc": 0.3176206509539843 + }, + { + "epoch": 0.26262028127313103, + "grad_norm": 12.695334434509277, + "learning_rate": 1.2980181932810001e-06, + "loss": 0.832, + "step": 1774, + "train/speech_entropy": 4.380998253324931, + "train/text_entropy": 1.0278154611587524, + "train/token_acc": 0.2843029637760702 + }, + { + "epoch": 0.2627683197631384, + "grad_norm": 19.133949279785156, + "learning_rate": 1.295415877988007e-06, + "loss": 1.1836, + "step": 1775, + "train/speech_entropy": 3.4743078837557415, + "train/text_entropy": 1.420742996879246, + "train/token_acc": 0.3024193548387097 + }, + { + "epoch": 0.2629163582531458, + "grad_norm": 14.957279205322266, + "learning_rate": 1.2928245887267085e-06, + "loss": 1.1328, + "step": 1776, + "train/speech_entropy": 4.501587968991126, + "train/text_entropy": 1.1637216525607639, + "train/token_acc": 0.28599033816425123 + }, + { + "epoch": 0.26306439674315324, + "grad_norm": 7.176486968994141, + "learning_rate": 1.29024433229246e-06, + "loss": 0.375, + "step": 1777, + "train/speech_entropy": 3.6289219879907026, + "train/text_entropy": 0.30128360231280765, + "train/token_acc": 0.3202966958867161 + }, + { + "epoch": 0.2632124352331606, + "grad_norm": 10.258101463317871, + "learning_rate": 1.2876751154516805e-06, + "loss": 0.7432, + "step": 1778, + "train/speech_entropy": 4.175089396766169, + "train/text_entropy": 0.7604966057671441, + "train/token_acc": 0.2894308943089431 + }, + { + "epoch": 0.263360473723168, + "grad_norm": 12.333812713623047, + "learning_rate": 1.2851169449418441e-06, + "loss": 1.2119, + "step": 1779, + "train/speech_entropy": 3.861895067938443, + "train/text_entropy": 0.8192624858781403, + "train/token_acc": 0.3245984784446323 + }, + { + "epoch": 0.26350851221317545, + "grad_norm": 20.060283660888672, + "learning_rate": 1.2825698274714542e-06, + "loss": 1.375, + "step": 1780, + "train/speech_entropy": 4.232353913290638, + "train/text_entropy": 1.6808216697291325, + "train/token_acc": 0.2437641723356009 + }, + { + "epoch": 0.26365655070318283, + "grad_norm": 15.532992362976074, + "learning_rate": 1.2800337697200282e-06, + "loss": 1.2617, + "step": 1781, + "train/speech_entropy": 4.272517944955584, + "train/text_entropy": 1.377956428527832, + "train/token_acc": 0.2854251012145749 + }, + { + "epoch": 0.2638045891931902, + "grad_norm": 14.274626731872559, + "learning_rate": 1.277508778338084e-06, + "loss": 1.207, + "step": 1782, + "train/speech_entropy": 4.313223131111258, + "train/text_entropy": 1.123731027437946, + "train/token_acc": 0.2775 + }, + { + "epoch": 0.2639526276831976, + "grad_norm": 11.892966270446777, + "learning_rate": 1.274994859947115e-06, + "loss": 0.5254, + "step": 1783, + "train/speech_entropy": 3.6264316595873787, + "train/text_entropy": 0.5967372648897227, + "train/token_acc": 0.336322869955157 + }, + { + "epoch": 0.26410066617320505, + "grad_norm": 11.36113452911377, + "learning_rate": 1.2724920211395794e-06, + "loss": 0.8457, + "step": 1784, + "train/speech_entropy": 3.725881403142756, + "train/text_entropy": 0.9944903986240791, + "train/token_acc": 0.30798771121351765 + }, + { + "epoch": 0.26424870466321243, + "grad_norm": 14.956398010253906, + "learning_rate": 1.2700002684788826e-06, + "loss": 1.0, + "step": 1785, + "train/speech_entropy": 4.079444761800921, + "train/text_entropy": 0.9898747762044271, + "train/token_acc": 0.2921646746347942 + }, + { + "epoch": 0.2643967431532198, + "grad_norm": 27.77591896057129, + "learning_rate": 1.2675196084993519e-06, + "loss": 1.8867, + "step": 1786, + "train/speech_entropy": 4.270739028033089, + "train/text_entropy": 1.9806809601960358, + "train/token_acc": 0.21305841924398625 + }, + { + "epoch": 0.26454478164322726, + "grad_norm": 16.36387062072754, + "learning_rate": 1.2650500477062308e-06, + "loss": 1.0879, + "step": 1787, + "train/speech_entropy": 3.8151507956444184, + "train/text_entropy": 1.2969238644554502, + "train/token_acc": 0.26817640047675806 + }, + { + "epoch": 0.26469282013323464, + "grad_norm": 17.750600814819336, + "learning_rate": 1.2625915925756563e-06, + "loss": 0.7695, + "step": 1788, + "train/speech_entropy": 3.5933774981582376, + "train/text_entropy": 0.6143731337327224, + "train/token_acc": 0.2961608775137112 + }, + { + "epoch": 0.264840858623242, + "grad_norm": 12.347647666931152, + "learning_rate": 1.260144249554637e-06, + "loss": 1.2559, + "step": 1789, + "train/speech_entropy": 4.043977033787703, + "train/text_entropy": 1.092717408381802, + "train/token_acc": 0.3111801242236025 + }, + { + "epoch": 0.26498889711324947, + "grad_norm": 11.482622146606445, + "learning_rate": 1.257708025061046e-06, + "loss": 1.2578, + "step": 1790, + "train/speech_entropy": 4.471134385210266, + "train/text_entropy": 1.1749409895676832, + "train/token_acc": 0.27703604806408544 + }, + { + "epoch": 0.26513693560325685, + "grad_norm": 15.86470890045166, + "learning_rate": 1.2552829254835996e-06, + "loss": 1.0352, + "step": 1791, + "train/speech_entropy": 4.194052414487407, + "train/text_entropy": 1.0460351931503395, + "train/token_acc": 0.2598752598752599 + }, + { + "epoch": 0.26528497409326424, + "grad_norm": 16.635183334350586, + "learning_rate": 1.2528689571818362e-06, + "loss": 1.0234, + "step": 1792, + "train/speech_entropy": 3.8370235320060484, + "train/text_entropy": 0.8975553591389301, + "train/token_acc": 0.2874493927125506 + }, + { + "epoch": 0.2654330125832717, + "grad_norm": 9.319526672363281, + "learning_rate": 1.250466126486108e-06, + "loss": 0.5864, + "step": 1793, + "train/speech_entropy": 4.1326721946360605, + "train/text_entropy": 0.6137211302406768, + "train/token_acc": 0.2988326848249027 + }, + { + "epoch": 0.26558105107327906, + "grad_norm": 14.867874145507812, + "learning_rate": 1.2480744396975578e-06, + "loss": 0.8008, + "step": 1794, + "train/speech_entropy": 4.047740589488637, + "train/text_entropy": 0.9173862969697412, + "train/token_acc": 0.3083743842364532 + }, + { + "epoch": 0.26572908956328645, + "grad_norm": 14.283548355102539, + "learning_rate": 1.2456939030881048e-06, + "loss": 0.7383, + "step": 1795, + "train/speech_entropy": 3.7451024416114103, + "train/text_entropy": 1.0792387830532664, + "train/token_acc": 0.32053422370617696 + }, + { + "epoch": 0.26587712805329383, + "grad_norm": 14.67771053314209, + "learning_rate": 1.2433245229004297e-06, + "loss": 1.1914, + "step": 1796, + "train/speech_entropy": 4.157645955311795, + "train/text_entropy": 1.3081379854742856, + "train/token_acc": 0.2506989748369059 + }, + { + "epoch": 0.2660251665433013, + "grad_norm": 8.282809257507324, + "learning_rate": 1.2409663053479558e-06, + "loss": 0.334, + "step": 1797, + "train/speech_entropy": 3.6959654557686834, + "train/text_entropy": 0.2662286815529098, + "train/token_acc": 0.3341404358353511 + }, + { + "epoch": 0.26617320503330866, + "grad_norm": 20.095422744750977, + "learning_rate": 1.2386192566148327e-06, + "loss": 1.5391, + "step": 1798, + "train/speech_entropy": 4.047794394585055, + "train/text_entropy": 1.6744416041403825, + "train/token_acc": 0.2615558060879369 + }, + { + "epoch": 0.26632124352331604, + "grad_norm": 49.79451370239258, + "learning_rate": 1.2362833828559232e-06, + "loss": 1.5469, + "step": 1799, + "train/speech_entropy": 3.993778708647055, + "train/text_entropy": 1.4650946458180745, + "train/token_acc": 0.24601769911504426 + }, + { + "epoch": 0.2664692820133235, + "grad_norm": 12.763130187988281, + "learning_rate": 1.2339586901967831e-06, + "loss": 0.8477, + "step": 1800, + "train/speech_entropy": 4.013450666255012, + "train/text_entropy": 1.0665284239727517, + "train/token_acc": 0.3093661305581835 + }, + { + "epoch": 0.26661732050333087, + "grad_norm": 16.995044708251953, + "learning_rate": 1.2316451847336483e-06, + "loss": 1.1992, + "step": 1801, + "train/speech_entropy": 3.601227414560508, + "train/text_entropy": 1.1699905395507812, + "train/token_acc": 0.3050847457627119 + }, + { + "epoch": 0.26676535899333825, + "grad_norm": 17.138233184814453, + "learning_rate": 1.2293428725334174e-06, + "loss": 1.1484, + "step": 1802, + "train/speech_entropy": 4.042667473687066, + "train/text_entropy": 1.2078826803910105, + "train/token_acc": 0.28440366972477066 + }, + { + "epoch": 0.2669133974833457, + "grad_norm": 17.53392219543457, + "learning_rate": 1.2270517596336356e-06, + "loss": 1.4805, + "step": 1803, + "train/speech_entropy": 4.270475841703869, + "train/text_entropy": 1.2980313982282365, + "train/token_acc": 0.24089635854341737 + }, + { + "epoch": 0.2670614359733531, + "grad_norm": 16.716327667236328, + "learning_rate": 1.2247718520424792e-06, + "loss": 1.2285, + "step": 1804, + "train/speech_entropy": 4.110132291887734, + "train/text_entropy": 1.524871436917052, + "train/token_acc": 0.27853260869565216 + }, + { + "epoch": 0.26720947446336046, + "grad_norm": 21.141231536865234, + "learning_rate": 1.2225031557387392e-06, + "loss": 2.4062, + "step": 1805, + "train/speech_entropy": 4.096822933274872, + "train/text_entropy": 1.9794405887001438, + "train/token_acc": 0.26756756756756755 + }, + { + "epoch": 0.26735751295336785, + "grad_norm": 14.785632133483887, + "learning_rate": 1.2202456766718092e-06, + "loss": 1.6094, + "step": 1806, + "train/speech_entropy": 4.047576010476595, + "train/text_entropy": 1.5911249112682182, + "train/token_acc": 0.29229480737018426 + }, + { + "epoch": 0.2675055514433753, + "grad_norm": 21.187076568603516, + "learning_rate": 1.2179994207616637e-06, + "loss": 1.3711, + "step": 1807, + "train/speech_entropy": 4.039842699932795, + "train/text_entropy": 1.2368910702792082, + "train/token_acc": 0.26260869565217393 + }, + { + "epoch": 0.2676535899333827, + "grad_norm": 16.18564796447754, + "learning_rate": 1.215764393898846e-06, + "loss": 1.4492, + "step": 1808, + "train/speech_entropy": 4.247051976308101, + "train/text_entropy": 1.4095833894893595, + "train/token_acc": 0.2566069906223359 + }, + { + "epoch": 0.26780162842339006, + "grad_norm": 17.365476608276367, + "learning_rate": 1.2135406019444556e-06, + "loss": 1.0801, + "step": 1809, + "train/speech_entropy": 4.114886994057513, + "train/text_entropy": 1.311675147006386, + "train/token_acc": 0.27530864197530863 + }, + { + "epoch": 0.2679496669133975, + "grad_norm": 12.429173469543457, + "learning_rate": 1.2113280507301262e-06, + "loss": 0.8018, + "step": 1810, + "train/speech_entropy": 3.7556997167001858, + "train/text_entropy": 0.5306707130288179, + "train/token_acc": 0.3466666666666667 + }, + { + "epoch": 0.2680977054034049, + "grad_norm": 16.35846710205078, + "learning_rate": 1.2091267460580154e-06, + "loss": 1.2852, + "step": 1811, + "train/speech_entropy": 4.303243762600807, + "train/text_entropy": 1.3188959070154138, + "train/token_acc": 0.27395833333333336 + }, + { + "epoch": 0.26824574389341227, + "grad_norm": 7.316850185394287, + "learning_rate": 1.2069366937007896e-06, + "loss": 0.4043, + "step": 1812, + "train/speech_entropy": 3.7448848885771415, + "train/text_entropy": 0.4705979531271416, + "train/token_acc": 0.341687552213868 + }, + { + "epoch": 0.2683937823834197, + "grad_norm": 13.678812026977539, + "learning_rate": 1.2047578994016039e-06, + "loss": 1.5469, + "step": 1813, + "train/speech_entropy": 4.475887802612135, + "train/text_entropy": 1.5687545725334449, + "train/token_acc": 0.24237190558434082 + }, + { + "epoch": 0.2685418208734271, + "grad_norm": 13.104914665222168, + "learning_rate": 1.2025903688740926e-06, + "loss": 0.8281, + "step": 1814, + "train/speech_entropy": 3.808007939161803, + "train/text_entropy": 0.8838471376179345, + "train/token_acc": 0.3259958071278826 + }, + { + "epoch": 0.2686898593634345, + "grad_norm": 18.34473991394043, + "learning_rate": 1.200434107802354e-06, + "loss": 1.4199, + "step": 1815, + "train/speech_entropy": 3.7713108039283516, + "train/text_entropy": 1.0857635844837537, + "train/token_acc": 0.30434782608695654 + }, + { + "epoch": 0.2688378978534419, + "grad_norm": 18.01459312438965, + "learning_rate": 1.1982891218409282e-06, + "loss": 0.7656, + "step": 1816, + "train/speech_entropy": 4.142097091674804, + "train/text_entropy": 0.8917957652698864, + "train/token_acc": 0.28324697754749567 + }, + { + "epoch": 0.2689859363434493, + "grad_norm": 17.42713165283203, + "learning_rate": 1.1961554166147923e-06, + "loss": 1.7109, + "step": 1817, + "train/speech_entropy": 4.180811614344587, + "train/text_entropy": 1.6389027579886015, + "train/token_acc": 0.2701793721973094 + }, + { + "epoch": 0.2691339748334567, + "grad_norm": 15.568304061889648, + "learning_rate": 1.194032997719341e-06, + "loss": 1.2695, + "step": 1818, + "train/speech_entropy": 4.340915771484375, + "train/text_entropy": 1.2282844647023472, + "train/token_acc": 0.257985257985258 + }, + { + "epoch": 0.2692820133234641, + "grad_norm": 18.814048767089844, + "learning_rate": 1.1919218707203675e-06, + "loss": 1.207, + "step": 1819, + "train/speech_entropy": 3.6917745655980605, + "train/text_entropy": 1.1189488055659276, + "train/token_acc": 0.2756052141527002 + }, + { + "epoch": 0.2694300518134715, + "grad_norm": 17.31466293334961, + "learning_rate": 1.189822041154058e-06, + "loss": 1.1289, + "step": 1820, + "train/speech_entropy": 3.9650320067734977, + "train/text_entropy": 1.2066302212801847, + "train/token_acc": 0.26504751847940866 + }, + { + "epoch": 0.2695780903034789, + "grad_norm": 26.699472427368164, + "learning_rate": 1.1877335145269715e-06, + "loss": 1.3867, + "step": 1821, + "train/speech_entropy": 3.537553200354943, + "train/text_entropy": 1.2996139526367188, + "train/token_acc": 0.2865384615384615 + }, + { + "epoch": 0.2697261287934863, + "grad_norm": 17.990842819213867, + "learning_rate": 1.1856562963160233e-06, + "loss": 1.5742, + "step": 1822, + "train/speech_entropy": 4.160930193601733, + "train/text_entropy": 1.4340772356305804, + "train/token_acc": 0.3023255813953488 + }, + { + "epoch": 0.2698741672834937, + "grad_norm": 21.57585906982422, + "learning_rate": 1.1835903919684775e-06, + "loss": 1.3438, + "step": 1823, + "train/speech_entropy": 4.11535170471784, + "train/text_entropy": 1.4050846099853516, + "train/token_acc": 0.31530139103554866 + }, + { + "epoch": 0.2700222057735011, + "grad_norm": 14.887152671813965, + "learning_rate": 1.181535806901928e-06, + "loss": 1.3164, + "step": 1824, + "train/speech_entropy": 4.1776042696795885, + "train/text_entropy": 1.1247746002979768, + "train/token_acc": 0.26903553299492383 + }, + { + "epoch": 0.2701702442635085, + "grad_norm": 7.005825996398926, + "learning_rate": 1.179492546504283e-06, + "loss": 0.4053, + "step": 1825, + "train/speech_entropy": 3.8334659786535106, + "train/text_entropy": 0.4442009314512595, + "train/token_acc": 0.33354153653966273 + }, + { + "epoch": 0.27031828275351594, + "grad_norm": 16.992605209350586, + "learning_rate": 1.1774606161337563e-06, + "loss": 1.0254, + "step": 1826, + "train/speech_entropy": 4.218978620080726, + "train/text_entropy": 0.8688833012300379, + "train/token_acc": 0.2905779889152811 + }, + { + "epoch": 0.2704663212435233, + "grad_norm": 12.389425277709961, + "learning_rate": 1.1754400211188472e-06, + "loss": 0.6738, + "step": 1827, + "train/speech_entropy": 3.8893324689167303, + "train/text_entropy": 0.5813415908813476, + "train/token_acc": 0.3213399503722084 + }, + { + "epoch": 0.2706143597335307, + "grad_norm": 13.677444458007812, + "learning_rate": 1.17343076675833e-06, + "loss": 0.7754, + "step": 1828, + "train/speech_entropy": 3.67822485751793, + "train/text_entropy": 0.6912669499715169, + "train/token_acc": 0.27945205479452057 + }, + { + "epoch": 0.2707623982235381, + "grad_norm": 22.848814010620117, + "learning_rate": 1.1714328583212422e-06, + "loss": 1.668, + "step": 1829, + "train/speech_entropy": 3.860137212844122, + "train/text_entropy": 1.6239000539310644, + "train/token_acc": 0.2619718309859155 + }, + { + "epoch": 0.27091043671354553, + "grad_norm": 17.11134147644043, + "learning_rate": 1.1694463010468643e-06, + "loss": 1.3398, + "step": 1830, + "train/speech_entropy": 4.211336915164715, + "train/text_entropy": 1.3743100541361262, + "train/token_acc": 0.24718045112781956 + }, + { + "epoch": 0.2710584752035529, + "grad_norm": 18.14887046813965, + "learning_rate": 1.167471100144711e-06, + "loss": 1.1465, + "step": 1831, + "train/speech_entropy": 4.114546432186235, + "train/text_entropy": 1.134900653173053, + "train/token_acc": 0.2854838709677419 + }, + { + "epoch": 0.2712065136935603, + "grad_norm": 12.708623886108398, + "learning_rate": 1.1655072607945177e-06, + "loss": 0.7578, + "step": 1832, + "train/speech_entropy": 3.8136575481848802, + "train/text_entropy": 0.6698163160637244, + "train/token_acc": 0.33494208494208494 + }, + { + "epoch": 0.27135455218356774, + "grad_norm": 12.891931533813477, + "learning_rate": 1.1635547881462225e-06, + "loss": 0.7578, + "step": 1833, + "train/speech_entropy": 3.933739778037383, + "train/text_entropy": 0.8855999127105253, + "train/token_acc": 0.2957345971563981 + }, + { + "epoch": 0.27150259067357513, + "grad_norm": 20.24649429321289, + "learning_rate": 1.161613687319958e-06, + "loss": 1.5938, + "step": 1834, + "train/speech_entropy": 4.290683322482639, + "train/text_entropy": 1.52106039218993, + "train/token_acc": 0.22005988023952097 + }, + { + "epoch": 0.2716506291635825, + "grad_norm": 14.522282600402832, + "learning_rate": 1.1596839634060359e-06, + "loss": 0.5049, + "step": 1835, + "train/speech_entropy": 3.883276465104863, + "train/text_entropy": 0.4791241163088952, + "train/token_acc": 0.2857142857142857 + }, + { + "epoch": 0.27179866765358995, + "grad_norm": 15.271240234375, + "learning_rate": 1.15776562146493e-06, + "loss": 0.8027, + "step": 1836, + "train/speech_entropy": 3.864013671875, + "train/text_entropy": 1.2290397256107655, + "train/token_acc": 0.2813008130081301 + }, + { + "epoch": 0.27194670614359734, + "grad_norm": 16.907215118408203, + "learning_rate": 1.1558586665272704e-06, + "loss": 1.2891, + "step": 1837, + "train/speech_entropy": 4.097454375851241, + "train/text_entropy": 1.5812652244996488, + "train/token_acc": 0.2729083665338645 + }, + { + "epoch": 0.2720947446336047, + "grad_norm": 11.918557167053223, + "learning_rate": 1.153963103593823e-06, + "loss": 0.2822, + "step": 1838, + "train/speech_entropy": 3.4341818066229655, + "train/text_entropy": 0.3754421087411734, + "train/token_acc": 0.33528550512445093 + }, + { + "epoch": 0.27224278312361216, + "grad_norm": 15.679903984069824, + "learning_rate": 1.1520789376354808e-06, + "loss": 1.209, + "step": 1839, + "train/speech_entropy": 4.637214195647573, + "train/text_entropy": 1.4514972262912327, + "train/token_acc": 0.23639774859287055 + }, + { + "epoch": 0.27239082161361955, + "grad_norm": 12.818405151367188, + "learning_rate": 1.15020617359325e-06, + "loss": 0.6963, + "step": 1840, + "train/speech_entropy": 3.9870026379096797, + "train/text_entropy": 0.6935392573555523, + "train/token_acc": 0.30089374379344586 + }, + { + "epoch": 0.27253886010362693, + "grad_norm": 22.337963104248047, + "learning_rate": 1.1483448163782354e-06, + "loss": 1.6836, + "step": 1841, + "train/speech_entropy": 3.928144361547196, + "train/text_entropy": 1.7129905540626367, + "train/token_acc": 0.2538659793814433 + }, + { + "epoch": 0.2726868985936343, + "grad_norm": 15.827613830566406, + "learning_rate": 1.1464948708716286e-06, + "loss": 1.3086, + "step": 1842, + "train/speech_entropy": 3.9811737525281568, + "train/text_entropy": 1.4535426450579354, + "train/token_acc": 0.2670807453416149 + }, + { + "epoch": 0.27283493708364176, + "grad_norm": 20.676183700561523, + "learning_rate": 1.1446563419246973e-06, + "loss": 1.3047, + "step": 1843, + "train/speech_entropy": 3.600819385834854, + "train/text_entropy": 1.2396830956912734, + "train/token_acc": 0.311284046692607 + }, + { + "epoch": 0.27298297557364914, + "grad_norm": 21.636402130126953, + "learning_rate": 1.1428292343587691e-06, + "loss": 1.8125, + "step": 1844, + "train/speech_entropy": 3.9958255190559187, + "train/text_entropy": 1.7835176785786946, + "train/token_acc": 0.26878980891719745 + }, + { + "epoch": 0.27313101406365653, + "grad_norm": 13.153802871704102, + "learning_rate": 1.1410135529652203e-06, + "loss": 0.9863, + "step": 1845, + "train/speech_entropy": 4.244623274250603, + "train/text_entropy": 1.0836952286537247, + "train/token_acc": 0.2575221238938053 + }, + { + "epoch": 0.27327905255366397, + "grad_norm": 19.099384307861328, + "learning_rate": 1.1392093025054632e-06, + "loss": 1.6523, + "step": 1846, + "train/speech_entropy": 4.331482366467198, + "train/text_entropy": 1.6836078389485678, + "train/token_acc": 0.247953216374269 + }, + { + "epoch": 0.27342709104367136, + "grad_norm": 14.388124465942383, + "learning_rate": 1.1374164877109356e-06, + "loss": 1.082, + "step": 1847, + "train/speech_entropy": 3.9735983115055284, + "train/text_entropy": 1.1423107076574255, + "train/token_acc": 0.29483814523184604 + }, + { + "epoch": 0.27357512953367874, + "grad_norm": 17.422664642333984, + "learning_rate": 1.1356351132830843e-06, + "loss": 1.4062, + "step": 1848, + "train/speech_entropy": 4.257985827992264, + "train/text_entropy": 1.5514695991590186, + "train/token_acc": 0.23867313915857605 + }, + { + "epoch": 0.2737231680236862, + "grad_norm": 11.458394050598145, + "learning_rate": 1.1338651838933554e-06, + "loss": 0.8457, + "step": 1849, + "train/speech_entropy": 3.842248232047122, + "train/text_entropy": 0.8716456095377604, + "train/token_acc": 0.3133393017009848 + }, + { + "epoch": 0.27387120651369357, + "grad_norm": 14.975637435913086, + "learning_rate": 1.1321067041831843e-06, + "loss": 0.9912, + "step": 1850, + "train/speech_entropy": 4.1487563009414155, + "train/text_entropy": 1.0617306245771867, + "train/token_acc": 0.26017029328287605 + }, + { + "epoch": 0.27401924500370095, + "grad_norm": 19.51335906982422, + "learning_rate": 1.1303596787639776e-06, + "loss": 0.8359, + "step": 1851, + "train/speech_entropy": 3.6219425268813494, + "train/text_entropy": 0.6258977556985522, + "train/token_acc": 0.32803468208092484 + }, + { + "epoch": 0.2741672834937084, + "grad_norm": 18.476947784423828, + "learning_rate": 1.1286241122171057e-06, + "loss": 1.3906, + "step": 1852, + "train/speech_entropy": 4.096354654390518, + "train/text_entropy": 1.4277269490559896, + "train/token_acc": 0.2847411444141689 + }, + { + "epoch": 0.2743153219837158, + "grad_norm": 15.439330101013184, + "learning_rate": 1.126900009093891e-06, + "loss": 0.918, + "step": 1853, + "train/speech_entropy": 4.1781456013677, + "train/text_entropy": 1.1940417763413183, + "train/token_acc": 0.2672508214676889 + }, + { + "epoch": 0.27446336047372316, + "grad_norm": 12.094748497009277, + "learning_rate": 1.1251873739155915e-06, + "loss": 0.9551, + "step": 1854, + "train/speech_entropy": 3.8254556141046754, + "train/text_entropy": 0.8206957294827416, + "train/token_acc": 0.31606765327695563 + }, + { + "epoch": 0.27461139896373055, + "grad_norm": 20.992183685302734, + "learning_rate": 1.1234862111733933e-06, + "loss": 1.5312, + "step": 1855, + "train/speech_entropy": 3.974255910962454, + "train/text_entropy": 1.4634104037503584, + "train/token_acc": 0.2926356589147287 + }, + { + "epoch": 0.274759437453738, + "grad_norm": 12.965253829956055, + "learning_rate": 1.1217965253283983e-06, + "loss": 0.8257, + "step": 1856, + "train/speech_entropy": 4.016333452949684, + "train/text_entropy": 1.3050619348303063, + "train/token_acc": 0.28835063437139563 + }, + { + "epoch": 0.27490747594374537, + "grad_norm": 16.695812225341797, + "learning_rate": 1.1201183208116103e-06, + "loss": 0.9551, + "step": 1857, + "train/speech_entropy": 4.054598490397136, + "train/text_entropy": 0.9417272436207739, + "train/token_acc": 0.27095808383233533 + }, + { + "epoch": 0.27505551443375276, + "grad_norm": 13.58722972869873, + "learning_rate": 1.1184516020239248e-06, + "loss": 0.5237, + "step": 1858, + "train/speech_entropy": 3.696019524695889, + "train/text_entropy": 0.43815778743075784, + "train/token_acc": 0.31127982646420826 + }, + { + "epoch": 0.2752035529237602, + "grad_norm": 28.818878173828125, + "learning_rate": 1.1167963733361183e-06, + "loss": 1.543, + "step": 1859, + "train/speech_entropy": 4.317278195557054, + "train/text_entropy": 1.764765689247533, + "train/token_acc": 0.255524861878453 + }, + { + "epoch": 0.2753515914137676, + "grad_norm": 12.941519737243652, + "learning_rate": 1.1151526390888332e-06, + "loss": 0.4951, + "step": 1860, + "train/speech_entropy": 4.045038797769202, + "train/text_entropy": 0.49284431852143384, + "train/token_acc": 0.31596091205211724 + }, + { + "epoch": 0.27549962990377497, + "grad_norm": 12.017684936523438, + "learning_rate": 1.1135204035925722e-06, + "loss": 1.2324, + "step": 1861, + "train/speech_entropy": 4.170996428443926, + "train/text_entropy": 1.095184173073657, + "train/token_acc": 0.27481389578163773 + }, + { + "epoch": 0.2756476683937824, + "grad_norm": 22.01122283935547, + "learning_rate": 1.1118996711276835e-06, + "loss": 1.1191, + "step": 1862, + "train/speech_entropy": 3.828764269686845, + "train/text_entropy": 1.0918060030255998, + "train/token_acc": 0.27843803056027167 + }, + { + "epoch": 0.2757957068837898, + "grad_norm": 14.690152168273926, + "learning_rate": 1.1102904459443472e-06, + "loss": 0.9199, + "step": 1863, + "train/speech_entropy": 4.004403018177868, + "train/text_entropy": 0.8003083030776222, + "train/token_acc": 0.27417840375586855 + }, + { + "epoch": 0.2759437453737972, + "grad_norm": 13.086508750915527, + "learning_rate": 1.1086927322625694e-06, + "loss": 1.0137, + "step": 1864, + "train/speech_entropy": 4.002570112321277, + "train/text_entropy": 1.1494883746635622, + "train/token_acc": 0.27849264705882354 + }, + { + "epoch": 0.27609178386380456, + "grad_norm": 16.472116470336914, + "learning_rate": 1.1071065342721696e-06, + "loss": 1.2266, + "step": 1865, + "train/speech_entropy": 4.061810496217579, + "train/text_entropy": 1.3712275655646073, + "train/token_acc": 0.2647754137115839 + }, + { + "epoch": 0.276239822353812, + "grad_norm": 18.650726318359375, + "learning_rate": 1.1055318561327646e-06, + "loss": 0.563, + "step": 1866, + "train/speech_entropy": 3.50072361580768, + "train/text_entropy": 0.643708635965983, + "train/token_acc": 0.29350104821802936 + }, + { + "epoch": 0.2763878608438194, + "grad_norm": 12.110725402832031, + "learning_rate": 1.1039687019737654e-06, + "loss": 0.918, + "step": 1867, + "train/speech_entropy": 3.964289825829341, + "train/text_entropy": 1.1534612555252879, + "train/token_acc": 0.3037383177570093 + }, + { + "epoch": 0.2765358993338268, + "grad_norm": 18.20619010925293, + "learning_rate": 1.102417075894362e-06, + "loss": 1.6992, + "step": 1868, + "train/speech_entropy": 4.625538462579388, + "train/text_entropy": 1.6073752216909123, + "train/token_acc": 0.23942307692307693 + }, + { + "epoch": 0.2766839378238342, + "grad_norm": 13.250869750976562, + "learning_rate": 1.1008769819635118e-06, + "loss": 1.2812, + "step": 1869, + "train/speech_entropy": 4.550187394425675, + "train/text_entropy": 1.3005916909932236, + "train/token_acc": 0.28973384030418253 + }, + { + "epoch": 0.2768319763138416, + "grad_norm": 10.583366394042969, + "learning_rate": 1.0993484242199326e-06, + "loss": 1.0117, + "step": 1870, + "train/speech_entropy": 4.307173183642947, + "train/text_entropy": 1.2047526780949083, + "train/token_acc": 0.2463768115942029 + }, + { + "epoch": 0.276980014803849, + "grad_norm": 37.54069900512695, + "learning_rate": 1.0978314066720883e-06, + "loss": 1.5977, + "step": 1871, + "train/speech_entropy": 4.031862478793147, + "train/text_entropy": 1.1728175770152698, + "train/token_acc": 0.3177966101694915 + }, + { + "epoch": 0.2771280532938564, + "grad_norm": 14.219542503356934, + "learning_rate": 1.0963259332981805e-06, + "loss": 1.3711, + "step": 1872, + "train/speech_entropy": 4.3124858527783525, + "train/text_entropy": 1.3247762632764075, + "train/token_acc": 0.25724137931034485 + }, + { + "epoch": 0.2772760917838638, + "grad_norm": 14.366880416870117, + "learning_rate": 1.0948320080461383e-06, + "loss": 1.5078, + "step": 1873, + "train/speech_entropy": 4.489209711238507, + "train/text_entropy": 1.3227984309196472, + "train/token_acc": 0.2474964234620887 + }, + { + "epoch": 0.2774241302738712, + "grad_norm": 19.37683868408203, + "learning_rate": 1.093349634833606e-06, + "loss": 1.2812, + "step": 1874, + "train/speech_entropy": 4.230016381256184, + "train/text_entropy": 1.381329556169181, + "train/token_acc": 0.23477297895902546 + }, + { + "epoch": 0.27757216876387864, + "grad_norm": 16.682491302490234, + "learning_rate": 1.0918788175479344e-06, + "loss": 1.2734, + "step": 1875, + "train/speech_entropy": 3.9920195432280394, + "train/text_entropy": 1.4047579917178792, + "train/token_acc": 0.2611764705882353 + }, + { + "epoch": 0.277720207253886, + "grad_norm": 19.008316040039062, + "learning_rate": 1.0904195600461707e-06, + "loss": 1.5742, + "step": 1876, + "train/speech_entropy": 3.9939683413674647, + "train/text_entropy": 1.7218027750651042, + "train/token_acc": 0.27753934191702434 + }, + { + "epoch": 0.2778682457438934, + "grad_norm": 19.788251876831055, + "learning_rate": 1.0889718661550476e-06, + "loss": 1.7578, + "step": 1877, + "train/speech_entropy": 4.18079335202453, + "train/text_entropy": 1.548928877886604, + "train/token_acc": 0.2439824945295405 + }, + { + "epoch": 0.2780162842339008, + "grad_norm": 15.186988830566406, + "learning_rate": 1.087535739670973e-06, + "loss": 1.5977, + "step": 1878, + "train/speech_entropy": 4.393247221655791, + "train/text_entropy": 1.6101666854115462, + "train/token_acc": 0.23509711989283322 + }, + { + "epoch": 0.27816432272390823, + "grad_norm": 7.282444953918457, + "learning_rate": 1.0861111843600222e-06, + "loss": 0.1963, + "step": 1879, + "train/speech_entropy": 3.4542537640921678, + "train/text_entropy": 0.2493062867058648, + "train/token_acc": 0.33505154639175255 + }, + { + "epoch": 0.2783123612139156, + "grad_norm": 16.6673583984375, + "learning_rate": 1.0846982039579245e-06, + "loss": 1.2168, + "step": 1880, + "train/speech_entropy": 3.9629339920847038, + "train/text_entropy": 1.307049816601897, + "train/token_acc": 0.29310344827586204 + }, + { + "epoch": 0.278460399703923, + "grad_norm": 23.605905532836914, + "learning_rate": 1.083296802170057e-06, + "loss": 1.8711, + "step": 1881, + "train/speech_entropy": 4.124049220277868, + "train/text_entropy": 1.708345235188802, + "train/token_acc": 0.247546346782988 + }, + { + "epoch": 0.27860843819393044, + "grad_norm": 20.16618537902832, + "learning_rate": 1.0819069826714313e-06, + "loss": 1.4336, + "step": 1882, + "train/speech_entropy": 4.290475160012513, + "train/text_entropy": 1.316082205091204, + "train/token_acc": 0.2613240418118467 + }, + { + "epoch": 0.2787564766839378, + "grad_norm": 25.528724670410156, + "learning_rate": 1.080528749106688e-06, + "loss": 1.7109, + "step": 1883, + "train/speech_entropy": 4.300450854031545, + "train/text_entropy": 1.9227127499050565, + "train/token_acc": 0.24608150470219436 + }, + { + "epoch": 0.2789045151739452, + "grad_norm": 19.35357666015625, + "learning_rate": 1.079162105090085e-06, + "loss": 1.2695, + "step": 1884, + "train/speech_entropy": 4.342688152993816, + "train/text_entropy": 1.1761292438117825, + "train/token_acc": 0.24815724815724816 + }, + { + "epoch": 0.27905255366395265, + "grad_norm": 14.120254516601562, + "learning_rate": 1.077807054205484e-06, + "loss": 0.4233, + "step": 1885, + "train/speech_entropy": 3.5631296818987686, + "train/text_entropy": 0.5264593015743207, + "train/token_acc": 0.30625 + }, + { + "epoch": 0.27920059215396004, + "grad_norm": 15.637792587280273, + "learning_rate": 1.076463600006351e-06, + "loss": 0.9785, + "step": 1886, + "train/speech_entropy": 3.8791505741893797, + "train/text_entropy": 0.9369183324998425, + "train/token_acc": 0.2890243902439024 + }, + { + "epoch": 0.2793486306439674, + "grad_norm": 11.028547286987305, + "learning_rate": 1.0751317460157362e-06, + "loss": 1.0391, + "step": 1887, + "train/speech_entropy": 4.567624231812301, + "train/text_entropy": 1.0899582173172577, + "train/token_acc": 0.27055016181229774 + }, + { + "epoch": 0.2794966691339748, + "grad_norm": 23.113645553588867, + "learning_rate": 1.0738114957262715e-06, + "loss": 1.2422, + "step": 1888, + "train/speech_entropy": 3.9220823724585845, + "train/text_entropy": 1.0831623077392578, + "train/token_acc": 0.24444444444444444 + }, + { + "epoch": 0.27964470762398225, + "grad_norm": 18.583322525024414, + "learning_rate": 1.0725028526001606e-06, + "loss": 1.3398, + "step": 1889, + "train/speech_entropy": 4.088911906327351, + "train/text_entropy": 1.5424081802368164, + "train/token_acc": 0.28835063437139563 + }, + { + "epoch": 0.27979274611398963, + "grad_norm": 15.135621070861816, + "learning_rate": 1.0712058200691661e-06, + "loss": 0.9961, + "step": 1890, + "train/speech_entropy": 4.103168994468331, + "train/text_entropy": 1.1835503005981445, + "train/token_acc": 0.26145251396648045 + }, + { + "epoch": 0.279940784603997, + "grad_norm": 20.074216842651367, + "learning_rate": 1.0699204015346052e-06, + "loss": 1.7148, + "step": 1891, + "train/speech_entropy": 3.837495669090954, + "train/text_entropy": 1.6364782379894722, + "train/token_acc": 0.2671660424469413 + }, + { + "epoch": 0.28008882309400446, + "grad_norm": 10.363323211669922, + "learning_rate": 1.0686466003673388e-06, + "loss": 0.8882, + "step": 1892, + "train/speech_entropy": 3.862273642746821, + "train/text_entropy": 0.8677870919627528, + "train/token_acc": 0.2937984496124031 + }, + { + "epoch": 0.28023686158401184, + "grad_norm": 18.088685989379883, + "learning_rate": 1.0673844199077612e-06, + "loss": 1.4688, + "step": 1893, + "train/speech_entropy": 4.0063146466072626, + "train/text_entropy": 1.490663410704813, + "train/token_acc": 0.26182618261826185 + }, + { + "epoch": 0.2803849000740192, + "grad_norm": 16.519620895385742, + "learning_rate": 1.0661338634657931e-06, + "loss": 1.2383, + "step": 1894, + "train/speech_entropy": 4.051854788782742, + "train/text_entropy": 1.2021605794022723, + "train/token_acc": 0.26034858387799564 + }, + { + "epoch": 0.28053293856402667, + "grad_norm": 19.899572372436523, + "learning_rate": 1.0648949343208745e-06, + "loss": 1.875, + "step": 1895, + "train/speech_entropy": 4.442592598850202, + "train/text_entropy": 1.7926345175885139, + "train/token_acc": 0.23601895734597156 + }, + { + "epoch": 0.28068097705403405, + "grad_norm": 20.053205490112305, + "learning_rate": 1.0636676357219508e-06, + "loss": 1.0039, + "step": 1896, + "train/speech_entropy": 3.9378220423552435, + "train/text_entropy": 0.8327357371648153, + "train/token_acc": 0.2854757929883139 + }, + { + "epoch": 0.28082901554404144, + "grad_norm": 14.542941093444824, + "learning_rate": 1.0624519708874703e-06, + "loss": 0.7832, + "step": 1897, + "train/speech_entropy": 4.193890930503927, + "train/text_entropy": 1.0081948548360589, + "train/token_acc": 0.25195094760312153 + }, + { + "epoch": 0.2809770540340489, + "grad_norm": 17.029296875, + "learning_rate": 1.061247943005373e-06, + "loss": 0.9805, + "step": 1898, + "train/speech_entropy": 4.088433946881976, + "train/text_entropy": 0.9493037356606012, + "train/token_acc": 0.3036144578313253 + }, + { + "epoch": 0.28112509252405626, + "grad_norm": 15.389817237854004, + "learning_rate": 1.0600555552330809e-06, + "loss": 1.5898, + "step": 1899, + "train/speech_entropy": 4.1036152235343435, + "train/text_entropy": 1.5644632297092014, + "train/token_acc": 0.2572741194486983 + }, + { + "epoch": 0.28127313101406365, + "grad_norm": 16.19061279296875, + "learning_rate": 1.0588748106974919e-06, + "loss": 1.1387, + "step": 1900, + "train/speech_entropy": 3.9318661490522717, + "train/text_entropy": 1.3877408993170128, + "train/token_acc": 0.2787258248009101 + }, + { + "epoch": 0.28142116950407103, + "grad_norm": 9.92747688293457, + "learning_rate": 1.0577057124949716e-06, + "loss": 0.6641, + "step": 1901, + "train/speech_entropy": 4.105837568746821, + "train/text_entropy": 0.8675675260911294, + "train/token_acc": 0.28726061615320564 + }, + { + "epoch": 0.2815692079940785, + "grad_norm": 13.003775596618652, + "learning_rate": 1.0565482636913426e-06, + "loss": 1.2422, + "step": 1902, + "train/speech_entropy": 4.1077427097438, + "train/text_entropy": 1.2389137561504657, + "train/token_acc": 0.29308755760368665 + }, + { + "epoch": 0.28171724648408586, + "grad_norm": 16.94462013244629, + "learning_rate": 1.0554024673218808e-06, + "loss": 1.3984, + "step": 1903, + "train/speech_entropy": 4.275103571871897, + "train/text_entropy": 1.4623066746911337, + "train/token_acc": 0.2561374795417349 + }, + { + "epoch": 0.28186528497409324, + "grad_norm": 15.57876968383789, + "learning_rate": 1.0542683263913022e-06, + "loss": 1.0117, + "step": 1904, + "train/speech_entropy": 4.196711912709921, + "train/text_entropy": 1.2930520245818053, + "train/token_acc": 0.2669735327963176 + }, + { + "epoch": 0.2820133234641007, + "grad_norm": 6.426173686981201, + "learning_rate": 1.0531458438737599e-06, + "loss": 0.2993, + "step": 1905, + "train/speech_entropy": 3.7937453697467673, + "train/text_entropy": 0.31039368636964815, + "train/token_acc": 0.3548387096774194 + }, + { + "epoch": 0.28216136195410807, + "grad_norm": 16.879165649414062, + "learning_rate": 1.052035022712834e-06, + "loss": 1.6172, + "step": 1906, + "train/speech_entropy": 4.2634616974861395, + "train/text_entropy": 1.4110254384405603, + "train/token_acc": 0.24648469809760132 + }, + { + "epoch": 0.28230940044411545, + "grad_norm": 12.570585250854492, + "learning_rate": 1.0509358658215222e-06, + "loss": 1.1172, + "step": 1907, + "train/speech_entropy": 4.236809969815341, + "train/text_entropy": 1.2474555288042342, + "train/token_acc": 0.2628398791540785 + }, + { + "epoch": 0.2824574389341229, + "grad_norm": 21.328998565673828, + "learning_rate": 1.0498483760822361e-06, + "loss": 1.3027, + "step": 1908, + "train/speech_entropy": 4.093182992244112, + "train/text_entropy": 1.309400183522803, + "train/token_acc": 0.2900188323917137 + }, + { + "epoch": 0.2826054774241303, + "grad_norm": 12.412610054016113, + "learning_rate": 1.0487725563467912e-06, + "loss": 1.2656, + "step": 1909, + "train/speech_entropy": 4.549506319262393, + "train/text_entropy": 1.236387245085558, + "train/token_acc": 0.26697530864197533 + }, + { + "epoch": 0.28275351591413767, + "grad_norm": 8.652894973754883, + "learning_rate": 1.0477084094363982e-06, + "loss": 0.624, + "step": 1910, + "train/speech_entropy": 3.9719411818403114, + "train/text_entropy": 0.6512331404775943, + "train/token_acc": 0.3088235294117647 + }, + { + "epoch": 0.28290155440414505, + "grad_norm": 16.16714859008789, + "learning_rate": 1.0466559381416588e-06, + "loss": 1.3418, + "step": 1911, + "train/speech_entropy": 4.025594965364066, + "train/text_entropy": 1.306450917170598, + "train/token_acc": 0.2652388797364086 + }, + { + "epoch": 0.2830495928941525, + "grad_norm": 15.104450225830078, + "learning_rate": 1.045615145222557e-06, + "loss": 0.7393, + "step": 1912, + "train/speech_entropy": 3.793881085330824, + "train/text_entropy": 0.7357708172206461, + "train/token_acc": 0.30078125 + }, + { + "epoch": 0.2831976313841599, + "grad_norm": 11.193629264831543, + "learning_rate": 1.0445860334084503e-06, + "loss": 0.5972, + "step": 1913, + "train/speech_entropy": 3.786218860481359, + "train/text_entropy": 0.5667019566745622, + "train/token_acc": 0.2949547218628719 + }, + { + "epoch": 0.28334566987416726, + "grad_norm": 18.765844345092773, + "learning_rate": 1.0435686053980644e-06, + "loss": 1.1562, + "step": 1914, + "train/speech_entropy": 3.8173015917356485, + "train/text_entropy": 1.33871829693134, + "train/token_acc": 0.2708333333333333 + }, + { + "epoch": 0.2834937083641747, + "grad_norm": 12.824073791503906, + "learning_rate": 1.0425628638594852e-06, + "loss": 1.0205, + "step": 1915, + "train/speech_entropy": 3.9571469514266306, + "train/text_entropy": 0.8930652520595452, + "train/token_acc": 0.28878923766816145 + }, + { + "epoch": 0.2836417468541821, + "grad_norm": 20.54886817932129, + "learning_rate": 1.0415688114301536e-06, + "loss": 1.8438, + "step": 1916, + "train/speech_entropy": 4.00983816921856, + "train/text_entropy": 1.652872085571289, + "train/token_acc": 0.25100806451612906 + }, + { + "epoch": 0.28378978534418947, + "grad_norm": 17.42514419555664, + "learning_rate": 1.0405864507168552e-06, + "loss": 1.4336, + "step": 1917, + "train/speech_entropy": 4.370626987129352, + "train/text_entropy": 1.5580961280894057, + "train/token_acc": 0.27007943512797883 + }, + { + "epoch": 0.2839378238341969, + "grad_norm": 16.854480743408203, + "learning_rate": 1.0396157842957177e-06, + "loss": 1.1465, + "step": 1918, + "train/speech_entropy": 4.38809715719784, + "train/text_entropy": 1.1835754069876163, + "train/token_acc": 0.2679658952496955 + }, + { + "epoch": 0.2840858623242043, + "grad_norm": 21.4174861907959, + "learning_rate": 1.0386568147121996e-06, + "loss": 1.5312, + "step": 1919, + "train/speech_entropy": 4.1288025822471734, + "train/text_entropy": 1.5154588650434444, + "train/token_acc": 0.2948207171314741 + }, + { + "epoch": 0.2842339008142117, + "grad_norm": 23.427738189697266, + "learning_rate": 1.0377095444810873e-06, + "loss": 2.3359, + "step": 1920, + "train/speech_entropy": 3.643813096083604, + "train/text_entropy": 2.246868524025744, + "train/token_acc": 0.2631578947368421 + }, + { + "epoch": 0.2843819393042191, + "grad_norm": 14.028383255004883, + "learning_rate": 1.0367739760864863e-06, + "loss": 0.9541, + "step": 1921, + "train/speech_entropy": 4.121666743837554, + "train/text_entropy": 1.0459853581019811, + "train/token_acc": 0.2998220640569395 + }, + { + "epoch": 0.2845299777942265, + "grad_norm": 9.190559387207031, + "learning_rate": 1.0358501119818155e-06, + "loss": 0.5283, + "step": 1922, + "train/speech_entropy": 3.6642300992398646, + "train/text_entropy": 0.32410238054063584, + "train/token_acc": 0.33695652173913043 + }, + { + "epoch": 0.2846780162842339, + "grad_norm": 23.27159309387207, + "learning_rate": 1.0349379545898016e-06, + "loss": 2.2695, + "step": 1923, + "train/speech_entropy": 3.9526838822798296, + "train/text_entropy": 2.1071041400615984, + "train/token_acc": 0.27071823204419887 + }, + { + "epoch": 0.2848260547742413, + "grad_norm": 13.796430587768555, + "learning_rate": 1.0340375063024702e-06, + "loss": 0.9902, + "step": 1924, + "train/speech_entropy": 4.227935602247101, + "train/text_entropy": 0.895163833825297, + "train/token_acc": 0.2762384550797649 + }, + { + "epoch": 0.2849740932642487, + "grad_norm": 10.780020713806152, + "learning_rate": 1.0331487694811421e-06, + "loss": 0.9238, + "step": 1925, + "train/speech_entropy": 4.174605689858491, + "train/text_entropy": 0.9042633215586344, + "train/token_acc": 0.2953846153846154 + }, + { + "epoch": 0.2851221317542561, + "grad_norm": 17.689939498901367, + "learning_rate": 1.0322717464564263e-06, + "loss": 1.2227, + "step": 1926, + "train/speech_entropy": 4.309917319229458, + "train/text_entropy": 1.225796850104081, + "train/token_acc": 0.27204030226700254 + }, + { + "epoch": 0.2852701702442635, + "grad_norm": 36.22560119628906, + "learning_rate": 1.0314064395282137e-06, + "loss": 2.3516, + "step": 1927, + "train/speech_entropy": 4.343900569218142, + "train/text_entropy": 1.6551710559475807, + "train/token_acc": 0.22138228941684665 + }, + { + "epoch": 0.28541820873427093, + "grad_norm": 14.42465877532959, + "learning_rate": 1.0305528509656699e-06, + "loss": 0.8223, + "step": 1928, + "train/speech_entropy": 3.971904795656922, + "train/text_entropy": 0.8209775976232581, + "train/token_acc": 0.29411764705882354 + }, + { + "epoch": 0.2855662472242783, + "grad_norm": 10.117754936218262, + "learning_rate": 1.029710983007232e-06, + "loss": 0.6328, + "step": 1929, + "train/speech_entropy": 4.004265458320424, + "train/text_entropy": 0.7467825283557911, + "train/token_acc": 0.33443344334433445 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 11.595979690551758, + "learning_rate": 1.028880837860601e-06, + "loss": 0.5498, + "step": 1930, + "train/speech_entropy": 3.7672959712513707, + "train/text_entropy": 0.4477237397176356, + "train/token_acc": 0.33884297520661155 + }, + { + "epoch": 0.28586232420429314, + "grad_norm": 17.664491653442383, + "learning_rate": 1.0280624177027346e-06, + "loss": 1.5, + "step": 1931, + "train/speech_entropy": 4.265683010542408, + "train/text_entropy": 1.432604257684005, + "train/token_acc": 0.27942681678607983 + }, + { + "epoch": 0.2860103626943005, + "grad_norm": 12.964696884155273, + "learning_rate": 1.027255724679845e-06, + "loss": 1.0176, + "step": 1932, + "train/speech_entropy": 4.19194238767102, + "train/text_entropy": 0.8949734556908701, + "train/token_acc": 0.2737799834574028 + }, + { + "epoch": 0.2861584011843079, + "grad_norm": 18.904388427734375, + "learning_rate": 1.0264607609073912e-06, + "loss": 1.9531, + "step": 1933, + "train/speech_entropy": 4.052439131448901, + "train/text_entropy": 1.6846463433627425, + "train/token_acc": 0.26816239316239315 + }, + { + "epoch": 0.28630643967431535, + "grad_norm": 10.231468200683594, + "learning_rate": 1.0256775284700717e-06, + "loss": 0.5117, + "step": 1934, + "train/speech_entropy": 3.842579435439761, + "train/text_entropy": 0.5617471677120601, + "train/token_acc": 0.3239962651727358 + }, + { + "epoch": 0.28645447816432273, + "grad_norm": 13.998093605041504, + "learning_rate": 1.0249060294218233e-06, + "loss": 1.1973, + "step": 1935, + "train/speech_entropy": 4.382283151536434, + "train/text_entropy": 1.325834863836115, + "train/token_acc": 0.26558891454965355 + }, + { + "epoch": 0.2866025166543301, + "grad_norm": 14.943511009216309, + "learning_rate": 1.0241462657858122e-06, + "loss": 0.7173, + "step": 1936, + "train/speech_entropy": 4.178934396148487, + "train/text_entropy": 1.3066469656454551, + "train/token_acc": 0.2465166130760986 + }, + { + "epoch": 0.2867505551443375, + "grad_norm": 14.981091499328613, + "learning_rate": 1.0233982395544293e-06, + "loss": 0.9766, + "step": 1937, + "train/speech_entropy": 4.39239361642421, + "train/text_entropy": 0.9596993779561606, + "train/token_acc": 0.271461716937355 + }, + { + "epoch": 0.28689859363434494, + "grad_norm": 14.622918128967285, + "learning_rate": 1.022661952689286e-06, + "loss": 1.5273, + "step": 1938, + "train/speech_entropy": 4.3652317802687195, + "train/text_entropy": 1.5318712929422542, + "train/token_acc": 0.25676720804331016 + }, + { + "epoch": 0.28704663212435233, + "grad_norm": 11.729397773742676, + "learning_rate": 1.0219374071212104e-06, + "loss": 0.4854, + "step": 1939, + "train/speech_entropy": 3.7346261015190367, + "train/text_entropy": 0.5488163855768019, + "train/token_acc": 0.3100686498855835 + }, + { + "epoch": 0.2871946706143597, + "grad_norm": 12.122620582580566, + "learning_rate": 1.0212246047502374e-06, + "loss": 1.2617, + "step": 1940, + "train/speech_entropy": 4.3473305880041195, + "train/text_entropy": 1.2849709305108763, + "train/token_acc": 0.2741477272727273 + }, + { + "epoch": 0.28734270910436716, + "grad_norm": 7.092050075531006, + "learning_rate": 1.020523547445608e-06, + "loss": 0.4082, + "step": 1941, + "train/speech_entropy": 3.9088173866271974, + "train/text_entropy": 0.4892295076956157, + "train/token_acc": 0.32471626733921816 + }, + { + "epoch": 0.28749074759437454, + "grad_norm": 18.64200782775879, + "learning_rate": 1.0198342370457643e-06, + "loss": 1.207, + "step": 1942, + "train/speech_entropy": 4.2805722779865505, + "train/text_entropy": 1.3138358519249356, + "train/token_acc": 0.26727642276422764 + }, + { + "epoch": 0.2876387860843819, + "grad_norm": 13.69243335723877, + "learning_rate": 1.0191566753583417e-06, + "loss": 1.1025, + "step": 1943, + "train/speech_entropy": 4.124365785573548, + "train/text_entropy": 1.206569982330184, + "train/token_acc": 0.3020372010628875 + }, + { + "epoch": 0.28778682457438937, + "grad_norm": 16.936662673950195, + "learning_rate": 1.0184908641601667e-06, + "loss": 1.4531, + "step": 1944, + "train/speech_entropy": 4.4439200906192555, + "train/text_entropy": 1.5210476041589893, + "train/token_acc": 0.25029797377830754 + }, + { + "epoch": 0.28793486306439675, + "grad_norm": 16.96001625061035, + "learning_rate": 1.0178368051972524e-06, + "loss": 0.9453, + "step": 1945, + "train/speech_entropy": 4.131144254054706, + "train/text_entropy": 1.1462208429972331, + "train/token_acc": 0.3114119922630561 + }, + { + "epoch": 0.28808290155440414, + "grad_norm": 11.306048393249512, + "learning_rate": 1.0171945001847905e-06, + "loss": 0.8018, + "step": 1946, + "train/speech_entropy": 4.080318874782986, + "train/text_entropy": 0.8344588632936831, + "train/token_acc": 0.29259259259259257 + }, + { + "epoch": 0.2882309400444115, + "grad_norm": 12.218464851379395, + "learning_rate": 1.0165639508071532e-06, + "loss": 1.0586, + "step": 1947, + "train/speech_entropy": 4.037697463352492, + "train/text_entropy": 0.9833909276884552, + "train/token_acc": 0.2957222566646001 + }, + { + "epoch": 0.28837897853441896, + "grad_norm": 11.175854682922363, + "learning_rate": 1.015945158717881e-06, + "loss": 0.8455, + "step": 1948, + "train/speech_entropy": 4.067018418986508, + "train/text_entropy": 0.9006323573565242, + "train/token_acc": 0.2968127490039841 + }, + { + "epoch": 0.28852701702442635, + "grad_norm": 16.742650985717773, + "learning_rate": 1.0153381255396847e-06, + "loss": 1.1523, + "step": 1949, + "train/speech_entropy": 3.6756754960163986, + "train/text_entropy": 1.06674223360808, + "train/token_acc": 0.3064516129032258 + }, + { + "epoch": 0.28867505551443373, + "grad_norm": 14.5658597946167, + "learning_rate": 1.014742852864438e-06, + "loss": 1.0146, + "step": 1950, + "train/speech_entropy": 3.905661611544573, + "train/text_entropy": 0.7003519408096223, + "train/token_acc": 0.30128205128205127 + }, + { + "epoch": 0.28882309400444117, + "grad_norm": 14.021347045898438, + "learning_rate": 1.014159342253174e-06, + "loss": 1.4844, + "step": 1951, + "train/speech_entropy": 4.494572834631926, + "train/text_entropy": 1.445431241122159, + "train/token_acc": 0.281922525107604 + }, + { + "epoch": 0.28897113249444856, + "grad_norm": 15.887961387634277, + "learning_rate": 1.0135875952360813e-06, + "loss": 1.2324, + "step": 1952, + "train/speech_entropy": 4.181065218682966, + "train/text_entropy": 1.572674139614763, + "train/token_acc": 0.27353689567430023 + }, + { + "epoch": 0.28911917098445594, + "grad_norm": 21.408124923706055, + "learning_rate": 1.0130276133124994e-06, + "loss": 2.2188, + "step": 1953, + "train/speech_entropy": 4.039968475422746, + "train/text_entropy": 1.8538280089208685, + "train/token_acc": 0.24863685932388221 + }, + { + "epoch": 0.2892672094744634, + "grad_norm": 11.1187105178833, + "learning_rate": 1.0124793979509168e-06, + "loss": 0.3623, + "step": 1954, + "train/speech_entropy": 3.691134982638889, + "train/text_entropy": 0.3167942295903745, + "train/token_acc": 0.32865400495458297 + }, + { + "epoch": 0.28941524796447077, + "grad_norm": 9.867979049682617, + "learning_rate": 1.011942950588963e-06, + "loss": 0.8926, + "step": 1955, + "train/speech_entropy": 4.147954135364222, + "train/text_entropy": 1.068457148096583, + "train/token_acc": 0.2882414151925078 + }, + { + "epoch": 0.28956328645447815, + "grad_norm": 23.610776901245117, + "learning_rate": 1.0114182726334093e-06, + "loss": 1.7227, + "step": 1956, + "train/speech_entropy": 3.9518280029296875, + "train/text_entropy": 1.7797337193642893, + "train/token_acc": 0.2816666666666667 + }, + { + "epoch": 0.2897113249444856, + "grad_norm": 17.794404983520508, + "learning_rate": 1.0109053654601627e-06, + "loss": 1.4609, + "step": 1957, + "train/speech_entropy": 4.21885190485104, + "train/text_entropy": 1.6237913562405495, + "train/token_acc": 0.2604698672114402 + }, + { + "epoch": 0.289859363434493, + "grad_norm": 6.4961628913879395, + "learning_rate": 1.0104042304142622e-06, + "loss": 0.2563, + "step": 1958, + "train/speech_entropy": 3.328268771701389, + "train/text_entropy": 0.32305890650836966, + "train/token_acc": 0.3639618138424821 + }, + { + "epoch": 0.29000740192450036, + "grad_norm": 15.765714645385742, + "learning_rate": 1.009914868809876e-06, + "loss": 1.2578, + "step": 1959, + "train/speech_entropy": 4.05663154538998, + "train/text_entropy": 1.1431741349684084, + "train/token_acc": 0.3210659898477157 + }, + { + "epoch": 0.29015544041450775, + "grad_norm": 14.973126411437988, + "learning_rate": 1.0094372819302978e-06, + "loss": 1.1797, + "step": 1960, + "train/speech_entropy": 4.153155045808801, + "train/text_entropy": 1.3901673032526385, + "train/token_acc": 0.2832832832832833 + }, + { + "epoch": 0.2903034789045152, + "grad_norm": 17.989120483398438, + "learning_rate": 1.0089714710279434e-06, + "loss": 0.9434, + "step": 1961, + "train/speech_entropy": 4.063151550292969, + "train/text_entropy": 1.248457055342825, + "train/token_acc": 0.2946127946127946 + }, + { + "epoch": 0.2904515173945226, + "grad_norm": 18.41043472290039, + "learning_rate": 1.0085174373243478e-06, + "loss": 1.5391, + "step": 1962, + "train/speech_entropy": 4.168636542269595, + "train/text_entropy": 1.645208058623493, + "train/token_acc": 0.2464722483537159 + }, + { + "epoch": 0.29059955588452996, + "grad_norm": 15.193008422851562, + "learning_rate": 1.008075182010162e-06, + "loss": 1.0176, + "step": 1963, + "train/speech_entropy": 4.249257958453635, + "train/text_entropy": 1.2981803372779988, + "train/token_acc": 0.2731326644370123 + }, + { + "epoch": 0.2907475943745374, + "grad_norm": 16.762432098388672, + "learning_rate": 1.0076447062451483e-06, + "loss": 0.8291, + "step": 1964, + "train/speech_entropy": 3.9238159824425067, + "train/text_entropy": 1.142409082869409, + "train/token_acc": 0.3056338028169014 + }, + { + "epoch": 0.2908956328645448, + "grad_norm": 17.922605514526367, + "learning_rate": 1.0072260111581786e-06, + "loss": 1.3223, + "step": 1965, + "train/speech_entropy": 4.365306722170952, + "train/text_entropy": 1.1986409837954513, + "train/token_acc": 0.24247491638795987 + }, + { + "epoch": 0.29104367135455217, + "grad_norm": 15.13966178894043, + "learning_rate": 1.0068190978472339e-06, + "loss": 1.0742, + "step": 1966, + "train/speech_entropy": 4.182998002428072, + "train/text_entropy": 1.32101997039603, + "train/token_acc": 0.27262931034482757 + }, + { + "epoch": 0.2911917098445596, + "grad_norm": 18.257169723510742, + "learning_rate": 1.0064239673793948e-06, + "loss": 1.2324, + "step": 1967, + "train/speech_entropy": 4.024493574285183, + "train/text_entropy": 1.4542250305641697, + "train/token_acc": 0.26286509040333794 + }, + { + "epoch": 0.291339748334567, + "grad_norm": 22.72813606262207, + "learning_rate": 1.0060406207908464e-06, + "loss": 2.3438, + "step": 1968, + "train/speech_entropy": 3.9256384659441927, + "train/text_entropy": 2.1386382682191813, + "train/token_acc": 0.2662337662337662 + }, + { + "epoch": 0.2914877868245744, + "grad_norm": 19.82299041748047, + "learning_rate": 1.0056690590868694e-06, + "loss": 1.4688, + "step": 1969, + "train/speech_entropy": 4.187175119955708, + "train/text_entropy": 1.5740000737177862, + "train/token_acc": 0.24126637554585154 + }, + { + "epoch": 0.29163582531458176, + "grad_norm": 20.3854923248291, + "learning_rate": 1.0053092832418421e-06, + "loss": 1.25, + "step": 1970, + "train/speech_entropy": 4.240140332216556, + "train/text_entropy": 1.0327319655307503, + "train/token_acc": 0.28353658536585363 + }, + { + "epoch": 0.2917838638045892, + "grad_norm": 7.7784223556518555, + "learning_rate": 1.0049612941992335e-06, + "loss": 0.3301, + "step": 1971, + "train/speech_entropy": 3.547881595672123, + "train/text_entropy": 0.3810336420444843, + "train/token_acc": 0.34717784877529284 + }, + { + "epoch": 0.2919319022945966, + "grad_norm": 21.911828994750977, + "learning_rate": 1.0046250928716052e-06, + "loss": 0.9338, + "step": 1972, + "train/speech_entropy": 3.61138664437024, + "train/text_entropy": 0.8517686887220903, + "train/token_acc": 0.3188010899182561 + }, + { + "epoch": 0.292079940784604, + "grad_norm": 16.636383056640625, + "learning_rate": 1.0043006801406056e-06, + "loss": 0.6812, + "step": 1973, + "train/speech_entropy": 3.7679082439410796, + "train/text_entropy": 0.4357635062812959, + "train/token_acc": 0.33459119496855344 + }, + { + "epoch": 0.2922279792746114, + "grad_norm": 13.421733856201172, + "learning_rate": 1.0039880568569698e-06, + "loss": 0.877, + "step": 1974, + "train/speech_entropy": 4.0633924928851854, + "train/text_entropy": 1.0919601517885469, + "train/token_acc": 0.2735690235690236 + }, + { + "epoch": 0.2923760177646188, + "grad_norm": 18.947032928466797, + "learning_rate": 1.003687223840515e-06, + "loss": 1.1855, + "step": 1975, + "train/speech_entropy": 4.021431346356642, + "train/text_entropy": 1.3278372022840712, + "train/token_acc": 0.2744479495268139 + }, + { + "epoch": 0.2925240562546262, + "grad_norm": 15.280749320983887, + "learning_rate": 1.0033981818801418e-06, + "loss": 0.5115, + "step": 1976, + "train/speech_entropy": 3.601148924920084, + "train/text_entropy": 0.43209467287416814, + "train/token_acc": 0.33760683760683763 + }, + { + "epoch": 0.2926720947446336, + "grad_norm": 9.783923149108887, + "learning_rate": 1.0031209317338276e-06, + "loss": 0.374, + "step": 1977, + "train/speech_entropy": 3.807644413094479, + "train/text_entropy": 0.4934172185262044, + "train/token_acc": 0.3349282296650718 + }, + { + "epoch": 0.292820133234641, + "grad_norm": 17.45850372314453, + "learning_rate": 1.0028554741286302e-06, + "loss": 1.6836, + "step": 1978, + "train/speech_entropy": 4.043073566143329, + "train/text_entropy": 1.7019959189675071, + "train/token_acc": 0.24841269841269842 + }, + { + "epoch": 0.2929681717246484, + "grad_norm": 9.562297821044922, + "learning_rate": 1.00260180976068e-06, + "loss": 0.8945, + "step": 1979, + "train/speech_entropy": 4.080798724429332, + "train/text_entropy": 0.8659330677782369, + "train/token_acc": 0.31805682859761686 + }, + { + "epoch": 0.29311621021465584, + "grad_norm": 18.182201385498047, + "learning_rate": 1.002359939295183e-06, + "loss": 0.9805, + "step": 1980, + "train/speech_entropy": 3.9065534767297914, + "train/text_entropy": 0.8778571319580079, + "train/token_acc": 0.2998236331569665 + }, + { + "epoch": 0.2932642487046632, + "grad_norm": 9.91034984588623, + "learning_rate": 1.0021298633664162e-06, + "loss": 1.0039, + "step": 1981, + "train/speech_entropy": 4.115584375790739, + "train/text_entropy": 0.8672989204831837, + "train/token_acc": 0.3210659898477157 + }, + { + "epoch": 0.2934122871946706, + "grad_norm": 18.774925231933594, + "learning_rate": 1.001911582577726e-06, + "loss": 1.4062, + "step": 1982, + "train/speech_entropy": 3.866155572276298, + "train/text_entropy": 1.5809829907539563, + "train/token_acc": 0.27364864864864863 + }, + { + "epoch": 0.293560325684678, + "grad_norm": 16.88981056213379, + "learning_rate": 1.0017050975015297e-06, + "loss": 0.7778, + "step": 1983, + "train/speech_entropy": 4.452549424848298, + "train/text_entropy": 1.2804627400456052, + "train/token_acc": 0.24242424242424243 + }, + { + "epoch": 0.29370836417468543, + "grad_norm": 14.489745140075684, + "learning_rate": 1.0015104086793093e-06, + "loss": 1.3633, + "step": 1984, + "train/speech_entropy": 4.102891522211161, + "train/text_entropy": 1.367679316104609, + "train/token_acc": 0.27645305514157975 + }, + { + "epoch": 0.2938564026646928, + "grad_norm": 17.827312469482422, + "learning_rate": 1.0013275166216139e-06, + "loss": 2.0, + "step": 1985, + "train/speech_entropy": 4.430764065181636, + "train/text_entropy": 2.1669140105551863, + "train/token_acc": 0.2551848512173129 + }, + { + "epoch": 0.2940044411547002, + "grad_norm": 18.358158111572266, + "learning_rate": 1.001156421808057e-06, + "loss": 1.3301, + "step": 1986, + "train/speech_entropy": 4.18622447907326, + "train/text_entropy": 1.620931044868801, + "train/token_acc": 0.23952095808383234 + }, + { + "epoch": 0.29415247964470764, + "grad_norm": 19.925498962402344, + "learning_rate": 1.0009971246873138e-06, + "loss": 1.3125, + "step": 1987, + "train/speech_entropy": 3.9159069061279297, + "train/text_entropy": 1.4580238342285157, + "train/token_acc": 0.2766233766233766 + }, + { + "epoch": 0.29430051813471503, + "grad_norm": 21.307292938232422, + "learning_rate": 1.0008496256771236e-06, + "loss": 1.4883, + "step": 1988, + "train/speech_entropy": 4.115118656313516, + "train/text_entropy": 1.4495697612910308, + "train/token_acc": 0.28360215053763443 + }, + { + "epoch": 0.2944485566247224, + "grad_norm": 18.532726287841797, + "learning_rate": 1.0007139251642844e-06, + "loss": 1.2031, + "step": 1989, + "train/speech_entropy": 4.272257255785393, + "train/text_entropy": 1.2431964072860588, + "train/token_acc": 0.2472952086553323 + }, + { + "epoch": 0.29459659511472985, + "grad_norm": 15.288809776306152, + "learning_rate": 1.0005900235046552e-06, + "loss": 1.1562, + "step": 1990, + "train/speech_entropy": 4.237601404251066, + "train/text_entropy": 1.3585506677627563, + "train/token_acc": 0.2557522123893805 + }, + { + "epoch": 0.29474463360473724, + "grad_norm": 15.024271011352539, + "learning_rate": 1.0004779210231541e-06, + "loss": 1.1309, + "step": 1991, + "train/speech_entropy": 4.2837304741573226, + "train/text_entropy": 1.4586296300778443, + "train/token_acc": 0.23676880222841226 + }, + { + "epoch": 0.2948926720947446, + "grad_norm": 19.465694427490234, + "learning_rate": 1.0003776180137568e-06, + "loss": 1.2148, + "step": 1992, + "train/speech_entropy": 3.9568638955393145, + "train/text_entropy": 1.1945547197685866, + "train/token_acc": 0.2850539291217257 + }, + { + "epoch": 0.295040710584752, + "grad_norm": 14.089579582214355, + "learning_rate": 1.0002891147394956e-06, + "loss": 0.8672, + "step": 1993, + "train/speech_entropy": 4.238598506478412, + "train/text_entropy": 1.054604156672606, + "train/token_acc": 0.2627118644067797 + }, + { + "epoch": 0.29518874907475945, + "grad_norm": 13.71816349029541, + "learning_rate": 1.0002124114324607e-06, + "loss": 0.916, + "step": 1994, + "train/speech_entropy": 4.078575989156646, + "train/text_entropy": 1.0510797021595855, + "train/token_acc": 0.30472103004291845 + }, + { + "epoch": 0.29533678756476683, + "grad_norm": 18.48227882385254, + "learning_rate": 1.0001475082937967e-06, + "loss": 1.5352, + "step": 1995, + "train/speech_entropy": 3.9049566067642902, + "train/text_entropy": 1.5543014786460183, + "train/token_acc": 0.26954732510288065 + }, + { + "epoch": 0.2954848260547742, + "grad_norm": 16.75946044921875, + "learning_rate": 1.0000944054937055e-06, + "loss": 0.9297, + "step": 1996, + "train/speech_entropy": 4.066568746195211, + "train/text_entropy": 1.0946391590854578, + "train/token_acc": 0.3038194444444444 + }, + { + "epoch": 0.29563286454478166, + "grad_norm": 19.69864273071289, + "learning_rate": 1.0000531031714428e-06, + "loss": 1.3008, + "step": 1997, + "train/speech_entropy": 3.946051014108711, + "train/text_entropy": 1.2232692134272944, + "train/token_acc": 0.2714723926380368 + }, + { + "epoch": 0.29578090303478904, + "grad_norm": 14.400445938110352, + "learning_rate": 1.0000236014353187e-06, + "loss": 0.7754, + "step": 1998, + "train/speech_entropy": 3.94598142742674, + "train/text_entropy": 0.8769394701177423, + "train/token_acc": 0.3015075376884422 + }, + { + "epoch": 0.29592894152479643, + "grad_norm": 22.411455154418945, + "learning_rate": 1.000005900362698e-06, + "loss": 1.8945, + "step": 1999, + "train/speech_entropy": 4.471884719463957, + "train/text_entropy": 2.1761456141396174, + "train/token_acc": 0.26578073089701 + }, + { + "epoch": 0.29607698001480387, + "grad_norm": 14.8316011428833, + "learning_rate": 1.0000000000000002e-06, + "loss": 1.1875, + "step": 2000, + "train/speech_entropy": 4.238147257049451, + "train/text_entropy": 1.2957160071989076, + "train/token_acc": 0.26825633383010433 + } + ], + "logging_steps": 1, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 24239472115456.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}